2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
/*
 * Explicit initialization for extent_record::flag_block_full_backref:
 * the 2-bit field holds 0/1 once known, FLAG_UNSET (2) until determined.
 */
enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
/* Error bits for name/backref mismatches (inode_backref::errors etc.). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
/* Error bits recorded on inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
438 * Error bit for low memory mode check.
440 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * NOTE(review): CROSSING_STRIPE_BOUNDARY shares bit (1 << 4) with
 * REFERENCER_MISMATCH, so the two conditions are indistinguishable in the
 * error mask. Kept as-is to preserve behavior — confirm intent upstream
 * before renumbering.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
452 static void *print_status_check(void *p)
454 struct task_ctx *priv = p;
455 const char work_indicator[] = { '.', 'o', 'O', 'o' };
457 static char *task_position_string[] = {
459 "checking free space cache",
463 task_period_start(priv->info, 1000 /* 1s */);
465 if (priv->tp == TASK_NOTHING)
469 printf("%s [%c]\r", task_position_string[priv->tp],
470 work_indicator[count % 4]);
473 task_period_wait(priv->info);
/* Task-finish callback: terminate the status line started by the printer. */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);

	return 0;
}
486 /* Compatible function to allow reuse of old codes */
487 static u64 first_extent_gap(struct rb_root *holes)
489 struct file_extent_hole *hole;
491 if (RB_EMPTY_ROOT(holes))
494 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
498 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
500 struct file_extent_hole *hole1;
501 struct file_extent_hole *hole2;
503 hole1 = rb_entry(node1, struct file_extent_hole, node);
504 hole2 = rb_entry(node2, struct file_extent_hole, node);
506 if (hole1->start > hole2->start)
508 if (hole1->start < hole2->start)
510 /* Now hole1->start == hole2->start */
511 if (hole1->len >= hole2->len)
513 * Hole 1 will be merge center
514 * Same hole will be merged later
517 /* Hole 2 will be merge center */
522 * Add a hole to the record
524 * This will do hole merge for copy_file_extent_holes(),
525 * which will ensure there won't be continuous holes.
527 static int add_file_extent_hole(struct rb_root *holes,
530 struct file_extent_hole *hole;
531 struct file_extent_hole *prev = NULL;
532 struct file_extent_hole *next = NULL;
534 hole = malloc(sizeof(*hole));
539 /* Since compare will not return 0, no -EEXIST will happen */
540 rb_insert(holes, &hole->node, compare_hole);
542 /* simple merge with previous hole */
543 if (rb_prev(&hole->node))
544 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
546 if (prev && prev->start + prev->len >= hole->start) {
547 hole->len = hole->start + hole->len - prev->start;
548 hole->start = prev->start;
549 rb_erase(&prev->node, holes);
554 /* iterate merge with next holes */
556 if (!rb_next(&hole->node))
558 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
560 if (hole->start + hole->len >= next->start) {
561 if (hole->start + hole->len <= next->start + next->len)
562 hole->len = next->start + next->len -
564 rb_erase(&next->node, holes);
573 static int compare_hole_range(struct rb_node *node, void *data)
575 struct file_extent_hole *hole;
578 hole = (struct file_extent_hole *)data;
581 hole = rb_entry(node, struct file_extent_hole, node);
582 if (start < hole->start)
584 if (start >= hole->start && start < hole->start + hole->len)
590 * Delete a hole in the record
592 * This will do the hole split and is much restrict than add.
594 static int del_file_extent_hole(struct rb_root *holes,
597 struct file_extent_hole *hole;
598 struct file_extent_hole tmp;
603 struct rb_node *node;
610 node = rb_search(holes, &tmp, compare_hole_range, NULL);
613 hole = rb_entry(node, struct file_extent_hole, node);
614 if (start + len > hole->start + hole->len)
618 * Now there will be no overlap, delete the hole and re-add the
619 * split(s) if they exists.
621 if (start > hole->start) {
622 prev_start = hole->start;
623 prev_len = start - hole->start;
626 if (hole->start + hole->len > start + len) {
627 next_start = start + len;
628 next_len = hole->start + hole->len - start - len;
631 rb_erase(node, holes);
634 ret = add_file_extent_hole(holes, prev_start, prev_len);
639 ret = add_file_extent_hole(holes, next_start, next_len);
646 static int copy_file_extent_holes(struct rb_root *dst,
649 struct file_extent_hole *hole;
650 struct rb_node *node;
653 node = rb_first(src);
655 hole = rb_entry(node, struct file_extent_hole, node);
656 ret = add_file_extent_hole(dst, hole->start, hole->len);
659 node = rb_next(node);
664 static void free_file_extent_holes(struct rb_root *holes)
666 struct rb_node *node;
667 struct file_extent_hole *hole;
669 node = rb_first(holes);
671 hole = rb_entry(node, struct file_extent_hole, node);
672 rb_erase(node, holes);
674 node = rb_first(holes);
678 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
680 static void record_root_in_trans(struct btrfs_trans_handle *trans,
681 struct btrfs_root *root)
683 if (root->last_trans != trans->transid) {
684 root->track_dirty = 1;
685 root->last_trans = trans->transid;
686 root->commit_root = root->node;
687 extent_buffer_get(root->node);
691 static u8 imode_to_type(u32 imode)
694 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
695 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
696 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
697 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
698 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
699 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
700 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
701 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
704 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
708 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
710 struct device_record *rec1;
711 struct device_record *rec2;
713 rec1 = rb_entry(node1, struct device_record, node);
714 rec2 = rb_entry(node2, struct device_record, node);
715 if (rec1->devid > rec2->devid)
717 else if (rec1->devid < rec2->devid)
723 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
725 struct inode_record *rec;
726 struct inode_backref *backref;
727 struct inode_backref *orig;
728 struct inode_backref *tmp;
729 struct orphan_data_extent *src_orphan;
730 struct orphan_data_extent *dst_orphan;
734 rec = malloc(sizeof(*rec));
736 return ERR_PTR(-ENOMEM);
737 memcpy(rec, orig_rec, sizeof(*rec));
739 INIT_LIST_HEAD(&rec->backrefs);
740 INIT_LIST_HEAD(&rec->orphan_extents);
741 rec->holes = RB_ROOT;
743 list_for_each_entry(orig, &orig_rec->backrefs, list) {
744 size = sizeof(*orig) + orig->namelen + 1;
745 backref = malloc(size);
750 memcpy(backref, orig, size);
751 list_add_tail(&backref->list, &rec->backrefs);
753 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
754 dst_orphan = malloc(sizeof(*dst_orphan));
759 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
760 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
762 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
768 if (!list_empty(&rec->backrefs))
769 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
770 list_del(&orig->list);
774 if (!list_empty(&rec->orphan_extents))
775 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
776 list_del(&orig->list);
785 static void print_orphan_data_extents(struct list_head *orphan_extents,
788 struct orphan_data_extent *orphan;
790 if (list_empty(orphan_extents))
792 printf("The following data extent is lost in tree %llu:\n",
794 list_for_each_entry(orphan, orphan_extents, list) {
795 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
796 orphan->objectid, orphan->offset, orphan->disk_bytenr,
801 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
803 u64 root_objectid = root->root_key.objectid;
804 int errors = rec->errors;
808 /* reloc root errors, we print its corresponding fs root objectid*/
809 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
810 root_objectid = root->root_key.offset;
811 fprintf(stderr, "reloc");
813 fprintf(stderr, "root %llu inode %llu errors %x",
814 (unsigned long long) root_objectid,
815 (unsigned long long) rec->ino, rec->errors);
817 if (errors & I_ERR_NO_INODE_ITEM)
818 fprintf(stderr, ", no inode item");
819 if (errors & I_ERR_NO_ORPHAN_ITEM)
820 fprintf(stderr, ", no orphan item");
821 if (errors & I_ERR_DUP_INODE_ITEM)
822 fprintf(stderr, ", dup inode item");
823 if (errors & I_ERR_DUP_DIR_INDEX)
824 fprintf(stderr, ", dup dir index");
825 if (errors & I_ERR_ODD_DIR_ITEM)
826 fprintf(stderr, ", odd dir item");
827 if (errors & I_ERR_ODD_FILE_EXTENT)
828 fprintf(stderr, ", odd file extent");
829 if (errors & I_ERR_BAD_FILE_EXTENT)
830 fprintf(stderr, ", bad file extent");
831 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
832 fprintf(stderr, ", file extent overlap");
833 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
834 fprintf(stderr, ", file extent discount");
835 if (errors & I_ERR_DIR_ISIZE_WRONG)
836 fprintf(stderr, ", dir isize wrong");
837 if (errors & I_ERR_FILE_NBYTES_WRONG)
838 fprintf(stderr, ", nbytes wrong");
839 if (errors & I_ERR_ODD_CSUM_ITEM)
840 fprintf(stderr, ", odd csum item");
841 if (errors & I_ERR_SOME_CSUM_MISSING)
842 fprintf(stderr, ", some csum missing");
843 if (errors & I_ERR_LINK_COUNT_WRONG)
844 fprintf(stderr, ", link count wrong");
845 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
846 fprintf(stderr, ", orphan file extent");
847 fprintf(stderr, "\n");
848 /* Print the orphan extents if needed */
849 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
850 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
852 /* Print the holes if needed */
853 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
854 struct file_extent_hole *hole;
855 struct rb_node *node;
858 node = rb_first(&rec->holes);
859 fprintf(stderr, "Found file extent holes:\n");
862 hole = rb_entry(node, struct file_extent_hole, node);
863 fprintf(stderr, "\tstart: %llu, len: %llu\n",
864 hole->start, hole->len);
865 node = rb_next(node);
868 fprintf(stderr, "\tstart: 0, len: %llu\n",
869 round_up(rec->isize, root->sectorsize));
873 static void print_ref_error(int errors)
875 if (errors & REF_ERR_NO_DIR_ITEM)
876 fprintf(stderr, ", no dir item");
877 if (errors & REF_ERR_NO_DIR_INDEX)
878 fprintf(stderr, ", no dir index");
879 if (errors & REF_ERR_NO_INODE_REF)
880 fprintf(stderr, ", no inode ref");
881 if (errors & REF_ERR_DUP_DIR_ITEM)
882 fprintf(stderr, ", dup dir item");
883 if (errors & REF_ERR_DUP_DIR_INDEX)
884 fprintf(stderr, ", dup dir index");
885 if (errors & REF_ERR_DUP_INODE_REF)
886 fprintf(stderr, ", dup inode ref");
887 if (errors & REF_ERR_INDEX_UNMATCH)
888 fprintf(stderr, ", index mismatch");
889 if (errors & REF_ERR_FILETYPE_UNMATCH)
890 fprintf(stderr, ", filetype mismatch");
891 if (errors & REF_ERR_NAME_TOO_LONG)
892 fprintf(stderr, ", name too long");
893 if (errors & REF_ERR_NO_ROOT_REF)
894 fprintf(stderr, ", no root ref");
895 if (errors & REF_ERR_NO_ROOT_BACKREF)
896 fprintf(stderr, ", no root backref");
897 if (errors & REF_ERR_DUP_ROOT_REF)
898 fprintf(stderr, ", dup root ref");
899 if (errors & REF_ERR_DUP_ROOT_BACKREF)
900 fprintf(stderr, ", dup root backref");
901 fprintf(stderr, "\n");
904 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
907 struct ptr_node *node;
908 struct cache_extent *cache;
909 struct inode_record *rec = NULL;
912 cache = lookup_cache_extent(inode_cache, ino, 1);
914 node = container_of(cache, struct ptr_node, cache);
916 if (mod && rec->refs > 1) {
917 node->data = clone_inode_rec(rec);
918 if (IS_ERR(node->data))
924 rec = calloc(1, sizeof(*rec));
926 return ERR_PTR(-ENOMEM);
928 rec->extent_start = (u64)-1;
930 INIT_LIST_HEAD(&rec->backrefs);
931 INIT_LIST_HEAD(&rec->orphan_extents);
932 rec->holes = RB_ROOT;
934 node = malloc(sizeof(*node));
937 return ERR_PTR(-ENOMEM);
939 node->cache.start = ino;
940 node->cache.size = 1;
943 if (ino == BTRFS_FREE_INO_OBJECTID)
946 ret = insert_cache_extent(inode_cache, &node->cache);
948 return ERR_PTR(-EEXIST);
953 static void free_orphan_data_extents(struct list_head *orphan_extents)
955 struct orphan_data_extent *orphan;
957 while (!list_empty(orphan_extents)) {
958 orphan = list_entry(orphan_extents->next,
959 struct orphan_data_extent, list);
960 list_del(&orphan->list);
965 static void free_inode_rec(struct inode_record *rec)
967 struct inode_backref *backref;
972 while (!list_empty(&rec->backrefs)) {
973 backref = to_inode_backref(rec->backrefs.next);
974 list_del(&backref->list);
977 free_orphan_data_extents(&rec->orphan_extents);
978 free_file_extent_holes(&rec->holes);
982 static int can_free_inode_rec(struct inode_record *rec)
984 if (!rec->errors && rec->checked && rec->found_inode_item &&
985 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
990 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
991 struct inode_record *rec)
993 struct cache_extent *cache;
994 struct inode_backref *tmp, *backref;
995 struct ptr_node *node;
996 unsigned char filetype;
998 if (!rec->found_inode_item)
1001 filetype = imode_to_type(rec->imode);
1002 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1003 if (backref->found_dir_item && backref->found_dir_index) {
1004 if (backref->filetype != filetype)
1005 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1006 if (!backref->errors && backref->found_inode_ref &&
1007 rec->nlink == rec->found_link) {
1008 list_del(&backref->list);
1014 if (!rec->checked || rec->merging)
1017 if (S_ISDIR(rec->imode)) {
1018 if (rec->found_size != rec->isize)
1019 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1020 if (rec->found_file_extent)
1021 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1022 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1023 if (rec->found_dir_item)
1024 rec->errors |= I_ERR_ODD_DIR_ITEM;
1025 if (rec->found_size != rec->nbytes)
1026 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1027 if (rec->nlink > 0 && !no_holes &&
1028 (rec->extent_end < rec->isize ||
1029 first_extent_gap(&rec->holes) < rec->isize))
1030 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1033 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1034 if (rec->found_csum_item && rec->nodatasum)
1035 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1036 if (rec->some_csum_missing && !rec->nodatasum)
1037 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1040 BUG_ON(rec->refs != 1);
1041 if (can_free_inode_rec(rec)) {
1042 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1043 node = container_of(cache, struct ptr_node, cache);
1044 BUG_ON(node->data != rec);
1045 remove_cache_extent(inode_cache, &node->cache);
1047 free_inode_rec(rec);
1051 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1053 struct btrfs_path path;
1054 struct btrfs_key key;
1057 key.objectid = BTRFS_ORPHAN_OBJECTID;
1058 key.type = BTRFS_ORPHAN_ITEM_KEY;
1061 btrfs_init_path(&path);
1062 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1063 btrfs_release_path(&path);
1069 static int process_inode_item(struct extent_buffer *eb,
1070 int slot, struct btrfs_key *key,
1071 struct shared_node *active_node)
1073 struct inode_record *rec;
1074 struct btrfs_inode_item *item;
1076 rec = active_node->current;
1077 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1078 if (rec->found_inode_item) {
1079 rec->errors |= I_ERR_DUP_INODE_ITEM;
1082 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1083 rec->nlink = btrfs_inode_nlink(eb, item);
1084 rec->isize = btrfs_inode_size(eb, item);
1085 rec->nbytes = btrfs_inode_nbytes(eb, item);
1086 rec->imode = btrfs_inode_mode(eb, item);
1087 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1089 rec->found_inode_item = 1;
1090 if (rec->nlink == 0)
1091 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1092 maybe_free_inode_rec(&active_node->inode_cache, rec);
1096 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1098 int namelen, u64 dir)
1100 struct inode_backref *backref;
1102 list_for_each_entry(backref, &rec->backrefs, list) {
1103 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1105 if (backref->dir != dir || backref->namelen != namelen)
1107 if (memcmp(name, backref->name, namelen))
1112 backref = malloc(sizeof(*backref) + namelen + 1);
1115 memset(backref, 0, sizeof(*backref));
1117 backref->namelen = namelen;
1118 memcpy(backref->name, name, namelen);
1119 backref->name[namelen] = '\0';
1120 list_add_tail(&backref->list, &rec->backrefs);
1124 static int add_inode_backref(struct cache_tree *inode_cache,
1125 u64 ino, u64 dir, u64 index,
1126 const char *name, int namelen,
1127 int filetype, int itemtype, int errors)
1129 struct inode_record *rec;
1130 struct inode_backref *backref;
1132 rec = get_inode_rec(inode_cache, ino, 1);
1133 BUG_ON(IS_ERR(rec));
1134 backref = get_inode_backref(rec, name, namelen, dir);
1137 backref->errors |= errors;
1138 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1139 if (backref->found_dir_index)
1140 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1141 if (backref->found_inode_ref && backref->index != index)
1142 backref->errors |= REF_ERR_INDEX_UNMATCH;
1143 if (backref->found_dir_item && backref->filetype != filetype)
1144 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1146 backref->index = index;
1147 backref->filetype = filetype;
1148 backref->found_dir_index = 1;
1149 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1151 if (backref->found_dir_item)
1152 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1153 if (backref->found_dir_index && backref->filetype != filetype)
1154 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1156 backref->filetype = filetype;
1157 backref->found_dir_item = 1;
1158 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1159 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1160 if (backref->found_inode_ref)
1161 backref->errors |= REF_ERR_DUP_INODE_REF;
1162 if (backref->found_dir_index && backref->index != index)
1163 backref->errors |= REF_ERR_INDEX_UNMATCH;
1165 backref->index = index;
1167 backref->ref_type = itemtype;
1168 backref->found_inode_ref = 1;
1173 maybe_free_inode_rec(inode_cache, rec);
1177 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1178 struct cache_tree *dst_cache)
1180 struct inode_backref *backref;
1185 list_for_each_entry(backref, &src->backrefs, list) {
1186 if (backref->found_dir_index) {
1187 add_inode_backref(dst_cache, dst->ino, backref->dir,
1188 backref->index, backref->name,
1189 backref->namelen, backref->filetype,
1190 BTRFS_DIR_INDEX_KEY, backref->errors);
1192 if (backref->found_dir_item) {
1194 add_inode_backref(dst_cache, dst->ino,
1195 backref->dir, 0, backref->name,
1196 backref->namelen, backref->filetype,
1197 BTRFS_DIR_ITEM_KEY, backref->errors);
1199 if (backref->found_inode_ref) {
1200 add_inode_backref(dst_cache, dst->ino,
1201 backref->dir, backref->index,
1202 backref->name, backref->namelen, 0,
1203 backref->ref_type, backref->errors);
1207 if (src->found_dir_item)
1208 dst->found_dir_item = 1;
1209 if (src->found_file_extent)
1210 dst->found_file_extent = 1;
1211 if (src->found_csum_item)
1212 dst->found_csum_item = 1;
1213 if (src->some_csum_missing)
1214 dst->some_csum_missing = 1;
1215 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1216 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1221 BUG_ON(src->found_link < dir_count);
1222 dst->found_link += src->found_link - dir_count;
1223 dst->found_size += src->found_size;
1224 if (src->extent_start != (u64)-1) {
1225 if (dst->extent_start == (u64)-1) {
1226 dst->extent_start = src->extent_start;
1227 dst->extent_end = src->extent_end;
1229 if (dst->extent_end > src->extent_start)
1230 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1231 else if (dst->extent_end < src->extent_start) {
1232 ret = add_file_extent_hole(&dst->holes,
1234 src->extent_start - dst->extent_end);
1236 if (dst->extent_end < src->extent_end)
1237 dst->extent_end = src->extent_end;
1241 dst->errors |= src->errors;
1242 if (src->found_inode_item) {
1243 if (!dst->found_inode_item) {
1244 dst->nlink = src->nlink;
1245 dst->isize = src->isize;
1246 dst->nbytes = src->nbytes;
1247 dst->imode = src->imode;
1248 dst->nodatasum = src->nodatasum;
1249 dst->found_inode_item = 1;
1251 dst->errors |= I_ERR_DUP_INODE_ITEM;
1259 static int splice_shared_node(struct shared_node *src_node,
1260 struct shared_node *dst_node)
1262 struct cache_extent *cache;
1263 struct ptr_node *node, *ins;
1264 struct cache_tree *src, *dst;
1265 struct inode_record *rec, *conflict;
1266 u64 current_ino = 0;
1270 if (--src_node->refs == 0)
1272 if (src_node->current)
1273 current_ino = src_node->current->ino;
1275 src = &src_node->root_cache;
1276 dst = &dst_node->root_cache;
1278 cache = search_cache_extent(src, 0);
1280 node = container_of(cache, struct ptr_node, cache);
1282 cache = next_cache_extent(cache);
1285 remove_cache_extent(src, &node->cache);
1288 ins = malloc(sizeof(*ins));
1290 ins->cache.start = node->cache.start;
1291 ins->cache.size = node->cache.size;
1295 ret = insert_cache_extent(dst, &ins->cache);
1296 if (ret == -EEXIST) {
1297 conflict = get_inode_rec(dst, rec->ino, 1);
1298 BUG_ON(IS_ERR(conflict));
1299 merge_inode_recs(rec, conflict, dst);
1301 conflict->checked = 1;
1302 if (dst_node->current == conflict)
1303 dst_node->current = NULL;
1305 maybe_free_inode_rec(dst, conflict);
1306 free_inode_rec(rec);
1313 if (src == &src_node->root_cache) {
1314 src = &src_node->inode_cache;
1315 dst = &dst_node->inode_cache;
1319 if (current_ino > 0 && (!dst_node->current ||
1320 current_ino > dst_node->current->ino)) {
1321 if (dst_node->current) {
1322 dst_node->current->checked = 1;
1323 maybe_free_inode_rec(dst, dst_node->current);
1325 dst_node->current = get_inode_rec(dst, current_ino, 1);
1326 BUG_ON(IS_ERR(dst_node->current));
1331 static void free_inode_ptr(struct cache_extent *cache)
1333 struct ptr_node *node;
1334 struct inode_record *rec;
1336 node = container_of(cache, struct ptr_node, cache);
1338 free_inode_rec(rec);
1342 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1344 static struct shared_node *find_shared_node(struct cache_tree *shared,
1347 struct cache_extent *cache;
1348 struct shared_node *node;
1350 cache = lookup_cache_extent(shared, bytenr, 1);
1352 node = container_of(cache, struct shared_node, cache);
1358 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1361 struct shared_node *node;
1363 node = calloc(1, sizeof(*node));
1366 node->cache.start = bytenr;
1367 node->cache.size = 1;
1368 cache_tree_init(&node->root_cache);
1369 cache_tree_init(&node->inode_cache);
1372 ret = insert_cache_extent(shared, &node->cache);
/*
 * Record that the walk has entered a shared tree block at @bytenr on
 * @level.  Creates/looks up the shared_node bookkeeping entry and makes
 * it the active node of the walk_control.  For a dead root (root_refs 0)
 * the per-node caches are dropped once the last reference goes away;
 * otherwise accumulated records are spliced into the parent level's node.
 */
1377 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1378 			     struct walk_control *wc, int level)
1380 	struct shared_node *node;
1381 	struct shared_node *dest;
	/* Already tracking this level — nothing to do. */
1384 	if (level == wc->active_node)
1387 	BUG_ON(wc->active_node <= level);
1388 	node = find_shared_node(&wc->shared, bytenr);
1390 		ret = add_shared_node(&wc->shared, bytenr, refs);
1392 		node = find_shared_node(&wc->shared, bytenr);
1393 	wc->nodes[level] = node;
1394 	wc->active_node = level;
	/* Dead root: free caches and drop the entry on the last reference. */
1398 	if (wc->root_level == wc->active_node &&
1399 	    btrfs_root_refs(&root->root_item) == 0) {
1400 		if (--node->refs == 0) {
1401 			free_inode_recs_tree(&node->root_cache);
1402 			free_inode_recs_tree(&node->inode_cache);
1403 			remove_cache_extent(&wc->shared, &node->cache);
	/* Live root: merge this node's records into the active parent node. */
1409 	dest = wc->nodes[wc->active_node];
1410 	splice_shared_node(node, dest);
1411 	if (node->refs == 0) {
1412 		remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk pops back above
 * @level.  Finds the next tracked level above, makes it active, and splices
 * the departing node's records upward unless the root is dead and we are
 * leaving its top level.
 */
1418 static int leave_shared_node(struct btrfs_root *root,
1419 			     struct walk_control *wc, int level)
1421 	struct shared_node *node;
1422 	struct shared_node *dest;
	/* Leaving the root level: nothing above to splice into. */
1425 	if (level == wc->root_level)
	/* Find the nearest tracked level above @level. */
1428 	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1432 	BUG_ON(i >= BTRFS_MAX_LEVEL);
1434 	node = wc->nodes[wc->active_node];
1435 	wc->nodes[wc->active_node] = NULL;
1436 	wc->active_node = i;
1438 	dest = wc->nodes[wc->active_node];
1439 	if (wc->active_node < wc->root_level ||
1440 	    btrfs_root_refs(&root->root_item) > 0) {
1441 		BUG_ON(node->refs <= 1);
1442 		splice_shared_node(node, dest);
1444 		BUG_ON(node->refs < 2);
1453  * 1 - if the root with id child_root_id is a child of root parent_root_id
1454  * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1455  *     has other root(s) as parent(s)
1456  * 2 - if the root child_root_id doesn't have any parent roots
1458 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1461 	struct btrfs_path path;
1462 	struct btrfs_key key;
1463 	struct extent_buffer *leaf;
1467 	btrfs_init_path(&path);
	/* Fast path: look for a direct ROOT_REF from parent to child. */
1469 	key.objectid = parent_root_id;
1470 	key.type = BTRFS_ROOT_REF_KEY;
1471 	key.offset = child_root_id;
1472 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1476 	btrfs_release_path(&path);
	/* Otherwise scan all ROOT_BACKREF items of the child. */
1480 	key.objectid = child_root_id;
1481 	key.type = BTRFS_ROOT_BACKREF_KEY;
1483 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1489 	leaf = path.nodes[0];
1490 	if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1491 		ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1494 		leaf = path.nodes[0];
1497 	btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
	/* Ran past the child's backrefs — stop scanning. */
1498 	if (key.objectid != child_root_id ||
1499 	    key.type != BTRFS_ROOT_BACKREF_KEY)
1504 	if (key.offset == parent_root_id) {
1505 		btrfs_release_path(&path);
1512 	btrfs_release_path(&path);
1515 	return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into one DIR_ITEM/DIR_INDEX leaf item
 * and record a backref for each name: INODE_ITEM locations go into the
 * inode cache, ROOT_ITEM locations (subvolume entries) into the root
 * cache, anything else is reported and filed under
 * BTRFS_MULTIPLE_OBJECTIDS.  Also accounts the names' length into the
 * directory inode's found_size for later isize checking.
 */
1518 static int process_dir_item(struct btrfs_root *root,
1519 			    struct extent_buffer *eb,
1520 			    int slot, struct btrfs_key *key,
1521 			    struct shared_node *active_node)
1531 	struct btrfs_dir_item *di;
1532 	struct inode_record *rec;
1533 	struct cache_tree *root_cache;
1534 	struct cache_tree *inode_cache;
1535 	struct btrfs_key location;
1536 	char namebuf[BTRFS_NAME_LEN];
1538 	root_cache = &active_node->root_cache;
1539 	inode_cache = &active_node->inode_cache;
1540 	rec = active_node->current;
1541 	rec->found_dir_item = 1;
1543 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1544 	total = btrfs_item_size_nr(eb, slot);
	/* Multiple dir entries can share one item; iterate them all. */
1545 	while (cur < total) {
1547 		btrfs_dir_item_key_to_cpu(eb, di, &location);
1548 		name_len = btrfs_dir_name_len(eb, di);
1549 		data_len = btrfs_dir_data_len(eb, di);
1550 		filetype = btrfs_dir_type(eb, di);
1552 		rec->found_size += name_len;
1553 		if (name_len <= BTRFS_NAME_LEN) {
			/* Oversized name: truncate and flag the backref. */
1557 			len = BTRFS_NAME_LEN;
1558 			error = REF_ERR_NAME_TOO_LONG;
1560 		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1562 		if (location.type == BTRFS_INODE_ITEM_KEY) {
1563 			add_inode_backref(inode_cache, location.objectid,
1564 					  key->objectid, key->offset, namebuf,
1565 					  len, filetype, key->type, error);
1566 		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1567 			add_inode_backref(root_cache, location.objectid,
1568 					  key->objectid, key->offset,
1569 					  namebuf, len, filetype,
1572 			fprintf(stderr, "invalid location in dir item %u\n",
1574 			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1575 					  key->objectid, key->offset, namebuf,
1576 					  len, filetype, key->type, error);
		/* Advance past this entry's header, name and payload. */
1579 		len = sizeof(*di) + name_len + data_len;
1580 		di = (struct btrfs_dir_item *)((char *)di + len);
	/* A DIR_INDEX item must hold exactly one entry. */
1583 	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1584 		rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into one INODE_REF item and record a
 * backref (ino = key->objectid, parent dir = key->offset) for each name.
 * Names longer than BTRFS_NAME_LEN are truncated and flagged.
 */
1589 static int process_inode_ref(struct extent_buffer *eb,
1590 			     int slot, struct btrfs_key *key,
1591 			     struct shared_node *active_node)
1599 	struct cache_tree *inode_cache;
1600 	struct btrfs_inode_ref *ref;
1601 	char namebuf[BTRFS_NAME_LEN];
1603 	inode_cache = &active_node->inode_cache;
1605 	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1606 	total = btrfs_item_size_nr(eb, slot);
1607 	while (cur < total) {
1608 		name_len = btrfs_inode_ref_name_len(eb, ref);
1609 		index = btrfs_inode_ref_index(eb, ref);
1610 		if (name_len <= BTRFS_NAME_LEN) {
1614 			len = BTRFS_NAME_LEN;
1615 			error = REF_ERR_NAME_TOO_LONG;
1617 		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1618 		add_inode_backref(inode_cache, key->objectid, key->offset,
1619 				  index, namebuf, len, 0, key->type, error);
		/* Name bytes follow the fixed header; step to the next ref. */
1621 		len = sizeof(*ref) + name_len;
1622 		ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs (INODE_EXTREF), which
 * carry the parent directory inside the item instead of in key->offset.
 */
1628 static int process_inode_extref(struct extent_buffer *eb,
1629 				int slot, struct btrfs_key *key,
1630 				struct shared_node *active_node)
1639 	struct cache_tree *inode_cache;
1640 	struct btrfs_inode_extref *extref;
1641 	char namebuf[BTRFS_NAME_LEN];
1643 	inode_cache = &active_node->inode_cache;
1645 	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1646 	total = btrfs_item_size_nr(eb, slot);
1647 	while (cur < total) {
1648 		name_len = btrfs_inode_extref_name_len(eb, extref);
1649 		index = btrfs_inode_extref_index(eb, extref);
		/* Parent dir ino is stored in the extref itself. */
1650 		parent = btrfs_inode_extref_parent(eb, extref);
1651 		if (name_len <= BTRFS_NAME_LEN) {
1655 			len = BTRFS_NAME_LEN;
1656 			error = REF_ERR_NAME_TOO_LONG;
1658 		read_extent_buffer(eb, namebuf,
1659 				   (unsigned long)(extref + 1), len);
1660 		add_inode_backref(inode_cache, key->objectid, parent,
1661 				  index, namebuf, len, 0, key->type, error);
1663 		len = sizeof(*extref) + name_len;
1664 		extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the covered byte count is returned in
 * *found.  Used to decide whether a file extent has (partial) csums.
 */
1671 static int count_csum_range(struct btrfs_root *root, u64 start,
1672 			    u64 len, u64 *found)
1674 	struct btrfs_key key;
1675 	struct btrfs_path path;
1676 	struct extent_buffer *leaf;
1681 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1683 	btrfs_init_path(&path);
1685 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1687 	key.type = BTRFS_EXTENT_CSUM_KEY;
1689 	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
	/*
	 * The csum item covering `start` may begin before it; step back one
	 * slot if the previous item is also a csum item.
	 */
1693 	if (ret > 0 && path.slots[0] > 0) {
1694 		leaf = path.nodes[0];
1695 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1696 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1697 		    key.type == BTRFS_EXTENT_CSUM_KEY)
1702 		leaf = path.nodes[0];
1703 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1704 			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1709 			leaf = path.nodes[0];
1712 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1713 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1714 		    key.type != BTRFS_EXTENT_CSUM_KEY)
1717 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1718 		if (key.offset >= start + len)
1721 		if (key.offset > start)
		/* Each csum_size bytes of item data covers one sector. */
1724 		size = btrfs_item_size_nr(leaf, path.slots[0]);
1725 		csum_end = key.offset + (size / csum_size) * root->sectorsize;
1726 		if (csum_end > start) {
1727 			size = min(csum_end - start, len);
1736 	btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * contiguous extent range (recording overlaps and holes), sanity-check
 * inline/regular/prealloc extents, account found_size, and verify csum
 * coverage for on-disk data extents.
 */
1742 static int process_file_extent(struct btrfs_root *root,
1743 				struct extent_buffer *eb,
1744 				int slot, struct btrfs_key *key,
1745 				struct shared_node *active_node)
1747 	struct inode_record *rec;
1748 	struct btrfs_file_extent_item *fi;
1750 	u64 disk_bytenr = 0;
1751 	u64 extent_offset = 0;
	/* For rounding lengths up to a sector boundary. */
1752 	u64 mask = root->sectorsize - 1;
1756 	rec = active_node->current;
1757 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1758 	rec->found_file_extent = 1;
1760 	if (rec->extent_start == (u64)-1) {
1761 		rec->extent_start = key->offset;
1762 		rec->extent_end = key->offset;
	/* Extents must be contiguous: overlap is an error, a gap is a hole. */
1765 	if (rec->extent_end > key->offset)
1766 		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1767 	else if (rec->extent_end < key->offset) {
1768 		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1769 					   key->offset - rec->extent_end);
1774 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1775 	extent_type = btrfs_file_extent_type(eb, fi);
1777 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1778 		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1780 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1781 		rec->found_size += num_bytes;
		/* Round inline length up to a full sector for range tracking. */
1782 		num_bytes = (num_bytes + mask) & ~mask;
1783 	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1784 		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1785 		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1786 		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1787 		extent_offset = btrfs_file_extent_offset(eb, fi);
		/* Length must be non-zero and sector aligned. */
1788 		if (num_bytes == 0 || (num_bytes & mask))
1789 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790 		if (num_bytes + extent_offset >
1791 		    btrfs_file_extent_ram_bytes(eb, fi))
1792 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents cannot be compressed/encrypted/encoded. */
1793 		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1794 		    (btrfs_file_extent_compression(eb, fi) ||
1795 		     btrfs_file_extent_encryption(eb, fi) ||
1796 		     btrfs_file_extent_other_encoding(eb, fi)))
1797 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* disk_bytenr == 0 means a hole extent: contributes no size. */
1798 		if (disk_bytenr > 0)
1799 			rec->found_size += num_bytes;
1801 		rec->errors |= I_ERR_BAD_FILE_EXTENT;
1803 	rec->extent_end = key->offset + num_bytes;
1806 	 * The data reloc tree will copy full extents into its inode and then
1807 	 * copy the corresponding csums.  Because the extent it copied could be
1808 	 * a preallocated extent that hasn't been written to yet there may be no
1809 	 * csums to copy, ergo we won't have csums for our file extent.  This is
1810 	 * ok so just don't bother checking csums if the inode belongs to the
1813 	if (disk_bytenr > 0 &&
1814 	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		/* Csums cover on-disk (compressed) bytes, not logical bytes. */
1816 		if (btrfs_file_extent_compression(eb, fi))
1817 			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1819 			disk_bytenr += extent_offset;
1821 		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1824 		if (extent_type == BTRFS_FILE_EXTENT_REG) {
1826 				rec->found_csum_item = 1;
1827 			if (found < num_bytes)
1828 				rec->some_csum_missing = 1;
1829 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			/* Prealloc extents must not have csums at all. */
1831 				rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item of a fs-tree leaf and dispatch it to the matching
 * process_*() handler, maintaining active_node->current as the inode
 * record the items belong to (items of one inode are key-ordered).
 */
1837 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1838 			    struct walk_control *wc)
1840 	struct btrfs_key key;
1844 	struct cache_tree *inode_cache;
1845 	struct shared_node *active_node;
	/* Skip leaves of dead roots at the top level. */
1847 	if (wc->root_level == wc->active_node &&
1848 	    btrfs_root_refs(&root->root_item) == 0)
1851 	active_node = wc->nodes[wc->active_node];
1852 	inode_cache = &active_node->inode_cache;
1853 	nritems = btrfs_header_nritems(eb);
1854 	for (i = 0; i < nritems; i++) {
1855 		btrfs_item_key_to_cpu(eb, &key, i);
1857 		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1859 		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
		/* Moved past the current inode: finish it and open the next. */
1862 		if (active_node->current == NULL ||
1863 		    active_node->current->ino < key.objectid) {
1864 			if (active_node->current) {
1865 				active_node->current->checked = 1;
1866 				maybe_free_inode_rec(inode_cache,
1867 						     active_node->current);
1869 			active_node->current = get_inode_rec(inode_cache,
1871 			BUG_ON(IS_ERR(active_node->current));
1874 		case BTRFS_DIR_ITEM_KEY:
1875 		case BTRFS_DIR_INDEX_KEY:
1876 			ret = process_dir_item(root, eb, i, &key, active_node);
1878 		case BTRFS_INODE_REF_KEY:
1879 			ret = process_inode_ref(eb, i, &key, active_node);
1881 		case BTRFS_INODE_EXTREF_KEY:
1882 			ret = process_inode_extref(eb, i, &key, active_node);
1884 		case BTRFS_INODE_ITEM_KEY:
1885 			ret = process_inode_item(eb, i, &key, active_node);
1887 		case BTRFS_EXTENT_DATA_KEY:
1888 			ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot to the end so
 * that the upcoming walk_down_tree() reads hit the cache.  Best-effort:
 * readahead_tree_block() results are intentionally ignored.
 */
1898 static void reada_walk_down(struct btrfs_root *root,
1899 			    struct extent_buffer *node, int slot)
1908 	level = btrfs_header_level(node);
1912 	nritems = btrfs_header_nritems(node);
1913 	blocksize = root->nodesize;
1914 	for (i = slot; i < nritems; i++) {
1915 		bytenr = btrfs_node_blockptr(node, i);
1916 		ptr_gen = btrfs_node_ptr_generation(node, i);
1917 		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1922 * Check the child node/leaf by the following condition:
1923 * 1. the first item key of the node/leaf should be the same with the one
1925 * 2. block in parent node should match the child node/leaf.
1926 * 3. generation of parent node and child's header should be consistent.
1928 * Or the child node/leaf pointed by the key in parent is not valid.
1930 * We hope to check leaf owner too, but since subvol may share leaves,
1931 * which makes leaf owner check not so strong, key check should be
1932 * sufficient enough for that case.
1934 static int check_child_node(struct btrfs_root *root,
1935 			    struct extent_buffer *parent, int slot,
1936 			    struct extent_buffer *child)
1938 	struct btrfs_key parent_key;
1939 	struct btrfs_key child_key;
1942 	btrfs_node_key_to_cpu(parent, &parent_key, slot);
	/* First key of the child: item key for a leaf, node key otherwise. */
1943 	if (btrfs_header_level(child) == 0)
1944 		btrfs_item_key_to_cpu(child, &child_key, 0);
1946 		btrfs_node_key_to_cpu(child, &child_key, 0);
	/* Condition 1: parent's key for this slot matches child's first key. */
1948 	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1951 			"Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1952 			parent_key.objectid, parent_key.type, parent_key.offset,
1953 			child_key.objectid, child_key.type, child_key.offset);
	/* Condition 2: block pointer in the parent matches the child's bytenr. */
1955 	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1957 		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1958 			btrfs_node_blockptr(parent, slot),
1959 			btrfs_header_bytenr(child));
	/* Condition 3: pointer generation matches the child's header. */
1961 	if (btrfs_node_ptr_generation(parent, slot) !=
1962 	    btrfs_header_generation(child)) {
1964 		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1965 			btrfs_header_generation(child),
1966 			btrfs_node_ptr_generation(parent, slot));
	/* Per-level cache of the last looked-up block and its refcount,
	 * so walk_down_tree() can skip repeated extent-tree lookups. */
1972 	u64 bytenr[BTRFS_MAX_LEVEL];
1973 	u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the tree walk: process the current leaf, or read,
 * validate (key/bytenr/generation and leaf/node sanity) and push the next
 * child block.  Shared blocks (refs > 1) are tracked via
 * enter_shared_node() and skipped when already visited.  @nrefs caches
 * extent refcount lookups per level.
 */
1976 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1977 			  struct walk_control *wc, int *level,
1978 			  struct node_refs *nrefs)
1980 	enum btrfs_tree_block_status status;
1983 	struct extent_buffer *next;
1984 	struct extent_buffer *cur;
1989 	WARN_ON(*level < 0);
1990 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* Reuse the cached refcount when re-visiting the same block. */
1992 	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1993 		refs = nrefs->refs[*level];
1996 		ret = btrfs_lookup_extent_info(NULL, root,
1997 					       path->nodes[*level]->start,
1998 					       *level, 1, &refs, NULL);
2003 		nrefs->bytenr[*level] = path->nodes[*level]->start;
2004 		nrefs->refs[*level] = refs;
2008 		ret = enter_shared_node(root, path->nodes[*level]->start,
2016 	while (*level >= 0) {
2017 		WARN_ON(*level < 0);
2018 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
2019 		cur = path->nodes[*level];
2021 		if (btrfs_header_level(cur) != *level)
2024 		if (path->slots[*level] >= btrfs_header_nritems(cur))
		/* Hit a leaf: process its items and stop descending. */
2027 			ret = process_one_leaf(root, cur, wc);
2032 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2033 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2034 		blocksize = root->nodesize;
		/* Same refcount caching as above, for the child block. */
2036 		if (bytenr == nrefs->bytenr[*level - 1]) {
2037 			refs = nrefs->refs[*level - 1];
2039 			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2040 					*level - 1, 1, &refs, NULL);
2044 			nrefs->bytenr[*level - 1] = bytenr;
2045 			nrefs->refs[*level - 1] = refs;
2050 			ret = enter_shared_node(root, bytenr, refs,
			/* Shared block already handled: skip this slot. */
2053 				path->slots[*level]++;
2058 		next = btrfs_find_tree_block(root, bytenr, blocksize);
2059 		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2060 			free_extent_buffer(next);
2061 			reada_walk_down(root, cur, path->slots[*level]);
2062 			next = read_tree_block(root, bytenr, blocksize,
2064 			if (!extent_buffer_uptodate(next)) {
2065 				struct btrfs_key node_key;
2067 				btrfs_node_key_to_cpu(path->nodes[*level],
2069 						      path->slots[*level]);
2070 				btrfs_add_corrupt_extent_record(root->fs_info,
2072 						path->nodes[*level]->start,
2073 						root->nodesize, *level);
2079 		ret = check_child_node(root, cur, path->slots[*level], next);
2085 		if (btrfs_is_leaf(next))
2086 			status = btrfs_check_leaf(root, NULL, next);
2088 			status = btrfs_check_node(root, NULL, next);
2089 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
2090 			free_extent_buffer(next);
		/* Push the validated child and continue one level down. */
2095 		*level = *level - 1;
2096 		free_extent_buffer(path->nodes[*level]);
2097 		path->nodes[*level] = next;
2098 		path->slots[*level] = 0;
2101 	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels of the walk: advance to the next slot where one
 * exists, otherwise release the node, notify leave_shared_node() when
 * leaving the active shared level, and continue upward.
 */
2105 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2106 			struct walk_control *wc, int *level)
2109 	struct extent_buffer *leaf;
2111 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2112 		leaf = path->nodes[i];
2113 		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2118 		free_extent_buffer(path->nodes[*level]);
2119 		path->nodes[*level] = NULL;
2120 		BUG_ON(*level > wc->active_node);
2121 		if (*level == wc->active_node)
2122 			leave_shared_node(root, wc, *level);
/*
 * Verify a subvolume's root directory inode: it must have an inode item
 * with no errors, nlink 1 and no recorded links, and exactly a single
 * ".." inode-ref backref at index 0 with no dir item/index entries.
 */
2129 static int check_root_dir(struct inode_record *rec)
2131 	struct inode_backref *backref;
2134 	if (!rec->found_inode_item || rec->errors)
2136 	if (rec->nlink != 1 || rec->found_link != 0)
2138 	if (list_empty(&rec->backrefs))
2140 	backref = to_inode_backref(rec->backrefs.next);
2141 	if (!backref->found_inode_ref)
2143 	if (backref->index != 0 || backref->namelen != 2 ||
2144 	    memcmp(backref->name, "..", 2))
2146 	if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair: rewrite a directory inode's i_size to the accumulated
 * found_size (sum of entry name lengths) and clear
 * I_ERR_DIR_ISIZE_WRONG on success.
 */
2153 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2154 			      struct btrfs_root *root, struct btrfs_path *path,
2155 			      struct inode_record *rec)
2157 	struct btrfs_inode_item *ei;
2158 	struct btrfs_key key;
	/* offset (u64)-1 + backwards step below finds the INODE_ITEM. */
2161 	key.objectid = rec->ino;
2162 	key.type = BTRFS_INODE_ITEM_KEY;
2163 	key.offset = (u64)-1;
2165 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2169 		if (!path->slots[0]) {
2176 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2177 		if (key.objectid != rec->ino) {
2182 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2183 			    struct btrfs_inode_item);
2184 	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2185 	btrfs_mark_buffer_dirty(path->nodes[0]);
2186 	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2187 	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2188 	       root->root_key.objectid);
2190 	btrfs_release_path(path);
/*
 * Repair: insert the missing orphan item for an unlinked inode and clear
 * I_ERR_NO_ORPHAN_ITEM (presumably only on success — the elided lines
 * should gate the flag clearing on ret; confirm against full source).
 */
2194 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2195 				    struct btrfs_root *root,
2196 				    struct btrfs_path *path,
2197 				    struct inode_record *rec)
2201 	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2202 	btrfs_release_path(path);
2204 		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair: rewrite an inode's nbytes to the accumulated found_size of its
 * file extents and clear I_ERR_FILE_NBYTES_WRONG.
 */
2208 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2209 			       struct btrfs_root *root,
2210 			       struct btrfs_path *path,
2211 			       struct inode_record *rec)
2213 	struct btrfs_inode_item *ei;
2214 	struct btrfs_key key;
2217 	key.objectid = rec->ino;
2218 	key.type = BTRFS_INODE_ITEM_KEY;
2221 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2228 	/* Since ret == 0, no need to check anything */
2229 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2230 			    struct btrfs_inode_item);
2231 	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2232 	btrfs_mark_buffer_dirty(path->nodes[0]);
2233 	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2234 	printf("reset nbytes for ino %llu root %llu\n",
2235 	       rec->ino, root->root_key.objectid);
2237 	btrfs_release_path(path);
/*
 * Repair: hand-build the missing DIR_INDEX item for @backref in its own
 * transaction, then update the in-memory state: mark the backref found
 * and re-evaluate the parent directory's isize error flag.
 */
2241 static int add_missing_dir_index(struct btrfs_root *root,
2242 				 struct cache_tree *inode_cache,
2243 				 struct inode_record *rec,
2244 				 struct inode_backref *backref)
2246 	struct btrfs_path *path;
2247 	struct btrfs_trans_handle *trans;
2248 	struct btrfs_dir_item *dir_item;
2249 	struct extent_buffer *leaf;
2250 	struct btrfs_key key;
2251 	struct btrfs_disk_key disk_key;
2252 	struct inode_record *dir_rec;
2253 	unsigned long name_ptr;
	/* Item payload: fixed dir_item header followed by the name bytes. */
2254 	u32 data_size = sizeof(*dir_item) + backref->namelen;
2257 	path = btrfs_alloc_path();
2261 	trans = btrfs_start_transaction(root, 1);
2262 	if (IS_ERR(trans)) {
2263 		btrfs_free_path(path);
2264 		return PTR_ERR(trans);
2267 	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2268 		(unsigned long long)rec->ino);
2269 	key.objectid = backref->dir;
2270 	key.type = BTRFS_DIR_INDEX_KEY;
2271 	key.offset = backref->index;
2273 	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2276 	leaf = path->nodes[0];
2277 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
	/* The embedded location key points at the child inode item. */
2279 	disk_key.objectid = cpu_to_le64(rec->ino);
2280 	disk_key.type = BTRFS_INODE_ITEM_KEY;
2281 	disk_key.offset = 0;
2283 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2284 	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2285 	btrfs_set_dir_data_len(leaf, dir_item, 0);
2286 	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2287 	name_ptr = (unsigned long)(dir_item + 1);
2288 	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2289 	btrfs_mark_buffer_dirty(leaf);
2290 	btrfs_free_path(path);
2291 	btrfs_commit_transaction(trans, root);
2293 	backref->found_dir_index = 1;
2294 	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2295 	BUG_ON(IS_ERR(dir_rec));
	/* The new entry's name now counts toward the parent dir's size. */
2298 	dir_rec->found_size += backref->namelen;
2299 	if (dir_rec->found_size == dir_rec->isize &&
2300 	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2301 		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2302 	if (dir_rec->found_size != dir_rec->isize)
2303 		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair: remove a bad DIR_INDEX entry.  If the dir item holds only this
 * one name the whole item is deleted, otherwise just the matching name is
 * removed.  Runs in its own transaction.
 */
2308 static int delete_dir_index(struct btrfs_root *root,
2309 			    struct cache_tree *inode_cache,
2310 			    struct inode_record *rec,
2311 			    struct inode_backref *backref)
2313 	struct btrfs_trans_handle *trans;
2314 	struct btrfs_dir_item *di;
2315 	struct btrfs_path *path;
2318 	path = btrfs_alloc_path();
2322 	trans = btrfs_start_transaction(root, 1);
2323 	if (IS_ERR(trans)) {
2324 		btrfs_free_path(path);
2325 		return PTR_ERR(trans);
2329 	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2330 		(unsigned long long)backref->dir,
2331 		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2332 		(unsigned long long)root->objectid);
2334 	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2335 				    backref->name, backref->namelen,
2336 				    backref->index, -1);
		/* Entry already gone (or lookup failed): nothing to delete. */
2339 		btrfs_free_path(path);
2340 		btrfs_commit_transaction(trans, root);
	/* !di: delete the whole item; otherwise remove just this name. */
2347 		ret = btrfs_del_item(trans, root, path);
2349 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
2351 	btrfs_free_path(path);
2352 	btrfs_commit_transaction(trans, root);
/*
 * Repair: synthesize a plausible inode item for an inode that has
 * backrefs/extents but no INODE_ITEM.  Directory vs regular file is
 * guessed from whether dir items were found; mode defaults to 0755 and
 * all timestamps are set to "now".  @root_dir selects root-directory
 * special casing (nlink forced to 1).
 */
2356 static int create_inode_item(struct btrfs_root *root,
2357 			     struct inode_record *rec,
2358 			     struct inode_backref *backref, int root_dir)
2360 	struct btrfs_trans_handle *trans;
2361 	struct btrfs_inode_item inode_item;
2362 	time_t now = time(NULL);
2365 	trans = btrfs_start_transaction(root, 1);
2366 	if (IS_ERR(trans)) {
2367 		ret = PTR_ERR(trans);
2371 	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2372 		"be incomplete, please check permissions and content after "
2373 		"the fsck completes.\n", (unsigned long long)root->objectid,
2374 		(unsigned long long)rec->ino);
2376 	memset(&inode_item, 0, sizeof(inode_item));
2377 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
	/* Root dir always has nlink 1; otherwise use the links we found. */
2379 		btrfs_set_stack_inode_nlink(&inode_item, 1);
2381 		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2382 	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2383 	if (rec->found_dir_item) {
2384 		if (rec->found_file_extent)
2385 			fprintf(stderr, "root %llu inode %llu has both a dir "
2386 				"item and extents, unsure if it is a dir or a "
2387 				"regular file so setting it as a directory\n",
2388 				(unsigned long long)root->objectid,
2389 				(unsigned long long)rec->ino);
2390 		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2391 		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2392 	} else if (!rec->found_dir_item) {
		/* Regular file: size is the end of the last file extent. */
2393 		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2394 		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2396 	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2397 	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2398 	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2399 	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2400 	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2401 	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2402 	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2403 	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2405 	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2407 	btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair inconsistencies between
 * dir_index / dir_item / inode_ref: delete mismatched dir indexes,
 * re-create missing ones, insert missing dir index/item pairs for bare
 * inode refs, and recreate the inode item when enough evidence exists.
 * Returns the repair count (or a negative error) via the final
 * `ret ? ret : repaired`.
 */
2411 static int repair_inode_backrefs(struct btrfs_root *root,
2412 				 struct inode_record *rec,
2413 				 struct cache_tree *inode_cache,
2416 	struct inode_backref *tmp, *backref;
2417 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
2421 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		/* Root dir missing its inode item: rebuild it first. */
2422 		if (!delete && rec->ino == root_dirid) {
2423 			if (!rec->found_inode_item) {
2424 				ret = create_inode_item(root, rec, backref, 1);
2431 		/* Index 0 for root dir's are special, don't mess with it */
2432 		if (rec->ino == root_dirid && backref->index == 0)
		/* Dir index without inode ref, or mismatching one: drop it. */
2436 		    ((backref->found_dir_index && !backref->found_inode_ref) ||
2437 		     (backref->found_dir_index && backref->found_inode_ref &&
2438 		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2439 			ret = delete_dir_index(root, inode_cache, rec, backref);
2443 			list_del(&backref->list);
		/* Dir item + inode ref but no dir index: recreate the index. */
2447 		if (!delete && !backref->found_dir_index &&
2448 		    backref->found_dir_item && backref->found_inode_ref) {
2449 			ret = add_missing_dir_index(root, inode_cache, rec,
		/*
		 * NOTE(review): found_dir_index is tested twice below; the
		 * third condition was presumably meant to differ (e.g.
		 * found_inode_ref) — compare against upstream cmds-check.c.
		 */
2454 		if (backref->found_dir_item &&
2455 		    backref->found_dir_index &&
2456 		    backref->found_dir_index) {
2457 			if (!backref->errors &&
2458 			    backref->found_inode_ref) {
2459 				list_del(&backref->list);
		/* Only an inode ref exists: insert the dir index/item pair. */
2465 		if (!delete && (!backref->found_dir_index &&
2466 				!backref->found_dir_item &&
2467 				backref->found_inode_ref)) {
2468 			struct btrfs_trans_handle *trans;
2469 			struct btrfs_key location;
2471 			ret = check_dir_conflict(root, backref->name,
2477 				 * let nlink fixing routine to handle it,
2478 				 * which can do it better.
2483 			location.objectid = rec->ino;
2484 			location.type = BTRFS_INODE_ITEM_KEY;
2485 			location.offset = 0;
2487 			trans = btrfs_start_transaction(root, 1);
2488 			if (IS_ERR(trans)) {
2489 				ret = PTR_ERR(trans);
2492 			fprintf(stderr, "adding missing dir index/item pair "
2494 				(unsigned long long)rec->ino);
2495 			ret = btrfs_insert_dir_item(trans, root, backref->name,
2497 						    backref->dir, &location,
2498 						    imode_to_type(rec->imode),
2501 			btrfs_commit_transaction(trans, root);
		/* Full backref evidence but no inode item: recreate it. */
2505 		if (!delete && (backref->found_inode_ref &&
2506 				backref->found_dir_index &&
2507 				backref->found_dir_item &&
2508 				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2509 				!rec->found_inode_item)) {
2510 			ret = create_inode_item(root, rec, backref, 0);
2517 	return ret ? ret : repaired;
2521 * To determine the file type for nlink/inode_item repair
2523 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2524 * Return -ENOENT if file type is not found.
2526 static int find_file_type(struct inode_record *rec, u8 *type)
2528 	struct inode_backref *backref;
2530 	/* For inode item recovered case */
2531 	if (rec->found_inode_item) {
2532 		*type = imode_to_type(rec->imode);
	/* Otherwise take the filetype from any dir_index/dir_item backref. */
2536 	list_for_each_entry(backref, &rec->backrefs, list) {
2537 		if (backref->found_dir_index || backref->found_dir_item) {
2538 			*type = backref->filetype;
2546 * To determine the file name for nlink repair
2548 * Return 0 if file name is found, set name and namelen.
2549 * Return -ENOENT if file name is not found.
2551 static int find_file_name(struct inode_record *rec,
2552 			  char *name, int *namelen)
2554 	struct inode_backref *backref;
	/* Any backref that was actually found on disk supplies the name. */
2556 	list_for_each_entry(backref, &rec->backrefs, list) {
2557 		if (backref->found_dir_index || backref->found_dir_item ||
2558 		    backref->found_inode_ref) {
2559 			memcpy(name, backref->name, backref->namelen);
2560 			*namelen = backref->namelen;
2567 /* Reset the nlink of the inode to the correct one */
2568 static int reset_nlink(struct btrfs_trans_handle *trans,
2569 		       struct btrfs_root *root,
2570 		       struct btrfs_path *path,
2571 		       struct inode_record *rec)
2573 	struct inode_backref *backref;
2574 	struct inode_backref *tmp;
2575 	struct btrfs_key key;
2576 	struct btrfs_inode_item *inode_item;
2579 	/* We don't believe this either, reset it and iterate backref */
2580 	rec->found_link = 0;
2582 	/* Remove all backref including the valid ones */
2583 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2584 		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2585 				   backref->index, backref->name,
2586 				   backref->namelen, 0);
2590 		/* remove invalid backref, so it won't be added back */
2591 		if (!(backref->found_dir_index &&
2592 		      backref->found_dir_item &&
2593 		      backref->found_inode_ref)) {
2594 			list_del(&backref->list);
2601 	/* Set nlink to 0 */
2602 	key.objectid = rec->ino;
2603 	key.type = BTRFS_INODE_ITEM_KEY;
2605 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2612 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2613 				    struct btrfs_inode_item);
2614 	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2615 	btrfs_mark_buffer_dirty(path->nodes[0]);
2616 	btrfs_release_path(path);
2619 	 * Add back valid inode_ref/dir_item/dir_index,
2620 	 * add_link() will handle the nlink inc, so new nlink must be correct
2622 	list_for_each_entry(backref, &rec->backrefs, list) {
2623 		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2624 				     backref->name, backref->namelen,
2625 				     backref->filetype, &backref->index, 1);
2630 	btrfs_release_path(path);
/*
 * Repair: fix a wrong link count.  Rebuilds all links via reset_nlink();
 * if no valid link remains, move the inode into a "lost+found" directory
 * (created at the fs root if needed), recovering or synthesizing a file
 * name and type first.  Clears I_ERR_LINK_COUNT_WRONG unconditionally to
 * avoid looping over the same inode.
 */
2634 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2635 			       struct btrfs_root *root,
2636 			       struct btrfs_path *path,
2637 			       struct inode_record *rec)
2639 	char *dir_name = "lost+found";
2640 	char namebuf[BTRFS_NAME_LEN] = {0};
2645 	int name_recovered = 0;
2646 	int type_recovered = 0;
2650 	 * Get file name and type first before these invalid inode ref
2651 	 * are deleted by remove_all_invalid_backref()
2653 	name_recovered = !find_file_name(rec, namebuf, &namelen);
2654 	type_recovered = !find_file_type(rec, &type);
2656 	if (!name_recovered) {
2657 		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2658 		       rec->ino, rec->ino);
2659 		namelen = count_digits(rec->ino);
2660 		sprintf(namebuf, "%llu", rec->ino);
2663 	if (!type_recovered) {
2664 		printf("Can't get file type for inode %llu, using FILE as fallback\n",
2666 		type = BTRFS_FT_REG_FILE;
2670 	ret = reset_nlink(trans, root, path, rec);
2673 			"Failed to reset nlink for inode %llu: %s\n",
2674 			rec->ino, strerror(-ret));
2678 	if (rec->found_link == 0) {
		/* No valid link left: park the inode under lost+found. */
2679 		lost_found_ino = root->highest_inode;
2680 		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2685 		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2686 				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2689 			fprintf(stderr, "Failed to create '%s' dir: %s\n",
2690 				dir_name, strerror(-ret));
2693 		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2694 				     namebuf, namelen, type, NULL, 1);
2696 		 * Add ".INO" suffix several times to handle case where
2697 		 * "FILENAME.INO" is already taken by another file.
2699 		while (ret == -EEXIST) {
			/* Conflicting file name: append ".INO" (the +1 is for the '.'). */
2703 			if (namelen + count_digits(rec->ino) + 1 >
2708 			snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2710 			namelen += count_digits(rec->ino) + 1;
2711 			ret = btrfs_add_link(trans, root, rec->ino,
2712 					     lost_found_ino, namebuf,
2713 					     namelen, type, NULL, 1);
2717 				"Failed to link the inode %llu to %s dir: %s\n",
2718 				rec->ino, dir_name, strerror(-ret));
2722 		 * Just increase the found_link, don't actually add the
2723 		 * backref. This will make things easier and this inode
2724 		 * record will be freed after the repair is done.
2725 		 * So fsck will not report problem about this inode.
2728 		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2729 		       namelen, namebuf, dir_name);
2731 	printf("Fixed the nlink of inode %llu\n", rec->ino);
2734 	 * Clear the flag anyway, or we will loop forever for the same inode
2735 	 * as it will not be removed from the bad inode list and the dead loop
2738 	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2739 	btrfs_release_path(path);
2744 * Check if there is any normal(reg or prealloc) file extent for given
2746 * This is used to determine the file type when neither its dir_index/item or
2747 * inode_item exists.
2749 * This will *NOT* report error, if any error happens, just consider it does
2750 * not have any normal file extent.
2752 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2754 struct btrfs_path *path;
2755 struct btrfs_key key;
2756 struct btrfs_key found_key;
2757 struct btrfs_file_extent_item *fi;
2761 path = btrfs_alloc_path();
2765 key.type = BTRFS_EXTENT_DATA_KEY;
2768 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2773 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2774 ret = btrfs_next_leaf(root, path);
2781 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2783 if (found_key.objectid != ino ||
2784 found_key.type != BTRFS_EXTENT_DATA_KEY)
2786 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2787 struct btrfs_file_extent_item);
2788 type = btrfs_file_extent_type(path->nodes[0], fi);
2789 if (type != BTRFS_FILE_EXTENT_INLINE) {
2795 btrfs_free_path(path);
2799 static u32 btrfs_type_to_imode(u8 type)
2801 static u32 imode_by_btrfs_type[] = {
2802 [BTRFS_FT_REG_FILE] = S_IFREG,
2803 [BTRFS_FT_DIR] = S_IFDIR,
2804 [BTRFS_FT_CHRDEV] = S_IFCHR,
2805 [BTRFS_FT_BLKDEV] = S_IFBLK,
2806 [BTRFS_FT_FIFO] = S_IFIFO,
2807 [BTRFS_FT_SOCK] = S_IFSOCK,
2808 [BTRFS_FT_SYMLINK] = S_IFLNK,
2811 return imode_by_btrfs_type[(type)];
2814 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2815 struct btrfs_root *root,
2816 struct btrfs_path *path,
2817 struct inode_record *rec)
2821 int type_recovered = 0;
2824 printf("Trying to rebuild inode:%llu\n", rec->ino);
2826 type_recovered = !find_file_type(rec, &filetype);
2829 * Try to determine inode type if type not found.
2831 * For found regular file extent, it must be FILE.
2832 * For found dir_item/index, it must be DIR.
2834 * For undetermined one, use FILE as fallback.
2837 * 1. If found backref(inode_index/item is already handled) to it,
2839 * Need new inode-inode ref structure to allow search for that.
2841 if (!type_recovered) {
2842 if (rec->found_file_extent &&
2843 find_normal_file_extent(root, rec->ino)) {
2845 filetype = BTRFS_FT_REG_FILE;
2846 } else if (rec->found_dir_item) {
2848 filetype = BTRFS_FT_DIR;
2849 } else if (!list_empty(&rec->orphan_extents)) {
2851 filetype = BTRFS_FT_REG_FILE;
2853 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2856 filetype = BTRFS_FT_REG_FILE;
2860 ret = btrfs_new_inode(trans, root, rec->ino,
2861 mode | btrfs_type_to_imode(filetype));
2866 * Here inode rebuild is done, we only rebuild the inode item,
2867 * don't repair the nlink(like move to lost+found).
2868 * That is the job of nlink repair.
2870 * We just fill the record and return
2872 rec->found_dir_item = 1;
2873 rec->imode = mode | btrfs_type_to_imode(filetype);
2875 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2876 /* Ensure the inode_nlinks repair function will be called */
2877 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2882 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2883 struct btrfs_root *root,
2884 struct btrfs_path *path,
2885 struct inode_record *rec)
2887 struct orphan_data_extent *orphan;
2888 struct orphan_data_extent *tmp;
2891 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2893 * Check for conflicting file extents
2895 * Here we don't know whether the extents is compressed or not,
2896 * so we can only assume it not compressed nor data offset,
2897 * and use its disk_len as extent length.
2899 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2900 orphan->offset, orphan->disk_len, 0);
2901 btrfs_release_path(path);
2906 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2907 orphan->disk_bytenr, orphan->disk_len);
2908 ret = btrfs_free_extent(trans,
2909 root->fs_info->extent_root,
2910 orphan->disk_bytenr, orphan->disk_len,
2911 0, root->objectid, orphan->objectid,
2916 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2917 orphan->offset, orphan->disk_bytenr,
2918 orphan->disk_len, orphan->disk_len);
2922 /* Update file size info */
2923 rec->found_size += orphan->disk_len;
2924 if (rec->found_size == rec->nbytes)
2925 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2927 /* Update the file extent hole info too */
2928 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2932 if (RB_EMPTY_ROOT(&rec->holes))
2933 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2935 list_del(&orphan->list);
2938 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2943 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2948 struct rb_node *node;
2949 struct file_extent_hole *hole;
2953 node = rb_first(&rec->holes);
2957 hole = rb_entry(node, struct file_extent_hole, node);
2958 ret = btrfs_punch_hole(trans, root, rec->ino,
2959 hole->start, hole->len);
2962 ret = del_file_extent_hole(&rec->holes, hole->start,
2966 if (RB_EMPTY_ROOT(&rec->holes))
2967 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2968 node = rb_first(&rec->holes);
2970 /* special case for a file losing all its file extent */
2972 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2973 round_up(rec->isize, root->sectorsize));
2977 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2978 rec->ino, root->objectid);
2983 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2985 struct btrfs_trans_handle *trans;
2986 struct btrfs_path *path;
2989 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2990 I_ERR_NO_ORPHAN_ITEM |
2991 I_ERR_LINK_COUNT_WRONG |
2992 I_ERR_NO_INODE_ITEM |
2993 I_ERR_FILE_EXTENT_ORPHAN |
2994 I_ERR_FILE_EXTENT_DISCOUNT|
2995 I_ERR_FILE_NBYTES_WRONG)))
2998 path = btrfs_alloc_path();
3003 * For nlink repair, it may create a dir and add link, so
3004 * 2 for parent(256)'s dir_index and dir_item
3005 * 2 for lost+found dir's inode_item and inode_ref
3006 * 1 for the new inode_ref of the file
3007 * 2 for lost+found dir's dir_index and dir_item for the file
3009 trans = btrfs_start_transaction(root, 7);
3010 if (IS_ERR(trans)) {
3011 btrfs_free_path(path);
3012 return PTR_ERR(trans);
3015 if (rec->errors & I_ERR_NO_INODE_ITEM)
3016 ret = repair_inode_no_item(trans, root, path, rec);
3017 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3018 ret = repair_inode_orphan_extent(trans, root, path, rec);
3019 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3020 ret = repair_inode_discount_extent(trans, root, path, rec);
3021 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3022 ret = repair_inode_isize(trans, root, path, rec);
3023 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3024 ret = repair_inode_orphan_item(trans, root, path, rec);
3025 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3026 ret = repair_inode_nlinks(trans, root, path, rec);
3027 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3028 ret = repair_inode_nbytes(trans, root, path, rec);
3029 btrfs_commit_transaction(trans, root);
3030 btrfs_free_path(path);
3034 static int check_inode_recs(struct btrfs_root *root,
3035 struct cache_tree *inode_cache)
3037 struct cache_extent *cache;
3038 struct ptr_node *node;
3039 struct inode_record *rec;
3040 struct inode_backref *backref;
3045 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3047 if (btrfs_root_refs(&root->root_item) == 0) {
3048 if (!cache_tree_empty(inode_cache))
3049 fprintf(stderr, "warning line %d\n", __LINE__);
3054 * We need to record the highest inode number for later 'lost+found'
3056 * We must select an ino not used/referred by any existing inode, or
3057 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3058 * this may cause 'lost+found' dir has wrong nlinks.
3060 cache = last_cache_extent(inode_cache);
3062 node = container_of(cache, struct ptr_node, cache);
3064 if (rec->ino > root->highest_inode)
3065 root->highest_inode = rec->ino;
3069 * We need to repair backrefs first because we could change some of the
3070 * errors in the inode recs.
3072 * We also need to go through and delete invalid backrefs first and then
3073 * add the correct ones second. We do this because we may get EEXIST
3074 * when adding back the correct index because we hadn't yet deleted the
3077 * For example, if we were missing a dir index then the directories
3078 * isize would be wrong, so if we fixed the isize to what we thought it
3079 * would be and then fixed the backref we'd still have a invalid fs, so
3080 * we need to add back the dir index and then check to see if the isize
3085 if (stage == 3 && !err)
3088 cache = search_cache_extent(inode_cache, 0);
3089 while (repair && cache) {
3090 node = container_of(cache, struct ptr_node, cache);
3092 cache = next_cache_extent(cache);
3094 /* Need to free everything up and rescan */
3096 remove_cache_extent(inode_cache, &node->cache);
3098 free_inode_rec(rec);
3102 if (list_empty(&rec->backrefs))
3105 ret = repair_inode_backrefs(root, rec, inode_cache,
3119 rec = get_inode_rec(inode_cache, root_dirid, 0);
3120 BUG_ON(IS_ERR(rec));
3122 ret = check_root_dir(rec);
3124 fprintf(stderr, "root %llu root dir %llu error\n",
3125 (unsigned long long)root->root_key.objectid,
3126 (unsigned long long)root_dirid);
3127 print_inode_error(root, rec);
3132 struct btrfs_trans_handle *trans;
3134 trans = btrfs_start_transaction(root, 1);
3135 if (IS_ERR(trans)) {
3136 err = PTR_ERR(trans);
3141 "root %llu missing its root dir, recreating\n",
3142 (unsigned long long)root->objectid);
3144 ret = btrfs_make_root_dir(trans, root, root_dirid);
3147 btrfs_commit_transaction(trans, root);
3151 fprintf(stderr, "root %llu root dir %llu not found\n",
3152 (unsigned long long)root->root_key.objectid,
3153 (unsigned long long)root_dirid);
3157 cache = search_cache_extent(inode_cache, 0);
3160 node = container_of(cache, struct ptr_node, cache);
3162 remove_cache_extent(inode_cache, &node->cache);
3164 if (rec->ino == root_dirid ||
3165 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3166 free_inode_rec(rec);
3170 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3171 ret = check_orphan_item(root, rec->ino);
3173 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3174 if (can_free_inode_rec(rec)) {
3175 free_inode_rec(rec);
3180 if (!rec->found_inode_item)
3181 rec->errors |= I_ERR_NO_INODE_ITEM;
3182 if (rec->found_link != rec->nlink)
3183 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3185 ret = try_repair_inode(root, rec);
3186 if (ret == 0 && can_free_inode_rec(rec)) {
3187 free_inode_rec(rec);
3193 if (!(repair && ret == 0))
3195 print_inode_error(root, rec);
3196 list_for_each_entry(backref, &rec->backrefs, list) {
3197 if (!backref->found_dir_item)
3198 backref->errors |= REF_ERR_NO_DIR_ITEM;
3199 if (!backref->found_dir_index)
3200 backref->errors |= REF_ERR_NO_DIR_INDEX;
3201 if (!backref->found_inode_ref)
3202 backref->errors |= REF_ERR_NO_INODE_REF;
3203 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3204 " namelen %u name %s filetype %d errors %x",
3205 (unsigned long long)backref->dir,
3206 (unsigned long long)backref->index,
3207 backref->namelen, backref->name,
3208 backref->filetype, backref->errors);
3209 print_ref_error(backref->errors);
3211 free_inode_rec(rec);
3213 return (error > 0) ? -1 : 0;
3216 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3219 struct cache_extent *cache;
3220 struct root_record *rec = NULL;
3223 cache = lookup_cache_extent(root_cache, objectid, 1);
3225 rec = container_of(cache, struct root_record, cache);
3227 rec = calloc(1, sizeof(*rec));
3229 return ERR_PTR(-ENOMEM);
3230 rec->objectid = objectid;
3231 INIT_LIST_HEAD(&rec->backrefs);
3232 rec->cache.start = objectid;
3233 rec->cache.size = 1;
3235 ret = insert_cache_extent(root_cache, &rec->cache);
3237 return ERR_PTR(-EEXIST);
3242 static struct root_backref *get_root_backref(struct root_record *rec,
3243 u64 ref_root, u64 dir, u64 index,
3244 const char *name, int namelen)
3246 struct root_backref *backref;
3248 list_for_each_entry(backref, &rec->backrefs, list) {
3249 if (backref->ref_root != ref_root || backref->dir != dir ||
3250 backref->namelen != namelen)
3252 if (memcmp(name, backref->name, namelen))
3257 backref = calloc(1, sizeof(*backref) + namelen + 1);
3260 backref->ref_root = ref_root;
3262 backref->index = index;
3263 backref->namelen = namelen;
3264 memcpy(backref->name, name, namelen);
3265 backref->name[namelen] = '\0';
3266 list_add_tail(&backref->list, &rec->backrefs);
3270 static void free_root_record(struct cache_extent *cache)
3272 struct root_record *rec;
3273 struct root_backref *backref;
3275 rec = container_of(cache, struct root_record, cache);
3276 while (!list_empty(&rec->backrefs)) {
3277 backref = to_root_backref(rec->backrefs.next);
3278 list_del(&backref->list);
3285 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3287 static int add_root_backref(struct cache_tree *root_cache,
3288 u64 root_id, u64 ref_root, u64 dir, u64 index,
3289 const char *name, int namelen,
3290 int item_type, int errors)
3292 struct root_record *rec;
3293 struct root_backref *backref;
3295 rec = get_root_rec(root_cache, root_id);
3296 BUG_ON(IS_ERR(rec));
3297 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3300 backref->errors |= errors;
3302 if (item_type != BTRFS_DIR_ITEM_KEY) {
3303 if (backref->found_dir_index || backref->found_back_ref ||
3304 backref->found_forward_ref) {
3305 if (backref->index != index)
3306 backref->errors |= REF_ERR_INDEX_UNMATCH;
3308 backref->index = index;
3312 if (item_type == BTRFS_DIR_ITEM_KEY) {
3313 if (backref->found_forward_ref)
3315 backref->found_dir_item = 1;
3316 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3317 backref->found_dir_index = 1;
3318 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3319 if (backref->found_forward_ref)
3320 backref->errors |= REF_ERR_DUP_ROOT_REF;
3321 else if (backref->found_dir_item)
3323 backref->found_forward_ref = 1;
3324 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3325 if (backref->found_back_ref)
3326 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3327 backref->found_back_ref = 1;
3332 if (backref->found_forward_ref && backref->found_dir_item)
3333 backref->reachable = 1;
3337 static int merge_root_recs(struct btrfs_root *root,
3338 struct cache_tree *src_cache,
3339 struct cache_tree *dst_cache)
3341 struct cache_extent *cache;
3342 struct ptr_node *node;
3343 struct inode_record *rec;
3344 struct inode_backref *backref;
3347 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3348 free_inode_recs_tree(src_cache);
3353 cache = search_cache_extent(src_cache, 0);
3356 node = container_of(cache, struct ptr_node, cache);
3358 remove_cache_extent(src_cache, &node->cache);
3361 ret = is_child_root(root, root->objectid, rec->ino);
3367 list_for_each_entry(backref, &rec->backrefs, list) {
3368 BUG_ON(backref->found_inode_ref);
3369 if (backref->found_dir_item)
3370 add_root_backref(dst_cache, rec->ino,
3371 root->root_key.objectid, backref->dir,
3372 backref->index, backref->name,
3373 backref->namelen, BTRFS_DIR_ITEM_KEY,
3375 if (backref->found_dir_index)
3376 add_root_backref(dst_cache, rec->ino,
3377 root->root_key.objectid, backref->dir,
3378 backref->index, backref->name,
3379 backref->namelen, BTRFS_DIR_INDEX_KEY,
3383 free_inode_rec(rec);
3390 static int check_root_refs(struct btrfs_root *root,
3391 struct cache_tree *root_cache)
3393 struct root_record *rec;
3394 struct root_record *ref_root;
3395 struct root_backref *backref;
3396 struct cache_extent *cache;
3402 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3403 BUG_ON(IS_ERR(rec));
3406 /* fixme: this can not detect circular references */
3409 cache = search_cache_extent(root_cache, 0);
3413 rec = container_of(cache, struct root_record, cache);
3414 cache = next_cache_extent(cache);
3416 if (rec->found_ref == 0)
3419 list_for_each_entry(backref, &rec->backrefs, list) {
3420 if (!backref->reachable)
3423 ref_root = get_root_rec(root_cache,
3425 BUG_ON(IS_ERR(ref_root));
3426 if (ref_root->found_ref > 0)
3429 backref->reachable = 0;
3431 if (rec->found_ref == 0)
3437 cache = search_cache_extent(root_cache, 0);
3441 rec = container_of(cache, struct root_record, cache);
3442 cache = next_cache_extent(cache);
3444 if (rec->found_ref == 0 &&
3445 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3446 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3447 ret = check_orphan_item(root->fs_info->tree_root,
3453 * If we don't have a root item then we likely just have
3454 * a dir item in a snapshot for this root but no actual
3455 * ref key or anything so it's meaningless.
3457 if (!rec->found_root_item)
3460 fprintf(stderr, "fs tree %llu not referenced\n",
3461 (unsigned long long)rec->objectid);
3465 if (rec->found_ref > 0 && !rec->found_root_item)
3467 list_for_each_entry(backref, &rec->backrefs, list) {
3468 if (!backref->found_dir_item)
3469 backref->errors |= REF_ERR_NO_DIR_ITEM;
3470 if (!backref->found_dir_index)
3471 backref->errors |= REF_ERR_NO_DIR_INDEX;
3472 if (!backref->found_back_ref)
3473 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3474 if (!backref->found_forward_ref)
3475 backref->errors |= REF_ERR_NO_ROOT_REF;
3476 if (backref->reachable && backref->errors)
3483 fprintf(stderr, "fs tree %llu refs %u %s\n",
3484 (unsigned long long)rec->objectid, rec->found_ref,
3485 rec->found_root_item ? "" : "not found");
3487 list_for_each_entry(backref, &rec->backrefs, list) {
3488 if (!backref->reachable)
3490 if (!backref->errors && rec->found_root_item)
3492 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3493 " index %llu namelen %u name %s errors %x\n",
3494 (unsigned long long)backref->ref_root,
3495 (unsigned long long)backref->dir,
3496 (unsigned long long)backref->index,
3497 backref->namelen, backref->name,
3499 print_ref_error(backref->errors);
3502 return errors > 0 ? 1 : 0;
3505 static int process_root_ref(struct extent_buffer *eb, int slot,
3506 struct btrfs_key *key,
3507 struct cache_tree *root_cache)
3513 struct btrfs_root_ref *ref;
3514 char namebuf[BTRFS_NAME_LEN];
3517 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3519 dirid = btrfs_root_ref_dirid(eb, ref);
3520 index = btrfs_root_ref_sequence(eb, ref);
3521 name_len = btrfs_root_ref_name_len(eb, ref);
3523 if (name_len <= BTRFS_NAME_LEN) {
3527 len = BTRFS_NAME_LEN;
3528 error = REF_ERR_NAME_TOO_LONG;
3530 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3532 if (key->type == BTRFS_ROOT_REF_KEY) {
3533 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3534 index, namebuf, len, key->type, error);
3536 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3537 index, namebuf, len, key->type, error);
3542 static void free_corrupt_block(struct cache_extent *cache)
3544 struct btrfs_corrupt_block *corrupt;
3546 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3550 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3553 * Repair the btree of the given root.
3555 * The fix is to remove the node key in corrupt_blocks cache_tree.
3556 * and rebalance the tree.
3557 * After the fix, the btree should be writeable.
3559 static int repair_btree(struct btrfs_root *root,
3560 struct cache_tree *corrupt_blocks)
3562 struct btrfs_trans_handle *trans;
3563 struct btrfs_path *path;
3564 struct btrfs_corrupt_block *corrupt;
3565 struct cache_extent *cache;
3566 struct btrfs_key key;
3571 if (cache_tree_empty(corrupt_blocks))
3574 path = btrfs_alloc_path();
3578 trans = btrfs_start_transaction(root, 1);
3579 if (IS_ERR(trans)) {
3580 ret = PTR_ERR(trans);
3581 fprintf(stderr, "Error starting transaction: %s\n",
3585 cache = first_cache_extent(corrupt_blocks);
3587 corrupt = container_of(cache, struct btrfs_corrupt_block,
3589 level = corrupt->level;
3590 path->lowest_level = level;
3591 key.objectid = corrupt->key.objectid;
3592 key.type = corrupt->key.type;
3593 key.offset = corrupt->key.offset;
3596 * Here we don't want to do any tree balance, since it may
3597 * cause a balance with corrupted brother leaf/node,
3598 * so ins_len set to 0 here.
3599 * Balance will be done after all corrupt node/leaf is deleted.
3601 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3604 offset = btrfs_node_blockptr(path->nodes[level],
3605 path->slots[level]);
3607 /* Remove the ptr */
3608 ret = btrfs_del_ptr(trans, root, path, level,
3609 path->slots[level]);
3613 * Remove the corresponding extent
3614 * return value is not concerned.
3616 btrfs_release_path(path);
3617 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3618 0, root->root_key.objectid,
3620 cache = next_cache_extent(cache);
3623 /* Balance the btree using btrfs_search_slot() */
3624 cache = first_cache_extent(corrupt_blocks);
3626 corrupt = container_of(cache, struct btrfs_corrupt_block,
3628 memcpy(&key, &corrupt->key, sizeof(key));
3629 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3632 /* return will always >0 since it won't find the item */
3634 btrfs_release_path(path);
3635 cache = next_cache_extent(cache);
3638 btrfs_commit_transaction(trans, root);
3640 btrfs_free_path(path);
3644 static int check_fs_root(struct btrfs_root *root,
3645 struct cache_tree *root_cache,
3646 struct walk_control *wc)
3652 struct btrfs_path path;
3653 struct shared_node root_node;
3654 struct root_record *rec;
3655 struct btrfs_root_item *root_item = &root->root_item;
3656 struct cache_tree corrupt_blocks;
3657 struct orphan_data_extent *orphan;
3658 struct orphan_data_extent *tmp;
3659 enum btrfs_tree_block_status status;
3660 struct node_refs nrefs;
3663 * Reuse the corrupt_block cache tree to record corrupted tree block
3665 * Unlike the usage in extent tree check, here we do it in a per
3666 * fs/subvol tree base.
3668 cache_tree_init(&corrupt_blocks);
3669 root->fs_info->corrupt_blocks = &corrupt_blocks;
3671 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3672 rec = get_root_rec(root_cache, root->root_key.objectid);
3673 BUG_ON(IS_ERR(rec));
3674 if (btrfs_root_refs(root_item) > 0)
3675 rec->found_root_item = 1;
3678 btrfs_init_path(&path);
3679 memset(&root_node, 0, sizeof(root_node));
3680 cache_tree_init(&root_node.root_cache);
3681 cache_tree_init(&root_node.inode_cache);
3682 memset(&nrefs, 0, sizeof(nrefs));
3684 /* Move the orphan extent record to corresponding inode_record */
3685 list_for_each_entry_safe(orphan, tmp,
3686 &root->orphan_data_extents, list) {
3687 struct inode_record *inode;
3689 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3691 BUG_ON(IS_ERR(inode));
3692 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3693 list_move(&orphan->list, &inode->orphan_extents);
3696 level = btrfs_header_level(root->node);
3697 memset(wc->nodes, 0, sizeof(wc->nodes));
3698 wc->nodes[level] = &root_node;
3699 wc->active_node = level;
3700 wc->root_level = level;
3702 /* We may not have checked the root block, lets do that now */
3703 if (btrfs_is_leaf(root->node))
3704 status = btrfs_check_leaf(root, NULL, root->node);
3706 status = btrfs_check_node(root, NULL, root->node);
3707 if (status != BTRFS_TREE_BLOCK_CLEAN)
3710 if (btrfs_root_refs(root_item) > 0 ||
3711 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3712 path.nodes[level] = root->node;
3713 extent_buffer_get(root->node);
3714 path.slots[level] = 0;
3716 struct btrfs_key key;
3717 struct btrfs_disk_key found_key;
3719 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3720 level = root_item->drop_level;
3721 path.lowest_level = level;
3722 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3725 btrfs_node_key(path.nodes[level], &found_key,
3727 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3728 sizeof(found_key)));
3732 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3738 wret = walk_up_tree(root, &path, wc, &level);
3745 btrfs_release_path(&path);
3747 if (!cache_tree_empty(&corrupt_blocks)) {
3748 struct cache_extent *cache;
3749 struct btrfs_corrupt_block *corrupt;
3751 printf("The following tree block(s) is corrupted in tree %llu:\n",
3752 root->root_key.objectid);
3753 cache = first_cache_extent(&corrupt_blocks);
3755 corrupt = container_of(cache,
3756 struct btrfs_corrupt_block,
3758 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3759 cache->start, corrupt->level,
3760 corrupt->key.objectid, corrupt->key.type,
3761 corrupt->key.offset);
3762 cache = next_cache_extent(cache);
3765 printf("Try to repair the btree for root %llu\n",
3766 root->root_key.objectid);
3767 ret = repair_btree(root, &corrupt_blocks);
3769 fprintf(stderr, "Failed to repair btree: %s\n",
3772 printf("Btree for root %llu is fixed\n",
3773 root->root_key.objectid);
3777 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3781 if (root_node.current) {
3782 root_node.current->checked = 1;
3783 maybe_free_inode_rec(&root_node.inode_cache,
3787 err = check_inode_recs(root, &root_node.inode_cache);
3791 free_corrupt_blocks_tree(&corrupt_blocks);
3792 root->fs_info->corrupt_blocks = NULL;
3793 free_orphan_data_extents(&root->orphan_data_extents);
3797 static int fs_root_objectid(u64 objectid)
3799 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3800 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3802 return is_fstree(objectid);
3805 static int check_fs_roots(struct btrfs_root *root,
3806 struct cache_tree *root_cache)
3808 struct btrfs_path path;
3809 struct btrfs_key key;
3810 struct walk_control wc;
3811 struct extent_buffer *leaf, *tree_node;
3812 struct btrfs_root *tmp_root;
3813 struct btrfs_root *tree_root = root->fs_info->tree_root;
3817 if (ctx.progress_enabled) {
3818 ctx.tp = TASK_FS_ROOTS;
3819 task_start(ctx.info);
3823 * Just in case we made any changes to the extent tree that weren't
3824 * reflected into the free space cache yet.
3827 reset_cached_block_groups(root->fs_info);
3828 memset(&wc, 0, sizeof(wc));
3829 cache_tree_init(&wc.shared);
3830 btrfs_init_path(&path);
3835 key.type = BTRFS_ROOT_ITEM_KEY;
3836 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3841 tree_node = tree_root->node;
3843 if (tree_node != tree_root->node) {
3844 free_root_recs_tree(root_cache);
3845 btrfs_release_path(&path);
3848 leaf = path.nodes[0];
3849 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3850 ret = btrfs_next_leaf(tree_root, &path);
3856 leaf = path.nodes[0];
3858 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3859 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3860 fs_root_objectid(key.objectid)) {
3861 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3862 tmp_root = btrfs_read_fs_root_no_cache(
3863 root->fs_info, &key);
3865 key.offset = (u64)-1;
3866 tmp_root = btrfs_read_fs_root(
3867 root->fs_info, &key);
3869 if (IS_ERR(tmp_root)) {
3873 ret = check_fs_root(tmp_root, root_cache, &wc);
3874 if (ret == -EAGAIN) {
3875 free_root_recs_tree(root_cache);
3876 btrfs_release_path(&path);
3881 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3882 btrfs_free_fs_root(tmp_root);
3883 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3884 key.type == BTRFS_ROOT_BACKREF_KEY) {
3885 process_root_ref(leaf, path.slots[0], &key,
3892 btrfs_release_path(&path);
3894 free_extent_cache_tree(&wc.shared);
3895 if (!cache_tree_empty(&wc.shared))
3896 fprintf(stderr, "warning line %d\n", __LINE__);
3898 task_stop(ctx.info);
3903 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3906 struct extent_backref *back;
3907 struct tree_backref *tback;
3908 struct data_backref *dback;
3912 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3913 back = rb_node_to_extent_backref(n);
3914 if (!back->found_extent_tree) {
3918 if (back->is_data) {
3919 dback = to_data_backref(back);
3920 fprintf(stderr, "Backref %llu %s %llu"
3921 " owner %llu offset %llu num_refs %lu"
3922 " not found in extent tree\n",
3923 (unsigned long long)rec->start,
3924 back->full_backref ?
3926 back->full_backref ?
3927 (unsigned long long)dback->parent:
3928 (unsigned long long)dback->root,
3929 (unsigned long long)dback->owner,
3930 (unsigned long long)dback->offset,
3931 (unsigned long)dback->num_refs);
3933 tback = to_tree_backref(back);
3934 fprintf(stderr, "Backref %llu parent %llu"
3935 " root %llu not found in extent tree\n",
3936 (unsigned long long)rec->start,
3937 (unsigned long long)tback->parent,
3938 (unsigned long long)tback->root);
3941 if (!back->is_data && !back->found_ref) {
3945 tback = to_tree_backref(back);
3946 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3947 (unsigned long long)rec->start,
3948 back->full_backref ? "parent" : "root",
3949 back->full_backref ?
3950 (unsigned long long)tback->parent :
3951 (unsigned long long)tback->root, back);
3953 if (back->is_data) {
3954 dback = to_data_backref(back);
3955 if (dback->found_ref != dback->num_refs) {
3959 fprintf(stderr, "Incorrect local backref count"
3960 " on %llu %s %llu owner %llu"
3961 " offset %llu found %u wanted %u back %p\n",
3962 (unsigned long long)rec->start,
3963 back->full_backref ?
3965 back->full_backref ?
3966 (unsigned long long)dback->parent:
3967 (unsigned long long)dback->root,
3968 (unsigned long long)dback->owner,
3969 (unsigned long long)dback->offset,
3970 dback->found_ref, dback->num_refs, back);
3972 if (dback->disk_bytenr != rec->start) {
3976 fprintf(stderr, "Backref disk bytenr does not"
3977 " match extent record, bytenr=%llu, "
3978 "ref bytenr=%llu\n",
3979 (unsigned long long)rec->start,
3980 (unsigned long long)dback->disk_bytenr);
3983 if (dback->bytes != rec->nr) {
3987 fprintf(stderr, "Backref bytes do not match "
3988 "extent backref, bytenr=%llu, ref "
3989 "bytes=%llu, backref bytes=%llu\n",
3990 (unsigned long long)rec->start,
3991 (unsigned long long)rec->nr,
3992 (unsigned long long)dback->bytes);
3995 if (!back->is_data) {
3998 dback = to_data_backref(back);
3999 found += dback->found_ref;
4002 if (found != rec->refs) {
4006 fprintf(stderr, "Incorrect global backref count "
4007 "on %llu found %llu wanted %llu\n",
4008 (unsigned long long)rec->start,
4009 (unsigned long long)found,
4010 (unsigned long long)rec->refs);
4016 static void __free_one_backref(struct rb_node *node)
4018 struct extent_backref *back = rb_node_to_extent_backref(node);
4023 static void free_all_extent_backrefs(struct extent_record *rec)
4025 rb_free_nodes(&rec->backref_tree, __free_one_backref);
4028 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4029 struct cache_tree *extent_cache)
4031 struct cache_extent *cache;
4032 struct extent_record *rec;
4035 cache = first_cache_extent(extent_cache);
4038 rec = container_of(cache, struct extent_record, cache);
4039 remove_cache_extent(extent_cache, cache);
4040 free_all_extent_backrefs(rec);
4045 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4046 struct extent_record *rec)
4048 if (rec->content_checked && rec->owner_ref_checked &&
4049 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4050 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4051 !rec->bad_full_backref && !rec->crossing_stripes &&
4052 !rec->wrong_chunk_type) {
4053 remove_cache_extent(extent_cache, &rec->cache);
4054 free_all_extent_backrefs(rec);
4055 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in @buf's header really references the
 * block. First scan @rec's backrefs for a non-full tree backref whose root
 * matches btrfs_header_owner(buf); failing that, read that owner's fs root
 * and search it for @buf's first key one level up, checking whether the
 * parent node's blockptr points at buf->start.
 *
 * Returns 0 when the owner ref checks out, 1 when it does not.
 * NOTE(review): sampled source — loop bodies/continues and some error
 * returns are elided.
 */
4061 static int check_owner_ref(struct btrfs_root *root,
4062 struct extent_record *rec,
4063 struct extent_buffer *buf)
4065 struct extent_backref *node, *tmp;
4066 struct tree_backref *back;
4067 struct btrfs_root *ref_root;
4068 struct btrfs_key key;
4069 struct btrfs_path path;
4070 struct extent_buffer *parent;
4075 rbtree_postorder_for_each_entry_safe(node, tmp,
4076 &rec->backref_tree, node) {
4079 if (!node->found_ref)
4081 if (node->full_backref)
4083 back = to_tree_backref(node);
4084 if (btrfs_header_owner(buf) == back->root)
4087 BUG_ON(rec->is_root);
4089 /* try to find the block by search corresponding fs tree */
4090 key.objectid = btrfs_header_owner(buf);
4091 key.type = BTRFS_ROOT_ITEM_KEY;
4092 key.offset = (u64)-1;
4094 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4095 if (IS_ERR(ref_root))
4098 level = btrfs_header_level(buf);
4100 btrfs_item_key_to_cpu(buf, &key, 0);
4102 btrfs_node_key_to_cpu(buf, &key, 0);
4104 btrfs_init_path(&path);
/* Search down only to the parent level of @buf. */
4105 path.lowest_level = level + 1;
4106 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4110 parent = path.nodes[level + 1];
4111 if (parent && buf->start == btrfs_node_blockptr(parent,
4112 path.slots[level + 1]))
4115 btrfs_release_path(&path);
4116 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref rooted at the extent
 * tree, i.e. the record describes an extent tree block.
 */
4119 static int is_extent_tree_record(struct extent_record *rec)
4121 struct extent_backref *ref, *tmp;
4122 struct tree_backref *back;
4125 rbtree_postorder_for_each_entry_safe(ref, tmp,
4126 &rec->backref_tree, node) {
4129 back = to_tree_backref(ref);
4130 if (ref->full_backref)
4132 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure at [start, start+len): if the range maps to an
 * extent tree block, file it as a corrupt extent record keyed by the
 * block's parent key so repair can deal with it later.
 */
4139 static int record_bad_block_io(struct btrfs_fs_info *info,
4140 struct cache_tree *extent_cache,
4143 struct extent_record *rec;
4144 struct cache_extent *cache;
4145 struct btrfs_key key;
4147 cache = lookup_cache_extent(extent_cache, start, len);
4151 rec = container_of(cache, struct extent_record, cache);
4152 if (!is_extent_tree_record(rec))
4155 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4156 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to restore key order.
 *
 * Node case: swap the two key_ptrs wholesale; if slot 0 changed, propagate
 * the new first key upward via btrfs_fixup_low_keys().
 * Leaf case: swap the items' data payloads and their offset/size headers,
 * then swap the keys with btrfs_set_item_key_unsafe() (which also fixes
 * low keys as needed).
 * NOTE(review): malloc failure paths and frees are in elided lines.
 */
4159 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4160 struct extent_buffer *buf, int slot)
4162 if (btrfs_header_level(buf)) {
4163 struct btrfs_key_ptr ptr1, ptr2;
4165 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4166 sizeof(struct btrfs_key_ptr));
4167 read_extent_buffer(buf, &ptr2,
4168 btrfs_node_key_ptr_offset(slot + 1),
4169 sizeof(struct btrfs_key_ptr));
4170 write_extent_buffer(buf, &ptr1,
4171 btrfs_node_key_ptr_offset(slot + 1),
4172 sizeof(struct btrfs_key_ptr));
4173 write_extent_buffer(buf, &ptr2,
4174 btrfs_node_key_ptr_offset(slot),
4175 sizeof(struct btrfs_key_ptr));
4177 struct btrfs_disk_key key;
4178 btrfs_node_key(buf, &key, 0);
/* First key changed: fix the parent's copy of it. */
4179 btrfs_fixup_low_keys(root, path, &key,
4180 btrfs_header_level(buf) + 1);
4183 struct btrfs_item *item1, *item2;
4184 struct btrfs_key k1, k2;
4185 char *item1_data, *item2_data;
4186 u32 item1_offset, item2_offset, item1_size, item2_size;
4188 item1 = btrfs_item_nr(slot);
4189 item2 = btrfs_item_nr(slot + 1);
4190 btrfs_item_key_to_cpu(buf, &k1, slot);
4191 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4192 item1_offset = btrfs_item_offset(buf, item1);
4193 item2_offset = btrfs_item_offset(buf, item2);
4194 item1_size = btrfs_item_size(buf, item1);
4195 item2_size = btrfs_item_size(buf, item2);
4197 item1_data = malloc(item1_size);
4200 item2_data = malloc(item2_size);
4206 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4207 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write the payloads, then cross the headers to match. */
4209 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4210 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4214 btrfs_set_item_offset(buf, item1, item2_offset);
4215 btrfs_set_item_offset(buf, item2, item1_offset);
4216 btrfs_set_item_size(buf, item1, item2_size);
4217 btrfs_set_item_size(buf, item2, item1_size);
4219 path->slots[0] = slot;
4220 btrfs_set_item_key_unsafe(root, path, &k2);
4221 path->slots[0] = slot + 1;
4222 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair of keys
 * that compare out of order, then mark the buffer dirty. Works on both
 * nodes (key_ptrs) and leaves (items) via swap_values().
 */
4227 static int fix_key_order(struct btrfs_trans_handle *trans,
4228 struct btrfs_root *root,
4229 struct btrfs_path *path)
4231 struct extent_buffer *buf;
4232 struct btrfs_key k1, k2;
4234 int level = path->lowest_level;
4237 buf = path->nodes[level];
4238 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4240 btrfs_node_key_to_cpu(buf, &k1, i);
4241 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4243 btrfs_item_key_to_cpu(buf, &k1, i);
4244 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered: nothing to do for this pair. */
4246 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4248 ret = swap_values(root, path, buf, i);
4251 btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item at @slot from leaf @buf by shifting the item headers down
 * over it and decrementing nritems. Only item types whose absence check
 * can recover from (dir index, extent items, backrefs) are deletable;
 * anything else is refused. Fixes low keys if slot 0 was removed.
 */
4257 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4258 struct btrfs_root *root,
4259 struct btrfs_path *path,
4260 struct extent_buffer *buf, int slot)
4262 struct btrfs_key key;
4263 int nritems = btrfs_header_nritems(buf);
4265 btrfs_item_key_to_cpu(buf, &key, slot);
4267 /* These are all the keys we can deal with missing. */
4268 if (key.type != BTRFS_DIR_INDEX_KEY &&
4269 key.type != BTRFS_EXTENT_ITEM_KEY &&
4270 key.type != BTRFS_METADATA_ITEM_KEY &&
4271 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4272 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4275 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4276 (unsigned long long)key.objectid, key.type,
4277 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array. */
4278 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4279 btrfs_item_nr_offset(slot + 1),
4280 sizeof(struct btrfs_item) *
4281 (nritems - slot - 1));
4282 btrfs_set_header_nritems(buf, nritems - 1);
4284 struct btrfs_disk_key disk_key;
4286 btrfs_item_key(buf, &disk_key, 0);
4287 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4289 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item data offsets in the leaf at path->nodes[0].
 *
 * For each item: if its data end does not line up with the previous item's
 * data start (or, for item 0, with the end of the leaf data area), either
 * the item sticks out past its neighbor — in which case we try to delete it
 * as bogus — or there is a gap, which we close by shifting the item's data
 * up and adjusting its offset.
 */
4293 static int fix_item_offset(struct btrfs_trans_handle *trans,
4294 struct btrfs_root *root,
4295 struct btrfs_path *path)
4297 struct extent_buffer *buf;
4301 /* We should only get this for leaves */
4302 BUG_ON(path->lowest_level);
4303 buf = path->nodes[0];
4305 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4306 unsigned int shift = 0, offset;
4308 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4309 BTRFS_LEAF_DATA_SIZE(root)) {
/* Item 0 overruns the leaf data area: unrecoverable overlap. */
4310 if (btrfs_item_end_nr(buf, i) >
4311 BTRFS_LEAF_DATA_SIZE(root)) {
4312 ret = delete_bogus_item(trans, root, path,
4316 fprintf(stderr, "item is off the end of the "
4317 "leaf, can't fix\n");
4321 shift = BTRFS_LEAF_DATA_SIZE(root) -
4322 btrfs_item_end_nr(buf, i);
4323 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4324 btrfs_item_offset_nr(buf, i - 1)) {
/* Overlap with the previous item's data. */
4325 if (btrfs_item_end_nr(buf, i) >
4326 btrfs_item_offset_nr(buf, i - 1)) {
4327 ret = delete_bogus_item(trans, root, path,
4331 fprintf(stderr, "items overlap, can't fix\n");
4335 shift = btrfs_item_offset_nr(buf, i - 1) -
4336 btrfs_item_end_nr(buf, i);
4341 printf("Shifting item nr %d by %u bytes in block %llu\n",
4342 i, shift, (unsigned long long)buf->start);
4343 offset = btrfs_item_offset_nr(buf, i);
4344 memmove_extent_buffer(buf,
4345 btrfs_leaf_data(buf) + offset + shift,
4346 btrfs_leaf_data(buf) + offset,
4347 btrfs_item_size_nr(buf, i));
4348 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4350 btrfs_mark_buffer_dirty(buf);
4354 * We may have moved things, in which case we want to exit so we don't
4355 * write those changes out. Once we have proper abort functionality in
4356 * progs this can be changed to something nicer.
4363 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4364 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable. For every root
 * that references @buf (found via btrfs_find_all_roots), start a
 * transaction, cow a path down to the block (skipping block checks, since
 * the block is known-bad) and run the matching fixer. Each iteration
 * commits its transaction.
 * NOTE(review): sampled source — error cleanup (ulist free, some commits)
 * is in elided lines.
 */
4366 static int try_to_fix_bad_block(struct btrfs_root *root,
4367 struct extent_buffer *buf,
4368 enum btrfs_tree_block_status status)
4370 struct btrfs_trans_handle *trans;
4371 struct ulist *roots;
4372 struct ulist_node *node;
4373 struct btrfs_root *search_root;
4374 struct btrfs_path *path;
4375 struct ulist_iterator iter;
4376 struct btrfs_key root_key, key;
4379 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4380 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4383 path = btrfs_alloc_path();
4387 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4390 btrfs_free_path(path);
4394 ULIST_ITER_INIT(&iter);
4395 while ((node = ulist_next(roots, &iter))) {
4396 root_key.objectid = node->val;
4397 root_key.type = BTRFS_ROOT_ITEM_KEY;
4398 root_key.offset = (u64)-1;
4400 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4407 trans = btrfs_start_transaction(search_root, 0);
4408 if (IS_ERR(trans)) {
4409 ret = PTR_ERR(trans);
4413 path->lowest_level = btrfs_header_level(buf);
/* The block is known bad; don't let the search reject it. */
4414 path->skip_check_block = 1;
4415 if (path->lowest_level)
4416 btrfs_node_key_to_cpu(buf, &key, 0);
4418 btrfs_item_key_to_cpu(buf, &key, 0);
/* cow=1: searching writes a fresh copy we can then repair. */
4419 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4422 btrfs_commit_transaction(trans, search_root);
4425 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4426 ret = fix_key_order(trans, search_root, path);
4427 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4428 ret = fix_item_offset(trans, search_root, path);
4430 btrfs_commit_transaction(trans, search_root);
4433 btrfs_release_path(path);
4434 btrfs_commit_transaction(trans, search_root);
4437 btrfs_free_path(path);
/*
 * Validate one tree block @buf against its cached extent record: record
 * its generation and first-key info, run btrfs_check_leaf/node, attempt an
 * in-place repair on failure, then mark content/owner-ref checked and let
 * maybe_free_extent_rec() retire the record if fully verified.
 * Blocks with FULL_BACKREF skip the owner check entirely.
 */
4441 static int check_block(struct btrfs_root *root,
4442 struct cache_tree *extent_cache,
4443 struct extent_buffer *buf, u64 flags)
4445 struct extent_record *rec;
4446 struct cache_extent *cache;
4447 struct btrfs_key key;
4448 enum btrfs_tree_block_status status;
4452 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4455 rec = container_of(cache, struct extent_record, cache);
4456 rec->generation = btrfs_header_generation(buf);
4458 level = btrfs_header_level(buf);
4459 if (btrfs_header_nritems(buf) > 0) {
4462 btrfs_item_key_to_cpu(buf, &key, 0);
4464 btrfs_node_key_to_cpu(buf, &key, 0);
4466 rec->info_objectid = key.objectid;
4468 rec->info_level = level;
4470 if (btrfs_is_leaf(buf))
4471 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4473 status = btrfs_check_node(root, &rec->parent_key, buf);
4475 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4477 status = try_to_fix_bad_block(root, buf, status);
4478 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4480 fprintf(stderr, "bad block %llu\n",
4481 (unsigned long long)buf->start);
4484 * Signal to callers we need to start the scan over
4485 * again since we'll have cowed blocks.
4490 rec->content_checked = 1;
4491 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4492 rec->owner_ref_checked = 1;
4494 ret = check_owner_ref(root, rec, buf);
4496 rec->owner_ref_checked = 1;
4500 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref for (parent, root) in @rec's backref
 * rb-tree. A nonzero @parent means a full (shared) backref keyed by parent
 * bytenr; otherwise the backref is keyed by root. Returns NULL if absent.
 */
4505 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4506 u64 parent, u64 root)
4508 struct rb_node *node;
4509 struct tree_backref *back = NULL;
4510 struct tree_backref match = {
4517 match.parent = parent;
4518 match.node.full_backref = 1;
4523 node = rb_search(&rec->backref_tree, &match.node.node,
4524 (rb_compare_keys)compare_extent_backref, NULL);
4526 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new tree backref into @rec's backref tree.
 * Nonzero @parent makes it a full backref; otherwise it's keyed by @root.
 * NOTE(review): NULL-check of malloc and the return are in elided lines.
 */
4531 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4532 u64 parent, u64 root)
4534 struct tree_backref *ref = malloc(sizeof(*ref));
4538 memset(&ref->node, 0, sizeof(ref->node));
4540 ref->parent = parent;
4541 ref->node.full_backref = 1;
4544 ref->node.full_backref = 0;
4546 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref matching (parent|root, owner, offset,
 * found_ref, disk_bytenr, bytes) in @rec's backref tree. As with tree
 * backrefs, nonzero @parent selects the full-backref keying.
 * Returns NULL when no match exists.
 */
4551 static struct data_backref *find_data_backref(struct extent_record *rec,
4552 u64 parent, u64 root,
4553 u64 owner, u64 offset,
4555 u64 disk_bytenr, u64 bytes)
4557 struct rb_node *node;
4558 struct data_backref *back = NULL;
4559 struct data_backref match = {
4566 .found_ref = found_ref,
4567 .disk_bytenr = disk_bytenr,
4571 match.parent = parent;
4572 match.node.full_backref = 1;
4577 node = rb_search(&rec->backref_tree, &match.node.node,
4578 (rb_compare_keys)compare_extent_backref, NULL);
4580 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref on @rec. Sets is_data, chooses
 * full-backref (parent) vs. keyed (root/owner/offset) form, records the
 * extent size in ->bytes and widens rec->max_size if this ref is larger.
 * NOTE(review): malloc NULL-check and return are in elided lines.
 */
4585 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4586 u64 parent, u64 root,
4587 u64 owner, u64 offset,
4590 struct data_backref *ref = malloc(sizeof(*ref));
4594 memset(&ref->node, 0, sizeof(ref->node));
4595 ref->node.is_data = 1;
4598 ref->parent = parent;
4601 ref->node.full_backref = 1;
4605 ref->offset = offset;
4606 ref->node.full_backref = 0;
4608 ref->bytes = max_size;
4611 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4612 if (max_size > rec->max_size)
4613 rec->max_size = max_size;
4617 /* Check if the type of extent matches with its chunk */
4618 static void check_extent_type(struct extent_record *rec)
4620 struct btrfs_block_group_cache *bg_cache;
4622 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4626 /* data extent, check chunk directly*/
4627 if (!rec->metadata) {
4628 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4629 rec->wrong_chunk_type = 1;
4633 /* metadata extent, check the obvious case first */
4634 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4635 BTRFS_BLOCK_GROUP_METADATA))) {
4636 rec->wrong_chunk_type = 1;
4641 * Check SYSTEM extent, as it's also marked as metadata, we can only
4642 * make sure it's a SYSTEM extent by its backref
4644 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4645 struct extent_backref *node;
4646 struct tree_backref *tback;
4649 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4650 if (node->is_data) {
4651 /* tree block shouldn't have data backref */
4652 rec->wrong_chunk_type = 1;
4655 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree-owned blocks must live in a SYSTEM block group. */
4657 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4658 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4660 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4661 if (!(bg_cache->flags & bg_type))
4662 rec->wrong_chunk_type = 1;
4667 * Allocate a new extent record, fill default values from @tmpl and insert int
4668 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4669 * the cache, otherwise it fails.
4671 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4672 struct extent_record *tmpl)
4674 struct extent_record *rec;
4677 rec = malloc(sizeof(*rec));
4680 rec->start = tmpl->start;
4681 rec->max_size = tmpl->max_size;
4682 rec->nr = max(tmpl->nr, tmpl->max_size);
4683 rec->found_rec = tmpl->found_rec;
4684 rec->content_checked = tmpl->content_checked;
4685 rec->owner_ref_checked = tmpl->owner_ref_checked;
4686 rec->num_duplicates = 0;
4687 rec->metadata = tmpl->metadata;
4688 rec->flag_block_full_backref = FLAG_UNSET;
4689 rec->bad_full_backref = 0;
4690 rec->crossing_stripes = 0;
4691 rec->wrong_chunk_type = 0;
4692 rec->is_root = tmpl->is_root;
4693 rec->refs = tmpl->refs;
4694 rec->extent_item_refs = tmpl->extent_item_refs;
4695 rec->parent_generation = tmpl->parent_generation;
4696 INIT_LIST_HEAD(&rec->backrefs);
4697 INIT_LIST_HEAD(&rec->dups);
4698 INIT_LIST_HEAD(&rec->list);
4699 rec->backref_tree = RB_ROOT;
4700 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4701 rec->cache.start = tmpl->start;
4702 rec->cache.size = tmpl->nr;
4703 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Account the new record toward the global used-bytes tally. */
4705 bytes_used += rec->nr;
/* Metadata must not cross a stripe boundary; flag it for repair. */
4708 rec->crossing_stripes = check_crossing_stripes(rec->start,
4709 global_info->tree_root->nodesize);
4710 check_extent_type(rec);
4715 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4717 * - refs - if found, increase refs
4718 * - is_root - if found, set
4719 * - content_checked - if found, set
4720 * - owner_ref_checked - if found, set
4722 * If not found, create a new one, initialize and insert.
4724 static int add_extent_rec(struct cache_tree *extent_cache,
4725 struct extent_record *tmpl)
4727 struct extent_record *rec;
4728 struct cache_extent *cache;
4732 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4734 rec = container_of(cache, struct extent_record, cache);
4738 rec->nr = max(tmpl->nr, tmpl->max_size);
4741 * We need to make sure to reset nr to whatever the extent
4742 * record says was the real size, this way we can compare it to
4745 if (tmpl->found_rec) {
/* Same range found twice, or a differently-keyed overlap: duplicate. */
4746 if (tmpl->start != rec->start || rec->found_rec) {
4747 struct extent_record *tmp;
4750 if (list_empty(&rec->list))
4751 list_add_tail(&rec->list,
4752 &duplicate_extents);
4755 * We have to do this song and dance in case we
4756 * find an extent record that falls inside of
4757 * our current extent record but does not have
4758 * the same objectid.
4760 tmp = malloc(sizeof(*tmp));
4763 tmp->start = tmpl->start;
4764 tmp->max_size = tmpl->max_size;
4767 tmp->metadata = tmpl->metadata;
4768 tmp->extent_item_refs = tmpl->extent_item_refs;
4769 INIT_LIST_HEAD(&tmp->list);
4770 list_add_tail(&tmp->list, &rec->dups);
4771 rec->num_duplicates++;
4778 if (tmpl->extent_item_refs && !dup) {
4779 if (rec->extent_item_refs) {
4780 fprintf(stderr, "block %llu rec "
4781 "extent_item_refs %llu, passed %llu\n",
4782 (unsigned long long)tmpl->start,
4783 (unsigned long long)
4784 rec->extent_item_refs,
4785 (unsigned long long)tmpl->extent_item_refs);
4787 rec->extent_item_refs = tmpl->extent_item_refs;
4791 if (tmpl->content_checked)
4792 rec->content_checked = 1;
4793 if (tmpl->owner_ref_checked)
4794 rec->owner_ref_checked = 1;
4795 memcpy(&rec->parent_key, &tmpl->parent_key,
4796 sizeof(tmpl->parent_key));
4797 if (tmpl->parent_generation)
4798 rec->parent_generation = tmpl->parent_generation;
4799 if (rec->max_size < tmpl->max_size)
4800 rec->max_size = tmpl->max_size;
4803 * A metadata extent can't cross stripe_len boundary, otherwise
4804 * kernel scrub won't be able to handle it.
4805 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4809 rec->crossing_stripes = check_crossing_stripes(
4810 rec->start, global_info->tree_root->nodesize);
4811 check_extent_type(rec);
4812 maybe_free_extent_rec(extent_cache, rec);
/* No existing record for this range: create a fresh one. */
4816 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr. If no extent record exists yet,
 * insert a 1-byte placeholder record first. @found_ref distinguishes refs
 * seen while walking the trees from refs seen in the extent tree; setting
 * either flag twice for the same backref is reported as a duplicate.
 */
4821 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4822 u64 parent, u64 root, int found_ref)
4824 struct extent_record *rec;
4825 struct tree_backref *back;
4826 struct cache_extent *cache;
4828 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4830 struct extent_record tmpl;
4832 memset(&tmpl, 0, sizeof(tmpl));
4833 tmpl.start = bytenr;
4837 add_extent_rec_nolookup(extent_cache, &tmpl);
/* Re-lookup; the placeholder we just inserted must be there now. */
4839 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4844 rec = container_of(cache, struct extent_record, cache);
4845 if (rec->start != bytenr) {
4849 back = find_tree_backref(rec, parent, root);
4851 back = alloc_tree_backref(rec, parent, root);
4856 if (back->node.found_ref) {
4857 fprintf(stderr, "Extent back ref already exists "
4858 "for %llu parent %llu root %llu \n",
4859 (unsigned long long)bytenr,
4860 (unsigned long long)parent,
4861 (unsigned long long)root);
4863 back->node.found_ref = 1;
4865 if (back->node.found_extent_tree) {
4866 fprintf(stderr, "Extent back ref already exists "
4867 "for %llu parent %llu root %llu \n",
4868 (unsigned long long)bytenr,
4869 (unsigned long long)parent,
4870 (unsigned long long)root);
4872 back->node.found_extent_tree = 1;
4874 check_extent_type(rec);
4875 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a placeholder extent
 * record if needed (as add_tree_backref does). When @found_ref is set,
 * @max_size is the real extent size and is asserted against an already-seen
 * ref; otherwise the ref comes from the extent tree and carries @num_refs.
 */
4879 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4880 u64 parent, u64 root, u64 owner, u64 offset,
4881 u32 num_refs, int found_ref, u64 max_size)
4883 struct extent_record *rec;
4884 struct data_backref *back;
4885 struct cache_extent *cache;
4887 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4889 struct extent_record tmpl;
4891 memset(&tmpl, 0, sizeof(tmpl));
4892 tmpl.start = bytenr;
4894 tmpl.max_size = max_size;
4896 add_extent_rec_nolookup(extent_cache, &tmpl);
4898 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4903 rec = container_of(cache, struct extent_record, cache);
4904 if (rec->max_size < max_size)
4905 rec->max_size = max_size;
4908 * If found_ref is set then max_size is the real size and must match the
4909 * existing refs. So if we have already found a ref then we need to
4910 * make sure that this ref matches the existing one, otherwise we need
4911 * to add a new backref so we can notice that the backrefs don't match
4912 * and we need to figure out who is telling the truth. This is to
4913 * account for that awful fsync bug I introduced where we'd end up with
4914 * a btrfs_file_extent_item that would have its length include multiple
4915 * prealloc extents or point inside of a prealloc extent.
4917 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4920 back = alloc_data_backref(rec, parent, root, owner, offset,
4926 BUG_ON(num_refs != 1);
4927 if (back->node.found_ref)
4928 BUG_ON(back->bytes != max_size);
4929 back->node.found_ref = 1;
4930 back->found_ref += 1;
4931 back->bytes = max_size;
4932 back->disk_bytenr = bytenr;
/* A ref seen from the fs tree implies the content is accounted for. */
4934 rec->content_checked = 1;
4935 rec->owner_ref_checked = 1;
4937 if (back->node.found_extent_tree) {
4938 fprintf(stderr, "Extent back ref already exists "
4939 "for %llu parent %llu root %llu "
4940 "owner %llu offset %llu num_refs %lu\n",
4941 (unsigned long long)bytenr,
4942 (unsigned long long)parent,
4943 (unsigned long long)root,
4944 (unsigned long long)owner,
4945 (unsigned long long)offset,
4946 (unsigned long)num_refs);
4948 back->num_refs = num_refs;
4949 back->node.found_extent_tree = 1;
4951 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later processing: add to @seen first
 * (deduplicates — an already-seen range is not queued again) and only then
 * to @pending.
 */
4955 static int add_pending(struct cache_tree *pending,
4956 struct cache_tree *seen, u64 bytenr, u32 size)
4959 ret = add_cache_extent(seen, bytenr, size);
4962 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (up to @bits_nr entries) with the next ranges to read.
 * Priority: a single readahead entry if any; otherwise node extents near
 * @last (backing up 32k to catch neighbors); otherwise plain pending
 * extents. When room remains (> 8 slots), extend the batch with pending
 * entries that follow contiguously (gap < 32k) to coalesce reads.
 * NOTE(review): sampled source — ret initialization/increments and early
 * returns are in elided lines.
 */
4966 static int pick_next_pending(struct cache_tree *pending,
4967 struct cache_tree *reada,
4968 struct cache_tree *nodes,
4969 u64 last, struct block_info *bits, int bits_nr,
4972 unsigned long node_start = last;
4973 struct cache_extent *cache;
4976 cache = search_cache_extent(reada, 0);
4978 bits[0].start = cache->start;
4979 bits[0].size = cache->size;
4984 if (node_start > 32768)
4985 node_start -= 32768;
4987 cache = search_cache_extent(nodes, node_start);
4989 cache = search_cache_extent(nodes, 0);
4992 cache = search_cache_extent(pending, 0);
4997 bits[ret].start = cache->start;
4998 bits[ret].size = cache->size;
4999 cache = next_cache_extent(cache);
5001 } while (cache && ret < bits_nr);
5007 bits[ret].start = cache->start;
5008 bits[ret].size = cache->size;
5009 cache = next_cache_extent(cache);
5011 } while (cache && ret < bits_nr);
5013 if (bits_nr - ret > 8) {
5014 u64 lookup = bits[0].start + bits[0].size;
5015 struct cache_extent *next;
5016 next = search_cache_extent(pending, lookup);
/* Stop coalescing once the gap to the next extent exceeds 32k. */
5018 if (next->start - lookup > 32768)
5020 bits[ret].start = next->start;
5021 bits[ret].size = next->size;
5022 lookup = next->start + next->size;
5026 next = next_cache_extent(next);
/* Detach a chunk record from its lists before it is freed. */
5034 static void free_chunk_record(struct cache_extent *cache)
5036 struct chunk_record *rec;
5038 rec = container_of(cache, struct chunk_record, cache);
5039 list_del_init(&rec->list);
5040 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
5044 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5046 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree destructor callback: free one device record. */
5049 static void free_device_record(struct rb_node *node)
5051 struct device_record *rec;
5053 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): line below belongs to the device-cache teardown function
 * whose signature is elided in this sampled view.
 */
5057 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, link it onto
 * the tree's block_groups list.
 */
5059 int insert_block_group_record(struct block_group_tree *tree,
5060 struct block_group_record *bg_rec)
5064 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5068 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor callback: unlink a block group record before freeing. */
5072 static void free_block_group_record(struct cache_extent *cache)
5074 struct block_group_record *rec;
5076 rec = container_of(cache, struct block_group_record, cache);
5077 list_del_init(&rec->list);
/* Free every block group record held in @tree. */
5081 void free_block_group_tree(struct block_group_tree *tree)
5083 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree and link it onto both orphan lists until its
 * owning chunk and device are matched up.
 */
5086 int insert_device_extent_record(struct device_extent_tree *tree,
5087 struct device_extent_record *de_rec)
5092 * Device extent is a bit different from the other extents, because
5093 * the extents which belong to the different devices may have the
5094 * same start and size, so we need use the special extent cache
5095 * search/insert functions.
5097 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5101 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5102 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Destructor callback: unlink a device extent record before freeing. */
5106 static void free_device_extent_record(struct cache_extent *cache)
5108 struct device_extent_record *rec;
5110 rec = container_of(cache, struct device_extent_record, cache);
5111 if (!list_empty(&rec->chunk_list))
5112 list_del_init(&rec->chunk_list);
5113 if (!list_empty(&rec->device_list))
5114 list_del_init(&rec->device_list);
/* Free every device extent record held in @tree. */
5118 void free_device_extent_tree(struct device_extent_tree *tree)
5120 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5123 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 (pre-2.6.29 format) extent ref item: objectids below
 * BTRFS_FIRST_FREE_OBJECTID are tree roots (tree backref), everything
 * else is a data backref.
 */
5124 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5125 struct extent_buffer *leaf, int slot)
5127 struct btrfs_extent_ref_v0 *ref0;
5128 struct btrfs_key key;
5130 btrfs_item_key_to_cpu(leaf, &key, slot);
5131 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5132 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5133 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5135 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5136 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf.
 * The record is sized for its stripe count and fully populated from the
 * on-disk btrfs_chunk, including per-stripe devid/offset/uuid.
 * Exits the program on allocation failure.
 */
5142 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5143 struct btrfs_key *key,
5146 struct btrfs_chunk *ptr;
5147 struct chunk_record *rec;
5150 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5151 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5153 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5155 fprintf(stderr, "memory allocation failed\n");
5159 INIT_LIST_HEAD(&rec->list);
5160 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are cached by logical start (key offset) and length. */
5163 rec->cache.start = key->offset;
5164 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5166 rec->generation = btrfs_header_generation(leaf);
5168 rec->objectid = key->objectid;
5169 rec->type = key->type;
5170 rec->offset = key->offset;
5172 rec->length = rec->cache.size;
5173 rec->owner = btrfs_chunk_owner(leaf, ptr);
5174 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5175 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5176 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5177 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5178 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5179 rec->num_stripes = num_stripes;
5180 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5182 for (i = 0; i < rec->num_stripes; ++i) {
5183 rec->stripes[i].devid =
5184 btrfs_stripe_devid_nr(leaf, ptr, i);
5185 rec->stripes[i].offset =
5186 btrfs_stripe_offset_nr(leaf, ptr, i);
5187 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5188 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the CHUNK_ITEM at @slot into a chunk record and insert it into
 * @chunk_cache, reporting (not silently dropping) duplicates.
 */
5195 static int process_chunk_item(struct cache_tree *chunk_cache,
5196 struct btrfs_key *key, struct extent_buffer *eb,
5199 struct chunk_record *rec;
5202 rec = btrfs_new_chunk_record(eb, key, slot);
5203 ret = insert_cache_extent(chunk_cache, &rec->cache);
5205 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5206 rec->offset, rec->length);
/*
 * Parse the DEV_ITEM at @slot into a device_record (devid, total bytes,
 * bytes used) and insert it into @dev_cache by devid, reporting duplicates.
 * Exits the program on allocation failure.
 */
5213 static int process_device_item(struct rb_root *dev_cache,
5214 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5216 struct btrfs_dev_item *ptr;
5217 struct device_record *rec;
5220 ptr = btrfs_item_ptr(eb,
5221 slot, struct btrfs_dev_item);
5223 rec = malloc(sizeof(*rec));
5225 fprintf(stderr, "memory allocation failed\n");
5229 rec->devid = key->offset;
5230 rec->generation = btrfs_header_generation(eb);
5232 rec->objectid = key->objectid;
5233 rec->type = key->type;
5234 rec->offset = key->offset;
/* Authoritative devid comes from the item body, not the key. */
5236 rec->devid = btrfs_device_id(eb, ptr);
5237 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5238 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5240 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5242 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot: cached by [objectid, objectid+offset), carrying the block group
 * flags. Exits the program on allocation failure.
 */
5249 struct block_group_record *
5250 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5253 struct btrfs_block_group_item *ptr;
5254 struct block_group_record *rec;
5256 rec = calloc(1, sizeof(*rec));
5258 fprintf(stderr, "memory allocation failed\n");
5262 rec->cache.start = key->objectid;
5263 rec->cache.size = key->offset;
5265 rec->generation = btrfs_header_generation(leaf);
5267 rec->objectid = key->objectid;
5268 rec->type = key->type;
5269 rec->offset = key->offset;
5271 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5272 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5274 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the BLOCK_GROUP_ITEM at @slot and insert it into
 * @block_group_cache, reporting duplicates.
 */
5279 static int process_block_group_item(struct block_group_tree *block_group_cache,
5280 struct btrfs_key *key,
5281 struct extent_buffer *eb, int slot)
5283 struct block_group_record *rec;
5286 rec = btrfs_new_block_group_record(eb, key, slot);
5287 ret = insert_block_group_record(block_group_cache, rec);
5289 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5290 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot: cached by (devid, physical offset, length), remembering the
 * owning chunk's objectid/offset. Exits the program on allocation failure.
 */
5297 struct device_extent_record *
5298 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5299 struct btrfs_key *key, int slot)
5301 struct device_extent_record *rec;
5302 struct btrfs_dev_extent *ptr;
5304 rec = calloc(1, sizeof(*rec));
5306 fprintf(stderr, "memory allocation failed\n");
5310 rec->cache.objectid = key->objectid;
5311 rec->cache.start = key->offset;
5313 rec->generation = btrfs_header_generation(leaf);
5315 rec->objectid = key->objectid;
5316 rec->type = key->type;
5317 rec->offset = key->offset;
5319 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5320 rec->chunk_objecteid =
5321 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5323 btrfs_dev_extent_chunk_offset(leaf, ptr);
5324 rec->length = btrfs_dev_extent_length(leaf, ptr);
5325 rec->cache.size = rec->length;
5327 INIT_LIST_HEAD(&rec->chunk_list);
5328 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the DEV_EXTENT item at @slot and insert it into
 * @dev_extent_cache, reporting duplicates.
 */
5334 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5335 struct btrfs_key *key, struct extent_buffer *eb,
5338 struct device_extent_record *rec;
5341 rec = btrfs_new_device_extent_record(eb, key, slot);
5342 ret = insert_device_extent_record(dev_extent_cache, rec);
5345 "Device extent[%llu, %llu, %llu] existed.\n",
5346 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM or METADATA_ITEM at @slot: register the extent
 * record (size is key.offset for extent items, nodesize for metadata
 * items), handle the v0 compat format, then walk the inline refs adding a
 * tree or data backref per entry. Unknown inline ref types are reported
 * as corruption.
 * NOTE(review): sampled source — switch/while framing and `break`s are in
 * elided lines.
 */
5353 static int process_extent_item(struct btrfs_root *root,
5354 struct cache_tree *extent_cache,
5355 struct extent_buffer *eb, int slot)
5357 struct btrfs_extent_item *ei;
5358 struct btrfs_extent_inline_ref *iref;
5359 struct btrfs_extent_data_ref *dref;
5360 struct btrfs_shared_data_ref *sref;
5361 struct btrfs_key key;
5362 struct extent_record tmpl;
5366 u32 item_size = btrfs_item_size_nr(eb, slot);
5372 btrfs_item_key_to_cpu(eb, &key, slot);
5374 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata: key offset is the level, size is one node. */
5376 num_bytes = root->nodesize;
5378 num_bytes = key.offset;
5381 if (item_size < sizeof(*ei)) {
5382 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5383 struct btrfs_extent_item_v0 *ei0;
5384 BUG_ON(item_size != sizeof(*ei0));
5385 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5386 refs = btrfs_extent_refs_v0(eb, ei0);
5390 memset(&tmpl, 0, sizeof(tmpl));
5391 tmpl.start = key.objectid;
5392 tmpl.nr = num_bytes;
5393 tmpl.extent_item_refs = refs;
5394 tmpl.metadata = metadata;
5396 tmpl.max_size = num_bytes;
5398 return add_extent_rec(extent_cache, &tmpl);
5401 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5402 refs = btrfs_extent_refs(eb, ei);
5403 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5408 memset(&tmpl, 0, sizeof(tmpl));
5409 tmpl.start = key.objectid;
5410 tmpl.nr = num_bytes;
5411 tmpl.extent_item_refs = refs;
5412 tmpl.metadata = metadata;
5414 tmpl.max_size = num_bytes;
5415 add_extent_rec(extent_cache, &tmpl);
5417 ptr = (unsigned long)(ei + 1);
/* Non-skinny tree blocks carry a tree_block_info before the refs. */
5418 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5419 key.type == BTRFS_EXTENT_ITEM_KEY)
5420 ptr += sizeof(struct btrfs_tree_block_info);
5422 end = (unsigned long)ei + item_size;
5424 iref = (struct btrfs_extent_inline_ref *)ptr;
5425 type = btrfs_extent_inline_ref_type(eb, iref);
5426 offset = btrfs_extent_inline_ref_offset(eb, iref);
5428 case BTRFS_TREE_BLOCK_REF_KEY:
5429 add_tree_backref(extent_cache, key.objectid,
5432 case BTRFS_SHARED_BLOCK_REF_KEY:
5433 add_tree_backref(extent_cache, key.objectid,
5436 case BTRFS_EXTENT_DATA_REF_KEY:
5437 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5438 add_data_backref(extent_cache, key.objectid, 0,
5439 btrfs_extent_data_ref_root(eb, dref),
5440 btrfs_extent_data_ref_objectid(eb,
5442 btrfs_extent_data_ref_offset(eb, dref),
5443 btrfs_extent_data_ref_count(eb, dref),
5446 case BTRFS_SHARED_DATA_REF_KEY:
5447 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5448 add_data_backref(extent_cache, key.objectid, offset,
5450 btrfs_shared_data_ref_count(eb, sref),
5454 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5455 key.objectid, key.type, num_bytes);
5458 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify the free space cache agrees for one range.
 *
 * Checks that [offset, offset + bytes) inside @cache is represented by a
 * free-space entry with exactly matching offset and length, after trimming
 * away any superblock mirror stripes that overlap the range.  On success
 * the matching entry is unlinked from cache->free_space_ctl so the caller
 * can detect leftover entries afterwards.
 *
 * NOTE(review): this extract is missing source lines (local declarations,
 * the stripe-walk loop header, braces/returns) — restore them from the
 * upstream btrfs-progs sources before building.
 */
5465 static int check_cache_range(struct btrfs_root *root,
5466 struct btrfs_block_group_cache *cache,
5467 u64 offset, u64 bytes)
5469 struct btrfs_free_space *entry;
/* Walk each superblock mirror and map it into this block group. */
5475 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5476 bytenr = btrfs_sb_offset(i);
5477 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5478 cache->key.objectid, bytenr, 0,
5479 &logical, &nr, &stripe_len);
/* Skip stripes that do not intersect [offset, offset + bytes). */
5484 if (logical[nr] + stripe_len <= offset)
5486 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at our range: consume it from the front. */
5488 if (logical[nr] == offset) {
5489 if (stripe_len >= bytes) {
5493 bytes -= stripe_len;
5494 offset += stripe_len;
/* Stripe begins before our range: clip the overlapping head. */
5495 } else if (logical[nr] < offset) {
5496 if (logical[nr] + stripe_len >=
5501 bytes = (offset + bytes) -
5502 (logical[nr] + stripe_len);
5503 offset = logical[nr] + stripe_len;
5506 * Could be tricky, the super may land in the
5507 * middle of the area we're checking. First
5508 * check the easiest case, it's at the end.
5510 if (logical[nr] + stripe_len >=
5512 bytes = logical[nr] - offset;
5516 /* Check the left side */
/* Recurse on the part of the range left of the stripe. */
5517 ret = check_cache_range(root, cache,
5519 logical[nr] - offset);
5525 /* Now we continue with the right side */
5526 bytes = (offset + bytes) -
5527 (logical[nr] + stripe_len);
5528 offset = logical[nr] + stripe_len;
/* The surviving range must match one free-space entry exactly. */
5535 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5537 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5538 offset, offset+bytes);
5542 if (entry->offset != offset) {
5543 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5548 if (entry->bytes != bytes) {
5549 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5550 bytes, entry->bytes, offset);
/* Consume the entry so verify_space_cache() can spot leftovers. */
5554 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check one block group's free space cache
 * against the extent tree.
 *
 * Walks the extent tree over the block group's byte range; every gap
 * between allocated extents (and the tail of the block group) must be
 * covered by a matching free-space entry (checked and consumed via
 * check_cache_range()).  Afterwards the free-space rbtree must be empty,
 * otherwise the cache holds entries for space that is actually allocated.
 *
 * NOTE(review): lines are missing from this extract (loop constructs,
 * braces, error paths, the trailing return) — restore from upstream
 * btrfs-progs before building.
 */
5559 static int verify_space_cache(struct btrfs_root *root,
5560 struct btrfs_block_group_cache *cache)
5562 struct btrfs_path *path;
5563 struct extent_buffer *leaf;
5564 struct btrfs_key key;
5568 path = btrfs_alloc_path();
/* All lookups below are against the extent tree. */
5572 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5574 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5576 key.objectid = last;
5578 key.type = BTRFS_EXTENT_ITEM_KEY;
5580 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5585 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5586 ret = btrfs_next_leaf(root, path);
5594 leaf = path->nodes[0];
5595 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we have walked past the end of this block group. */
5596 if (key.objectid >= cache->key.offset + cache->key.objectid)
5598 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5599 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Contiguous extent: advance 'last'.  EXTENT_ITEM offsets hold the
 * byte length; METADATA_ITEM extents are one node in size.
 */
5604 if (last == key.objectid) {
5605 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5606 last = key.objectid + key.offset;
5608 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent: must be free space. */
5613 ret = check_cache_range(root, cache, last,
5614 key.objectid - last);
5617 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5618 last = key.objectid + key.offset;
5620 last = key.objectid + root->nodesize;
/* Trailing gap up to the end of the block group. */
5624 if (last < cache->key.objectid + cache->key.offset)
5625 ret = check_cache_range(root, cache, last,
5626 cache->key.objectid +
5627 cache->key.offset - last);
5630 btrfs_free_path(path);
/* Every valid entry was unlinked above; leftovers are an error. */
5633 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5634 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free space cache/tree for every block
 * group in the filesystem.
 *
 * For each block group: load its free space from either the free space
 * tree (when the COMPAT_RO feature is set) or the v1 space cache, then
 * verify it against the extent tree via verify_space_cache().  Returns
 * -EINVAL if any block group failed verification, 0 otherwise.
 *
 * NOTE(review): this extract is missing source lines (the block group
 * iteration loop, error handling, 'error' accounting) — restore from
 * upstream btrfs-progs before building.
 */
5642 static int check_space_cache(struct btrfs_root *root)
5644 struct btrfs_block_group_cache *cache;
/* Start scanning just past the primary superblock. */
5645 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A stale cache generation means the kernel will throw the cache away. */
5649 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5650 btrfs_super_generation(root->fs_info->super_copy) !=
5651 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5652 printf("cache and super generation don't match, space cache "
5653 "will be invalidated\n");
5657 if (ctx.progress_enabled) {
5658 ctx.tp = TASK_FREE_SPACE;
5659 task_start(ctx.info);
5663 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5667 start = cache->key.objectid + cache->key.offset;
5668 if (!cache->free_space_ctl) {
5669 if (btrfs_init_free_space_ctl(cache,
5670 root->sectorsize)) {
/* Drop any previously loaded entries before (re)loading. */
5675 btrfs_remove_free_space_cache(cache);
/* Free space tree path (space_cache=v2). */
5678 if (btrfs_fs_compat_ro(root->fs_info,
5679 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5680 ret = exclude_super_stripes(root, cache);
5682 fprintf(stderr, "could not exclude super stripes: %s\n",
5687 ret = load_free_space_tree(root->fs_info, cache);
5688 free_excluded_extents(root, cache);
5690 fprintf(stderr, "could not load free space tree: %s\n",
/* Legacy v1 space cache path. */
5697 ret = load_free_space_cache(root->fs_info, cache);
5702 ret = verify_space_cache(root, cache);
5704 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5705 cache->key.objectid);
5710 task_stop(ctx.info);
5712 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read a data extent and verify it against the
 * checksums stored in a csum tree leaf.
 *
 * Reads @num_bytes starting at @bytenr (in as few I/Os as possible),
 * computes a checksum per sectorsize block, and compares each against the
 * expected value at @leaf_offset in @eb.  On mismatch it reports the
 * error and retries the block from the next mirror while one is
 * available.
 *
 * NOTE(review): this extract is missing source lines (several local
 * declarations, error paths, the free of the data buffer and the final
 * return) — restore from upstream btrfs-progs before building.
 */
5715 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5716 u64 num_bytes, unsigned long leaf_offset,
5717 struct extent_buffer *eb) {
5720 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5722 unsigned long csum_offset;
5726 u64 data_checked = 0;
/* Extent length must be sector aligned; csums cover whole sectors. */
5732 if (num_bytes % root->sectorsize)
5735 data = malloc(num_bytes);
5739 while (offset < num_bytes) {
5742 read_len = num_bytes - offset;
5743 /* read as much space once a time */
5744 ret = read_extent_data(root, data + offset,
5745 bytenr + offset, &read_len, mirror);
5749 /* verify every 4k data's checksum */
5750 while (data_checked < read_len) {
5752 tmp = offset + data_checked;
/* Checksum one sector of the data we just read. */
5754 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5755 csum, root->sectorsize);
5756 btrfs_csum_final(csum, (char *)&csum);
/* Expected csum lives at a fixed stride inside the csum item. */
5758 csum_offset = leaf_offset +
5759 tmp / root->sectorsize * csum_size;
5760 read_extent_buffer(eb, (char *)&csum_expected,
5761 csum_offset, csum_size);
5762 /* try another mirror */
5763 if (csum != csum_expected) {
5764 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5765 mirror, bytenr + tmp,
5766 csum, csum_expected);
5767 num_copies = btrfs_num_copies(
5768 &root->fs_info->mapping_tree,
5770 if (mirror < num_copies - 1) {
5775 data_checked += root->sectorsize;
/*
 * check_extent_exists - confirm that [bytenr, bytenr + num_bytes) is fully
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * Searches backwards to the first candidate extent item, then walks
 * forward, shrinking the queried range as covering extents are found.
 * Ranges that remain uncovered at the end are reported as "no extents for
 * csum range" errors.
 *
 * NOTE(review): this extract is missing source lines (loop headers,
 * 'goto' labels/braces, slot adjustments, the final return) — restore
 * from upstream btrfs-progs before building.
 */
5784 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5787 struct btrfs_path *path;
5788 struct extent_buffer *leaf;
5789 struct btrfs_key key;
5792 path = btrfs_alloc_path();
5794 fprintf(stderr, "Error allocating path\n");
/* Search for the last key at or before (bytenr, EXTENT_ITEM, -1). */
5798 key.objectid = bytenr;
5799 key.type = BTRFS_EXTENT_ITEM_KEY;
5800 key.offset = (u64)-1;
5803 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5806 fprintf(stderr, "Error looking up extent record %d\n", ret);
5807 btrfs_free_path(path);
5810 if (path->slots[0] > 0) {
5813 ret = btrfs_prev_leaf(root, path);
5816 } else if (ret > 0) {
5823 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5826 * Block group items come before extent items if they have the same
5827 * bytenr, so walk back one more just in case. Dear future traveller,
5828 * first congrats on mastering time travel. Now if it's not too much
5829 * trouble could you go back to 2006 and tell Chris to make the
5830 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5831 * EXTENT_ITEM_KEY please?
/* Keep stepping back while we sit on higher-typed items. */
5833 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5834 if (path->slots[0] > 0) {
5837 ret = btrfs_prev_leaf(root, path);
5840 } else if (ret > 0) {
5845 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward walk: consume extents that overlap the queried range. */
5849 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5850 ret = btrfs_next_leaf(root, path);
5852 fprintf(stderr, "Error going to next leaf "
5854 btrfs_free_path(path);
5860 leaf = path->nodes[0];
5861 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5862 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: not relevant yet. */
5866 if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: nothing more can cover it. */
5870 if (key.objectid > bytenr + num_bytes) {
5873 if (key.objectid == bytenr) {
5874 if (key.offset >= num_bytes) {
5878 num_bytes -= key.offset;
5879 bytenr += key.offset;
5880 } else if (key.objectid < bytenr) {
5881 if (key.objectid + key.offset >= bytenr + num_bytes) {
5885 num_bytes = (bytenr + num_bytes) -
5886 (key.objectid + key.offset);
5887 bytenr = key.objectid + key.offset;
5889 if (key.objectid + key.offset < bytenr + num_bytes) {
5890 u64 new_start = key.objectid + key.offset;
5891 u64 new_bytes = bytenr + num_bytes - new_start;
5894 * Weird case, the extent is in the middle of
5895 * our range, we'll have to search one side
5896 * and then the other. Not sure if this happens
5897 * in real life, but no harm in coding it up
5898 * anyway just in case.
5900 btrfs_release_path(path);
/* Recurse on the right-hand remainder first. */
5901 ret = check_extent_exists(root, new_start,
5904 fprintf(stderr, "Right section didn't "
5908 num_bytes = key.objectid - bytenr;
5911 num_bytes = key.objectid - bytenr;
/* Anything left over was never covered by an extent item. */
5918 if (num_bytes && !ret) {
5919 fprintf(stderr, "There are no extents for csum range "
5920 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5924 btrfs_free_path(path);
/*
 * check_csums - walk the entire csum tree and validate each csum item.
 *
 * For every EXTENT_CSUM item: (a) if check_data_csum is set, re-read the
 * data and verify the stored checksums via check_extent_csums(); (b) make
 * sure every checksummed byte range is backed by a real extent record
 * (check_extent_exists()), coalescing adjacent items into one range before
 * checking.
 *
 * NOTE(review): this extract is missing source lines (loop constructs,
 * error accounting, the path->slots advance, the final return) — restore
 * from upstream btrfs-progs before building.
 */
5928 static int check_csums(struct btrfs_root *root)
5930 struct btrfs_path *path;
5931 struct extent_buffer *leaf;
5932 struct btrfs_key key;
5933 u64 offset = 0, num_bytes = 0;
5934 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5938 unsigned long leaf_offset;
/* All csum items live in the dedicated csum tree. */
5940 root = root->fs_info->csum_root;
5941 if (!extent_buffer_uptodate(root->node)) {
5942 fprintf(stderr, "No valid csum tree found\n");
5946 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5947 key.type = BTRFS_EXTENT_CSUM_KEY;
5950 path = btrfs_alloc_path();
5954 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5956 fprintf(stderr, "Error searching csum tree %d\n", ret);
5957 btrfs_free_path(path);
/* Landed past the first item: step back one slot. */
5961 if (ret > 0 && path->slots[0])
5966 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5967 ret = btrfs_next_leaf(root, path);
5969 fprintf(stderr, "Error going to next leaf "
5976 leaf = path->nodes[0];
5978 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5979 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum entry covers one sectorsize block of data. */
5984 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5985 csum_size) * root->sectorsize;
5986 if (!check_data_csum)
5987 goto skip_csum_check;
5988 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5989 ret = check_extent_csums(root, key.offset, data_len,
5995 offset = key.offset;
/* Non-contiguous item: flush the accumulated range first. */
5996 } else if (key.offset != offset + num_bytes) {
5997 ret = check_extent_exists(root, offset, num_bytes);
5999 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6000 "there is no extent record\n",
6001 offset, offset+num_bytes);
6004 offset = key.offset;
6007 num_bytes += data_len;
6011 btrfs_free_path(path);
6015 static int is_dropped_key(struct btrfs_key *key,
6016 struct btrfs_key *drop_key) {
6017 if (key->objectid < drop_key->objectid)
6019 else if (key->objectid == drop_key->objectid) {
6020 if (key->type < drop_key->type)
6022 else if (key->type == drop_key->type) {
6023 if (key->offset < drop_key->offset)
6031 * Here are the rules for FULL_BACKREF.
6033 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6034 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6036 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6037 * if it happened after the relocation occurred since we'll have dropped the
6038 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6039 * have no real way to know for sure.
6041 * We process the blocks one root at a time, and we start from the lowest root
6042 * objectid and go to the highest. So we can just lookup the owner backref for
6043 * the record and if we don't find it then we know it doesn't exist and we have
6046 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6047 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6048 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether a tree block should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, per the rules documented above.
 *
 * Consults the extent record for @buf: non-fs-tree owners never use
 * FULL_BACKREF; a RELOC header flag or a missing owner backref means the
 * flag must be set.  Any disagreement with the recorded
 * flag_block_full_backref marks the record bad_full_backref.
 *
 * NOTE(review): this extract is missing source lines (the '*flags'
 * parameter line, several branch bodies/returns and 'goto' labels) —
 * restore from upstream btrfs-progs before building.
 */
6050 static int calc_extent_flag(struct btrfs_root *root,
6051 struct cache_tree *extent_cache,
6052 struct extent_buffer *buf,
6053 struct root_item_record *ri,
6056 struct extent_record *rec;
6057 struct cache_extent *cache;
6058 struct tree_backref *tback;
6061 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6062 /* we have added this extent before */
6064 rec = container_of(cache, struct extent_record, cache);
6067 * Except file/reloc tree, we can not have
6070 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root's own node cannot be a shared (full backref) block. */
6075 if (buf->start == ri->bytenr)
6078 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6081 owner = btrfs_header_owner(buf);
6082 if (owner == ri->objectid)
/* No backref from the header owner implies FULL_BACKREF (rule 2). */
6085 tback = find_tree_backref(rec, 0, owner);
/* Record disagreed about normal backref: flag it for repair. */
6090 if (rec->flag_block_full_backref != FLAG_UNSET &&
6091 rec->flag_block_full_backref != 0)
6092 rec->bad_full_backref = 1;
6095 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Record disagreed about full backref: flag it for repair. */
6096 if (rec->flag_block_full_backref != FLAG_UNSET &&
6097 rec->flag_block_full_backref != 1)
6098 rec->bad_full_backref = 1;
/*
 * run_next_block - process one pending tree block during the extent-tree
 * scan.
 *
 * Picks the next block from the pending/nodes caches (issuing readahead
 * for a batch first), reads it, works out its FULL_BACKREF state, sanity
 * checks it, and then:
 *   - for leaves: records every item of interest (extent items, csum
 *     bytes, chunk/dev/block-group/dev-extent items, all four backref
 *     item types, orphans queued for deletion, and file extent backrefs);
 *   - for nodes: queues every child pointer (skipping keys already
 *     dropped per ri->drop_key) and records child extent records and
 *     tree backrefs.
 * Also accumulates the global btree/fs-tree/extent-tree byte counters
 * and space-waste statistics.
 *
 * NOTE(review): this extract is heavily line-sampled (missing parameter
 * lines, local declarations, switch/continue/break statements, braces
 * and error paths) — restore from upstream btrfs-progs before building.
 */
6102 static int run_next_block(struct btrfs_root *root,
6103 struct block_info *bits,
6106 struct cache_tree *pending,
6107 struct cache_tree *seen,
6108 struct cache_tree *reada,
6109 struct cache_tree *nodes,
6110 struct cache_tree *extent_cache,
6111 struct cache_tree *chunk_cache,
6112 struct rb_root *dev_cache,
6113 struct block_group_tree *block_group_cache,
6114 struct device_extent_tree *dev_extent_cache,
6115 struct root_item_record *ri)
6117 struct extent_buffer *buf;
6118 struct extent_record *rec = NULL;
6129 struct btrfs_key key;
6130 struct cache_extent *cache;
/* Grab a batch of blocks and kick off readahead for them. */
6133 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6134 bits_nr, &reada_bits);
6139 for(i = 0; i < nritems; i++) {
6140 ret = add_cache_extent(reada, bits[i].start,
6145 /* fixme, get the parent transid */
6146 readahead_tree_block(root, bits[i].start,
6150 *last = bits[0].start;
6151 bytenr = bits[0].start;
6152 size = bits[0].size;
/* This block is being processed: drop it from the work caches. */
6154 cache = lookup_cache_extent(pending, bytenr, size);
6156 remove_cache_extent(pending, cache);
6159 cache = lookup_cache_extent(reada, bytenr, size);
6161 remove_cache_extent(reada, cache);
6164 cache = lookup_cache_extent(nodes, bytenr, size);
6166 remove_cache_extent(nodes, cache);
6169 cache = lookup_cache_extent(extent_cache, bytenr, size);
6171 rec = container_of(cache, struct extent_record, cache);
6172 gen = rec->parent_generation;
6175 /* fixme, get the real parent transid */
6176 buf = read_tree_block(root, bytenr, size, gen);
6177 if (!extent_buffer_uptodate(buf)) {
6178 record_bad_block_io(root->fs_info,
6179 extent_cache, bytenr, size);
6183 nritems = btrfs_header_nritems(buf);
/* Determine FULL_BACKREF: ask the extent tree, else compute it. */
6186 if (!init_extent_tree) {
6187 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6188 btrfs_header_level(buf), 1, NULL,
6191 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6193 fprintf(stderr, "Couldn't calc extent flags\n");
6194 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6199 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6201 fprintf(stderr, "Couldn't calc extent flags\n");
6202 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6206 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6208 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6209 ri->objectid == btrfs_header_owner(buf)) {
6211 * Ok we got to this block from it's original owner and
6212 * we have FULL_BACKREF set. Relocation can leave
6213 * converted blocks over so this is altogether possible,
6214 * however it's not possible if the generation > the
6215 * last snapshot, so check for this case.
6217 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6218 btrfs_header_generation(buf) > ri->last_snapshot) {
6219 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6220 rec->bad_full_backref = 1;
6225 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6226 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6227 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6228 rec->bad_full_backref = 1;
/* Remember the resolved state on the extent record. */
6232 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6233 rec->flag_block_full_backref = 1;
6237 rec->flag_block_full_backref = 0;
6239 owner = btrfs_header_owner(buf);
6242 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: record every interesting item type. */
6246 if (btrfs_is_leaf(buf)) {
6247 btree_space_waste += btrfs_leaf_free_space(root, buf);
6248 for (i = 0; i < nritems; i++) {
6249 struct btrfs_file_extent_item *fi;
6250 btrfs_item_key_to_cpu(buf, &key, i);
6251 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6252 process_extent_item(root, extent_cache, buf,
6256 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6257 process_extent_item(root, extent_cache, buf,
6261 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6263 btrfs_item_size_nr(buf, i);
6266 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6267 process_chunk_item(chunk_cache, &key, buf, i);
6270 if (key.type == BTRFS_DEV_ITEM_KEY) {
6271 process_device_item(dev_cache, &key, buf, i);
6274 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6275 process_block_group_item(block_group_cache,
6279 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6280 process_device_extent_item(dev_extent_cache,
6285 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6286 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6287 process_extent_ref_v0(extent_cache, buf, i);
6294 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6295 add_tree_backref(extent_cache, key.objectid, 0,
6299 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6300 add_tree_backref(extent_cache, key.objectid,
6304 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6305 struct btrfs_extent_data_ref *ref;
6306 ref = btrfs_item_ptr(buf, i,
6307 struct btrfs_extent_data_ref);
6308 add_data_backref(extent_cache,
6310 btrfs_extent_data_ref_root(buf, ref),
6311 btrfs_extent_data_ref_objectid(buf,
6313 btrfs_extent_data_ref_offset(buf, ref),
6314 btrfs_extent_data_ref_count(buf, ref),
6315 0, root->sectorsize);
6318 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6319 struct btrfs_shared_data_ref *ref;
6320 ref = btrfs_item_ptr(buf, i,
6321 struct btrfs_shared_data_ref);
6322 add_data_backref(extent_cache,
6323 key.objectid, key.offset, 0, 0, 0,
6324 btrfs_shared_data_ref_count(buf, ref),
6325 0, root->sectorsize);
/* Orphans get queued on delete_items for later removal. */
6328 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6329 struct bad_item *bad;
6331 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6335 bad = malloc(sizeof(struct bad_item));
6338 INIT_LIST_HEAD(&bad->list);
6339 memcpy(&bad->key, &key,
6340 sizeof(struct btrfs_key));
6341 bad->root_id = owner;
6342 list_add_tail(&bad->list, &delete_items);
/* Everything below handles regular file extents only. */
6345 if (key.type != BTRFS_EXTENT_DATA_KEY)
6347 fi = btrfs_item_ptr(buf, i,
6348 struct btrfs_file_extent_item);
6349 if (btrfs_file_extent_type(buf, fi) ==
6350 BTRFS_FILE_EXTENT_INLINE)
/* Holes have a zero disk bytenr and carry no backref. */
6352 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6355 data_bytes_allocated +=
6356 btrfs_file_extent_disk_num_bytes(buf, fi);
6357 if (data_bytes_allocated < root->sectorsize) {
6360 data_bytes_referenced +=
6361 btrfs_file_extent_num_bytes(buf, fi);
6362 add_data_backref(extent_cache,
6363 btrfs_file_extent_disk_bytenr(buf, fi),
6364 parent, owner, key.objectid, key.offset -
6365 btrfs_file_extent_offset(buf, fi), 1, 1,
6366 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: queue children and record their extent records. */
6370 struct btrfs_key first_key;
6372 first_key.objectid = 0;
6375 btrfs_item_key_to_cpu(buf, &first_key, 0);
6376 level = btrfs_header_level(buf);
6377 for (i = 0; i < nritems; i++) {
6378 struct extent_record tmpl;
6380 ptr = btrfs_node_blockptr(buf, i);
6381 size = root->nodesize;
6382 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip subtrees below an in-progress snapshot drop. */
6384 if ((level == ri->drop_level)
6385 && is_dropped_key(&key, &ri->drop_key)) {
6390 memset(&tmpl, 0, sizeof(tmpl));
6391 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6392 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6397 tmpl.max_size = size;
6398 ret = add_extent_rec(extent_cache, &tmpl);
6401 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6404 add_pending(nodes, seen, ptr, size);
6406 add_pending(pending, seen, ptr, size);
6409 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6410 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting of metadata bytes by owner type. */
6412 total_btree_bytes += buf->len;
6413 if (fs_root_objectid(btrfs_header_owner(buf)))
6414 total_fs_tree_bytes += buf->len;
6415 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6416 total_extent_tree_bytes += buf->len;
6417 if (!found_old_backref &&
6418 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6419 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6420 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6421 found_old_backref = 1;
6423 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the block scan with a tree root.
 *
 * Queues @buf on the nodes or pending cache (depending on its level),
 * records an extent record for it, and adds the appropriate tree backref:
 * a shared (parent == start) backref for reloc roots and pre-mixed-rev
 * blocks, otherwise a normal owner backref.
 *
 * NOTE(review): lines are missing from this extract (the objectid
 * parameter line, tmpl field assignments, else/braces, the return) —
 * restore from upstream btrfs-progs before building.
 */
6427 static int add_root_to_pending(struct extent_buffer *buf,
6428 struct cache_tree *extent_cache,
6429 struct cache_tree *pending,
6430 struct cache_tree *seen,
6431 struct cache_tree *nodes,
6434 struct extent_record tmpl;
/* Interior nodes and leaves are tracked in separate work caches. */
6436 if (btrfs_header_level(buf) > 0)
6437 add_pending(nodes, seen, buf->start, buf->len);
6439 add_pending(pending, seen, buf->start, buf->len);
6441 memset(&tmpl, 0, sizeof(tmpl));
6442 tmpl.start = buf->start;
6447 tmpl.max_size = buf->len;
6448 add_extent_rec(extent_cache, &tmpl);
/* Old backref rev and reloc trees use shared (by-parent) backrefs. */
6450 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6451 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6452 add_tree_backref(extent_cache, buf->start, buf->start,
6455 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6459 /* as we fix the tree, we might be deleting blocks that
6460 * we're tracking for repair. This hook makes sure we
6461 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep fsck's in-memory extent records in sync while
 * repair code frees extents (see the comment above this function).
 *
 * Looks up the extent record for the freed range and decrements the
 * matching data or tree backref counts; backrefs that drop to zero on
 * both the ref and extent-tree sides are erased from the record's
 * backref tree, and the record itself is freed if nothing references it
 * any more.
 *
 * NOTE(review): lines are missing from this extract (refs_to_drop
 * parameter, null checks after the backref lookups, frees, the return) —
 * restore from upstream btrfs-progs before building.
 */
6463 static int free_extent_hook(struct btrfs_trans_handle *trans,
6464 struct btrfs_root *root,
6465 u64 bytenr, u64 num_bytes, u64 parent,
6466 u64 root_objectid, u64 owner, u64 offset,
6469 struct extent_record *rec;
6470 struct cache_extent *cache;
6472 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above FIRST_FREE_OBJECTID are file (data) extents. */
6474 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6475 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6479 rec = container_of(cache, struct extent_record, cache);
6481 struct data_backref *back;
6482 back = find_data_backref(rec, parent, root_objectid, owner,
6483 offset, 1, bytenr, num_bytes);
6486 if (back->node.found_ref) {
6487 back->found_ref -= refs_to_drop;
6489 rec->refs -= refs_to_drop;
6491 if (back->node.found_extent_tree) {
6492 back->num_refs -= refs_to_drop;
6493 if (rec->extent_item_refs)
6494 rec->extent_item_refs -= refs_to_drop;
6496 if (back->found_ref == 0)
6497 back->node.found_ref = 0;
6498 if (back->num_refs == 0)
6499 back->node.found_extent_tree = 0;
/* Both sides gone: drop the backref node from the record. */
6501 if (!back->node.found_extent_tree && back->node.found_ref) {
6502 rb_erase(&back->node.node, &rec->backref_tree);
6506 struct tree_backref *back;
6507 back = find_tree_backref(rec, parent, root_objectid);
6510 if (back->node.found_ref) {
6513 back->node.found_ref = 0;
6515 if (back->node.found_extent_tree) {
6516 if (rec->extent_item_refs)
6517 rec->extent_item_refs--;
6518 back->node.found_extent_tree = 0;
6520 if (!back->node.found_extent_tree && back->node.found_ref) {
6521 rb_erase(&back->node.node, &rec->backref_tree);
/* Free the record itself once nothing references it. */
6525 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - repair helper: remove every extent-tree item
 * keyed on @bytenr (extent items, metadata items, and all backref item
 * types), adjusting block group accounting for the extent items removed.
 *
 * Repeatedly searches backwards from (bytenr, -1, -1), deleting matches
 * and skipping over unrelated item types by adjusting the search key.
 *
 * NOTE(review): lines are missing from this extract (loop header, break
 * statements, error checks after search/del, the return) — restore from
 * upstream btrfs-progs before building.
 */
6530 static int delete_extent_records(struct btrfs_trans_handle *trans,
6531 struct btrfs_root *root,
6532 struct btrfs_path *path,
6533 u64 bytenr, u64 new_len)
6535 struct btrfs_key key;
6536 struct btrfs_key found_key;
6537 struct extent_buffer *leaf;
6542 key.objectid = bytenr;
6544 key.offset = (u64)-1;
6547 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6554 if (path->slots[0] == 0)
6560 leaf = path->nodes[0];
6561 slot = path->slots[0];
6563 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6564 if (found_key.objectid != bytenr)
/* Only extent items and backref items are eligible for deletion. */
6567 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6568 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6569 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6570 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6571 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6572 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6573 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not deletable: step the search key just below this item. */
6574 btrfs_release_path(path);
6575 if (found_key.type == 0) {
6576 if (found_key.offset == 0)
6578 key.offset = found_key.offset - 1;
6579 key.type = found_key.type;
6581 key.type = found_key.type - 1;
6582 key.offset = (u64)-1;
6586 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6587 found_key.objectid, found_key.type, found_key.offset);
6589 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6592 btrfs_release_path(path);
/* Removing an extent item changes block group usage. */
6594 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6595 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6596 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6597 found_key.offset : root->nodesize;
6599 ret = btrfs_update_block_group(trans, root, bytenr,
6606 btrfs_release_path(path);
6611 * for a single backref, this will allocate a new extent
6612 * and add the backref to it.
/*
 * record_extent - repair helper: (re)create an extent item and attach one
 * backref to it (see the comment above this function).
 *
 * When !allocated, inserts a fresh EXTENT_ITEM for rec (data flag, or
 * tree-block flag plus a btrfs_tree_block_info describing key/level) and
 * updates block group accounting.  Then adds the backref itself via
 * btrfs_inc_extent_ref(): once per found_ref for data backrefs, once for
 * tree backrefs, printing what was added.
 *
 * NOTE(review): lines are missing from this extract (error checks,
 * 'goto fail' paths, else branches, the return).  Also note the stray
 * double semicolon on the copy_key declaration, present upstream as
 * well — harmless but worth cleaning up when this is restored.
 */
6614 static int record_extent(struct btrfs_trans_handle *trans,
6615 struct btrfs_fs_info *info,
6616 struct btrfs_path *path,
6617 struct extent_record *rec,
6618 struct extent_backref *back,
6619 int allocated, u64 flags)
6622 struct btrfs_root *extent_root = info->extent_root;
6623 struct extent_buffer *leaf;
6624 struct btrfs_key ins_key;
6625 struct btrfs_extent_item *ei;
6626 struct tree_backref *tback;
6627 struct data_backref *dback;
6628 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6631 rec->max_size = max_t(u64, rec->max_size,
6632 info->extent_root->nodesize);
6635 u32 item_size = sizeof(*ei);
/* Tree blocks carry an extra btrfs_tree_block_info after the item. */
6638 item_size += sizeof(*bi);
6640 ins_key.objectid = rec->start;
6641 ins_key.offset = rec->max_size;
6642 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6644 ret = btrfs_insert_empty_item(trans, extent_root, path,
6645 &ins_key, item_size);
6649 leaf = path->nodes[0];
6650 ei = btrfs_item_ptr(leaf, path->slots[0],
6651 struct btrfs_extent_item);
/* Refs start at zero; btrfs_inc_extent_ref() below bumps them. */
6653 btrfs_set_extent_refs(leaf, ei, 0);
6654 btrfs_set_extent_generation(leaf, ei, rec->generation);
6656 if (back->is_data) {
6657 btrfs_set_extent_flags(leaf, ei,
6658 BTRFS_EXTENT_FLAG_DATA);
6660 struct btrfs_disk_key copy_key;;
6662 tback = to_tree_backref(back);
6663 bi = (struct btrfs_tree_block_info *)(ei + 1);
6664 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6667 btrfs_set_disk_key_objectid(&copy_key,
6668 rec->info_objectid);
6669 btrfs_set_disk_key_type(&copy_key, 0);
6670 btrfs_set_disk_key_offset(&copy_key, 0);
6672 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6673 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6675 btrfs_set_extent_flags(leaf, ei,
6676 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6679 btrfs_mark_buffer_dirty(leaf);
6680 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6681 rec->max_size, 1, 0);
6684 btrfs_release_path(path);
/* Now add the backref: one inc per reference actually found. */
6687 if (back->is_data) {
6691 dback = to_data_backref(back);
6692 if (back->full_backref)
6693 parent = dback->parent;
6697 for (i = 0; i < dback->found_ref; i++) {
6698 /* if parent != 0, we're doing a full backref
6699 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6700 * just makes the backref allocator create a data
6703 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6704 rec->start, rec->max_size,
6708 BTRFS_FIRST_FREE_OBJECTID :
6714 fprintf(stderr, "adding new data backref"
6715 " on %llu %s %llu owner %llu"
6716 " offset %llu found %d\n",
6717 (unsigned long long)rec->start,
6718 back->full_backref ?
6720 back->full_backref ?
6721 (unsigned long long)parent :
6722 (unsigned long long)dback->root,
6723 (unsigned long long)dback->owner,
6724 (unsigned long long)dback->offset,
6729 tback = to_tree_backref(back);
6730 if (back->full_backref)
6731 parent = tback->parent;
6735 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6736 rec->start, rec->max_size,
6737 parent, tback->root, 0, 0);
6738 fprintf(stderr, "adding new tree backref on "
6739 "start %llu len %llu parent %llu root %llu\n",
6740 rec->start, rec->max_size, parent, tback->root);
6743 btrfs_release_path(path);
6747 static struct extent_entry *find_entry(struct list_head *entries,
6748 u64 bytenr, u64 bytes)
6750 struct extent_entry *entry = NULL;
6752 list_for_each_entry(entry, entries, list) {
6753 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the extent_entry most likely to be the
 * correct one, by majority vote of backrefs.
 *
 * Scans the candidate list, ignoring entries whose refs are all broken,
 * and tracks the entry with the highest count.  Ties between distinct
 * candidates mean we cannot trust any single entry, so the search keeps
 * comparing (with 'prev' tracking the previously best entry) rather than
 * returning a winner immediately.
 *
 * NOTE(review): lines are missing from this extract (braces, the tie
 * handling continuation, prev assignment and the final return) — restore
 * from upstream btrfs-progs before building.
 */
6760 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6762 struct extent_entry *entry, *best = NULL, *prev = NULL;
6764 list_for_each_entry(entry, entries, list) {
6771 * If there are as many broken entries as entries then we know
6772 * not to trust this particular entry.
6774 if (entry->broken == entry->count)
6778 * If our current entry == best then we can't be sure our best
6779 * is really the best, so we need to keep searching.
6781 if (best && best->count == entry->count) {
6787 /* Prev == entry, not good enough, have to keep searching */
6788 if (!prev->broken && prev->count == entry->count)
6792 best = (prev->count > entry->count) ? prev : entry;
6793 else if (best->count < entry->count)
/*
 * repair_ref - rewrite a file extent item so its disk_bytenr/disk_num_bytes
 * agree with the winning extent entry chosen by verify_backrefs().
 *
 * Finds the subvolume root and the file extent the backref points at
 * (walking forward from the backref's original offset, since extents may
 * have been split), then COWs down to it in a transaction and adjusts
 * disk_bytenr, extent offset, disk_num_bytes and (when uncompressed)
 * ram_bytes to match @entry.  Compressed refs that disagree with the
 * entry start, and refs outside the entry's range, are reported as
 * unfixable and left alone.
 *
 * NOTE(review): lines are missing from this extract (error paths, 'goto'
 * labels, several braces and returns, the path-walk advance) — restore
 * from upstream btrfs-progs before building.
 */
6801 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6802 struct data_backref *dback, struct extent_entry *entry)
6804 struct btrfs_trans_handle *trans;
6805 struct btrfs_root *root;
6806 struct btrfs_file_extent_item *fi;
6807 struct extent_buffer *leaf;
6808 struct btrfs_key key;
/* Resolve the subvolume the backref belongs to. */
6812 key.objectid = dback->root;
6813 key.type = BTRFS_ROOT_ITEM_KEY;
6814 key.offset = (u64)-1;
6815 root = btrfs_read_fs_root(info, &key);
6817 fprintf(stderr, "Couldn't find root for our ref\n");
6822 * The backref points to the original offset of the extent if it was
6823 * split, so we need to search down to the offset we have and then walk
6824 * forward until we find the backref we're looking for.
6826 key.objectid = dback->owner;
6827 key.type = BTRFS_EXTENT_DATA_KEY;
6828 key.offset = dback->offset;
6829 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6831 fprintf(stderr, "Error looking up ref %d\n", ret);
6836 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6837 ret = btrfs_next_leaf(root, path);
6839 fprintf(stderr, "Couldn't find our ref, next\n");
6843 leaf = path->nodes[0];
6844 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6845 if (key.objectid != dback->owner ||
6846 key.type != BTRFS_EXTENT_DATA_KEY) {
6847 fprintf(stderr, "Couldn't find our ref, search\n");
6850 fi = btrfs_item_ptr(leaf, path->slots[0],
6851 struct btrfs_file_extent_item);
6852 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6853 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once this file extent matches the backref we came from. */
6855 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6860 btrfs_release_path(path);
6862 trans = btrfs_start_transaction(root, 1);
6864 return PTR_ERR(trans);
6867 * Ok we have the key of the file extent we want to fix, now we can cow
6868 * down to the thing and fix it.
6870 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6872 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6873 key.objectid, key.type, key.offset, ret);
6877 fprintf(stderr, "Well that's odd, we just found this key "
6878 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6883 leaf = path->nodes[0];
6884 fi = btrfs_item_ptr(leaf, path->slots[0],
6885 struct btrfs_file_extent_item);
/* Compressed extents can't be re-based to a different bytenr here. */
6887 if (btrfs_file_extent_compression(leaf, fi) &&
6888 dback->disk_bytenr != entry->bytenr) {
6889 fprintf(stderr, "Ref doesn't match the record start and is "
6890 "compressed, please take a btrfs-image of this file "
6891 "system and send it to a btrfs developer so they can "
6892 "complete this functionality for bytenr %Lu\n",
6893 dback->disk_bytenr);
/* Broken refs simply take the entry's bytenr as-is. */
6898 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6899 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6900 } else if (dback->disk_bytenr > entry->bytenr) {
6901 u64 off_diff, offset;
6903 off_diff = dback->disk_bytenr - entry->bytenr;
6904 offset = btrfs_file_extent_offset(leaf, fi);
6905 if (dback->disk_bytenr + offset +
6906 btrfs_file_extent_num_bytes(leaf, fi) >
6907 entry->bytenr + entry->bytes) {
6908 fprintf(stderr, "Ref is past the entry end, please "
6909 "take a btrfs-image of this file system and "
6910 "send it to a btrfs developer, ref %Lu\n",
6911 dback->disk_bytenr);
/* Shift the in-extent offset to compensate for the new start. */
6916 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6917 btrfs_set_file_extent_offset(leaf, fi, offset);
6918 } else if (dback->disk_bytenr < entry->bytenr) {
6921 offset = btrfs_file_extent_offset(leaf, fi);
6922 if (dback->disk_bytenr + offset < entry->bytenr) {
6923 fprintf(stderr, "Ref is before the entry start, please"
6924 " take a btrfs-image of this file system and "
6925 "send it to a btrfs developer, ref %Lu\n",
6926 dback->disk_bytenr);
6931 offset += dback->disk_bytenr;
6932 offset -= entry->bytenr;
6933 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6934 btrfs_set_file_extent_offset(leaf, fi, offset);
6937 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6940 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6941 * only do this if we aren't using compression, otherwise it's a
6944 if (!btrfs_file_extent_compression(leaf, fi))
6945 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6947 printf("ram bytes may be wrong?\n");
6948 btrfs_mark_buffer_dirty(leaf);
6950 err = btrfs_commit_transaction(trans, root);
6951 btrfs_release_path(path);
6952 return ret ? ret : err;
/*
 * Try to make all data backrefs of @rec agree on a single (bytenr, bytes)
 * pair.  Builds a list of candidate extent_entry ranges from the data
 * backrefs, elects the candidate with the most votes (find_most_right_entry),
 * falls back to the extent record itself on a tie, and then rewrites any
 * file extents that disagree via repair_ref().
 *
 * NOTE(review): several interior lines are elided in this view; the exact
 * error paths and 'out' label handling should be confirmed against the full
 * source.
 */
6955 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6956 			   struct extent_record *rec)
6958 	struct extent_backref *back, *tmp;
6959 	struct data_backref *dback;
6960 	struct extent_entry *entry, *best = NULL;
6963 	int broken_entries = 0;
6968 	 * Metadata is easy and the backrefs should always agree on bytenr and
6969 	 * size, if not we've got bigger issues.
	/* Pass 1: tally every real data backref into the candidate list. */
6974 	rbtree_postorder_for_each_entry_safe(back, tmp,
6975 					     &rec->backref_tree, node) {
6976 		if (back->full_backref || !back->is_data)
6979 		dback = to_data_backref(back);
6982 		 * We only pay attention to backrefs that we found a real
6985 		if (dback->found_ref == 0)
6989 		 * For now we only catch when the bytes don't match, not the
6990 		 * bytenr.  We can easily do this at the same time, but I want
6991 		 * to have a fs image to test on before we just add repair
6992 		 * functionality willy-nilly so we know we won't screw up the
6996 		entry = find_entry(&entries, dback->disk_bytenr,
6999 			entry = malloc(sizeof(struct extent_entry));
7004 			memset(entry, 0, sizeof(*entry));
7005 			entry->bytenr = dback->disk_bytenr;
7006 			entry->bytes = dback->bytes;
7007 			list_add_tail(&entry->list, &entries);
7012 		 * If we only have on entry we may think the entries agree when
7013 		 * in reality they don't so we have to do some extra checking.
7015 		if (dback->disk_bytenr != rec->start ||
7016 		    dback->bytes != rec->nr || back->broken)
7027 	/* Yay all the backrefs agree, carry on good sir */
7028 	if (nr_entries <= 1 && !mismatch)
7031 	fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7032 		"%Lu\n", rec->start);
7035 	 * First we want to see if the backrefs can agree amongst themselves who
7036 	 * is right, so figure out which one of the entries has the highest
7039 	best = find_most_right_entry(&entries);
7042 	 * Ok so we may have an even split between what the backrefs think, so
7043 	 * this is where we use the extent ref to see what it thinks.
7046 		entry = find_entry(&entries, rec->start, rec->nr);
7047 		if (!entry && (!broken_entries || !rec->found_rec)) {
7048 			fprintf(stderr, "Backrefs don't agree with each other "
7049 				"and extent record doesn't agree with anybody,"
7050 				" so we can't fix bytenr %Lu bytes %Lu\n",
7051 				rec->start, rec->nr);
7054 		} else if (!entry) {
7056 			 * Ok our backrefs were broken, we'll assume this is the
7057 			 * correct value and add an entry for this range.
7059 			entry = malloc(sizeof(struct extent_entry));
7064 			memset(entry, 0, sizeof(*entry));
7065 			entry->bytenr = rec->start;
7066 			entry->bytes = rec->nr;
7067 			list_add_tail(&entry->list, &entries);
		/* Re-run the election now that the record itself has a vote. */
7071 		best = find_most_right_entry(&entries);
7073 			fprintf(stderr, "Backrefs and extent record evenly "
7074 				"split on who is right, this is going to "
7075 				"require user input to fix bytenr %Lu bytes "
7076 				"%Lu\n", rec->start, rec->nr);
7083 	 * I don't think this can happen currently as we'll abort() if we catch
7084 	 * this case higher up, but in case somebody removes that we still can't
7085 	 * deal with it properly here yet, so just bail out of that's the case.
7087 	if (best->bytenr != rec->start) {
7088 		fprintf(stderr, "Extent start and backref starts don't match, "
7089 			"please use btrfs-image on this file system and send "
7090 			"it to a btrfs developer so they can make fsck fix "
7091 			"this particular case.  bytenr is %Lu, bytes is %Lu\n",
7092 			rec->start, rec->nr);
7098 	 * Ok great we all agreed on an extent record, let's go find the real
7099 	 * references and fix up the ones that don't match.
	/* Pass 2: rewrite every real data backref that disagrees with 'best'. */
7101 	rbtree_postorder_for_each_entry_safe(back, tmp,
7102 					     &rec->backref_tree, node) {
7103 		if (back->full_backref || !back->is_data)
7106 		dback = to_data_backref(back);
7109 		 * Still ignoring backrefs that don't have a real ref attached
7112 		if (dback->found_ref == 0)
7115 		if (dback->bytes == best->bytes &&
7116 		    dback->disk_bytenr == best->bytenr)
7119 		ret = repair_ref(info, path, dback, best);
7125 	 * Ok we messed with the actual refs, which means we need to drop our
7126 	 * entire cache and go back and rescan.  I know this is a huge pain and
7127 	 * adds a lot of extra work, but it's the only way to be safe.  Once all
7128 	 * the backrefs agree we may not need to do anything to the extent
	/* Free the candidate list before returning. */
7133 	while (!list_empty(&entries)) {
7134 		entry = list_entry(entries.next, struct extent_entry, list);
7135 		list_del_init(&entry->list);
/*
 * Handle an extent record that has duplicates but no real extent item of
 * its own: promote the first duplicate ('good') to be the canonical record,
 * absorb any overlapping records from the cache into it, and re-insert it.
 *
 * Returns 1 when no duplicates remain (nothing left to delete), 0 when the
 * promoted record still carries duplicates that the caller must delete.
 */
7141 static int process_duplicates(struct btrfs_root *root,
7142 			      struct cache_tree *extent_cache,
7143 			      struct extent_record *rec)
7145 	struct extent_record *good, *tmp;
7146 	struct cache_extent *cache;
7150 	 * If we found a extent record for this extent then return, or if we
7151 	 * have more than one duplicate we are likely going to need to delete
7154 	if (rec->found_rec || rec->num_duplicates > 1)
7157 	/* Shouldn't happen but just in case */
7158 	BUG_ON(!rec->num_duplicates);
7161 	 * So this happens if we end up with a backref that doesn't match the
7162 	 * actual extent entry.  So either the backref is bad or the extent
7163 	 * entry is bad.  Either way we want to have the extent_record actually
7164 	 * reflect what we found in the extent_tree, so we need to take the
7165 	 * duplicate out and use that as the extent_record since the only way we
7166 	 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7168 	remove_cache_extent(extent_cache, &rec->cache);
	/* Promote the sole duplicate; it came from a real EXTENT_ITEM_KEY. */
7170 	good = to_extent_record(rec->dups.next);
7171 	list_del_init(&good->list);
7172 	INIT_LIST_HEAD(&good->backrefs);
7173 	INIT_LIST_HEAD(&good->dups);
7174 	good->cache.start = good->start;
7175 	good->cache.size = good->nr;
7176 	good->content_checked = 0;
7177 	good->owner_ref_checked = 0;
7178 	good->num_duplicates = 0;
7179 	good->refs = rec->refs;
7180 	list_splice_init(&rec->backrefs, &good->backrefs);
	/* Merge in any records in the cache that overlap 'good'. */
7182 		cache = lookup_cache_extent(extent_cache, good->start,
7186 		tmp = container_of(cache, struct extent_record, cache);
7189 		 * If we find another overlapping extent and it's found_rec is
7190 		 * set then it's a duplicate and we need to try and delete
7193 		if (tmp->found_rec || tmp->num_duplicates > 0) {
7194 			if (list_empty(&good->list))
7195 				list_add_tail(&good->list,
7196 					      &duplicate_extents);
7197 			good->num_duplicates += tmp->num_duplicates + 1;
7198 			list_splice_init(&tmp->dups, &good->dups);
7199 			list_del_init(&tmp->list);
7200 			list_add_tail(&tmp->list, &good->dups);
7201 			remove_cache_extent(extent_cache, &tmp->cache);
7206 		 * Ok we have another non extent item backed extent rec, so lets
7207 		 * just add it to this extent and carry on like we did above.
7209 		good->refs += tmp->refs;
7210 		list_splice_init(&tmp->backrefs, &good->backrefs);
7211 		remove_cache_extent(extent_cache, &tmp->cache);
7214 	ret = insert_cache_extent(extent_cache, &good->cache);
7217 	return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate EXTENT_ITEMs for @rec from the extent tree.
 *
 * Finds the record covering all duplicates, moves the rest onto a local
 * delete list, and removes each one's extent item inside a transaction.
 * Returns a negative errno on failure, otherwise the number of items
 * deleted (so 0 means "record was fine, keep it").
 *
 * NOTE(review): interior lines are elided here; the selection of 'good'
 * vs. the members of delete_list should be confirmed against full source.
 */
7220 static int delete_duplicate_records(struct btrfs_root *root,
7221 				    struct extent_record *rec)
7223 	struct btrfs_trans_handle *trans;
7224 	LIST_HEAD(delete_list);
7225 	struct btrfs_path *path;
7226 	struct extent_record *tmp, *good, *n;
7229 	struct btrfs_key key;
7231 	path = btrfs_alloc_path();
7238 	/* Find the record that covers all of the duplicates. */
7239 	list_for_each_entry(tmp, &rec->dups, list) {
7240 		if (good->start < tmp->start)
7242 		if (good->nr > tmp->nr)
		/* Overlap that isn't full containment can't be auto-repaired. */
7245 		if (tmp->start + tmp->nr < good->start + good->nr) {
7246 			fprintf(stderr, "Ok we have overlapping extents that "
7247 				"aren't completely covered by each other, this "
7248 				"is going to require more careful thought.  "
7249 				"The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7250 				tmp->start, tmp->nr, good->start, good->nr);
7257 		list_add_tail(&rec->list, &delete_list);
7259 	list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7262 		list_move_tail(&tmp->list, &delete_list);
7265 	root = root->fs_info->extent_root;
7266 	trans = btrfs_start_transaction(root, 1);
7267 	if (IS_ERR(trans)) {
7268 		ret = PTR_ERR(trans);
7272 	list_for_each_entry(tmp, &delete_list, list) {
7273 		if (tmp->found_rec == 0)
7275 		key.objectid = tmp->start;
7276 		key.type = BTRFS_EXTENT_ITEM_KEY;
7277 		key.offset = tmp->nr;
7279 		/* Shouldn't happen but just in case */
7280 		if (tmp->metadata) {
7281 			fprintf(stderr, "Well this shouldn't happen, extent "
7282 				"record overlaps but is metadata?  "
7283 				"[%Lu, %Lu]\n", tmp->start, tmp->nr);
7287 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7293 		ret = btrfs_del_item(trans, root, path);
7296 		btrfs_release_path(path);
7299 	err = btrfs_commit_transaction(trans, root);
	/* Free everything we queued for deletion. */
7303 	while (!list_empty(&delete_list)) {
7304 		tmp = to_extent_record(delete_list.next);
7305 		list_del_init(&tmp->list);
7311 	while (!list_empty(&rec->dups)) {
7312 		tmp = to_extent_record(rec->dups.next);
7313 		list_del_init(&tmp->list);
7317 	btrfs_free_path(path);
7319 	if (!ret && !nr_del)
7320 		rec->num_duplicates = 0;
7322 	return ret ? ret : nr_del;
/*
 * For each data backref of @rec that we never matched to a real file
 * extent, look up the file extent it claims to point at.  If the file
 * extent exists and its disk bytenr has no extent record of its own,
 * adopt its (disk_bytenr, disk_num_bytes) into the backref so
 * verify_backrefs() can use it as a repair candidate.
 */
7325 static int find_possible_backrefs(struct btrfs_fs_info *info,
7326 				  struct btrfs_path *path,
7327 				  struct cache_tree *extent_cache,
7328 				  struct extent_record *rec)
7330 	struct btrfs_root *root;
7331 	struct extent_backref *back, *tmp;
7332 	struct data_backref *dback;
7333 	struct cache_extent *cache;
7334 	struct btrfs_file_extent_item *fi;
7335 	struct btrfs_key key;
7339 	rbtree_postorder_for_each_entry_safe(back, tmp,
7340 					     &rec->backref_tree, node) {
7341 		/* Don't care about full backrefs (poor unloved backrefs) */
7342 		if (back->full_backref || !back->is_data)
7345 		dback = to_data_backref(back);
7347 		/* We found this one, we don't need to do a lookup */
7348 		if (dback->found_ref)
		/* Resolve the subvolume root this backref claims to live in. */
7351 		key.objectid = dback->root;
7352 		key.type = BTRFS_ROOT_ITEM_KEY;
7353 		key.offset = (u64)-1;
7355 		root = btrfs_read_fs_root(info, &key);
7357 		/* No root, definitely a bad ref, skip */
7358 		if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7360 		/* Other err, exit */
7362 			return PTR_ERR(root);
		/* Look up the file extent item (owner inode, offset). */
7364 		key.objectid = dback->owner;
7365 		key.type = BTRFS_EXTENT_DATA_KEY;
7366 		key.offset = dback->offset;
7367 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7369 			btrfs_release_path(path);
7372 		/* Didn't find it, we can carry on */
7377 		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7378 				    struct btrfs_file_extent_item);
7379 		bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7380 		bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7381 		btrfs_release_path(path);
7382 		cache = lookup_cache_extent(extent_cache, bytenr, 1);
7384 			struct extent_record *tmp;
7385 			tmp = container_of(cache, struct extent_record, cache);
7388 			 * If we found an extent record for the bytenr for this
7389 			 * particular backref then we can't add it to our
7390 			 * current extent record.  We only want to add backrefs
7391 			 * that don't have a corresponding extent item in the
7392 			 * extent tree since they likely belong to this record
7393 			 * and we need to fix it if it doesn't match bytenrs.
		/* Adopt the on-disk values into this backref. */
7399 		dback->found_ref += 1;
7400 		dback->disk_bytenr = bytenr;
7401 		dback->bytes = bytes;
7404 		 * Set this so the verify backref code knows not to trust the
7405 		 * values in this backref.
7414  * Record orphan data ref into corresponding root.
7416  * Return 0 if the extent item contains data ref and recorded.
7417  * Return 1 if the extent item contains no useful data ref
7418  *   On that case, it may contains only shared_dataref or metadata backref
7419  *   or the file extent exists(this should be handled by the extent bytenr
7421  * Return <0 if something goes wrong.
7423 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7424 				      struct extent_record *rec)
7426 	struct btrfs_key key;
7427 	struct btrfs_root *dest_root;
7428 	struct extent_backref *back, *tmp;
7429 	struct data_backref *dback;
7430 	struct orphan_data_extent *orphan;
7431 	struct btrfs_path *path;
7432 	int recorded_data_ref = 0;
7437 	path = btrfs_alloc_path();
	/*
	 * Walk data backrefs that exist in the extent tree but were never
	 * matched to a real file extent, and record each as an orphan
	 * data extent on its owning root for later inode/extent rebuild.
	 */
7440 	rbtree_postorder_for_each_entry_safe(back, tmp,
7441 					     &rec->backref_tree, node) {
7442 		if (back->full_backref || !back->is_data ||
7443 		    !back->found_extent_tree)
7445 		dback = to_data_backref(back);
7446 		if (dback->found_ref)
7448 		key.objectid = dback->root;
7449 		key.type = BTRFS_ROOT_ITEM_KEY;
7450 		key.offset = (u64)-1;
7452 		dest_root = btrfs_read_fs_root(fs_info, &key);
7454 		/* For non-exist root we just skip it */
7455 		if (IS_ERR(dest_root) || !dest_root)
7458 		key.objectid = dback->owner;
7459 		key.type = BTRFS_EXTENT_DATA_KEY;
7460 		key.offset = dback->offset;
7462 		ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7464 		 * For ret < 0, it's OK since the fs-tree may be corrupted,
7465 		 * we need to record it for inode/file extent rebuild.
7466 		 * For ret > 0, we record it only for file extent rebuild.
7467 		 * For ret == 0, the file extent exists but only bytenr
7468 		 * mismatch, let the original bytenr fix routine to handle,
7474 		orphan = malloc(sizeof(*orphan));
7479 		INIT_LIST_HEAD(&orphan->list);
7480 		orphan->root = dback->root;
7481 		orphan->objectid = dback->owner;
7482 		orphan->offset = dback->offset;
7483 		orphan->disk_bytenr = rec->cache.start;
7484 		orphan->disk_len = rec->cache.size;
7485 		list_add(&dest_root->orphan_data_extents, &orphan->list);
7486 		recorded_data_ref = 1;
7489 	btrfs_free_path(path);
	/* 0 when at least one orphan data ref was recorded, 1 otherwise. */
7491 	return !recorded_data_ref;
7497  * when an incorrect extent item is found, this will delete
7498  * all of the existing entries for it and recreate them
7499  * based on what the tree scan found.
7501 static int fixup_extent_refs(struct btrfs_fs_info *info,
7502 			     struct cache_tree *extent_cache,
7503 			     struct extent_record *rec)
7505 	struct btrfs_trans_handle *trans = NULL;
7507 	struct btrfs_path *path;
7508 	struct cache_extent *cache;
7509 	struct extent_backref *back, *tmp;
7513 	if (rec->flag_block_full_backref)
7514 		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7516 	path = btrfs_alloc_path();
7520 	if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7522 		 * Sometimes the backrefs themselves are so broken they don't
7523 		 * get attached to any meaningful rec, so first go back and
7524 		 * check any of our backrefs that we couldn't find and throw
7525 		 * them into the list if we find the backref so that
7526 		 * verify_backrefs can figure out what to do.
7528 		ret = find_possible_backrefs(info, path, extent_cache, rec);
7533 	/* step one, make sure all of the backrefs agree */
7534 	ret = verify_backrefs(info, path, rec);
7538 	trans = btrfs_start_transaction(info->extent_root, 1);
7539 	if (IS_ERR(trans)) {
7540 		ret = PTR_ERR(trans);
7544 	/* step two, delete all the existing records */
7545 	ret = delete_extent_records(trans, info->extent_root, path,
7546 				    rec->start, rec->max_size);
7551 	/* was this block corrupt?  If so, don't add references to it */
7552 	cache = lookup_cache_extent(info->corrupt_blocks,
7553 				    rec->start, rec->max_size);
7559 	/* step three, recreate all the refs we did find */
7560 	rbtree_postorder_for_each_entry_safe(back, tmp,
7561 					     &rec->backref_tree, node) {
7563 		 * if we didn't find any references, don't create a
7566 		if (!back->found_ref)
7569 		rec->bad_full_backref = 0;
7570 		ret = record_extent(trans, info, path, rec, back, allocated, flags);
	/* Commit whatever we rebuilt; err preserves the commit status. */
7578 		int err = btrfs_commit_transaction(trans, info->extent_root);
7583 	btrfs_free_path(path);
/*
 * Rewrite the FULL_BACKREF flag on @rec's extent item so it matches what
 * the tree scan decided (rec->flag_block_full_backref).  Searches the
 * extent tree for the item, sets or clears BTRFS_BLOCK_FLAG_FULL_BACKREF,
 * and commits.  Returns 0 on success or a negative errno.
 */
7587 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7588 			      struct extent_record *rec)
7590 	struct btrfs_trans_handle *trans;
7591 	struct btrfs_root *root = fs_info->extent_root;
7592 	struct btrfs_path *path;
7593 	struct btrfs_extent_item *ei;
7594 	struct btrfs_key key;
	/* Metadata items key on level; data items key on size. */
7598 	key.objectid = rec->start;
7599 	if (rec->metadata) {
7600 		key.type = BTRFS_METADATA_ITEM_KEY;
7601 		key.offset = rec->info_level;
7603 		key.type = BTRFS_EXTENT_ITEM_KEY;
7604 		key.offset = rec->max_size;
7607 	path = btrfs_alloc_path();
7611 	trans = btrfs_start_transaction(root, 0);
7612 	if (IS_ERR(trans)) {
7613 		btrfs_free_path(path);
7614 		return PTR_ERR(trans);
7617 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7619 		btrfs_free_path(path);
7620 		btrfs_commit_transaction(trans, root);
7623 		fprintf(stderr, "Didn't find extent for %llu\n",
7624 			(unsigned long long)rec->start);
7625 		btrfs_free_path(path);
7626 		btrfs_commit_transaction(trans, root);
7630 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7631 			    struct btrfs_extent_item);
7632 	flags = btrfs_extent_flags(path->nodes[0], ei);
7633 	if (rec->flag_block_full_backref) {
7634 		fprintf(stderr, "setting full backref on %llu\n",
7635 			(unsigned long long)key.objectid);
7636 		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7638 		fprintf(stderr, "clearing full backref on %llu\n",
7639 			(unsigned long long)key.objectid);
7640 		flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7642 	btrfs_set_extent_flags(path->nodes[0], ei, flags);
7643 	btrfs_mark_buffer_dirty(path->nodes[0]);
7644 	btrfs_free_path(path);
7645 	return btrfs_commit_transaction(trans, root);
7648 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.  Searches down to the parent level, locates the slot whose
 * blockptr matches the corrupt block (falling back to a linear scan of
 * the node), and deletes that pointer with btrfs_del_ptr().
 */
7649 static int prune_one_block(struct btrfs_trans_handle *trans,
7650 			   struct btrfs_fs_info *info,
7651 			   struct btrfs_corrupt_block *corrupt)
7654 	struct btrfs_path path;
7655 	struct extent_buffer *eb;
7659 	int level = corrupt->level + 1;
7661 	btrfs_init_path(&path);
7663 	/* we want to stop at the parent to our busted block */
7664 	path.lowest_level = level;
7666 	ret = btrfs_search_slot(trans, info->extent_root,
7667 				&corrupt->key, &path, -1, 1);
7672 	eb = path.nodes[level];
7679 	 * hopefully the search gave us the block we want to prune,
7680 	 * lets try that first
7682 	slot = path.slots[level];
7683 	found = btrfs_node_blockptr(eb, slot);
7684 	if (found == corrupt->cache.start)
7687 	nritems = btrfs_header_nritems(eb);
7689 	/* the search failed, lets scan this node and hope we find it */
7690 	for (slot = 0; slot < nritems; slot++) {
7691 		found = btrfs_node_blockptr(eb, slot);
7692 		if (found == corrupt->cache.start)
7696  * we couldn't find the bad block.  TODO, search all the nodes for pointers
7699 	if (eb == info->extent_root->node) {
7704 	btrfs_release_path(&path);
7709 	printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7710 	ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7713 	btrfs_release_path(&path);
/*
 * Walk info->corrupt_blocks and prune each corrupt block's parent pointer
 * via prune_one_block(), removing each entry from the cache as it goes.
 * The transaction is started lazily on the first corrupt block found and
 * committed once at the end.
 */
7717 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7719 	struct btrfs_trans_handle *trans = NULL;
7720 	struct cache_extent *cache;
7721 	struct btrfs_corrupt_block *corrupt;
7724 		cache = search_cache_extent(info->corrupt_blocks, 0);
		/* Start the transaction only once we know there is work. */
7728 			trans = btrfs_start_transaction(info->extent_root, 1);
7730 				return PTR_ERR(trans);
7732 		corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7733 		prune_one_block(trans, info, corrupt);
7734 		remove_cache_extent(info->corrupt_blocks, cache);
7737 	return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear all EXTENT_DIRTY ranges from the free space cache and mark the
 * affected block groups uncached so their free space is rebuilt on the
 * next scan.
 */
7741 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7743 	struct btrfs_block_group_cache *cache;
7748 		ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7749 					    &start, &end, EXTENT_DIRTY);
7752 		clear_extent_dirty(&fs_info->free_space_cache, start, end,
		/* Advance past each block group that covered this range. */
7758 		cache = btrfs_lookup_first_block_group(fs_info, start);
7763 			start = cache->key.objectid + cache->key.offset;
/*
 * Main extent-tree verification pass.  Walks every extent_record in
 * @extent_cache and reports/repairs: duplicate extent items, ref-count
 * mismatches, backpointer mismatches, owner-ref failures, bad
 * FULL_BACKREF flags, stripe-crossing metadata and chunk-type mismatches.
 * In repair mode the problem ranges are first pinned as excluded so
 * repairs never allocate from them.
 *
 * NOTE(review): this function is heavily elided in this view; loop exits,
 * 'fixed'/'recorded'/'cur_err' bookkeeping and several error paths are
 * not visible and should be confirmed against the full source.
 */
7767 static int check_extent_refs(struct btrfs_root *root,
7768 			     struct cache_tree *extent_cache)
7770 	struct extent_record *rec;
7771 	struct cache_extent *cache;
7780 		 * if we're doing a repair, we have to make sure
7781 		 * we don't allocate from the problem extents.
7782 		 * In the worst case, this will be all the
7785 		cache = search_cache_extent(extent_cache, 0);
7787 			rec = container_of(cache, struct extent_record, cache);
7788 			set_extent_dirty(root->fs_info->excluded_extents,
7790 					 rec->start + rec->max_size - 1,
7792 			cache = next_cache_extent(cache);
7795 		/* pin down all the corrupted blocks too */
7796 		cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7798 			set_extent_dirty(root->fs_info->excluded_extents,
7800 					 cache->start + cache->size - 1,
7802 			cache = next_cache_extent(cache);
7804 		prune_corrupt_blocks(root->fs_info);
7805 		reset_cached_block_groups(root->fs_info);
7808 	reset_cached_block_groups(root->fs_info);
7811 	 * We need to delete any duplicate entries we find first otherwise we
7812 	 * could mess up the extent tree when we have backrefs that actually
7813 	 * belong to a different extent item and not the weird duplicate one.
7815 	while (repair && !list_empty(&duplicate_extents)) {
7816 		rec = to_extent_record(duplicate_extents.next);
7817 		list_del_init(&rec->list);
7819 		/* Sometimes we can find a backref before we find an actual
7820 		 * extent, so we need to process it a little bit to see if there
7821 		 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7822 		 * if this is a backref screwup.  If we need to delete stuff
7823 		 * process_duplicates() will return 0, otherwise it will return
7826 		if (process_duplicates(root, extent_cache, rec))
7828 		ret = delete_duplicate_records(root, rec);
7832 		 * delete_duplicate_records will return the number of entries
7833 		 * deleted, so if it's greater than 0 then we know we actually
7834 		 * did something and we need to remove.
	/* Main verification loop over every cached extent record. */
7848 		cache = search_cache_extent(extent_cache, 0);
7851 		rec = container_of(cache, struct extent_record, cache);
7852 		if (rec->num_duplicates) {
7853 			fprintf(stderr, "extent item %llu has multiple extent "
7854 				"items\n", (unsigned long long)rec->start);
7859 		if (rec->refs != rec->extent_item_refs) {
7860 			fprintf(stderr, "ref mismatch on [%llu %llu] ",
7861 				(unsigned long long)rec->start,
7862 				(unsigned long long)rec->nr);
7863 			fprintf(stderr, "extent item %llu, found %llu\n",
7864 				(unsigned long long)rec->extent_item_refs,
7865 				(unsigned long long)rec->refs);
7866 			ret = record_orphan_data_extents(root->fs_info, rec);
7873 			 * we can't use the extent to repair file
7874 			 * extent, let the fallback method handle it.
7876 			if (!fixed && repair) {
7877 				ret = fixup_extent_refs(
7888 		if (all_backpointers_checked(rec, 1)) {
7889 			fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7890 				(unsigned long long)rec->start,
7891 				(unsigned long long)rec->nr);
7893 			if (!fixed && !recorded && repair) {
7894 				ret = fixup_extent_refs(root->fs_info,
7903 		if (!rec->owner_ref_checked) {
7904 			fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7905 				(unsigned long long)rec->start,
7906 				(unsigned long long)rec->nr);
7907 			if (!fixed && !recorded && repair) {
7908 				ret = fixup_extent_refs(root->fs_info,
7917 		if (rec->bad_full_backref) {
7918 			fprintf(stderr, "bad full backref, on [%llu]\n",
7919 				(unsigned long long)rec->start);
7921 				ret = fixup_extent_flags(root->fs_info, rec);
7930 		 * Although it's not a extent ref's problem, we reuse this
7931 		 * routine for error reporting.
7932 		 * No repair function yet.
7934 		if (rec->crossing_stripes) {
7936 			"bad metadata [%llu, %llu) crossing stripe boundary\n",
7937 				rec->start, rec->start + rec->max_size);
7942 		if (rec->wrong_chunk_type) {
7944 			"bad extent [%llu, %llu), type mismatch with chunk\n",
7945 				rec->start, rec->start + rec->max_size);
		/* Done with this record: drop it and unpin if clean/repaired. */
7950 		remove_cache_extent(extent_cache, cache);
7951 		free_all_extent_backrefs(rec);
7952 		if (!init_extent_tree && repair && (!cur_err || fixed))
7953 			clear_extent_dirty(root->fs_info->excluded_extents,
7955 					   rec->start + rec->max_size - 1,
7961 	if (ret && ret != -EAGAIN) {
7962 		fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7965 		struct btrfs_trans_handle *trans;
7967 		root = root->fs_info->extent_root;
7968 		trans = btrfs_start_transaction(root, 1);
7969 		if (IS_ERR(trans)) {
7970 			ret = PTR_ERR(trans);
		/* Re-derive block accounting after the repairs above. */
7974 		btrfs_fix_block_accounting(trans, root);
7975 		ret = btrfs_commit_transaction(trans, root);
7980 		fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of @length bytes with
 * @num_stripes stripes, given its block group @type flags:
 *   RAID0  - length split evenly across all stripes
 *   RAID10 - two copies, so length*2 split across all stripes
 *   RAID5  - one stripe is parity (num_stripes - 1 data stripes)
 *   RAID6  - two stripes are parity (num_stripes - 2 data stripes)
 *   otherwise (single/DUP/RAID1) each stripe holds the full length
 */
7986 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7990 	if (type & BTRFS_BLOCK_GROUP_RAID0) {
7991 		stripe_size = length;
7992 		stripe_size /= num_stripes;
7993 	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7994 		stripe_size = length * 2;
7995 		stripe_size /= num_stripes;
7996 	} else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7997 		stripe_size = length;
7998 		stripe_size /= (num_stripes - 1);
7999 	} else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8000 		stripe_size = length;
8001 		stripe_size /= (num_stripes - 2);
8003 		stripe_size = length;
8009  * Check the chunk with its block group/dev list ref:
8010  * Return 0 if all refs seems valid.
8011  * Return 1 if part of refs seems valid, need later check for rebuild ref
8012  * like missing block group and needs to search extent tree to rebuild them.
8013  * Return -1 if essential refs are missing and unable to rebuild.
8015 static int check_chunk_refs(struct chunk_record *chunk_rec,
8016 			    struct block_group_tree *block_group_cache,
8017 			    struct device_extent_tree *dev_extent_cache,
8020 	struct cache_extent *block_group_item;
8021 	struct block_group_record *block_group_rec;
8022 	struct cache_extent *dev_extent_item;
8023 	struct device_extent_record *dev_extent_rec;
8027 	int metadump_v2 = 0;
	/* 1) The chunk must be backed by a matching block group item. */
8031 	block_group_item = lookup_cache_extent(&block_group_cache->tree,
8034 	if (block_group_item) {
8035 		block_group_rec = container_of(block_group_item,
8036 					       struct block_group_record,
8038 		if (chunk_rec->length != block_group_rec->offset ||
8039 		    chunk_rec->offset != block_group_rec->objectid ||
8041 		     chunk_rec->type_flags != block_group_rec->flags)) {
8044 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8045 					chunk_rec->objectid,
8050 					chunk_rec->type_flags,
8051 					block_group_rec->objectid,
8052 					block_group_rec->type,
8053 					block_group_rec->offset,
8054 					block_group_rec->offset,
8055 					block_group_rec->objectid,
8056 					block_group_rec->flags);
		/* Matched: claim the block group record for this chunk. */
8059 			list_del_init(&block_group_rec->list);
8060 			chunk_rec->bg_rec = block_group_rec;
8065 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8066 				chunk_rec->objectid,
8071 				chunk_rec->type_flags);
	/* 2) Each stripe must be backed by a matching dev extent. */
8078 	length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8079 				    chunk_rec->num_stripes);
8080 	for (i = 0; i < chunk_rec->num_stripes; ++i) {
8081 		devid = chunk_rec->stripes[i].devid;
8082 		offset = chunk_rec->stripes[i].offset;
8083 		dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8084 						       devid, offset, length);
8085 		if (dev_extent_item) {
8086 			dev_extent_rec = container_of(dev_extent_item,
8087 						struct device_extent_record,
8089 			if (dev_extent_rec->objectid != devid ||
8090 			    dev_extent_rec->offset != offset ||
8091 			    dev_extent_rec->chunk_offset != chunk_rec->offset ||
8092 			    dev_extent_rec->length != length) {
8095 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8096 						chunk_rec->objectid,
8099 						chunk_rec->stripes[i].devid,
8100 						chunk_rec->stripes[i].offset,
8101 						dev_extent_rec->objectid,
8102 						dev_extent_rec->offset,
8103 						dev_extent_rec->length);
8106 				list_move(&dev_extent_rec->chunk_list,
8107 					  &chunk_rec->dextents);
8112 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8113 					chunk_rec->objectid,
8116 					chunk_rec->stripes[i].devid,
8117 					chunk_rec->stripes[i].offset);
8124 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every chunk against its block group and dev extents via
 * check_chunk_refs(), classifying each chunk onto the caller's @good,
 * @rebuild or @bad lists (when non-NULL).  Afterwards, any block group or
 * dev extent left unclaimed by a chunk is reported as orphaned.
 * @silent suppresses the per-record diagnostics.
 */
8125 int check_chunks(struct cache_tree *chunk_cache,
8126 		 struct block_group_tree *block_group_cache,
8127 		 struct device_extent_tree *dev_extent_cache,
8128 		 struct list_head *good, struct list_head *bad,
8129 		 struct list_head *rebuild, int silent)
8131 	struct cache_extent *chunk_item;
8132 	struct chunk_record *chunk_rec;
8133 	struct block_group_record *bg_rec;
8134 	struct device_extent_record *dext_rec;
8138 	chunk_item = first_cache_extent(chunk_cache);
8139 	while (chunk_item) {
8140 		chunk_rec = container_of(chunk_item, struct chunk_record,
8142 		err = check_chunk_refs(chunk_rec, block_group_cache,
8143 				       dev_extent_cache, silent);
		/* err == 0: fully valid; err > 0: rebuildable; else bad. */
8146 		if (err == 0 && good)
8147 			list_add_tail(&chunk_rec->list, good);
8148 		if (err > 0 && rebuild)
8149 			list_add_tail(&chunk_rec->list, rebuild);
8151 			list_add_tail(&chunk_rec->list, bad);
8152 		chunk_item = next_cache_extent(chunk_item);
	/* Anything still on these lists has no owning chunk. */
8155 	list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8158 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8166 	list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8170 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to @dev_rec's device and
 * compare against the bytes_used recorded in the device item.  Claimed
 * dev extents are removed from the orphan device_list as they are
 * accounted.  Reports a mismatch to stderr.
 */
8181 static int check_device_used(struct device_record *dev_rec,
8182 			     struct device_extent_tree *dext_cache)
8184 	struct cache_extent *cache;
8185 	struct device_extent_record *dev_extent_rec;
8188 	cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8190 		dev_extent_rec = container_of(cache,
8191 					      struct device_extent_record,
		/* search_cache_extent2 may step onto the next device; stop. */
8193 		if (dev_extent_rec->objectid != dev_rec->devid)
8196 		list_del_init(&dev_extent_rec->device_list);
8197 		total_byte += dev_extent_rec->length;
8198 		cache = next_cache_extent(cache);
8201 	if (total_byte != dev_rec->byte_used) {
8203 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8204 			total_byte, dev_rec->byte_used, dev_rec->objectid,
8205 			dev_rec->type, dev_rec->offset);
8212 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Cross-check every device item against its dev extents via
 * check_device_used(), then report any dev extent whose device item was
 * never found (left on the no_device_orphans list).
 */
8213 static int check_devices(struct rb_root *dev_cache,
8214 			 struct device_extent_tree *dev_extent_cache)
8216 	struct rb_node *dev_node;
8217 	struct device_record *dev_rec;
8218 	struct device_extent_record *dext_rec;
8222 	dev_node = rb_first(dev_cache);
8224 		dev_rec = container_of(dev_node, struct device_record, node);
8225 		err = check_device_used(dev_rec, dev_extent_cache);
8229 		dev_node = rb_next(dev_node);
8231 	list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8234 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8235 			dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * drop progress, snapshot generation) and append it to @head for later
 * processing by deal_root_from_list().  @drop_key may describe the resume
 * point of a partially-dropped snapshot.
 */
8242 static int add_root_item_to_list(struct list_head *head,
8243 				  u64 objectid, u64 bytenr, u64 last_snapshot,
8244 				  u8 level, u8 drop_level,
8245 				  int level_size, struct btrfs_key *drop_key)
8248 	struct root_item_record *ri_rec;
8249 	ri_rec = malloc(sizeof(*ri_rec));
8252 	ri_rec->bytenr = bytenr;
8253 	ri_rec->objectid = objectid;
8254 	ri_rec->level = level;
8255 	ri_rec->level_size = level_size;
8256 	ri_rec->drop_level = drop_level;
8257 	ri_rec->last_snapshot = last_snapshot;
	/* drop_key may be NULL; copy only when provided (elided check). */
8259 		memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8260 	list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on @list. */
8265 static void free_root_item_list(struct list_head *list)
8267 	struct root_item_record *ri_rec;
8269 	while (!list_empty(list)) {
8270 		ri_rec = list_first_entry(list, struct root_item_record,
8272 		list_del_init(&ri_rec->list);
/*
 * Scan every tree root queued on @list: read each root's node, seed the
 * pending/seen/nodes caches with it, and then drain blocks one at a time
 * through run_next_block(), which populates the extent/chunk/device
 * caches.  Roots are processed one snapshot at a time (see comment below).
 * A final run_next_block() loop drains any remaining pending blocks.
 */
8277 static int deal_root_from_list(struct list_head *list,
8278 			       struct btrfs_root *root,
8279 			       struct block_info *bits,
8281 			       struct cache_tree *pending,
8282 			       struct cache_tree *seen,
8283 			       struct cache_tree *reada,
8284 			       struct cache_tree *nodes,
8285 			       struct cache_tree *extent_cache,
8286 			       struct cache_tree *chunk_cache,
8287 			       struct rb_root *dev_cache,
8288 			       struct block_group_tree *block_group_cache,
8289 			       struct device_extent_tree *dev_extent_cache)
8294 	while (!list_empty(list)) {
8295 		struct root_item_record *rec;
8296 		struct extent_buffer *buf;
8297 		rec = list_entry(list->next,
8298 				 struct root_item_record, list);
8300 		buf = read_tree_block(root->fs_info->tree_root,
8301 				      rec->bytenr, rec->level_size, 0);
8302 		if (!extent_buffer_uptodate(buf)) {
8303 			free_extent_buffer(buf);
8307 		add_root_to_pending(buf, extent_cache, pending,
8308 				    seen, nodes, rec->objectid);
8310 		 * To rebuild extent tree, we need deal with snapshot
8311 		 * one by one, otherwise we deal with node firstly which
8312 		 * can maximize readahead.
		/* Drain blocks for this root before moving to the next. */
8315 			ret = run_next_block(root, bits, bits_nr, &last,
8316 					     pending, seen, reada, nodes,
8317 					     extent_cache, chunk_cache,
8318 					     dev_cache, block_group_cache,
8319 					     dev_extent_cache, rec);
8323 		free_extent_buffer(buf);
8324 		list_del(&rec->list);
	/* Final drain of any blocks still pending across all roots. */
8330 		ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8331 				     reada, nodes, extent_cache, chunk_cache,
8332 				     dev_cache, block_group_cache,
8333 				     dev_extent_cache, NULL);
/*
 * Top-level extent/chunk consistency pass (original, non-lowmem mode):
 *  1. collect the tree root and chunk root plus every ROOT_ITEM found in
 *     the tree root (split into normal trees and half-dropped snapshot
 *     trees with a saved drop_progress),
 *  2. walk all tree blocks via deal_root_from_list() to fill the
 *     extent/chunk/device/block-group caches,
 *  3. cross-check chunks vs block groups vs device extents, extent refs,
 *     and device items.
 */
8343 static int check_chunks_and_extents(struct btrfs_root *root)
8345 struct rb_root dev_cache;
8346 struct cache_tree chunk_cache;
8347 struct block_group_tree block_group_cache;
8348 struct device_extent_tree dev_extent_cache;
8349 struct cache_tree extent_cache;
8350 struct cache_tree seen;
8351 struct cache_tree pending;
8352 struct cache_tree reada;
8353 struct cache_tree nodes;
8354 struct extent_io_tree excluded_extents;
8355 struct cache_tree corrupt_blocks;
8356 struct btrfs_path path;
8357 struct btrfs_key key;
8358 struct btrfs_key found_key;
8360 struct block_info *bits;
8362 struct extent_buffer *leaf;
8364 struct btrfs_root_item ri;
8365 struct list_head dropping_trees;
8366 struct list_head normal_trees;
8367 struct btrfs_root *root1;
/* Initialize every per-pass cache before hooking them into fs_info */
8372 dev_cache = RB_ROOT;
8373 cache_tree_init(&chunk_cache);
8374 block_group_tree_init(&block_group_cache);
8375 device_extent_tree_init(&dev_extent_cache);
8377 cache_tree_init(&extent_cache);
8378 cache_tree_init(&seen);
8379 cache_tree_init(&pending);
8380 cache_tree_init(&nodes);
8381 cache_tree_init(&reada);
8382 cache_tree_init(&corrupt_blocks);
8383 extent_io_tree_init(&excluded_extents);
8384 INIT_LIST_HEAD(&dropping_trees);
8385 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the caches through fs_info so the allocator/free hooks and
 * block checkers can record into them; cleared again before return.
 */
8388 root->fs_info->excluded_extents = &excluded_extents;
8389 root->fs_info->fsck_extent_cache = &extent_cache;
8390 root->fs_info->free_extent_hook = free_extent_hook;
8391 root->fs_info->corrupt_blocks = &corrupt_blocks;
8395 bits = malloc(bits_nr * sizeof(struct block_info));
8401 if (ctx.progress_enabled) {
8402 ctx.tp = TASK_EXTENTS;
8403 task_start(ctx.info);
/* Tree root and chunk root are not ROOT_ITEMs — queue them explicitly */
8407 root1 = root->fs_info->tree_root;
8408 level = btrfs_header_level(root1->node);
8409 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8410 root1->node->start, 0, level, 0,
8411 root1->nodesize, NULL);
8414 root1 = root->fs_info->chunk_root;
8415 level = btrfs_header_level(root1->node);
8416 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8417 root1->node->start, 0, level, 0,
8418 root1->nodesize, NULL);
/* Scan the tree root for every other ROOT_ITEM */
8421 btrfs_init_path(&path);
8424 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8425 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8430 leaf = path.nodes[0];
8431 slot = path.slots[0];
8432 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8433 ret = btrfs_next_leaf(root, &path);
8436 leaf = path.nodes[0];
8437 slot = path.slots[0];
8439 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8440 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8441 unsigned long offset;
8444 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8445 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8446 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * drop_progress objectid == 0 means the root is not being
 * dropped: queue it as a normal tree. Otherwise it is a
 * half-deleted snapshot and goes on dropping_trees with its
 * saved drop key/level.
 */
8447 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8448 level = btrfs_root_level(&ri);
8449 level_size = root->nodesize;
8450 ret = add_root_item_to_list(&normal_trees,
8452 btrfs_root_bytenr(&ri),
8453 last_snapshot, level,
8454 0, level_size, NULL);
8458 level = btrfs_root_level(&ri);
8459 level_size = root->nodesize;
8460 objectid = found_key.objectid;
8461 btrfs_disk_key_to_cpu(&found_key,
8463 ret = add_root_item_to_list(&dropping_trees,
8465 btrfs_root_bytenr(&ri),
8466 last_snapshot, level,
8468 level_size, &found_key);
8475 btrfs_release_path(&path);
8478 * check_block can return -EAGAIN if it fixes something, please keep
8479 * this in mind when dealing with return values from these functions, if
8480 * we get -EAGAIN we want to fall through and restart the loop.
8482 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8483 &seen, &reada, &nodes, &extent_cache,
8484 &chunk_cache, &dev_cache, &block_group_cache,
8491 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8492 &pending, &seen, &reada, &nodes,
8493 &extent_cache, &chunk_cache, &dev_cache,
8494 &block_group_cache, &dev_extent_cache);
/* Cross-checks over the caches filled by the walks above */
8501 ret = check_chunks(&chunk_cache, &block_group_cache,
8502 &dev_extent_cache, NULL, NULL, NULL, 0);
8509 ret = check_extent_refs(root, &extent_cache);
8516 ret = check_devices(&dev_cache, &dev_extent_cache);
8521 task_stop(ctx.info);
/* Normal-path teardown: unhook fsck state from fs_info, free caches */
8523 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8524 extent_io_tree_cleanup(&excluded_extents);
8525 root->fs_info->fsck_extent_cache = NULL;
8526 root->fs_info->free_extent_hook = NULL;
8527 root->fs_info->corrupt_blocks = NULL;
8528 root->fs_info->excluded_extents = NULL;
8531 free_chunk_cache_tree(&chunk_cache);
8532 free_device_cache_tree(&dev_cache);
8533 free_block_group_tree(&block_group_cache);
8534 free_device_extent_tree(&dev_extent_cache);
8535 free_extent_cache_tree(&seen);
8536 free_extent_cache_tree(&pending);
8537 free_extent_cache_tree(&reada);
8538 free_extent_cache_tree(&nodes);
/*
 * Error-path teardown (elided label not visible in this extract):
 * frees everything including the extent record cache and queued root
 * lists, which the success path handled elsewhere.
 */
8541 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8542 free_extent_cache_tree(&seen);
8543 free_extent_cache_tree(&pending);
8544 free_extent_cache_tree(&reada);
8545 free_extent_cache_tree(&nodes);
8546 free_chunk_cache_tree(&chunk_cache);
8547 free_block_group_tree(&block_group_cache);
8548 free_device_cache_tree(&dev_cache);
8549 free_device_extent_tree(&dev_extent_cache);
8550 free_extent_record_cache(root->fs_info, &extent_cache);
8551 free_root_item_list(&normal_trees);
8552 free_root_item_list(&dropping_trees);
8553 extent_io_tree_cleanup(&excluded_extents);
8558 * Check backrefs of a tree block given by @bytenr or @eb.
8560 * @root: the root containing the @bytenr or @eb
8561 * @eb: tree block extent buffer, can be NULL
8562 * @bytenr: bytenr of the tree block to search
8563 * @level: tree level of the tree block
8564 * @owner: owner of the tree block
8566 * Return >0 for any error found and output error message
8567 * Return 0 for no error found
8569 static int check_tree_block_ref(struct btrfs_root *root,
8570 struct extent_buffer *eb, u64 bytenr,
8571 int level, u64 owner)
8573 struct btrfs_key key;
8574 struct btrfs_root *extent_root = root->fs_info->extent_root;
8575 struct btrfs_path path;
8576 struct btrfs_extent_item *ei;
8577 struct btrfs_extent_inline_ref *iref;
8578 struct extent_buffer *leaf;
8584 u32 nodesize = root->nodesize;
/*
 * With SKINNY_METADATA the extent item is a METADATA_ITEM keyed by
 * (bytenr, METADATA_ITEM, level); otherwise it is a classic
 * EXTENT_ITEM. Search with offset = -1 then step back to the item.
 */
8591 btrfs_init_path(&path);
8592 key.objectid = bytenr;
8593 if (btrfs_fs_incompat(root->fs_info,
8594 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8595 key.type = BTRFS_METADATA_ITEM_KEY;
8597 key.type = BTRFS_EXTENT_ITEM_KEY;
8598 key.offset = (u64)-1;
8600 /* Search for the backref in extent tree */
8601 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8603 err |= BACKREF_MISSING;
8606 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8608 err |= BACKREF_MISSING;
8612 leaf = path.nodes[0];
8613 slot = path.slots[0];
8614 btrfs_item_key_to_cpu(leaf, &key, slot);
8616 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata stores the level in key.offset; classic items carry
 * a btrfs_tree_block_info before the inline refs. */
8618 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8619 skinny_level = (int)key.offset;
8620 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8622 struct btrfs_tree_block_info *info;
8624 info = (struct btrfs_tree_block_info *)(ei + 1);
8625 skinny_level = btrfs_tree_block_level(leaf, info);
8626 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Cross-check extent item flags/generation/level against the header */
8633 if (!(btrfs_extent_flags(leaf, ei) &
8634 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8636 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8637 key.objectid, nodesize,
8638 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8639 err = BACKREF_MISMATCH;
8641 header_gen = btrfs_header_generation(eb);
8642 extent_gen = btrfs_extent_generation(leaf, ei);
8643 if (header_gen != extent_gen) {
8645 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8646 key.objectid, nodesize, header_gen,
8648 err = BACKREF_MISMATCH;
8650 if (level != skinny_level) {
8652 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8653 key.objectid, nodesize, level, skinny_level);
8654 err = BACKREF_MISMATCH;
8656 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8658 "extent[%llu %u] is referred by other roots than %llu",
8659 key.objectid, nodesize, root->objectid);
8660 err = BACKREF_MISMATCH;
8665 * Iterate the extent/metadata item to find the exact backref
8667 item_size = btrfs_item_size_nr(leaf, slot);
8668 ptr = (unsigned long)iref;
8669 end = (unsigned long)ei + item_size;
/* Walk all inline refs packed inside the extent item */
8671 iref = (struct btrfs_extent_inline_ref *)ptr;
8672 type = btrfs_extent_inline_ref_type(leaf, iref);
8673 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8675 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8676 (offset == root->objectid || offset == owner)) {
8678 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8679 /* Check if the backref points to valid referencer */
/* Recursive: the shared-block parent is itself validated */
8680 found_ref = !check_tree_block_ref(root, NULL, offset,
8686 ptr += btrfs_extent_inline_ref_size(type);
8690 * Inlined extent item doesn't have what we need, check
8691 * TREE_BLOCK_REF_KEY
8694 btrfs_release_path(&path);
8695 key.objectid = bytenr;
8696 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8697 key.offset = root->objectid;
8699 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8704 err |= BACKREF_MISSING;
8706 btrfs_release_path(&path);
/* Only report the lost backref when we were given the live buffer */
8707 if (eb && (err & BACKREF_MISSING))
8708 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8709 bytenr, nodesize, owner, level);
8714 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8716 * Return >0 any error found and output error message
8717 * Return 0 for no error found
8719 static int check_extent_data_item(struct btrfs_root *root,
8720 struct extent_buffer *eb, int slot)
8722 struct btrfs_file_extent_item *fi;
8723 struct btrfs_path path;
8724 struct btrfs_root *extent_root = root->fs_info->extent_root;
8725 struct btrfs_key fi_key;
8726 struct btrfs_key dbref_key;
8727 struct extent_buffer *leaf;
8728 struct btrfs_extent_item *ei;
8729 struct btrfs_extent_inline_ref *iref;
8730 struct btrfs_extent_data_ref *dref;
8732 u64 file_extent_gen;
8735 u64 extent_num_bytes;
8743 int found_dbackref = 0;
8747 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8748 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8749 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8751 /* Nothing to check for hole and inline data extents */
8752 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8753 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8756 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8757 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8758 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8760 /* Check unaligned disk_num_bytes and num_bytes */
8761 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8763 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8764 fi_key.objectid, fi_key.offset, disk_num_bytes,
8766 err |= BYTES_UNALIGNED;
/* Accounting: allocated vs referenced bytes feed the final summary */
8768 data_bytes_allocated += disk_num_bytes;
8770 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8772 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8773 fi_key.objectid, fi_key.offset, extent_num_bytes,
8775 err |= BYTES_UNALIGNED;
8777 data_bytes_referenced += extent_num_bytes;
8779 owner = btrfs_header_owner(eb);
8781 /* Check the extent item of the file extent in extent tree */
8782 btrfs_init_path(&path);
8783 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8784 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8785 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8787 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8789 err |= BACKREF_MISSING;
8793 leaf = path.nodes[0];
8794 slot = path.slots[0];
8795 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8797 extent_flags = btrfs_extent_flags(leaf, ei);
8798 extent_gen = btrfs_extent_generation(leaf, ei);
8800 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8802 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8803 disk_bytenr, disk_num_bytes,
8804 BTRFS_EXTENT_FLAG_DATA);
8805 err |= BACKREF_MISMATCH;
/* A file extent can never predate the extent that backs it */
8808 if (file_extent_gen < extent_gen) {
8810 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8811 disk_bytenr, disk_num_bytes, file_extent_gen,
8813 err |= BACKREF_MISMATCH;
8816 /* Check data backref inside that extent item */
8817 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8818 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8819 ptr = (unsigned long)iref;
8820 end = (unsigned long)ei + item_size;
/* Scan every inline ref looking for one pointing back to this file */
8822 iref = (struct btrfs_extent_inline_ref *)ptr;
8823 type = btrfs_extent_inline_ref_type(leaf, iref);
8824 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8826 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8827 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8828 if (ref_root == owner || ref_root == root->objectid)
8830 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: validate the parent tree block it names instead */
8831 found_dbackref = !check_tree_block_ref(root, NULL,
8832 btrfs_extent_inline_ref_offset(leaf, iref),
8838 ptr += btrfs_extent_inline_ref_size(type);
8841 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
8842 if (!found_dbackref) {
8843 btrfs_release_path(&path);
8845 btrfs_init_path(&path);
8846 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8847 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs hash (root, objectid, offset) into key.offset */
8848 dbref_key.offset = hash_extent_data_ref(root->objectid,
8849 fi_key.objectid, fi_key.offset);
8851 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8852 &dbref_key, &path, 0, 0);
8857 if (!found_dbackref)
8858 err |= BACKREF_MISSING;
8860 btrfs_release_path(&path);
8861 if (err & BACKREF_MISSING) {
8862 error("data extent[%llu %llu] backref lost",
8863 disk_bytenr, disk_num_bytes);
8869 * Get real tree block level for the case like shared block
8870 * Return >= 0 as tree level
8871 * Return <0 for error
8873 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8875 struct extent_buffer *eb;
8876 struct btrfs_path path;
8877 struct btrfs_key key;
8878 struct btrfs_extent_item *ei;
8881 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8886 /* Search extent tree for extent generation and level */
8887 key.objectid = bytenr;
8888 key.type = BTRFS_METADATA_ITEM_KEY;
8889 key.offset = (u64)-1;
8891 btrfs_init_path(&path);
8892 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8895 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8903 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8904 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8905 struct btrfs_extent_item);
/* Only tree blocks have a meaningful level; data extents bail out */
8906 flags = btrfs_extent_flags(path.nodes[0], ei);
8907 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8912 /* Get transid for later read_tree_block() check */
8913 transid = btrfs_extent_generation(path.nodes[0], ei);
8915 /* Get backref level as one source */
8916 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata: the level is stored directly in key.offset */
8917 backref_level = key.offset;
8919 struct btrfs_tree_block_info *info;
8921 info = (struct btrfs_tree_block_info *)(ei + 1);
8922 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8924 btrfs_release_path(&path);
8926 /* Get level from tree block as an alternative source */
8927 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8928 if (!extent_buffer_uptodate(eb)) {
8929 free_extent_buffer(eb);
8932 header_level = btrfs_header_level(eb);
8933 free_extent_buffer(eb);
/* The two sources must agree, otherwise the extract's elided error
 * path (not visible here) is taken */
8935 if (header_level != backref_level)
8937 return header_level;
/* Shared error-exit label (label line elided in this extract) */
8940 btrfs_release_path(&path);
8945 * Check if a tree block backref is valid (points to a valid tree block)
8946 * if level == -1, level will be resolved
8947 * Return >0 for any error found and print error message
8949 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8950 u64 bytenr, int level)
8952 struct btrfs_root *root;
8953 struct btrfs_key key;
8954 struct btrfs_path path;
8955 struct extent_buffer *eb;
8956 struct extent_buffer *node;
8957 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8961 /* Query level for level == -1 special case */
8963 level = query_tree_block_level(fs_info, bytenr);
8965 err |= REFERENCER_MISSING;
/* Resolve the root the backref claims to belong to */
8969 key.objectid = root_id;
8970 key.type = BTRFS_ROOT_ITEM_KEY;
8971 key.offset = (u64)-1;
8973 root = btrfs_read_fs_root(fs_info, &key);
8975 err |= REFERENCER_MISSING;
8979 /* Read out the tree block to get item/node key */
8980 eb = read_tree_block(root, bytenr, root->nodesize, 0);
8981 if (!extent_buffer_uptodate(eb)) {
8982 err |= REFERENCER_MISSING;
8983 free_extent_buffer(eb);
8987 /* Empty tree, no need to check key */
8988 if (!btrfs_header_nritems(eb) && !level) {
8989 free_extent_buffer(eb);
/* First key of the block: node key for internal nodes, item key for
 * leaves (the level check between these two lines is elided here) */
8994 btrfs_node_key_to_cpu(eb, &key, 0);
8996 btrfs_item_key_to_cpu(eb, &key, 0);
8998 free_extent_buffer(eb);
9000 btrfs_init_path(&path);
9001 /* Search with the first key, to ensure we can reach it */
9002 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9004 err |= REFERENCER_MISSING;
/* The node the search placed at @level must be the claimed block */
9008 node = path.nodes[level];
9009 if (btrfs_header_bytenr(node) != bytenr) {
9011 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9012 bytenr, nodesize, bytenr,
9013 btrfs_header_bytenr(node));
9014 err |= REFERENCER_MISMATCH;
9016 if (btrfs_header_level(node) != level) {
9018 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9019 bytenr, nodesize, level,
9020 btrfs_header_level(node));
9021 err |= REFERENCER_MISMATCH;
9025 btrfs_release_path(&path);
/* Report with or without the level, depending on whether it resolved */
9027 if (err & REFERENCER_MISSING) {
9029 error("extent [%llu %d] lost referencer (owner: %llu)",
9030 bytenr, nodesize, root_id);
9033 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9034 bytenr, nodesize, root_id, level);
9041 * Check referencer for shared block backref
9042 * If level == -1, this function will resolve the level.
9044 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9045 u64 parent, u64 bytenr, int level)
9047 struct extent_buffer *eb;
9048 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9050 int found_parent = 0;
9053 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9054 if (!extent_buffer_uptodate(eb))
/* level == -1 means caller doesn't know it; resolve from extent tree */
9058 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child block */
9062 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for one naming @bytenr */
9065 nr = btrfs_header_nritems(eb);
9066 for (i = 0; i < nr; i++) {
9067 if (bytenr == btrfs_node_blockptr(eb, i)) {
9073 free_extent_buffer(eb);
9074 if (!found_parent) {
9076 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9077 bytenr, nodesize, parent, level);
9078 return REFERENCER_MISSING;
9084 * Check referencer for normal (inlined) data ref
9085 * If len == 0, it will be resolved by searching in extent tree
9087 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9088 u64 root_id, u64 objectid, u64 offset,
9089 u64 bytenr, u64 len, u32 count)
9091 struct btrfs_root *root;
9092 struct btrfs_root *extent_root = fs_info->extent_root;
9093 struct btrfs_key key;
9094 struct btrfs_path path;
9095 struct extent_buffer *leaf;
9096 struct btrfs_file_extent_item *fi;
9097 u32 found_count = 0;
/* When len == 0, resolve the extent length from the extent tree first */
9102 key.objectid = bytenr;
9103 key.type = BTRFS_EXTENT_ITEM_KEY;
9104 key.offset = (u64)-1;
9106 btrfs_init_path(&path);
9107 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9110 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9113 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9114 if (key.objectid != bytenr ||
9115 key.type != BTRFS_EXTENT_ITEM_KEY)
9118 btrfs_release_path(&path);
/* Open the fs root the data backref points into */
9120 key.objectid = root_id;
9121 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9122 key.offset = (u64)-1;
9123 btrfs_init_path(&path);
9125 root = btrfs_read_fs_root(fs_info, &key);
9129 key.objectid = objectid;
9130 key.type = BTRFS_EXTENT_DATA_KEY;
9132 * It can be nasty as data backref offset is
9133 * file offset - file extent offset, which is smaller or
9134 * equal to original backref offset. The only special case is
9135 * overflow. So we need to special check and do further search.
/* Negative (overflowed) backref offsets start the search at 0 */
9137 key.offset = offset & (1ULL << 63) ? 0 : offset;
9139 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9144 * Search afterwards to get correct one
9145 * NOTE: As we must do a comprehensive check on the data backref to
9146 * make sure the dref count also matches, we must iterate all file
9147 * extents for that inode.
9150 leaf = path.nodes[0];
9151 slot = path.slots[0];
9153 btrfs_item_key_to_cpu(leaf, &key, slot);
9154 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9156 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9158 * Except normal disk bytenr and disk num bytes, we still
9159 * need to do extra check on dbackref offset as
9160 * dbackref offset = file_offset - file_extent_offset
9162 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9163 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9164 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9168 ret = btrfs_next_item(root, &path);
9173 btrfs_release_path(&path);
/* The number of matching file extents must equal the dref count */
9174 if (found_count != count) {
9176 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9177 bytenr, len, root_id, objectid, offset, count, found_count);
9178 return REFERENCER_MISSING;
9184 * Check if the referencer of a shared data backref exists
9186 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9187 u64 parent, u64 bytenr)
9189 struct extent_buffer *eb;
9190 struct btrfs_key key;
9191 struct btrfs_file_extent_item *fi;
9192 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9194 int found_parent = 0;
/* Read the claimed parent leaf and look for a file extent that points
 * at @bytenr */
9197 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9198 if (!extent_buffer_uptodate(eb))
9201 nr = btrfs_header_nritems(eb);
9202 for (i = 0; i < nr; i++) {
9203 btrfs_item_key_to_cpu(eb, &key, i);
9204 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents have no disk bytenr and can never match */
9207 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9208 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9211 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9218 free_extent_buffer(eb);
9219 if (!found_parent) {
9220 error("shared extent %llu referencer lost (parent: %llu)",
9222 return REFERENCER_MISSING;
9228 * This function will check a given extent item, including its backref and
9229 * itself (like crossing stripe boundary and type)
9231 * Since we don't use extent_record anymore, introduce new error bit
9233 static int check_extent_item(struct btrfs_fs_info *fs_info,
9234 struct extent_buffer *eb, int slot)
9236 struct btrfs_extent_item *ei;
9237 struct btrfs_extent_inline_ref *iref;
9238 struct btrfs_extent_data_ref *dref;
9242 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9243 u32 item_size = btrfs_item_size_nr(eb, slot);
9248 struct btrfs_key key;
/* Global accounting: EXTENT_ITEM carries its length in key.offset,
 * METADATA_ITEM is always one nodesize */
9252 btrfs_item_key_to_cpu(eb, &key, slot);
9253 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9254 bytes_used += key.offset;
9256 bytes_used += nodesize;
9258 if (item_size < sizeof(*ei)) {
9260 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9261 * old thing when on disk format is still un-determined.
9262 * No need to care about it anymore
9264 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9268 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9269 flags = btrfs_extent_flags(eb, ei);
9271 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents must not straddle a stripe boundary */
9273 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9274 error("bad metadata [%llu, %llu) crossing stripe boundary",
9275 key.objectid, key.objectid + nodesize);
9276 err |= CROSSING_STRIPE_BOUNDARY;
9279 ptr = (unsigned long)(ei + 1);
9281 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9282 /* Old EXTENT_ITEM metadata */
9283 struct btrfs_tree_block_info *info;
9285 info = (struct btrfs_tree_block_info *)ptr;
9286 level = btrfs_tree_block_level(eb, info);
9287 ptr += sizeof(struct btrfs_tree_block_info);
9289 /* New METADATA_ITEM */
9292 end = (unsigned long)ei + item_size;
9295 err |= ITEM_SIZE_MISMATCH;
9299 /* Now check every backref in this extent item */
9301 iref = (struct btrfs_extent_inline_ref *)ptr;
9302 type = btrfs_extent_inline_ref_type(eb, iref);
9303 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch to the matching referencer check per inline-ref type */
9305 case BTRFS_TREE_BLOCK_REF_KEY:
9306 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9310 case BTRFS_SHARED_BLOCK_REF_KEY:
9311 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9315 case BTRFS_EXTENT_DATA_REF_KEY:
9316 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9317 ret = check_extent_data_backref(fs_info,
9318 btrfs_extent_data_ref_root(eb, dref),
9319 btrfs_extent_data_ref_objectid(eb, dref),
9320 btrfs_extent_data_ref_offset(eb, dref),
9321 key.objectid, key.offset,
9322 btrfs_extent_data_ref_count(eb, dref));
9325 case BTRFS_SHARED_DATA_REF_KEY:
9326 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9330 error("extent[%llu %d %llu] has unknown ref type: %d",
9331 key.objectid, key.type, key.offset, type);
9332 err |= UNKNOWN_TYPE;
9336 ptr += btrfs_extent_inline_ref_size(type);
9345 * Check if a dev extent item is referred correctly by its chunk
9347 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9348 struct extent_buffer *eb, int slot)
9350 struct btrfs_root *chunk_root = fs_info->chunk_root;
9351 struct btrfs_dev_extent *ptr;
9352 struct btrfs_path path;
9353 struct btrfs_key chunk_key;
9354 struct btrfs_key devext_key;
9355 struct btrfs_chunk *chunk;
9356 struct extent_buffer *l;
9360 int found_chunk = 0;
9363 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9364 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9365 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it backs; look that chunk up */
9367 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9368 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9369 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9371 btrfs_init_path(&path);
9372 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9377 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9378 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at (devid, offset) */
9381 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9382 for (i = 0; i < num_stripes; i++) {
9383 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9384 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9386 if (devid == devext_key.objectid &&
9387 offset == devext_key.offset) {
9393 btrfs_release_path(&path);
9396 "device extent[%llu, %llu, %llu] did not find the related chunk",
9397 devext_key.objectid, devext_key.offset, length);
9398 return REFERENCER_MISSING;
/*
 * Replace @root's node with a freshly initialized empty block (or, with
 * @overwrite, reuse the current root block). Used by --init-extent-tree
 * style repairs to throw a tree away and rebuild it from scratch.
 */
9403 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9404 struct btrfs_root *root, int overwrite)
9406 struct extent_buffer *c;
9407 struct extent_buffer *old = root->node;
9410 struct btrfs_disk_key disk_key = {0,0,0};
9416 extent_buffer_get(c);
9419 c = btrfs_alloc_free_block(trans, root,
9421 root->root_key.objectid,
9422 &disk_key, level, 0, 0);
9425 extent_buffer_get(c);
/* Build an empty leaf/node header for the new root block */
9429 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9430 btrfs_set_header_level(c, level);
9431 btrfs_set_header_bytenr(c, c->start);
9432 btrfs_set_header_generation(c, trans->transid);
9433 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9434 btrfs_set_header_owner(c, root->root_key.objectid);
9436 write_extent_buffer(c, root->fs_info->fsid,
9437 btrfs_header_fsid(), BTRFS_FSID_SIZE);
9439 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9440 btrfs_header_chunk_tree_uuid(c),
9443 btrfs_mark_buffer_dirty(c);
9445 * this case can happen in the following case:
9447 * 1.overwrite previous root.
9449 * 2.reinit reloc data root, this is because we skip pin
9450 * down reloc data tree before which means we can allocate
9451 * same block bytenr here.
9453 if (old->start == c->start) {
9454 btrfs_set_root_generation(&root->root_item,
9456 root->root_item.level = btrfs_header_level(root->node);
9457 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9458 &root->root_key, &root->root_item);
9460 free_extent_buffer(c);
9464 free_extent_buffer(old);
/* Queue the root so the new node is committed with the transaction */
9466 add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb so the allocator
 * will not hand those bytes out while the extent tree is being rebuilt.
 * @tree_root non-zero means @eb belongs to the tree root, whose leaves
 * contain ROOT_ITEMs that must themselves be descended into.
 */
9470 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9471 struct extent_buffer *eb, int tree_root)
9473 struct extent_buffer *tmp;
9474 struct btrfs_root_item *ri;
9475 struct btrfs_key key;
9478 int level = btrfs_header_level(eb);
9484 * If we have pinned this block before, don't pin it again.
9485 * This can not only avoid forever loop with broken filesystem
9486 * but also give us some speedups.
9488 if (test_range_bit(&fs_info->pinned_extents, eb->start,
9489 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9492 btrfs_pin_extent(fs_info, eb->start, eb->len);
9494 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9495 nritems = btrfs_header_nritems(eb);
9496 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow each ROOT_ITEM's root node */
9498 btrfs_item_key_to_cpu(eb, &key, i);
9499 if (key.type != BTRFS_ROOT_ITEM_KEY)
9501 /* Skip the extent root and reloc roots */
9502 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9503 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9504 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9506 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9507 bytenr = btrfs_disk_root_bytenr(eb, ri);
9510 * If at any point we start needing the real root we
9511 * will have to build a stump root for the root we are
9512 * in, but for now this doesn't actually use the root so
9513 * just pass in extent_root.
9515 tmp = read_tree_block(fs_info->extent_root, bytenr,
9517 if (!extent_buffer_uptodate(tmp)) {
9518 fprintf(stderr, "Error reading root block\n");
9521 ret = pin_down_tree_blocks(fs_info, tmp, 0);
9522 free_extent_buffer(tmp);
/* Internal node: recurse into each child pointer */
9526 bytenr = btrfs_node_blockptr(eb, i);
9528 /* If we aren't the tree root don't read the block */
9529 if (level == 1 && !tree_root) {
/* Children of a level-1 node are leaves: pin without reading */
9530 btrfs_pin_extent(fs_info, bytenr, nodesize);
9534 tmp = read_tree_block(fs_info->extent_root, bytenr,
9536 if (!extent_buffer_uptodate(tmp)) {
9537 fprintf(stderr, "Error reading tree block\n");
9540 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9541 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (tree_root=1 so ROOT_ITEM leaves are descended into) before the
 * extent tree is reinitialized.
 */
9550 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9554 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9558 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups directly from the chunk tree and
 * mark their ranges free, discarding whatever (possibly corrupt) block
 * group accounting was loaded from disk.
 */
9561 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9563 struct btrfs_block_group_cache *cache;
9564 struct btrfs_path *path;
9565 struct extent_buffer *leaf;
9566 struct btrfs_chunk *chunk;
9567 struct btrfs_key key;
9571 path = btrfs_alloc_path();
9576 key.type = BTRFS_CHUNK_ITEM_KEY;
9579 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9581 btrfs_free_path(path);
9586 * We do this in case the block groups were screwed up and had alloc
9587 * bits that aren't actually set on the chunks. This happens with
9588 * restored images every time and could happen in real life I guess.
9590 fs_info->avail_data_alloc_bits = 0;
9591 fs_info->avail_metadata_alloc_bits = 0;
9592 fs_info->avail_system_alloc_bits = 0;
9594 /* First we need to create the in-memory block groups */
9596 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9597 ret = btrfs_next_leaf(fs_info->chunk_root, path);
9599 btrfs_free_path(path);
9607 leaf = path->nodes[0];
9608 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9609 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9614 chunk = btrfs_item_ptr(leaf, path->slots[0],
9615 struct btrfs_chunk);
/* One block group per chunk, covering the chunk's logical range */
9616 btrfs_add_block_group(fs_info, 0,
9617 btrfs_chunk_type(leaf, chunk),
9618 key.objectid, key.offset,
9619 btrfs_chunk_length(leaf, chunk));
/* Treat the whole range as free space for the rebuild */
9620 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9621 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk every created block group (loop structure elided here) */
9627 cache = btrfs_lookup_first_block_group(fs_info, start);
9631 start = cache->key.objectid + cache->key.offset;
9634 btrfs_free_path(path);
/*
 * Cancel any pending balance operation left on disk.
 *
 * Deletes the balance item from the tree root, removes all tree-reloc root
 * items, then re-reads and re-initializes the data relocation tree so that
 * a later mount does not try to resume the interrupted balance.
 * Returns 0 on success, negative errno on failure.
 */
9638 static int reset_balance(struct btrfs_trans_handle *trans,
9639 struct btrfs_fs_info *fs_info)
9641 struct btrfs_root *root = fs_info->tree_root;
9642 struct btrfs_path *path;
9643 struct extent_buffer *leaf;
9644 struct btrfs_key key;
9645 int del_slot, del_nr = 0;
9649 path = btrfs_alloc_path();
/* Step 1: remove the balance item itself (if present). */
9653 key.objectid = BTRFS_BALANCE_OBJECTID;
9654 key.type = BTRFS_BALANCE_ITEM_KEY;
9657 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9662 goto reinit_data_reloc;
9667 ret = btrfs_del_item(trans, root, path);
9670 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item from the root tree,
 * batching contiguous items (del_slot/del_nr) into single deletions. */
9672 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9673 key.type = BTRFS_ROOT_ITEM_KEY;
9676 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9680 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9685 ret = btrfs_del_items(trans, root, path,
9692 btrfs_release_path(path);
9695 ret = btrfs_search_slot(trans, root, &key, path,
9702 leaf = path->nodes[0];
9703 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9704 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9706 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9711 del_slot = path->slots[0];
9720 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9724 btrfs_release_path(path);
/* Step 3: wipe and re-create the data relocation tree. */
9727 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9728 key.type = BTRFS_ROOT_ITEM_KEY;
9729 key.offset = (u64)-1;
9730 root = btrfs_read_fs_root(fs_info, &key);
9732 fprintf(stderr, "Error reading data reloc tree\n");
9733 ret = PTR_ERR(root);
9736 record_root_in_trans(trans, root);
9737 ret = btrfs_fsck_reinit_root(trans, root, 0);
9740 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9742 btrfs_free_path(path);
/*
 * Throw away and rebuild the extent tree (--init-extent-tree).
 *
 * Order matters: pin all referenced metadata first, reset the in-memory
 * block groups from the chunk tree, re-init the extent root, re-insert the
 * block group items, and finally cancel any pending balance.  Refusing
 * mixed block groups is deliberate: pinning only walks metadata, so data
 * extents sharing block groups with metadata would be lost.
 * Returns 0 on success, negative errno on failure.
 */
9746 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9747 struct btrfs_fs_info *fs_info)
9753 * The only reason we don't do this is because right now we're just
9754 * walking the trees we find and pinning down their bytes, we don't look
9755 * at any of the leaves. In order to do mixed groups we'd have to check
9756 * the leaves of any fs roots and pin down the bytes for any file
9757 * extents we find. Not hard but why do it if we don't have to?
9759 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9760 fprintf(stderr, "We don't support re-initing the extent tree "
9761 "for mixed block groups yet, please notify a btrfs "
9762 "developer you want to do this so they can add this "
9763 "functionality.\n");
9768 * first we need to walk all of the trees except the extent tree and pin
9769 * down the bytes that are in use so we don't overwrite any existing
9772 ret = pin_metadata_blocks(fs_info);
9774 fprintf(stderr, "error pinning down used bytes\n");
9779 * Need to drop all the block groups since we're going to recreate all
9782 btrfs_free_block_groups(fs_info);
9783 ret = reset_block_groups(fs_info);
9785 fprintf(stderr, "error resetting the block groups\n");
9789 /* Ok we can allocate now, reinit the extent root */
9790 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9792 fprintf(stderr, "extent root initialization failed\n");
9794 * When the transaction code is updated we should end the
9795 * transaction, but for now progs only knows about commit so
9796 * just return an error.
9802 * Now we have all the in-memory block groups setup so we can make
9803 * allocations properly, and the metadata we care about is safe since we
9804 * pinned all of it above.
9807 struct btrfs_block_group_cache *cache;
9809 cache = btrfs_lookup_first_block_group(fs_info, start);
9812 start = cache->key.objectid + cache->key.offset;
/* Re-insert each block group item into the fresh extent tree. */
9813 ret = btrfs_insert_item(trans, fs_info->extent_root,
9814 &cache->key, &cache->item,
9815 sizeof(cache->item));
9817 fprintf(stderr, "Error adding block group\n");
9820 btrfs_extent_post_op(trans, fs_info->extent_root);
9823 ret = reset_balance(trans, fs_info);
9825 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of one metadata block.
 *
 * Looks up the block's owner root from its header, then does a write-mode
 * btrfs_search_slot() with path->lowest_level set to the block's level --
 * the search COWs every node on the path, including @eb, and the commit
 * persists the result.  Used to rewrite blocks queued on recow_ebs.
 * Returns 0 on success or a negative error.
 */
9830 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9832 struct btrfs_path *path;
9833 struct btrfs_trans_handle *trans;
9834 struct btrfs_key key;
9837 printf("Recowing metadata block %llu\n", eb->start);
9838 key.objectid = btrfs_header_owner(eb);
9839 key.type = BTRFS_ROOT_ITEM_KEY;
9840 key.offset = (u64)-1;
9842 root = btrfs_read_fs_root(root->fs_info, &key);
9844 fprintf(stderr, "Couldn't find owner root %llu\n",
9846 return PTR_ERR(root);
9849 path = btrfs_alloc_path();
9853 trans = btrfs_start_transaction(root, 1);
9854 if (IS_ERR(trans)) {
9855 btrfs_free_path(path);
9856 return PTR_ERR(trans);
/* Search down only as far as eb's own level; use its first key so the
 * search path passes through eb itself. */
9859 path->lowest_level = btrfs_header_level(eb);
9860 if (path->lowest_level)
9861 btrfs_node_key_to_cpu(eb, &key, 0);
9863 btrfs_item_key_to_cpu(eb, &key, 0);
9865 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9866 btrfs_commit_transaction(trans, root);
9867 btrfs_free_path(path);
/*
 * Delete a single corrupt item recorded on the delete_items list.
 *
 * Reads the root the bad item lives in (bad->root_id), searches for the
 * exact key in delete mode, removes it, and commits.
 * Returns 0 on success or a negative error.
 */
9871 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9873 struct btrfs_path *path;
9874 struct btrfs_trans_handle *trans;
9875 struct btrfs_key key;
9878 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9879 bad->key.type, bad->key.offset);
9880 key.objectid = bad->root_id;
9881 key.type = BTRFS_ROOT_ITEM_KEY;
9882 key.offset = (u64)-1;
9884 root = btrfs_read_fs_root(root->fs_info, &key);
9886 fprintf(stderr, "Couldn't find owner root %llu\n",
9888 return PTR_ERR(root);
9891 path = btrfs_alloc_path();
9895 trans = btrfs_start_transaction(root, 1);
9896 if (IS_ERR(trans)) {
9897 btrfs_free_path(path);
9898 return PTR_ERR(trans);
/* ins_len = -1, cow = 1: position on the item for deletion. */
9901 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9907 ret = btrfs_del_item(trans, root, path);
9909 btrfs_commit_transaction(trans, root);
9910 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock copy, then committing so the change hits disk.  Run before
 * repair, since repair commits would otherwise break log replay on the
 * next mount.  Returns the commit result (0 on success).
 */
9914 static int zero_log_tree(struct btrfs_root *root)
9916 struct btrfs_trans_handle *trans;
9919 trans = btrfs_start_transaction(root, 1);
9920 if (IS_ERR(trans)) {
9921 ret = PTR_ERR(trans);
9924 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9925 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9926 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for one on-disk data extent.
 *
 * Walks the extent [start, start + len) one sector at a time: reads each
 * sector into @buf (caller-provided, at least sectorsize bytes) and feeds
 * it to btrfs_csum_file_block(), which checksums it and stores the result
 * in the csum tree.  Returns 0 on success or a negative error.
 */
9930 static int populate_csum(struct btrfs_trans_handle *trans,
9931 struct btrfs_root *csum_root, char *buf, u64 start,
9938 while (offset < len) {
9939 sectorsize = csum_root->sectorsize;
9940 ret = read_extent_data(csum_root, buf, start + offset,
9944 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9945 start + offset, buf, sectorsize);
9948 offset += sectorsize;
/*
 * Rebuild checksums for every regular (non-inline, non-prealloc) file
 * extent in one fs/subvolume tree.
 *
 * Iterates all EXTENT_DATA items in @cur_root, and for each REG extent
 * checksums its full on-disk range via populate_csum().  Holes (disk
 * bytenr 0) are presumably skipped in elided code -- verify upstream.
 * Returns 0 on success or a negative error.
 */
9953 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9954 struct btrfs_root *csum_root,
9955 struct btrfs_root *cur_root)
9957 struct btrfs_path *path;
9958 struct btrfs_key key;
9959 struct extent_buffer *node;
9960 struct btrfs_file_extent_item *fi;
9967 path = btrfs_alloc_path();
/* One sector scratch buffer reused for every extent. */
9970 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9980 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9983 /* Iterate all regular file extents and fill its csum */
9985 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9987 if (key.type != BTRFS_EXTENT_DATA_KEY)
9989 node = path->nodes[0];
9990 slot = path->slots[0];
9991 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9992 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9994 start = btrfs_file_extent_disk_bytenr(node, fi);
9995 len = btrfs_file_extent_disk_num_bytes(node, fi);
9997 ret = populate_csum(trans, csum_root, buf, start, len);
10004 * TODO: if next leaf is corrupted, jump to nearest next valid
10007 ret = btrfs_next_item(cur_root, path);
10017 btrfs_free_path(path);
/*
 * Rebuild the whole csum tree by walking every fs/subvolume tree.
 *
 * Scans root items in the tree root starting at FS_TREE_OBJECTID; every
 * root whose objectid is a valid fstree id is read and handed to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree cannot be
 * trusted (e.g. together with --init-extent-tree).
 * Returns 0 on success or a negative error.
 */
10022 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10023 struct btrfs_root *csum_root)
10025 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10026 struct btrfs_path *path;
10027 struct btrfs_root *tree_root = fs_info->tree_root;
10028 struct btrfs_root *cur_root;
10029 struct extent_buffer *node;
10030 struct btrfs_key key;
10034 path = btrfs_alloc_path();
10038 key.objectid = BTRFS_FS_TREE_OBJECTID;
10040 key.type = BTRFS_ROOT_ITEM_KEY;
10042 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10051 node = path->nodes[0];
10052 slot = path->slots[0];
10053 btrfs_item_key_to_cpu(node, &key, slot);
/* Stop once past the range of possible fs tree ids; skip non-root
 * items and non-fstree roots. */
10054 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10056 if (key.type != BTRFS_ROOT_ITEM_KEY)
10058 if (!is_fstree(key.objectid))
10060 key.offset = (u64)-1;
10062 cur_root = btrfs_read_fs_root(fs_info, &key);
10063 if (IS_ERR(cur_root) || !cur_root) {
10064 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10068 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10073 ret = btrfs_next_item(tree_root, path);
10083 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree.
 *
 * Iterates every EXTENT_ITEM; those flagged BTRFS_EXTENT_FLAG_DATA get
 * their whole range checksummed via populate_csum().  Faster than the
 * fs-tree walk but only usable when the extent tree is trustworthy.
 * Returns 0 on success or a negative error.
 */
10087 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10088 struct btrfs_root *csum_root)
10090 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10091 struct btrfs_path *path;
10092 struct btrfs_extent_item *ei;
10093 struct extent_buffer *leaf;
10095 struct btrfs_key key;
10098 path = btrfs_alloc_path();
10103 key.type = BTRFS_EXTENT_ITEM_KEY;
10106 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10108 btrfs_free_path(path);
/* One sector scratch buffer reused for every data extent. */
10112 buf = malloc(csum_root->sectorsize);
10114 btrfs_free_path(path);
10119 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10120 ret = btrfs_next_leaf(extent_root, path);
10128 leaf = path->nodes[0];
10130 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10131 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10136 ei = btrfs_item_ptr(leaf, path->slots[0],
10137 struct btrfs_extent_item);
/* Metadata extents carry no data csums; skip them. */
10138 if (!(btrfs_extent_flags(leaf, ei) &
10139 BTRFS_EXTENT_FLAG_DATA)) {
10144 ret = populate_csum(trans, csum_root, buf, key.objectid,
10151 btrfs_free_path(path);
10157 * Recalculate the csum and put it into the csum tree.
10159 * Extent tree init will wipe out all the extent info, so in that case, we
10160 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10161 * will use fs/subvol trees to init the csum tree.
10163 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10164 struct btrfs_root *csum_root,
10165 int search_fs_tree)
/* Dispatch to the fs-tree walker (extent tree untrusted) or the
 * faster extent-tree walker. */
10167 if (search_fs_tree)
10168 return fill_csum_tree_from_fs(trans, csum_root);
10170 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Free the global roots_info_cache built by build_roots_info_cache():
 * drop every root_item_info entry, then the cache tree itself, and reset
 * the global pointer so a later build starts fresh.  Safe to call when
 * the cache was never built.
 */
10173 static void free_roots_info_cache(void)
10175 if (!roots_info_cache)
10178 while (!cache_tree_empty(roots_info_cache)) {
10179 struct cache_extent *entry;
10180 struct root_item_info *rii;
10182 entry = first_cache_extent(roots_info_cache);
10185 remove_cache_extent(roots_info_cache, entry);
10186 rii = container_of(entry, struct root_item_info, cache_extent);
10190 free(roots_info_cache);
10191 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, for every subvolume/snapshot root id,
 * the highest-level tree block that claims to be its root: bytenr, level,
 * generation, and how many blocks share that top level (node_count).
 *
 * The cache (global roots_info_cache, keyed by root id) is consumed by
 * maybe_repair_root_item() to detect and fix root items left pointing at
 * stale root nodes.  Returns 0 on success, negative errno on failure.
 */
10194 static int build_roots_info_cache(struct btrfs_fs_info *info)
10197 struct btrfs_key key;
10198 struct extent_buffer *leaf;
10199 struct btrfs_path *path;
/* Lazily allocate the global cache; reused across calls until
 * free_roots_info_cache() runs. */
10201 if (!roots_info_cache) {
10202 roots_info_cache = malloc(sizeof(*roots_info_cache));
10203 if (!roots_info_cache)
10205 cache_tree_init(roots_info_cache);
10208 path = btrfs_alloc_path();
10213 key.type = BTRFS_EXTENT_ITEM_KEY;
10216 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10219 leaf = path->nodes[0];
10222 struct btrfs_key found_key;
10223 struct btrfs_extent_item *ei;
10224 struct btrfs_extent_inline_ref *iref;
10225 int slot = path->slots[0];
10230 struct cache_extent *entry;
10231 struct root_item_info *rii;
10233 if (slot >= btrfs_header_nritems(leaf)) {
10234 ret = btrfs_next_leaf(info->extent_root, path);
10241 leaf = path->nodes[0];
10242 slot = path->slots[0];
10245 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree blocks matter: metadata items, or extent items with
 * the TREE_BLOCK flag. */
10247 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10248 found_key.type != BTRFS_METADATA_ITEM_KEY)
10251 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10252 flags = btrfs_extent_flags(leaf, ei);
10254 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10255 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* For METADATA_ITEM the level is the key offset and the inline
 * refs follow the extent item directly; for EXTENT_ITEM a
 * btrfs_tree_block_info sits in between and carries the level. */
10258 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10259 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10260 level = found_key.offset;
10262 struct btrfs_tree_block_info *binfo;
10264 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10265 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10266 level = btrfs_tree_block_level(leaf, binfo);
10270 * For a root extent, it must be of the following type and the
10271 * first (and only one) iref in the item.
10273 type = btrfs_extent_inline_ref_type(leaf, iref);
10274 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10277 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10278 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10280 rii = malloc(sizeof(struct root_item_info));
10285 rii->cache_extent.start = root_id;
10286 rii->cache_extent.size = 1;
/* (u8)-1 == "level unknown yet". */
10287 rii->level = (u8)-1;
10288 entry = &rii->cache_extent;
10289 ret = insert_cache_extent(roots_info_cache, entry);
10292 rii = container_of(entry, struct root_item_info,
10296 ASSERT(rii->cache_extent.start == root_id);
10297 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level block for this root; count blocks
 * tied at that level (a real root has node_count == 1). */
10299 if (level > rii->level || rii->level == (u8)-1) {
10300 rii->level = level;
10301 rii->bytenr = found_key.objectid;
10302 rii->gen = btrfs_extent_generation(leaf, ei);
10303 rii->node_count = 1;
10304 } else if (level == rii->level) {
10312 btrfs_free_path(path);
/*
 * Compare one on-disk root item (at path->nodes[0]/slots[0]) against the
 * true root recorded in roots_info_cache, and -- unless read_only_mode --
 * rewrite bytenr/level/generation in place when they disagree.
 *
 * Returns a positive value when the item needed (or needs) fixing,
 * 0 when it already matches, negative on error.  Refuses to "fix" an item
 * whose generation is newer than the cached root node's: that indicates
 * the cache is the stale side, not the item.
 */
10317 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10318 struct btrfs_path *path,
10319 const struct btrfs_key *root_key,
10320 const int read_only_mode)
10322 const u64 root_id = root_key->objectid;
10323 struct cache_extent *entry;
10324 struct root_item_info *rii;
10325 struct btrfs_root_item ri;
10326 unsigned long offset;
10328 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10331 "Error: could not find extent items for root %llu\n",
10332 root_key->objectid);
10336 rii = container_of(entry, struct root_item_info, cache_extent);
10337 ASSERT(rii->cache_extent.start == root_id);
10338 ASSERT(rii->cache_extent.size == 1);
/* More than one tree block at the top level means we could not
 * identify a unique root node -- bail rather than guess. */
10340 if (rii->node_count != 1) {
10342 "Error: could not find btree root extent for root %llu\n",
10347 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10348 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10350 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10351 btrfs_root_level(&ri) != rii->level ||
10352 btrfs_root_generation(&ri) != rii->gen) {
10355 * If we're in repair mode but our caller told us to not update
10356 * the root item, i.e. just check if it needs to be updated, don't
10357 * print this message, since the caller will call us again shortly
10358 * for the same root item without read only mode (the caller will
10359 * open a transaction first).
10361 if (!(read_only_mode && repair))
10363 "%sroot item for root %llu,"
10364 " current bytenr %llu, current gen %llu, current level %u,"
10365 " new bytenr %llu, new gen %llu, new level %u\n",
10366 (read_only_mode ? "" : "fixing "),
10368 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10369 btrfs_root_level(&ri),
10370 rii->bytenr, rii->gen, rii->level);
10372 if (btrfs_root_generation(&ri) > rii->gen) {
10374 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10375 root_id, btrfs_root_generation(&ri), rii->gen);
10379 if (!read_only_mode) {
10380 btrfs_set_root_bytenr(&ri, rii->bytenr);
10381 btrfs_set_root_level(&ri, rii->level);
10382 btrfs_set_root_generation(&ri, rii->gen);
10383 write_extent_buffer(path->nodes[0], &ri,
10384 offset, sizeof(ri));
10394 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10395 * caused read-only snapshots to be corrupted if they were created at a moment
10396 * when the source subvolume/snapshot had orphan items. The issue was that the
10397 * on-disk root items became incorrect, referring to the pre orphan cleanup root
10398 * node instead of the post orphan cleanup root node.
10399 * So this function, and its callees, just detects and fixes those cases. Even
10400 * though the regression was for read-only snapshots, this function applies to
10401 * any snapshot/subvolume root.
10402 * This must be run before any other repair code - not doing it so, makes other
10403 * repair code delete or modify backrefs in the extent tree for example, which
10404 * will result in an inconsistent fs after repairing the root items.
/*
 * Returns the number of root items fixed (or, without --repair, the number
 * that need fixing), or a negative error.  Iterates all ROOT_ITEMs from
 * FIRST_FREE_OBJECTID onward, skipping TREE_RELOC roots; only opens a
 * transaction for a leaf that actually contains a bad item (need_trans),
 * to avoid rotating backup roots on clean filesystems.
 */
10406 static int repair_root_items(struct btrfs_fs_info *info)
10408 struct btrfs_path *path = NULL;
10409 struct btrfs_key key;
10410 struct extent_buffer *leaf;
10411 struct btrfs_trans_handle *trans = NULL;
10414 int need_trans = 0;
10416 ret = build_roots_info_cache(info);
10420 path = btrfs_alloc_path();
10426 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10427 key.type = BTRFS_ROOT_ITEM_KEY;
10432 * Avoid opening and committing transactions if a leaf doesn't have
10433 * any root items that need to be fixed, so that we avoid rotating
10434 * backup roots unnecessarily.
10437 trans = btrfs_start_transaction(info->tree_root, 1);
10438 if (IS_ERR(trans)) {
10439 ret = PTR_ERR(trans);
10444 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10448 leaf = path->nodes[0];
10451 struct btrfs_key found_key;
10453 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
/* Remember the next key before dropping the path, so the
 * scan can resume after committing. */
10454 int no_more_keys = find_next_key(path, &key);
10456 btrfs_release_path(path);
10458 ret = btrfs_commit_transaction(trans,
10470 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10472 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10474 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass over an item may be read-only (no trans yet);
 * a hit sets need_trans and the leaf is revisited writable. */
10477 ret = maybe_repair_root_item(info, path, &found_key,
10482 if (!trans && repair) {
10485 btrfs_release_path(path);
10495 free_roots_info_cache();
10496 btrfs_free_path(path);
10498 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage text for `btrfs check`, printed by usage()/help machinery.
 * One string per output line; option descriptions must stay in sync with
 * the long_options table in cmd_check().
 */
10505 const char * const cmd_check_usage[] = {
10506 "btrfs check [options] <device>",
10507 "Check structural integrity of a filesystem (unmounted).",
10508 "Check structural integrity of an unmounted filesystem. Verify internal",
10509 "trees' consistency and item connectivity. In the repair mode try to",
10510 "fix the problems found.",
10511 "WARNING: the repair mode is considered dangerous",
10513 "-s|--super <superblock> use this superblock copy",
10514 "-b|--backup use the first valid backup root copy",
10515 "--repair try to repair the filesystem",
10516 "--readonly run in read-only mode (default)",
10517 "--init-csum-tree create a new CRC tree",
10518 "--init-extent-tree create a new extent tree",
10519 "--check-data-csum verify checksums of data blocks",
10520 "-Q|--qgroup-report print a report on qgroup consistency",
10521 "-E|--subvol-extents <subvolid>",
10522 " print subvolume extents and sharing state",
10523 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
10524 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
10525 "-p|--progress indicate progress",
10529 int cmd_check(int argc, char **argv)
10531 struct cache_tree root_cache;
10532 struct btrfs_root *root;
10533 struct btrfs_fs_info *info;
10536 u64 tree_root_bytenr = 0;
10537 u64 chunk_root_bytenr = 0;
10538 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10541 int init_csum_tree = 0;
10543 int qgroup_report = 0;
10544 int qgroups_repaired = 0;
10545 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10549 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10550 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10551 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10552 static const struct option long_options[] = {
10553 { "super", required_argument, NULL, 's' },
10554 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10555 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10556 { "init-csum-tree", no_argument, NULL,
10557 GETOPT_VAL_INIT_CSUM },
10558 { "init-extent-tree", no_argument, NULL,
10559 GETOPT_VAL_INIT_EXTENT },
10560 { "check-data-csum", no_argument, NULL,
10561 GETOPT_VAL_CHECK_CSUM },
10562 { "backup", no_argument, NULL, 'b' },
10563 { "subvol-extents", required_argument, NULL, 'E' },
10564 { "qgroup-report", no_argument, NULL, 'Q' },
10565 { "tree-root", required_argument, NULL, 'r' },
10566 { "chunk-root", required_argument, NULL,
10567 GETOPT_VAL_CHUNK_TREE },
10568 { "progress", no_argument, NULL, 'p' },
10569 { NULL, 0, NULL, 0}
10572 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10576 case 'a': /* ignored */ break;
10578 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10581 num = arg_strtou64(optarg);
10582 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10584 "ERROR: super mirror should be less than: %d\n",
10585 BTRFS_SUPER_MIRROR_MAX);
10588 bytenr = btrfs_sb_offset(((int)num));
10589 printf("using SB copy %llu, bytenr %llu\n", num,
10590 (unsigned long long)bytenr);
10596 subvolid = arg_strtou64(optarg);
10599 tree_root_bytenr = arg_strtou64(optarg);
10601 case GETOPT_VAL_CHUNK_TREE:
10602 chunk_root_bytenr = arg_strtou64(optarg);
10605 ctx.progress_enabled = true;
10609 usage(cmd_check_usage);
10610 case GETOPT_VAL_REPAIR:
10611 printf("enabling repair mode\n");
10613 ctree_flags |= OPEN_CTREE_WRITES;
10615 case GETOPT_VAL_READONLY:
10618 case GETOPT_VAL_INIT_CSUM:
10619 printf("Creating a new CRC tree\n");
10620 init_csum_tree = 1;
10622 ctree_flags |= OPEN_CTREE_WRITES;
10624 case GETOPT_VAL_INIT_EXTENT:
10625 init_extent_tree = 1;
10626 ctree_flags |= (OPEN_CTREE_WRITES |
10627 OPEN_CTREE_NO_BLOCK_GROUPS);
10630 case GETOPT_VAL_CHECK_CSUM:
10631 check_data_csum = 1;
10636 if (check_argc_exact(argc - optind, 1))
10637 usage(cmd_check_usage);
10639 if (ctx.progress_enabled) {
10640 ctx.tp = TASK_NOTHING;
10641 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10644 /* This check is the only reason for --readonly to exist */
10645 if (readonly && repair) {
10646 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10651 cache_tree_init(&root_cache);
10653 if((ret = check_mounted(argv[optind])) < 0) {
10654 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10657 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10662 /* only allow partial opening under repair mode */
10664 ctree_flags |= OPEN_CTREE_PARTIAL;
10666 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10667 chunk_root_bytenr, ctree_flags);
10669 fprintf(stderr, "Couldn't open file system\n");
10674 global_info = info;
10675 root = info->fs_root;
10678 * repair mode will force us to commit transaction which
10679 * will make us fail to load log tree when mounting.
10681 if (repair && btrfs_super_log_root(info->super_copy)) {
10682 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10687 ret = zero_log_tree(root);
10689 fprintf(stderr, "fail to zero log tree\n");
10694 uuid_unparse(info->super_copy->fsid, uuidbuf);
10695 if (qgroup_report) {
10696 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10698 ret = qgroup_verify_all(info);
10704 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10705 subvolid, argv[optind], uuidbuf);
10706 ret = print_extent_state(info, subvolid);
10709 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10711 if (!extent_buffer_uptodate(info->tree_root->node) ||
10712 !extent_buffer_uptodate(info->dev_root->node) ||
10713 !extent_buffer_uptodate(info->chunk_root->node)) {
10714 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10719 if (init_extent_tree || init_csum_tree) {
10720 struct btrfs_trans_handle *trans;
10722 trans = btrfs_start_transaction(info->extent_root, 0);
10723 if (IS_ERR(trans)) {
10724 fprintf(stderr, "Error starting transaction\n");
10725 ret = PTR_ERR(trans);
10729 if (init_extent_tree) {
10730 printf("Creating a new extent tree\n");
10731 ret = reinit_extent_tree(trans, info);
10736 if (init_csum_tree) {
10737 fprintf(stderr, "Reinit crc root\n");
10738 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10740 fprintf(stderr, "crc root initialization failed\n");
10745 ret = fill_csum_tree(trans, info->csum_root,
10748 fprintf(stderr, "crc refilling failed\n");
10753 * Ok now we commit and run the normal fsck, which will add
10754 * extent entries for all of the items it finds.
10756 ret = btrfs_commit_transaction(trans, info->extent_root);
10760 if (!extent_buffer_uptodate(info->extent_root->node)) {
10761 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10765 if (!extent_buffer_uptodate(info->csum_root->node)) {
10766 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10771 if (!ctx.progress_enabled)
10772 fprintf(stderr, "checking extents\n");
10773 ret = check_chunks_and_extents(root);
10775 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10777 ret = repair_root_items(info);
10781 fprintf(stderr, "Fixed %d roots.\n", ret);
10783 } else if (ret > 0) {
10785 "Found %d roots with an outdated root item.\n",
10788 "Please run a filesystem check with the option --repair to fix them.\n");
10793 if (!ctx.progress_enabled) {
10794 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10795 fprintf(stderr, "checking free space tree\n");
10797 fprintf(stderr, "checking free space cache\n");
10799 ret = check_space_cache(root);
10804 * We used to have to have these hole extents in between our real
10805 * extents so if we don't have this flag set we need to make sure there
10806 * are no gaps in the file extents for inodes, otherwise we can just
10807 * ignore it when this happens.
10809 no_holes = btrfs_fs_incompat(root->fs_info,
10810 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10811 if (!ctx.progress_enabled)
10812 fprintf(stderr, "checking fs roots\n");
10813 ret = check_fs_roots(root, &root_cache);
10817 fprintf(stderr, "checking csums\n");
10818 ret = check_csums(root);
10822 fprintf(stderr, "checking root refs\n");
10823 ret = check_root_refs(root, &root_cache);
10827 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10828 struct extent_buffer *eb;
10830 eb = list_first_entry(&root->fs_info->recow_ebs,
10831 struct extent_buffer, recow);
10832 list_del_init(&eb->recow);
10833 ret = recow_extent_buffer(root, eb);
10838 while (!list_empty(&delete_items)) {
10839 struct bad_item *bad;
10841 bad = list_first_entry(&delete_items, struct bad_item, list);
10842 list_del_init(&bad->list);
10844 ret = delete_bad_item(root, bad);
10848 if (info->quota_enabled) {
10850 fprintf(stderr, "checking quota groups\n");
10851 err = qgroup_verify_all(info);
10855 err = repair_qgroups(info, &qgroups_repaired);
10860 if (!list_empty(&root->fs_info->recow_ebs)) {
10861 fprintf(stderr, "Transid errors in file system\n");
10865 /* Don't override original ret */
10866 if (!ret && qgroups_repaired)
10867 ret = qgroups_repaired;
10869 if (found_old_backref) { /*
10870 * there was a disk format change when mixed
10871 * backref was in testing tree. The old format
10872 * existed about one week.
10874 printf("\n * Found old mixed backref format. "
10875 "The old format is not supported! *"
10876 "\n * Please mount the FS in readonly mode, "
10877 "backup data and re-format the FS. *\n\n");
10880 printf("found %llu bytes used err is %d\n",
10881 (unsigned long long)bytes_used, ret);
10882 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10883 printf("total tree bytes: %llu\n",
10884 (unsigned long long)total_btree_bytes);
10885 printf("total fs tree bytes: %llu\n",
10886 (unsigned long long)total_fs_tree_bytes);
10887 printf("total extent tree bytes: %llu\n",
10888 (unsigned long long)total_extent_tree_bytes);
10889 printf("btree space waste bytes: %llu\n",
10890 (unsigned long long)btree_space_waste);
10891 printf("file data blocks allocated: %llu\n referenced %llu\n",
10892 (unsigned long long)data_bytes_allocated,
10893 (unsigned long long)data_bytes_referenced);
10895 free_qgroup_counts();
10896 free_root_recs_tree(&root_cache);
10900 if (ctx.progress_enabled)
10901 task_deinit(ctx.info);