2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 struct extent_backref {
80 unsigned int is_data:1;
81 unsigned int found_extent_tree:1;
82 unsigned int full_backref:1;
83 unsigned int found_ref:1;
84 unsigned int broken:1;
87 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
89 return rb_entry(node, struct extent_backref, node);
93 struct extent_backref node;
107 static inline struct data_backref* to_data_backref(struct extent_backref *back)
109 return container_of(back, struct data_backref, node);
112 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
114 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
115 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
116 struct data_backref *back1 = to_data_backref(ext1);
117 struct data_backref *back2 = to_data_backref(ext2);
119 WARN_ON(!ext1->is_data);
120 WARN_ON(!ext2->is_data);
122 /* parent and root are a union, so this covers both */
123 if (back1->parent > back2->parent)
125 if (back1->parent < back2->parent)
128 /* This is a full backref and the parents match. */
129 if (back1->node.full_backref)
132 if (back1->owner > back2->owner)
134 if (back1->owner < back2->owner)
137 if (back1->offset > back2->offset)
139 if (back1->offset < back2->offset)
142 if (back1->bytes > back2->bytes)
144 if (back1->bytes < back2->bytes)
147 if (back1->found_ref && back2->found_ref) {
148 if (back1->disk_bytenr > back2->disk_bytenr)
150 if (back1->disk_bytenr < back2->disk_bytenr)
153 if (back1->found_ref > back2->found_ref)
155 if (back1->found_ref < back2->found_ref)
163 * Much like data_backref, just removed the undetermined members
164 * and change it to use list_head.
165 * During extent scan, it is stored in root->orphan_data_extent.
166 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
168 struct orphan_data_extent {
169 struct list_head list;
177 struct tree_backref {
178 struct extent_backref node;
185 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
187 return container_of(back, struct tree_backref, node);
190 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
192 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
193 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
194 struct tree_backref *back1 = to_tree_backref(ext1);
195 struct tree_backref *back2 = to_tree_backref(ext2);
197 WARN_ON(ext1->is_data);
198 WARN_ON(ext2->is_data);
200 /* parent and root are a union, so this covers both */
201 if (back1->parent > back2->parent)
203 if (back1->parent < back2->parent)
209 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
211 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
212 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
214 if (ext1->is_data > ext2->is_data)
217 if (ext1->is_data < ext2->is_data)
220 if (ext1->full_backref > ext2->full_backref)
222 if (ext1->full_backref < ext2->full_backref)
226 return compare_data_backref(node1, node2);
228 return compare_tree_backref(node1, node2);
231 /* Explicit initialization for extent_record::flag_block_full_backref */
232 enum { FLAG_UNSET = 2 };
234 struct extent_record {
235 struct list_head backrefs;
236 struct list_head dups;
237 struct rb_root backref_tree;
238 struct list_head list;
239 struct cache_extent cache;
240 struct btrfs_disk_key parent_key;
245 u64 extent_item_refs;
247 u64 parent_generation;
251 unsigned int flag_block_full_backref:2;
252 unsigned int found_rec:1;
253 unsigned int content_checked:1;
254 unsigned int owner_ref_checked:1;
255 unsigned int is_root:1;
256 unsigned int metadata:1;
257 unsigned int bad_full_backref:1;
258 unsigned int crossing_stripes:1;
259 unsigned int wrong_chunk_type:1;
262 static inline struct extent_record* to_extent_record(struct list_head *entry)
264 return container_of(entry, struct extent_record, list);
267 struct inode_backref {
268 struct list_head list;
269 unsigned int found_dir_item:1;
270 unsigned int found_dir_index:1;
271 unsigned int found_inode_ref:1;
272 unsigned int filetype:8;
274 unsigned int ref_type;
281 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
283 return list_entry(entry, struct inode_backref, list);
286 struct root_item_record {
287 struct list_head list;
294 struct btrfs_key drop_key;
/* Error bits recorded on a directory/root reference (inode_backref). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
311 struct file_extent_hole {
317 struct inode_record {
318 struct list_head backrefs;
319 unsigned int checked:1;
320 unsigned int merging:1;
321 unsigned int found_inode_item:1;
322 unsigned int found_dir_item:1;
323 unsigned int found_file_extent:1;
324 unsigned int found_csum_item:1;
325 unsigned int some_csum_missing:1;
326 unsigned int nodatasum:1;
339 struct rb_root holes;
340 struct list_head orphan_extents;
/* Error bits recorded on an inode_record. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
361 struct root_backref {
362 struct list_head list;
363 unsigned int found_dir_item:1;
364 unsigned int found_dir_index:1;
365 unsigned int found_back_ref:1;
366 unsigned int found_forward_ref:1;
367 unsigned int reachable:1;
376 static inline struct root_backref* to_root_backref(struct list_head *entry)
378 return list_entry(entry, struct root_backref, list);
382 struct list_head backrefs;
383 struct cache_extent cache;
384 unsigned int found_root_item:1;
390 struct cache_extent cache;
395 struct cache_extent cache;
396 struct cache_tree root_cache;
397 struct cache_tree inode_cache;
398 struct inode_record *current;
407 struct walk_control {
408 struct cache_tree shared;
409 struct shared_node *nodes[BTRFS_MAX_LEVEL];
415 struct btrfs_key key;
417 struct list_head list;
420 struct extent_entry {
425 struct list_head list;
428 struct root_item_info {
429 /* level of the root */
431 /* number of nodes at this level, must be 1 for a root */
435 struct cache_extent cache_extent;
438 static void *print_status_check(void *p)
440 struct task_ctx *priv = p;
441 const char work_indicator[] = { '.', 'o', 'O', 'o' };
443 static char *task_position_string[] = {
445 "checking free space cache",
449 task_period_start(priv->info, 1000 /* 1s */);
451 if (priv->tp == TASK_NOTHING)
455 printf("%s [%c]\r", task_position_string[priv->tp],
456 work_indicator[count % 4]);
459 task_period_wait(priv->info);
464 static int print_status_return(void *p)
472 /* Compatible function to allow reuse of old codes */
473 static u64 first_extent_gap(struct rb_root *holes)
475 struct file_extent_hole *hole;
477 if (RB_EMPTY_ROOT(holes))
480 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
484 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
486 struct file_extent_hole *hole1;
487 struct file_extent_hole *hole2;
489 hole1 = rb_entry(node1, struct file_extent_hole, node);
490 hole2 = rb_entry(node2, struct file_extent_hole, node);
492 if (hole1->start > hole2->start)
494 if (hole1->start < hole2->start)
496 /* Now hole1->start == hole2->start */
497 if (hole1->len >= hole2->len)
499 * Hole 1 will be merge center
500 * Same hole will be merged later
503 /* Hole 2 will be merge center */
508 * Add a hole to the record
510 * This will do hole merge for copy_file_extent_holes(),
511 * which will ensure there won't be continuous holes.
513 static int add_file_extent_hole(struct rb_root *holes,
516 struct file_extent_hole *hole;
517 struct file_extent_hole *prev = NULL;
518 struct file_extent_hole *next = NULL;
520 hole = malloc(sizeof(*hole));
525 /* Since compare will not return 0, no -EEXIST will happen */
526 rb_insert(holes, &hole->node, compare_hole);
528 /* simple merge with previous hole */
529 if (rb_prev(&hole->node))
530 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
532 if (prev && prev->start + prev->len >= hole->start) {
533 hole->len = hole->start + hole->len - prev->start;
534 hole->start = prev->start;
535 rb_erase(&prev->node, holes);
540 /* iterate merge with next holes */
542 if (!rb_next(&hole->node))
544 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
546 if (hole->start + hole->len >= next->start) {
547 if (hole->start + hole->len <= next->start + next->len)
548 hole->len = next->start + next->len -
550 rb_erase(&next->node, holes);
559 static int compare_hole_range(struct rb_node *node, void *data)
561 struct file_extent_hole *hole;
564 hole = (struct file_extent_hole *)data;
567 hole = rb_entry(node, struct file_extent_hole, node);
568 if (start < hole->start)
570 if (start >= hole->start && start < hole->start + hole->len)
576 * Delete a hole in the record
578 * This will do the hole split and is much restrict than add.
580 static int del_file_extent_hole(struct rb_root *holes,
583 struct file_extent_hole *hole;
584 struct file_extent_hole tmp;
589 struct rb_node *node;
596 node = rb_search(holes, &tmp, compare_hole_range, NULL);
599 hole = rb_entry(node, struct file_extent_hole, node);
600 if (start + len > hole->start + hole->len)
604 * Now there will be no overlap, delete the hole and re-add the
605 * split(s) if they exists.
607 if (start > hole->start) {
608 prev_start = hole->start;
609 prev_len = start - hole->start;
612 if (hole->start + hole->len > start + len) {
613 next_start = start + len;
614 next_len = hole->start + hole->len - start - len;
617 rb_erase(node, holes);
620 ret = add_file_extent_hole(holes, prev_start, prev_len);
625 ret = add_file_extent_hole(holes, next_start, next_len);
632 static int copy_file_extent_holes(struct rb_root *dst,
635 struct file_extent_hole *hole;
636 struct rb_node *node;
639 node = rb_first(src);
641 hole = rb_entry(node, struct file_extent_hole, node);
642 ret = add_file_extent_hole(dst, hole->start, hole->len);
645 node = rb_next(node);
650 static void free_file_extent_holes(struct rb_root *holes)
652 struct rb_node *node;
653 struct file_extent_hole *hole;
655 node = rb_first(holes);
657 hole = rb_entry(node, struct file_extent_hole, node);
658 rb_erase(node, holes);
660 node = rb_first(holes);
664 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
666 static void record_root_in_trans(struct btrfs_trans_handle *trans,
667 struct btrfs_root *root)
669 if (root->last_trans != trans->transid) {
670 root->track_dirty = 1;
671 root->last_trans = trans->transid;
672 root->commit_root = root->node;
673 extent_buffer_get(root->node);
677 static u8 imode_to_type(u32 imode)
680 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
681 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
682 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
683 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
684 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
685 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
686 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
687 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
690 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
694 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
696 struct device_record *rec1;
697 struct device_record *rec2;
699 rec1 = rb_entry(node1, struct device_record, node);
700 rec2 = rb_entry(node2, struct device_record, node);
701 if (rec1->devid > rec2->devid)
703 else if (rec1->devid < rec2->devid)
709 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
711 struct inode_record *rec;
712 struct inode_backref *backref;
713 struct inode_backref *orig;
714 struct inode_backref *tmp;
715 struct orphan_data_extent *src_orphan;
716 struct orphan_data_extent *dst_orphan;
720 rec = malloc(sizeof(*rec));
722 return ERR_PTR(-ENOMEM);
723 memcpy(rec, orig_rec, sizeof(*rec));
725 INIT_LIST_HEAD(&rec->backrefs);
726 INIT_LIST_HEAD(&rec->orphan_extents);
727 rec->holes = RB_ROOT;
729 list_for_each_entry(orig, &orig_rec->backrefs, list) {
730 size = sizeof(*orig) + orig->namelen + 1;
731 backref = malloc(size);
736 memcpy(backref, orig, size);
737 list_add_tail(&backref->list, &rec->backrefs);
739 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
740 dst_orphan = malloc(sizeof(*dst_orphan));
745 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
746 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
748 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
754 if (!list_empty(&rec->backrefs))
755 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
756 list_del(&orig->list);
760 if (!list_empty(&rec->orphan_extents))
761 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
762 list_del(&orig->list);
771 static void print_orphan_data_extents(struct list_head *orphan_extents,
774 struct orphan_data_extent *orphan;
776 if (list_empty(orphan_extents))
778 printf("The following data extent is lost in tree %llu:\n",
780 list_for_each_entry(orphan, orphan_extents, list) {
781 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
782 orphan->objectid, orphan->offset, orphan->disk_bytenr,
787 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
789 u64 root_objectid = root->root_key.objectid;
790 int errors = rec->errors;
794 /* reloc root errors, we print its corresponding fs root objectid*/
795 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
796 root_objectid = root->root_key.offset;
797 fprintf(stderr, "reloc");
799 fprintf(stderr, "root %llu inode %llu errors %x",
800 (unsigned long long) root_objectid,
801 (unsigned long long) rec->ino, rec->errors);
803 if (errors & I_ERR_NO_INODE_ITEM)
804 fprintf(stderr, ", no inode item");
805 if (errors & I_ERR_NO_ORPHAN_ITEM)
806 fprintf(stderr, ", no orphan item");
807 if (errors & I_ERR_DUP_INODE_ITEM)
808 fprintf(stderr, ", dup inode item");
809 if (errors & I_ERR_DUP_DIR_INDEX)
810 fprintf(stderr, ", dup dir index");
811 if (errors & I_ERR_ODD_DIR_ITEM)
812 fprintf(stderr, ", odd dir item");
813 if (errors & I_ERR_ODD_FILE_EXTENT)
814 fprintf(stderr, ", odd file extent");
815 if (errors & I_ERR_BAD_FILE_EXTENT)
816 fprintf(stderr, ", bad file extent");
817 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
818 fprintf(stderr, ", file extent overlap");
819 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
820 fprintf(stderr, ", file extent discount");
821 if (errors & I_ERR_DIR_ISIZE_WRONG)
822 fprintf(stderr, ", dir isize wrong");
823 if (errors & I_ERR_FILE_NBYTES_WRONG)
824 fprintf(stderr, ", nbytes wrong");
825 if (errors & I_ERR_ODD_CSUM_ITEM)
826 fprintf(stderr, ", odd csum item");
827 if (errors & I_ERR_SOME_CSUM_MISSING)
828 fprintf(stderr, ", some csum missing");
829 if (errors & I_ERR_LINK_COUNT_WRONG)
830 fprintf(stderr, ", link count wrong");
831 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
832 fprintf(stderr, ", orphan file extent");
833 fprintf(stderr, "\n");
834 /* Print the orphan extents if needed */
835 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
836 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
838 /* Print the holes if needed */
839 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
840 struct file_extent_hole *hole;
841 struct rb_node *node;
844 node = rb_first(&rec->holes);
845 fprintf(stderr, "Found file extent holes:\n");
848 hole = rb_entry(node, struct file_extent_hole, node);
849 fprintf(stderr, "\tstart: %llu, len: %llu\n",
850 hole->start, hole->len);
851 node = rb_next(node);
854 fprintf(stderr, "\tstart: 0, len: %llu\n",
855 round_up(rec->isize, root->sectorsize));
859 static void print_ref_error(int errors)
861 if (errors & REF_ERR_NO_DIR_ITEM)
862 fprintf(stderr, ", no dir item");
863 if (errors & REF_ERR_NO_DIR_INDEX)
864 fprintf(stderr, ", no dir index");
865 if (errors & REF_ERR_NO_INODE_REF)
866 fprintf(stderr, ", no inode ref");
867 if (errors & REF_ERR_DUP_DIR_ITEM)
868 fprintf(stderr, ", dup dir item");
869 if (errors & REF_ERR_DUP_DIR_INDEX)
870 fprintf(stderr, ", dup dir index");
871 if (errors & REF_ERR_DUP_INODE_REF)
872 fprintf(stderr, ", dup inode ref");
873 if (errors & REF_ERR_INDEX_UNMATCH)
874 fprintf(stderr, ", index mismatch");
875 if (errors & REF_ERR_FILETYPE_UNMATCH)
876 fprintf(stderr, ", filetype mismatch");
877 if (errors & REF_ERR_NAME_TOO_LONG)
878 fprintf(stderr, ", name too long");
879 if (errors & REF_ERR_NO_ROOT_REF)
880 fprintf(stderr, ", no root ref");
881 if (errors & REF_ERR_NO_ROOT_BACKREF)
882 fprintf(stderr, ", no root backref");
883 if (errors & REF_ERR_DUP_ROOT_REF)
884 fprintf(stderr, ", dup root ref");
885 if (errors & REF_ERR_DUP_ROOT_BACKREF)
886 fprintf(stderr, ", dup root backref");
887 fprintf(stderr, "\n");
890 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
893 struct ptr_node *node;
894 struct cache_extent *cache;
895 struct inode_record *rec = NULL;
898 cache = lookup_cache_extent(inode_cache, ino, 1);
900 node = container_of(cache, struct ptr_node, cache);
902 if (mod && rec->refs > 1) {
903 node->data = clone_inode_rec(rec);
904 if (IS_ERR(node->data))
910 rec = calloc(1, sizeof(*rec));
912 return ERR_PTR(-ENOMEM);
914 rec->extent_start = (u64)-1;
916 INIT_LIST_HEAD(&rec->backrefs);
917 INIT_LIST_HEAD(&rec->orphan_extents);
918 rec->holes = RB_ROOT;
920 node = malloc(sizeof(*node));
923 return ERR_PTR(-ENOMEM);
925 node->cache.start = ino;
926 node->cache.size = 1;
929 if (ino == BTRFS_FREE_INO_OBJECTID)
932 ret = insert_cache_extent(inode_cache, &node->cache);
934 return ERR_PTR(-EEXIST);
939 static void free_orphan_data_extents(struct list_head *orphan_extents)
941 struct orphan_data_extent *orphan;
943 while (!list_empty(orphan_extents)) {
944 orphan = list_entry(orphan_extents->next,
945 struct orphan_data_extent, list);
946 list_del(&orphan->list);
951 static void free_inode_rec(struct inode_record *rec)
953 struct inode_backref *backref;
958 while (!list_empty(&rec->backrefs)) {
959 backref = to_inode_backref(rec->backrefs.next);
960 list_del(&backref->list);
963 free_orphan_data_extents(&rec->orphan_extents);
964 free_file_extent_holes(&rec->holes);
968 static int can_free_inode_rec(struct inode_record *rec)
970 if (!rec->errors && rec->checked && rec->found_inode_item &&
971 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
976 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
977 struct inode_record *rec)
979 struct cache_extent *cache;
980 struct inode_backref *tmp, *backref;
981 struct ptr_node *node;
982 unsigned char filetype;
984 if (!rec->found_inode_item)
987 filetype = imode_to_type(rec->imode);
988 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
989 if (backref->found_dir_item && backref->found_dir_index) {
990 if (backref->filetype != filetype)
991 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
992 if (!backref->errors && backref->found_inode_ref &&
993 rec->nlink == rec->found_link) {
994 list_del(&backref->list);
1000 if (!rec->checked || rec->merging)
1003 if (S_ISDIR(rec->imode)) {
1004 if (rec->found_size != rec->isize)
1005 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1006 if (rec->found_file_extent)
1007 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1008 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1009 if (rec->found_dir_item)
1010 rec->errors |= I_ERR_ODD_DIR_ITEM;
1011 if (rec->found_size != rec->nbytes)
1012 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1013 if (rec->nlink > 0 && !no_holes &&
1014 (rec->extent_end < rec->isize ||
1015 first_extent_gap(&rec->holes) < rec->isize))
1016 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1019 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1020 if (rec->found_csum_item && rec->nodatasum)
1021 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1022 if (rec->some_csum_missing && !rec->nodatasum)
1023 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1026 BUG_ON(rec->refs != 1);
1027 if (can_free_inode_rec(rec)) {
1028 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1029 node = container_of(cache, struct ptr_node, cache);
1030 BUG_ON(node->data != rec);
1031 remove_cache_extent(inode_cache, &node->cache);
1033 free_inode_rec(rec);
1037 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1039 struct btrfs_path path;
1040 struct btrfs_key key;
1043 key.objectid = BTRFS_ORPHAN_OBJECTID;
1044 key.type = BTRFS_ORPHAN_ITEM_KEY;
1047 btrfs_init_path(&path);
1048 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1049 btrfs_release_path(&path);
1055 static int process_inode_item(struct extent_buffer *eb,
1056 int slot, struct btrfs_key *key,
1057 struct shared_node *active_node)
1059 struct inode_record *rec;
1060 struct btrfs_inode_item *item;
1062 rec = active_node->current;
1063 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1064 if (rec->found_inode_item) {
1065 rec->errors |= I_ERR_DUP_INODE_ITEM;
1068 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1069 rec->nlink = btrfs_inode_nlink(eb, item);
1070 rec->isize = btrfs_inode_size(eb, item);
1071 rec->nbytes = btrfs_inode_nbytes(eb, item);
1072 rec->imode = btrfs_inode_mode(eb, item);
1073 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1075 rec->found_inode_item = 1;
1076 if (rec->nlink == 0)
1077 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1078 maybe_free_inode_rec(&active_node->inode_cache, rec);
1082 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1084 int namelen, u64 dir)
1086 struct inode_backref *backref;
1088 list_for_each_entry(backref, &rec->backrefs, list) {
1089 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1091 if (backref->dir != dir || backref->namelen != namelen)
1093 if (memcmp(name, backref->name, namelen))
1098 backref = malloc(sizeof(*backref) + namelen + 1);
1101 memset(backref, 0, sizeof(*backref));
1103 backref->namelen = namelen;
1104 memcpy(backref->name, name, namelen);
1105 backref->name[namelen] = '\0';
1106 list_add_tail(&backref->list, &rec->backrefs);
1110 static int add_inode_backref(struct cache_tree *inode_cache,
1111 u64 ino, u64 dir, u64 index,
1112 const char *name, int namelen,
1113 int filetype, int itemtype, int errors)
1115 struct inode_record *rec;
1116 struct inode_backref *backref;
1118 rec = get_inode_rec(inode_cache, ino, 1);
1119 BUG_ON(IS_ERR(rec));
1120 backref = get_inode_backref(rec, name, namelen, dir);
1123 backref->errors |= errors;
1124 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1125 if (backref->found_dir_index)
1126 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1127 if (backref->found_inode_ref && backref->index != index)
1128 backref->errors |= REF_ERR_INDEX_UNMATCH;
1129 if (backref->found_dir_item && backref->filetype != filetype)
1130 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1132 backref->index = index;
1133 backref->filetype = filetype;
1134 backref->found_dir_index = 1;
1135 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1137 if (backref->found_dir_item)
1138 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1139 if (backref->found_dir_index && backref->filetype != filetype)
1140 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1142 backref->filetype = filetype;
1143 backref->found_dir_item = 1;
1144 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1145 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1146 if (backref->found_inode_ref)
1147 backref->errors |= REF_ERR_DUP_INODE_REF;
1148 if (backref->found_dir_index && backref->index != index)
1149 backref->errors |= REF_ERR_INDEX_UNMATCH;
1151 backref->index = index;
1153 backref->ref_type = itemtype;
1154 backref->found_inode_ref = 1;
1159 maybe_free_inode_rec(inode_cache, rec);
1163 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1164 struct cache_tree *dst_cache)
1166 struct inode_backref *backref;
1171 list_for_each_entry(backref, &src->backrefs, list) {
1172 if (backref->found_dir_index) {
1173 add_inode_backref(dst_cache, dst->ino, backref->dir,
1174 backref->index, backref->name,
1175 backref->namelen, backref->filetype,
1176 BTRFS_DIR_INDEX_KEY, backref->errors);
1178 if (backref->found_dir_item) {
1180 add_inode_backref(dst_cache, dst->ino,
1181 backref->dir, 0, backref->name,
1182 backref->namelen, backref->filetype,
1183 BTRFS_DIR_ITEM_KEY, backref->errors);
1185 if (backref->found_inode_ref) {
1186 add_inode_backref(dst_cache, dst->ino,
1187 backref->dir, backref->index,
1188 backref->name, backref->namelen, 0,
1189 backref->ref_type, backref->errors);
1193 if (src->found_dir_item)
1194 dst->found_dir_item = 1;
1195 if (src->found_file_extent)
1196 dst->found_file_extent = 1;
1197 if (src->found_csum_item)
1198 dst->found_csum_item = 1;
1199 if (src->some_csum_missing)
1200 dst->some_csum_missing = 1;
1201 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1202 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1207 BUG_ON(src->found_link < dir_count);
1208 dst->found_link += src->found_link - dir_count;
1209 dst->found_size += src->found_size;
1210 if (src->extent_start != (u64)-1) {
1211 if (dst->extent_start == (u64)-1) {
1212 dst->extent_start = src->extent_start;
1213 dst->extent_end = src->extent_end;
1215 if (dst->extent_end > src->extent_start)
1216 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1217 else if (dst->extent_end < src->extent_start) {
1218 ret = add_file_extent_hole(&dst->holes,
1220 src->extent_start - dst->extent_end);
1222 if (dst->extent_end < src->extent_end)
1223 dst->extent_end = src->extent_end;
1227 dst->errors |= src->errors;
1228 if (src->found_inode_item) {
1229 if (!dst->found_inode_item) {
1230 dst->nlink = src->nlink;
1231 dst->isize = src->isize;
1232 dst->nbytes = src->nbytes;
1233 dst->imode = src->imode;
1234 dst->nodatasum = src->nodatasum;
1235 dst->found_inode_item = 1;
1237 dst->errors |= I_ERR_DUP_INODE_ITEM;
1245 static int splice_shared_node(struct shared_node *src_node,
1246 struct shared_node *dst_node)
1248 struct cache_extent *cache;
1249 struct ptr_node *node, *ins;
1250 struct cache_tree *src, *dst;
1251 struct inode_record *rec, *conflict;
1252 u64 current_ino = 0;
1256 if (--src_node->refs == 0)
1258 if (src_node->current)
1259 current_ino = src_node->current->ino;
1261 src = &src_node->root_cache;
1262 dst = &dst_node->root_cache;
1264 cache = search_cache_extent(src, 0);
1266 node = container_of(cache, struct ptr_node, cache);
1268 cache = next_cache_extent(cache);
1271 remove_cache_extent(src, &node->cache);
1274 ins = malloc(sizeof(*ins));
1276 ins->cache.start = node->cache.start;
1277 ins->cache.size = node->cache.size;
1281 ret = insert_cache_extent(dst, &ins->cache);
1282 if (ret == -EEXIST) {
1283 conflict = get_inode_rec(dst, rec->ino, 1);
1284 BUG_ON(IS_ERR(conflict));
1285 merge_inode_recs(rec, conflict, dst);
1287 conflict->checked = 1;
1288 if (dst_node->current == conflict)
1289 dst_node->current = NULL;
1291 maybe_free_inode_rec(dst, conflict);
1292 free_inode_rec(rec);
1299 if (src == &src_node->root_cache) {
1300 src = &src_node->inode_cache;
1301 dst = &dst_node->inode_cache;
1305 if (current_ino > 0 && (!dst_node->current ||
1306 current_ino > dst_node->current->ino)) {
1307 if (dst_node->current) {
1308 dst_node->current->checked = 1;
1309 maybe_free_inode_rec(dst, dst_node->current);
1311 dst_node->current = get_inode_rec(dst, current_ino, 1);
1312 BUG_ON(IS_ERR(dst_node->current));
1317 static void free_inode_ptr(struct cache_extent *cache)
1319 struct ptr_node *node;
1320 struct inode_record *rec;
1322 node = container_of(cache, struct ptr_node, cache);
1324 free_inode_rec(rec);
1328 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1330 static struct shared_node *find_shared_node(struct cache_tree *shared,
1333 struct cache_extent *cache;
1334 struct shared_node *node;
1336 cache = lookup_cache_extent(shared, bytenr, 1);
1338 node = container_of(cache, struct shared_node, cache);
1344 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1347 struct shared_node *node;
1349 node = calloc(1, sizeof(*node));
1352 node->cache.start = bytenr;
1353 node->cache.size = 1;
1354 cache_tree_init(&node->root_cache);
1355 cache_tree_init(&node->inode_cache);
1358 ret = insert_cache_extent(shared, &node->cache);
1363 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1364 struct walk_control *wc, int level)
1366 struct shared_node *node;
1367 struct shared_node *dest;
1370 if (level == wc->active_node)
1373 BUG_ON(wc->active_node <= level);
1374 node = find_shared_node(&wc->shared, bytenr);
1376 ret = add_shared_node(&wc->shared, bytenr, refs);
1378 node = find_shared_node(&wc->shared, bytenr);
1379 wc->nodes[level] = node;
1380 wc->active_node = level;
1384 if (wc->root_level == wc->active_node &&
1385 btrfs_root_refs(&root->root_item) == 0) {
1386 if (--node->refs == 0) {
1387 free_inode_recs_tree(&node->root_cache);
1388 free_inode_recs_tree(&node->inode_cache);
1389 remove_cache_extent(&wc->shared, &node->cache);
1395 dest = wc->nodes[wc->active_node];
1396 splice_shared_node(node, dest);
1397 if (node->refs == 0) {
1398 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk pops back above
 * @level.  Finds the next populated level above it, makes that the active
 * node, and splices the departing node's accumulated records into it
 * (unless the whole root is going away, i.e. refs == 0 at root level).
 */
1404 static int leave_shared_node(struct btrfs_root *root,
1405 struct walk_control *wc, int level)
1407 struct shared_node *node;
1408 struct shared_node *dest;
/* Leaving the root level: nothing above us to merge into. */
1411 if (level == wc->root_level)
/* Find the nearest tracked level above the one we are leaving. */
1414 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1418 BUG_ON(i >= BTRFS_MAX_LEVEL);
1420 node = wc->nodes[wc->active_node];
1421 wc->nodes[wc->active_node] = NULL;
1422 wc->active_node = i;
1424 dest = wc->nodes[wc->active_node];
1425 if (wc->active_node < wc->root_level ||
1426 btrfs_root_refs(&root->root_item) > 0) {
/* A shared node must still have the parent's reference plus ours. */
1427 BUG_ON(node->refs <= 1);
1428 splice_shared_node(node, dest);
1430 BUG_ON(node->refs < 2);
1439 * 1 - if the root with id child_root_id is a child of root parent_root_id
1440 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1441 * has other root(s) as parent(s)
1442 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * See the comment block above for the return contract (1 = child of
 * parent_root_id, 0 = has other parents only, 2 = no parent at all).
 * First tries the forward ROOT_REF (parent -> child) in the tree root;
 * if absent, scans all ROOT_BACKREF items of the child to decide.
 */
1444 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1447 struct btrfs_path path;
1448 struct btrfs_key key;
1449 struct extent_buffer *leaf;
1453 btrfs_init_path(&path);
/* Fast path: direct ROOT_REF item (parent, REF, child). */
1455 key.objectid = parent_root_id;
1456 key.type = BTRFS_ROOT_REF_KEY;
1457 key.offset = child_root_id;
1458 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1462 btrfs_release_path(&path);
/* Slow path: walk every ROOT_BACKREF of the child root. */
1466 key.objectid = child_root_id;
1467 key.type = BTRFS_ROOT_BACKREF_KEY;
1469 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1475 leaf = path.nodes[0];
1476 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1477 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1480 leaf = path.nodes[0];
1483 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the child's backref items — stop scanning. */
1484 if (key.objectid != child_root_id ||
1485 key.type != BTRFS_ROOT_BACKREF_KEY)
1490 if (key.offset == parent_root_id) {
1491 btrfs_release_path(&path);
1498 btrfs_release_path(&path);
1501 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * @slot of leaf @eb into the active node's caches: entries pointing at
 * inodes go to inode_cache, entries pointing at subvolume roots go to
 * root_cache, and anything else is flagged with a multiple-objectids
 * backref so later verification reports it.
 */
1504 static int process_dir_item(struct btrfs_root *root,
1505 struct extent_buffer *eb,
1506 int slot, struct btrfs_key *key,
1507 struct shared_node *active_node)
1517 struct btrfs_dir_item *di;
1518 struct inode_record *rec;
1519 struct cache_tree *root_cache;
1520 struct cache_tree *inode_cache;
1521 struct btrfs_key location;
1522 char namebuf[BTRFS_NAME_LEN];
1524 root_cache = &active_node->root_cache;
1525 inode_cache = &active_node->inode_cache;
1526 rec = active_node->current;
1527 rec->found_dir_item = 1;
/* A single item may hold several packed dir entries; walk them all. */
1529 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1530 total = btrfs_item_size_nr(eb, slot);
1531 while (cur < total) {
1533 btrfs_dir_item_key_to_cpu(eb, di, &location);
1534 name_len = btrfs_dir_name_len(eb, di);
1535 data_len = btrfs_dir_data_len(eb, di);
1536 filetype = btrfs_dir_type(eb, di);
/* Dir isize accounting is by name length. */
1538 rec->found_size += name_len;
1539 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and remember the error. */
1543 len = BTRFS_NAME_LEN;
1544 error = REF_ERR_NAME_TOO_LONG;
1546 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1548 if (location.type == BTRFS_INODE_ITEM_KEY) {
1549 add_inode_backref(inode_cache, location.objectid,
1550 key->objectid, key->offset, namebuf,
1551 len, filetype, key->type, error);
1552 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1553 add_inode_backref(root_cache, location.objectid,
1554 key->objectid, key->offset,
1555 namebuf, len, filetype,
/* Neither inode nor root: still record it so fsck can complain. */
1558 fprintf(stderr, "invalid location in dir item %u\n",
1560 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1561 key->objectid, key->offset, namebuf,
1562 len, filetype, key->type, error);
/* Advance past this entry's header, name and payload. */
1565 len = sizeof(*di) + name_len + data_len;
1566 di = (struct btrfs_dir_item *)((char *)di + len);
/* DIR_INDEX items must contain exactly one entry. */
1569 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1570 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record each INODE_REF packed into the item at @slot: a (parent dir =
 * key->offset, name, index) backref for inode key->objectid, added to the
 * active node's inode cache for later cross-checking against dir items.
 */
1575 static int process_inode_ref(struct extent_buffer *eb,
1576 int slot, struct btrfs_key *key,
1577 struct shared_node *active_node)
1585 struct cache_tree *inode_cache;
1586 struct btrfs_inode_ref *ref;
1587 char namebuf[BTRFS_NAME_LEN];
1589 inode_cache = &active_node->inode_cache;
1591 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1592 total = btrfs_item_size_nr(eb, slot);
/* One item may pack multiple refs back-to-back; walk them all. */
1593 while (cur < total) {
1594 name_len = btrfs_inode_ref_name_len(eb, ref);
1595 index = btrfs_inode_ref_index(eb, ref);
1596 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1600 len = BTRFS_NAME_LEN;
1601 error = REF_ERR_NAME_TOO_LONG;
1603 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, key->offset,
1605 index, namebuf, len, 0, key->type, error);
1607 len = sizeof(*ref) + name_len;
1608 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items, which carry the
 * parent directory inline (extref->parent) instead of in the item key —
 * used when an inode has too many hard links for regular INODE_REFs.
 */
1614 static int process_inode_extref(struct extent_buffer *eb,
1615 int slot, struct btrfs_key *key,
1616 struct shared_node *active_node)
1625 struct cache_tree *inode_cache;
1626 struct btrfs_inode_extref *extref;
1627 char namebuf[BTRFS_NAME_LEN];
1629 inode_cache = &active_node->inode_cache;
1631 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632 total = btrfs_item_size_nr(eb, slot);
1633 while (cur < total) {
1634 name_len = btrfs_inode_extref_name_len(eb, extref);
1635 index = btrfs_inode_extref_index(eb, extref);
1636 parent = btrfs_inode_extref_parent(eb, extref);
1637 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1641 len = BTRFS_NAME_LEN;
1642 error = REF_ERR_NAME_TOO_LONG;
1644 read_extent_buffer(eb, namebuf,
1645 (unsigned long)(extref + 1), len);
1646 add_inode_backref(inode_cache, key->objectid, parent,
1647 index, namebuf, len, 0, key->type, error);
1649 len = sizeof(*extref) + name_len;
1650 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree, returning the total via *found.
 * Used to decide whether a file extent has complete, partial or no csums.
 */
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658 u64 len, u64 *found)
1660 struct btrfs_key key;
1661 struct btrfs_path path;
1662 struct extent_buffer *leaf;
1667 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1669 btrfs_init_path(&path);
1671 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1673 key.type = BTRFS_EXTENT_CSUM_KEY;
1675 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * An exact match may be missing: the previous item can still cover
 * @start, so step back one slot and check it too.
 */
1679 if (ret > 0 && path.slots[0] > 0) {
1680 leaf = path.nodes[0];
1681 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683 key.type == BTRFS_EXTENT_CSUM_KEY)
1688 leaf = path.nodes[0];
1689 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1695 leaf = path.nodes[0];
1698 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the csum key space — done. */
1699 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700 key.type != BTRFS_EXTENT_CSUM_KEY)
1703 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704 if (key.offset >= start + len)
1707 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector of data. */
1710 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1712 if (csum_end > start) {
1713 size = min(csum_end - start, len);
1722 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at @slot for the current inode record:
 * detects overlapping/missing file extents (hole tracking), malformed
 * inline and regular extents, bogus prealloc encodings, and — for real
 * (non-hole) extents — whether csum coverage matches the extent type.
 */
1728 static int process_file_extent(struct btrfs_root *root,
1729 struct extent_buffer *eb,
1730 int slot, struct btrfs_key *key,
1731 struct shared_node *active_node)
1733 struct inode_record *rec;
1734 struct btrfs_file_extent_item *fi;
1736 u64 disk_bytenr = 0;
1737 u64 extent_offset = 0;
/* mask for rounding to sector-size alignment checks below */
1738 u64 mask = root->sectorsize - 1;
1742 rec = active_node->current;
1743 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1744 rec->found_file_extent = 1;
1746 if (rec->extent_start == (u64)-1) {
1747 rec->extent_start = key->offset;
1748 rec->extent_end = key->offset;
/* Extents must be contiguous and sorted; a gap becomes a hole record. */
1751 if (rec->extent_end > key->offset)
1752 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1753 else if (rec->extent_end < key->offset) {
1754 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1755 key->offset - rec->extent_end);
1760 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1761 extent_type = btrfs_file_extent_type(eb, fi);
1763 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1764 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1767 rec->found_size += num_bytes;
/* Round inline length up to a sector for extent_end accounting. */
1768 num_bytes = (num_bytes + mask) & ~mask;
1769 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1770 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1771 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1772 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1773 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector-aligned. */
1774 if (num_bytes == 0 || (num_bytes & mask))
1775 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776 if (num_bytes + extent_offset >
1777 btrfs_file_extent_ram_bytes(eb, fi))
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents may not be compressed/encrypted/encoded. */
1779 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1780 (btrfs_file_extent_compression(eb, fi) ||
1781 btrfs_file_extent_encryption(eb, fi) ||
1782 btrfs_file_extent_other_encoding(eb, fi)))
1783 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent: contributes no size. */
1784 if (disk_bytenr > 0)
1785 rec->found_size += num_bytes;
1787 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1789 rec->extent_end = key->offset + num_bytes;
1792 * The data reloc tree will copy full extents into its inode and then
1793 * copy the corresponding csums. Because the extent it copied could be
1794 * a preallocated extent that hasn't been written to yet there may be no
1795 * csums to copy, ergo we won't have csums for our file extent. This is
1796 * ok so just don't bother checking csums if the inode belongs to the
1799 if (disk_bytenr > 0 &&
1800 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over the on-disk bytes. */
1802 if (btrfs_file_extent_compression(eb, fi))
1803 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1805 disk_bytenr += extent_offset;
1807 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents want full csum coverage; PREALLOC wants none. */
1810 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1812 rec->found_csum_item = 1;
1813 if (found < num_bytes)
1814 rec->some_csum_missing = 1;
1815 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1817 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item of leaf @eb, keeping one "current" inode record at a
 * time (items for an inode are contiguous) and dispatching each item to
 * the matching process_* handler.  Skips everything when the root is
 * being deleted and we're accounting at its top level.
 */
1823 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1824 struct walk_control *wc)
1826 struct btrfs_key key;
1830 struct cache_tree *inode_cache;
1831 struct shared_node *active_node;
1833 if (wc->root_level == wc->active_node &&
1834 btrfs_root_refs(&root->root_item) == 0)
1837 active_node = wc->nodes[wc->active_node];
1838 inode_cache = &active_node->inode_cache;
1839 nritems = btrfs_header_nritems(eb);
1840 for (i = 0; i < nritems; i++) {
1841 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan bookkeeping items aren't inode data. */
1843 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1845 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Crossed into a new inode: finalize the previous record first. */
1848 if (active_node->current == NULL ||
1849 active_node->current->ino < key.objectid) {
1850 if (active_node->current) {
1851 active_node->current->checked = 1;
1852 maybe_free_inode_rec(inode_cache,
1853 active_node->current);
1855 active_node->current = get_inode_rec(inode_cache,
1857 BUG_ON(IS_ERR(active_node->current));
1860 case BTRFS_DIR_ITEM_KEY:
1861 case BTRFS_DIR_INDEX_KEY:
1862 ret = process_dir_item(root, eb, i, &key, active_node);
1864 case BTRFS_INODE_REF_KEY:
1865 ret = process_inode_ref(eb, i, &key, active_node);
1867 case BTRFS_INODE_EXTREF_KEY:
1868 ret = process_inode_extref(eb, i, &key, active_node);
1870 case BTRFS_INODE_ITEM_KEY:
1871 ret = process_inode_item(eb, i, &key, active_node);
1873 case BTRFS_EXTENT_DATA_KEY:
1874 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of internal @node starting at @slot,
 * so the subsequent synchronous read_tree_block() calls in walk_down_tree
 * hit warm cache instead of seeking per block.
 */
1884 static void reada_walk_down(struct btrfs_root *root,
1885 struct extent_buffer *node, int slot)
1894 level = btrfs_header_level(node);
1898 nritems = btrfs_header_nritems(node);
1899 blocksize = root->nodesize;
1900 for (i = slot; i < nritems; i++) {
1901 bytenr = btrfs_node_blockptr(node, i);
1902 ptr_gen = btrfs_node_ptr_generation(node, i);
1903 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1908 * Check the child node/leaf by the following condition:
1909 * 1. the first item key of the node/leaf should be the same with the one
1911 * 2. block in parent node should match the child node/leaf.
1912 * 3. generation of parent node and child's header should be consistent.
1914 * Or the child node/leaf pointed by the key in parent is not valid.
1916 * We hope to check leaf owner too, but since subvol may share leaves,
1917 * which makes leaf owner check not so strong, key check should be
1918 * sufficient enough for that case.
/*
 * Verify @child against its pointer in @parent at @slot: first key, block
 * number and generation must all agree (see the comment block above for
 * the rationale).  Each mismatch is reported to stderr; the accumulated
 * return value is set in lines elided from this view.
 */
1920 static int check_child_node(struct btrfs_root *root,
1921 struct extent_buffer *parent, int slot,
1922 struct extent_buffer *child)
1924 struct btrfs_key parent_key;
1925 struct btrfs_key child_key;
1928 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* The child's first key lives in an item (leaf) or key pointer (node). */
1929 if (btrfs_header_level(child) == 0)
1930 btrfs_item_key_to_cpu(child, &child_key, 0);
1932 btrfs_node_key_to_cpu(child, &child_key, 0);
1934 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1937 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1938 parent_key.objectid, parent_key.type, parent_key.offset,
1939 child_key.objectid, child_key.type, child_key.offset);
1941 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1943 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1944 btrfs_node_blockptr(parent, slot),
1945 btrfs_header_bytenr(child));
1947 if (btrfs_node_ptr_generation(parent, slot) !=
1948 btrfs_header_generation(child)) {
1950 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1951 btrfs_header_generation(child),
1952 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend the tree along @path from *level toward the leaves, processing
 * each leaf via process_one_leaf() and validating every child block read
 * (check_child_node + btrfs_check_leaf/node).  Shared blocks (refs > 1)
 * are handed to enter_shared_node() and skipped when already accounted.
 * On return *level points at the level where descent stopped.
 */
1957 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1958 struct walk_control *wc, int *level)
1960 enum btrfs_tree_block_status status;
1963 struct extent_buffer *next;
1964 struct extent_buffer *cur;
1969 WARN_ON(*level < 0);
1970 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Look up how many references the current block has. */
1971 ret = btrfs_lookup_extent_info(NULL, root,
1972 path->nodes[*level]->start,
1973 *level, 1, &refs, NULL);
1980 ret = enter_shared_node(root, path->nodes[*level]->start,
1988 while (*level >= 0) {
1989 WARN_ON(*level < 0);
1990 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1991 cur = path->nodes[*level];
1993 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots — let walk_up_tree pop us. */
1996 if (path->slots[*level] >= btrfs_header_nritems(cur))
1999 ret = process_one_leaf(root, cur, wc);
2004 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2005 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2006 blocksize = root->nodesize;
2007 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* Shared child: account it once, then skip over the subtree. */
2013 ret = enter_shared_node(root, bytenr, refs,
2016 path->slots[*level]++;
2021 next = btrfs_find_tree_block(root, bytenr, blocksize);
2022 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2023 free_extent_buffer(next);
/* Prefetch remaining siblings before the blocking read. */
2024 reada_walk_down(root, cur, path->slots[*level]);
2025 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent as a corrupt extent. */
2027 if (!extent_buffer_uptodate(next)) {
2028 struct btrfs_key node_key;
2030 btrfs_node_key_to_cpu(path->nodes[*level],
2032 path->slots[*level]);
2033 btrfs_add_corrupt_extent_record(root->fs_info,
2035 path->nodes[*level]->start,
2036 root->nodesize, *level);
2042 ret = check_child_node(root, cur, path->slots[*level], next);
2048 if (btrfs_is_leaf(next))
2049 status = btrfs_check_leaf(root, NULL, next);
2051 status = btrfs_check_node(root, NULL, next);
2052 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2053 free_extent_buffer(next);
/* Step down one level onto the verified child. */
2058 *level = *level - 1;
2059 free_extent_buffer(path->nodes[*level]);
2060 path->nodes[*level] = next;
2061 path->slots[*level] = 0;
2064 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop @path upward from *level until a node still has unvisited slots,
 * advancing that slot; frees each fully-consumed node and notifies
 * leave_shared_node() when leaving a level that was the active shared node.
 */
2068 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2069 struct walk_control *wc, int *level)
2072 struct extent_buffer *leaf;
2074 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2075 leaf = path->nodes[i];
/* Found a level with another sibling to visit — resume there. */
2076 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2081 free_extent_buffer(path->nodes[*level]);
2082 path->nodes[*level] = NULL;
2083 BUG_ON(*level > wc->active_node);
2084 if (*level == wc->active_node)
2085 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the record of a tree's root directory inode: it must have
 * an inode item with no errors, nlink 1 with no counted links, and exactly
 * one backref — the implicit ".." self-reference at index 0 with no
 * dir item/index entries.  Return value setup is in elided lines.
 */
2092 static int check_root_dir(struct inode_record *rec)
2094 struct inode_backref *backref;
2097 if (!rec->found_inode_item || rec->errors)
2099 if (rec->nlink != 1 || rec->found_link != 0)
2101 if (list_empty(&rec->backrefs))
2103 backref = to_inode_backref(rec->backrefs.next);
2104 if (!backref->found_inode_ref)
2106 if (backref->index != 0 || backref->namelen != 2 ||
2107 memcmp(backref->name, "..", 2))
2109 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair I_ERR_DIR_ISIZE_WRONG: rewrite the directory inode's size field
 * to the byte count actually accumulated from its entries (found_size).
 * Searches with offset (u64)-1 and steps back to land on the inode item.
 */
2116 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2117 struct btrfs_root *root, struct btrfs_path *path,
2118 struct inode_record *rec)
2120 struct btrfs_inode_item *ei;
2121 struct btrfs_key key;
2124 key.objectid = rec->ino;
2125 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1 positions us just past the inode item; step back below. */
2126 key.offset = (u64)-1;
2128 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2132 if (!path->slots[0]) {
2139 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2140 if (key.objectid != rec->ino) {
2145 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2146 struct btrfs_inode_item);
2147 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2148 btrfs_mark_buffer_dirty(path->nodes[0]);
2149 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2150 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2151 root->root_key.objectid);
2153 btrfs_release_path(path);
/*
 * Repair I_ERR_NO_ORPHAN_ITEM: insert the missing orphan item for an
 * unlinked-but-present inode and clear the error flag on success.
 */
2157 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2158 struct btrfs_root *root,
2159 struct btrfs_path *path,
2160 struct inode_record *rec)
2164 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2165 btrfs_release_path(path);
2167 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair I_ERR_FILE_NBYTES_WRONG: rewrite the inode's nbytes field to the
 * byte total accumulated from its file extents (found_size).
 */
2171 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2172 struct btrfs_root *root,
2173 struct btrfs_path *path,
2174 struct inode_record *rec)
2176 struct btrfs_inode_item *ei;
2177 struct btrfs_key key;
2180 key.objectid = rec->ino;
2181 key.type = BTRFS_INODE_ITEM_KEY;
2184 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2191 /* Since ret == 0, no need to check anything */
2192 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2193 struct btrfs_inode_item);
2194 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2195 btrfs_mark_buffer_dirty(path->nodes[0]);
2196 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2197 printf("reset nbytes for ino %llu root %llu\n",
2198 rec->ino, root->root_key.objectid);
2200 btrfs_release_path(path);
/*
 * Rebuild a missing DIR_INDEX item for @backref (the matching DIR_ITEM
 * and INODE_REF were found).  Inserts the item in its own transaction,
 * marks the backref repaired, and re-checks the parent directory's isize
 * accounting since the new entry adds namelen bytes to it.
 */
2204 static int add_missing_dir_index(struct btrfs_root *root,
2205 struct cache_tree *inode_cache,
2206 struct inode_record *rec,
2207 struct inode_backref *backref)
2209 struct btrfs_path *path;
2210 struct btrfs_trans_handle *trans;
2211 struct btrfs_dir_item *dir_item;
2212 struct extent_buffer *leaf;
2213 struct btrfs_key key;
2214 struct btrfs_disk_key disk_key;
2215 struct inode_record *dir_rec;
2216 unsigned long name_ptr;
/* Item payload: dir_item header immediately followed by the name. */
2217 u32 data_size = sizeof(*dir_item) + backref->namelen;
2220 path = btrfs_alloc_path();
2224 trans = btrfs_start_transaction(root, 1);
2225 if (IS_ERR(trans)) {
2226 btrfs_free_path(path);
2227 return PTR_ERR(trans);
2230 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2231 (unsigned long long)rec->ino);
2232 key.objectid = backref->dir;
2233 key.type = BTRFS_DIR_INDEX_KEY;
2234 key.offset = backref->index;
2236 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2239 leaf = path->nodes[0];
2240 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The entry's location key points at the child inode item. */
2242 disk_key.objectid = cpu_to_le64(rec->ino);
2243 disk_key.type = BTRFS_INODE_ITEM_KEY;
2244 disk_key.offset = 0;
2246 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2247 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2248 btrfs_set_dir_data_len(leaf, dir_item, 0);
2249 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2250 name_ptr = (unsigned long)(dir_item + 1);
2251 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2252 btrfs_mark_buffer_dirty(leaf);
2253 btrfs_free_path(path);
2254 btrfs_commit_transaction(trans, root);
2256 backref->found_dir_index = 1;
2257 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2258 BUG_ON(IS_ERR(dir_rec));
/* The new index entry changes the parent dir's isize bookkeeping. */
2261 dir_rec->found_size += backref->namelen;
2262 if (dir_rec->found_size == dir_rec->isize &&
2263 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2264 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2265 if (dir_rec->found_size != dir_rec->isize)
2266 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry named by @backref.  Looks the entry up by
 * (dir, name, index); if the name inside doesn't match (di == NULL case,
 * handling partly elided) the whole item is removed, otherwise just this
 * one name is deleted from it.  Runs in its own transaction.
 */
2271 static int delete_dir_index(struct btrfs_root *root,
2272 struct cache_tree *inode_cache,
2273 struct inode_record *rec,
2274 struct inode_backref *backref)
2276 struct btrfs_trans_handle *trans;
2277 struct btrfs_dir_item *di;
2278 struct btrfs_path *path;
2281 path = btrfs_alloc_path();
2285 trans = btrfs_start_transaction(root, 1);
2286 if (IS_ERR(trans)) {
2287 btrfs_free_path(path);
2288 return PTR_ERR(trans);
2292 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2293 (unsigned long long)backref->dir,
2294 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2295 (unsigned long long)root->objectid);
/* mod -1: lookup for deletion. */
2297 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2298 backref->name, backref->namelen,
2299 backref->index, -1);
2302 btrfs_free_path(path);
2303 btrfs_commit_transaction(trans, root);
/* Item found but name mismatched: drop the whole item ... */
2310 ret = btrfs_del_item(trans, root, path);
/* ... otherwise remove just this name from the (packed) item. */
2312 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2314 btrfs_free_path(path);
2315 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec from whatever the check has
 * learned: link count from found_link (min 1), nbytes from found_size,
 * and a best-guess mode — directory 0755 if dir items were seen,
 * regular file 0755 otherwise.  Timestamps are set to "now".
 */
2319 static int create_inode_item(struct btrfs_root *root,
2320 struct inode_record *rec,
2321 struct inode_backref *backref, int root_dir)
2323 struct btrfs_trans_handle *trans;
2324 struct btrfs_inode_item inode_item;
2325 time_t now = time(NULL);
2328 trans = btrfs_start_transaction(root, 1);
2329 if (IS_ERR(trans)) {
2330 ret = PTR_ERR(trans);
2334 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2335 "be incomplete, please check permissions and content after "
2336 "the fsck completes.\n", (unsigned long long)root->objectid,
2337 (unsigned long long)rec->ino);
2339 memset(&inode_item, 0, sizeof(inode_item));
2340 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Fallback nlink 1 when no links were counted (branch partly elided). */
2342 btrfs_set_stack_inode_nlink(&inode_item, 1);
2344 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2345 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2346 if (rec->found_dir_item) {
2347 if (rec->found_file_extent)
2348 fprintf(stderr, "root %llu inode %llu has both a dir "
2349 "item and extents, unsure if it is a dir or a "
2350 "regular file so setting it as a directory\n",
2351 (unsigned long long)root->objectid,
2352 (unsigned long long)rec->ino);
2353 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2354 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): condition is redundant — always true after the if above. */
2355 } else if (!rec->found_dir_item) {
2356 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2357 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2359 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2360 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2361 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2362 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2363 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2364 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime (creation time) deliberately left at epoch — unknown. */
2365 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2366 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2368 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2370 btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair inconsistencies between the three
 * structures that must agree for each link (DIR_ITEM, DIR_INDEX,
 * INODE_REF): delete orphaned/unmatched dir indexes, re-add missing ones,
 * recreate missing index/item pairs from a surviving inode ref, and
 * recreate the inode item itself when only the refs survive.
 * Returns a negative error, or the number of repairs made.
 */
2374 static int repair_inode_backrefs(struct btrfs_root *root,
2375 struct inode_record *rec,
2376 struct cache_tree *inode_cache,
2379 struct inode_backref *tmp, *backref;
2380 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2384 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2385 if (!delete && rec->ino == root_dirid) {
2386 if (!rec->found_inode_item) {
2387 ret = create_inode_item(root, rec, backref, 1);
2394 /* Index 0 for root dir's are special, don't mess with it */
2395 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index exists but its inode ref is missing or mismatched: drop it. */
2399 ((backref->found_dir_index && !backref->found_inode_ref) ||
2400 (backref->found_dir_index && backref->found_inode_ref &&
2401 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2402 ret = delete_dir_index(root, inode_cache, rec, backref);
2406 list_del(&backref->list);
/* Dir item + inode ref but no index: rebuild the index item. */
2410 if (!delete && !backref->found_dir_index &&
2411 backref->found_dir_item && backref->found_inode_ref) {
2412 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice below; the second
 * test was presumably meant to be found_inode_ref — confirm against
 * upstream before changing (harmless as written, just redundant).
 */
2417 if (backref->found_dir_item &&
2418 backref->found_dir_index &&
2419 backref->found_dir_index) {
2420 if (!backref->errors &&
2421 backref->found_inode_ref) {
2422 list_del(&backref->list);
/* Only the inode ref survives: recreate the dir index/item pair. */
2428 if (!delete && (!backref->found_dir_index &&
2429 !backref->found_dir_item &&
2430 backref->found_inode_ref)) {
2431 struct btrfs_trans_handle *trans;
2432 struct btrfs_key location;
2434 ret = check_dir_conflict(root, backref->name,
2440 * let nlink fixing routine to handle it,
2441 * which can do it better.
2446 location.objectid = rec->ino;
2447 location.type = BTRFS_INODE_ITEM_KEY;
2448 location.offset = 0;
2450 trans = btrfs_start_transaction(root, 1);
2451 if (IS_ERR(trans)) {
2452 ret = PTR_ERR(trans);
2455 fprintf(stderr, "adding missing dir index/item pair "
2457 (unsigned long long)rec->ino);
2458 ret = btrfs_insert_dir_item(trans, root, backref->name,
2460 backref->dir, &location,
2461 imode_to_type(rec->imode),
2464 btrfs_commit_transaction(trans, root);
/* All three link structures exist but the inode item is gone. */
2468 if (!delete && (backref->found_inode_ref &&
2469 backref->found_dir_index &&
2470 backref->found_dir_item &&
2471 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2472 !rec->found_inode_item)) {
2473 ret = create_inode_item(root, rec, backref, 0);
2480 return ret ? ret : repaired;
2484 * To determine the file type for nlink/inode_item repair
2486 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2487 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* type for @rec (see contract in the comment
 * above): prefer the mode from a recovered inode item, otherwise take the
 * filetype recorded on any backref that had a dir item/index.
 */
2489 static int find_file_type(struct inode_record *rec, u8 *type)
2491 struct inode_backref *backref;
2493 /* For inode item recovered case */
2494 if (rec->found_inode_item) {
2495 *type = imode_to_type(rec->imode);
2499 list_for_each_entry(backref, &rec->backrefs, list) {
2500 if (backref->found_dir_index || backref->found_dir_item) {
2501 *type = backref->filetype;
2509 * To determine the file name for nlink repair
2511 * Return 0 if file name is found, set name and namelen.
2512 * Return -ENOENT if file name is not found.
/*
 * Pick a usable file name for @rec from the first backref that carried
 * one (dir item, dir index or inode ref); copies it into @name and sets
 * *namelen.  See the return contract in the comment above.
 */
2514 static int find_file_name(struct inode_record *rec,
2515 char *name, int *namelen)
2517 struct inode_backref *backref;
2519 list_for_each_entry(backref, &rec->backrefs, list) {
2520 if (backref->found_dir_index || backref->found_dir_item ||
2521 backref->found_inode_ref) {
2522 memcpy(name, backref->name, backref->namelen);
2523 *namelen = backref->namelen;
2530 /* Reset the nlink of the inode to the correct one */
/* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every backref (valid and invalid alike), force the
 * on-disk nlink to 0, then re-add only the fully consistent links via
 * btrfs_add_link(), which increments nlink as a side effect — leaving
 * the count exactly equal to the number of valid links.
 */
2531 static int reset_nlink(struct btrfs_trans_handle *trans,
2532 struct btrfs_root *root,
2533 struct btrfs_path *path,
2534 struct inode_record *rec)
2536 struct inode_backref *backref;
2537 struct inode_backref *tmp;
2538 struct btrfs_key key;
2539 struct btrfs_inode_item *inode_item;
2542 /* We don't believe this either, reset it and iterate backref */
2543 rec->found_link = 0;
2545 /* Remove all backref including the valid ones */
2546 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2547 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2548 backref->index, backref->name,
2549 backref->namelen, 0);
2553 /* remove invalid backref, so it won't be added back */
2554 if (!(backref->found_dir_index &&
2555 backref->found_dir_item &&
2556 backref->found_inode_ref)) {
2557 list_del(&backref->list);
2564 /* Set nlink to 0 */
2565 key.objectid = rec->ino;
2566 key.type = BTRFS_INODE_ITEM_KEY;
2568 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2575 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2576 struct btrfs_inode_item);
2577 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2578 btrfs_mark_buffer_dirty(path->nodes[0]);
2579 btrfs_release_path(path);
2582 * Add back valid inode_ref/dir_item/dir_index,
2583 * add_link() will handle the nlink inc, so new nlink must be correct
2585 list_for_each_entry(backref, &rec->backrefs, list) {
2586 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2587 backref->name, backref->namelen,
2588 backref->filetype, &backref->index, 1);
2593 btrfs_release_path(path);
/*
 * Repair I_ERR_LINK_COUNT_WRONG.  Recover a name and type for the inode
 * (with numeric-ino / regular-file fallbacks), reset nlink via
 * reset_nlink(), and if no valid link remains, relink the inode into a
 * "lost+found" directory, retrying with ".INO" suffixes on name clashes.
 */
2597 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2598 struct btrfs_root *root,
2599 struct btrfs_path *path,
2600 struct inode_record *rec)
2602 char *dir_name = "lost+found";
2603 char namebuf[BTRFS_NAME_LEN] = {0};
2608 int name_recovered = 0;
2609 int type_recovered = 0;
2613 * Get file name and type first before these invalid inode ref
2614 * are deleted by remove_all_invalid_backref()
2616 name_recovered = !find_file_name(rec, namebuf, &namelen);
2617 type_recovered = !find_file_type(rec, &type);
2619 if (!name_recovered) {
2620 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2621 rec->ino, rec->ino);
2622 namelen = count_digits(rec->ino);
2623 sprintf(namebuf, "%llu", rec->ino);
2626 if (!type_recovered) {
2627 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2629 type = BTRFS_FT_REG_FILE;
2633 ret = reset_nlink(trans, root, path, rec);
2636 "Failed to reset nlink for inode %llu: %s\n",
2637 rec->ino, strerror(-ret));
/* No valid link survived: park the inode under lost+found. */
2641 if (rec->found_link == 0) {
2642 lost_found_ino = root->highest_inode;
2643 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2648 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2649 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2652 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2653 dir_name, strerror(-ret));
2656 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2657 namebuf, namelen, type, NULL, 1);
2659 * Add ".INO" suffix several times to handle case where
2660 * "FILENAME.INO" is already taken by another file.
2662 while (ret == -EEXIST) {
2664 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2666 if (namelen + count_digits(rec->ino) + 1 >
2671 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2673 namelen += count_digits(rec->ino) + 1;
2674 ret = btrfs_add_link(trans, root, rec->ino,
2675 lost_found_ino, namebuf,
2676 namelen, type, NULL, 1);
2680 "Failed to link the inode %llu to %s dir: %s\n",
2681 rec->ino, dir_name, strerror(-ret));
2685 * Just increase the found_link, don't actually add the
2686 * backref. This will make things easier and this inode
2687 * record will be freed after the repair is done.
2688 * So fsck will not report problem about this inode.
2691 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2692 namelen, namebuf, dir_name);
2694 printf("Fixed the nlink of inode %llu\n", rec->ino);
2697 * Clear the flag anyway, or we will loop forever for the same inode
2698 * as it will not be removed from the bad inode list and the dead loop
2701 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2702 btrfs_release_path(path);
2707 * Check if there is any normal(reg or prealloc) file extent for given
2709 * This is used to determine the file type when neither its dir_index/item or
2710 * inode_item exists.
2712 * This will *NOT* report error, if any error happens, just consider it does
2713 * not have any normal file extent.
/*
 * See the contract in the comment above: best-effort scan of @ino's
 * EXTENT_DATA items for any non-inline (REG/PREALLOC) extent.  Returns
 * nonzero if one is found (return statements are in elided lines);
 * errors are swallowed and treated as "no normal extent".
 */
2715 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2717 struct btrfs_path *path;
2718 struct btrfs_key key;
2719 struct btrfs_key found_key;
2720 struct btrfs_file_extent_item *fi;
2724 path = btrfs_alloc_path();
2728 key.type = BTRFS_EXTENT_DATA_KEY;
2731 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2736 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2737 ret = btrfs_next_leaf(root, path);
2744 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Past this inode's extent data — stop scanning. */
2746 if (found_key.objectid != ino ||
2747 found_key.type != BTRFS_EXTENT_DATA_KEY)
2749 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2750 struct btrfs_file_extent_item);
2751 type = btrfs_file_extent_type(path->nodes[0], fi);
2752 if (type != BTRFS_FILE_EXTENT_INLINE) {
2758 btrfs_free_path(path);
/*
 * Map a btrfs directory-entry file type (BTRFS_FT_*) to the matching
 * S_IF* mode bits.  No bounds check is visible here, so callers must
 * pass a valid BTRFS_FT_* value.
 */
2762 static u32 btrfs_type_to_imode(u8 type)
2764 static u32 imode_by_btrfs_type[] = {
2765 [BTRFS_FT_REG_FILE] = S_IFREG,
2766 [BTRFS_FT_DIR] = S_IFDIR,
2767 [BTRFS_FT_CHRDEV] = S_IFCHR,
2768 [BTRFS_FT_BLKDEV] = S_IFBLK,
2769 [BTRFS_FT_FIFO] = S_IFIFO,
2770 [BTRFS_FT_SOCK] = S_IFSOCK,
2771 [BTRFS_FT_SYMLINK] = S_IFLNK,
2774 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec inside the open transaction
 * @trans.  The file type is first recovered from the record's backrefs
 * (find_file_type); when that fails it is guessed from the evidence
 * collected during the scan, falling back to a regular file.  Only the
 * inode item itself is rebuilt; link-count repair is deliberately left
 * to the I_ERR_LINK_COUNT_WRONG pass (see bottom of the function).
 */
2777 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2778 struct btrfs_root *root,
2779 struct btrfs_path *path,
2780 struct inode_record *rec)
2784 int type_recovered = 0;
2787 printf("Trying to rebuild inode:%llu\n", rec->ino);
2789 type_recovered = !find_file_type(rec, &filetype);
2792 * Try to determine inode type if type not found.
2794 * For found regular file extent, it must be FILE.
2795 * For found dir_item/index, it must be DIR.
2797 * For undetermined one, use FILE as fallback.
2800 * 1. If found backref(inode_index/item is already handled) to it,
2802 * Need new inode-inode ref structure to allow search for that.
/* Heuristic type recovery when no backref told us the type. */
2804 if (!type_recovered) {
2805 if (rec->found_file_extent &&
2806 find_normal_file_extent(root, rec->ino)) {
2808 filetype = BTRFS_FT_REG_FILE;
2809 } else if (rec->found_dir_item) {
2811 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply a regular file as well. */
2812 } else if (!list_empty(&rec->orphan_extents)) {
2814 filetype = BTRFS_FT_REG_FILE;
2816 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2819 filetype = BTRFS_FT_REG_FILE;
2823 ret = btrfs_new_inode(trans, root, rec->ino,
2824 mode | btrfs_type_to_imode(filetype));
2829 * Here inode rebuild is done, we only rebuild the inode item,
2830 * don't repair the nlink(like move to lost+found).
2831 * That is the job of nlink repair.
2833 * We just fill the record and return
2835 rec->found_dir_item = 1;
2836 rec->imode = mode | btrfs_type_to_imode(filetype);
2838 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2839 /* Ensure the inode_nlinks repair function will be called */
2840 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to the file, or
 * free the extent when it conflicts with an already existing file
 * extent.  Size accounting (found_size vs nbytes) and the file-hole
 * tree are updated as extents are re-inserted so the matching error
 * bits can be cleared.
 */
2845 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2846 struct btrfs_root *root,
2847 struct btrfs_path *path,
2848 struct inode_record *rec)
2850 struct orphan_data_extent *orphan;
2851 struct orphan_data_extent *tmp;
/* _safe variant: entries are unlinked from the list as we go. */
2854 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2856 * Check for conflicting file extents
2858 * Here we don't know whether the extents is compressed or not,
2859 * so we can only assume it not compressed nor data offset,
2860 * and use its disk_len as extent length.
2862 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2863 orphan->offset, orphan->disk_len, 0);
2864 btrfs_release_path(path);
/* Conflict: drop the orphan's data extent instead of linking it. */
2869 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2870 orphan->disk_bytenr, orphan->disk_len);
2871 ret = btrfs_free_extent(trans,
2872 root->fs_info->extent_root,
2873 orphan->disk_bytenr, orphan->disk_len,
2874 0, root->objectid, orphan->objectid,
/* No conflict: insert a plain (uncompressed) file extent item. */
2879 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2880 orphan->offset, orphan->disk_bytenr,
2881 orphan->disk_len, orphan->disk_len);
2885 /* Update file size info */
2886 rec->found_size += orphan->disk_len;
2887 if (rec->found_size == rec->nbytes)
2888 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2890 /* Update the file extent hole info too */
2891 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2895 if (RB_EMPTY_ROOT(&rec->holes))
2896 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2898 list_del(&orphan->list);
/* All orphan extents processed: the orphan error is resolved. */
2901 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every file-extent hole recorded on @rec by punching explicit
 * hole extents, clearing I_ERR_FILE_EXTENT_DISCOUNT once the hole tree
 * is empty.  Also handles the degenerate case of a file that lost all
 * of its file extents.
 */
2906 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2907 struct btrfs_root *root,
2908 struct btrfs_path *path,
2909 struct inode_record *rec)
2911 struct rb_node *node;
2912 struct file_extent_hole *hole;
2916 node = rb_first(&rec->holes);
2920 hole = rb_entry(node, struct file_extent_hole, node);
/* Punch the hole on disk, then drop it from our bookkeeping. */
2921 ret = btrfs_punch_hole(trans, root, rec->ino,
2922 hole->start, hole->len);
2925 ret = del_file_extent_hole(&rec->holes, hole->start,
2929 if (RB_EMPTY_ROOT(&rec->holes))
2930 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2931 node = rb_first(&rec->holes);
2933 /* special case for a file losing all its file extent */
2935 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2936 round_up(rec->isize, root->sectorsize));
2940 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2941 rec->ino, root->objectid);
/*
 * Dispatch the per-error inode repair helpers for every repairable
 * error bit set on @rec, all within a single transaction.  The helpers
 * run in dependency order (inode item first, nbytes last) and the
 * chain stops at the first failure.  Errors not listed in the mask
 * below are not repairable here and cause an early bail-out.
 */
2946 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2948 struct btrfs_trans_handle *trans;
2949 struct btrfs_path *path;
2952 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2953 I_ERR_NO_ORPHAN_ITEM |
2954 I_ERR_LINK_COUNT_WRONG |
2955 I_ERR_NO_INODE_ITEM |
2956 I_ERR_FILE_EXTENT_ORPHAN |
2957 I_ERR_FILE_EXTENT_DISCOUNT|
2958 I_ERR_FILE_NBYTES_WRONG)))
2961 path = btrfs_alloc_path();
2966 * For nlink repair, it may create a dir and add link, so
2967 * 2 for parent(256)'s dir_index and dir_item
2968 * 2 for lost+found dir's inode_item and inode_ref
2969 * 1 for the new inode_ref of the file
2970 * 2 for lost+found dir's dir_index and dir_item for the file
/* 7 reserved items, per the accounting in the comment above. */
2972 trans = btrfs_start_transaction(root, 7);
2973 if (IS_ERR(trans)) {
2974 btrfs_free_path(path);
2975 return PTR_ERR(trans);
/* Each helper only runs when the previous ones succeeded (!ret). */
2978 if (rec->errors & I_ERR_NO_INODE_ITEM)
2979 ret = repair_inode_no_item(trans, root, path, rec);
2980 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2981 ret = repair_inode_orphan_extent(trans, root, path, rec);
2982 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2983 ret = repair_inode_discount_extent(trans, root, path, rec);
2984 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2985 ret = repair_inode_isize(trans, root, path, rec);
2986 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2987 ret = repair_inode_orphan_item(trans, root, path, rec);
2988 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2989 ret = repair_inode_nlinks(trans, root, path, rec);
2990 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2991 ret = repair_inode_nbytes(trans, root, path, rec);
2992 btrfs_commit_transaction(trans, root);
2993 btrfs_free_path(path);
/*
 * Verify (and, with -repair, fix) every inode record collected for
 * @root.  Order matters: backrefs are repaired first because that can
 * change error bits on the records; then the root directory is checked
 * (and recreated when missing); finally every remaining record is
 * examined, repaired via try_repair_inode() where possible, and any
 * unresolved record plus its bad backrefs is printed.  Returns -1 when
 * unresolved errors remain, 0 otherwise.
 */
2997 static int check_inode_recs(struct btrfs_root *root,
2998 struct cache_tree *inode_cache)
3000 struct cache_extent *cache;
3001 struct ptr_node *node;
3002 struct inode_record *rec;
3003 struct inode_backref *backref;
3008 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should not have collected inode records. */
3010 if (btrfs_root_refs(&root->root_item) == 0) {
3011 if (!cache_tree_empty(inode_cache))
3012 fprintf(stderr, "warning line %d\n", __LINE__);
3017 * We need to record the highest inode number for later 'lost+found'
3019 * We must select an ino not used/referred by any existing inode, or
3020 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3021 * this may cause 'lost+found' dir has wrong nlinks.
3023 cache = last_cache_extent(inode_cache);
3025 node = container_of(cache, struct ptr_node, cache);
3027 if (rec->ino > root->highest_inode)
3028 root->highest_inode = rec->ino;
3032 * We need to repair backrefs first because we could change some of the
3033 * errors in the inode recs.
3035 * We also need to go through and delete invalid backrefs first and then
3036 * add the correct ones second. We do this because we may get EEXIST
3037 * when adding back the correct index because we hadn't yet deleted the
3040 * For example, if we were missing a dir index then the directories
3041 * isize would be wrong, so if we fixed the isize to what we thought it
3042 * would be and then fixed the backref we'd still have a invalid fs, so
3043 * we need to add back the dir index and then check to see if the isize
3048 if (stage == 3 && !err)
3051 cache = search_cache_extent(inode_cache, 0);
/* Backref-repair pass; only runs in repair mode. */
3052 while (repair && cache) {
3053 node = container_of(cache, struct ptr_node, cache);
3055 cache = next_cache_extent(cache);
3057 /* Need to free everything up and rescan */
3059 remove_cache_extent(inode_cache, &node->cache);
3061 free_inode_rec(rec);
3065 if (list_empty(&rec->backrefs))
3068 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory check: it must exist and be sane. */
3082 rec = get_inode_rec(inode_cache, root_dirid, 0);
3083 BUG_ON(IS_ERR(rec));
3085 ret = check_root_dir(rec);
3087 fprintf(stderr, "root %llu root dir %llu error\n",
3088 (unsigned long long)root->root_key.objectid,
3089 (unsigned long long)root_dirid);
3090 print_inode_error(root, rec);
/* Repair mode: recreate a missing root dir in a small transaction. */
3095 struct btrfs_trans_handle *trans;
3097 trans = btrfs_start_transaction(root, 1);
3098 if (IS_ERR(trans)) {
3099 err = PTR_ERR(trans);
3104 "root %llu missing its root dir, recreating\n",
3105 (unsigned long long)root->objectid);
3107 ret = btrfs_make_root_dir(trans, root, root_dirid);
3110 btrfs_commit_transaction(trans, root);
3114 fprintf(stderr, "root %llu root dir %llu not found\n",
3115 (unsigned long long)root->root_key.objectid,
3116 (unsigned long long)root_dirid);
/* Main pass over all remaining inode records. */
3120 cache = search_cache_extent(inode_cache, 0);
3123 node = container_of(cache, struct ptr_node, cache);
3125 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan pseudo-inode were handled above. */
3127 if (rec->ino == root_dirid ||
3128 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3129 free_inode_rec(rec);
/* A present orphan item resolves I_ERR_NO_ORPHAN_ITEM. */
3133 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3134 ret = check_orphan_item(root, rec->ino);
3136 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3137 if (can_free_inode_rec(rec)) {
3138 free_inode_rec(rec);
3143 if (!rec->found_inode_item)
3144 rec->errors |= I_ERR_NO_INODE_ITEM;
3145 if (rec->found_link != rec->nlink)
3146 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3148 ret = try_repair_inode(root, rec);
3149 if (ret == 0 && can_free_inode_rec(rec)) {
3150 free_inode_rec(rec);
/* Still broken (or repair disabled): report it and its backrefs. */
3156 if (!(repair && ret == 0))
3158 print_inode_error(root, rec);
3159 list_for_each_entry(backref, &rec->backrefs, list) {
3160 if (!backref->found_dir_item)
3161 backref->errors |= REF_ERR_NO_DIR_ITEM;
3162 if (!backref->found_dir_index)
3163 backref->errors |= REF_ERR_NO_DIR_INDEX;
3164 if (!backref->found_inode_ref)
3165 backref->errors |= REF_ERR_NO_INODE_REF;
3166 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3167 " namelen %u name %s filetype %d errors %x",
3168 (unsigned long long)backref->dir,
3169 (unsigned long long)backref->index,
3170 backref->namelen, backref->name,
3171 backref->filetype, backref->errors);
3172 print_ref_error(backref->errors);
3174 free_inode_rec(rec);
3176 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record on first use.  Returns ERR_PTR(-ENOMEM)
 * on allocation failure or ERR_PTR(-EEXIST) if the insert races with an
 * existing entry.
 */
3179 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3182 struct cache_extent *cache;
3183 struct root_record *rec = NULL;
3186 cache = lookup_cache_extent(root_cache, objectid, 1);
3188 rec = container_of(cache, struct root_record, cache);
/* Not cached yet: build a new record keyed by the root objectid. */
3190 rec = calloc(1, sizeof(*rec));
3192 return ERR_PTR(-ENOMEM);
3193 rec->objectid = objectid;
3194 INIT_LIST_HEAD(&rec->backrefs);
3195 rec->cache.start = objectid;
3196 rec->cache.size = 1;
3198 ret = insert_cache_extent(root_cache, &rec->cache);
3200 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (ref_root, dir, name); when none
 * exists, allocate a zeroed one (name stored inline after the struct,
 * NUL-terminated) and append it to the record's backref list.
 */
3205 static struct root_backref *get_root_backref(struct root_record *rec,
3206 u64 ref_root, u64 dir, u64 index,
3207 const char *name, int namelen)
3209 struct root_backref *backref;
3211 list_for_each_entry(backref, &rec->backrefs, list) {
3212 if (backref->ref_root != ref_root || backref->dir != dir ||
3213 backref->namelen != namelen)
3215 if (memcmp(name, backref->name, namelen))
/* No match found: create a new backref carrying the name inline. */
3220 backref = calloc(1, sizeof(*backref) + namelen + 1);
3223 backref->ref_root = ref_root;
3225 backref->index = index;
3226 backref->namelen = namelen;
3227 memcpy(backref->name, name, namelen);
3228 backref->name[namelen] = '\0';
3229 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: releases all
 * of its backrefs before the record itself.
 */
3233 static void free_root_record(struct cache_extent *cache)
3235 struct root_record *rec;
3236 struct root_backref *backref;
3238 rec = container_of(cache, struct root_record, cache);
3239 while (!list_empty(&rec->backrefs)) {
3240 backref = to_root_backref(rec->backrefs.next);
3241 list_del(&backref->list);
/* Generates free_root_recs_tree() to drain a whole cache tree. */
3248 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record evidence of one root reference item (DIR_ITEM, DIR_INDEX,
 * ROOT_REF or ROOT_BACKREF) for root @root_id referenced from
 * @ref_root/@dir.  Duplicate or mismatching items set the appropriate
 * REF_ERR_* bits; a root becomes "reachable" once both the forward
 * root ref and the dir item have been seen.
 */
3250 static int add_root_backref(struct cache_tree *root_cache,
3251 u64 root_id, u64 ref_root, u64 dir, u64 index,
3252 const char *name, int namelen,
3253 int item_type, int errors)
3255 struct root_record *rec;
3256 struct root_backref *backref;
3258 rec = get_root_rec(root_cache, root_id);
3259 BUG_ON(IS_ERR(rec));
3260 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3263 backref->errors |= errors;
/* DIR_ITEM carries no index; everything else must agree on it. */
3265 if (item_type != BTRFS_DIR_ITEM_KEY) {
3266 if (backref->found_dir_index || backref->found_back_ref ||
3267 backref->found_forward_ref) {
3268 if (backref->index != index)
3269 backref->errors |= REF_ERR_INDEX_UNMATCH;
3271 backref->index = index;
/* Mark which item kinds have been observed for this backref. */
3275 if (item_type == BTRFS_DIR_ITEM_KEY) {
3276 if (backref->found_forward_ref)
3278 backref->found_dir_item = 1;
3279 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3280 backref->found_dir_index = 1;
3281 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3282 if (backref->found_forward_ref)
3283 backref->errors |= REF_ERR_DUP_ROOT_REF;
3284 else if (backref->found_dir_item)
3286 backref->found_forward_ref = 1;
3287 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3288 if (backref->found_back_ref)
3289 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3290 backref->found_back_ref = 1;
3295 if (backref->found_forward_ref && backref->found_dir_item)
3296 backref->reachable = 1;
/*
 * Move per-subvolume root references gathered in @src_cache (inode
 * records describing child-root directory entries) into the global
 * @dst_cache as root backrefs.  Reloc trees are skipped entirely —
 * their records are simply freed.
 */
3300 static int merge_root_recs(struct btrfs_root *root,
3301 struct cache_tree *src_cache,
3302 struct cache_tree *dst_cache)
3304 struct cache_extent *cache;
3305 struct ptr_node *node;
3306 struct inode_record *rec;
3307 struct inode_backref *backref;
3310 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3311 free_inode_recs_tree(src_cache);
3316 cache = search_cache_extent(src_cache, 0);
3319 node = container_of(cache, struct ptr_node, cache);
3321 remove_cache_extent(src_cache, &node->cache);
/* Only entries that actually name a child root are merged. */
3324 ret = is_child_root(root, root->objectid, rec->ino);
3330 list_for_each_entry(backref, &rec->backrefs, list) {
3331 BUG_ON(backref->found_inode_ref);
3332 if (backref->found_dir_item)
3333 add_root_backref(dst_cache, rec->ino,
3334 root->root_key.objectid, backref->dir,
3335 backref->index, backref->name,
3336 backref->namelen, BTRFS_DIR_ITEM_KEY,
3338 if (backref->found_dir_index)
3339 add_root_backref(dst_cache, rec->ino,
3340 root->root_key.objectid, backref->dir,
3341 backref->index, backref->name,
3342 backref->namelen, BTRFS_DIR_INDEX_KEY,
3346 free_inode_rec(rec);
/*
 * Cross-check all collected root records: iteratively clear
 * "reachable" on backrefs whose referencing root is itself
 * unreferenced, then report fs trees that are unreferenced, lack a
 * root item, or have inconsistent ref items.  Returns 1 when any
 * error was found, 0 otherwise.
 */
3353 static int check_root_refs(struct btrfs_root *root,
3354 struct cache_tree *root_cache)
3356 struct root_record *rec;
3357 struct root_record *ref_root;
3358 struct root_backref *backref;
3359 struct cache_extent *cache;
/* The top-level fs tree is implicitly referenced. */
3365 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3366 BUG_ON(IS_ERR(rec));
3369 /* fixme: this can not detect circular references */
/* Propagation pass: drop reachability via unreferenced roots. */
3372 cache = search_cache_extent(root_cache, 0);
3376 rec = container_of(cache, struct root_record, cache);
3377 cache = next_cache_extent(cache);
3379 if (rec->found_ref == 0)
3382 list_for_each_entry(backref, &rec->backrefs, list) {
3383 if (!backref->reachable)
3386 ref_root = get_root_rec(root_cache,
3388 BUG_ON(IS_ERR(ref_root));
3389 if (ref_root->found_ref > 0)
3392 backref->reachable = 0;
3394 if (rec->found_ref == 0)
/* Reporting pass over the (now stable) reachability state. */
3400 cache = search_cache_extent(root_cache, 0);
3404 rec = container_of(cache, struct root_record, cache);
3405 cache = next_cache_extent(cache);
3407 if (rec->found_ref == 0 &&
3408 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3409 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item legitimately explains an unreferenced root. */
3410 ret = check_orphan_item(root->fs_info->tree_root,
3416 * If we don't have a root item then we likely just have
3417 * a dir item in a snapshot for this root but no actual
3418 * ref key or anything so it's meaningless.
3420 if (!rec->found_root_item)
3423 fprintf(stderr, "fs tree %llu not referenced\n",
3424 (unsigned long long)rec->objectid);
3428 if (rec->found_ref > 0 && !rec->found_root_item)
3430 list_for_each_entry(backref, &rec->backrefs, list) {
3431 if (!backref->found_dir_item)
3432 backref->errors |= REF_ERR_NO_DIR_ITEM;
3433 if (!backref->found_dir_index)
3434 backref->errors |= REF_ERR_NO_DIR_INDEX;
3435 if (!backref->found_back_ref)
3436 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3437 if (!backref->found_forward_ref)
3438 backref->errors |= REF_ERR_NO_ROOT_REF;
3439 if (backref->reachable && backref->errors)
3446 fprintf(stderr, "fs tree %llu refs %u %s\n",
3447 (unsigned long long)rec->objectid, rec->found_ref,
3448 rec->found_root_item ? "" : "not found");
3450 list_for_each_entry(backref, &rec->backrefs, list) {
3451 if (!backref->reachable)
3453 if (!backref->errors && rec->found_root_item)
3455 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3456 " index %llu namelen %u name %s errors %x\n",
3457 (unsigned long long)backref->ref_root,
3458 (unsigned long long)backref->dir,
3459 (unsigned long long)backref->index,
3460 backref->namelen, backref->name,
3462 print_ref_error(backref->errors);
3465 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it to
 * add_root_backref().  For ROOT_REF the key is (parent, REF, child) so
 * the child root is key->offset; for ROOT_BACKREF it is the reverse.
 * Over-long names are truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
3468 static int process_root_ref(struct extent_buffer *eb, int slot,
3469 struct btrfs_key *key,
3470 struct cache_tree *root_cache)
3476 struct btrfs_root_ref *ref;
3477 char namebuf[BTRFS_NAME_LEN];
3480 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3482 dirid = btrfs_root_ref_dirid(eb, ref);
3483 index = btrfs_root_ref_sequence(eb, ref);
3484 name_len = btrfs_root_ref_name_len(eb, ref);
3486 if (name_len <= BTRFS_NAME_LEN) {
3490 len = BTRFS_NAME_LEN;
3491 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline right after the ref item. */
3493 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3495 if (key->type == BTRFS_ROOT_REF_KEY) {
3496 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3497 index, namebuf, len, key->type, error);
3499 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3500 index, namebuf, len, key->type, error);
/* Destructor for one btrfs_corrupt_block entry in a cache tree. */
3505 static void free_corrupt_block(struct cache_extent *cache)
3507 struct btrfs_corrupt_block *corrupt;
3509 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() used by check_fs_root(). */
3513 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3516 * Repair the btree of the given root.
3518 * The fix is to remove the node key in corrupt_blocks cache_tree.
3519 * and rebalance the tree.
3520 * After the fix, the btree should be writeable.
/*
 * Two phases: (1) for each recorded corrupt block, delete its parent
 * pointer and free its extent; (2) re-run btrfs_search_slot() with
 * ins_len=-1 over the same keys purely to trigger tree rebalancing.
 */
3522 static int repair_btree(struct btrfs_root *root,
3523 struct cache_tree *corrupt_blocks)
3525 struct btrfs_trans_handle *trans;
3526 struct btrfs_path *path;
3527 struct btrfs_corrupt_block *corrupt;
3528 struct cache_extent *cache;
3529 struct btrfs_key key;
3534 if (cache_tree_empty(corrupt_blocks))
3537 path = btrfs_alloc_path();
3541 trans = btrfs_start_transaction(root, 1);
3542 if (IS_ERR(trans)) {
3543 ret = PTR_ERR(trans);
3544 fprintf(stderr, "Error starting transaction: %s\n",
/* Phase 1: unlink every corrupt block from its parent. */
3548 cache = first_cache_extent(corrupt_blocks);
3550 corrupt = container_of(cache, struct btrfs_corrupt_block,
3552 level = corrupt->level;
3553 path->lowest_level = level;
3554 key.objectid = corrupt->key.objectid;
3555 key.type = corrupt->key.type;
3556 key.offset = corrupt->key.offset;
3559 * Here we don't want to do any tree balance, since it may
3560 * cause a balance with corrupted brother leaf/node,
3561 * so ins_len set to 0 here.
3562 * Balance will be done after all corrupt node/leaf is deleted.
3564 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3567 offset = btrfs_node_blockptr(path->nodes[level],
3568 path->slots[level]);
3570 /* Remove the ptr */
3571 ret = btrfs_del_ptr(trans, root, path, level,
3572 path->slots[level]);
3576 * Remove the corresponding extent
3577 * return value is not concerned.
3579 btrfs_release_path(path);
3580 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3581 0, root->root_key.objectid,
3583 cache = next_cache_extent(cache);
3586 /* Balance the btree using btrfs_search_slot() */
3587 cache = first_cache_extent(corrupt_blocks);
3589 corrupt = container_of(cache, struct btrfs_corrupt_block,
3591 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len=-1 lets the search merge/balance under-filled blocks. */
3592 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3595 /* return will always >0 since it won't find the item */
3597 btrfs_release_path(path);
3598 cache = next_cache_extent(cache);
3601 btrfs_commit_transaction(trans, root);
3603 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: validate the root node, walk the whole
 * tree collecting inode/root records (honouring drop_progress for
 * half-dropped snapshots), then merge root refs into @root_cache and
 * run check_inode_recs().  Corrupt tree blocks found during the walk
 * are listed and, in repair mode, handed to repair_btree().
 */
3607 static int check_fs_root(struct btrfs_root *root,
3608 struct cache_tree *root_cache,
3609 struct walk_control *wc)
3615 struct btrfs_path path;
3616 struct shared_node root_node;
3617 struct root_record *rec;
3618 struct btrfs_root_item *root_item = &root->root_item;
3619 struct cache_tree corrupt_blocks;
3620 struct orphan_data_extent *orphan;
3621 struct orphan_data_extent *tmp;
3622 enum btrfs_tree_block_status status;
3625 * Reuse the corrupt_block cache tree to record corrupted tree block
3627 * Unlike the usage in extent tree check, here we do it in a per
3628 * fs/subvol tree base.
3630 cache_tree_init(&corrupt_blocks);
3631 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees get no root record; real roots note their root item. */
3633 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3634 rec = get_root_rec(root_cache, root->root_key.objectid);
3635 BUG_ON(IS_ERR(rec));
3636 if (btrfs_root_refs(root_item) > 0)
3637 rec->found_root_item = 1;
3640 btrfs_init_path(&path);
3641 memset(&root_node, 0, sizeof(root_node));
3642 cache_tree_init(&root_node.root_cache);
3643 cache_tree_init(&root_node.inode_cache);
3645 /* Move the orphan extent record to corresponding inode_record */
3646 list_for_each_entry_safe(orphan, tmp,
3647 &root->orphan_data_extents, list) {
3648 struct inode_record *inode;
3650 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3652 BUG_ON(IS_ERR(inode));
3653 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3654 list_move(&orphan->list, &inode->orphan_extents);
3657 level = btrfs_header_level(root->node);
3658 memset(wc->nodes, 0, sizeof(wc->nodes));
3659 wc->nodes[level] = &root_node;
3660 wc->active_node = level;
3661 wc->root_level = level;
3663 /* We may not have checked the root block, lets do that now */
3664 if (btrfs_is_leaf(root->node))
3665 status = btrfs_check_leaf(root, NULL, root->node);
3667 status = btrfs_check_node(root, NULL, root->node);
3668 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Fully-live root: walk from the top.  Otherwise resume from the
 * snapshot-deletion drop_progress key recorded in the root item. */
3671 if (btrfs_root_refs(root_item) > 0 ||
3672 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3673 path.nodes[level] = root->node;
3674 extent_buffer_get(root->node);
3675 path.slots[level] = 0;
3677 struct btrfs_key key;
3678 struct btrfs_disk_key found_key;
3680 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3681 level = root_item->drop_level;
3682 path.lowest_level = level;
3683 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3686 btrfs_node_key(path.nodes[level], &found_key,
3688 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3689 sizeof(found_key)));
/* Depth-first walk of the whole tree. */
3693 wret = walk_down_tree(root, &path, wc, &level);
3699 wret = walk_up_tree(root, &path, wc, &level);
3706 btrfs_release_path(&path);
3708 if (!cache_tree_empty(&corrupt_blocks)) {
3709 struct cache_extent *cache;
3710 struct btrfs_corrupt_block *corrupt;
3712 printf("The following tree block(s) is corrupted in tree %llu:\n",
3713 root->root_key.objectid);
3714 cache = first_cache_extent(&corrupt_blocks);
3716 corrupt = container_of(cache,
3717 struct btrfs_corrupt_block,
3719 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3720 cache->start, corrupt->level,
3721 corrupt->key.objectid, corrupt->key.type,
3722 corrupt->key.offset);
3723 cache = next_cache_extent(cache);
3726 printf("Try to repair the btree for root %llu\n",
3727 root->root_key.objectid);
3728 ret = repair_btree(root, &corrupt_blocks);
3730 fprintf(stderr, "Failed to repair btree: %s\n",
3733 printf("Btree for root %llu is fixed\n",
3734 root->root_key.objectid);
3738 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3742 if (root_node.current) {
3743 root_node.current->checked = 1;
3744 maybe_free_inode_rec(&root_node.inode_cache,
3748 err = check_inode_recs(root, &root_node.inode_cache);
/* Cleanup: drop corrupt-block bookkeeping and leftover orphans. */
3752 free_corrupt_blocks_tree(&corrupt_blocks);
3753 root->fs_info->corrupt_blocks = NULL;
3754 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * True when @objectid names a tree whose contents look like an
 * fs/subvolume tree: the two reloc tree ids, or any id is_fstree()
 * accepts.
 */
3758 static int fs_root_objectid(u64 objectid)
3760 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3761 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3763 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root, running check_fs_root() on
 * each fs/subvolume tree and feeding ROOT_REF/ROOT_BACKREF items to
 * process_root_ref().  If the tree root's node changes underneath us
 * (or a check returns -EAGAIN after repairs), all collected records
 * are thrown away and the scan restarts.
 */
3766 static int check_fs_roots(struct btrfs_root *root,
3767 struct cache_tree *root_cache)
3769 struct btrfs_path path;
3770 struct btrfs_key key;
3771 struct walk_control wc;
3772 struct extent_buffer *leaf, *tree_node;
3773 struct btrfs_root *tmp_root;
3774 struct btrfs_root *tree_root = root->fs_info->tree_root;
3778 if (ctx.progress_enabled) {
3779 ctx.tp = TASK_FS_ROOTS;
3780 task_start(ctx.info);
3784 * Just in case we made any changes to the extent tree that weren't
3785 * reflected into the free space cache yet.
3788 reset_cached_block_groups(root->fs_info);
3789 memset(&wc, 0, sizeof(wc));
3790 cache_tree_init(&wc.shared);
3791 btrfs_init_path(&path);
3796 key.type = BTRFS_ROOT_ITEM_KEY;
3797 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree-root node to detect concurrent COW below. */
3802 tree_node = tree_root->node;
3804 if (tree_node != tree_root->node) {
3805 free_root_recs_tree(root_cache);
3806 btrfs_release_path(&path);
3809 leaf = path.nodes[0];
3810 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3811 ret = btrfs_next_leaf(tree_root, &path);
3817 leaf = path.nodes[0];
3819 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3820 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3821 fs_root_objectid(key.objectid)) {
/* Reloc trees are keyed by offset, so skip the root cache. */
3822 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3823 tmp_root = btrfs_read_fs_root_no_cache(
3824 root->fs_info, &key);
3826 key.offset = (u64)-1;
3827 tmp_root = btrfs_read_fs_root(
3828 root->fs_info, &key);
3830 if (IS_ERR(tmp_root)) {
3834 ret = check_fs_root(tmp_root, root_cache, &wc);
/* Repairs invalidated our view: drop everything and rescan. */
3835 if (ret == -EAGAIN) {
3836 free_root_recs_tree(root_cache);
3837 btrfs_release_path(&path);
3842 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3843 btrfs_free_fs_root(tmp_root);
3844 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3845 key.type == BTRFS_ROOT_BACKREF_KEY) {
3846 process_root_ref(leaf, path.slots[0], &key,
3853 btrfs_release_path(&path);
3855 free_extent_cache_tree(&wc.shared);
3856 if (!cache_tree_empty(&wc.shared))
3857 fprintf(stderr, "warning line %d\n", __LINE__);
3859 task_stop(ctx.info);
/*
 * Walk the backref rbtree of extent record @rec and verify that every
 * backref was seen in the extent tree, that tree backrefs were actually
 * referenced, and that data backref counts/bytenr/bytes all agree with
 * the record.  With @print_errs each mismatch is reported to stderr;
 * with print_errs == 0 the function is used as a silent predicate
 * (see maybe_free_extent_rec).
 */
3864 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3867 struct extent_backref *back;
3868 struct tree_backref *tback;
3869 struct data_backref *dback;
3873 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3874 back = rb_node_to_extent_backref(n);
/* Backref known from tree walking but missing in the extent tree. */
3875 if (!back->found_extent_tree) {
3879 if (back->is_data) {
3880 dback = to_data_backref(back);
3881 fprintf(stderr, "Backref %llu %s %llu"
3882 " owner %llu offset %llu num_refs %lu"
3883 " not found in extent tree\n",
3884 (unsigned long long)rec->start,
3885 back->full_backref ?
3887 back->full_backref ?
3888 (unsigned long long)dback->parent:
3889 (unsigned long long)dback->root,
3890 (unsigned long long)dback->owner,
3891 (unsigned long long)dback->offset,
3892 (unsigned long)dback->num_refs);
3894 tback = to_tree_backref(back);
3895 fprintf(stderr, "Backref %llu parent %llu"
3896 " root %llu not found in extent tree\n",
3897 (unsigned long long)rec->start,
3898 (unsigned long long)tback->parent,
3899 (unsigned long long)tback->root);
/* Tree backref present in extent tree but never seen in a tree. */
3902 if (!back->is_data && !back->found_ref) {
3906 tback = to_tree_backref(back);
3907 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3908 (unsigned long long)rec->start,
3909 back->full_backref ? "parent" : "root",
3910 back->full_backref ?
3911 (unsigned long long)tback->parent :
3912 (unsigned long long)tback->root, back);
/* Data backrefs: counts, disk bytenr and length must all match. */
3914 if (back->is_data) {
3915 dback = to_data_backref(back);
3916 if (dback->found_ref != dback->num_refs) {
3920 fprintf(stderr, "Incorrect local backref count"
3921 " on %llu %s %llu owner %llu"
3922 " offset %llu found %u wanted %u back %p\n",
3923 (unsigned long long)rec->start,
3924 back->full_backref ?
3926 back->full_backref ?
3927 (unsigned long long)dback->parent:
3928 (unsigned long long)dback->root,
3929 (unsigned long long)dback->owner,
3930 (unsigned long long)dback->offset,
3931 dback->found_ref, dback->num_refs, back);
3933 if (dback->disk_bytenr != rec->start) {
3937 fprintf(stderr, "Backref disk bytenr does not"
3938 " match extent record, bytenr=%llu, "
3939 "ref bytenr=%llu\n",
3940 (unsigned long long)rec->start,
3941 (unsigned long long)dback->disk_bytenr);
3944 if (dback->bytes != rec->nr) {
3948 fprintf(stderr, "Backref bytes do not match "
3949 "extent backref, bytenr=%llu, ref "
3950 "bytes=%llu, backref bytes=%llu\n",
3951 (unsigned long long)rec->start,
3952 (unsigned long long)rec->nr,
3953 (unsigned long long)dback->bytes);
/* Accumulate total found references for the global count check. */
3956 if (!back->is_data) {
3959 dback = to_data_backref(back);
3960 found += dback->found_ref;
3963 if (found != rec->refs) {
3967 fprintf(stderr, "Incorrect global backref count "
3968 "on %llu found %llu wanted %llu\n",
3969 (unsigned long long)rec->start,
3970 (unsigned long long)found,
3971 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free one extent_backref rbtree node. */
3977 static void __free_one_backref(struct rb_node *node)
3979 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free every backref hanging off extent record @rec. */
3984 static void free_all_extent_backrefs(struct extent_record *rec)
3986 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache entirely, freeing each extent record together
 * with its backrefs.
 */
3989 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3990 struct cache_tree *extent_cache)
3992 struct cache_extent *cache;
3993 struct extent_record *rec;
3996 cache = first_cache_extent(extent_cache);
3999 rec = container_of(cache, struct extent_record, cache);
4000 remove_cache_extent(extent_cache, cache);
4001 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache once it is fully verified: content
 * and owner checked, ref counts consistent, all backpointers resolved
 * (all_backpointers_checked() used silently as a predicate here), and
 * none of the structural error flags set.
 */
4006 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4007 struct extent_record *rec)
4009 if (rec->content_checked && rec->owner_ref_checked &&
4010 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4011 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4012 !rec->bad_full_backref && !rec->crossing_stripes &&
4013 !rec->wrong_chunk_type) {
4014 remove_cache_extent(extent_cache, &rec->cache);
4015 free_all_extent_backrefs(rec);
4016 list_del_init(&rec->list);
/*
 * Verify that tree block @buf really belongs to the root recorded in
 * its header: first look for a matching non-full tree backref on the
 * extent record, then fall back to searching the owning fs tree for a
 * parent node that points at @buf.  Returns 0 when ownership checks
 * out, 1 otherwise.
 */
4022 static int check_owner_ref(struct btrfs_root *root,
4023 struct extent_record *rec,
4024 struct extent_buffer *buf)
4026 struct extent_backref *node, *tmp;
4027 struct tree_backref *back;
4028 struct btrfs_root *ref_root;
4029 struct btrfs_key key;
4030 struct btrfs_path path;
4031 struct extent_buffer *parent;
/* Fast path: a by-root backref matching the header owner suffices. */
4036 rbtree_postorder_for_each_entry_safe(node, tmp,
4037 &rec->backref_tree, node) {
4040 if (!node->found_ref)
4042 if (node->full_backref)
4044 back = to_tree_backref(node);
4045 if (btrfs_header_owner(buf) == back->root)
4048 BUG_ON(rec->is_root);
4050 /* try to find the block by search corresponding fs tree */
4051 key.objectid = btrfs_header_owner(buf);
4052 key.type = BTRFS_ROOT_ITEM_KEY;
4053 key.offset = (u64)-1;
4055 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4056 if (IS_ERR(ref_root))
/* Search for the first key of @buf one level above it. */
4059 level = btrfs_header_level(buf);
4061 btrfs_item_key_to_cpu(buf, &key, 0);
4063 btrfs_node_key_to_cpu(buf, &key, 0);
4065 btrfs_init_path(&path);
4066 path.lowest_level = level + 1;
4067 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* Ownership confirmed if the parent slot points at @buf's bytenr. */
4071 parent = path.nodes[level + 1];
4072 if (parent && buf->start == btrfs_node_blockptr(parent,
4073 path.slots[level + 1]))
4076 btrfs_release_path(&path);
4077 return found ? 0 : 1;
/*
 * True when @rec carries a non-full tree backref rooted in the extent
 * tree, i.e. the extent holds an extent-tree block.
 */
4080 static int is_extent_tree_record(struct extent_record *rec)
4082 struct extent_backref *ref, *tmp;
4083 struct tree_backref *back;
4086 rbtree_postorder_for_each_entry_safe(ref, tmp,
4087 &rec->backref_tree, node) {
4090 back = to_tree_backref(ref);
4091 if (ref->full_backref)
4093 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable tree block: if the extent at (@start, len) is
 * known to be an extent-tree block, mark it as a corrupt extent record
 * (keyed by its parent key) so later passes can repair around it.
 */
4100 static int record_bad_block_io(struct btrfs_fs_info *info,
4101 struct cache_tree *extent_cache,
4104 struct extent_record *rec;
4105 struct cache_extent *cache;
4106 struct btrfs_key key;
4108 cache = lookup_cache_extent(extent_cache, start, len);
4112 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are interesting to the corrupt list. */
4113 if (!is_extent_tree_record(rec))
4116 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4117 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap two adjacent entries (@slot and @slot + 1) inside tree block
 * @buf — used to fix out-of-order keys.  For internal nodes the whole
 * key_ptr pair is exchanged; for leaves the item data, item headers
 * (offset/size) and keys are all swapped, with low-key fixups when
 * slot 0 is involved.
 */
4120 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4121 struct extent_buffer *buf, int slot)
4123 if (btrfs_header_level(buf)) {
4124 struct btrfs_key_ptr ptr1, ptr2;
4126 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4127 sizeof(struct btrfs_key_ptr));
4128 read_extent_buffer(buf, &ptr2,
4129 btrfs_node_key_ptr_offset(slot + 1),
4130 sizeof(struct btrfs_key_ptr));
4131 write_extent_buffer(buf, &ptr1,
4132 btrfs_node_key_ptr_offset(slot + 1),
4133 sizeof(struct btrfs_key_ptr));
4134 write_extent_buffer(buf, &ptr2,
4135 btrfs_node_key_ptr_offset(slot),
4136 sizeof(struct btrfs_key_ptr));
/* Swapping slot 0 changes the block's first key: fix parents. */
4138 struct btrfs_disk_key key;
4139 btrfs_node_key(buf, &key, 0);
4140 btrfs_fixup_low_keys(root, path, &key,
4141 btrfs_header_level(buf) + 1);
4144 struct btrfs_item *item1, *item2;
4145 struct btrfs_key k1, k2;
4146 char *item1_data, *item2_data;
4147 u32 item1_offset, item2_offset, item1_size, item2_size;
4149 item1 = btrfs_item_nr(slot);
4150 item2 = btrfs_item_nr(slot + 1);
4151 btrfs_item_key_to_cpu(buf, &k1, slot);
4152 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4153 item1_offset = btrfs_item_offset(buf, item1);
4154 item2_offset = btrfs_item_offset(buf, item2);
4155 item1_size = btrfs_item_size(buf, item1);
4156 item2_size = btrfs_item_size(buf, item2);
4158 item1_data = malloc(item1_size);
4161 item2_data = malloc(item2_size);
4167 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4168 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data was allocated/filled with item1_size
 * bytes but is written out with item2_size (and vice versa) —
 * if the two sizes differ this over-reads one heap buffer.
 * Verify against the full source before relying on this path.
 */
4170 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4171 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the item headers so offsets/sizes follow the data. */
4175 btrfs_set_item_offset(buf, item1, item2_offset);
4176 btrfs_set_item_offset(buf, item2, item1_offset);
4177 btrfs_set_item_size(buf, item1, item2_size);
4178 btrfs_set_item_size(buf, item2, item1_size);
4180 path->slots[0] = slot;
4181 btrfs_set_item_key_unsafe(root, path, &k2);
4182 path->slots[0] = slot + 1;
4183 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair whose
 * keys compare out of order (bubble-style repair for
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER).  The buffer is marked dirty so the
 * fix is written back by the transaction.
 */
4188 static int fix_key_order(struct btrfs_trans_handle *trans,
4189 struct btrfs_root *root,
4190 struct btrfs_path *path)
4192 struct extent_buffer *buf;
4193 struct btrfs_key k1, k2;
4195 int level = path->lowest_level;
4198 buf = path->nodes[level];
4199 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node vs leaf use different key accessors (branch guard elided). */
4201 btrfs_node_key_to_cpu(buf, &k1, i);
4202 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4204 btrfs_item_key_to_cpu(buf, &k1, i);
4205 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* In-order pair: nothing to do (continue elided in this view). */
4207 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4209 ret = swap_values(root, path, buf, i);
4212 btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item at @slot from leaf @buf by shifting the item headers
 * down and decrementing nritems.  Only key types that the checker can
 * later rebuild from other information are allowed to be deleted; any
 * other type is refused (return elided in this view).
 */
4218 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4219 struct btrfs_root *root,
4220 struct btrfs_path *path,
4221 struct extent_buffer *buf, int slot)
4223 struct btrfs_key key;
4224 int nritems = btrfs_header_nritems(buf);
4226 btrfs_item_key_to_cpu(buf, &key, slot);
4228 /* These are all the keys we can deal with missing. */
4229 if (key.type != BTRFS_DIR_INDEX_KEY &&
4230 key.type != BTRFS_EXTENT_ITEM_KEY &&
4231 key.type != BTRFS_METADATA_ITEM_KEY &&
4232 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4233 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4236 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4237 (unsigned long long)key.objectid, key.type,
4238 (unsigned long long)key.offset, slot, buf->start);
/* Shift the item headers after @slot down by one position. */
4239 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4240 btrfs_item_nr_offset(slot + 1),
4241 sizeof(struct btrfs_item) *
4242 (nritems - slot - 1));
4243 btrfs_set_header_nritems(buf, nritems - 1);
/* If slot 0 was removed (guard elided), fix the low keys upward. */
4245 struct btrfs_disk_key disk_key;
4247 btrfs_item_key(buf, &disk_key, 0);
4248 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4250 btrfs_mark_buffer_dirty(buf);
/*
 * Repair BTRFS_TREE_BLOCK_INVALID_OFFSETS on a leaf: each item's data
 * must end exactly where the previous item's data begins (or at
 * BTRFS_LEAF_DATA_SIZE for item 0).  Items ending past that boundary
 * overlap and are deleted via delete_bogus_item(); items ending short
 * are shifted up by the gap.
 */
4254 static int fix_item_offset(struct btrfs_trans_handle *trans,
4255 struct btrfs_root *root,
4256 struct btrfs_path *path)
4258 struct extent_buffer *buf;
4262 /* We should only get this for leaves */
4263 BUG_ON(path->lowest_level);
4264 buf = path->nodes[0];
4266 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4267 unsigned int shift = 0, offset;
/* Item 0 must end exactly at the end of the leaf data area. */
4269 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4270 BTRFS_LEAF_DATA_SIZE(root)) {
4271 if (btrfs_item_end_nr(buf, i) >
4272 BTRFS_LEAF_DATA_SIZE(root)) {
4273 ret = delete_bogus_item(trans, root, path,
4277 fprintf(stderr, "item is off the end of the "
4278 "leaf, can't fix\n");
4282 shift = BTRFS_LEAF_DATA_SIZE(root) -
4283 btrfs_item_end_nr(buf, i);
/* Subsequent items must end where the previous item's data starts. */
4284 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4285 btrfs_item_offset_nr(buf, i - 1)) {
4286 if (btrfs_item_end_nr(buf, i) >
4287 btrfs_item_offset_nr(buf, i - 1)) {
4288 ret = delete_bogus_item(trans, root, path,
4292 fprintf(stderr, "items overlap, can't fix\n");
4296 shift = btrfs_item_offset_nr(buf, i - 1) -
4297 btrfs_item_end_nr(buf, i);
/* Move the item data up by @shift and record the new offset. */
4302 printf("Shifting item nr %d by %u bytes in block %llu\n",
4303 i, shift, (unsigned long long)buf->start);
4304 offset = btrfs_item_offset_nr(buf, i);
4305 memmove_extent_buffer(buf,
4306 btrfs_leaf_data(buf) + offset + shift,
4307 btrfs_leaf_data(buf) + offset,
4308 btrfs_item_size_nr(buf, i));
4309 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4311 btrfs_mark_buffer_dirty(buf);
4315 * We may have moved things, in which case we want to exit so we don't
4316 * write those changes out. Once we have proper abort functionality in
4317 * progs this can be changed to something nicer.
4324 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4325 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are fixable; any other
 * status bails out early.  For each root that references the block
 * (found via btrfs_find_all_roots), start a transaction, search down
 * to the block so it gets CoWed, then run the appropriate fixer.
 */
4327 static int try_to_fix_bad_block(struct btrfs_root *root,
4328 struct extent_buffer *buf,
4329 enum btrfs_tree_block_status status)
4331 struct btrfs_trans_handle *trans;
4332 struct ulist *roots;
4333 struct ulist_node *node;
4334 struct btrfs_root *search_root;
4335 struct btrfs_path *path;
4336 struct ulist_iterator iter;
4337 struct btrfs_key root_key, key;
4340 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4341 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4344 path = btrfs_alloc_path();
4348 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4351 btrfs_free_path(path);
4355 ULIST_ITER_INIT(&iter);
4356 while ((node = ulist_next(roots, &iter))) {
4357 root_key.objectid = node->val;
4358 root_key.type = BTRFS_ROOT_ITEM_KEY;
4359 root_key.offset = (u64)-1;
4361 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4368 trans = btrfs_start_transaction(search_root, 0);
4369 if (IS_ERR(trans)) {
4370 ret = PTR_ERR(trans);
/* skip_check_block: don't re-validate the block we're about to fix. */
4374 path->lowest_level = btrfs_header_level(buf);
4375 path->skip_check_block = 1;
4376 if (path->lowest_level)
4377 btrfs_node_key_to_cpu(buf, &key, 0);
4379 btrfs_item_key_to_cpu(buf, &key, 0);
/* cow=1 search so the bad block is rewritten inside the transaction. */
4380 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4383 btrfs_commit_transaction(trans, search_root);
4386 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4387 ret = fix_key_order(trans, search_root, path);
4388 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4389 ret = fix_item_offset(trans, search_root, path);
/* Error path commit (guard elided in this view). */
4391 btrfs_commit_transaction(trans, search_root);
4394 btrfs_release_path(path);
4395 btrfs_commit_transaction(trans, search_root);
4398 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record: record the
 * generation and first key, run btrfs_check_leaf()/btrfs_check_node(),
 * and, if repair is enabled (guard elided), attempt
 * try_to_fix_bad_block().  On success the record is marked
 * content_checked, and owner_ref_checked either directly (full backref)
 * or via check_owner_ref().
 */
4402 static int check_block(struct btrfs_root *root,
4403 struct cache_tree *extent_cache,
4404 struct extent_buffer *buf, u64 flags)
4406 struct extent_record *rec;
4407 struct cache_extent *cache;
4408 struct btrfs_key key;
4409 enum btrfs_tree_block_status status;
4413 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4416 rec = container_of(cache, struct extent_record, cache);
4417 rec->generation = btrfs_header_generation(buf);
4419 level = btrfs_header_level(buf);
4420 if (btrfs_header_nritems(buf) > 0) {
4423 btrfs_item_key_to_cpu(buf, &key, 0);
4425 btrfs_node_key_to_cpu(buf, &key, 0);
4427 rec->info_objectid = key.objectid;
4429 rec->info_level = level;
4431 if (btrfs_is_leaf(buf))
4432 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4434 status = btrfs_check_node(root, &rec->parent_key, buf);
4436 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4438 status = try_to_fix_bad_block(root, buf, status);
4439 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4441 fprintf(stderr, "bad block %llu\n",
4442 (unsigned long long)buf->start);
4445 * Signal to callers we need to start the scan over
4446 * again since we'll have cowed blocks.
4451 rec->content_checked = 1;
4452 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4453 rec->owner_ref_checked = 1;
4455 ret = check_owner_ref(root, rec, buf);
4457 rec->owner_ref_checked = 1;
/* Record may have become fully satisfied — drop it if so. */
4461 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up a tree backref on @rec matching either @parent (full backref)
 * or @root (normal backref).  Returns NULL when absent.  A stack-local
 * match node is used as the rb-tree search key.
 */
4466 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4467 u64 parent, u64 root)
4469 struct rb_node *node;
4470 struct tree_backref *back = NULL;
4471 struct tree_backref match = {
/* parent != 0 selects full-backref matching (guard elided). */
4478 match.parent = parent;
4479 match.node.full_backref = 1;
4484 node = rb_search(&rec->backref_tree, &match.node.node,
4485 (rb_compare_keys)compare_extent_backref, NULL);
4487 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new tree backref on @rec, keyed by @parent
 * (full backref) or @root (normal backref).  malloc failure handling
 * is elided in this view.
 */
4492 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4493 u64 parent, u64 root)
4495 struct tree_backref *ref = malloc(sizeof(*ref));
4499 memset(&ref->node, 0, sizeof(ref->node));
4501 ref->parent = parent;
4502 ref->node.full_backref = 1;
4505 ref->node.full_backref = 0;
4507 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up a data backref on @rec.  The match key includes found_ref and
 * disk_bytenr/bytes so that refs discovered from file extents are kept
 * distinct from refs discovered in the extent tree (see the comment in
 * add_data_backref()).  Returns NULL when absent.
 */
4512 static struct data_backref *find_data_backref(struct extent_record *rec,
4513 u64 parent, u64 root,
4514 u64 owner, u64 offset,
4516 u64 disk_bytenr, u64 bytes)
4518 struct rb_node *node;
4519 struct data_backref *back = NULL;
4520 struct data_backref match = {
4527 .found_ref = found_ref,
4528 .disk_bytenr = disk_bytenr,
/* parent != 0 selects full-backref matching (guard elided). */
4532 match.parent = parent;
4533 match.node.full_backref = 1;
4538 node = rb_search(&rec->backref_tree, &match.node.node,
4539 (rb_compare_keys)compare_extent_backref, NULL);
4541 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref on @rec; also raises
 * rec->max_size when this ref's @max_size is larger.  Full backrefs
 * are keyed by @parent, normal backrefs by @root/@owner/@offset.
 */
4546 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4547 u64 parent, u64 root,
4548 u64 owner, u64 offset,
4551 struct data_backref *ref = malloc(sizeof(*ref));
4555 memset(&ref->node, 0, sizeof(ref->node));
4556 ref->node.is_data = 1;
4559 ref->parent = parent;
4562 ref->node.full_backref = 1;
4566 ref->offset = offset;
4567 ref->node.full_backref = 0;
4569 ref->bytes = max_size;
4572 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4573 if (max_size > rec->max_size)
4574 rec->max_size = max_size;
4578 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system
 * classification disagrees with the flags of the block group that
 * contains it.  SYSTEM extents can only be distinguished from METADATA
 * by inspecting the first backref (chunk-tree root => SYSTEM).
 */
4579 static void check_extent_type(struct extent_record *rec)
4581 struct btrfs_block_group_cache *bg_cache;
4583 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4587 /* data extent, check chunk directly*/
4588 if (!rec->metadata) {
4589 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4590 rec->wrong_chunk_type = 1;
4594 /* metadata extent, check the obvious case first */
4595 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4596 BTRFS_BLOCK_GROUP_METADATA))) {
4597 rec->wrong_chunk_type = 1;
4602 * Check SYSTEM extent, as it's also marked as metadata, we can only
4603 * make sure it's a SYSTEM extent by its backref
4605 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4606 struct extent_backref *node;
4607 struct tree_backref *tback;
4610 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4611 if (node->is_data) {
4612 /* tree block shouldn't have data backref */
4613 rec->wrong_chunk_type = 1;
4616 tback = container_of(node, struct tree_backref, node);
4618 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4619 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4621 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4622 if (!(bg_cache->flags & bg_type))
4623 rec->wrong_chunk_type = 1;
4628 * Allocate a new extent record, fill default values from @tmpl and insert int
4629 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4630 * the cache, otherwise it fails.
/*
 * Copies the template fields verbatim, initializes lists/rb-roots, and
 * performs the crossing-stripe and extent-type checks for metadata
 * extents (guards elided in this view).  Also accounts rec->nr into the
 * global bytes_used counter.
 */
4632 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4633 struct extent_record *tmpl)
4635 struct extent_record *rec;
4638 rec = malloc(sizeof(*rec));
4641 rec->start = tmpl->start;
4642 rec->max_size = tmpl->max_size;
4643 rec->nr = max(tmpl->nr, tmpl->max_size);
4644 rec->found_rec = tmpl->found_rec;
4645 rec->content_checked = tmpl->content_checked;
4646 rec->owner_ref_checked = tmpl->owner_ref_checked;
4647 rec->num_duplicates = 0;
4648 rec->metadata = tmpl->metadata;
4649 rec->flag_block_full_backref = FLAG_UNSET;
4650 rec->bad_full_backref = 0;
4651 rec->crossing_stripes = 0;
4652 rec->wrong_chunk_type = 0;
4653 rec->is_root = tmpl->is_root;
4654 rec->refs = tmpl->refs;
4655 rec->extent_item_refs = tmpl->extent_item_refs;
4656 rec->parent_generation = tmpl->parent_generation;
4657 INIT_LIST_HEAD(&rec->backrefs);
4658 INIT_LIST_HEAD(&rec->dups);
4659 INIT_LIST_HEAD(&rec->list);
4660 rec->backref_tree = RB_ROOT;
4661 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4662 rec->cache.start = tmpl->start;
4663 rec->cache.size = tmpl->nr;
4664 ret = insert_cache_extent(extent_cache, &rec->cache);
4666 bytes_used += rec->nr;
/* Metadata-only checks (guard elided): scrub can't handle extents
 * crossing a stripe boundary. */
4669 rec->crossing_stripes = check_crossing_stripes(rec->start,
4670 global_info->tree_root->nodesize);
4671 check_extent_type(rec);
4676 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4678 * - refs - if found, increase refs
4679 * - is_root - if found, set
4680 * - content_checked - if found, set
4681 * - owner_ref_checked - if found, set
4683 * If not found, create a new one, initialize and insert.
4685 static int add_extent_rec(struct cache_tree *extent_cache,
4686 struct extent_record *tmpl)
4688 struct extent_record *rec;
4689 struct cache_extent *cache;
4693 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4695 rec = container_of(cache, struct extent_record, cache);
4699 rec->nr = max(tmpl->nr, tmpl->max_size);
4702 * We need to make sure to reset nr to whatever the extent
4703 * record says was the real size, this way we can compare it to
4706 if (tmpl->found_rec) {
/* A second extent item overlapping this record: track a duplicate. */
4707 if (tmpl->start != rec->start || rec->found_rec) {
4708 struct extent_record *tmp;
4711 if (list_empty(&rec->list))
4712 list_add_tail(&rec->list,
4713 &duplicate_extents);
4716 * We have to do this song and dance in case we
4717 * find an extent record that falls inside of
4718 * our current extent record but does not have
4719 * the same objectid.
4721 tmp = malloc(sizeof(*tmp));
4724 tmp->start = tmpl->start;
4725 tmp->max_size = tmpl->max_size;
4728 tmp->metadata = tmpl->metadata;
4729 tmp->extent_item_refs = tmpl->extent_item_refs;
4730 INIT_LIST_HEAD(&tmp->list);
4731 list_add_tail(&tmp->list, &rec->dups);
4732 rec->num_duplicates++;
/* Conflicting ref counts from two extent items — report, keep latest. */
4739 if (tmpl->extent_item_refs && !dup) {
4740 if (rec->extent_item_refs) {
4741 fprintf(stderr, "block %llu rec "
4742 "extent_item_refs %llu, passed %llu\n",
4743 (unsigned long long)tmpl->start,
4744 (unsigned long long)
4745 rec->extent_item_refs,
4746 (unsigned long long)tmpl->extent_item_refs);
4748 rec->extent_item_refs = tmpl->extent_item_refs;
4752 if (tmpl->content_checked)
4753 rec->content_checked = 1;
4754 if (tmpl->owner_ref_checked)
4755 rec->owner_ref_checked = 1;
4756 memcpy(&rec->parent_key, &tmpl->parent_key,
4757 sizeof(tmpl->parent_key));
4758 if (tmpl->parent_generation)
4759 rec->parent_generation = tmpl->parent_generation;
4760 if (rec->max_size < tmpl->max_size)
4761 rec->max_size = tmpl->max_size;
4764 * A metadata extent can't cross stripe_len boundary, otherwise
4765 * kernel scrub won't be able to handle it.
4766 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4770 rec->crossing_stripes = check_crossing_stripes(
4771 rec->start, global_info->tree_root->nodesize);
4772 check_extent_type(rec);
4773 maybe_free_extent_rec(extent_cache, rec);
/* Not found in the cache: create a fresh record from the template. */
4777 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Register a tree backref for @bytenr, creating a stub extent record
 * first if none exists yet.  @found_ref distinguishes a ref seen while
 * walking the tree (found_ref) from one read out of the extent tree
 * (found_extent_tree); seeing either twice is reported as a duplicate.
 */
4782 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4783 u64 parent, u64 root, int found_ref)
4785 struct extent_record *rec;
4786 struct tree_backref *back;
4787 struct cache_extent *cache;
4789 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4791 struct extent_record tmpl;
4793 memset(&tmpl, 0, sizeof(tmpl));
4794 tmpl.start = bytenr;
/* NOTE(review): return value of the nolookup insert is not checked
 * on this line — error handling may be on elided lines. */
4798 add_extent_rec_nolookup(extent_cache, &tmpl);
/* Re-lookup the record we just inserted. */
4800 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4805 rec = container_of(cache, struct extent_record, cache);
4806 if (rec->start != bytenr) {
4810 back = find_tree_backref(rec, parent, root);
4812 back = alloc_tree_backref(rec, parent, root);
4817 if (back->node.found_ref) {
4818 fprintf(stderr, "Extent back ref already exists "
4819 "for %llu parent %llu root %llu \n",
4820 (unsigned long long)bytenr,
4821 (unsigned long long)parent,
4822 (unsigned long long)root);
4824 back->node.found_ref = 1;
4826 if (back->node.found_extent_tree) {
4827 fprintf(stderr, "Extent back ref already exists "
4828 "for %llu parent %llu root %llu \n",
4829 (unsigned long long)bytenr,
4830 (unsigned long long)parent,
4831 (unsigned long long)root);
4833 back->node.found_extent_tree = 1;
4835 check_extent_type(rec);
4836 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register a data backref for @bytenr, creating a stub extent record if
 * needed.  @found_ref means the ref came from a file extent item (so
 * @max_size is the real extent size and num_refs must be 1);
 * otherwise the ref came from the extent tree and carries @num_refs.
 */
4840 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4841 u64 parent, u64 root, u64 owner, u64 offset,
4842 u32 num_refs, int found_ref, u64 max_size)
4844 struct extent_record *rec;
4845 struct data_backref *back;
4846 struct cache_extent *cache;
4848 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4850 struct extent_record tmpl;
4852 memset(&tmpl, 0, sizeof(tmpl));
4853 tmpl.start = bytenr;
4855 tmpl.max_size = max_size;
4857 add_extent_rec_nolookup(extent_cache, &tmpl);
4859 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4864 rec = container_of(cache, struct extent_record, cache);
4865 if (rec->max_size < max_size)
4866 rec->max_size = max_size;
4869 * If found_ref is set then max_size is the real size and must match the
4870 * existing refs. So if we have already found a ref then we need to
4871 * make sure that this ref matches the existing one, otherwise we need
4872 * to add a new backref so we can notice that the backrefs don't match
4873 * and we need to figure out who is telling the truth. This is to
4874 * account for that awful fsync bug I introduced where we'd end up with
4875 * a btrfs_file_extent_item that would have its length include multiple
4876 * prealloc extents or point inside of a prealloc extent.
4878 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4881 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found_ref branch (guard elided): ref seen from a file extent item. */
4887 BUG_ON(num_refs != 1);
4888 if (back->node.found_ref)
4889 BUG_ON(back->bytes != max_size);
4890 back->node.found_ref = 1;
4891 back->found_ref += 1;
4892 back->bytes = max_size;
4893 back->disk_bytenr = bytenr;
/* File extents imply content/owner are implicitly verified. */
4895 rec->content_checked = 1;
4896 rec->owner_ref_checked = 1;
4898 if (back->node.found_extent_tree) {
4899 fprintf(stderr, "Extent back ref already exists "
4900 "for %llu parent %llu root %llu "
4901 "owner %llu offset %llu num_refs %lu\n",
4902 (unsigned long long)bytenr,
4903 (unsigned long long)parent,
4904 (unsigned long long)root,
4905 (unsigned long long)owner,
4906 (unsigned long long)offset,
4907 (unsigned long)num_refs);
4909 back->num_refs = num_refs;
4910 back->node.found_extent_tree = 1;
4912 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later scanning: insert into @seen
 * (deduplication — failure means already seen, early return elided),
 * then into @pending.
 */
4916 static int add_pending(struct cache_tree *pending,
4917 struct cache_tree *seen, u64 bytenr, u32 size)
4920 ret = add_cache_extent(seen, bytenr, size);
4923 add_cache_extent(pending, bytenr, size);
/*
 * Pick up to @bits_nr extents to process next, preferring (in order)
 * readahead candidates, nodes near @last (a 32K window back from it for
 * locality), and finally the plain pending queue.  The selected ranges
 * are copied into @bits and the count returned (returns elided in this
 * view).
 */
4927 static int pick_next_pending(struct cache_tree *pending,
4928 struct cache_tree *reada,
4929 struct cache_tree *nodes,
4930 u64 last, struct block_info *bits, int bits_nr,
4933 unsigned long node_start = last;
4934 struct cache_extent *cache;
/* Readahead entries take absolute priority. */
4937 cache = search_cache_extent(reada, 0);
4939 bits[0].start = cache->start;
4940 bits[0].size = cache->size;
/* Back off 32K from @last to catch nearby nodes first. */
4945 if (node_start > 32768)
4946 node_start -= 32768;
4948 cache = search_cache_extent(nodes, node_start);
4950 cache = search_cache_extent(nodes, 0);
4953 cache = search_cache_extent(pending, 0);
4958 bits[ret].start = cache->start;
4959 bits[ret].size = cache->size;
4960 cache = next_cache_extent(cache);
4962 } while (cache && ret < bits_nr);
4968 bits[ret].start = cache->start;
4969 bits[ret].size = cache->size;
4970 cache = next_cache_extent(cache);
4972 } while (cache && ret < bits_nr);
/* Spare slots left: pull in pending extents contiguous-ish with the
 * batch (within a 32K gap). */
4974 if (bits_nr - ret > 8) {
4975 u64 lookup = bits[0].start + bits[0].size;
4976 struct cache_extent *next;
4977 next = search_cache_extent(pending, lookup);
4979 if (next->start - lookup > 32768)
4981 bits[ret].start = next->start;
4982 bits[ret].size = next->size;
4983 lookup = next->start + next->size;
4987 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is freed (free() itself is on an elided line).
 */
4995 static void free_chunk_record(struct cache_extent *cache)
4997 struct chunk_record *rec;
4999 rec = container_of(cache, struct chunk_record, cache);
5000 list_del_init(&rec->list);
5001 list_del_init(&rec->dextents);
/* Free every chunk record in @chunk_cache via free_chunk_record(). */
5005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5007 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * FREE_RB_BASED_TREE callback: free one device record embedded in an
 * rb-tree node (the free() call is on an elided line).
 */
5010 static void free_device_record(struct rb_node *node)
5012 struct device_record *rec;
5014 rec = container_of(node, struct device_record, node);
5018 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's cache (failure return elided) and append
 * it to the tree's block_groups list.
 */
5020 int insert_block_group_record(struct block_group_tree *tree,
5021 struct block_group_record *bg_rec)
5025 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5029 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block group record from
 * its list before it is freed (free() itself is on an elided line).
 */
5033 static void free_block_group_record(struct cache_extent *cache)
5035 struct block_group_record *rec;
5037 rec = container_of(cache, struct block_group_record, cache);
5038 list_del_init(&rec->list);
/* Free every block group record in @tree via free_block_group_record(). */
5042 void free_block_group_tree(struct block_group_tree *tree)
5044 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree.  Newly inserted records start on both
 * orphan lists; they are moved off as their owning chunk/device are
 * matched up later.
 */
5047 int insert_device_extent_record(struct device_extent_tree *tree,
5048 struct device_extent_record *de_rec)
5053 * Device extent is a bit different from the other extents, because
5054 * the extents which belong to the different devices may have the
5055 * same start and size, so we need use the special extent cache
5056 * search/insert functions.
5058 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5062 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5063 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device extent record
 * from whichever orphan/owner lists it is still on, then free it
 * (free() on an elided line).
 */
5067 static void free_device_extent_record(struct cache_extent *cache)
5069 struct device_extent_record *rec;
5071 rec = container_of(cache, struct device_extent_record, cache);
5072 if (!list_empty(&rec->chunk_list))
5073 list_del_init(&rec->chunk_list);
5074 if (!list_empty(&rec->device_list))
5075 list_del_init(&rec->device_list);
/* Free every device extent record in @tree via free_device_extent_record(). */
5079 void free_device_extent_tree(struct device_extent_tree *tree)
5081 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5084 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy v0 extent ref: objectids below BTRFS_FIRST_FREE_OBJECTID are
 * tree roots (tree backref), anything else is a file (data backref).
 * key.offset carries the parent block number in both cases.
 */
5085 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5086 struct extent_buffer *leaf, int slot)
5088 struct btrfs_extent_ref_v0 *ref0;
5089 struct btrfs_key key;
5091 btrfs_item_key_to_cpu(leaf, &key, slot);
5092 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5093 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5094 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5096 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5097 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of
 * @leaf.  The record is sized for the chunk's stripe count and each
 * stripe's devid/offset/uuid is copied out.  Exits the program on
 * allocation failure.
 */
5103 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5104 struct btrfs_key *key,
5107 struct btrfs_chunk *ptr;
5108 struct chunk_record *rec;
5111 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5112 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Variable-size allocation: header plus num_stripes stripe entries. */
5114 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5116 fprintf(stderr, "memory allocation failed\n");
5120 INIT_LIST_HEAD(&rec->list);
5121 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are keyed by logical start (key->offset). */
5124 rec->cache.start = key->offset;
5125 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5127 rec->generation = btrfs_header_generation(leaf);
5129 rec->objectid = key->objectid;
5130 rec->type = key->type;
5131 rec->offset = key->offset;
5133 rec->length = rec->cache.size;
5134 rec->owner = btrfs_chunk_owner(leaf, ptr);
5135 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5136 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5137 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5138 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5139 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5140 rec->num_stripes = num_stripes;
5141 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5143 for (i = 0; i < rec->num_stripes; ++i) {
5144 rec->stripes[i].devid =
5145 btrfs_stripe_devid_nr(leaf, ptr, i);
5146 rec->stripes[i].offset =
5147 btrfs_stripe_offset_nr(leaf, ptr, i);
5148 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5149 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the chunk item at @slot into a chunk_record and insert it into
 * @chunk_cache; a collision means the same chunk appeared twice and is
 * reported (cleanup of the duplicate record is on elided lines).
 */
5156 static int process_chunk_item(struct cache_tree *chunk_cache,
5157 struct btrfs_key *key, struct extent_buffer *eb,
5160 struct chunk_record *rec;
5163 rec = btrfs_new_chunk_record(eb, key, slot);
5164 ret = insert_cache_extent(chunk_cache, &rec->cache);
5166 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5167 rec->offset, rec->length);
/*
 * Parse the DEV_ITEM at @slot into a device_record and insert it into
 * @dev_cache keyed by devid; a collision means the same device appeared
 * twice and is reported.
 */
5174 static int process_device_item(struct rb_root *dev_cache,
5175 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5177 struct btrfs_dev_item *ptr;
5178 struct device_record *rec;
5181 ptr = btrfs_item_ptr(eb,
5182 slot, struct btrfs_dev_item);
5184 rec = malloc(sizeof(*rec));
5186 fprintf(stderr, "memory allocation failed\n");
/* devid is set twice: first from the key, then (authoritative) from
 * the item body below. */
5190 rec->devid = key->offset;
5191 rec->generation = btrfs_header_generation(eb);
5193 rec->objectid = key->objectid;
5194 rec->type = key->type;
5195 rec->offset = key->offset;
5197 rec->devid = btrfs_device_id(eb, ptr);
5198 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5199 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5201 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5203 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot: keyed by logical start (key->objectid) and length
 * (key->offset), with the allocation flags copied from the item.
 * Exits the program on allocation failure.
 */
5210 struct block_group_record *
5211 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5214 struct btrfs_block_group_item *ptr;
5215 struct block_group_record *rec;
5217 rec = calloc(1, sizeof(*rec));
5219 fprintf(stderr, "memory allocation failed\n");
5223 rec->cache.start = key->objectid;
5224 rec->cache.size = key->offset;
5226 rec->generation = btrfs_header_generation(leaf);
5228 rec->objectid = key->objectid;
5229 rec->type = key->type;
5230 rec->offset = key->offset;
5232 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5233 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5235 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the block group item at @slot into a record and insert it into
 * @block_group_cache; a collision means the same block group appeared
 * twice and is reported.
 */
5240 static int process_block_group_item(struct block_group_tree *block_group_cache,
5241 struct btrfs_key *key,
5242 struct extent_buffer *eb, int slot)
5244 struct block_group_record *rec;
5247 rec = btrfs_new_block_group_record(eb, key, slot);
5248 ret = insert_block_group_record(block_group_cache, rec);
5250 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5251 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot: cache keyed by (devid, physical offset) since different
 * devices may hold extents with identical start/size.  Exits the
 * program on allocation failure.
 */
5258 struct device_extent_record *
5259 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5260 struct btrfs_key *key, int slot)
5262 struct device_extent_record *rec;
5263 struct btrfs_dev_extent *ptr;
5265 rec = calloc(1, sizeof(*rec));
5267 fprintf(stderr, "memory allocation failed\n");
5271 rec->cache.objectid = key->objectid;
5272 rec->cache.start = key->offset;
5274 rec->generation = btrfs_header_generation(leaf);
5276 rec->objectid = key->objectid;
5277 rec->type = key->type;
5278 rec->offset = key->offset;
5280 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5281 rec->chunk_objecteid =
5282 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5284 btrfs_dev_extent_chunk_offset(leaf, ptr);
5285 rec->length = btrfs_dev_extent_length(leaf, ptr);
5286 rec->cache.size = rec->length;
5288 INIT_LIST_HEAD(&rec->chunk_list);
5289 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the device extent item at @slot into a record and insert it
 * into @dev_extent_cache; a collision means the same device extent
 * appeared twice and is reported.  (Return type line is elided in this
 * view.)
 */
5295 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5296 struct btrfs_key *key, struct extent_buffer *eb,
5299 struct device_extent_record *rec;
5302 rec = btrfs_new_device_extent_record(eb, key, slot);
5303 ret = insert_device_extent_record(dev_extent_cache, rec);
5306 "Device extent[%llu, %llu, %llu] existed.\n",
5307 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM or METADATA_ITEM at @slot: create/update the
 * extent record with its ref count, then walk the inline refs and
 * register each one as a tree or data backref.  METADATA_ITEM keys
 * store the level in key.offset, so the size comes from nodesize.
 */
5314 static int process_extent_item(struct btrfs_root *root,
5315 struct cache_tree *extent_cache,
5316 struct extent_buffer *eb, int slot)
5318 struct btrfs_extent_item *ei;
5319 struct btrfs_extent_inline_ref *iref;
5320 struct btrfs_extent_data_ref *dref;
5321 struct btrfs_shared_data_ref *sref;
5322 struct btrfs_key key;
5323 struct extent_record tmpl;
5327 u32 item_size = btrfs_item_size_nr(eb, slot);
5333 btrfs_item_key_to_cpu(eb, &key, slot);
5335 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5337 num_bytes = root->nodesize;
5339 num_bytes = key.offset;
/* Pre-v2 extent item: only a ref count, no flags or inline refs. */
5342 if (item_size < sizeof(*ei)) {
5343 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5344 struct btrfs_extent_item_v0 *ei0;
5345 BUG_ON(item_size != sizeof(*ei0));
5346 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5347 refs = btrfs_extent_refs_v0(eb, ei0);
5351 memset(&tmpl, 0, sizeof(tmpl));
5352 tmpl.start = key.objectid;
5353 tmpl.nr = num_bytes;
5354 tmpl.extent_item_refs = refs;
5355 tmpl.metadata = metadata;
5357 tmpl.max_size = num_bytes;
5359 return add_extent_rec(extent_cache, &tmpl);
5362 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5363 refs = btrfs_extent_refs(eb, ei);
5364 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5369 memset(&tmpl, 0, sizeof(tmpl));
5370 tmpl.start = key.objectid;
5371 tmpl.nr = num_bytes;
5372 tmpl.extent_item_refs = refs;
5373 tmpl.metadata = metadata;
5375 tmpl.max_size = num_bytes;
5376 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs that follow the extent item. */
5378 ptr = (unsigned long)(ei + 1);
5379 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5380 key.type == BTRFS_EXTENT_ITEM_KEY)
5381 ptr += sizeof(struct btrfs_tree_block_info);
5383 end = (unsigned long)ei + item_size;
5385 iref = (struct btrfs_extent_inline_ref *)ptr;
5386 type = btrfs_extent_inline_ref_type(eb, iref);
5387 offset = btrfs_extent_inline_ref_offset(eb, iref);
5389 case BTRFS_TREE_BLOCK_REF_KEY:
5390 add_tree_backref(extent_cache, key.objectid,
5393 case BTRFS_SHARED_BLOCK_REF_KEY:
5394 add_tree_backref(extent_cache, key.objectid,
5397 case BTRFS_EXTENT_DATA_REF_KEY:
5398 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5399 add_data_backref(extent_cache, key.objectid, 0,
5400 btrfs_extent_data_ref_root(eb, dref),
5401 btrfs_extent_data_ref_objectid(eb,
5403 btrfs_extent_data_ref_offset(eb, dref),
5404 btrfs_extent_data_ref_count(eb, dref),
5407 case BTRFS_SHARED_DATA_REF_KEY:
5408 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5409 add_data_backref(extent_cache, key.objectid, offset,
5411 btrfs_shared_data_ref_count(eb, sref),
5415 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5416 key.objectid, key.type, num_bytes);
5419 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify the free space cache covers exactly [offset, offset+bytes):
 * first carve out any superblock mirror locations (which are never
 * free space), recursing for a mirror landing in the middle of the
 * range, then require a free-space entry that matches the remaining
 * range exactly; matching entries are unlinked as consumed.
 */
5426 static int check_cache_range(struct btrfs_root *root,
5427 struct btrfs_block_group_cache *cache,
5428 u64 offset, u64 bytes)
5430 struct btrfs_free_space *entry;
5436 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5437 bytenr = btrfs_sb_offset(i);
5438 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5439 cache->key.objectid, bytenr, 0,
5440 &logical, &nr, &stripe_len);
/* Skip physical copies that don't intersect [offset, offset+bytes). */
5445 if (logical[nr] + stripe_len <= offset)
5447 if (offset + bytes <= logical[nr])
5449 if (logical[nr] == offset) {
5450 if (stripe_len >= bytes) {
5454 bytes -= stripe_len;
5455 offset += stripe_len;
5456 } else if (logical[nr] < offset) {
5457 if (logical[nr] + stripe_len >=
5462 bytes = (offset + bytes) -
5463 (logical[nr] + stripe_len);
5464 offset = logical[nr] + stripe_len;
5467 * Could be tricky, the super may land in the
5468 * middle of the area we're checking. First
5469 * check the easiest case, it's at the end.
5471 if (logical[nr] + stripe_len >=
5473 bytes = logical[nr] - offset;
5477 /* Check the left side */
5478 ret = check_cache_range(root, cache,
5480 logical[nr] - offset);
5486 /* Now we continue with the right side */
5487 bytes = (offset + bytes) -
5488 (logical[nr] + stripe_len);
5489 offset = logical[nr] + stripe_len;
/* The remaining range must match a free-space entry exactly. */
5496 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5498 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5499 offset, offset+bytes);
5503 if (entry->offset != offset) {
5504 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5509 if (entry->bytes != bytes) {
5510 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5511 bytes, entry->bytes, offset);
/* Consume the entry so leftovers can be detected afterwards. */
5515 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's loaded free space cache against the extent
 * tree: walk all EXTENT_ITEM/METADATA_ITEM records inside the group and,
 * for each gap between allocated extents, require a matching free-space
 * entry via check_cache_range().  Any entries remaining in the ctl after
 * the walk mean the cache disagrees with the extent tree.
 *
 * NOTE(review): interior lines (error handling, loop framing, returns) are
 * missing from this extract.
 */
5520 static int verify_space_cache(struct btrfs_root *root,
5521 struct btrfs_block_group_cache *cache)
5523 struct btrfs_path *path;
5524 struct extent_buffer *leaf;
5525 struct btrfs_key key;
5529 path = btrfs_alloc_path();
/* All extent records live in the extent root. */
5533 root = root->fs_info->extent_root;
/* Never look below the first superblock offset. */
5535 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5537 key.objectid = last;
5539 key.type = BTRFS_EXTENT_ITEM_KEY;
5541 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5546 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5547 ret = btrfs_next_leaf(root, path);
5555 leaf = path->nodes[0];
5556 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we pass the end of this block group. */
5557 if (key.objectid >= cache->key.offset + cache->key.objectid)
5559 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5560 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap. */
5565 if (last == key.objectid) {
5566 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5567 last = key.objectid + key.offset;
/* METADATA_ITEM offset is the tree level; size is nodesize. */
5569 last = key.objectid + root->nodesize;
/* Gap between last and this extent must be free space. */
5574 ret = check_cache_range(root, cache, last,
5575 key.objectid - last);
5578 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5579 last = key.objectid + key.offset;
5581 last = key.objectid + root->nodesize;
/* Trailing gap up to the end of the block group. */
5585 if (last < cache->key.objectid + cache->key.offset)
5586 ret = check_cache_range(root, cache, last,
5587 cache->key.objectid +
5588 cache->key.offset - last);
5591 btrfs_free_path(path);
/* Any surviving entries mean the cache claimed bogus free space. */
5594 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5595 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache (v1 file-based or v2 free-space-tree) for
 * every block group in the filesystem.  Returns -EINVAL if any group's
 * cache failed verification, 0 otherwise (based on the visible final
 * return; 'error' accumulation lines are missing from this extract).
 */
5603 static int check_space_cache(struct btrfs_root *root)
5605 struct btrfs_block_group_cache *cache;
/* Start scanning after the primary superblock. */
5606 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache_generation of -1ULL means "no v1 cache"; otherwise a mismatch
 * with the super generation invalidates the whole cache.
 */
5610 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5611 btrfs_super_generation(root->fs_info->super_copy) !=
5612 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5613 printf("cache and super generation don't match, space cache "
5614 "will be invalidated\n");
5618 if (ctx.progress_enabled) {
5619 ctx.tp = TASK_FREE_SPACE;
5620 task_start(ctx.info);
/* Iterate block groups in address order. */
5624 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5628 start = cache->key.objectid + cache->key.offset;
5629 if (!cache->free_space_ctl) {
5630 if (btrfs_init_free_space_ctl(cache,
5631 root->sectorsize)) {
/* Drop any previously-loaded entries before (re)loading. */
5636 btrfs_remove_free_space_cache(cache);
/* v2: load from the free space tree, excluding super stripes. */
5639 if (btrfs_fs_compat_ro(root->fs_info,
5640 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5641 ret = exclude_super_stripes(root, cache);
5643 fprintf(stderr, "could not exclude super stripes: %s\n",
5648 ret = load_free_space_tree(root->fs_info, cache);
5649 free_excluded_extents(root, cache);
5651 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: load the on-disk free space cache file. */
5658 ret = load_free_space_cache(root->fs_info, cache);
5663 ret = verify_space_cache(root, cache);
5665 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5666 cache->key.objectid);
5671 task_stop(ctx.info);
5673 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data at @bytenr and verify each sectorsize chunk
 * against the checksums stored in @eb starting at @leaf_offset.  On a
 * mismatch, retries with the next mirror (visible from the num_copies
 * check).  NOTE(review): the mirror-retry control flow, csum type/width
 * handling and the cleanup/return paths are among the lines missing from
 * this extract.
 */
5676 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5677 u64 num_bytes, unsigned long leaf_offset,
5678 struct extent_buffer *eb) {
5681 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5683 unsigned long csum_offset;
5687 u64 data_checked = 0;
/* Csums only cover whole sectors. */
5693 if (num_bytes % root->sectorsize)
5696 data = malloc(num_bytes);
5700 while (offset < num_bytes) {
5703 read_len = num_bytes - offset;
5704 /* read as much space once a time */
5705 ret = read_extent_data(root, data + offset,
5706 bytenr + offset, &read_len, mirror);
5710 /* verify every 4k data's checksum */
5711 while (data_checked < read_len) {
5713 tmp = offset + data_checked;
/* Compute the csum of this sector... */
5715 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5716 csum, root->sectorsize);
5717 btrfs_csum_final(csum, (char *)&csum);
/* ...and compare against the stored csum in the leaf. */
5719 csum_offset = leaf_offset +
5720 tmp / root->sectorsize * csum_size;
5721 read_extent_buffer(eb, (char *)&csum_expected,
5722 csum_offset, csum_size);
5723 /* try another mirror */
5724 if (csum != csum_expected) {
5725 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5726 mirror, bytenr + tmp,
5727 csum, csum_expected);
5728 num_copies = btrfs_num_copies(
5729 &root->fs_info->mapping_tree,
/* More mirrors left to try before giving up. */
5731 if (mirror < num_copies - 1) {
5736 data_checked += root->sectorsize;
/*
 * Verify that the extent tree has EXTENT_ITEM records covering the whole
 * range [bytenr, bytenr + num_bytes).  Walks forward from the record at or
 * before bytenr, shrinking the unaccounted range as coverage is found; if
 * an extent splits the range in the middle it recurses on the right half.
 * Reports any uncovered remainder (csums with no backing extent).
 *
 * NOTE(review): interior lines (loop framing, releases, some returns) are
 * missing from this extract.
 */
5745 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5748 struct btrfs_path *path;
5749 struct extent_buffer *leaf;
5750 struct btrfs_key key;
5753 path = btrfs_alloc_path();
5755 fprintf(stderr, "Error allocating path\n");
/* Search for the last record at or before bytenr. */
5759 key.objectid = bytenr;
5760 key.type = BTRFS_EXTENT_ITEM_KEY;
5761 key.offset = (u64)-1;
5764 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5767 fprintf(stderr, "Error looking up extent record %d\n", ret);
5768 btrfs_free_path(path);
/* Step back one slot (or leaf) to the record preceding bytenr. */
5771 if (path->slots[0] > 0) {
5774 ret = btrfs_prev_leaf(root, path);
5777 } else if (ret > 0) {
5784 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5787 * Block group items come before extent items if they have the same
5788 * bytenr, so walk back one more just in case. Dear future traveller,
5789 * first congrats on mastering time travel. Now if it's not too much
5790 * trouble could you go back to 2006 and tell Chris to make the
5791 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5792 * EXTENT_ITEM_KEY please?
5794 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5795 if (path->slots[0] > 0) {
5798 ret = btrfs_prev_leaf(root, path);
5801 } else if (ret > 0) {
5806 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward walk over extent records covering the range. */
5810 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5811 ret = btrfs_next_leaf(root, path);
5813 fprintf(stderr, "Error going to next leaf "
5815 btrfs_free_path(path);
5821 leaf = path->nodes[0];
5822 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5823 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Record ends before our range: skip. */
5827 if (key.objectid + key.offset < bytenr) {
/* Record starts past our range: done walking. */
5831 if (key.objectid > bytenr + num_bytes)
/* Record starts exactly at bytenr: consume its coverage. */
5834 if (key.objectid == bytenr) {
5835 if (key.offset >= num_bytes) {
5839 num_bytes -= key.offset;
5840 bytenr += key.offset;
/* Record starts before bytenr but overlaps: trim the front. */
5841 } else if (key.objectid < bytenr) {
5842 if (key.objectid + key.offset >= bytenr + num_bytes) {
5846 num_bytes = (bytenr + num_bytes) -
5847 (key.objectid + key.offset);
5848 bytenr = key.objectid + key.offset;
/* Record sits strictly inside our range: split into two halves. */
5850 if (key.objectid + key.offset < bytenr + num_bytes) {
5851 u64 new_start = key.objectid + key.offset;
5852 u64 new_bytes = bytenr + num_bytes - new_start;
5855 * Weird case, the extent is in the middle of
5856 * our range, we'll have to search one side
5857 * and then the other. Not sure if this happens
5858 * in real life, but no harm in coding it up
5859 * anyway just in case.
5861 btrfs_release_path(path);
5862 ret = check_extent_exists(root, new_start,
5865 fprintf(stderr, "Right section didn't "
5869 num_bytes = key.objectid - bytenr;
5872 num_bytes = key.objectid - bytenr;
/* Anything left uncovered is an error. */
5879 if (num_bytes && !ret) {
5880 fprintf(stderr, "There are no extents for csum range "
5881 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5885 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For every EXTENT_CSUM item: optionally verify
 * the data checksums themselves (when check_data_csum is set), and verify
 * that each contiguous csum range is backed by real extent records via
 * check_extent_exists().  Adjacent csum items are coalesced into one range
 * (offset/num_bytes) before the existence check.
 *
 * NOTE(review): loop framing and some returns are missing from this
 * extract.
 */
5889 static int check_csums(struct btrfs_root *root)
5891 struct btrfs_path *path;
5892 struct extent_buffer *leaf;
5893 struct btrfs_key key;
5894 u64 offset = 0, num_bytes = 0;
5895 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5899 unsigned long leaf_offset;
5901 root = root->fs_info->csum_root;
5902 if (!extent_buffer_uptodate(root->node)) {
5903 fprintf(stderr, "No valid csum tree found\n");
/* All csum items share this objectid; start from offset 0. */
5907 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5908 key.type = BTRFS_EXTENT_CSUM_KEY;
5911 path = btrfs_alloc_path();
5915 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5917 fprintf(stderr, "Error searching csum tree %d\n", ret);
5918 btrfs_free_path(path);
/* Landed past the first item: back up one slot. */
5922 if (ret > 0 && path->slots[0])
5927 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5928 ret = btrfs_next_leaf(root, path);
5930 fprintf(stderr, "Error going to next leaf "
5937 leaf = path->nodes[0];
5939 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5940 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of item data covers one sector. */
5945 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5946 csum_size) * root->sectorsize;
5947 if (!check_data_csum)
5948 goto skip_csum_check;
5949 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5950 ret = check_extent_csums(root, key.offset, data_len,
5956 offset = key.offset;
/* Discontinuity: flush the accumulated range and check coverage. */
5957 } else if (key.offset != offset + num_bytes) {
5958 ret = check_extent_exists(root, offset, num_bytes);
5960 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5961 "there is no extent record\n",
5962 offset, offset+num_bytes);
5965 offset = key.offset;
5968 num_bytes += data_len;
5972 btrfs_free_path(path);
/*
 * Lexicographic compare of @key against a snapshot-deletion progress key:
 * returns nonzero when @key sorts strictly before @drop_key, i.e. the key
 * falls in the already-dropped portion of a partially deleted subvolume.
 * (The return statements themselves are among the lines missing from this
 * extract; behavior inferred should be confirmed against the full file.)
 */
5976 static int is_dropped_key(struct btrfs_key *key,
5977 struct btrfs_key *drop_key) {
5978 if (key->objectid < drop_key->objectid)
5980 else if (key->objectid == drop_key->objectid) {
5981 if (key->type < drop_key->type)
5983 else if (key->type == drop_key->type) {
5984 if (key->offset < drop_key->offset)
5992 * Here are the rules for FULL_BACKREF.
5994 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5995 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5997 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5998 * if it happened after the relocation occurred since we'll have dropped the
5999 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6000 * have no real way to know for sure.
6002 * We process the blocks one root at a time, and we start from the lowest root
6003 * objectid and go to the highest. So we can just lookup the owner backref for
6004 * the record and if we don't find it then we know it doesn't exist and we have
6007 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6008 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6009 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry FULL_BACKREF in @flags,
 * per the rules documented above this function: root blocks and blocks
 * owned by their current root are normal backrefs; RELOC-flagged blocks or
 * blocks whose owner backref is absent are full backrefs.  Also records a
 * bad_full_backref mismatch on the extent record when the previously seen
 * flag disagrees with the decision made here.
 */
6011 static int calc_extent_flag(struct btrfs_root *root,
6012 struct cache_tree *extent_cache,
6013 struct extent_buffer *buf,
6014 struct root_item_record *ri,
6017 struct extent_record *rec;
6018 struct cache_extent *cache;
6019 struct tree_backref *tback;
6022 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6023 /* we have added this extent before */
6025 rec = container_of(cache, struct extent_record, cache);
6028 * Except file/reloc tree, we can not have
6031 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root block of the tree we are walking: never full backref. */
6036 if (buf->start == ri->bytenr)
/* RELOC header flag forces FULL_BACKREF (rule 1 above). */
6039 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6042 owner = btrfs_header_owner(buf);
/* Still owned by the root we reached it from: normal backref. */
6043 if (owner == ri->objectid)
6046 tback = find_tree_backref(rec, 0, owner);
/* Normal-backref path: flag a mismatch if we previously saw full. */
6051 if (rec->flag_block_full_backref != FLAG_UNSET &&
6052 rec->flag_block_full_backref != 0)
6053 rec->bad_full_backref = 1;
/* Full-backref path: flag a mismatch if we previously saw normal. */
6056 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6057 if (rec->flag_block_full_backref != FLAG_UNSET &&
6058 rec->flag_block_full_backref != 1)
6059 rec->bad_full_backref = 1;
/*
 * Core of the extent-tree scan: pick the next pending tree block, read it,
 * determine its FULL_BACKREF status, sanity-check it, then record every
 * item it contains into the appropriate cache (extents, backrefs, chunks,
 * devices, block groups, device extents) and queue child blocks of nodes
 * for later processing.  Also accumulates the global byte-accounting
 * counters (total_btree_bytes etc).
 *
 * NOTE(review): many interior lines (declarations, error checks, break/
 * continue statements) are missing from this extract; comments describe
 * only the visible logic.
 */
6063 static int run_next_block(struct btrfs_root *root,
6064 struct block_info *bits,
6067 struct cache_tree *pending,
6068 struct cache_tree *seen,
6069 struct cache_tree *reada,
6070 struct cache_tree *nodes,
6071 struct cache_tree *extent_cache,
6072 struct cache_tree *chunk_cache,
6073 struct rb_root *dev_cache,
6074 struct block_group_tree *block_group_cache,
6075 struct device_extent_tree *dev_extent_cache,
6076 struct root_item_record *ri)
6078 struct extent_buffer *buf;
6079 struct extent_record *rec = NULL;
6090 struct btrfs_key key;
6091 struct cache_extent *cache;
/* Choose the next batch of blocks; may return readahead candidates. */
6094 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6095 bits_nr, &reada_bits);
/* Readahead path: queue speculative reads and record them in 'reada'. */
6100 for(i = 0; i < nritems; i++) {
6101 ret = add_cache_extent(reada, bits[i].start,
6106 /* fixme, get the parent transid */
6107 readahead_tree_block(root, bits[i].start,
6111 *last = bits[0].start;
6112 bytenr = bits[0].start;
6113 size = bits[0].size;
/* This block is being processed now: drop it from the work caches. */
6115 cache = lookup_cache_extent(pending, bytenr, size);
6117 remove_cache_extent(pending, cache);
6120 cache = lookup_cache_extent(reada, bytenr, size);
6122 remove_cache_extent(reada, cache);
6125 cache = lookup_cache_extent(nodes, bytenr, size);
6127 remove_cache_extent(nodes, cache);
6130 cache = lookup_cache_extent(extent_cache, bytenr, size);
6132 rec = container_of(cache, struct extent_record, cache);
6133 gen = rec->parent_generation;
6136 /* fixme, get the real parent transid */
6137 buf = read_tree_block(root, bytenr, size, gen);
6138 if (!extent_buffer_uptodate(buf)) {
6139 record_bad_block_io(root->fs_info,
6140 extent_cache, bytenr, size);
6144 nritems = btrfs_header_nritems(buf);
/*
 * Determine FULL_BACKREF: normally ask the extent tree, falling back to
 * calc_extent_flag(); when rebuilding the extent tree (init_extent_tree)
 * we must compute it ourselves.
 */
6147 if (!init_extent_tree) {
6148 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6149 btrfs_header_level(buf), 1, NULL,
6152 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6154 fprintf(stderr, "Couldn't calc extent flags\n");
6155 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6160 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6162 fprintf(stderr, "Couldn't calc extent flags\n");
6163 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6167 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6169 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6170 ri->objectid == btrfs_header_owner(buf)) {
6172 * Ok we got to this block from it's original owner and
6173 * we have FULL_BACKREF set. Relocation can leave
6174 * converted blocks over so this is altogether possible,
6175 * however it's not possible if the generation > the
6176 * last snapshot, so check for this case.
6178 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6179 btrfs_header_generation(buf) > ri->last_snapshot) {
6180 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6181 rec->bad_full_backref = 1;
/* Inverse mismatch: should be full backref but the flag is missing. */
6186 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6187 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6188 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6189 rec->bad_full_backref = 1;
/* Remember the decided state on the extent record for later checks. */
6193 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6194 rec->flag_block_full_backref = 1;
6198 rec->flag_block_full_backref = 0;
6200 owner = btrfs_header_owner(buf);
6203 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type into the right cache. */
6207 if (btrfs_is_leaf(buf)) {
6208 btree_space_waste += btrfs_leaf_free_space(root, buf);
6209 for (i = 0; i < nritems; i++) {
6210 struct btrfs_file_extent_item *fi;
6211 btrfs_item_key_to_cpu(buf, &key, i);
6212 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6213 process_extent_item(root, extent_cache, buf,
6217 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6218 process_extent_item(root, extent_cache, buf,
6222 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6224 btrfs_item_size_nr(buf, i);
6227 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6228 process_chunk_item(chunk_cache, &key, buf, i);
6231 if (key.type == BTRFS_DEV_ITEM_KEY) {
6232 process_device_item(dev_cache, &key, buf, i);
6235 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6236 process_block_group_item(block_group_cache,
6240 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6241 process_device_extent_item(dev_extent_cache,
/* Legacy v0 ref items only handled when compiled with compat support. */
6246 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6247 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6248 process_extent_ref_v0(extent_cache, buf, i);
6255 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6256 add_tree_backref(extent_cache, key.objectid, 0,
6260 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6261 add_tree_backref(extent_cache, key.objectid,
6265 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6266 struct btrfs_extent_data_ref *ref;
6267 ref = btrfs_item_ptr(buf, i,
6268 struct btrfs_extent_data_ref);
6269 add_data_backref(extent_cache,
6271 btrfs_extent_data_ref_root(buf, ref),
6272 btrfs_extent_data_ref_objectid(buf,
6274 btrfs_extent_data_ref_offset(buf, ref),
6275 btrfs_extent_data_ref_count(buf, ref),
6276 0, root->sectorsize);
6279 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6280 struct btrfs_shared_data_ref *ref;
6281 ref = btrfs_item_ptr(buf, i,
6282 struct btrfs_shared_data_ref);
6283 add_data_backref(extent_cache,
6284 key.objectid, key.offset, 0, 0, 0,
6285 btrfs_shared_data_ref_count(buf, ref),
6286 0, root->sectorsize);
/* Orphan items get queued on delete_items for later repair. */
6289 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6290 struct bad_item *bad;
6292 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6296 bad = malloc(sizeof(struct bad_item));
6299 INIT_LIST_HEAD(&bad->list);
6300 memcpy(&bad->key, &key,
6301 sizeof(struct btrfs_key));
6302 bad->root_id = owner;
6303 list_add_tail(&bad->list, &delete_items);
/* Remaining leaf work applies only to regular file extents. */
6306 if (key.type != BTRFS_EXTENT_DATA_KEY)
6308 fi = btrfs_item_ptr(buf, i,
6309 struct btrfs_file_extent_item);
6310 if (btrfs_file_extent_type(buf, fi) ==
6311 BTRFS_FILE_EXTENT_INLINE)
/* Holes have disk_bytenr 0 and reference no extent. */
6313 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6316 data_bytes_allocated +=
6317 btrfs_file_extent_disk_num_bytes(buf, fi);
6318 if (data_bytes_allocated < root->sectorsize) {
6321 data_bytes_referenced +=
6322 btrfs_file_extent_num_bytes(buf, fi);
6323 add_data_backref(extent_cache,
6324 btrfs_file_extent_disk_bytenr(buf, fi),
6325 parent, owner, key.objectid, key.offset -
6326 btrfs_file_extent_offset(buf, fi), 1, 1,
6327 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: queue every child pointer that is not already dropped. */
6331 struct btrfs_key first_key;
6333 first_key.objectid = 0;
6336 btrfs_item_key_to_cpu(buf, &first_key, 0);
6337 level = btrfs_header_level(buf);
6338 for (i = 0; i < nritems; i++) {
6339 struct extent_record tmpl;
6341 ptr = btrfs_node_blockptr(buf, i);
6342 size = root->nodesize;
6343 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already removed by an in-progress snapshot drop. */
6345 if ((level == ri->drop_level)
6346 && is_dropped_key(&key, &ri->drop_key)) {
6351 memset(&tmpl, 0, sizeof(tmpl));
6352 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6353 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6358 tmpl.max_size = size;
6359 ret = add_extent_rec(extent_cache, &tmpl);
6362 add_tree_backref(extent_cache, ptr, parent, owner, 1);
/* Interior children go to 'nodes', leaves to 'pending'. */
6365 add_pending(nodes, seen, ptr, size);
6367 add_pending(pending, seen, ptr, size);
6370 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6371 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final summary printout. */
6373 total_btree_bytes += buf->len;
6374 if (fs_root_objectid(btrfs_header_owner(buf)))
6375 total_fs_tree_bytes += buf->len;
6376 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6377 total_extent_tree_bytes += buf->len;
6378 if (!found_old_backref &&
6379 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6380 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6381 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6382 found_old_backref = 1;
6384 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block: queue it on 'nodes' (interior) or
 * 'pending' (leaf), create its extent record, and add the appropriate
 * tree backref — a shared (parent-based) backref for reloc roots or
 * pre-mixed-backref filesystems, otherwise a normal root-owner backref.
 */
6388 static int add_root_to_pending(struct extent_buffer *buf,
6389 struct cache_tree *extent_cache,
6390 struct cache_tree *pending,
6391 struct cache_tree *seen,
6392 struct cache_tree *nodes,
6395 struct extent_record tmpl;
6397 if (btrfs_header_level(buf) > 0)
6398 add_pending(nodes, seen, buf->start, buf->len);
6400 add_pending(pending, seen, buf->start, buf->len);
6402 memset(&tmpl, 0, sizeof(tmpl));
6403 tmpl.start = buf->start;
6408 tmpl.max_size = buf->len;
6409 add_extent_rec(extent_cache, &tmpl);
/* Reloc trees and old-format backrefs use parent-based (shared) refs. */
6411 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6412 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6413 add_tree_backref(extent_cache, buf->start, buf->start,
6416 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6420 /* as we fix the tree, we might be deleting blocks that
6421 * we're tracking for repair. This hook makes sure we
6422 * remove any backrefs for blocks as we are fixing them.
/*
 * Transaction hook invoked when repair code frees an extent: keep the
 * in-memory fsck caches consistent by decrementing ref counts on the
 * matching extent record and dropping the backref node once both its
 * found_ref and found_extent_tree evidence are gone.  Data vs tree
 * backrefs are distinguished by owner >= BTRFS_FIRST_FREE_OBJECTID.
 *
 * NOTE(review): interior lines (early-return when no cache entry, frees)
 * are missing from this extract.
 */
6424 static int free_extent_hook(struct btrfs_trans_handle *trans,
6425 struct btrfs_root *root,
6426 u64 bytenr, u64 num_bytes, u64 parent,
6427 u64 root_objectid, u64 owner, u64 offset,
6430 struct extent_record *rec;
6431 struct cache_extent *cache;
6433 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6435 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6436 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6440 rec = container_of(cache, struct extent_record, cache);
/* Data backref: drop refs_to_drop from both ref tallies. */
6442 struct data_backref *back;
6443 back = find_data_backref(rec, parent, root_objectid, owner,
6444 offset, 1, bytenr, num_bytes);
6447 if (back->node.found_ref) {
6448 back->found_ref -= refs_to_drop;
6450 rec->refs -= refs_to_drop;
6452 if (back->node.found_extent_tree) {
6453 back->num_refs -= refs_to_drop;
6454 if (rec->extent_item_refs)
6455 rec->extent_item_refs -= refs_to_drop;
6457 if (back->found_ref == 0)
6458 back->node.found_ref = 0;
6459 if (back->num_refs == 0)
6460 back->node.found_extent_tree = 0;
/*
 * NOTE(review): condition reads found_ref set with extent_tree unset
 * before erasing the backref; whether that is the intended "no evidence
 * left" test cannot be confirmed from this gapped extract.
 */
6462 if (!back->node.found_extent_tree && back->node.found_ref) {
6463 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree backref: a tree block only ever has one ref per backref. */
6467 struct tree_backref *back;
6468 back = find_tree_backref(rec, parent, root_objectid);
6471 if (back->node.found_ref) {
6474 back->node.found_ref = 0;
6476 if (back->node.found_extent_tree) {
6477 if (rec->extent_item_refs)
6478 rec->extent_item_refs--;
6479 back->node.found_extent_tree = 0;
6481 if (!back->node.found_extent_tree && back->node.found_ref) {
6482 rb_erase(&back->node.node, &rec->backref_tree);
/* Free the record entirely if nothing references it anymore. */
6486 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item keyed at @bytenr (extent
 * items, metadata items and all ref item types), searching backwards from
 * (bytenr, max, -1).  When an EXTENT_ITEM or METADATA_ITEM is removed the
 * block group accounting is adjusted by the freed byte count.
 *
 * NOTE(review): loop framing and several returns are missing from this
 * extract.
 */
6491 static int delete_extent_records(struct btrfs_trans_handle *trans,
6492 struct btrfs_root *root,
6493 struct btrfs_path *path,
6494 u64 bytenr, u64 new_len)
6496 struct btrfs_key key;
6497 struct btrfs_key found_key;
6498 struct extent_buffer *leaf;
6503 key.objectid = bytenr;
6505 key.offset = (u64)-1;
6508 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6515 if (path->slots[0] == 0)
6521 leaf = path->nodes[0];
6522 slot = path->slots[0];
6524 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Done once we walk off the items for this bytenr. */
6525 if (found_key.objectid != bytenr)
/* Only these item types are deleted; anything else is stepped over. */
6528 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6529 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6530 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6531 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6532 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6533 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6534 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6535 btrfs_release_path(path);
/* Compute the next-lower key to continue the backwards search. */
6536 if (found_key.type == 0) {
6537 if (found_key.offset == 0)
6539 key.offset = found_key.offset - 1;
6540 key.type = found_key.type;
6542 key.type = found_key.type - 1;
6543 key.offset = (u64)-1;
6547 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6548 found_key.objectid, found_key.type, found_key.offset);
6550 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6553 btrfs_release_path(path);
/* Deleting the extent/metadata item frees space: fix accounting. */
6555 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6556 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6557 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6558 found_key.offset : root->nodesize;
6560 ret = btrfs_update_block_group(trans, root, bytenr,
6567 btrfs_release_path(path);
6572 * for a single backref, this will allocate a new extent
6573 * and add the backref to it.
/*
 * Repair helper: (re)create the on-disk extent item for @rec if not yet
 * @allocated, then add the backref described by @back — iterating
 * found_ref times for data backrefs, once for tree backrefs — via
 * btrfs_inc_extent_ref().  Tree blocks also get their tree_block_info
 * (level + first key) written inline after the extent item.
 *
 * NOTE(review): this extract is missing interior lines (error gotos,
 * parent=0 branches).  Also note two suspicious artifacts preserved
 * verbatim below: the doubled semicolon on the copy_key declaration, and
 * the '©_key' tokens which look like a mis-encoded '&copy_key' (an HTML
 * '&copy;' entity mangled during extraction) — confirm against the
 * upstream file before relying on this text.
 */
6575 static int record_extent(struct btrfs_trans_handle *trans,
6576 struct btrfs_fs_info *info,
6577 struct btrfs_path *path,
6578 struct extent_record *rec,
6579 struct extent_backref *back,
6580 int allocated, u64 flags)
6583 struct btrfs_root *extent_root = info->extent_root;
6584 struct extent_buffer *leaf;
6585 struct btrfs_key ins_key;
6586 struct btrfs_extent_item *ei;
6587 struct tree_backref *tback;
6588 struct data_backref *dback;
6589 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one nodesize long. */
6592 rec->max_size = max_t(u64, rec->max_size,
6593 info->extent_root->nodesize);
/* Tree blocks need room for the inline tree_block_info as well. */
6596 u32 item_size = sizeof(*ei);
6599 item_size += sizeof(*bi);
6601 ins_key.objectid = rec->start;
6602 ins_key.offset = rec->max_size;
6603 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6605 ret = btrfs_insert_empty_item(trans, extent_root, path,
6606 &ins_key, item_size);
6610 leaf = path->nodes[0];
6611 ei = btrfs_item_ptr(leaf, path->slots[0],
6612 struct btrfs_extent_item);
/* Refs start at 0; each btrfs_inc_extent_ref below bumps them. */
6614 btrfs_set_extent_refs(leaf, ei, 0);
6615 btrfs_set_extent_generation(leaf, ei, rec->generation);
6617 if (back->is_data) {
6618 btrfs_set_extent_flags(leaf, ei,
6619 BTRFS_EXTENT_FLAG_DATA);
6621 struct btrfs_disk_key copy_key;;
6623 tback = to_tree_backref(back);
6624 bi = (struct btrfs_tree_block_info *)(ei + 1);
6625 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6628 btrfs_set_disk_key_objectid(©_key,
6629 rec->info_objectid);
6630 btrfs_set_disk_key_type(©_key, 0);
6631 btrfs_set_disk_key_offset(©_key, 0);
6633 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6634 btrfs_set_tree_block_key(leaf, bi, ©_key);
6636 btrfs_set_extent_flags(leaf, ei,
6637 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6640 btrfs_mark_buffer_dirty(leaf);
/* Newly created extent: charge it to the block group. */
6641 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6642 rec->max_size, 1, 0);
6645 btrfs_release_path(path);
6648 if (back->is_data) {
6652 dback = to_data_backref(back);
6653 if (back->full_backref)
6654 parent = dback->parent;
/* One inc per reference we actually found in the fs trees. */
6658 for (i = 0; i < dback->found_ref; i++) {
6659 /* if parent != 0, we're doing a full backref
6660 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6661 * just makes the backref allocator create a data
6664 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6665 rec->start, rec->max_size,
6669 BTRFS_FIRST_FREE_OBJECTID :
6675 fprintf(stderr, "adding new data backref"
6676 " on %llu %s %llu owner %llu"
6677 " offset %llu found %d\n",
6678 (unsigned long long)rec->start,
6679 back->full_backref ?
6681 back->full_backref ?
6682 (unsigned long long)parent :
6683 (unsigned long long)dback->root,
6684 (unsigned long long)dback->owner,
6685 (unsigned long long)dback->offset,
/* Tree backref: a single reference, shared or keyed by root. */
6690 tback = to_tree_backref(back);
6691 if (back->full_backref)
6692 parent = tback->parent;
6696 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6697 rec->start, rec->max_size,
6698 parent, tback->root, 0, 0);
6699 fprintf(stderr, "adding new tree backref on "
6700 "start %llu len %llu parent %llu root %llu\n",
6701 rec->start, rec->max_size, parent, tback->root);
6704 btrfs_release_path(path);
/*
 * Linear scan of @entries for an extent_entry matching both @bytenr and
 * @bytes.  (The return statements are among the lines missing from this
 * extract; presumably returns the match or NULL — confirm upstream.)
 */
6708 static struct extent_entry *find_entry(struct list_head *entries,
6709 u64 bytenr, u64 bytes)
6711 struct extent_entry *entry = NULL;
6713 list_for_each_entry(entry, entries, list) {
6714 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Vote among conflicting backref candidates: return the entry with the
 * highest agreement count, skipping entries whose refs are all broken,
 * and refusing to pick a winner when the top counts tie (ambiguous vote).
 *
 * NOTE(review): some control-flow lines (the tie-resolution resets and
 * final return) are missing from this extract.
 */
6721 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6723 struct extent_entry *entry, *best = NULL, *prev = NULL;
6725 list_for_each_entry(entry, entries, list) {
6732 * If there are as many broken entries as entries then we know
6733 * not to trust this particular entry.
6735 if (entry->broken == entry->count)
6739 * If our current entry == best then we can't be sure our best
6740 * is really the best, so we need to keep searching.
6742 if (best && best->count == entry->count) {
6748 /* Prev == entry, not good enough, have to keep searching */
6749 if (!prev->broken && prev->count == entry->count)
6753 best = (prev->count > entry->count) ? prev : entry;
6754 else if (best->count < entry->count)
/*
 * Rewrite a file extent item so its disk_bytenr/disk_num_bytes agree with
 * the winning extent @entry: look up the owning fs root, walk forward from
 * the backref's original offset to find the exact file extent, then cow
 * down with a transaction and adjust disk_bytenr, offset, disk_num_bytes
 * and (for uncompressed extents) ram_bytes.  Compressed extents whose
 * start doesn't match are refused with a request for a btrfs-image.
 *
 * NOTE(review): several error-path and loop-advance lines are missing
 * from this extract.
 */
6762 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6763 struct data_backref *dback, struct extent_entry *entry)
6765 struct btrfs_trans_handle *trans;
6766 struct btrfs_root *root;
6767 struct btrfs_file_extent_item *fi;
6768 struct extent_buffer *leaf;
6769 struct btrfs_key key;
6773 key.objectid = dback->root;
6774 key.type = BTRFS_ROOT_ITEM_KEY;
6775 key.offset = (u64)-1;
6776 root = btrfs_read_fs_root(info, &key);
6778 fprintf(stderr, "Couldn't find root for our ref\n");
6783 * The backref points to the original offset of the extent if it was
6784 * split, so we need to search down to the offset we have and then walk
6785 * forward until we find the backref we're looking for.
6787 key.objectid = dback->owner;
6788 key.type = BTRFS_EXTENT_DATA_KEY;
6789 key.offset = dback->offset;
6790 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6792 fprintf(stderr, "Error looking up ref %d\n", ret);
6797 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6798 ret = btrfs_next_leaf(root, path);
6800 fprintf(stderr, "Couldn't find our ref, next\n");
6804 leaf = path->nodes[0];
6805 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6806 if (key.objectid != dback->owner ||
6807 key.type != BTRFS_EXTENT_DATA_KEY) {
6808 fprintf(stderr, "Couldn't find our ref, search\n");
6811 fi = btrfs_item_ptr(leaf, path->slots[0],
6812 struct btrfs_file_extent_item);
6813 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6814 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the file extent matching this backref: stop walking. */
6816 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6821 btrfs_release_path(path);
6823 trans = btrfs_start_transaction(root, 1);
6825 return PTR_ERR(trans);
6828 * Ok we have the key of the file extent we want to fix, now we can cow
6829 * down to the thing and fix it.
6831 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6833 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6834 key.objectid, key.type, key.offset, ret);
6838 fprintf(stderr, "Well that's odd, we just found this key "
6839 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6844 leaf = path->nodes[0];
6845 fi = btrfs_item_ptr(leaf, path->slots[0],
6846 struct btrfs_file_extent_item);
/* Can't safely shift offsets inside a compressed extent. */
6848 if (btrfs_file_extent_compression(leaf, fi) &&
6849 dback->disk_bytenr != entry->bytenr) {
6850 fprintf(stderr, "Ref doesn't match the record start and is "
6851 "compressed, please take a btrfs-image of this file "
6852 "system and send it to a btrfs developer so they can "
6853 "complete this functionality for bytenr %Lu\n",
6854 dback->disk_bytenr);
/* Broken ref: just point it at the winning bytenr outright. */
6859 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6860 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts after the real extent: fold the gap into 'offset'. */
6861 } else if (dback->disk_bytenr > entry->bytenr) {
6862 u64 off_diff, offset;
6864 off_diff = dback->disk_bytenr - entry->bytenr;
6865 offset = btrfs_file_extent_offset(leaf, fi);
6866 if (dback->disk_bytenr + offset +
6867 btrfs_file_extent_num_bytes(leaf, fi) >
6868 entry->bytenr + entry->bytes) {
6869 fprintf(stderr, "Ref is past the entry end, please "
6870 "take a btrfs-image of this file system and "
6871 "send it to a btrfs developer, ref %Lu\n",
6872 dback->disk_bytenr);
6877 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6878 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the real extent: shrink 'offset' to compensate. */
6879 } else if (dback->disk_bytenr < entry->bytenr) {
6882 offset = btrfs_file_extent_offset(leaf, fi);
6883 if (dback->disk_bytenr + offset < entry->bytenr) {
6884 fprintf(stderr, "Ref is before the entry start, please"
6885 " take a btrfs-image of this file system and "
6886 "send it to a btrfs developer, ref %Lu\n",
6887 dback->disk_bytenr);
6892 offset += dback->disk_bytenr;
6893 offset -= entry->bytenr;
6894 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6895 btrfs_set_file_extent_offset(leaf, fi, offset);
6898 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6901 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6902 * only do this if we aren't using compression, otherwise it's a
6905 if (!btrfs_file_extent_compression(leaf, fi))
6906 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6908 printf("ram bytes may be wrong?\n");
6909 btrfs_mark_buffer_dirty(leaf);
6911 err = btrfs_commit_transaction(trans, root);
6912 btrfs_release_path(path);
/* Preserve the first failure; a commit error only reported if ret==0. */
6913 return ret ? ret : err;
/*
 * verify_backrefs - reconcile disagreeing data backrefs for one extent record.
 *
 * Collects the distinct (bytenr, bytes) pairs reported by the record's data
 * backrefs into an entry list, elects the entry with the most agreement
 * (falling back to the extent record itself when the backrefs tie or are
 * broken), then calls repair_ref() on every backref that disagrees with the
 * winner.  Returns 0 when nothing needs fixing or the repairs succeeded,
 * negative on error.
 *
 * NOTE(review): this copy of the file has lines elided by extraction; the
 * comments describe only the logic that is visible here.
 */
6916 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6917 struct extent_record *rec)
6919 struct extent_backref *back, *tmp;
6920 struct data_backref *dback;
6921 struct extent_entry *entry, *best = NULL;
6924 int broken_entries = 0;
/* Metadata backrefs are not reconciled here; only data backrefs are walked. */
6929 * Metadata is easy and the backrefs should always agree on bytenr and
6930 * size, if not we've got bigger issues.
/* Pass 1: tally what each data backref claims about (bytenr, bytes). */
6935 rbtree_postorder_for_each_entry_safe(back, tmp,
6936 &rec->backref_tree, node) {
6937 if (back->full_backref || !back->is_data)
6940 dback = to_data_backref(back);
6943 * We only pay attention to backrefs that we found a real
6946 if (dback->found_ref == 0)
6950 * For now we only catch when the bytes don't match, not the
6951 * bytenr. We can easily do this at the same time, but I want
6952 * to have a fs image to test on before we just add repair
6953 * functionality willy-nilly so we know we won't screw up the
6957 entry = find_entry(&entries, dback->disk_bytenr,
6960 entry = malloc(sizeof(struct extent_entry));
6965 memset(entry, 0, sizeof(*entry));
6966 entry->bytenr = dback->disk_bytenr;
6967 entry->bytes = dback->bytes;
6968 list_add_tail(&entry->list, &entries);
6973 * If we only have one entry we may think the entries agree when
6974 * in reality they don't so we have to do some extra checking.
6976 if (dback->disk_bytenr != rec->start ||
6977 dback->bytes != rec->nr || back->broken)
6988 /* Yay all the backrefs agree, carry on good sir */
6989 if (nr_entries <= 1 && !mismatch)
6992 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6993 "%Lu\n", rec->start);
6996 * First we want to see if the backrefs can agree amongst themselves who
6997 * is right, so figure out which one of the entries has the highest
7000 best = find_most_right_entry(&entries);
7003 * Ok so we may have an even split between what the backrefs think, so
7004 * this is where we use the extent ref to see what it thinks.
7007 entry = find_entry(&entries, rec->start, rec->nr);
7008 if (!entry && (!broken_entries || !rec->found_rec)) {
7009 fprintf(stderr, "Backrefs don't agree with each other "
7010 "and extent record doesn't agree with anybody,"
7011 " so we can't fix bytenr %Lu bytes %Lu\n",
7012 rec->start, rec->nr);
7015 } else if (!entry) {
7017 * Ok our backrefs were broken, we'll assume this is the
7018 * correct value and add an entry for this range.
7020 entry = malloc(sizeof(struct extent_entry));
7025 memset(entry, 0, sizeof(*entry));
7026 entry->bytenr = rec->start;
7027 entry->bytes = rec->nr;
7028 list_add_tail(&entry->list, &entries);
/* Re-elect now that the extent record's own range is in the list. */
7032 best = find_most_right_entry(&entries);
7034 fprintf(stderr, "Backrefs and extent record evenly "
7035 "split on who is right, this is going to "
7036 "require user input to fix bytenr %Lu bytes "
7037 "%Lu\n", rec->start, rec->nr);
7044 * I don't think this can happen currently as we'll abort() if we catch
7045 * this case higher up, but in case somebody removes that we still can't
7046 * deal with it properly here yet, so just bail out if that's the case.
7048 if (best->bytenr != rec->start) {
7049 fprintf(stderr, "Extent start and backref starts don't match, "
7050 "please use btrfs-image on this file system and send "
7051 "it to a btrfs developer so they can make fsck fix "
7052 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7053 rec->start, rec->nr);
7059 * Ok great we all agreed on an extent record, let's go find the real
7060 * references and fix up the ones that don't match.
/* Pass 2: repair every real backref that disagrees with the winner. */
7062 rbtree_postorder_for_each_entry_safe(back, tmp,
7063 &rec->backref_tree, node) {
7064 if (back->full_backref || !back->is_data)
7067 dback = to_data_backref(back);
7070 * Still ignoring backrefs that don't have a real ref attached
7073 if (dback->found_ref == 0)
7076 if (dback->bytes == best->bytes &&
7077 dback->disk_bytenr == best->bytenr)
7080 ret = repair_ref(info, path, dback, best);
7086 * Ok we messed with the actual refs, which means we need to drop our
7087 * entire cache and go back and rescan. I know this is a huge pain and
7088 * adds a lot of extra work, but it's the only way to be safe. Once all
7089 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary entry list before returning. */
7094 while (!list_empty(&entries)) {
7095 entry = list_entry(entries.next, struct extent_entry, list);
7096 list_del_init(&entry->list);
/*
 * process_duplicates - collapse duplicate extent records for one range.
 *
 * When a record with no real extent item shadows a duplicate that was built
 * from an actual BTRFS_EXTENT_ITEM_KEY, promote the duplicate ("good") to be
 * the canonical record, fold in backrefs/refs from any other overlapping
 * records, and re-insert it into the extent cache.  Returns 1 when the
 * promoted record has no remaining duplicates (nothing to delete), 0 when
 * duplicates remain and deletion is still needed.
 */
7102 static int process_duplicates(struct btrfs_root *root,
7103 struct cache_tree *extent_cache,
7104 struct extent_record *rec)
7106 struct extent_record *good, *tmp;
7107 struct cache_extent *cache;
7111 * If we found an extent record for this extent then return, or if we
7112 * have more than one duplicate we are likely going to need to delete
7115 if (rec->found_rec || rec->num_duplicates > 1)
7118 /* Shouldn't happen but just in case */
7119 BUG_ON(!rec->num_duplicates);
7122 * So this happens if we end up with a backref that doesn't match the
7123 * actual extent entry. So either the backref is bad or the extent
7124 * entry is bad. Either way we want to have the extent_record actually
7125 * reflect what we found in the extent_tree, so we need to take the
7126 * duplicate out and use that as the extent_record since the only way we
7127 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7129 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping fields. */
7131 good = to_extent_record(rec->dups.next);
7132 list_del_init(&good->list);
7133 INIT_LIST_HEAD(&good->backrefs);
7134 INIT_LIST_HEAD(&good->dups);
7135 good->cache.start = good->start;
7136 good->cache.size = good->nr;
7137 good->content_checked = 0;
7138 good->owner_ref_checked = 0;
7139 good->num_duplicates = 0;
7140 good->refs = rec->refs;
7141 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any other cached records that overlap the promoted range. */
7143 cache = lookup_cache_extent(extent_cache, good->start,
7147 tmp = container_of(cache, struct extent_record, cache);
7150 * If we find another overlapping extent and its found_rec is
7151 * set then it's a duplicate and we need to try and delete
7154 if (tmp->found_rec || tmp->num_duplicates > 0) {
7155 if (list_empty(&good->list))
7156 list_add_tail(&good->list,
7157 &duplicate_extents);
7158 good->num_duplicates += tmp->num_duplicates + 1;
7159 list_splice_init(&tmp->dups, &good->dups);
7160 list_del_init(&tmp->list);
7161 list_add_tail(&tmp->list, &good->dups);
7162 remove_cache_extent(extent_cache, &tmp->cache);
7167 * Ok we have another non extent item backed extent rec, so lets
7168 * just add it to this extent and carry on like we did above.
7170 good->refs += tmp->refs;
7171 list_splice_init(&tmp->backrefs, &good->backrefs);
7172 remove_cache_extent(extent_cache, &tmp->cache);
7175 ret = insert_cache_extent(extent_cache, &good->cache);
7178 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove duplicate EXTENT_ITEMs from disk.
 *
 * Picks the record covering all of rec's duplicates ("good"), moves the rest
 * onto a delete list, and deletes their extent items from the extent tree in
 * one transaction.  Returns the number of items deleted on success, negative
 * on error.  Bails out if the duplicates only partially overlap, since that
 * case cannot be resolved automatically.
 */
7181 static int delete_duplicate_records(struct btrfs_root *root,
7182 struct extent_record *rec)
7184 struct btrfs_trans_handle *trans;
7185 LIST_HEAD(delete_list);
7186 struct btrfs_path *path;
7187 struct extent_record *tmp, *good, *n;
7190 struct btrfs_key key;
7192 path = btrfs_alloc_path();
7199 /* Find the record that covers all of the duplicates. */
7200 list_for_each_entry(tmp, &rec->dups, list) {
7201 if (good->start < tmp->start)
7203 if (good->nr > tmp->nr)
/* Partial overlap: cannot decide which item to keep, give up. */
7206 if (tmp->start + tmp->nr < good->start + good->nr) {
7207 fprintf(stderr, "Ok we have overlapping extents that "
7208 "aren't completely covered by each other, this "
7209 "is going to require more careful thought. "
7210 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7211 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except "good" goes onto the delete list. */
7218 list_add_tail(&rec->list, &delete_list);
7220 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7223 list_move_tail(&tmp->list, &delete_list);
7226 root = root->fs_info->extent_root;
7227 trans = btrfs_start_transaction(root, 1);
7228 if (IS_ERR(trans)) {
7229 ret = PTR_ERR(trans);
7233 list_for_each_entry(tmp, &delete_list, list) {
7234 if (tmp->found_rec == 0)
7236 key.objectid = tmp->start;
7237 key.type = BTRFS_EXTENT_ITEM_KEY;
7238 key.offset = tmp->nr;
7240 /* Shouldn't happen but just in case */
7241 if (tmp->metadata) {
7242 fprintf(stderr, "Well this shouldn't happen, extent "
7243 "record overlaps but is metadata? "
7244 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7248 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7254 ret = btrfs_del_item(trans, root, path);
7257 btrfs_release_path(path);
7260 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records for everything we deleted. */
7264 while (!list_empty(&delete_list)) {
7265 tmp = to_extent_record(delete_list.next);
7266 list_del_init(&tmp->list);
7272 while (!list_empty(&rec->dups)) {
7273 tmp = to_extent_record(rec->dups.next);
7274 list_del_init(&tmp->list);
7278 btrfs_free_path(path);
7280 if (!ret && !nr_del)
7281 rec->num_duplicates = 0;
7283 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - look up file extents for backrefs we never found.
 *
 * For each data backref in the record that has no real ref attached, read the
 * owning fs root and search for the EXTENT_DATA item it should point at.  If
 * found, and the referenced bytenr has no extent record of its own in the
 * cache, adopt the on-disk (bytenr, bytes) values into the backref so
 * verify_backrefs() can reconcile them later.  Returns 0 on success or a
 * negative error from root/tree lookups.
 */
7286 static int find_possible_backrefs(struct btrfs_fs_info *info,
7287 struct btrfs_path *path,
7288 struct cache_tree *extent_cache,
7289 struct extent_record *rec)
7291 struct btrfs_root *root;
7292 struct extent_backref *back, *tmp;
7293 struct data_backref *dback;
7294 struct cache_extent *cache;
7295 struct btrfs_file_extent_item *fi;
7296 struct btrfs_key key;
7300 rbtree_postorder_for_each_entry_safe(back, tmp,
7301 &rec->backref_tree, node) {
7302 /* Don't care about full backrefs (poor unloved backrefs) */
7303 if (back->full_backref || !back->is_data)
7306 dback = to_data_backref(back);
7308 /* We found this one, we don't need to do a lookup */
7309 if (dback->found_ref)
/* Resolve the subvolume root the backref claims to live in. */
7312 key.objectid = dback->root;
7313 key.type = BTRFS_ROOT_ITEM_KEY;
7314 key.offset = (u64)-1;
7316 root = btrfs_read_fs_root(info, &key);
7318 /* No root, definitely a bad ref, skip */
7319 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7321 /* Other err, exit */
7323 return PTR_ERR(root);
/* Search for the file extent the backref describes. */
7325 key.objectid = dback->owner;
7326 key.type = BTRFS_EXTENT_DATA_KEY;
7327 key.offset = dback->offset;
7328 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7330 btrfs_release_path(path);
7333 /* Didn't find it, we can carry on */
7338 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7339 struct btrfs_file_extent_item);
7340 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7341 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7342 btrfs_release_path(path);
7343 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7345 struct extent_record *tmp;
7346 tmp = container_of(cache, struct extent_record, cache);
7349 * If we found an extent record for the bytenr for this
7350 * particular backref then we can't add it to our
7351 * current extent record. We only want to add backrefs
7352 * that don't have a corresponding extent item in the
7353 * extent tree since they likely belong to this record
7354 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values; mark this as a found (but suspect) ref. */
7360 dback->found_ref += 1;
7361 dback->disk_bytenr = bytenr;
7362 dback->bytes = bytes;
7365 * Set this so the verify backref code knows not to trust the
7366 * values in this backref.
7375 * Record orphan data ref into corresponding root.
7377 * Return 0 if the extent item contains data ref and recorded.
7378 * Return 1 if the extent item contains no useful data ref
7379 * On that case, it may contains only shared_dataref or metadata backref
7380 * or the file extent exists (this should be handled by the extent bytenr
7382 * Return <0 if something goes wrong.
7384 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7385 struct extent_record *rec)
7387 struct btrfs_key key;
7388 struct btrfs_root *dest_root;
7389 struct extent_backref *back, *tmp;
7390 struct data_backref *dback;
7391 struct orphan_data_extent *orphan;
7392 struct btrfs_path *path;
7393 int recorded_data_ref = 0;
7398 path = btrfs_alloc_path();
/* Walk data backrefs that exist in the extent tree but have no real ref. */
7401 rbtree_postorder_for_each_entry_safe(back, tmp,
7402 &rec->backref_tree, node) {
7403 if (back->full_backref || !back->is_data ||
7404 !back->found_extent_tree)
7406 dback = to_data_backref(back);
7407 if (dback->found_ref)
7409 key.objectid = dback->root;
7410 key.type = BTRFS_ROOT_ITEM_KEY;
7411 key.offset = (u64)-1;
7413 dest_root = btrfs_read_fs_root(fs_info, &key);
7415 /* For non-exist root we just skip it */
7416 if (IS_ERR(dest_root) || !dest_root)
7419 key.objectid = dback->owner;
7420 key.type = BTRFS_EXTENT_DATA_KEY;
7421 key.offset = dback->offset;
7423 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7425 * For ret < 0, it's OK since the fs-tree may be corrupted,
7426 * we need to record it for inode/file extent rebuild.
7427 * For ret > 0, we record it only for file extent rebuild.
7428 * For ret == 0, the file extent exists but only bytenr
7429 * mismatch, let the original bytenr fix routine to handle,
/* Queue an orphan record on the destination root for later rebuild. */
7435 orphan = malloc(sizeof(*orphan));
7440 INIT_LIST_HEAD(&orphan->list);
7441 orphan->root = dback->root;
7442 orphan->objectid = dback->owner;
7443 orphan->offset = dback->offset;
7444 orphan->disk_bytenr = rec->cache.start;
7445 orphan->disk_len = rec->cache.size;
7446 list_add(&dest_root->orphan_data_extents, &orphan->list);
7447 recorded_data_ref = 1;
7450 btrfs_free_path(path);
7452 return !recorded_data_ref;
7458 * when an incorrect extent item is found, this will delete
7459 * all of the existing entries for it and recreate them
7460 * based on what the tree scan found.
7462 static int fixup_extent_refs(struct btrfs_fs_info *info,
7463 struct cache_tree *extent_cache,
7464 struct extent_record *rec)
7466 struct btrfs_trans_handle *trans = NULL;
7468 struct btrfs_path *path;
7469 struct cache_extent *cache;
7470 struct extent_backref *back, *tmp;
7474 if (rec->flag_block_full_backref)
7475 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7477 path = btrfs_alloc_path();
7481 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7483 * Sometimes the backrefs themselves are so broken they don't
7484 * get attached to any meaningful rec, so first go back and
7485 * check any of our backrefs that we couldn't find and throw
7486 * them into the list if we find the backref so that
7487 * verify_backrefs can figure out what to do.
7489 ret = find_possible_backrefs(info, path, extent_cache, rec);
7494 /* step one, make sure all of the backrefs agree */
7495 ret = verify_backrefs(info, path, rec);
7499 trans = btrfs_start_transaction(info->extent_root, 1);
7500 if (IS_ERR(trans)) {
7501 ret = PTR_ERR(trans);
7505 /* step two, delete all the existing records */
7506 ret = delete_extent_records(trans, info->extent_root, path,
7507 rec->start, rec->max_size);
7512 /* was this block corrupt? If so, don't add references to it */
7513 cache = lookup_cache_extent(info->corrupt_blocks,
7514 rec->start, rec->max_size);
7520 /* step three, recreate all the refs we did find */
7521 rbtree_postorder_for_each_entry_safe(back, tmp,
7522 &rec->backref_tree, node) {
7524 * if we didn't find any references, don't create a
7527 if (!back->found_ref)
7530 rec->bad_full_backref = 0;
7531 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we managed to rewrite, then clean up. */
7539 int err = btrfs_commit_transaction(trans, info->extent_root);
7544 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear FULL_BACKREF on one extent item.
 *
 * Locates rec's extent/metadata item in the extent tree and rewrites its
 * flags field so it matches rec->flag_block_full_backref, committing the
 * change in a transaction.  Returns 0 on success or a negative error.
 */
7548 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7549 struct extent_record *rec)
7551 struct btrfs_trans_handle *trans;
7552 struct btrfs_root *root = fs_info->extent_root;
7553 struct btrfs_path *path;
7554 struct btrfs_extent_item *ei;
7555 struct btrfs_key key;
/* Metadata items are keyed by level, data items by size. */
7559 key.objectid = rec->start;
7560 if (rec->metadata) {
7561 key.type = BTRFS_METADATA_ITEM_KEY;
7562 key.offset = rec->info_level;
7564 key.type = BTRFS_EXTENT_ITEM_KEY;
7565 key.offset = rec->max_size;
7568 path = btrfs_alloc_path();
7572 trans = btrfs_start_transaction(root, 0);
7573 if (IS_ERR(trans)) {
7574 btrfs_free_path(path);
7575 return PTR_ERR(trans);
7578 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7580 btrfs_free_path(path);
7581 btrfs_commit_transaction(trans, root);
7584 fprintf(stderr, "Didn't find extent for %llu\n",
7585 (unsigned long long)rec->start);
7586 btrfs_free_path(path);
7587 btrfs_commit_transaction(trans, root);
7591 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7592 struct btrfs_extent_item);
7593 flags = btrfs_extent_flags(path->nodes[0], ei);
7594 if (rec->flag_block_full_backref) {
7595 fprintf(stderr, "setting full backref on %llu\n",
7596 (unsigned long long)key.objectid);
7597 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7599 fprintf(stderr, "clearing full backref on %llu\n",
7600 (unsigned long long)key.objectid);
7601 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7603 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7604 btrfs_mark_buffer_dirty(path->nodes[0]);
7605 btrfs_free_path(path);
7606 return btrfs_commit_transaction(trans, root);
7609 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - delete the parent pointer to one corrupt tree block.
 *
 * Searches the extent root down to the level above the corrupt block, finds
 * the slot whose blockptr matches corrupt->cache.start (scanning the whole
 * node if the search slot misses), and deletes that pointer.  Bails out if
 * the pointer cannot be found or we are already at the root node.
 */
7610 static int prune_one_block(struct btrfs_trans_handle *trans,
7611 struct btrfs_fs_info *info,
7612 struct btrfs_corrupt_block *corrupt)
7615 struct btrfs_path path;
7616 struct extent_buffer *eb;
7620 int level = corrupt->level + 1;
7622 btrfs_init_path(&path);
7624 /* we want to stop at the parent to our busted block */
7625 path.lowest_level = level;
7627 ret = btrfs_search_slot(trans, info->extent_root,
7628 &corrupt->key, &path, -1, 1);
7633 eb = path.nodes[level];
7640 * hopefully the search gave us the block we want to prune,
7641 * lets try that first
7643 slot = path.slots[level];
7644 found = btrfs_node_blockptr(eb, slot);
7645 if (found == corrupt->cache.start)
7648 nritems = btrfs_header_nritems(eb);
7650 /* the search failed, lets scan this node and hope we find it */
7651 for (slot = 0; slot < nritems; slot++) {
7652 found = btrfs_node_blockptr(eb, slot);
7653 if (found == corrupt->cache.start)
7657 * we couldn't find the bad block. TODO, search all the nodes for pointers
7660 if (eb == info->extent_root->node) {
7665 btrfs_release_path(&path);
7670 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7671 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7674 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block recorded in the corrupt cache.
 *
 * Iterates info->corrupt_blocks, starting a transaction lazily on first use,
 * calls prune_one_block() for each entry and removes it from the cache, then
 * commits.  Returns the commit result or a transaction-start error.
 */
7678 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7680 struct btrfs_trans_handle *trans = NULL;
7681 struct cache_extent *cache;
7682 struct btrfs_corrupt_block *corrupt;
7685 cache = search_cache_extent(info->corrupt_blocks, 0);
7689 trans = btrfs_start_transaction(info->extent_root, 1);
7691 return PTR_ERR(trans);
7693 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7694 prune_one_block(trans, info, corrupt);
7695 remove_cache_extent(info->corrupt_blocks, cache);
7698 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - drop cached free-space state for block groups.
 *
 * Clears every EXTENT_DIRTY range in fs_info->free_space_cache and walks the
 * block groups that cover those ranges (visible code advances by each block
 * group's key span) so their cached state can be rebuilt.
 */
7702 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7704 struct btrfs_block_group_cache *cache;
7709 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7710 &start, &end, EXTENT_DIRTY);
7713 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7719 cache = btrfs_lookup_first_block_group(fs_info, start);
7724 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - verify (and optionally repair) every cached extent.
 *
 * In repair mode, first pins all problem extents and corrupt blocks as
 * excluded so repairs never allocate from them, prunes corrupt blocks, and
 * resolves duplicate extent records.  Then walks the extent cache reporting
 * ref-count mismatches, backpointer mismatches, owner-ref failures, bad
 * full-backref flags, stripe-crossing metadata and chunk-type mismatches,
 * invoking fixup_extent_refs()/fixup_extent_flags() where repair applies.
 * Finishes by rebuilding block accounting in a transaction when repairs ran.
 */
7728 static int check_extent_refs(struct btrfs_root *root,
7729 struct cache_tree *extent_cache)
7731 struct extent_record *rec;
7732 struct cache_extent *cache;
7741 * if we're doing a repair, we have to make sure
7742 * we don't allocate from the problem extents.
7743 * In the worst case, this will be all the
7746 cache = search_cache_extent(extent_cache, 0);
7748 rec = container_of(cache, struct extent_record, cache);
7749 set_extent_dirty(root->fs_info->excluded_extents,
7751 rec->start + rec->max_size - 1,
7753 cache = next_cache_extent(cache);
7756 /* pin down all the corrupted blocks too */
7757 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7759 set_extent_dirty(root->fs_info->excluded_extents,
7761 cache->start + cache->size - 1,
7763 cache = next_cache_extent(cache);
7765 prune_corrupt_blocks(root->fs_info);
7766 reset_cached_block_groups(root->fs_info);
7769 reset_cached_block_groups(root->fs_info);
7772 * We need to delete any duplicate entries we find first otherwise we
7773 * could mess up the extent tree when we have backrefs that actually
7774 * belong to a different extent item and not the weird duplicate one.
7776 while (repair && !list_empty(&duplicate_extents)) {
7777 rec = to_extent_record(duplicate_extents.next);
7778 list_del_init(&rec->list);
7780 /* Sometimes we can find a backref before we find an actual
7781 * extent, so we need to process it a little bit to see if there
7782 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7783 * if this is a backref screwup. If we need to delete stuff
7784 * process_duplicates() will return 0, otherwise it will return
7787 if (process_duplicates(root, extent_cache, rec))
7789 ret = delete_duplicate_records(root, rec);
7793 * delete_duplicate_records will return the number of entries
7794 * deleted, so if it's greater than 0 then we know we actually
7795 * did something and we need to remove.
/* Main verification pass over every cached extent record. */
7809 cache = search_cache_extent(extent_cache, 0);
7812 rec = container_of(cache, struct extent_record, cache);
7813 if (rec->num_duplicates) {
7814 fprintf(stderr, "extent item %llu has multiple extent "
7815 "items\n", (unsigned long long)rec->start);
7820 if (rec->refs != rec->extent_item_refs) {
7821 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7822 (unsigned long long)rec->start,
7823 (unsigned long long)rec->nr);
7824 fprintf(stderr, "extent item %llu, found %llu\n",
7825 (unsigned long long)rec->extent_item_refs,
7826 (unsigned long long)rec->refs);
7827 ret = record_orphan_data_extents(root->fs_info, rec);
7834 * we can't use the extent to repair file
7835 * extent, let the fallback method handle it.
7837 if (!fixed && repair) {
7838 ret = fixup_extent_refs(
7849 if (all_backpointers_checked(rec, 1)) {
7850 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7851 (unsigned long long)rec->start,
7852 (unsigned long long)rec->nr);
7854 if (!fixed && !recorded && repair) {
7855 ret = fixup_extent_refs(root->fs_info,
7864 if (!rec->owner_ref_checked) {
7865 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7866 (unsigned long long)rec->start,
7867 (unsigned long long)rec->nr);
7868 if (!fixed && !recorded && repair) {
7869 ret = fixup_extent_refs(root->fs_info,
7878 if (rec->bad_full_backref) {
7879 fprintf(stderr, "bad full backref, on [%llu]\n",
7880 (unsigned long long)rec->start);
7882 ret = fixup_extent_flags(root->fs_info, rec);
7891 * Although it's not a extent ref's problem, we reuse this
7892 * routine for error reporting.
7893 * No repair function yet.
7895 if (rec->crossing_stripes) {
7897 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7898 rec->start, rec->start + rec->max_size);
7903 if (rec->wrong_chunk_type) {
7905 "bad extent [%llu, %llu), type mismatch with chunk\n",
7906 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and unpin it if it checked out. */
7911 remove_cache_extent(extent_cache, cache);
7912 free_all_extent_backrefs(rec);
7913 if (!init_extent_tree && repair && (!cur_err || fixed))
7914 clear_extent_dirty(root->fs_info->excluded_extents,
7916 rec->start + rec->max_size - 1,
7922 if (ret && ret != -EAGAIN) {
7923 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7926 struct btrfs_trans_handle *trans;
7928 root = root->fs_info->extent_root;
7929 trans = btrfs_start_transaction(root, 1);
7930 if (IS_ERR(trans)) {
7931 ret = PTR_ERR(trans);
7935 btrfs_fix_block_accounting(trans, root);
7936 ret = btrfs_commit_transaction(trans, root);
7941 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - per-device stripe size for a chunk.
 *
 * Derives how many bytes each stripe occupies on disk from the chunk's
 * RAID profile: RAID0 divides evenly, RAID10 stores two copies, RAID5/6
 * lose one/two parity stripes, and everything else (single/DUP/RAID1)
 * uses the full length per device.
 */
7947 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7951 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7952 stripe_size = length;
7953 stripe_size /= num_stripes;
7954 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7955 stripe_size = length * 2;
7956 stripe_size /= num_stripes;
7957 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7958 stripe_size = length;
7959 stripe_size /= (num_stripes - 1);
7960 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7961 stripe_size = length;
7962 stripe_size /= (num_stripes - 2);
7964 stripe_size = length;
7970 * Check the chunk with its block group/dev list ref:
7971 * Return 0 if all refs seems valid.
7972 * Return 1 if part of refs seems valid, need later check for rebuild ref
7973 * like missing block group and needs to search extent tree to rebuild them.
7974 * Return -1 if essential refs are missing and unable to rebuild.
7976 static int check_chunk_refs(struct chunk_record *chunk_rec,
7977 struct block_group_tree *block_group_cache,
7978 struct device_extent_tree *dev_extent_cache,
7981 struct cache_extent *block_group_item;
7982 struct block_group_record *block_group_rec;
7983 struct cache_extent *dev_extent_item;
7984 struct device_extent_record *dev_extent_rec;
7988 int metadump_v2 = 0;
/* Cross-check the chunk against its block group item. */
7992 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7995 if (block_group_item) {
7996 block_group_rec = container_of(block_group_item,
7997 struct block_group_record,
7999 if (chunk_rec->length != block_group_rec->offset ||
8000 chunk_rec->offset != block_group_rec->objectid ||
8002 chunk_rec->type_flags != block_group_rec->flags)) {
8005 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8006 chunk_rec->objectid,
8011 chunk_rec->type_flags,
8012 block_group_rec->objectid,
8013 block_group_rec->type,
8014 block_group_rec->offset,
8015 block_group_rec->offset,
8016 block_group_rec->objectid,
8017 block_group_rec->flags);
8020 list_del_init(&block_group_rec->list);
8021 chunk_rec->bg_rec = block_group_rec;
8026 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8027 chunk_rec->objectid,
8032 chunk_rec->type_flags);
/* Cross-check every stripe against its device extent. */
8039 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8040 chunk_rec->num_stripes);
8041 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8042 devid = chunk_rec->stripes[i].devid;
8043 offset = chunk_rec->stripes[i].offset;
8044 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8045 devid, offset, length);
8046 if (dev_extent_item) {
8047 dev_extent_rec = container_of(dev_extent_item,
8048 struct device_extent_record,
8050 if (dev_extent_rec->objectid != devid ||
8051 dev_extent_rec->offset != offset ||
8052 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8053 dev_extent_rec->length != length) {
8056 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8057 chunk_rec->objectid,
8060 chunk_rec->stripes[i].devid,
8061 chunk_rec->stripes[i].offset,
8062 dev_extent_rec->objectid,
8063 dev_extent_rec->offset,
8064 dev_extent_rec->length);
8067 list_move(&dev_extent_rec->chunk_list,
8068 &chunk_rec->dextents);
8073 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8074 chunk_rec->objectid,
8077 chunk_rec->stripes[i].devid,
8078 chunk_rec->stripes[i].offset);
8085 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - validate every cached chunk against block groups and
 * device extents, sorting each chunk onto the caller's good/bad/rebuild
 * lists (any of which may be NULL).  Leftover block groups and device
 * extents with no matching chunk are reported unless `silent`.
 */
8086 int check_chunks(struct cache_tree *chunk_cache,
8087 struct block_group_tree *block_group_cache,
8088 struct device_extent_tree *dev_extent_cache,
8089 struct list_head *good, struct list_head *bad,
8090 struct list_head *rebuild, int silent)
8092 struct cache_extent *chunk_item;
8093 struct chunk_record *chunk_rec;
8094 struct block_group_record *bg_rec;
8095 struct device_extent_record *dext_rec;
8099 chunk_item = first_cache_extent(chunk_cache);
8100 while (chunk_item) {
8101 chunk_rec = container_of(chunk_item, struct chunk_record,
8103 err = check_chunk_refs(chunk_rec, block_group_cache,
8104 dev_extent_cache, silent);
8107 if (err == 0 && good)
8108 list_add_tail(&chunk_rec->list, good);
8109 if (err > 0 && rebuild)
8110 list_add_tail(&chunk_rec->list, rebuild);
8112 list_add_tail(&chunk_rec->list, bad);
8113 chunk_item = next_cache_extent(chunk_item);
/* Report orphans: block groups and device extents without a chunk. */
8116 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8119 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8127 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8131 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - verify a device's byte_used against its extents.
 *
 * Sums the lengths of all device extents belonging to dev_rec->devid and
 * compares the total with the bytes-used value stored in the device item,
 * reporting a mismatch when they disagree.
 */
8142 static int check_device_used(struct device_record *dev_rec,
8143 struct device_extent_tree *dext_cache)
8145 struct cache_extent *cache;
8146 struct device_extent_record *dev_extent_rec;
8149 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8151 dev_extent_rec = container_of(cache,
8152 struct device_extent_record,
/* Stop once the walk crosses into extents of a different device. */
8154 if (dev_extent_rec->objectid != dev_rec->devid)
8157 list_del_init(&dev_extent_rec->device_list);
8158 total_byte += dev_extent_rec->length;
8159 cache = next_cache_extent(cache);
8162 if (total_byte != dev_rec->byte_used) {
8164 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8165 total_byte, dev_rec->byte_used, dev_rec->objectid,
8166 dev_rec->type, dev_rec->offset);
8173 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every cached device record,
 * then report device extents left over with no owning device.
 */
8174 static int check_devices(struct rb_root *dev_cache,
8175 struct device_extent_tree *dev_extent_cache)
8177 struct rb_node *dev_node;
8178 struct device_record *dev_rec;
8179 struct device_extent_record *dext_rec;
8183 dev_node = rb_first(dev_cache);
8185 dev_rec = container_of(dev_node, struct device_record, node);
8186 err = check_device_used(dev_rec, dev_extent_cache);
8190 dev_node = rb_next(dev_node);
8192 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8195 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8196 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - queue one tree root for the scanning pass.
 *
 * Allocates a root_item_record populated from the root item fields (bytenr,
 * objectid, level, drop state, last snapshot) and appends it to `head`.
 * drop_key may describe an in-progress snapshot drop and is copied when
 * provided (copy is conditional in the elided original).
 */
8203 static int add_root_item_to_list(struct list_head *head,
8204 u64 objectid, u64 bytenr, u64 last_snapshot,
8205 u8 level, u8 drop_level,
8206 int level_size, struct btrfs_key *drop_key)
8209 struct root_item_record *ri_rec;
8210 ri_rec = malloc(sizeof(*ri_rec));
8213 ri_rec->bytenr = bytenr;
8214 ri_rec->objectid = objectid;
8215 ri_rec->level = level;
8216 ri_rec->level_size = level_size;
8217 ri_rec->drop_level = drop_level;
8218 ri_rec->last_snapshot = last_snapshot;
8220 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8221 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - drain and free every root_item_record on `list`.
 */
8226 static void free_root_item_list(struct list_head *list)
8228 struct root_item_record *ri_rec;
8230 while (!list_empty(list)) {
8231 ri_rec = list_first_entry(list, struct root_item_record,
8233 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan every queued tree root.
 *
 * For each root_item_record on `list`: read its root node, seed the pending
 * block caches via add_root_to_pending(), then drive run_next_block() until
 * that root is exhausted.  Snapshots are processed one at a time so the
 * pending/seen caches stay coherent.  After the list is drained, a final
 * run_next_block() loop (with no root record) flushes remaining blocks.
 */
8238 static int deal_root_from_list(struct list_head *list,
8239 struct btrfs_root *root,
8240 struct block_info *bits,
8242 struct cache_tree *pending,
8243 struct cache_tree *seen,
8244 struct cache_tree *reada,
8245 struct cache_tree *nodes,
8246 struct cache_tree *extent_cache,
8247 struct cache_tree *chunk_cache,
8248 struct rb_root *dev_cache,
8249 struct block_group_tree *block_group_cache,
8250 struct device_extent_tree *dev_extent_cache)
8255 while (!list_empty(list)) {
8256 struct root_item_record *rec;
8257 struct extent_buffer *buf;
8258 rec = list_entry(list->next,
8259 struct root_item_record, list);
8261 buf = read_tree_block(root->fs_info->tree_root,
8262 rec->bytenr, rec->level_size, 0);
8263 if (!extent_buffer_uptodate(buf)) {
8264 free_extent_buffer(buf);
8268 add_root_to_pending(buf, extent_cache, pending,
8269 seen, nodes, rec->objectid);
8271 * To rebuild extent tree, we need deal with snapshot
8272 * one by one, otherwise we deal with node firstly which
8273 * can maximize readahead.
8276 ret = run_next_block(root, bits, bits_nr, &last,
8277 pending, seen, reada, nodes,
8278 extent_cache, chunk_cache,
8279 dev_cache, block_group_cache,
8280 dev_extent_cache, rec);
8284 free_extent_buffer(buf);
8285 list_del(&rec->list);
/* Final sweep with no specific root record. */
8291 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8292 reada, nodes, extent_cache, chunk_cache,
8293 dev_cache, block_group_cache,
8294 dev_extent_cache, NULL);
8304 static int check_chunks_and_extents(struct btrfs_root *root)
8306 struct rb_root dev_cache;
8307 struct cache_tree chunk_cache;
8308 struct block_group_tree block_group_cache;
8309 struct device_extent_tree dev_extent_cache;
8310 struct cache_tree extent_cache;
8311 struct cache_tree seen;
8312 struct cache_tree pending;
8313 struct cache_tree reada;
8314 struct cache_tree nodes;
8315 struct extent_io_tree excluded_extents;
8316 struct cache_tree corrupt_blocks;
8317 struct btrfs_path path;
8318 struct btrfs_key key;
8319 struct btrfs_key found_key;
8321 struct block_info *bits;
8323 struct extent_buffer *leaf;
8325 struct btrfs_root_item ri;
8326 struct list_head dropping_trees;
8327 struct list_head normal_trees;
8328 struct btrfs_root *root1;
8333 dev_cache = RB_ROOT;
8334 cache_tree_init(&chunk_cache);
8335 block_group_tree_init(&block_group_cache);
8336 device_extent_tree_init(&dev_extent_cache);
8338 cache_tree_init(&extent_cache);
8339 cache_tree_init(&seen);
8340 cache_tree_init(&pending);
8341 cache_tree_init(&nodes);
8342 cache_tree_init(&reada);
8343 cache_tree_init(&corrupt_blocks);
8344 extent_io_tree_init(&excluded_extents);
8345 INIT_LIST_HEAD(&dropping_trees);
8346 INIT_LIST_HEAD(&normal_trees);
8349 root->fs_info->excluded_extents = &excluded_extents;
8350 root->fs_info->fsck_extent_cache = &extent_cache;
8351 root->fs_info->free_extent_hook = free_extent_hook;
8352 root->fs_info->corrupt_blocks = &corrupt_blocks;
8356 bits = malloc(bits_nr * sizeof(struct block_info));
8362 if (ctx.progress_enabled) {
8363 ctx.tp = TASK_EXTENTS;
8364 task_start(ctx.info);
8368 root1 = root->fs_info->tree_root;
8369 level = btrfs_header_level(root1->node);
8370 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8371 root1->node->start, 0, level, 0,
8372 root1->nodesize, NULL);
8375 root1 = root->fs_info->chunk_root;
8376 level = btrfs_header_level(root1->node);
8377 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8378 root1->node->start, 0, level, 0,
8379 root1->nodesize, NULL);
8382 btrfs_init_path(&path);
8385 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8386 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8391 leaf = path.nodes[0];
8392 slot = path.slots[0];
8393 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8394 ret = btrfs_next_leaf(root, &path);
8397 leaf = path.nodes[0];
8398 slot = path.slots[0];
8400 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8401 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8402 unsigned long offset;
8405 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8406 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8407 last_snapshot = btrfs_root_last_snapshot(&ri);
8408 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8409 level = btrfs_root_level(&ri);
8410 level_size = root->nodesize;
8411 ret = add_root_item_to_list(&normal_trees,
8413 btrfs_root_bytenr(&ri),
8414 last_snapshot, level,
8415 0, level_size, NULL);
8419 level = btrfs_root_level(&ri);
8420 level_size = root->nodesize;
8421 objectid = found_key.objectid;
8422 btrfs_disk_key_to_cpu(&found_key,
8424 ret = add_root_item_to_list(&dropping_trees,
8426 btrfs_root_bytenr(&ri),
8427 last_snapshot, level,
8429 level_size, &found_key);
8436 btrfs_release_path(&path);
8439 * check_block can return -EAGAIN if it fixes something, please keep
8440 * this in mind when dealing with return values from these functions, if
8441 * we get -EAGAIN we want to fall through and restart the loop.
8443 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8444 &seen, &reada, &nodes, &extent_cache,
8445 &chunk_cache, &dev_cache, &block_group_cache,
8452 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8453 &pending, &seen, &reada, &nodes,
8454 &extent_cache, &chunk_cache, &dev_cache,
8455 &block_group_cache, &dev_extent_cache);
8462 ret = check_chunks(&chunk_cache, &block_group_cache,
8463 &dev_extent_cache, NULL, NULL, NULL, 0);
8470 ret = check_extent_refs(root, &extent_cache);
8477 ret = check_devices(&dev_cache, &dev_extent_cache);
8482 task_stop(ctx.info);
8484 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8485 extent_io_tree_cleanup(&excluded_extents);
8486 root->fs_info->fsck_extent_cache = NULL;
8487 root->fs_info->free_extent_hook = NULL;
8488 root->fs_info->corrupt_blocks = NULL;
8489 root->fs_info->excluded_extents = NULL;
8492 free_chunk_cache_tree(&chunk_cache);
8493 free_device_cache_tree(&dev_cache);
8494 free_block_group_tree(&block_group_cache);
8495 free_device_extent_tree(&dev_extent_cache);
8496 free_extent_cache_tree(&seen);
8497 free_extent_cache_tree(&pending);
8498 free_extent_cache_tree(&reada);
8499 free_extent_cache_tree(&nodes);
8502 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8503 free_extent_cache_tree(&seen);
8504 free_extent_cache_tree(&pending);
8505 free_extent_cache_tree(&reada);
8506 free_extent_cache_tree(&nodes);
8507 free_chunk_cache_tree(&chunk_cache);
8508 free_block_group_tree(&block_group_cache);
8509 free_device_cache_tree(&dev_cache);
8510 free_device_extent_tree(&dev_extent_cache);
8511 free_extent_record_cache(root->fs_info, &extent_cache);
8512 free_root_item_list(&normal_trees);
8513 free_root_item_list(&dropping_trees);
8514 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a freshly initialised, empty tree node and make
 * it the new root of the tree, discarding the old contents.  Used by fsck
 * when re-initialising the extent/csum/data-reloc trees.
 *
 * NOTE(review): @overwrite selects an alternate path (the bare
 * extent_buffer_get(c) before the allocation below belongs to it); the
 * branch structure was lost in extraction — confirm against full source.
 */
8518 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8519 struct btrfs_root *root, int overwrite)
8521 struct extent_buffer *c;
8522 struct extent_buffer *old = root->node;
8525 struct btrfs_disk_key disk_key = {0,0,0};
8531 extent_buffer_get(c);
8534 c = btrfs_alloc_free_block(trans, root,
8536 root->root_key.objectid,
8537 &disk_key, level, 0, 0);
8540 extent_buffer_get(c);
/* Build an empty node header for the new root block. */
8544 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8545 btrfs_set_header_level(c, level);
8546 btrfs_set_header_bytenr(c, c->start);
8547 btrfs_set_header_generation(c, trans->transid);
8548 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8549 btrfs_set_header_owner(c, root->root_key.objectid);
8551 write_extent_buffer(c, root->fs_info->fsid,
8552 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8554 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8555 btrfs_header_chunk_tree_uuid(c),
8558 btrfs_mark_buffer_dirty(c);
8560 * this case can happen in the following case:
8562 * 1.overwrite previous root.
8564 * 2.reinit reloc data root, this is because we skip pin
8565 * down reloc data tree before which means we can allocate
8566 * same block bytenr here.
8568 if (old->start == c->start) {
8569 btrfs_set_root_generation(&root->root_item,
8571 root->root_item.level = btrfs_header_level(root->node);
8572 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8573 &root->root_key, &root->root_item);
8575 free_extent_buffer(c);
8579 free_extent_buffer(old);
8581 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the allocator
 * cannot hand those bytes out while the extent tree is being rebuilt.
 *
 * For leaves of the tree root (@tree_root set) it also descends into the
 * trees referenced by ROOT_ITEMs, except the extent root and reloc roots
 * (those are the ones being rebuilt / intentionally skipped).  For interior
 * nodes at level 1 of non-root trees, children are pinned without reading
 * them, since only the tree root's structure needs to be followed further.
 */
8585 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8586 struct extent_buffer *eb, int tree_root)
8588 struct extent_buffer *tmp;
8589 struct btrfs_root_item *ri;
8590 struct btrfs_key key;
8593 int level = btrfs_header_level(eb);
8599 * If we have pinned this block before, don't pin it again.
8600 * This can not only avoid forever loop with broken filesystem
8601 * but also give us some speedups.
8603 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8604 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8607 btrfs_pin_extent(fs_info, eb->start, eb->len);
8609 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8610 nritems = btrfs_header_nritems(eb);
8611 for (i = 0; i < nritems; i++) {
/* Leaf path: follow ROOT_ITEMs into their trees. */
8613 btrfs_item_key_to_cpu(eb, &key, i);
8614 if (key.type != BTRFS_ROOT_ITEM_KEY)
8616 /* Skip the extent root and reloc roots */
8617 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8618 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8619 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8621 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8622 bytenr = btrfs_disk_root_bytenr(eb, ri);
8625 * If at any point we start needing the real root we
8626 * will have to build a stump root for the root we are
8627 * in, but for now this doesn't actually use the root so
8628 * just pass in extent_root.
8630 tmp = read_tree_block(fs_info->extent_root, bytenr,
8632 if (!extent_buffer_uptodate(tmp)) {
8633 fprintf(stderr, "Error reading root block\n");
8636 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8637 free_extent_buffer(tmp);
/* Node path: pin (and maybe recurse into) each child block. */
8641 bytenr = btrfs_node_blockptr(eb, i);
8643 /* If we aren't the tree root don't read the block */
8644 if (level == 1 && !tree_root) {
8645 btrfs_pin_extent(fs_info, bytenr, nodesize);
8649 tmp = read_tree_block(fs_info->extent_root, bytenr,
8651 if (!extent_buffer_uptodate(tmp)) {
8652 fprintf(stderr, "Error reading tree block\n");
8655 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8656 free_extent_buffer(tmp);
/*
 * Pin all metadata blocks of the filesystem: first everything reachable
 * from the chunk root, then everything reachable from the tree root
 * (tree_root=1 makes the walk descend into the per-tree ROOT_ITEMs).
 */
8665 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8669 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8673 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group records from scratch by scanning the
 * chunk tree: for every CHUNK_ITEM, add a matching block group and mark its
 * byte range free in the free space cache.  Run after
 * btrfs_free_block_groups() when re-initialising the extent tree.
 */
8676 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8678 struct btrfs_block_group_cache *cache;
8679 struct btrfs_path *path;
8680 struct extent_buffer *leaf;
8681 struct btrfs_chunk *chunk;
8682 struct btrfs_key key;
8686 path = btrfs_alloc_path();
8691 key.type = BTRFS_CHUNK_ITEM_KEY;
8694 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8696 btrfs_free_path(path);
8701 * We do this in case the block groups were screwed up and had alloc
8702 * bits that aren't actually set on the chunks. This happens with
8703 * restored images every time and could happen in real life I guess.
8705 fs_info->avail_data_alloc_bits = 0;
8706 fs_info->avail_metadata_alloc_bits = 0;
8707 fs_info->avail_system_alloc_bits = 0;
8709 /* First we need to create the in-memory block groups */
8711 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8712 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8714 btrfs_free_path(path);
8722 leaf = path->nodes[0];
8723 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8724 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8729 chunk = btrfs_item_ptr(leaf, path->slots[0],
8730 struct btrfs_chunk);
/* One block group per chunk; key.offset is the chunk's start bytenr. */
8731 btrfs_add_block_group(fs_info, 0,
8732 btrfs_chunk_type(leaf, chunk),
8733 key.objectid, key.offset,
8734 btrfs_chunk_length(leaf, chunk));
8735 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8736 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the freshly created block groups (loop head lost in extraction). */
8742 cache = btrfs_lookup_first_block_group(fs_info, start);
8746 start = cache->key.objectid + cache->key.offset;
8749 btrfs_free_path(path);
/*
 * Wipe all state left behind by a pending balance:
 *  - delete the BALANCE_ITEM from the tree root (if none exists, skip
 *    straight to the data reloc tree),
 *  - delete every TREE_RELOC ROOT_ITEM,
 *  - re-initialise the data reloc tree and give it a fresh root dir.
 */
8753 static int reset_balance(struct btrfs_trans_handle *trans,
8754 struct btrfs_fs_info *fs_info)
8756 struct btrfs_root *root = fs_info->tree_root;
8757 struct btrfs_path *path;
8758 struct extent_buffer *leaf;
8759 struct btrfs_key key;
8760 int del_slot, del_nr = 0;
8764 path = btrfs_alloc_path();
8768 key.objectid = BTRFS_BALANCE_OBJECTID;
8769 key.type = BTRFS_BALANCE_ITEM_KEY;
8772 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item found: nothing to delete, go fix the reloc tree. */
8777 goto reinit_data_reloc;
8782 ret = btrfs_del_item(trans, root, path);
8785 btrfs_release_path(path);
/* Now remove every tree-reloc root item. */
8787 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8788 key.type = BTRFS_ROOT_ITEM_KEY;
8791 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8795 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8800 ret = btrfs_del_items(trans, root, path,
8807 btrfs_release_path(path);
8810 ret = btrfs_search_slot(trans, root, &key, path,
8817 leaf = path->nodes[0];
8818 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8819 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8821 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* Batch up contiguous reloc items and delete them in one call. */
8826 del_slot = path->slots[0];
8835 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8839 btrfs_release_path(path);
/* reinit_data_reloc label target (label line lost in extraction). */
8842 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8843 key.type = BTRFS_ROOT_ITEM_KEY;
8844 key.offset = (u64)-1;
8845 root = btrfs_read_fs_root(fs_info, &key);
8847 fprintf(stderr, "Error reading data reloc tree\n");
8848 ret = PTR_ERR(root);
8851 record_root_in_trans(trans, root);
8852 ret = btrfs_fsck_reinit_root(trans, root, 0);
8855 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8857 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch (--init-extent-tree):
 *  1. refuse mixed block groups (walk below doesn't scan file extents),
 *  2. pin all metadata so the allocator can't clobber live blocks,
 *  3. drop and re-create the in-memory block groups from the chunk tree,
 *  4. re-initialise the extent root to an empty node,
 *  5. re-insert a BLOCK_GROUP_ITEM for every block group,
 *  6. clear any pending balance state.
 * The caller commits the transaction and re-runs the normal fsck, which
 * re-creates extent items for everything it finds.
 */
8861 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8862 struct btrfs_fs_info *fs_info)
8868 * The only reason we don't do this is because right now we're just
8869 * walking the trees we find and pinning down their bytes, we don't look
8870 * at any of the leaves. In order to do mixed groups we'd have to check
8871 * the leaves of any fs roots and pin down the bytes for any file
8872 * extents we find. Not hard but why do it if we don't have to?
8874 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8875 fprintf(stderr, "We don't support re-initing the extent tree "
8876 "for mixed block groups yet, please notify a btrfs "
8877 "developer you want to do this so they can add this "
8878 "functionality.\n");
8883 * first we need to walk all of the trees except the extent tree and pin
8884 * down the bytes that are in use so we don't overwrite any existing
8887 ret = pin_metadata_blocks(fs_info);
8889 fprintf(stderr, "error pinning down used bytes\n");
8894 * Need to drop all the block groups since we're going to recreate all
8897 btrfs_free_block_groups(fs_info);
8898 ret = reset_block_groups(fs_info);
8900 fprintf(stderr, "error resetting the block groups\n");
8904 /* Ok we can allocate now, reinit the extent root */
8905 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8907 fprintf(stderr, "extent root initialization failed\n");
8909 * When the transaction code is updated we should end the
8910 * transaction, but for now progs only knows about commit so
8911 * just return an error.
8917 * Now we have all the in-memory block groups setup so we can make
8918 * allocations properly, and the metadata we care about is safe since we
8919 * pinned all of it above.
8922 struct btrfs_block_group_cache *cache;
8924 cache = btrfs_lookup_first_block_group(fs_info, start);
8927 start = cache->key.objectid + cache->key.offset;
/* Re-insert the on-disk item for each in-memory block group. */
8928 ret = btrfs_insert_item(trans, fs_info->extent_root,
8929 &cache->key, &cache->item,
8930 sizeof(cache->item));
8932 fprintf(stderr, "Error adding block group\n");
8935 btrfs_extent_post_op(trans, fs_info->extent_root);
8938 ret = reset_balance(trans, fs_info);
8940 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb: look up its owner root via
 * the header owner id, then do a writable btrfs_search_slot() with
 * path->lowest_level set to the block's level so the search COWs the path
 * down to (and including) that block.  The transaction is committed before
 * returning the search result.
 */
8945 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8947 struct btrfs_path *path;
8948 struct btrfs_trans_handle *trans;
8949 struct btrfs_key key;
8952 printf("Recowing metadata block %llu\n", eb->start);
8953 key.objectid = btrfs_header_owner(eb);
8954 key.type = BTRFS_ROOT_ITEM_KEY;
8955 key.offset = (u64)-1;
8957 root = btrfs_read_fs_root(root->fs_info, &key);
8959 fprintf(stderr, "Couldn't find owner root %llu\n",
8961 return PTR_ERR(root);
8964 path = btrfs_alloc_path();
8968 trans = btrfs_start_transaction(root, 1);
8969 if (IS_ERR(trans)) {
8970 btrfs_free_path(path);
8971 return PTR_ERR(trans);
/* Key of the block's first entry: node key for nodes, item key for leaves. */
8974 path->lowest_level = btrfs_header_level(eb);
8975 if (path->lowest_level)
8976 btrfs_node_key_to_cpu(eb, &key, 0);
8978 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1 search rewrites every block on the path down to lowest_level. */
8980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8981 btrfs_commit_transaction(trans, root);
8982 btrfs_free_path(path);
/*
 * Remove a corrupt item recorded earlier: read the owning root
 * (bad->root_id), search to bad->key with a writable path (ins_len=-1 for
 * deletion) and delete the item, committing the transaction afterwards.
 */
8986 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8988 struct btrfs_path *path;
8989 struct btrfs_trans_handle *trans;
8990 struct btrfs_key key;
8993 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8994 bad->key.type, bad->key.offset);
8995 key.objectid = bad->root_id;
8996 key.type = BTRFS_ROOT_ITEM_KEY;
8997 key.offset = (u64)-1;
8999 root = btrfs_read_fs_root(root->fs_info, &key);
9001 fprintf(stderr, "Couldn't find owner root %llu\n",
9003 return PTR_ERR(root);
9006 path = btrfs_alloc_path();
9010 trans = btrfs_start_transaction(root, 1);
9011 if (IS_ERR(trans)) {
9012 btrfs_free_path(path);
9013 return PTR_ERR(trans);
9016 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9022 ret = btrfs_del_item(trans, root, path);
9024 btrfs_commit_transaction(trans, root);
9025 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the superblock's log root pointer and
 * level, then committing a transaction so the change hits disk.  Used in
 * repair mode, where a stale log would fail to replay after fsck commits.
 */
9029 static int zero_log_tree(struct btrfs_root *root)
9031 struct btrfs_trans_handle *trans;
9034 trans = btrfs_start_transaction(root, 1);
9035 if (IS_ERR(trans)) {
9036 ret = PTR_ERR(trans);
9039 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9040 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9041 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data extent [start, start+len):
 * read one sector at a time into @buf (caller-allocated, at least
 * sectorsize bytes) and add its csum to the csum tree via
 * btrfs_csum_file_block().
 */
9045 static int populate_csum(struct btrfs_trans_handle *trans,
9046 struct btrfs_root *csum_root, char *buf, u64 start,
9053 while (offset < len) {
9054 sectorsize = csum_root->sectorsize;
9055 ret = read_extent_data(csum_root, buf, start + offset,
/* start+len caps the insert range; start+offset is the sector's bytenr. */
9059 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9060 start + offset, buf, sectorsize);
9063 offset += sectorsize;
<doc_update>
/*
 * Walk one fs/subvolume root and (re)generate checksums for every regular
 * (REG, non-inline) file extent it references, by reading the data back and
 * inserting csums via populate_csum().  Prealloc and inline extents are
 * skipped by the type check.
 */
9068 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9069 struct btrfs_root *csum_root,
9070 struct btrfs_root *cur_root)
9072 struct btrfs_path *path;
9073 struct btrfs_key key;
9074 struct extent_buffer *node;
9075 struct btrfs_file_extent_item *fi;
9082 path = btrfs_alloc_path();
/* One sector worth of scratch space for populate_csum(). */
9085 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9095 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9098 /* Iterate all regular file extents and fill its csum */
9100 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9102 if (key.type != BTRFS_EXTENT_DATA_KEY)
9104 node = path->nodes[0];
9105 slot = path->slots[0];
9106 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9107 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9109 start = btrfs_file_extent_disk_bytenr(node, fi);
9110 len = btrfs_file_extent_disk_num_bytes(node, fi);
9112 ret = populate_csum(trans, csum_root, buf, start, len);
9119 * TODO: if next leaf is corrupted, jump to nearest next valid
9122 ret = btrfs_next_item(cur_root, path);
9132 btrfs_free_path(path);
/*
 * Rebuild the csum tree from the fs trees: scan the tree root for every
 * ROOT_ITEM whose objectid is an fs/subvolume tree (is_fstree()), read that
 * root, and run fill_csum_tree_from_one_fs_root() on it.
 */
9137 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9138 struct btrfs_root *csum_root)
9140 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9141 struct btrfs_path *path;
9142 struct btrfs_root *tree_root = fs_info->tree_root;
9143 struct btrfs_root *cur_root;
9144 struct extent_buffer *node;
9145 struct btrfs_key key;
9149 path = btrfs_alloc_path();
9153 key.objectid = BTRFS_FS_TREE_OBJECTID;
9155 key.type = BTRFS_ROOT_ITEM_KEY;
9157 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9166 node = path->nodes[0];
9167 slot = path->slots[0];
9168 btrfs_item_key_to_cpu(node, &key, slot);
/* Past the last possible subvolume id: done. */
9169 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9171 if (key.type != BTRFS_ROOT_ITEM_KEY)
9173 if (!is_fstree(key.objectid))
9175 key.offset = (u64)-1;
9177 cur_root = btrfs_read_fs_root(fs_info, &key);
9178 if (IS_ERR(cur_root) || !cur_root) {
9179 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9183 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9188 ret = btrfs_next_item(tree_root, path);
9198 btrfs_free_path(path);
/*
 * Rebuild the csum tree from the extent tree: scan every EXTENT_ITEM, skip
 * those without the DATA flag, and regenerate csums for each data extent
 * (key.objectid = bytenr, key.offset = length) via populate_csum().
 * Requires a trustworthy extent tree, so it is not usable together with
 * --init-extent-tree (see fill_csum_tree()).
 */
9202 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9203 struct btrfs_root *csum_root)
9205 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9206 struct btrfs_path *path;
9207 struct btrfs_extent_item *ei;
9208 struct extent_buffer *leaf;
9210 struct btrfs_key key;
9213 path = btrfs_alloc_path();
9218 key.type = BTRFS_EXTENT_ITEM_KEY;
9221 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9223 btrfs_free_path(path);
/* One sector worth of scratch space for populate_csum(). */
9227 buf = malloc(csum_root->sectorsize);
9229 btrfs_free_path(path);
9234 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9235 ret = btrfs_next_leaf(extent_root, path);
9243 leaf = path->nodes[0];
9245 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9246 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9251 ei = btrfs_item_ptr(leaf, path->slots[0],
9252 struct btrfs_extent_item);
/* Metadata extents carry no data csums; only FLAG_DATA extents do. */
9253 if (!(btrfs_extent_flags(leaf, ei) &
9254 BTRFS_EXTENT_FLAG_DATA)) {
9259 ret = populate_csum(trans, csum_root, buf, key.objectid,
9266 btrfs_free_path(path);
9272 * Recalculate the csum and put it into the csum tree.
9274 * Extent tree init will wipe out all the extent info, so in that case, we
9275 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9276 * will use fs/subvol trees to init the csum tree.
9278 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9279 struct btrfs_root *csum_root,
/* Dispatch on search_fs_tree: fs-tree walk vs extent-tree walk. */
9283 return fill_csum_tree_from_fs(trans, csum_root);
9285 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache():
 * pop and free every root_item_info entry, then free and NULL the cache
 * itself so a later run can rebuild it.  No-op if never allocated.
 */
9288 static void free_roots_info_cache(void)
9290 if (!roots_info_cache)
9293 while (!cache_tree_empty(roots_info_cache)) {
9294 struct cache_extent *entry;
9295 struct root_item_info *rii;
9297 entry = first_cache_extent(roots_info_cache);
9300 remove_cache_extent(roots_info_cache, entry);
9301 rii = container_of(entry, struct root_item_info, cache_extent);
9305 free(roots_info_cache);
9306 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first (and only) inline
 * ref is a TREE_BLOCK_REF — i.e. candidate root nodes of subvolume trees —
 * and record, per root id, the highest-level node seen along with its
 * bytenr and generation in the global roots_info_cache.  node_count tracks
 * how many nodes share that highest level; repair_root_items() later uses
 * node_count == 1 to identify the unique true root node.
 */
9309 static int build_roots_info_cache(struct btrfs_fs_info *info)
9312 struct btrfs_key key;
9313 struct extent_buffer *leaf;
9314 struct btrfs_path *path;
9316 if (!roots_info_cache) {
9317 roots_info_cache = malloc(sizeof(*roots_info_cache));
9318 if (!roots_info_cache)
9320 cache_tree_init(roots_info_cache);
9323 path = btrfs_alloc_path();
9328 key.type = BTRFS_EXTENT_ITEM_KEY;
9331 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9334 leaf = path->nodes[0];
9337 struct btrfs_key found_key;
9338 struct btrfs_extent_item *ei;
9339 struct btrfs_extent_inline_ref *iref;
9340 int slot = path->slots[0];
9345 struct cache_extent *entry;
9346 struct root_item_info *rii;
9348 if (slot >= btrfs_header_nritems(leaf)) {
9349 ret = btrfs_next_leaf(info->extent_root, path);
9356 leaf = path->nodes[0];
9357 slot = path->slots[0];
9360 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9362 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9363 found_key.type != BTRFS_METADATA_ITEM_KEY)
9366 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9367 flags = btrfs_extent_flags(leaf, ei);
/* Only tree blocks matter; skip EXTENT_ITEMs for data. */
9369 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9370 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM stores level in key.offset; EXTENT_ITEM embeds it in a
 * btrfs_tree_block_info preceding the inline refs. */
9373 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9374 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9375 level = found_key.offset;
9377 struct btrfs_tree_block_info *binfo;
9379 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9380 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9381 level = btrfs_tree_block_level(leaf, binfo);
9385 * For a root extent, it must be of the following type and the
9386 * first (and only one) iref in the item.
9388 type = btrfs_extent_inline_ref_type(leaf, iref);
9389 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9392 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9393 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create its cache record. */
9395 rii = malloc(sizeof(struct root_item_info));
9400 rii->cache_extent.start = root_id;
9401 rii->cache_extent.size = 1;
9402 rii->level = (u8)-1;
9403 entry = &rii->cache_extent;
9404 ret = insert_cache_extent(roots_info_cache, entry);
9407 rii = container_of(entry, struct root_item_info,
9411 ASSERT(rii->cache_extent.start == root_id);
9412 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level node per root; count ties at that level. */
9414 if (level > rii->level || rii->level == (u8)-1) {
9416 rii->bytenr = found_key.objectid;
9417 rii->gen = btrfs_extent_generation(leaf, ei);
9418 rii->node_count = 1;
9419 } else if (level == rii->level) {
9427 btrfs_free_path(path);
/*
 * Compare the on-disk root item at path->slots[0] against the authoritative
 * info cached by build_roots_info_cache() for @root_key->objectid and, when
 * they disagree on bytenr/level/generation, rewrite the item in place
 * (unless @read_only_mode, which only reports what would change).
 * Refuses to "fix" an item whose generation is newer than the cached root
 * node's, since that would regress the tree.
 */
9432 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9433 struct btrfs_path *path,
9434 const struct btrfs_key *root_key,
9435 const int read_only_mode)
9437 const u64 root_id = root_key->objectid;
9438 struct cache_extent *entry;
9439 struct root_item_info *rii;
9440 struct btrfs_root_item ri;
9441 unsigned long offset;
9443 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9446 "Error: could not find extent items for root %llu\n",
9447 root_key->objectid);
9451 rii = container_of(entry, struct root_item_info, cache_extent);
9452 ASSERT(rii->cache_extent.start == root_id);
9453 ASSERT(rii->cache_extent.size == 1);
/* Ambiguous cache (several nodes at the top level): can't pick a root. */
9455 if (rii->node_count != 1) {
9457 "Error: could not find btree root extent for root %llu\n",
9462 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9463 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9465 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9466 btrfs_root_level(&ri) != rii->level ||
9467 btrfs_root_generation(&ri) != rii->gen) {
9470 * If we're in repair mode but our caller told us to not update
9471 * the root item, i.e. just check if it needs to be updated, don't
9472 * print this message, since the caller will call us again shortly
9473 * for the same root item without read only mode (the caller will
9474 * open a transaction first).
9476 if (!(read_only_mode && repair))
9478 "%sroot item for root %llu,"
9479 " current bytenr %llu, current gen %llu, current level %u,"
9480 " new bytenr %llu, new gen %llu, new level %u\n",
9481 (read_only_mode ? "" : "fixing "),
9483 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9484 btrfs_root_level(&ri),
9485 rii->bytenr, rii->gen, rii->level);
9487 if (btrfs_root_generation(&ri) > rii->gen) {
9489 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9490 root_id, btrfs_root_generation(&ri), rii->gen);
9494 if (!read_only_mode) {
9495 btrfs_set_root_bytenr(&ri, rii->bytenr);
9496 btrfs_set_root_level(&ri, rii->level);
9497 btrfs_set_root_generation(&ri, rii->gen);
9498 write_extent_buffer(path->nodes[0], &ri,
9499 offset, sizeof(ri));
9509 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9510 * caused read-only snapshots to be corrupted if they were created at a moment
9511 * when the source subvolume/snapshot had orphan items. The issue was that the
9512 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9513 * node instead of the post orphan cleanup root node.
9514 * So this function, and its callees, just detects and fixes those cases. Even
9515 * though the regression was for read-only snapshots, this function applies to
9516 * any snapshot/subvolume root.
9517 * This must be run before any other repair code - not doing it so, makes other
9518 * repair code delete or modify backrefs in the extent tree for example, which
9519 * will result in an inconsistent fs after repairing the root items.
/*
 * Returns the number of root items repaired (>= 0) or a negative error.
 * Walks every ROOT_ITEM (skipping tree-reloc roots) and runs
 * maybe_repair_root_item() against the roots_info_cache.
 */
9521 static int repair_root_items(struct btrfs_fs_info *info)
9523 struct btrfs_path *path = NULL;
9524 struct btrfs_key key;
9525 struct extent_buffer *leaf;
9526 struct btrfs_trans_handle *trans = NULL;
9531 ret = build_roots_info_cache(info);
9535 path = btrfs_alloc_path();
9541 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9542 key.type = BTRFS_ROOT_ITEM_KEY;
9547 * Avoid opening and committing transactions if a leaf doesn't have
9548 * any root items that need to be fixed, so that we avoid rotating
9549 * backup roots unnecessarily.
9552 trans = btrfs_start_transaction(info->tree_root, 1);
9553 if (IS_ERR(trans)) {
9554 ret = PTR_ERR(trans);
9559 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9563 leaf = path->nodes[0];
9566 struct btrfs_key found_key;
9568 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
/* Remember the next key before dropping the leaf, so the outer loop
 * can resume the search there after committing. */
9569 int no_more_keys = find_next_key(path, &key);
9571 btrfs_release_path(path);
9573 ret = btrfs_commit_transaction(trans,
9585 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9587 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9589 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass per leaf is read-only; if a fix is needed and repair is
 * enabled, a transaction is opened and the leaf is revisited. */
9592 ret = maybe_repair_root_item(info, path, &found_key,
9597 if (!trans && repair) {
9600 btrfs_release_path(path);
9610 free_roots_info_cache();
9611 btrfs_free_path(path);
9613 btrfs_commit_transaction(trans, info->tree_root);
/* Usage text for `btrfs check`, printed by usage(); one string per line. */
9620 const char * const cmd_check_usage[] = {
9621 "btrfs check [options] <device>",
9622 "Check structural integrity of a filesystem (unmounted).",
9623 "Check structural integrity of an unmounted filesystem. Verify internal",
9624 "trees' consistency and item connectivity. In the repair mode try to",
9625 "fix the problems found.",
9626 "WARNING: the repair mode is considered dangerous",
9628 "-s|--super <superblock> use this superblock copy",
9629 "-b|--backup use the first valid backup root copy",
9630 "--repair try to repair the filesystem",
9631 "--readonly run in read-only mode (default)",
9632 "--init-csum-tree create a new CRC tree",
9633 "--init-extent-tree create a new extent tree",
9634 "--check-data-csum verify checksums of data blocks",
9635 "-Q|--qgroup-report print a report on qgroup consistency",
9636 "-E|--subvol-extents <subvolid>",
9637 " print subvolume extents and sharing state",
9638 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9639 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9640 "-p|--progress indicate progress",
9644 int cmd_check(int argc, char **argv)
9646 struct cache_tree root_cache;
9647 struct btrfs_root *root;
9648 struct btrfs_fs_info *info;
9651 u64 tree_root_bytenr = 0;
9652 u64 chunk_root_bytenr = 0;
9653 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9656 int init_csum_tree = 0;
9658 int qgroup_report = 0;
9659 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9663 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9664 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9665 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9666 static const struct option long_options[] = {
9667 { "super", required_argument, NULL, 's' },
9668 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9669 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9670 { "init-csum-tree", no_argument, NULL,
9671 GETOPT_VAL_INIT_CSUM },
9672 { "init-extent-tree", no_argument, NULL,
9673 GETOPT_VAL_INIT_EXTENT },
9674 { "check-data-csum", no_argument, NULL,
9675 GETOPT_VAL_CHECK_CSUM },
9676 { "backup", no_argument, NULL, 'b' },
9677 { "subvol-extents", required_argument, NULL, 'E' },
9678 { "qgroup-report", no_argument, NULL, 'Q' },
9679 { "tree-root", required_argument, NULL, 'r' },
9680 { "chunk-root", required_argument, NULL,
9681 GETOPT_VAL_CHUNK_TREE },
9682 { "progress", no_argument, NULL, 'p' },
9686 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9690 case 'a': /* ignored */ break;
9692 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9695 num = arg_strtou64(optarg);
9696 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9698 "ERROR: super mirror should be less than: %d\n",
9699 BTRFS_SUPER_MIRROR_MAX);
9702 bytenr = btrfs_sb_offset(((int)num));
9703 printf("using SB copy %llu, bytenr %llu\n", num,
9704 (unsigned long long)bytenr);
9710 subvolid = arg_strtou64(optarg);
9713 tree_root_bytenr = arg_strtou64(optarg);
9715 case GETOPT_VAL_CHUNK_TREE:
9716 chunk_root_bytenr = arg_strtou64(optarg);
9719 ctx.progress_enabled = true;
9723 usage(cmd_check_usage);
9724 case GETOPT_VAL_REPAIR:
9725 printf("enabling repair mode\n");
9727 ctree_flags |= OPEN_CTREE_WRITES;
9729 case GETOPT_VAL_READONLY:
9732 case GETOPT_VAL_INIT_CSUM:
9733 printf("Creating a new CRC tree\n");
9736 ctree_flags |= OPEN_CTREE_WRITES;
9738 case GETOPT_VAL_INIT_EXTENT:
9739 init_extent_tree = 1;
9740 ctree_flags |= (OPEN_CTREE_WRITES |
9741 OPEN_CTREE_NO_BLOCK_GROUPS);
9744 case GETOPT_VAL_CHECK_CSUM:
9745 check_data_csum = 1;
9750 if (check_argc_exact(argc - optind, 1))
9751 usage(cmd_check_usage);
9753 if (ctx.progress_enabled) {
9754 ctx.tp = TASK_NOTHING;
9755 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9758 /* This check is the only reason for --readonly to exist */
9759 if (readonly && repair) {
9760 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9765 cache_tree_init(&root_cache);
9767 if((ret = check_mounted(argv[optind])) < 0) {
9768 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9771 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9776 /* only allow partial opening under repair mode */
9778 ctree_flags |= OPEN_CTREE_PARTIAL;
9780 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9781 chunk_root_bytenr, ctree_flags);
9783 fprintf(stderr, "Couldn't open file system\n");
9789 root = info->fs_root;
9792 * repair mode will force us to commit transaction which
9793 * will make us fail to load log tree when mounting.
9795 if (repair && btrfs_super_log_root(info->super_copy)) {
9796 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9801 ret = zero_log_tree(root);
9803 fprintf(stderr, "fail to zero log tree\n");
9808 uuid_unparse(info->super_copy->fsid, uuidbuf);
9809 if (qgroup_report) {
9810 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9812 ret = qgroup_verify_all(info);
9814 ret = report_qgroups(1);
9818 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9819 subvolid, argv[optind], uuidbuf);
9820 ret = print_extent_state(info, subvolid);
9823 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9825 if (!extent_buffer_uptodate(info->tree_root->node) ||
9826 !extent_buffer_uptodate(info->dev_root->node) ||
9827 !extent_buffer_uptodate(info->chunk_root->node)) {
9828 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9833 if (init_extent_tree || init_csum_tree) {
9834 struct btrfs_trans_handle *trans;
9836 trans = btrfs_start_transaction(info->extent_root, 0);
9837 if (IS_ERR(trans)) {
9838 fprintf(stderr, "Error starting transaction\n");
9839 ret = PTR_ERR(trans);
9843 if (init_extent_tree) {
9844 printf("Creating a new extent tree\n");
9845 ret = reinit_extent_tree(trans, info);
9850 if (init_csum_tree) {
9851 fprintf(stderr, "Reinit crc root\n");
9852 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9854 fprintf(stderr, "crc root initialization failed\n");
9859 ret = fill_csum_tree(trans, info->csum_root,
9862 fprintf(stderr, "crc refilling failed\n");
9867 * Ok now we commit and run the normal fsck, which will add
9868 * extent entries for all of the items it finds.
9870 ret = btrfs_commit_transaction(trans, info->extent_root);
9874 if (!extent_buffer_uptodate(info->extent_root->node)) {
9875 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9879 if (!extent_buffer_uptodate(info->csum_root->node)) {
9880 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9885 if (!ctx.progress_enabled)
9886 fprintf(stderr, "checking extents\n");
9887 ret = check_chunks_and_extents(root);
9889 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9891 ret = repair_root_items(info);
9895 fprintf(stderr, "Fixed %d roots.\n", ret);
9897 } else if (ret > 0) {
9899 "Found %d roots with an outdated root item.\n",
9902 "Please run a filesystem check with the option --repair to fix them.\n");
9907 if (!ctx.progress_enabled) {
9908 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9909 fprintf(stderr, "checking free space tree\n");
9911 fprintf(stderr, "checking free space cache\n");
9913 ret = check_space_cache(root);
9918 * We used to have to have these hole extents in between our real
9919 * extents so if we don't have this flag set we need to make sure there
9920 * are no gaps in the file extents for inodes, otherwise we can just
9921 * ignore it when this happens.
9923 no_holes = btrfs_fs_incompat(root->fs_info,
9924 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9925 if (!ctx.progress_enabled)
9926 fprintf(stderr, "checking fs roots\n");
9927 ret = check_fs_roots(root, &root_cache);
9931 fprintf(stderr, "checking csums\n");
9932 ret = check_csums(root);
9936 fprintf(stderr, "checking root refs\n");
9937 ret = check_root_refs(root, &root_cache);
9941 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9942 struct extent_buffer *eb;
9944 eb = list_first_entry(&root->fs_info->recow_ebs,
9945 struct extent_buffer, recow);
9946 list_del_init(&eb->recow);
9947 ret = recow_extent_buffer(root, eb);
9952 while (!list_empty(&delete_items)) {
9953 struct bad_item *bad;
9955 bad = list_first_entry(&delete_items, struct bad_item, list);
9956 list_del_init(&bad->list);
9958 ret = delete_bad_item(root, bad);
9962 if (info->quota_enabled) {
9964 fprintf(stderr, "checking quota groups\n");
9965 err = qgroup_verify_all(info);
9970 if (!list_empty(&root->fs_info->recow_ebs)) {
9971 fprintf(stderr, "Transid errors in file system\n");
9975 /* Don't override original ret */
9979 ret = report_qgroups(0);
9980 if (found_old_backref) { /*
9981 * there was a disk format change when mixed
9982 * backref was in testing tree. The old format
9983 * existed about one week.
9985 printf("\n * Found old mixed backref format. "
9986 "The old format is not supported! *"
9987 "\n * Please mount the FS in readonly mode, "
9988 "backup data and re-format the FS. *\n\n");
9991 printf("found %llu bytes used err is %d\n",
9992 (unsigned long long)bytes_used, ret);
9993 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9994 printf("total tree bytes: %llu\n",
9995 (unsigned long long)total_btree_bytes);
9996 printf("total fs tree bytes: %llu\n",
9997 (unsigned long long)total_fs_tree_bytes);
9998 printf("total extent tree bytes: %llu\n",
9999 (unsigned long long)total_extent_tree_bytes);
10000 printf("btree space waste bytes: %llu\n",
10001 (unsigned long long)btree_space_waste);
10002 printf("file data blocks allocated: %llu\n referenced %llu\n",
10003 (unsigned long long)data_bytes_allocated,
10004 (unsigned long long)data_bytes_referenced);
10006 free_qgroup_counts();
10007 free_root_recs_tree(&root_cache);
10011 if (ctx.progress_enabled)
10012 task_deinit(ctx.info);