2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
438 * Error bit for low memory mode check.
440 * Currently no caller cares about it yet. Just internal use for error
443 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
444 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
445 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
447 static void *print_status_check(void *p)
449 struct task_ctx *priv = p;
450 const char work_indicator[] = { '.', 'o', 'O', 'o' };
452 static char *task_position_string[] = {
454 "checking free space cache",
458 task_period_start(priv->info, 1000 /* 1s */);
460 if (priv->tp == TASK_NOTHING)
464 printf("%s [%c]\r", task_position_string[priv->tp],
465 work_indicator[count % 4]);
468 task_period_wait(priv->info);
473 static int print_status_return(void *p)
481 /* Compatible function to allow reuse of old codes */
482 static u64 first_extent_gap(struct rb_root *holes)
484 struct file_extent_hole *hole;
486 if (RB_EMPTY_ROOT(holes))
489 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
493 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
495 struct file_extent_hole *hole1;
496 struct file_extent_hole *hole2;
498 hole1 = rb_entry(node1, struct file_extent_hole, node);
499 hole2 = rb_entry(node2, struct file_extent_hole, node);
501 if (hole1->start > hole2->start)
503 if (hole1->start < hole2->start)
505 /* Now hole1->start == hole2->start */
506 if (hole1->len >= hole2->len)
508 * Hole 1 will be merge center
509 * Same hole will be merged later
512 /* Hole 2 will be merge center */
517 * Add a hole to the record
519 * This will do hole merge for copy_file_extent_holes(),
520 * which will ensure there won't be continuous holes.
522 static int add_file_extent_hole(struct rb_root *holes,
525 struct file_extent_hole *hole;
526 struct file_extent_hole *prev = NULL;
527 struct file_extent_hole *next = NULL;
529 hole = malloc(sizeof(*hole));
534 /* Since compare will not return 0, no -EEXIST will happen */
535 rb_insert(holes, &hole->node, compare_hole);
537 /* simple merge with previous hole */
538 if (rb_prev(&hole->node))
539 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
541 if (prev && prev->start + prev->len >= hole->start) {
542 hole->len = hole->start + hole->len - prev->start;
543 hole->start = prev->start;
544 rb_erase(&prev->node, holes);
549 /* iterate merge with next holes */
551 if (!rb_next(&hole->node))
553 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
555 if (hole->start + hole->len >= next->start) {
556 if (hole->start + hole->len <= next->start + next->len)
557 hole->len = next->start + next->len -
559 rb_erase(&next->node, holes);
568 static int compare_hole_range(struct rb_node *node, void *data)
570 struct file_extent_hole *hole;
573 hole = (struct file_extent_hole *)data;
576 hole = rb_entry(node, struct file_extent_hole, node);
577 if (start < hole->start)
579 if (start >= hole->start && start < hole->start + hole->len)
585 * Delete a hole in the record
587 * This will do the hole split and is much restrict than add.
589 static int del_file_extent_hole(struct rb_root *holes,
592 struct file_extent_hole *hole;
593 struct file_extent_hole tmp;
598 struct rb_node *node;
605 node = rb_search(holes, &tmp, compare_hole_range, NULL);
608 hole = rb_entry(node, struct file_extent_hole, node);
609 if (start + len > hole->start + hole->len)
613 * Now there will be no overlap, delete the hole and re-add the
614 * split(s) if they exists.
616 if (start > hole->start) {
617 prev_start = hole->start;
618 prev_len = start - hole->start;
621 if (hole->start + hole->len > start + len) {
622 next_start = start + len;
623 next_len = hole->start + hole->len - start - len;
626 rb_erase(node, holes);
629 ret = add_file_extent_hole(holes, prev_start, prev_len);
634 ret = add_file_extent_hole(holes, next_start, next_len);
641 static int copy_file_extent_holes(struct rb_root *dst,
644 struct file_extent_hole *hole;
645 struct rb_node *node;
648 node = rb_first(src);
650 hole = rb_entry(node, struct file_extent_hole, node);
651 ret = add_file_extent_hole(dst, hole->start, hole->len);
654 node = rb_next(node);
659 static void free_file_extent_holes(struct rb_root *holes)
661 struct rb_node *node;
662 struct file_extent_hole *hole;
664 node = rb_first(holes);
666 hole = rb_entry(node, struct file_extent_hole, node);
667 rb_erase(node, holes);
669 node = rb_first(holes);
673 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
675 static void record_root_in_trans(struct btrfs_trans_handle *trans,
676 struct btrfs_root *root)
678 if (root->last_trans != trans->transid) {
679 root->track_dirty = 1;
680 root->last_trans = trans->transid;
681 root->commit_root = root->node;
682 extent_buffer_get(root->node);
686 static u8 imode_to_type(u32 imode)
689 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
690 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
691 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
692 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
693 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
694 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
695 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
696 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
699 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
703 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
705 struct device_record *rec1;
706 struct device_record *rec2;
708 rec1 = rb_entry(node1, struct device_record, node);
709 rec2 = rb_entry(node2, struct device_record, node);
710 if (rec1->devid > rec2->devid)
712 else if (rec1->devid < rec2->devid)
718 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
720 struct inode_record *rec;
721 struct inode_backref *backref;
722 struct inode_backref *orig;
723 struct inode_backref *tmp;
724 struct orphan_data_extent *src_orphan;
725 struct orphan_data_extent *dst_orphan;
729 rec = malloc(sizeof(*rec));
731 return ERR_PTR(-ENOMEM);
732 memcpy(rec, orig_rec, sizeof(*rec));
734 INIT_LIST_HEAD(&rec->backrefs);
735 INIT_LIST_HEAD(&rec->orphan_extents);
736 rec->holes = RB_ROOT;
738 list_for_each_entry(orig, &orig_rec->backrefs, list) {
739 size = sizeof(*orig) + orig->namelen + 1;
740 backref = malloc(size);
745 memcpy(backref, orig, size);
746 list_add_tail(&backref->list, &rec->backrefs);
748 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
749 dst_orphan = malloc(sizeof(*dst_orphan));
754 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
755 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
757 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
763 if (!list_empty(&rec->backrefs))
764 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
765 list_del(&orig->list);
769 if (!list_empty(&rec->orphan_extents))
770 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
771 list_del(&orig->list);
780 static void print_orphan_data_extents(struct list_head *orphan_extents,
783 struct orphan_data_extent *orphan;
785 if (list_empty(orphan_extents))
787 printf("The following data extent is lost in tree %llu:\n",
789 list_for_each_entry(orphan, orphan_extents, list) {
790 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
791 orphan->objectid, orphan->offset, orphan->disk_bytenr,
796 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
798 u64 root_objectid = root->root_key.objectid;
799 int errors = rec->errors;
803 /* reloc root errors, we print its corresponding fs root objectid*/
804 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
805 root_objectid = root->root_key.offset;
806 fprintf(stderr, "reloc");
808 fprintf(stderr, "root %llu inode %llu errors %x",
809 (unsigned long long) root_objectid,
810 (unsigned long long) rec->ino, rec->errors);
812 if (errors & I_ERR_NO_INODE_ITEM)
813 fprintf(stderr, ", no inode item");
814 if (errors & I_ERR_NO_ORPHAN_ITEM)
815 fprintf(stderr, ", no orphan item");
816 if (errors & I_ERR_DUP_INODE_ITEM)
817 fprintf(stderr, ", dup inode item");
818 if (errors & I_ERR_DUP_DIR_INDEX)
819 fprintf(stderr, ", dup dir index");
820 if (errors & I_ERR_ODD_DIR_ITEM)
821 fprintf(stderr, ", odd dir item");
822 if (errors & I_ERR_ODD_FILE_EXTENT)
823 fprintf(stderr, ", odd file extent");
824 if (errors & I_ERR_BAD_FILE_EXTENT)
825 fprintf(stderr, ", bad file extent");
826 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
827 fprintf(stderr, ", file extent overlap");
828 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
829 fprintf(stderr, ", file extent discount");
830 if (errors & I_ERR_DIR_ISIZE_WRONG)
831 fprintf(stderr, ", dir isize wrong");
832 if (errors & I_ERR_FILE_NBYTES_WRONG)
833 fprintf(stderr, ", nbytes wrong");
834 if (errors & I_ERR_ODD_CSUM_ITEM)
835 fprintf(stderr, ", odd csum item");
836 if (errors & I_ERR_SOME_CSUM_MISSING)
837 fprintf(stderr, ", some csum missing");
838 if (errors & I_ERR_LINK_COUNT_WRONG)
839 fprintf(stderr, ", link count wrong");
840 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
841 fprintf(stderr, ", orphan file extent");
842 fprintf(stderr, "\n");
843 /* Print the orphan extents if needed */
844 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
845 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
847 /* Print the holes if needed */
848 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
849 struct file_extent_hole *hole;
850 struct rb_node *node;
853 node = rb_first(&rec->holes);
854 fprintf(stderr, "Found file extent holes:\n");
857 hole = rb_entry(node, struct file_extent_hole, node);
858 fprintf(stderr, "\tstart: %llu, len: %llu\n",
859 hole->start, hole->len);
860 node = rb_next(node);
863 fprintf(stderr, "\tstart: 0, len: %llu\n",
864 round_up(rec->isize, root->sectorsize));
868 static void print_ref_error(int errors)
870 if (errors & REF_ERR_NO_DIR_ITEM)
871 fprintf(stderr, ", no dir item");
872 if (errors & REF_ERR_NO_DIR_INDEX)
873 fprintf(stderr, ", no dir index");
874 if (errors & REF_ERR_NO_INODE_REF)
875 fprintf(stderr, ", no inode ref");
876 if (errors & REF_ERR_DUP_DIR_ITEM)
877 fprintf(stderr, ", dup dir item");
878 if (errors & REF_ERR_DUP_DIR_INDEX)
879 fprintf(stderr, ", dup dir index");
880 if (errors & REF_ERR_DUP_INODE_REF)
881 fprintf(stderr, ", dup inode ref");
882 if (errors & REF_ERR_INDEX_UNMATCH)
883 fprintf(stderr, ", index mismatch");
884 if (errors & REF_ERR_FILETYPE_UNMATCH)
885 fprintf(stderr, ", filetype mismatch");
886 if (errors & REF_ERR_NAME_TOO_LONG)
887 fprintf(stderr, ", name too long");
888 if (errors & REF_ERR_NO_ROOT_REF)
889 fprintf(stderr, ", no root ref");
890 if (errors & REF_ERR_NO_ROOT_BACKREF)
891 fprintf(stderr, ", no root backref");
892 if (errors & REF_ERR_DUP_ROOT_REF)
893 fprintf(stderr, ", dup root ref");
894 if (errors & REF_ERR_DUP_ROOT_BACKREF)
895 fprintf(stderr, ", dup root backref");
896 fprintf(stderr, "\n");
899 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
902 struct ptr_node *node;
903 struct cache_extent *cache;
904 struct inode_record *rec = NULL;
907 cache = lookup_cache_extent(inode_cache, ino, 1);
909 node = container_of(cache, struct ptr_node, cache);
911 if (mod && rec->refs > 1) {
912 node->data = clone_inode_rec(rec);
913 if (IS_ERR(node->data))
919 rec = calloc(1, sizeof(*rec));
921 return ERR_PTR(-ENOMEM);
923 rec->extent_start = (u64)-1;
925 INIT_LIST_HEAD(&rec->backrefs);
926 INIT_LIST_HEAD(&rec->orphan_extents);
927 rec->holes = RB_ROOT;
929 node = malloc(sizeof(*node));
932 return ERR_PTR(-ENOMEM);
934 node->cache.start = ino;
935 node->cache.size = 1;
938 if (ino == BTRFS_FREE_INO_OBJECTID)
941 ret = insert_cache_extent(inode_cache, &node->cache);
943 return ERR_PTR(-EEXIST);
948 static void free_orphan_data_extents(struct list_head *orphan_extents)
950 struct orphan_data_extent *orphan;
952 while (!list_empty(orphan_extents)) {
953 orphan = list_entry(orphan_extents->next,
954 struct orphan_data_extent, list);
955 list_del(&orphan->list);
960 static void free_inode_rec(struct inode_record *rec)
962 struct inode_backref *backref;
967 while (!list_empty(&rec->backrefs)) {
968 backref = to_inode_backref(rec->backrefs.next);
969 list_del(&backref->list);
972 free_orphan_data_extents(&rec->orphan_extents);
973 free_file_extent_holes(&rec->holes);
977 static int can_free_inode_rec(struct inode_record *rec)
979 if (!rec->errors && rec->checked && rec->found_inode_item &&
980 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
985 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
986 struct inode_record *rec)
988 struct cache_extent *cache;
989 struct inode_backref *tmp, *backref;
990 struct ptr_node *node;
991 unsigned char filetype;
993 if (!rec->found_inode_item)
996 filetype = imode_to_type(rec->imode);
997 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
998 if (backref->found_dir_item && backref->found_dir_index) {
999 if (backref->filetype != filetype)
1000 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1001 if (!backref->errors && backref->found_inode_ref &&
1002 rec->nlink == rec->found_link) {
1003 list_del(&backref->list);
1009 if (!rec->checked || rec->merging)
1012 if (S_ISDIR(rec->imode)) {
1013 if (rec->found_size != rec->isize)
1014 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1015 if (rec->found_file_extent)
1016 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1017 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1018 if (rec->found_dir_item)
1019 rec->errors |= I_ERR_ODD_DIR_ITEM;
1020 if (rec->found_size != rec->nbytes)
1021 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1022 if (rec->nlink > 0 && !no_holes &&
1023 (rec->extent_end < rec->isize ||
1024 first_extent_gap(&rec->holes) < rec->isize))
1025 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1028 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1029 if (rec->found_csum_item && rec->nodatasum)
1030 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1031 if (rec->some_csum_missing && !rec->nodatasum)
1032 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1035 BUG_ON(rec->refs != 1);
1036 if (can_free_inode_rec(rec)) {
1037 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1038 node = container_of(cache, struct ptr_node, cache);
1039 BUG_ON(node->data != rec);
1040 remove_cache_extent(inode_cache, &node->cache);
1042 free_inode_rec(rec);
1046 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1048 struct btrfs_path path;
1049 struct btrfs_key key;
1052 key.objectid = BTRFS_ORPHAN_OBJECTID;
1053 key.type = BTRFS_ORPHAN_ITEM_KEY;
1056 btrfs_init_path(&path);
1057 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1058 btrfs_release_path(&path);
1064 static int process_inode_item(struct extent_buffer *eb,
1065 int slot, struct btrfs_key *key,
1066 struct shared_node *active_node)
1068 struct inode_record *rec;
1069 struct btrfs_inode_item *item;
1071 rec = active_node->current;
1072 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1073 if (rec->found_inode_item) {
1074 rec->errors |= I_ERR_DUP_INODE_ITEM;
1077 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1078 rec->nlink = btrfs_inode_nlink(eb, item);
1079 rec->isize = btrfs_inode_size(eb, item);
1080 rec->nbytes = btrfs_inode_nbytes(eb, item);
1081 rec->imode = btrfs_inode_mode(eb, item);
1082 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1084 rec->found_inode_item = 1;
1085 if (rec->nlink == 0)
1086 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1087 maybe_free_inode_rec(&active_node->inode_cache, rec);
1091 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1093 int namelen, u64 dir)
1095 struct inode_backref *backref;
1097 list_for_each_entry(backref, &rec->backrefs, list) {
1098 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1100 if (backref->dir != dir || backref->namelen != namelen)
1102 if (memcmp(name, backref->name, namelen))
1107 backref = malloc(sizeof(*backref) + namelen + 1);
1110 memset(backref, 0, sizeof(*backref));
1112 backref->namelen = namelen;
1113 memcpy(backref->name, name, namelen);
1114 backref->name[namelen] = '\0';
1115 list_add_tail(&backref->list, &rec->backrefs);
1119 static int add_inode_backref(struct cache_tree *inode_cache,
1120 u64 ino, u64 dir, u64 index,
1121 const char *name, int namelen,
1122 int filetype, int itemtype, int errors)
1124 struct inode_record *rec;
1125 struct inode_backref *backref;
1127 rec = get_inode_rec(inode_cache, ino, 1);
1128 BUG_ON(IS_ERR(rec));
1129 backref = get_inode_backref(rec, name, namelen, dir);
1132 backref->errors |= errors;
1133 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1134 if (backref->found_dir_index)
1135 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1136 if (backref->found_inode_ref && backref->index != index)
1137 backref->errors |= REF_ERR_INDEX_UNMATCH;
1138 if (backref->found_dir_item && backref->filetype != filetype)
1139 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1141 backref->index = index;
1142 backref->filetype = filetype;
1143 backref->found_dir_index = 1;
1144 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1146 if (backref->found_dir_item)
1147 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1148 if (backref->found_dir_index && backref->filetype != filetype)
1149 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1151 backref->filetype = filetype;
1152 backref->found_dir_item = 1;
1153 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1154 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1155 if (backref->found_inode_ref)
1156 backref->errors |= REF_ERR_DUP_INODE_REF;
1157 if (backref->found_dir_index && backref->index != index)
1158 backref->errors |= REF_ERR_INDEX_UNMATCH;
1160 backref->index = index;
1162 backref->ref_type = itemtype;
1163 backref->found_inode_ref = 1;
1168 maybe_free_inode_rec(inode_cache, rec);
1172 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1173 struct cache_tree *dst_cache)
1175 struct inode_backref *backref;
1180 list_for_each_entry(backref, &src->backrefs, list) {
1181 if (backref->found_dir_index) {
1182 add_inode_backref(dst_cache, dst->ino, backref->dir,
1183 backref->index, backref->name,
1184 backref->namelen, backref->filetype,
1185 BTRFS_DIR_INDEX_KEY, backref->errors);
1187 if (backref->found_dir_item) {
1189 add_inode_backref(dst_cache, dst->ino,
1190 backref->dir, 0, backref->name,
1191 backref->namelen, backref->filetype,
1192 BTRFS_DIR_ITEM_KEY, backref->errors);
1194 if (backref->found_inode_ref) {
1195 add_inode_backref(dst_cache, dst->ino,
1196 backref->dir, backref->index,
1197 backref->name, backref->namelen, 0,
1198 backref->ref_type, backref->errors);
1202 if (src->found_dir_item)
1203 dst->found_dir_item = 1;
1204 if (src->found_file_extent)
1205 dst->found_file_extent = 1;
1206 if (src->found_csum_item)
1207 dst->found_csum_item = 1;
1208 if (src->some_csum_missing)
1209 dst->some_csum_missing = 1;
1210 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1211 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1216 BUG_ON(src->found_link < dir_count);
1217 dst->found_link += src->found_link - dir_count;
1218 dst->found_size += src->found_size;
1219 if (src->extent_start != (u64)-1) {
1220 if (dst->extent_start == (u64)-1) {
1221 dst->extent_start = src->extent_start;
1222 dst->extent_end = src->extent_end;
1224 if (dst->extent_end > src->extent_start)
1225 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1226 else if (dst->extent_end < src->extent_start) {
1227 ret = add_file_extent_hole(&dst->holes,
1229 src->extent_start - dst->extent_end);
1231 if (dst->extent_end < src->extent_end)
1232 dst->extent_end = src->extent_end;
1236 dst->errors |= src->errors;
1237 if (src->found_inode_item) {
1238 if (!dst->found_inode_item) {
1239 dst->nlink = src->nlink;
1240 dst->isize = src->isize;
1241 dst->nbytes = src->nbytes;
1242 dst->imode = src->imode;
1243 dst->nodatasum = src->nodatasum;
1244 dst->found_inode_item = 1;
1246 dst->errors |= I_ERR_DUP_INODE_ITEM;
1254 static int splice_shared_node(struct shared_node *src_node,
1255 struct shared_node *dst_node)
1257 struct cache_extent *cache;
1258 struct ptr_node *node, *ins;
1259 struct cache_tree *src, *dst;
1260 struct inode_record *rec, *conflict;
1261 u64 current_ino = 0;
1265 if (--src_node->refs == 0)
1267 if (src_node->current)
1268 current_ino = src_node->current->ino;
1270 src = &src_node->root_cache;
1271 dst = &dst_node->root_cache;
1273 cache = search_cache_extent(src, 0);
1275 node = container_of(cache, struct ptr_node, cache);
1277 cache = next_cache_extent(cache);
1280 remove_cache_extent(src, &node->cache);
1283 ins = malloc(sizeof(*ins));
1285 ins->cache.start = node->cache.start;
1286 ins->cache.size = node->cache.size;
1290 ret = insert_cache_extent(dst, &ins->cache);
1291 if (ret == -EEXIST) {
1292 conflict = get_inode_rec(dst, rec->ino, 1);
1293 BUG_ON(IS_ERR(conflict));
1294 merge_inode_recs(rec, conflict, dst);
1296 conflict->checked = 1;
1297 if (dst_node->current == conflict)
1298 dst_node->current = NULL;
1300 maybe_free_inode_rec(dst, conflict);
1301 free_inode_rec(rec);
1308 if (src == &src_node->root_cache) {
1309 src = &src_node->inode_cache;
1310 dst = &dst_node->inode_cache;
1314 if (current_ino > 0 && (!dst_node->current ||
1315 current_ino > dst_node->current->ino)) {
1316 if (dst_node->current) {
1317 dst_node->current->checked = 1;
1318 maybe_free_inode_rec(dst, dst_node->current);
1320 dst_node->current = get_inode_rec(dst, current_ino, 1);
1321 BUG_ON(IS_ERR(dst_node->current));
1326 static void free_inode_ptr(struct cache_extent *cache)
1328 struct ptr_node *node;
1329 struct inode_record *rec;
1331 node = container_of(cache, struct ptr_node, cache);
1333 free_inode_rec(rec);
1337 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1339 static struct shared_node *find_shared_node(struct cache_tree *shared,
1342 struct cache_extent *cache;
1343 struct shared_node *node;
1345 cache = lookup_cache_extent(shared, bytenr, 1);
1347 node = container_of(cache, struct shared_node, cache);
1353 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1356 struct shared_node *node;
1358 node = calloc(1, sizeof(*node));
1361 node->cache.start = bytenr;
1362 node->cache.size = 1;
1363 cache_tree_init(&node->root_cache);
1364 cache_tree_init(&node->inode_cache);
1367 ret = insert_cache_extent(shared, &node->cache);
1372 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1373 struct walk_control *wc, int level)
1375 struct shared_node *node;
1376 struct shared_node *dest;
1379 if (level == wc->active_node)
1382 BUG_ON(wc->active_node <= level);
1383 node = find_shared_node(&wc->shared, bytenr);
1385 ret = add_shared_node(&wc->shared, bytenr, refs);
1387 node = find_shared_node(&wc->shared, bytenr);
1388 wc->nodes[level] = node;
1389 wc->active_node = level;
1393 if (wc->root_level == wc->active_node &&
1394 btrfs_root_refs(&root->root_item) == 0) {
1395 if (--node->refs == 0) {
1396 free_inode_recs_tree(&node->root_cache);
1397 free_inode_recs_tree(&node->inode_cache);
1398 remove_cache_extent(&wc->shared, &node->cache);
1404 dest = wc->nodes[wc->active_node];
1405 splice_shared_node(node, dest);
1406 if (node->refs == 0) {
1407 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): pop the active shared node when the
 * walk leaves @level, promote the next higher populated level to active,
 * and splice the departing node's records into it (unless the root is dead).
 */
1413 static int leave_shared_node(struct btrfs_root *root,
1414 			     struct walk_control *wc, int level)
1416 	struct shared_node *node;
1417 	struct shared_node *dest;
1420 	if (level == wc->root_level)
	/* Find the next populated node above @level to become active. */
1423 	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1427 	BUG_ON(i >= BTRFS_MAX_LEVEL);
1429 	node = wc->nodes[wc->active_node];
1430 	wc->nodes[wc->active_node] = NULL;
1431 	wc->active_node = i;
1433 	dest = wc->nodes[wc->active_node];
1434 	if (wc->active_node < wc->root_level ||
1435 	    btrfs_root_refs(&root->root_item) > 0) {
1436 		BUG_ON(node->refs <= 1);
1437 		splice_shared_node(node, dest);
1439 		BUG_ON(node->refs < 2);
/*
 * Determine the parent/child relationship of two subvolume roots by
 * scanning ROOT_REF / ROOT_BACKREF items in the tree root.
 *
 * Returns:
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
1453 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1456 	struct btrfs_path path;
1457 	struct btrfs_key key;
1458 	struct extent_buffer *leaf;
1462 	btrfs_init_path(&path);
	/* Fast path: direct ROOT_REF from parent to child. */
1464 	key.objectid = parent_root_id;
1465 	key.type = BTRFS_ROOT_REF_KEY;
1466 	key.offset = child_root_id;
1467 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1471 	btrfs_release_path(&path);
	/* Slow path: walk all ROOT_BACKREF items of the child. */
1475 	key.objectid = child_root_id;
1476 	key.type = BTRFS_ROOT_BACKREF_KEY;
1478 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1484 		leaf = path.nodes[0];
1485 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1486 			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1489 			leaf = path.nodes[0];
1492 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1493 		if (key.objectid != child_root_id ||
1494 		    key.type != BTRFS_ROOT_BACKREF_KEY)
1499 		if (key.offset == parent_root_id) {
1500 			btrfs_release_path(&path);
1507 	btrfs_release_path(&path);
1510 	return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM/DIR_INDEX leaf item: iterate all packed dir entries,
 * record their names as inode backrefs on the current inode record, and
 * flag oversized names and invalid location keys.
 */
1513 static int process_dir_item(struct btrfs_root *root,
1514 			    struct extent_buffer *eb,
1515 			    int slot, struct btrfs_key *key,
1516 			    struct shared_node *active_node)
1526 	struct btrfs_dir_item *di;
1527 	struct inode_record *rec;
1528 	struct cache_tree *root_cache;
1529 	struct cache_tree *inode_cache;
1530 	struct btrfs_key location;
1531 	char namebuf[BTRFS_NAME_LEN];
1533 	root_cache = &active_node->root_cache;
1534 	inode_cache = &active_node->inode_cache;
1535 	rec = active_node->current;
1536 	rec->found_dir_item = 1;
1538 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1539 	total = btrfs_item_size_nr(eb, slot);
	/* A single item may hold several dir entries back to back. */
1540 	while (cur < total) {
1542 		btrfs_dir_item_key_to_cpu(eb, di, &location);
1543 		name_len = btrfs_dir_name_len(eb, di);
1544 		data_len = btrfs_dir_data_len(eb, di);
1545 		filetype = btrfs_dir_type(eb, di);
1547 		rec->found_size += name_len;
1548 		if (name_len <= BTRFS_NAME_LEN) {
		/* Name overruns the buffer: clamp and record the error. */
1552 			len = BTRFS_NAME_LEN;
1553 			error = REF_ERR_NAME_TOO_LONG;
1555 		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
		/* Route the backref by what the entry points at. */
1557 		if (location.type == BTRFS_INODE_ITEM_KEY) {
1558 			add_inode_backref(inode_cache, location.objectid,
1559 					  key->objectid, key->offset, namebuf,
1560 					  len, filetype, key->type, error);
1561 		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1562 			add_inode_backref(root_cache, location.objectid,
1563 					  key->objectid, key->offset,
1564 					  namebuf, len, filetype,
1567 			fprintf(stderr, "invalid location in dir item %u\n",
1569 			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1570 					  key->objectid, key->offset, namebuf,
1571 					  len, filetype, key->type, error);
		/* Advance to the next packed entry. */
1574 		len = sizeof(*di) + name_len + data_len;
1575 		di = (struct btrfs_dir_item *)((char *)di + len);
	/* DIR_INDEX items must contain exactly one entry. */
1578 	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1579 		rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF leaf item: iterate the packed refs and record each
 * (parent dir, index, name) as a backref in the inode cache.
 */
1584 static int process_inode_ref(struct extent_buffer *eb,
1585 			     int slot, struct btrfs_key *key,
1586 			     struct shared_node *active_node)
1594 	struct cache_tree *inode_cache;
1595 	struct btrfs_inode_ref *ref;
1596 	char namebuf[BTRFS_NAME_LEN];
1598 	inode_cache = &active_node->inode_cache;
1600 	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1601 	total = btrfs_item_size_nr(eb, slot);
1602 	while (cur < total) {
1603 		name_len = btrfs_inode_ref_name_len(eb, ref);
1604 		index = btrfs_inode_ref_index(eb, ref);
1605 		if (name_len <= BTRFS_NAME_LEN) {
		/* Oversized name: clamp and flag. */
1609 			len = BTRFS_NAME_LEN;
1610 			error = REF_ERR_NAME_TOO_LONG;
1612 		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
		/* key->offset is the parent dir inode for INODE_REF. */
1613 		add_inode_backref(inode_cache, key->objectid, key->offset,
1614 				  index, namebuf, len, 0, key->type, error);
1616 		len = sizeof(*ref) + name_len;
1617 		ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF leaf item: like process_inode_ref(), but the
 * parent dir inode is stored in the extref itself rather than key->offset.
 */
1623 static int process_inode_extref(struct extent_buffer *eb,
1624 				int slot, struct btrfs_key *key,
1625 				struct shared_node *active_node)
1634 	struct cache_tree *inode_cache;
1635 	struct btrfs_inode_extref *extref;
1636 	char namebuf[BTRFS_NAME_LEN];
1638 	inode_cache = &active_node->inode_cache;
1640 	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1641 	total = btrfs_item_size_nr(eb, slot);
1642 	while (cur < total) {
1643 		name_len = btrfs_inode_extref_name_len(eb, extref);
1644 		index = btrfs_inode_extref_index(eb, extref);
1645 		parent = btrfs_inode_extref_parent(eb, extref);
1646 		if (name_len <= BTRFS_NAME_LEN) {
		/* Oversized name: clamp and flag. */
1650 			len = BTRFS_NAME_LEN;
1651 			error = REF_ERR_NAME_TOO_LONG;
1653 		read_extent_buffer(eb, namebuf,
1654 				   (unsigned long)(extref + 1), len);
1655 		add_inode_backref(inode_cache, key->objectid, parent,
1656 				  index, namebuf, len, 0, key->type, error);
1658 		len = sizeof(*extref) + name_len;
1659 		extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the covered byte count is returned
 * through @found.
 */
1666 static int count_csum_range(struct btrfs_root *root, u64 start,
1667 			    u64 len, u64 *found)
1669 	struct btrfs_key key;
1670 	struct btrfs_path path;
1671 	struct extent_buffer *leaf;
1676 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1678 	btrfs_init_path(&path);
1680 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1682 	key.type = BTRFS_EXTENT_CSUM_KEY;
1684 	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
	/* Step back one slot: the previous csum item may cover @start. */
1688 	if (ret > 0 && path.slots[0] > 0) {
1689 		leaf = path.nodes[0];
1690 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1691 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1692 		    key.type == BTRFS_EXTENT_CSUM_KEY)
1697 		leaf = path.nodes[0];
1698 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1699 			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1704 			leaf = path.nodes[0];
1707 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1708 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1709 		    key.type != BTRFS_EXTENT_CSUM_KEY)
		/* NOTE(review): re-reads the same key; appears redundant. */
1712 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1713 		if (key.offset >= start + len)
1716 		if (key.offset > start)
		/* Each csum entry of csum_size bytes covers one sector. */
1719 		size = btrfs_item_size_nr(leaf, path.slots[0]);
1720 		csum_end = key.offset + (size / csum_size) * root->sectorsize;
1721 		if (csum_end > start) {
1722 			size = min(csum_end - start, len);
1731 	btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA leaf item for the current inode record: track
 * the contiguous extent range, record holes and overlaps, validate the
 * extent item's fields, and cross-check csum coverage for regular extents.
 */
1737 static int process_file_extent(struct btrfs_root *root,
1738 			       struct extent_buffer *eb,
1739 			       int slot, struct btrfs_key *key,
1740 			       struct shared_node *active_node)
1742 	struct inode_record *rec;
1743 	struct btrfs_file_extent_item *fi;
1745 	u64 disk_bytenr = 0;
1746 	u64 extent_offset = 0;
1747 	u64 mask = root->sectorsize - 1;
1751 	rec = active_node->current;
1752 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1753 	rec->found_file_extent = 1;
1755 	if (rec->extent_start == (u64)-1) {
1756 		rec->extent_start = key->offset;
1757 		rec->extent_end = key->offset;
	/* Overlap if this extent starts before the previous one ended;
	 * otherwise any gap between extents is recorded as a hole. */
1760 	if (rec->extent_end > key->offset)
1761 		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1762 	else if (rec->extent_end < key->offset) {
1763 		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1764 					   key->offset - rec->extent_end);
1769 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1770 	extent_type = btrfs_file_extent_type(eb, fi);
1772 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1773 		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1775 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1776 		rec->found_size += num_bytes;
		/* Round inline length up to a full sector. */
1777 		num_bytes = (num_bytes + mask) & ~mask;
1778 	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1779 		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1780 		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1781 		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1782 		extent_offset = btrfs_file_extent_offset(eb, fi);
		/* num_bytes must be nonzero and sector-aligned. */
1783 		if (num_bytes == 0 || (num_bytes & mask))
1784 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785 		if (num_bytes + extent_offset >
1786 		    btrfs_file_extent_ram_bytes(eb, fi))
1787 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		/* Prealloc extents cannot be compressed/encrypted/encoded. */
1788 		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1789 		    (btrfs_file_extent_compression(eb, fi) ||
1790 		     btrfs_file_extent_encryption(eb, fi) ||
1791 		     btrfs_file_extent_other_encoding(eb, fi)))
1792 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1793 		if (disk_bytenr > 0)
1794 			rec->found_size += num_bytes;
1796 		rec->errors |= I_ERR_BAD_FILE_EXTENT;
1798 	rec->extent_end = key->offset + num_bytes;
	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums.  Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent.  This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
1808 	if (disk_bytenr > 0 &&
1809 	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		/* Compressed extents are checksummed on disk bytes. */
1811 		if (btrfs_file_extent_compression(eb, fi))
1812 			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1814 			disk_bytenr += extent_offset;
1816 		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
		/* REG extents need csums; PREALLOC must not have them. */
1819 		if (extent_type == BTRFS_FILE_EXTENT_REG) {
1821 				rec->found_csum_item = 1;
1822 			if (found < num_bytes)
1823 				rec->some_csum_missing = 1;
1824 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1826 				rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk all items of a leaf, dispatching each key type to the matching
 * process_* helper and advancing the active node's "current" inode record
 * as the objectid increases.
 */
1832 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1833 			    struct walk_control *wc)
1835 	struct btrfs_key key;
1839 	struct cache_tree *inode_cache;
1840 	struct shared_node *active_node;
	/* Skip leaves of dead roots entirely. */
1842 	if (wc->root_level == wc->active_node &&
1843 	    btrfs_root_refs(&root->root_item) == 0)
1846 	active_node = wc->nodes[wc->active_node];
1847 	inode_cache = &active_node->inode_cache;
1848 	nritems = btrfs_header_nritems(eb);
1849 	for (i = 0; i < nritems; i++) {
1850 		btrfs_item_key_to_cpu(eb, &key, i);
1852 		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1854 		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
		/* Moved past the previous inode: mark it checked and
		 * fetch/create the record for the new objectid. */
1857 		if (active_node->current == NULL ||
1858 		    active_node->current->ino < key.objectid) {
1859 			if (active_node->current) {
1860 				active_node->current->checked = 1;
1861 				maybe_free_inode_rec(inode_cache,
1862 						     active_node->current);
1864 			active_node->current = get_inode_rec(inode_cache,
1866 			BUG_ON(IS_ERR(active_node->current));
1869 		case BTRFS_DIR_ITEM_KEY:
1870 		case BTRFS_DIR_INDEX_KEY:
1871 			ret = process_dir_item(root, eb, i, &key, active_node);
1873 		case BTRFS_INODE_REF_KEY:
1874 			ret = process_inode_ref(eb, i, &key, active_node);
1876 		case BTRFS_INODE_EXTREF_KEY:
1877 			ret = process_inode_extref(eb, i, &key, active_node);
1879 		case BTRFS_INODE_ITEM_KEY:
1880 			ret = process_inode_item(eb, i, &key, active_node);
1882 		case BTRFS_EXTENT_DATA_KEY:
1883 			ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot onward so the
 * subsequent walk finds them already cached.
 */
1893 static void reada_walk_down(struct btrfs_root *root,
1894 			    struct extent_buffer *node, int slot)
1903 	level = btrfs_header_level(node);
1907 	nritems = btrfs_header_nritems(node);
1908 	blocksize = root->nodesize;
1909 	for (i = slot; i < nritems; i++) {
1910 		bytenr = btrfs_node_blockptr(node, i);
1911 		ptr_gen = btrfs_node_ptr_generation(node, i);
1912 		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
/*
 * Check the child node/leaf by the following condition:
 * 1. the first item key of the node/leaf should be the same with the one
 *    in parent.
 * 2. block in parent node should match the child node/leaf.
 * 3. generation of parent node and child's header should be consistent.
 *
 * Or the child node/leaf pointed by the key in parent is not valid.
 *
 * We hope to check leaf owner too, but since subvol may share leaves,
 * which makes leaf owner check not so strong, key check should be
 * sufficient for that case.
 */
1929 static int check_child_node(struct btrfs_root *root,
1930 			    struct extent_buffer *parent, int slot,
1931 			    struct extent_buffer *child)
1933 	struct btrfs_key parent_key;
1934 	struct btrfs_key child_key;
1937 	btrfs_node_key_to_cpu(parent, &parent_key, slot);
	/* Leaves hold item keys; internal nodes hold node keys. */
1938 	if (btrfs_header_level(child) == 0)
1939 		btrfs_item_key_to_cpu(child, &child_key, 0);
1941 		btrfs_node_key_to_cpu(child, &child_key, 0);
1943 	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1946 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1947 			parent_key.objectid, parent_key.type, parent_key.offset,
1948 			child_key.objectid, child_key.type, child_key.offset);
1950 	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1952 		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1953 			btrfs_node_blockptr(parent, slot),
1954 			btrfs_header_bytenr(child));
1956 	if (btrfs_node_ptr_generation(parent, slot) !=
1957 	    btrfs_header_generation(child)) {
1959 		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1960 			btrfs_header_generation(child),
1961 			btrfs_node_ptr_generation(parent, slot));
	/* Per-level cache of the last looked-up block and its refcount,
	 * used by walk_down_tree() to skip repeated extent-info lookups. */
1967 	u64 bytenr[BTRFS_MAX_LEVEL];
1968 	u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the tree walk: resolve the refcount of the current
 * block (cached per-level in @nrefs), account shared blocks via
 * enter_shared_node(), process leaves, read and validate child blocks,
 * and push the next child onto @path.
 */
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972 			  struct walk_control *wc, int *level,
1973 			  struct node_refs *nrefs)
1975 	enum btrfs_tree_block_status status;
1978 	struct extent_buffer *next;
1979 	struct extent_buffer *cur;
1984 	WARN_ON(*level < 0);
1985 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	/* Reuse the cached refcount if this block was looked up already. */
1987 	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988 		refs = nrefs->refs[*level];
1991 		ret = btrfs_lookup_extent_info(NULL, root,
1992 					       path->nodes[*level]->start,
1993 					       *level, 1, &refs, NULL);
1998 		nrefs->bytenr[*level] = path->nodes[*level]->start;
1999 		nrefs->refs[*level] = refs;
2003 		ret = enter_shared_node(root, path->nodes[*level]->start,
2011 	while (*level >= 0) {
2012 		WARN_ON(*level < 0);
2013 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014 		cur = path->nodes[*level];
2016 		if (btrfs_header_level(cur) != *level)
2019 		if (path->slots[*level] >= btrfs_header_nritems(cur))
2022 			ret = process_one_leaf(root, cur, wc);
2027 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2029 		blocksize = root->nodesize;
		/* Same per-level refcount cache for the child block. */
2031 		if (bytenr == nrefs->bytenr[*level - 1]) {
2032 			refs = nrefs->refs[*level - 1];
2034 			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2035 					*level - 1, 1, &refs, NULL);
2039 				nrefs->bytenr[*level - 1] = bytenr;
2040 				nrefs->refs[*level - 1] = refs;
			/* Shared child already visited: skip its subtree. */
2045 			ret = enter_shared_node(root, bytenr, refs,
2048 				path->slots[*level]++;
2053 		next = btrfs_find_tree_block(root, bytenr, blocksize);
2054 		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2055 			free_extent_buffer(next);
2056 			reada_walk_down(root, cur, path->slots[*level]);
2057 			next = read_tree_block(root, bytenr, blocksize,
			/* Unreadable child: record it as a corrupt extent. */
2059 			if (!extent_buffer_uptodate(next)) {
2060 				struct btrfs_key node_key;
2062 				btrfs_node_key_to_cpu(path->nodes[*level],
2064 						      path->slots[*level]);
2065 				btrfs_add_corrupt_extent_record(root->fs_info,
2067 						path->nodes[*level]->start,
2068 						root->nodesize, *level);
2074 		ret = check_child_node(root, cur, path->slots[*level], next);
2080 		if (btrfs_is_leaf(next))
2081 			status = btrfs_check_leaf(root, NULL, next);
2083 			status = btrfs_check_node(root, NULL, next);
2084 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085 			free_extent_buffer(next);
		/* Descend: replace the lower level's node with the child. */
2090 		*level = *level - 1;
2091 		free_extent_buffer(path->nodes[*level]);
2092 		path->nodes[*level] = next;
2093 		path->slots[*level] = 0;
2096 	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop fully-consumed levels off @path: advance to the next slot where one
 * exists, otherwise release the level's buffer and notify the shared-node
 * bookkeeping via leave_shared_node().
 */
2100 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2101 			struct walk_control *wc, int *level)
2104 	struct extent_buffer *leaf;
2106 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2107 		leaf = path->nodes[i];
2108 		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2113 			free_extent_buffer(path->nodes[*level]);
2114 			path->nodes[*level] = NULL;
2115 			BUG_ON(*level > wc->active_node);
2116 			if (*level == wc->active_node)
2117 				leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record: it must have an inode item,
 * no errors, nlink 1, no links found, and exactly one ".." inode-ref
 * backref at index 0 with no dir item/index.
 */
2124 static int check_root_dir(struct inode_record *rec)
2126 	struct inode_backref *backref;
2129 	if (!rec->found_inode_item || rec->errors)
2131 	if (rec->nlink != 1 || rec->found_link != 0)
2133 	if (list_empty(&rec->backrefs))
2135 	backref = to_inode_backref(rec->backrefs.next);
2136 	if (!backref->found_inode_ref)
2138 	if (backref->index != 0 || backref->namelen != 2 ||
2139 	    memcmp(backref->name, "..", 2))
2141 	if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair I_ERR_DIR_ISIZE_WRONG: rewrite the directory's inode item size
 * to the byte total accumulated from its dir entries (rec->found_size).
 */
2148 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2149 			      struct btrfs_root *root, struct btrfs_path *path,
2150 			      struct inode_record *rec)
2152 	struct btrfs_inode_item *ei;
2153 	struct btrfs_key key;
2156 	key.objectid = rec->ino;
2157 	key.type = BTRFS_INODE_ITEM_KEY;
	/* Search with offset -1 then step back to land on the inode item. */
2158 	key.offset = (u64)-1;
2160 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2164 	if (!path->slots[0]) {
2171 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2172 	if (key.objectid != rec->ino) {
2177 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2178 			    struct btrfs_inode_item);
2179 	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2180 	btrfs_mark_buffer_dirty(path->nodes[0]);
2181 	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2182 	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2183 	       root->root_key.objectid);
2185 	btrfs_release_path(path);
/*
 * Repair I_ERR_NO_ORPHAN_ITEM: insert the missing orphan item for an
 * unlinked inode and clear the error flag on success.
 */
2189 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2190 				    struct btrfs_root *root,
2191 				    struct btrfs_path *path,
2192 				    struct inode_record *rec)
2196 	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2197 	btrfs_release_path(path);
2199 		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair I_ERR_FILE_NBYTES_WRONG: rewrite the inode item's nbytes field
 * to the extent byte total accumulated in rec->found_size.
 */
2203 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2204 			       struct btrfs_root *root,
2205 			       struct btrfs_path *path,
2206 			       struct inode_record *rec)
2208 	struct btrfs_inode_item *ei;
2209 	struct btrfs_key key;
2212 	key.objectid = rec->ino;
2213 	key.type = BTRFS_INODE_ITEM_KEY;
2216 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2223 	/* Since ret == 0, no need to check anything */
2224 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2225 			    struct btrfs_inode_item);
2226 	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2227 	btrfs_mark_buffer_dirty(path->nodes[0]);
2228 	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2229 	printf("reset nbytes for ino %llu root %llu\n",
2230 	       rec->ino, root->root_key.objectid);
2232 	btrfs_release_path(path);
/*
 * Insert a missing DIR_INDEX item reconstructed from @backref, commit the
 * transaction, and update the parent directory's record so its isize error
 * state reflects the added entry.
 */
2236 static int add_missing_dir_index(struct btrfs_root *root,
2237 				 struct cache_tree *inode_cache,
2238 				 struct inode_record *rec,
2239 				 struct inode_backref *backref)
2241 	struct btrfs_path *path;
2242 	struct btrfs_trans_handle *trans;
2243 	struct btrfs_dir_item *dir_item;
2244 	struct extent_buffer *leaf;
2245 	struct btrfs_key key;
2246 	struct btrfs_disk_key disk_key;
2247 	struct inode_record *dir_rec;
2248 	unsigned long name_ptr;
2249 	u32 data_size = sizeof(*dir_item) + backref->namelen;
2252 	path = btrfs_alloc_path();
2256 	trans = btrfs_start_transaction(root, 1);
2257 	if (IS_ERR(trans)) {
2258 		btrfs_free_path(path);
2259 		return PTR_ERR(trans);
2262 	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2263 		(unsigned long long)rec->ino);
2264 	key.objectid = backref->dir;
2265 	key.type = BTRFS_DIR_INDEX_KEY;
2266 	key.offset = backref->index;
2268 	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
	/* Fill in the freshly inserted dir item in place. */
2271 	leaf = path->nodes[0];
2272 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2274 	disk_key.objectid = cpu_to_le64(rec->ino);
2275 	disk_key.type = BTRFS_INODE_ITEM_KEY;
2276 	disk_key.offset = 0;
2278 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2279 	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2280 	btrfs_set_dir_data_len(leaf, dir_item, 0);
2281 	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2282 	name_ptr = (unsigned long)(dir_item + 1);
2283 	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2284 	btrfs_mark_buffer_dirty(leaf);
2285 	btrfs_free_path(path);
2286 	btrfs_commit_transaction(trans, root);
2288 	backref->found_dir_index = 1;
2289 	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2290 	BUG_ON(IS_ERR(dir_rec));
	/* Re-evaluate the parent dir's isize error after adding the name. */
2293 	dir_rec->found_size += backref->namelen;
2294 	if (dir_rec->found_size == dir_rec->isize &&
2295 	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2296 		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2297 	if (dir_rec->found_size != dir_rec->isize)
2298 		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry described by @backref: look it up by name
 * and index, then remove the whole item or just the one packed name.
 */
2303 static int delete_dir_index(struct btrfs_root *root,
2304 			    struct cache_tree *inode_cache,
2305 			    struct inode_record *rec,
2306 			    struct inode_backref *backref)
2308 	struct btrfs_trans_handle *trans;
2309 	struct btrfs_dir_item *di;
2310 	struct btrfs_path *path;
2313 	path = btrfs_alloc_path();
2317 	trans = btrfs_start_transaction(root, 1);
2318 	if (IS_ERR(trans)) {
2319 		btrfs_free_path(path);
2320 		return PTR_ERR(trans);
2324 	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2325 		(unsigned long long)backref->dir,
2326 		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2327 		(unsigned long long)root->objectid);
2329 	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2330 				    backref->name, backref->namelen,
2331 				    backref->index, -1);
2334 		btrfs_free_path(path);
2335 		btrfs_commit_transaction(trans, root);
	/* Whole item vs. one name within a multi-entry item. */
2342 		ret = btrfs_del_item(trans, root, path);
2344 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
2346 	btrfs_free_path(path);
2347 	btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item from whatever was discovered about the
 * inode (links, sizes, dir items).  The mode is guessed: directory if dir
 * items were found, regular file otherwise; timestamps are set to "now".
 */
2351 static int create_inode_item(struct btrfs_root *root,
2352 			     struct inode_record *rec,
2353 			     struct inode_backref *backref, int root_dir)
2355 	struct btrfs_trans_handle *trans;
2356 	struct btrfs_inode_item inode_item;
2357 	time_t now = time(NULL);
2360 	trans = btrfs_start_transaction(root, 1);
2361 	if (IS_ERR(trans)) {
2362 		ret = PTR_ERR(trans);
2366 	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2367 		"be incomplete, please check permissions and content after "
2368 		"the fsck completes.\n", (unsigned long long)root->objectid,
2369 		(unsigned long long)rec->ino);
2371 	memset(&inode_item, 0, sizeof(inode_item));
2372 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
	/* root_dir gets nlink 1; otherwise use the discovered link count. */
2374 		btrfs_set_stack_inode_nlink(&inode_item, 1);
2376 		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2377 	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2378 	if (rec->found_dir_item) {
2379 		if (rec->found_file_extent)
2380 			fprintf(stderr, "root %llu inode %llu has both a dir "
2381 				"item and extents, unsure if it is a dir or a "
2382 				"regular file so setting it as a directory\n",
2383 				(unsigned long long)root->objectid,
2384 				(unsigned long long)rec->ino);
2385 		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2386 		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2387 	} else if (!rec->found_dir_item) {
2388 		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2389 		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2391 	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2392 	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2393 	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2394 	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2395 	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2396 	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
	/* otime (creation time) unknown: leave as zero. */
2397 	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2398 	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2400 	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2402 	btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair the fixable inconsistencies:
 * delete bad dir indexes, add missing ones, add missing dir index/item
 * pairs for bare inode refs, and recreate missing inode items.
 * Returns a negative error, or the number of repairs performed.
 */
2406 static int repair_inode_backrefs(struct btrfs_root *root,
2407 				 struct inode_record *rec,
2408 				 struct cache_tree *inode_cache,
2411 	struct inode_backref *tmp, *backref;
2412 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
2416 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2417 		if (!delete && rec->ino == root_dirid) {
2418 			if (!rec->found_inode_item) {
2419 				ret = create_inode_item(root, rec, backref, 1);
2426 		/* Index 0 for root dir's are special, don't mess with it */
2427 		if (rec->ino == root_dirid && backref->index == 0)
		/* Dir index without an inode ref, or with a mismatched one:
		 * delete the bad index. */
2431 		    ((backref->found_dir_index && !backref->found_inode_ref) ||
2432 		     (backref->found_dir_index && backref->found_inode_ref &&
2433 		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2434 			ret = delete_dir_index(root, inode_cache, rec, backref);
2438 			list_del(&backref->list);
2442 		if (!delete && !backref->found_dir_index &&
2443 		    backref->found_dir_item && backref->found_inode_ref) {
2444 			ret = add_missing_dir_index(root, inode_cache, rec,
2449 		if (backref->found_dir_item &&
2450 		    backref->found_dir_index &&
		/* NOTE(review): duplicated found_dir_index test; upstream
		 * later patches suggest this condition is redundant —
		 * verify against current btrfs-progs. */
2451 		    backref->found_dir_index) {
2452 			if (!backref->errors &&
2453 			    backref->found_inode_ref) {
2454 				list_del(&backref->list);
		/* Inode ref alone: recreate the dir index/item pair. */
2460 		if (!delete && (!backref->found_dir_index &&
2461 				!backref->found_dir_item &&
2462 				backref->found_inode_ref)) {
2463 			struct btrfs_trans_handle *trans;
2464 			struct btrfs_key location;
2466 			ret = check_dir_conflict(root, backref->name,
				/*
				 * let nlink fixing routine to handle it,
				 * which can do it better.
				 */
2478 			location.objectid = rec->ino;
2479 			location.type = BTRFS_INODE_ITEM_KEY;
2480 			location.offset = 0;
2482 			trans = btrfs_start_transaction(root, 1);
2483 			if (IS_ERR(trans)) {
2484 				ret = PTR_ERR(trans);
2487 			fprintf(stderr, "adding missing dir index/item pair "
2489 				(unsigned long long)rec->ino);
2490 			ret = btrfs_insert_dir_item(trans, root, backref->name,
2492 						    backref->dir, &location,
2493 						    imode_to_type(rec->imode),
2496 			btrfs_commit_transaction(trans, root);
		/* Fully consistent backrefs but no inode item: recreate it. */
2500 		if (!delete && (backref->found_inode_ref &&
2501 				backref->found_dir_index &&
2502 				backref->found_dir_item &&
2503 				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2504 				!rec->found_inode_item)) {
2505 			ret = create_inode_item(root, rec, backref, 0);
2512 	return ret ? ret : repaired;
/*
 * To determine the file type for nlink/inode_item repair
 *
 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
 * Return -ENOENT if file type is not found.
 */
2521 static int find_file_type(struct inode_record *rec, u8 *type)
2523 	struct inode_backref *backref;
2525 	/* For inode item recovered case */
2526 	if (rec->found_inode_item) {
2527 		*type = imode_to_type(rec->imode);
	/* Otherwise fall back to the filetype recorded in any dir entry. */
2531 	list_for_each_entry(backref, &rec->backrefs, list) {
2532 		if (backref->found_dir_index || backref->found_dir_item) {
2533 			*type = backref->filetype;
/*
 * To determine the file name for nlink repair
 *
 * Return 0 if file name is found, set name and namelen.
 * Return -ENOENT if file name is not found.
 */
2546 static int find_file_name(struct inode_record *rec,
2547 			  char *name, int *namelen)
2549 	struct inode_backref *backref;
	/* Any backref that carries a name will do. */
2551 	list_for_each_entry(backref, &rec->backrefs, list) {
2552 		if (backref->found_dir_index || backref->found_dir_item ||
2553 		    backref->found_inode_ref) {
2554 			memcpy(name, backref->name, backref->namelen);
2555 			*namelen = backref->namelen;
/*
 * Reset the nlink of the inode to the correct one: unlink every recorded
 * backref, drop the invalid ones from the record, zero the on-disk nlink,
 * then re-add each remaining valid link so btrfs_add_link() recomputes
 * the count.
 */
2563 static int reset_nlink(struct btrfs_trans_handle *trans,
2564 		       struct btrfs_root *root,
2565 		       struct btrfs_path *path,
2566 		       struct inode_record *rec)
2568 	struct inode_backref *backref;
2569 	struct inode_backref *tmp;
2570 	struct btrfs_key key;
2571 	struct btrfs_inode_item *inode_item;
2574 	/* We don't believe this either, reset it and iterate backref */
2575 	rec->found_link = 0;
2577 	/* Remove all backref including the valid ones */
2578 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2579 		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2580 				   backref->index, backref->name,
2581 				   backref->namelen, 0);
2585 		/* remove invalid backref, so it won't be added back */
2586 		if (!(backref->found_dir_index &&
2587 		      backref->found_dir_item &&
2588 		      backref->found_inode_ref)) {
2589 			list_del(&backref->list);
2596 	/* Set nlink to 0 */
2597 	key.objectid = rec->ino;
2598 	key.type = BTRFS_INODE_ITEM_KEY;
2600 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2607 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2608 				    struct btrfs_inode_item);
2609 	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2610 	btrfs_mark_buffer_dirty(path->nodes[0]);
2611 	btrfs_release_path(path);
	/*
	 * Add back valid inode_ref/dir_item/dir_index,
	 * add_link() will handle the nlink inc, so new nlink must be correct
	 */
2617 	list_for_each_entry(backref, &rec->backrefs, list) {
2618 		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2619 				     backref->name, backref->namelen,
2620 				     backref->filetype, &backref->index, 1);
2625 	btrfs_release_path(path);
/*
 * Repair I_ERR_LINK_COUNT_WRONG: recover a name and file type for the
 * inode, reset its nlink from valid backrefs, and if no link remains,
 * relink it into a "lost+found" directory (creating it if needed), with
 * ".INO" suffixes appended to resolve name collisions.
 */
2629 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2630 			       struct btrfs_root *root,
2631 			       struct btrfs_path *path,
2632 			       struct inode_record *rec)
2634 	char *dir_name = "lost+found";
2635 	char namebuf[BTRFS_NAME_LEN] = {0};
2640 	int name_recovered = 0;
2641 	int type_recovered = 0;
	/*
	 * Get file name and type first before these invalid inode ref
	 * are deleted by remove_all_invalid_backref()
	 */
2648 	name_recovered = !find_file_name(rec, namebuf, &namelen);
2649 	type_recovered = !find_file_type(rec, &type);
2651 	if (!name_recovered) {
2652 		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2653 		       rec->ino, rec->ino);
2654 		namelen = count_digits(rec->ino);
		/* Fall back to the decimal inode number as the name. */
2655 		sprintf(namebuf, "%llu", rec->ino);
2658 	if (!type_recovered) {
2659 		printf("Can't get file type for inode %llu, using FILE as fallback\n",
2661 		type = BTRFS_FT_REG_FILE;
2665 	ret = reset_nlink(trans, root, path, rec);
2668 			"Failed to reset nlink for inode %llu: %s\n",
2669 			rec->ino, strerror(-ret));
	/* No valid link left: move the inode into lost+found. */
2673 	if (rec->found_link == 0) {
2674 		lost_found_ino = root->highest_inode;
2675 		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2680 		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2681 				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2684 			fprintf(stderr, "Failed to create '%s' dir: %s\n",
2685 				dir_name, strerror(-ret));
2688 		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2689 				     namebuf, namelen, type, NULL, 1);
		/*
		 * Add ".INO" suffix several times to handle case where
		 * "FILENAME.INO" is already taken by another file.
		 */
2694 		while (ret == -EEXIST) {
			/*
			 * Conflicting file name, add ".INO" as suffix * +1 for '.'
			 */
2698 			if (namelen + count_digits(rec->ino) + 1 >
2703 			snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2705 			namelen += count_digits(rec->ino) + 1;
2706 			ret = btrfs_add_link(trans, root, rec->ino,
2707 					     lost_found_ino, namebuf,
2708 					     namelen, type, NULL, 1);
2712 				"Failed to link the inode %llu to %s dir: %s\n",
2713 				rec->ino, dir_name, strerror(-ret));
		/*
		 * Just increase the found_link, don't actually add the
		 * backref. This will make things easier and this inode
		 * record will be freed after the repair is done.
		 * So fsck will not report problem about this inode.
		 */
2723 		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2724 		       namelen, namebuf, dir_name);
2726 	printf("Fixed the nlink of inode %llu\n", rec->ino);
	/*
	 * Clear the flag anyway, or we will loop forever for the same inode
	 * as it will not be removed from the bad inode list and the dead loop
	 */
2733 	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2734 	btrfs_release_path(path);
/*
 * Check if there is any normal(reg or prealloc) file extent for given
 * inode.
 * This is used to determine the file type when neither its dir_index/item
 * or inode_item exists.
 *
 * This will *NOT* report error, if any error happens, just consider it
 * does not have any normal file extent.
 */
2747 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2749 	struct btrfs_path *path;
2750 	struct btrfs_key key;
2751 	struct btrfs_key found_key;
2752 	struct btrfs_file_extent_item *fi;
2756 	path = btrfs_alloc_path();
2760 	key.type = BTRFS_EXTENT_DATA_KEY;
2763 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	/* Search may land past the last item of a leaf: step forward. */
2768 	if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2769 		ret = btrfs_next_leaf(root, path);
2776 	btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2778 	if (found_key.objectid != ino ||
2779 	    found_key.type != BTRFS_EXTENT_DATA_KEY)
2781 	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2782 			    struct btrfs_file_extent_item);
2783 	type = btrfs_file_extent_type(path->nodes[0], fi);
	/* Any non-inline extent counts as "normal". */
2784 	if (type != BTRFS_FILE_EXTENT_INLINE) {
2790 	btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to the matching S_IF* mode bits.
 * NOTE(review): no bounds check on @type is visible here — presumably all
 * callers pass a valid BTRFS_FT_* value; confirm against the call sites.
 */
2794 static u32 btrfs_type_to_imode(u8 type)
2796 static u32 imode_by_btrfs_type[] = {
2797 [BTRFS_FT_REG_FILE] = S_IFREG,
2798 [BTRFS_FT_DIR] = S_IFDIR,
2799 [BTRFS_FT_CHRDEV] = S_IFCHR,
2800 [BTRFS_FT_BLKDEV] = S_IFBLK,
2801 [BTRFS_FT_FIFO] = S_IFIFO,
2802 [BTRFS_FT_SOCK] = S_IFSOCK,
2803 [BTRFS_FT_SYMLINK] = S_IFLNK,
2806 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type is recovered from whatever evidence survives (file
 * extents, dir items, orphan extents); when nothing is conclusive the
 * inode is assumed to be a regular file.  Only the inode item itself is
 * recreated here — nlink repair is deliberately left to the nlink fixer.
 */
2809 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2810 struct btrfs_root *root,
2811 struct btrfs_path *path,
2812 struct inode_record *rec)
2816 int type_recovered = 0;
2819 printf("Trying to rebuild inode:%llu\n", rec->ino);
2821 type_recovered = !find_file_type(rec, &filetype);
2824 * Try to determine inode type if type not found.
2826 * For found regular file extent, it must be FILE.
2827 * For found dir_item/index, it must be DIR.
2829 * For undetermined one, use FILE as fallback.
2832 * 1. If found backref(inode_index/item is already handled) to it,
2834 * Need new inode-inode ref structure to allow search for that.
2836 if (!type_recovered) {
2837 if (rec->found_file_extent &&
2838 find_normal_file_extent(root, rec->ino)) {
2840 filetype = BTRFS_FT_REG_FILE;
2841 } else if (rec->found_dir_item) {
2843 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data existed, so treat as a file. */
2844 } else if (!list_empty(&rec->orphan_extents)) {
2846 filetype = BTRFS_FT_REG_FILE;
2848 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2851 filetype = BTRFS_FT_REG_FILE;
2855 ret = btrfs_new_inode(trans, root, rec->ino,
2856 mode | btrfs_type_to_imode(filetype));
2861 * Here inode rebuild is done, we only rebuild the inode item,
2862 * don't repair the nlink(like move to lost+found).
2863 * That is the job of nlink repair.
2865 * We just fill the record and return
2867 rec->found_dir_item = 1;
2868 rec->imode = mode | btrfs_type_to_imode(filetype);
2870 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2871 /* Ensure the inode_nlinks repair function will be called */
2872 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec as real file
 * extents of the inode.  Conflicting extents are freed instead of
 * inserted; on success the size/nbytes and extent-hole bookkeeping of
 * the record are updated so dependent error bits can be cleared.
 */
2877 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2878 struct btrfs_root *root,
2879 struct btrfs_path *path,
2880 struct inode_record *rec)
2882 struct orphan_data_extent *orphan;
2883 struct orphan_data_extent *tmp;
2886 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2888 * Check for conflicting file extents
2890 * Here we don't know whether the extents is compressed or not,
2891 * so we can only assume it not compressed nor data offset,
2892 * and use its disk_len as extent length.
2894 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2895 orphan->offset, orphan->disk_len, 0);
2896 btrfs_release_path(path);
/* Conflict: drop the orphan's extent allocation rather than link it. */
2901 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2902 orphan->disk_bytenr, orphan->disk_len);
2903 ret = btrfs_free_extent(trans,
2904 root->fs_info->extent_root,
2905 orphan->disk_bytenr, orphan->disk_len,
2906 0, root->objectid, orphan->objectid,
2911 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2912 orphan->offset, orphan->disk_bytenr,
2913 orphan->disk_len, orphan->disk_len);
2917 /* Update file size info */
2918 rec->found_size += orphan->disk_len;
2919 if (rec->found_size == rec->nbytes)
2920 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2922 /* Update the file extent hole info too */
2923 ret = del_file_extent_hole(&rec->holes, orphan->offset,
/* All holes filled: the extent-discount error no longer applies. */
2927 if (RB_EMPTY_ROOT(&rec->holes))
2928 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2930 list_del(&orphan->list);
2933 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fix an inode whose file extents do not cover its full size by
 * punching explicit holes over every recorded gap.  A file that lost
 * all of its extents gets a single hole covering the whole (rounded-up)
 * isize.
 */
2938 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2939 struct btrfs_root *root,
2940 struct btrfs_path *path,
2941 struct inode_record *rec)
2943 struct rb_node *node;
2944 struct file_extent_hole *hole;
2948 node = rb_first(&rec->holes);
2952 hole = rb_entry(node, struct file_extent_hole, node);
2953 ret = btrfs_punch_hole(trans, root, rec->ino,
2954 hole->start, hole->len);
/* Drop the repaired hole from the tree before moving to the next one. */
2957 ret = del_file_extent_hole(&rec->holes, hole->start,
2961 if (RB_EMPTY_ROOT(&rec->holes))
2962 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2963 node = rb_first(&rec->holes);
2965 /* special case for a file losing all its file extent */
2967 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2968 round_up(rec->isize, root->sectorsize));
2972 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2973 rec->ino, root->objectid);
/*
 * Dispatch the individual inode repair helpers for every repairable
 * error bit set on @rec, inside a single transaction.  Repairs run in
 * dependency order (missing inode item first, nlink last) and stop at
 * the first failure.
 */
2978 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2980 struct btrfs_trans_handle *trans;
2981 struct btrfs_path *path;
/* Bail out early when none of the errors present is one we can fix. */
2984 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2985 I_ERR_NO_ORPHAN_ITEM |
2986 I_ERR_LINK_COUNT_WRONG |
2987 I_ERR_NO_INODE_ITEM |
2988 I_ERR_FILE_EXTENT_ORPHAN |
2989 I_ERR_FILE_EXTENT_DISCOUNT|
2990 I_ERR_FILE_NBYTES_WRONG)))
2993 path = btrfs_alloc_path();
2998 * For nlink repair, it may create a dir and add link, so
2999 * 2 for parent(256)'s dir_index and dir_item
3000 * 2 for lost+found dir's inode_item and inode_ref
3001 * 1 for the new inode_ref of the file
3002 * 2 for lost+found dir's dir_index and dir_item for the file
3004 trans = btrfs_start_transaction(root, 7);
3005 if (IS_ERR(trans)) {
3006 btrfs_free_path(path);
3007 return PTR_ERR(trans);
/* Each step only runs if all previous repairs succeeded (ret == 0). */
3010 if (rec->errors & I_ERR_NO_INODE_ITEM)
3011 ret = repair_inode_no_item(trans, root, path, rec);
3012 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3013 ret = repair_inode_orphan_extent(trans, root, path, rec);
3014 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3015 ret = repair_inode_discount_extent(trans, root, path, rec);
3016 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3017 ret = repair_inode_isize(trans, root, path, rec);
3018 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3019 ret = repair_inode_orphan_item(trans, root, path, rec);
3020 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3021 ret = repair_inode_nlinks(trans, root, path, rec);
3022 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3023 ret = repair_inode_nbytes(trans, root, path, rec);
/* NOTE(review): commit happens even when a repair step failed — presumably
 * intentional so partial fixes persist; confirm against upstream history. */
3024 btrfs_commit_transaction(trans, root);
3025 btrfs_free_path(path);
/*
 * Validate (and, in repair mode, fix) every inode record collected for
 * @root.  Handles the root directory specially (recreating it if it is
 * missing), repairs backrefs before inode errors, and prints a report
 * for anything left unresolved.  Returns -1 if any error remains.
 */
3029 static int check_inode_recs(struct btrfs_root *root,
3030 struct cache_tree *inode_cache)
3032 struct cache_extent *cache;
3033 struct ptr_node *node;
3034 struct inode_record *rec;
3035 struct inode_backref *backref;
3040 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (zero refs) should have no inode records at all. */
3042 if (btrfs_root_refs(&root->root_item) == 0) {
3043 if (!cache_tree_empty(inode_cache))
3044 fprintf(stderr, "warning line %d\n", __LINE__);
3049 * We need to record the highest inode number for later 'lost+found'
3051 * We must select an ino not used/referred by any existing inode, or
3052 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3053 * this may cause 'lost+found' dir has wrong nlinks.
3055 cache = last_cache_extent(inode_cache);
3057 node = container_of(cache, struct ptr_node, cache);
3059 if (rec->ino > root->highest_inode)
3060 root->highest_inode = rec->ino;
3064 * We need to repair backrefs first because we could change some of the
3065 * errors in the inode recs.
3067 * We also need to go through and delete invalid backrefs first and then
3068 * add the correct ones second. We do this because we may get EEXIST
3069 * when adding back the correct index because we hadn't yet deleted the
3072 * For example, if we were missing a dir index then the directories
3073 * isize would be wrong, so if we fixed the isize to what we thought it
3074 * would be and then fixed the backref we'd still have a invalid fs, so
3075 * we need to add back the dir index and then check to see if the isize
3080 if (stage == 3 && !err)
3083 cache = search_cache_extent(inode_cache, 0);
3084 while (repair && cache) {
3085 node = container_of(cache, struct ptr_node, cache);
3087 cache = next_cache_extent(cache);
3089 /* Need to free everything up and rescan */
3091 remove_cache_extent(inode_cache, &node->cache);
3093 free_inode_rec(rec);
3097 if (list_empty(&rec->backrefs))
3100 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory gets its own check; recreate it in repair mode. */
3114 rec = get_inode_rec(inode_cache, root_dirid, 0);
3115 BUG_ON(IS_ERR(rec));
3117 ret = check_root_dir(rec);
3119 fprintf(stderr, "root %llu root dir %llu error\n",
3120 (unsigned long long)root->root_key.objectid,
3121 (unsigned long long)root_dirid);
3122 print_inode_error(root, rec);
3127 struct btrfs_trans_handle *trans;
3129 trans = btrfs_start_transaction(root, 1);
3130 if (IS_ERR(trans)) {
3131 err = PTR_ERR(trans);
3136 "root %llu missing its root dir, recreating\n",
3137 (unsigned long long)root->objectid);
3139 ret = btrfs_make_root_dir(trans, root, root_dirid);
3142 btrfs_commit_transaction(trans, root);
3146 fprintf(stderr, "root %llu root dir %llu not found\n",
3147 (unsigned long long)root->root_key.objectid,
3148 (unsigned long long)root_dirid);
/* Main pass: drain the cache, validating/repairing each inode record. */
3152 cache = search_cache_extent(inode_cache, 0);
3155 node = container_of(cache, struct ptr_node, cache);
3157 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and the orphan objectid were already handled above. */
3159 if (rec->ino == root_dirid ||
3160 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3161 free_inode_rec(rec);
/* A present orphan item excuses the NO_ORPHAN_ITEM error. */
3165 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3166 ret = check_orphan_item(root, rec->ino);
3168 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3169 if (can_free_inode_rec(rec)) {
3170 free_inode_rec(rec);
3175 if (!rec->found_inode_item)
3176 rec->errors |= I_ERR_NO_INODE_ITEM;
3177 if (rec->found_link != rec->nlink)
3178 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3180 ret = try_repair_inode(root, rec);
3181 if (ret == 0 && can_free_inode_rec(rec)) {
3182 free_inode_rec(rec);
3188 if (!(repair && ret == 0))
3190 print_inode_error(root, rec);
/* Report every unresolved backref of the failing record. */
3191 list_for_each_entry(backref, &rec->backrefs, list) {
3192 if (!backref->found_dir_item)
3193 backref->errors |= REF_ERR_NO_DIR_ITEM;
3194 if (!backref->found_dir_index)
3195 backref->errors |= REF_ERR_NO_DIR_INDEX;
3196 if (!backref->found_inode_ref)
3197 backref->errors |= REF_ERR_NO_INODE_REF;
3198 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3199 " namelen %u name %s filetype %d errors %x",
3200 (unsigned long long)backref->dir,
3201 (unsigned long long)backref->index,
3202 backref->namelen, backref->name,
3203 backref->filetype, backref->errors);
3204 print_ref_error(backref->errors);
3206 free_inode_rec(rec);
3208 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one on miss.  Returns ERR_PTR on allocation or
 * insertion failure.
 * NOTE(review): on insert_cache_extent() failure the freshly calloc'd
 * rec appears to be leaked before returning -EEXIST — confirm against
 * the elided lines.
 */
3211 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3214 struct cache_extent *cache;
3215 struct root_record *rec = NULL;
3218 cache = lookup_cache_extent(root_cache, objectid, 1);
3220 rec = container_of(cache, struct root_record, cache);
3222 rec = calloc(1, sizeof(*rec));
3224 return ERR_PTR(-ENOMEM);
3225 rec->objectid = objectid;
3226 INIT_LIST_HEAD(&rec->backrefs);
/* A root record occupies a single-unit extent keyed by its objectid. */
3227 rec->cache.start = objectid;
3228 rec->cache.size = 1;
3230 ret = insert_cache_extent(root_cache, &rec->cache);
3232 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate a
 * new zeroed one (with inline NUL-terminated name copy) if none exists.
 */
3237 static struct root_backref *get_root_backref(struct root_record *rec,
3238 u64 ref_root, u64 dir, u64 index,
3239 const char *name, int namelen)
3241 struct root_backref *backref;
3243 list_for_each_entry(backref, &rec->backrefs, list) {
3244 if (backref->ref_root != ref_root || backref->dir != dir ||
3245 backref->namelen != namelen)
3247 if (memcmp(name, backref->name, namelen))
/* No match: allocate record plus room for the name and its NUL. */
3252 backref = calloc(1, sizeof(*backref) + namelen + 1);
3255 backref->ref_root = ref_root;
3257 backref->index = index;
3258 backref->namelen = namelen;
3259 memcpy(backref->name, name, namelen);
3260 backref->name[namelen] = '\0';
3261 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Cache-tree destructor callback: free a root_record together with all
 * of its queued backrefs.
 */
3265 static void free_root_record(struct cache_extent *cache)
3267 struct root_record *rec;
3268 struct root_backref *backref;
3270 rec = container_of(cache, struct root_record, cache);
3271 while (!list_empty(&rec->backrefs)) {
3272 backref = to_root_backref(rec->backrefs.next);
3273 list_del(&backref->list);
/* Generates free_root_recs_tree() using the destructor above. */
3280 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (DIR_ITEM, DIR_INDEX, ROOT_REF or
 * ROOT_BACKREF key) about the link from @ref_root/@dir to root
 * @root_id, updating the matching root_backref's found_* flags and
 * flagging duplicates/index mismatches as errors.  A backref becomes
 * "reachable" once both the forward ref and the dir item were seen.
 */
3282 static int add_root_backref(struct cache_tree *root_cache,
3283 u64 root_id, u64 ref_root, u64 dir, u64 index,
3284 const char *name, int namelen,
3285 int item_type, int errors)
3287 struct root_record *rec;
3288 struct root_backref *backref;
3290 rec = get_root_rec(root_cache, root_id);
3291 BUG_ON(IS_ERR(rec));
3292 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3295 backref->errors |= errors;
/* DIR_ITEM keys carry no index, so only cross-check it for the rest. */
3297 if (item_type != BTRFS_DIR_ITEM_KEY) {
3298 if (backref->found_dir_index || backref->found_back_ref ||
3299 backref->found_forward_ref) {
3300 if (backref->index != index)
3301 backref->errors |= REF_ERR_INDEX_UNMATCH;
3303 backref->index = index;
3307 if (item_type == BTRFS_DIR_ITEM_KEY) {
3308 if (backref->found_forward_ref)
3310 backref->found_dir_item = 1;
3311 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3312 backref->found_dir_index = 1;
3313 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* A second ROOT_REF for the same backref is a duplicate. */
3314 if (backref->found_forward_ref)
3315 backref->errors |= REF_ERR_DUP_ROOT_REF;
3316 else if (backref->found_dir_item)
3318 backref->found_forward_ref = 1;
3319 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3320 if (backref->found_back_ref)
3321 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3322 backref->found_back_ref = 1;
3327 if (backref->found_forward_ref && backref->found_dir_item)
3328 backref->reachable = 1;
/*
 * Fold the per-subvolume record cache @src_cache into the global
 * @dst_cache: records that are really child roots are re-expressed as
 * root backrefs; everything is freed from the source cache.  Reloc
 * trees are skipped entirely.
 */
3332 static int merge_root_recs(struct btrfs_root *root,
3333 struct cache_tree *src_cache,
3334 struct cache_tree *dst_cache)
3336 struct cache_extent *cache;
3337 struct ptr_node *node;
3338 struct inode_record *rec;
3339 struct inode_backref *backref;
/* Reloc tree records carry no useful root linkage; just drop them. */
3342 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3343 free_inode_recs_tree(src_cache);
3348 cache = search_cache_extent(src_cache, 0);
3351 node = container_of(cache, struct ptr_node, cache);
3353 remove_cache_extent(src_cache, &node->cache);
3356 ret = is_child_root(root, root->objectid, rec->ino);
/* Translate each dir item/index backref into a root backref entry. */
3362 list_for_each_entry(backref, &rec->backrefs, list) {
3363 BUG_ON(backref->found_inode_ref);
3364 if (backref->found_dir_item)
3365 add_root_backref(dst_cache, rec->ino,
3366 root->root_key.objectid, backref->dir,
3367 backref->index, backref->name,
3368 backref->namelen, BTRFS_DIR_ITEM_KEY,
3370 if (backref->found_dir_index)
3371 add_root_backref(dst_cache, rec->ino,
3372 root->root_key.objectid, backref->dir,
3373 backref->index, backref->name,
3374 backref->namelen, BTRFS_DIR_INDEX_KEY,
3378 free_inode_rec(rec);
/*
 * Verify that every fs tree is reachable from the fs root.  First
 * iteratively drops backrefs whose referencing root is itself
 * unreachable, then reports unreferenced trees (checking for an orphan
 * item first) and any backrefs with error bits set.  Returns 1 if any
 * error was found, 0 otherwise.
 */
3385 static int check_root_refs(struct btrfs_root *root,
3386 struct cache_tree *root_cache)
3388 struct root_record *rec;
3389 struct root_record *ref_root;
3390 struct root_backref *backref;
3391 struct cache_extent *cache;
/* The top-level fs tree is reachable by definition. */
3397 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3398 BUG_ON(IS_ERR(rec));
3401 /* fixme: this can not detect circular references */
/* Propagation pass: a backref only counts if its referrer is reachable. */
3404 cache = search_cache_extent(root_cache, 0);
3408 rec = container_of(cache, struct root_record, cache);
3409 cache = next_cache_extent(cache);
3411 if (rec->found_ref == 0)
3414 list_for_each_entry(backref, &rec->backrefs, list) {
3415 if (!backref->reachable)
3418 ref_root = get_root_rec(root_cache,
3420 BUG_ON(IS_ERR(ref_root));
3421 if (ref_root->found_ref > 0)
3424 backref->reachable = 0;
3426 if (rec->found_ref == 0)
/* Reporting pass over the (now settled) reachability state. */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0 &&
3440 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3441 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3442 ret = check_orphan_item(root->fs_info->tree_root,
3448 * If we don't have a root item then we likely just have
3449 * a dir item in a snapshot for this root but no actual
3450 * ref key or anything so it's meaningless.
3452 if (!rec->found_root_item)
3455 fprintf(stderr, "fs tree %llu not referenced\n",
3456 (unsigned long long)rec->objectid);
3460 if (rec->found_ref > 0 && !rec->found_root_item)
3462 list_for_each_entry(backref, &rec->backrefs, list) {
3463 if (!backref->found_dir_item)
3464 backref->errors |= REF_ERR_NO_DIR_ITEM;
3465 if (!backref->found_dir_index)
3466 backref->errors |= REF_ERR_NO_DIR_INDEX;
3467 if (!backref->found_back_ref)
3468 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3469 if (!backref->found_forward_ref)
3470 backref->errors |= REF_ERR_NO_ROOT_REF;
3471 if (backref->reachable && backref->errors)
3478 fprintf(stderr, "fs tree %llu refs %u %s\n",
3479 (unsigned long long)rec->objectid, rec->found_ref,
3480 rec->found_root_item ? "" : "not found");
3482 list_for_each_entry(backref, &rec->backrefs, list) {
3483 if (!backref->reachable)
3485 if (!backref->errors && rec->found_root_item)
3487 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3488 " index %llu namelen %u name %s errors %x\n",
3489 (unsigned long long)backref->ref_root,
3490 (unsigned long long)backref->dir,
3491 (unsigned long long)backref->index,
3492 backref->namelen, backref->name,
3494 print_ref_error(backref->errors);
3497 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF/ROOT_BACKREF item from leaf @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) — hence the swapped
 * objectid/offset arguments below.  Over-long names are truncated and
 * flagged with REF_ERR_NAME_TOO_LONG.
 */
3500 static int process_root_ref(struct extent_buffer *eb, int slot,
3501 struct btrfs_key *key,
3502 struct cache_tree *root_cache)
3508 struct btrfs_root_ref *ref;
3509 char namebuf[BTRFS_NAME_LEN];
3512 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3514 dirid = btrfs_root_ref_dirid(eb, ref);
3515 index = btrfs_root_ref_sequence(eb, ref);
3516 name_len = btrfs_root_ref_name_len(eb, ref);
3518 if (name_len <= BTRFS_NAME_LEN) {
/* Name longer than the on-disk limit: clamp and record the error. */
3522 len = BTRFS_NAME_LEN;
3523 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline immediately after the root_ref item. */
3525 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3527 if (key->type == BTRFS_ROOT_REF_KEY) {
3528 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3529 index, namebuf, len, key->type, error);
3531 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3532 index, namebuf, len, key->type, error);
/* Cache-tree destructor callback for btrfs_corrupt_block entries. */
3537 static void free_corrupt_block(struct cache_extent *cache)
3539 struct btrfs_corrupt_block *corrupt;
3541 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() using the destructor above. */
3545 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3548 * Repair the btree of the given root.
3550 * The fix is to remove the node key in corrupt_blocks cache_tree.
3551 * and rebalance the tree.
3552 * After the fix, the btree should be writeable.
3554 static int repair_btree(struct btrfs_root *root,
3555 struct cache_tree *corrupt_blocks)
3557 struct btrfs_trans_handle *trans;
3558 struct btrfs_path *path;
3559 struct btrfs_corrupt_block *corrupt;
3560 struct cache_extent *cache;
3561 struct btrfs_key key;
/* Nothing recorded as corrupt: nothing to do. */
3566 if (cache_tree_empty(corrupt_blocks))
3569 path = btrfs_alloc_path();
3573 trans = btrfs_start_transaction(root, 1);
3574 if (IS_ERR(trans)) {
3575 ret = PTR_ERR(trans);
3576 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: unlink every corrupt block from its parent and free its extent. */
3580 cache = first_cache_extent(corrupt_blocks);
3582 corrupt = container_of(cache, struct btrfs_corrupt_block,
3584 level = corrupt->level;
/* Search only down to the parent of the corrupt block, never into it. */
3585 path->lowest_level = level;
3586 key.objectid = corrupt->key.objectid;
3587 key.type = corrupt->key.type;
3588 key.offset = corrupt->key.offset;
3591 * Here we don't want to do any tree balance, since it may
3592 * cause a balance with corrupted brother leaf/node,
3593 * so ins_len set to 0 here.
3594 * Balance will be done after all corrupt node/leaf is deleted.
3596 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3599 offset = btrfs_node_blockptr(path->nodes[level],
3600 path->slots[level]);
3602 /* Remove the ptr */
3603 ret = btrfs_del_ptr(trans, root, path, level,
3604 path->slots[level]);
3608 * Remove the corresponding extent
3609 * return value is not concerned.
3611 btrfs_release_path(path);
3612 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3613 0, root->root_key.objectid,
3615 cache = next_cache_extent(cache);
3618 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: a -1/cow search per removed key triggers rebalancing. */
3619 cache = first_cache_extent(corrupt_blocks);
3621 corrupt = container_of(cache, struct btrfs_corrupt_block,
3623 memcpy(&key, &corrupt->key, sizeof(key));
3624 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3627 /* return will always >0 since it won't find the item */
3629 btrfs_release_path(path);
3630 cache = next_cache_extent(cache);
3633 btrfs_commit_transaction(trans, root);
3635 btrfs_free_path(path);
/*
 * Full check of one fs/subvolume tree: walks the tree with
 * walk_down_tree()/walk_up_tree(), collects inode and root records,
 * records corrupted tree blocks in a per-root cache (attempting
 * repair_btree() in repair mode), then merges the findings into the
 * global caches and checks all inode records.
 */
3639 static int check_fs_root(struct btrfs_root *root,
3640 struct cache_tree *root_cache,
3641 struct walk_control *wc)
3647 struct btrfs_path path;
3648 struct shared_node root_node;
3649 struct root_record *rec;
3650 struct btrfs_root_item *root_item = &root->root_item;
3651 struct cache_tree corrupt_blocks;
3652 struct orphan_data_extent *orphan;
3653 struct orphan_data_extent *tmp;
3654 enum btrfs_tree_block_status status;
3655 struct node_refs nrefs;
3658 * Reuse the corrupt_block cache tree to record corrupted tree block
3660 * Unlike the usage in extent tree check, here we do it in a per
3661 * fs/subvol tree base.
3663 cache_tree_init(&corrupt_blocks);
3664 root->fs_info->corrupt_blocks = &corrupt_blocks;
3666 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3667 rec = get_root_rec(root_cache, root->root_key.objectid);
3668 BUG_ON(IS_ERR(rec));
3669 if (btrfs_root_refs(root_item) > 0)
3670 rec->found_root_item = 1;
3673 btrfs_init_path(&path);
3674 memset(&root_node, 0, sizeof(root_node));
3675 cache_tree_init(&root_node.root_cache);
3676 cache_tree_init(&root_node.inode_cache);
3677 memset(&nrefs, 0, sizeof(nrefs));
3679 /* Move the orphan extent record to corresponding inode_record */
3680 list_for_each_entry_safe(orphan, tmp,
3681 &root->orphan_data_extents, list) {
3682 struct inode_record *inode;
3684 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3686 BUG_ON(IS_ERR(inode));
3687 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3688 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk_control with the root node as the active shared node. */
3691 level = btrfs_header_level(root->node);
3692 memset(wc->nodes, 0, sizeof(wc->nodes));
3693 wc->nodes[level] = &root_node;
3694 wc->active_node = level;
3695 wc->root_level = level;
3697 /* We may not have checked the root block, lets do that now */
3698 if (btrfs_is_leaf(root->node))
3699 status = btrfs_check_leaf(root, NULL, root->node);
3701 status = btrfs_check_node(root, NULL, root->node);
3702 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* A root in the middle of a drop resumes the walk at drop_progress. */
3705 if (btrfs_root_refs(root_item) > 0 ||
3706 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3707 path.nodes[level] = root->node;
3708 extent_buffer_get(root->node);
3709 path.slots[level] = 0;
3711 struct btrfs_key key;
3712 struct btrfs_disk_key found_key;
3714 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3715 level = root_item->drop_level;
3716 path.lowest_level = level;
3717 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3720 btrfs_node_key(path.nodes[level], &found_key,
3722 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3723 sizeof(found_key)));
/* Iterate the whole tree: down then up until the walk completes. */
3727 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3733 wret = walk_up_tree(root, &path, wc, &level);
3740 btrfs_release_path(&path);
3742 if (!cache_tree_empty(&corrupt_blocks)) {
3743 struct cache_extent *cache;
3744 struct btrfs_corrupt_block *corrupt;
3746 printf("The following tree block(s) is corrupted in tree %llu:\n",
3747 root->root_key.objectid);
3748 cache = first_cache_extent(&corrupt_blocks);
3750 corrupt = container_of(cache,
3751 struct btrfs_corrupt_block,
3753 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3754 cache->start, corrupt->level,
3755 corrupt->key.objectid, corrupt->key.type,
3756 corrupt->key.offset);
3757 cache = next_cache_extent(cache);
3760 printf("Try to repair the btree for root %llu\n",
3761 root->root_key.objectid);
3762 ret = repair_btree(root, &corrupt_blocks);
3764 fprintf(stderr, "Failed to repair btree: %s\n",
3767 printf("Btree for root %llu is fixed\n",
3768 root->root_key.objectid);
3772 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the last shared node checked so its records can be released. */
3776 if (root_node.current) {
3777 root_node.current->checked = 1;
3778 maybe_free_inode_rec(&root_node.inode_cache,
3782 err = check_inode_recs(root, &root_node.inode_cache);
/* Always detach the per-root corrupt-block cache before returning. */
3786 free_corrupt_blocks_tree(&corrupt_blocks);
3787 root->fs_info->corrupt_blocks = NULL;
3788 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid denotes a tree that is checked like an
 * fs tree: any fstree objectid plus the two reloc tree ids.
 */
3792 static int fs_root_objectid(u64 objectid)
3794 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3795 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3797 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root, running check_fs_root() on
 * each fs tree and feeding ROOT_REF/ROOT_BACKREF items into the root
 * cache.  Restarts from scratch if the tree root node changes under us
 * (e.g. after a repair) or a sub-check returns -EAGAIN.
 */
3800 static int check_fs_roots(struct btrfs_root *root,
3801 struct cache_tree *root_cache)
3803 struct btrfs_path path;
3804 struct btrfs_key key;
3805 struct walk_control wc;
3806 struct extent_buffer *leaf, *tree_node;
3807 struct btrfs_root *tmp_root;
3808 struct btrfs_root *tree_root = root->fs_info->tree_root;
3812 if (ctx.progress_enabled) {
3813 ctx.tp = TASK_FS_ROOTS;
3814 task_start(ctx.info);
3818 * Just in case we made any changes to the extent tree that weren't
3819 * reflected into the free space cache yet.
3822 reset_cached_block_groups(root->fs_info);
3823 memset(&wc, 0, sizeof(wc));
3824 cache_tree_init(&wc.shared);
3825 btrfs_init_path(&path);
3830 key.type = BTRFS_ROOT_ITEM_KEY;
3831 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect concurrent modification. */
3836 tree_node = tree_root->node;
3838 if (tree_node != tree_root->node) {
3839 free_root_recs_tree(root_cache);
3840 btrfs_release_path(&path);
3843 leaf = path.nodes[0];
3844 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3845 ret = btrfs_next_leaf(tree_root, &path);
3851 leaf = path.nodes[0];
3853 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3854 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3855 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached; normal fs trees via the cache. */
3856 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3857 tmp_root = btrfs_read_fs_root_no_cache(
3858 root->fs_info, &key);
3860 key.offset = (u64)-1;
3861 tmp_root = btrfs_read_fs_root(
3862 root->fs_info, &key);
3864 if (IS_ERR(tmp_root)) {
3868 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: state invalidated by repair, restart the whole scan. */
3869 if (ret == -EAGAIN) {
3870 free_root_recs_tree(root_cache);
3871 btrfs_release_path(&path);
3876 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3877 btrfs_free_fs_root(tmp_root);
3878 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3879 key.type == BTRFS_ROOT_BACKREF_KEY) {
3880 process_root_ref(leaf, path.slots[0], &key,
3887 btrfs_release_path(&path);
3889 free_extent_cache_tree(&wc.shared);
3890 if (!cache_tree_empty(&wc.shared))
3891 fprintf(stderr, "warning line %d\n", __LINE__);
3893 task_stop(ctx.info);
/*
 * Verify the consistency of every backref attached to extent record
 * @rec: presence in the extent tree, reference counts, disk bytenr and
 * byte-length agreement, and the global refcount total.  When
 * @print_errs is set, each mismatch is reported to stderr.  Returns
 * non-zero if anything is inconsistent.
 */
3898 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3901 struct extent_backref *back;
3902 struct tree_backref *tback;
3903 struct data_backref *dback;
3907 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3908 back = rb_node_to_extent_backref(n);
/* Backref known from tree/data refs but never seen in the extent tree. */
3909 if (!back->found_extent_tree) {
3913 if (back->is_data) {
3914 dback = to_data_backref(back);
3915 fprintf(stderr, "Backref %llu %s %llu"
3916 " owner %llu offset %llu num_refs %lu"
3917 " not found in extent tree\n",
3918 (unsigned long long)rec->start,
3919 back->full_backref ?
3921 back->full_backref ?
3922 (unsigned long long)dback->parent:
3923 (unsigned long long)dback->root,
3924 (unsigned long long)dback->owner,
3925 (unsigned long long)dback->offset,
3926 (unsigned long)dback->num_refs);
3928 tback = to_tree_backref(back);
3929 fprintf(stderr, "Backref %llu parent %llu"
3930 " root %llu not found in extent tree\n",
3931 (unsigned long long)rec->start,
3932 (unsigned long long)tback->parent,
3933 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never referenced back. */
3936 if (!back->is_data && !back->found_ref) {
3940 tback = to_tree_backref(back);
3941 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3942 (unsigned long long)rec->start,
3943 back->full_backref ? "parent" : "root",
3944 back->full_backref ?
3945 (unsigned long long)tback->parent :
3946 (unsigned long long)tback->root, back);
3948 if (back->is_data) {
3949 dback = to_data_backref(back);
/* Per-backref refcount must match the number of refs actually found. */
3950 if (dback->found_ref != dback->num_refs) {
3954 fprintf(stderr, "Incorrect local backref count"
3955 " on %llu %s %llu owner %llu"
3956 " offset %llu found %u wanted %u back %p\n",
3957 (unsigned long long)rec->start,
3958 back->full_backref ?
3960 back->full_backref ?
3961 (unsigned long long)dback->parent:
3962 (unsigned long long)dback->root,
3963 (unsigned long long)dback->owner,
3964 (unsigned long long)dback->offset,
3965 dback->found_ref, dback->num_refs, back);
3967 if (dback->disk_bytenr != rec->start) {
3971 fprintf(stderr, "Backref disk bytenr does not"
3972 " match extent record, bytenr=%llu, "
3973 "ref bytenr=%llu\n",
3974 (unsigned long long)rec->start,
3975 (unsigned long long)dback->disk_bytenr);
3978 if (dback->bytes != rec->nr) {
3982 fprintf(stderr, "Backref bytes do not match "
3983 "extent backref, bytenr=%llu, ref "
3984 "bytes=%llu, backref bytes=%llu\n",
3985 (unsigned long long)rec->start,
3986 (unsigned long long)rec->nr,
3987 (unsigned long long)dback->bytes);
/* Accumulate data refs to compare against the extent's global count. */
3990 if (!back->is_data) {
3993 dback = to_data_backref(back);
3994 found += dback->found_ref;
3997 if (found != rec->refs) {
4001 fprintf(stderr, "Incorrect global backref count "
4002 "on %llu found %llu wanted %llu\n",
4003 (unsigned long long)rec->start,
4004 (unsigned long long)found,
4005 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free one extent_backref by its rb_node. */
4011 static void __free_one_backref(struct rb_node *node)
4013 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free every backref hanging off @rec's backref rb-tree. */
4018 static void free_all_extent_backrefs(struct extent_record *rec)
4020 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache, freeing each extent_record together with all of
 * its backrefs.
 */
4023 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4024 struct cache_tree *extent_cache)
4026 struct cache_extent *cache;
4027 struct extent_record *rec;
4030 cache = first_cache_extent(extent_cache);
4033 rec = container_of(cache, struct extent_record, cache);
4034 remove_cache_extent(extent_cache, cache);
4035 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache once it is fully validated: content
 * and owner checked, refcounts consistent, no duplicates, backrefs all
 * accounted for, and none of the structural error flags set.
 */
4040 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4041 struct extent_record *rec)
4043 if (rec->content_checked && rec->owner_ref_checked &&
4044 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4045 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4046 rec->bad_full_backref && !rec->crossing_stripes &&
4047 !rec->wrong_chunk_type) {
4048 remove_cache_extent(extent_cache, &rec->cache);
4049 free_all_extent_backrefs(rec);
4050 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is legitimately owned: either a
 * non-full backref of @rec names its header owner as root, or the
 * owning fs tree can be searched and found to actually point at this
 * block.  Returns 0 when ownership is confirmed, 1 otherwise.
 */
4056 static int check_owner_ref(struct btrfs_root *root,
4057 struct extent_record *rec,
4058 struct extent_buffer *buf)
4060 struct extent_backref *node, *tmp;
4061 struct tree_backref *back;
4062 struct btrfs_root *ref_root;
4063 struct btrfs_key key;
4064 struct btrfs_path path;
4065 struct extent_buffer *parent;
/* Fast path: a found, non-full backref matching the header owner. */
4070 rbtree_postorder_for_each_entry_safe(node, tmp,
4071 &rec->backref_tree, node) {
4074 if (!node->found_ref)
4076 if (node->full_backref)
4078 back = to_tree_backref(node);
4079 if (btrfs_header_owner(buf) == back->root)
4082 BUG_ON(rec->is_root);
4084 /* try to find the block by search corresponding fs tree */
4085 key.objectid = btrfs_header_owner(buf);
4086 key.type = BTRFS_ROOT_ITEM_KEY;
4087 key.offset = (u64)-1;
4089 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4090 if (IS_ERR(ref_root))
/* Use the block's first key to locate its slot in the owner tree. */
4093 level = btrfs_header_level(buf);
4095 btrfs_item_key_to_cpu(buf, &key, 0);
4097 btrfs_node_key_to_cpu(buf, &key, 0);
4099 btrfs_init_path(&path);
4100 path.lowest_level = level + 1;
4101 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* Ownership holds iff the parent's blockptr points back at @buf. */
4105 parent = path.nodes[level + 1];
4106 if (parent && buf->start == btrfs_node_blockptr(parent,
4107 path.slots[level + 1]))
4110 btrfs_release_path(&path);
4111 return found ? 0 : 1;
/*
 * Return non-zero if @rec has a non-full tree backref rooted in the
 * extent tree, i.e. the block belongs to the extent tree itself.
 */
4114 static int is_extent_tree_record(struct extent_record *rec)
4116 struct extent_backref *ref, *tmp;
4117 struct tree_backref *back;
4120 rbtree_postorder_for_each_entry_safe(ref, tmp,
4121 &rec->backref_tree, node) {
4124 back = to_tree_backref(ref);
/* Full backrefs identify a parent block, not a root — skip them. */
4125 if (ref->full_backref)
4127 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure on [start, start+len) as a corrupt extent.
 * Only extents that belong to the extent tree are recorded (other
 * records are handled elsewhere); the key used for the corruption
 * record is the parent key stored on the extent record.
 */
4134 static int record_bad_block_io(struct btrfs_fs_info *info,
4135 struct cache_tree *extent_cache,
4138 struct extent_record *rec;
4139 struct cache_extent *cache;
4140 struct btrfs_key key;
4142 cache = lookup_cache_extent(extent_cache, start, len);
4146 rec = container_of(cache, struct extent_record, cache);
4147 if (!is_extent_tree_record(rec))
4150 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4151 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 in @buf to repair bad key
 * order.
 *
 * For interior nodes the two btrfs_key_ptr structs are swapped via
 * bounce buffers; for leaves the item headers, keys and item data are
 * exchanged.  When slot 0 changes, the low keys up the path are fixed
 * so parent pointers stay consistent.
 */
4154 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4155 struct extent_buffer *buf, int slot)
4157 if (btrfs_header_level(buf)) {
4158 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write them back crossed over. */
4160 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4161 sizeof(struct btrfs_key_ptr));
4162 read_extent_buffer(buf, &ptr2,
4163 btrfs_node_key_ptr_offset(slot + 1),
4164 sizeof(struct btrfs_key_ptr));
4165 write_extent_buffer(buf, &ptr1,
4166 btrfs_node_key_ptr_offset(slot + 1),
4167 sizeof(struct btrfs_key_ptr));
4168 write_extent_buffer(buf, &ptr2,
4169 btrfs_node_key_ptr_offset(slot),
4170 sizeof(struct btrfs_key_ptr));
/* Slot 0 changed: propagate the new first key to the parents. */
4172 struct btrfs_disk_key key;
4173 btrfs_node_key(buf, &key, 0);
4174 btrfs_fixup_low_keys(root, path, &key,
4175 btrfs_header_level(buf) + 1);
/* Leaf case: swap item headers, data and keys of the two slots. */
4178 struct btrfs_item *item1, *item2;
4179 struct btrfs_key k1, k2;
4180 char *item1_data, *item2_data;
4181 u32 item1_offset, item2_offset, item1_size, item2_size;
4183 item1 = btrfs_item_nr(slot);
4184 item2 = btrfs_item_nr(slot + 1);
4185 btrfs_item_key_to_cpu(buf, &k1, slot);
4186 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4187 item1_offset = btrfs_item_offset(buf, item1);
4188 item2_offset = btrfs_item_offset(buf, item2);
4189 item1_size = btrfs_item_size(buf, item1);
4190 item2_size = btrfs_item_size(buf, item2);
/* Temporary heap copies of both payloads (error paths elided). */
4192 item1_data = malloc(item1_size);
4195 item2_data = malloc(item2_size);
4201 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4202 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Write each payload into the other item's data area. */
4204 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4205 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the offset/size headers so they describe the moved data. */
4209 btrfs_set_item_offset(buf, item1, item2_offset);
4210 btrfs_set_item_offset(buf, item2, item1_offset);
4211 btrfs_set_item_size(buf, item1, item2_size);
4212 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys (also fixes up ancestors when needed). */
4214 path->slots[0] = slot;
4215 btrfs_set_item_key_unsafe(root, path, &k2);
4216 path->slots[0] = slot + 1;
4217 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair of
 * keys that is out of order (node keys for interior blocks, item keys
 * for leaves), then mark the buffer dirty.  Relies on swap_values() to
 * do the actual exchange and low-key fixup.
 */
4222 static int fix_key_order(struct btrfs_trans_handle *trans,
4223 struct btrfs_root *root,
4224 struct btrfs_path *path)
4226 struct extent_buffer *buf;
4227 struct btrfs_key k1, k2;
4229 int level = path->lowest_level;
4232 buf = path->nodes[level];
4233 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Compare key i against key i + 1 at the appropriate level. */
4235 btrfs_node_key_to_cpu(buf, &k1, i);
4236 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4238 btrfs_item_key_to_cpu(buf, &k1, i);
4239 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered -> nothing to do for this pair. */
4241 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4243 ret = swap_values(root, path, buf, i);
4246 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf when its loss is tolerable.
 * Only item types that check/repair can reconstruct or safely live
 * without (dir index, extent item/metadata item, tree block ref,
 * extent data ref) may be deleted; anything else is refused.
 * The item headers after @slot are shifted down and nritems reduced;
 * if slot 0 was removed the low keys are fixed up.
 */
4252 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4253 struct btrfs_root *root,
4254 struct btrfs_path *path,
4255 struct extent_buffer *buf, int slot)
4257 struct btrfs_key key;
4258 int nritems = btrfs_header_nritems(buf);
4260 btrfs_item_key_to_cpu(buf, &key, slot);
4262 /* These are all the keys we can deal with missing. */
4263 if (key.type != BTRFS_DIR_INDEX_KEY &&
4264 key.type != BTRFS_EXTENT_ITEM_KEY &&
4265 key.type != BTRFS_METADATA_ITEM_KEY &&
4266 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4267 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4270 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4271 (unsigned long long)key.objectid, key.type,
4272 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array (data area left as-is). */
4273 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4274 btrfs_item_nr_offset(slot + 1),
4275 sizeof(struct btrfs_item) *
4276 (nritems - slot - 1));
4277 btrfs_set_header_nritems(buf, nritems - 1);
/* Deleted slot 0: push the new first key up to the parents. */
4279 struct btrfs_disk_key disk_key;
4281 btrfs_item_key(buf, &disk_key, 0);
4282 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4284 btrfs_mark_buffer_dirty(buf);
/*
 * Repair invalid item offsets in the leaf at path->nodes[0].
 *
 * For each item the expected end is either the leaf data size (item 0)
 * or the previous item's offset.  An item ending past that boundary is
 * either deleted (via delete_bogus_item) or reported unfixable; an item
 * ending short of it is shifted up by the gap so the data area is
 * contiguous again.
 */
4288 static int fix_item_offset(struct btrfs_trans_handle *trans,
4289 struct btrfs_root *root,
4290 struct btrfs_path *path)
4292 struct extent_buffer *buf;
4296 /* We should only get this for leaves */
4297 BUG_ON(path->lowest_level);
4298 buf = path->nodes[0];
4300 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4301 unsigned int shift = 0, offset;
/* First item must end exactly at the end of the leaf data area. */
4303 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4304 BTRFS_LEAF_DATA_SIZE(root)) {
4305 if (btrfs_item_end_nr(buf, i) >
4306 BTRFS_LEAF_DATA_SIZE(root)) {
4307 ret = delete_bogus_item(trans, root, path,
4311 fprintf(stderr, "item is off the end of the "
4312 "leaf, can't fix\n");
4316 shift = BTRFS_LEAF_DATA_SIZE(root) -
4317 btrfs_item_end_nr(buf, i);
/* Later items must end exactly where the previous one starts. */
4318 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4319 btrfs_item_offset_nr(buf, i - 1)) {
4320 if (btrfs_item_end_nr(buf, i) >
4321 btrfs_item_offset_nr(buf, i - 1)) {
4322 ret = delete_bogus_item(trans, root, path,
4326 fprintf(stderr, "items overlap, can't fix\n");
4330 shift = btrfs_item_offset_nr(buf, i - 1) -
4331 btrfs_item_end_nr(buf, i);
4336 printf("Shifting item nr %d by %u bytes in block %llu\n",
4337 i, shift, (unsigned long long)buf->start);
4338 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up and record its new offset. */
4339 memmove_extent_buffer(buf,
4340 btrfs_leaf_data(buf) + offset + shift,
4341 btrfs_leaf_data(buf) + offset,
4342 btrfs_item_size_nr(buf, i));
4343 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4345 btrfs_mark_buffer_dirty(buf);
4349 * We may have moved things, in which case we want to exit so we don't
4350 * write those changes out. Once we have proper abort functionality in
4351 * progs this can be changed to something nicer.
4358 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4359 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are repairable here.
 * Every root that references the block (per btrfs_find_all_roots) is
 * visited; for each we start a transaction, COW down to the block with
 * skip_check_block set, and run the matching fixer.  Callers are
 * expected to restart their scan afterwards since blocks were COWed.
 */
4361 static int try_to_fix_bad_block(struct btrfs_root *root,
4362 struct extent_buffer *buf,
4363 enum btrfs_tree_block_status status)
4365 struct btrfs_trans_handle *trans;
4366 struct ulist *roots;
4367 struct ulist_node *node;
4368 struct btrfs_root *search_root;
4369 struct btrfs_path *path;
4370 struct ulist_iterator iter;
4371 struct btrfs_key root_key, key;
/* Anything other than these two failure modes is not fixable here. */
4374 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4375 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4378 path = btrfs_alloc_path();
4382 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4385 btrfs_free_path(path);
4389 ULIST_ITER_INIT(&iter);
4390 while ((node = ulist_next(roots, &iter))) {
4391 root_key.objectid = node->val;
4392 root_key.type = BTRFS_ROOT_ITEM_KEY;
4393 root_key.offset = (u64)-1;
4395 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4402 trans = btrfs_start_transaction(search_root, 0);
4403 if (IS_ERR(trans)) {
4404 ret = PTR_ERR(trans);
/* Search down to buf's own level, bypassing block validation. */
4408 path->lowest_level = btrfs_header_level(buf);
4409 path->skip_check_block = 1;
4410 if (path->lowest_level)
4411 btrfs_node_key_to_cpu(buf, &key, 0);
4413 btrfs_item_key_to_cpu(buf, &key, 0);
4414 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4417 btrfs_commit_transaction(trans, search_root);
4420 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4421 ret = fix_key_order(trans, search_root, path);
4422 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4423 ret = fix_item_offset(trans, search_root, path);
4425 btrfs_commit_transaction(trans, search_root);
4428 btrfs_release_path(path);
4429 btrfs_commit_transaction(trans, search_root);
4432 btrfs_free_path(path);
/*
 * Validate tree block @buf against its extent record.
 *
 * Records the generation and first-key info on the extent record, runs
 * btrfs_check_leaf/btrfs_check_node, attempts try_to_fix_bad_block on
 * failure (and signals the caller to restart the scan if a fix COWed
 * blocks), then marks content/owner-ref checked as appropriate and
 * lets maybe_free_extent_rec() drop the record if fully verified.
 */
4436 static int check_block(struct btrfs_root *root,
4437 struct cache_tree *extent_cache,
4438 struct extent_buffer *buf, u64 flags)
4440 struct extent_record *rec;
4441 struct cache_extent *cache;
4442 struct btrfs_key key;
4443 enum btrfs_tree_block_status status;
4447 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4450 rec = container_of(cache, struct extent_record, cache);
4451 rec->generation = btrfs_header_generation(buf);
4453 level = btrfs_header_level(buf);
4454 if (btrfs_header_nritems(buf) > 0) {
/* Stash the first key for later cross-checks on this record. */
4457 btrfs_item_key_to_cpu(buf, &key, 0);
4459 btrfs_node_key_to_cpu(buf, &key, 0);
4461 rec->info_objectid = key.objectid;
4463 rec->info_level = level;
4465 if (btrfs_is_leaf(buf))
4466 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4468 status = btrfs_check_node(root, &rec->parent_key, buf);
4470 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4472 status = try_to_fix_bad_block(root, buf, status);
4473 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4475 fprintf(stderr, "bad block %llu\n",
4476 (unsigned long long)buf->start);
4479 * Signal to callers we need to start the scan over
4480 * again since we'll have cowed blocks.
4485 rec->content_checked = 1;
/* Full-backref blocks need no owner check; others get verified. */
4486 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4487 rec->owner_ref_checked = 1;
4489 ret = check_owner_ref(root, rec, buf);
4491 rec->owner_ref_checked = 1;
4495 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec matching either @parent
 * (full backref) or @root (keyed backref).  Builds a stack "match"
 * backref and searches the record's rb-tree with the shared backref
 * comparator.  Returns NULL when no matching node exists.
 */
4500 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4501 u64 parent, u64 root)
4503 struct rb_node *node;
4504 struct tree_backref *back = NULL;
4505 struct tree_backref match = {
4512 match.parent = parent;
4513 match.node.full_backref = 1;
4518 node = rb_search(&rec->backref_tree, &match.node.node,
4519 (rb_compare_keys)compare_extent_backref, NULL);
4521 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new tree backref on @rec: a full backref keyed
 * by @parent when parent is set, otherwise a root-keyed backref for
 * @root.  The node header is zeroed before insertion into the record's
 * backref rb-tree.  Allocation-failure handling is outside these lines.
 */
4526 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4527 u64 parent, u64 root)
4529 struct tree_backref *ref = malloc(sizeof(*ref));
4533 memset(&ref->node, 0, sizeof(ref->node));
4535 ref->parent = parent;
4536 ref->node.full_backref = 1;
4539 ref->node.full_backref = 0;
4541 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec.  The match key includes
 * found_ref and disk_bytenr (plus owner/offset or parent depending on
 * whether this is a full backref), mirroring compare_extent_backref's
 * ordering.  Returns NULL when nothing matches.
 */
4546 static struct data_backref *find_data_backref(struct extent_record *rec,
4547 u64 parent, u64 root,
4548 u64 owner, u64 offset,
4550 u64 disk_bytenr, u64 bytes)
4552 struct rb_node *node;
4553 struct data_backref *back = NULL;
4554 struct data_backref match = {
4561 .found_ref = found_ref,
4562 .disk_bytenr = disk_bytenr,
4566 match.parent = parent;
4567 match.node.full_backref = 1;
4572 node = rb_search(&rec->backref_tree, &match.node.node,
4573 (rb_compare_keys)compare_extent_backref, NULL);
4575 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref on @rec, either a full backref
 * (keyed by @parent) or a keyed backref (root/owner/offset).  The
 * extent record's max_size is bumped when this ref implies a larger
 * extent.  Allocation-failure handling is outside these lines.
 */
4580 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4581 u64 parent, u64 root,
4582 u64 owner, u64 offset,
4585 struct data_backref *ref = malloc(sizeof(*ref));
4589 memset(&ref->node, 0, sizeof(ref->node));
4590 ref->node.is_data = 1;
4593 ref->parent = parent;
4596 ref->node.full_backref = 1;
4600 ref->offset = offset;
4601 ref->node.full_backref = 0;
4603 ref->bytes = max_size;
4606 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/* Track the largest size any backref has claimed for this extent. */
4607 if (max_size > rec->max_size)
4608 rec->max_size = max_size;
4612 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent sits in a block group of
 * the wrong kind: data extents must be in a DATA block group; metadata
 * extents must be in SYSTEM or METADATA, and when a backref is
 * available the chunk-tree root distinguishes SYSTEM from METADATA.
 */
4613 static void check_extent_type(struct extent_record *rec)
4615 struct btrfs_block_group_cache *bg_cache;
4617 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4621 /* data extent, check chunk directly*/
4622 if (!rec->metadata) {
4623 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4624 rec->wrong_chunk_type = 1;
4628 /* metadata extent, check the obvious case first */
4629 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4630 BTRFS_BLOCK_GROUP_METADATA))) {
4631 rec->wrong_chunk_type = 1;
4636 * Check SYSTEM extent, as it's also marked as metadata, we can only
4637 * make sure it's a SYSTEM extent by its backref
4639 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4640 struct extent_backref *node;
4641 struct tree_backref *tback;
4644 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4645 if (node->is_data) {
4646 /* tree block shouldn't have data backref */
4647 rec->wrong_chunk_type = 1;
4650 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree blocks live in SYSTEM chunks, everything else METADATA. */
4652 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4653 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4655 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4656 if (!(bg_cache->flags & bg_type))
4657 rec->wrong_chunk_type = 1;
4662 * Allocate a new extent record, fill default values from @tmpl and insert int
4663 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4664 * the cache, otherwise it fails.
4666 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4667 struct extent_record *tmpl)
4669 struct extent_record *rec;
4672 rec = malloc(sizeof(*rec));
/* Copy the template; nr is clamped up to max_size for consistency. */
4675 rec->start = tmpl->start;
4676 rec->max_size = tmpl->max_size;
4677 rec->nr = max(tmpl->nr, tmpl->max_size);
4678 rec->found_rec = tmpl->found_rec;
4679 rec->content_checked = tmpl->content_checked;
4680 rec->owner_ref_checked = tmpl->owner_ref_checked;
4681 rec->num_duplicates = 0;
4682 rec->metadata = tmpl->metadata;
/* Error flags start clear; the checks below may set them. */
4683 rec->flag_block_full_backref = FLAG_UNSET;
4684 rec->bad_full_backref = 0;
4685 rec->crossing_stripes = 0;
4686 rec->wrong_chunk_type = 0;
4687 rec->is_root = tmpl->is_root;
4688 rec->refs = tmpl->refs;
4689 rec->extent_item_refs = tmpl->extent_item_refs;
4690 rec->parent_generation = tmpl->parent_generation;
4691 INIT_LIST_HEAD(&rec->backrefs);
4692 INIT_LIST_HEAD(&rec->dups);
4693 INIT_LIST_HEAD(&rec->list);
4694 rec->backref_tree = RB_ROOT;
4695 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4696 rec->cache.start = tmpl->start;
4697 rec->cache.size = tmpl->nr;
4698 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Global accounting of bytes covered by extent records. */
4700 bytes_used += rec->nr;
/* Metadata extents must not cross a stripe boundary (see scrub). */
4703 rec->crossing_stripes = check_crossing_stripes(rec->start,
4704 global_info->tree_root->nodesize);
4705 check_extent_type(rec);
4710 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4712 * - refs - if found, increase refs
4713 * - is_root - if found, set
4714 * - content_checked - if found, set
4715 * - owner_ref_checked - if found, set
4717 * If not found, create a new one, initialize and insert.
4719 static int add_extent_rec(struct cache_tree *extent_cache,
4720 struct extent_record *tmpl)
4722 struct extent_record *rec;
4723 struct cache_extent *cache;
4727 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4729 rec = container_of(cache, struct extent_record, cache);
4733 rec->nr = max(tmpl->nr, tmpl->max_size);
4736 * We need to make sure to reset nr to whatever the extent
4737 * record says was the real size, this way we can compare it to
4740 if (tmpl->found_rec) {
/* Overlapping record with a different start, or a second hit on
 * the same record: treat it as a duplicate. */
4741 if (tmpl->start != rec->start || rec->found_rec) {
4742 struct extent_record *tmp;
4745 if (list_empty(&rec->list))
4746 list_add_tail(&rec->list,
4747 &duplicate_extents);
4750 * We have to do this song and dance in case we
4751 * find an extent record that falls inside of
4752 * our current extent record but does not have
4753 * the same objectid.
4755 tmp = malloc(sizeof(*tmp));
4758 tmp->start = tmpl->start;
4759 tmp->max_size = tmpl->max_size;
4762 tmp->metadata = tmpl->metadata;
4763 tmp->extent_item_refs = tmpl->extent_item_refs;
4764 INIT_LIST_HEAD(&tmp->list);
4765 list_add_tail(&tmp->list, &rec->dups);
4766 rec->num_duplicates++;
/* Conflicting ref counts from two extent items is reported, the
 * newer value wins. */
4773 if (tmpl->extent_item_refs && !dup) {
4774 if (rec->extent_item_refs) {
4775 fprintf(stderr, "block %llu rec "
4776 "extent_item_refs %llu, passed %llu\n",
4777 (unsigned long long)tmpl->start,
4778 (unsigned long long)
4779 rec->extent_item_refs,
4780 (unsigned long long)tmpl->extent_item_refs);
4782 rec->extent_item_refs = tmpl->extent_item_refs;
4786 if (tmpl->content_checked)
4787 rec->content_checked = 1;
4788 if (tmpl->owner_ref_checked)
4789 rec->owner_ref_checked = 1;
4790 memcpy(&rec->parent_key, &tmpl->parent_key,
4791 sizeof(tmpl->parent_key));
4792 if (tmpl->parent_generation)
4793 rec->parent_generation = tmpl->parent_generation;
4794 if (rec->max_size < tmpl->max_size)
4795 rec->max_size = tmpl->max_size;
4798 * A metadata extent can't cross stripe_len boundary, otherwise
4799 * kernel scrub won't be able to handle it.
4800 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4804 rec->crossing_stripes = check_crossing_stripes(
4805 rec->start, global_info->tree_root->nodesize);
4806 check_extent_type(rec);
4807 maybe_free_extent_rec(extent_cache, rec);
/* Not found: fall through to creating a fresh record. */
4811 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the block at @bytenr, creating a minimal
 * extent record first when none exists.  @found_ref distinguishes a ref
 * seen while walking the tree (found_ref) from one read out of the
 * extent tree (found_extent_tree); seeing either twice is reported as a
 * duplicate backref.
 */
4816 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4817 u64 parent, u64 root, int found_ref)
4819 struct extent_record *rec;
4820 struct tree_backref *back;
4821 struct cache_extent *cache;
4823 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a stub so the backref has somewhere to live. */
4825 struct extent_record tmpl;
4827 memset(&tmpl, 0, sizeof(tmpl));
4828 tmpl.start = bytenr;
4832 add_extent_rec_nolookup(extent_cache, &tmpl);
4834 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4839 rec = container_of(cache, struct extent_record, cache);
/* A lookup that lands inside a different extent is an error path. */
4840 if (rec->start != bytenr) {
4844 back = find_tree_backref(rec, parent, root);
4846 back = alloc_tree_backref(rec, parent, root);
4851 if (back->node.found_ref) {
4852 fprintf(stderr, "Extent back ref already exists "
4853 "for %llu parent %llu root %llu \n",
4854 (unsigned long long)bytenr,
4855 (unsigned long long)parent,
4856 (unsigned long long)root);
4858 back->node.found_ref = 1;
4860 if (back->node.found_extent_tree) {
4861 fprintf(stderr, "Extent back ref already exists "
4862 "for %llu parent %llu root %llu \n",
4863 (unsigned long long)bytenr,
4864 (unsigned long long)parent,
4865 (unsigned long long)root);
4867 back->node.found_extent_tree = 1;
4869 check_extent_type(rec);
4870 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr, creating a stub
 * extent record when none exists.  With @found_ref set (a file-extent
 * reference seen in an fs tree) max_size is the real extent size and
 * must match any previously found ref; without it, @num_refs comes from
 * an extent-tree backref item and duplicates are reported.
 */
4874 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4875 u64 parent, u64 root, u64 owner, u64 offset,
4876 u32 num_refs, int found_ref, u64 max_size)
4878 struct extent_record *rec;
4879 struct data_backref *back;
4880 struct cache_extent *cache;
4882 cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a stub carrying just start/max_size. */
4884 struct extent_record tmpl;
4886 memset(&tmpl, 0, sizeof(tmpl));
4887 tmpl.start = bytenr;
4889 tmpl.max_size = max_size;
4891 add_extent_rec_nolookup(extent_cache, &tmpl);
4893 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4898 rec = container_of(cache, struct extent_record, cache);
4899 if (rec->max_size < max_size)
4900 rec->max_size = max_size;
4903 * If found_ref is set then max_size is the real size and must match the
4904 * existing refs. So if we have already found a ref then we need to
4905 * make sure that this ref matches the existing one, otherwise we need
4906 * to add a new backref so we can notice that the backrefs don't match
4907 * and we need to figure out who is telling the truth. This is to
4908 * account for that awful fsync bug I introduced where we'd end up with
4909 * a btrfs_file_extent_item that would have its length include multiple
4910 * prealloc extents or point inside of a prealloc extent.
4912 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4915 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Tree-walk refs come one at a time and must agree on the size. */
4921 BUG_ON(num_refs != 1);
4922 if (back->node.found_ref)
4923 BUG_ON(back->bytes != max_size);
4924 back->node.found_ref = 1;
4925 back->found_ref += 1;
4926 back->bytes = max_size;
4927 back->disk_bytenr = bytenr;
/* Data extents need no content/owner verification of their own. */
4929 rec->content_checked = 1;
4930 rec->owner_ref_checked = 1;
4932 if (back->node.found_extent_tree) {
4933 fprintf(stderr, "Extent back ref already exists "
4934 "for %llu parent %llu root %llu "
4935 "owner %llu offset %llu num_refs %lu\n",
4936 (unsigned long long)bytenr,
4937 (unsigned long long)parent,
4938 (unsigned long long)root,
4939 (unsigned long long)owner,
4940 (unsigned long long)offset,
4941 (unsigned long)num_refs);
4943 back->num_refs = num_refs;
4944 back->node.found_extent_tree = 1;
4946 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for processing: add it to @seen first
 * (which also deduplicates; the check on ret is elided here) and then
 * to @pending.
 */
4950 static int add_pending(struct cache_tree *pending,
4951 struct cache_tree *seen, u64 bytenr, u32 size)
4954 ret = add_cache_extent(seen, bytenr, size);
4957 add_cache_extent(pending, bytenr, size);
/*
 * Pick the next batch of up to @bits_nr block ranges to process.
 * Readahead candidates win outright; otherwise prefer tree nodes near
 * @last (searching from last - 32K to catch neighbours), falling back
 * to the generic pending cache.  When room remains, nearby pending
 * extents (within 32K of the batch end) are appended so reads stay
 * mostly sequential.  Returns the number of entries filled into @bits.
 */
4961 static int pick_next_pending(struct cache_tree *pending,
4962 struct cache_tree *reada,
4963 struct cache_tree *nodes,
4964 u64 last, struct block_info *bits, int bits_nr,
4967 unsigned long node_start = last;
4968 struct cache_extent *cache;
/* Readahead target pending: hand back just that one range. */
4971 cache = search_cache_extent(reada, 0);
4973 bits[0].start = cache->start;
4974 bits[0].size = cache->size;
/* Start slightly before @last so close-by nodes are batched too. */
4979 if (node_start > 32768)
4980 node_start -= 32768;
4982 cache = search_cache_extent(nodes, node_start);
4984 cache = search_cache_extent(nodes, 0);
4987 cache = search_cache_extent(pending, 0);
4992 bits[ret].start = cache->start;
4993 bits[ret].size = cache->size;
4994 cache = next_cache_extent(cache);
4996 } while (cache && ret < bits_nr);
5002 bits[ret].start = cache->start;
5003 bits[ret].size = cache->size;
5004 cache = next_cache_extent(cache);
5006 } while (cache && ret < bits_nr);
/* Spare room: pull in pending extents adjacent to the batch. */
5008 if (bits_nr - ret > 8) {
5009 u64 lookup = bits[0].start + bits[0].size;
5010 struct cache_extent *next;
5011 next = search_cache_extent(pending, lookup);
/* Stop extending once the gap exceeds 32K. */
5013 if (next->start - lookup > 32768)
5015 bits[ret].start = next->start;
5016 bits[ret].size = next->size;
5017 lookup = next->start + next->size;
5021 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: detach a chunk_record from its
 * lists before it is released.
 */
5029 static void free_chunk_record(struct cache_extent *cache)
5031 struct chunk_record *rec;
5033 rec = container_of(cache, struct chunk_record, cache);
5034 list_del_init(&rec->list);
5035 list_del_init(&rec->dextents);
/* Free every chunk_record held in @chunk_cache. */
5039 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5041 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree destruction callback: recover the device_record from its node. */
5044 static void free_device_record(struct rb_node *node)
5046 struct device_record *rec;
5048 rec = container_of(node, struct device_record, node);
/* Tear down the whole device cache using the callback above. */
5052 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, append it
 * to the tree's block_groups list.
 */
5054 int insert_block_group_record(struct block_group_tree *tree,
5055 struct block_group_record *bg_rec)
5059 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5063 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block_group_record from
 * its list before it is released.
 */
5067 static void free_block_group_record(struct cache_extent *cache)
5069 struct block_group_record *rec;
5071 rec = container_of(cache, struct block_group_record, cache);
5072 list_del_init(&rec->list);
/* Free every block_group_record held in @tree. */
5076 void free_block_group_tree(struct block_group_tree *tree)
5078 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and register it as an
 * orphan on both the chunk and device lists until matched up later.
 */
5081 int insert_device_extent_record(struct device_extent_tree *tree,
5082 struct device_extent_record *de_rec)
5087 * Device extent is a bit different from the other extents, because
5088 * the extents which belong to the different devices may have the
5089 * same start and size, so we need use the special extent cache
5090 * search/insert functions.
5092 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5096 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5097 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink a device_extent_record
 * from whichever orphan lists it is still on before release.
 */
5101 static void free_device_extent_record(struct cache_extent *cache)
5103 struct device_extent_record *rec;
5105 rec = container_of(cache, struct device_extent_record, cache);
5106 if (!list_empty(&rec->chunk_list))
5107 list_del_init(&rec->chunk_list);
5108 if (!list_empty(&rec->device_list))
5109 list_del_init(&rec->device_list);
/* Free every device_extent_record held in @tree. */
5113 void free_device_extent_tree(struct device_extent_tree *tree)
5115 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5118 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item: objectids below FIRST_FREE_OBJECTID are
 * tree roots (tree backref), everything else is a data backref with the
 * v0 ref count.  key.offset carries the parent block number.
 */
5119 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5120 struct extent_buffer *leaf, int slot)
5122 struct btrfs_extent_ref_v0 *ref0;
5123 struct btrfs_key key;
5125 btrfs_item_key_to_cpu(leaf, &key, slot);
5126 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5127 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5128 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5130 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5131 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the CHUNK_ITEM at @slot in @leaf.
 * The record is sized for the chunk's stripe count and filled from the
 * on-disk chunk plus the item key (offset doubles as the logical start).
 * Exits via the allocation-failure path when calloc fails.
 */
5137 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5138 struct btrfs_key *key,
5141 struct btrfs_chunk *ptr;
5142 struct chunk_record *rec;
5145 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5146 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size includes the flexible per-stripe array. */
5148 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5150 fprintf(stderr, "memory allocation failed\n");
5154 INIT_LIST_HEAD(&rec->list);
5155 INIT_LIST_HEAD(&rec->dextents);
/* Cache key: logical start (key->offset) and chunk length. */
5158 rec->cache.start = key->offset;
5159 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5161 rec->generation = btrfs_header_generation(leaf);
5163 rec->objectid = key->objectid;
5164 rec->type = key->type;
5165 rec->offset = key->offset;
5167 rec->length = rec->cache.size;
5168 rec->owner = btrfs_chunk_owner(leaf, ptr);
5169 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5170 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5171 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5172 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5173 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5174 rec->num_stripes = num_stripes;
5175 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy per-stripe devid/offset/uuid out of the item. */
5177 for (i = 0; i < rec->num_stripes; ++i) {
5178 rec->stripes[i].devid =
5179 btrfs_stripe_devid_nr(leaf, ptr, i);
5180 rec->stripes[i].offset =
5181 btrfs_stripe_offset_nr(leaf, ptr, i);
5182 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5183 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse one CHUNK_ITEM into a chunk_record and insert it into
 * @chunk_cache; a duplicate logical range is reported as "existed".
 */
5190 static int process_chunk_item(struct cache_tree *chunk_cache,
5191 struct btrfs_key *key, struct extent_buffer *eb,
5194 struct chunk_record *rec;
5197 rec = btrfs_new_chunk_record(eb, key, slot);
5198 ret = insert_cache_extent(chunk_cache, &rec->cache);
5200 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5201 rec->offset, rec->length);
/*
 * Parse one DEV_ITEM into a device_record keyed by devid and insert it
 * into @dev_cache; a duplicate devid is reported as "existed".
 */
5208 static int process_device_item(struct rb_root *dev_cache,
5209 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5211 struct btrfs_dev_item *ptr;
5212 struct device_record *rec;
5215 ptr = btrfs_item_ptr(eb,
5216 slot, struct btrfs_dev_item);
5218 rec = malloc(sizeof(*rec));
5220 fprintf(stderr, "memory allocation failed\n");
5224 rec->devid = key->offset;
5225 rec->generation = btrfs_header_generation(eb);
5227 rec->objectid = key->objectid;
5228 rec->type = key->type;
5229 rec->offset = key->offset;
/* devid from the item body overrides the key-derived value above. */
5231 rec->devid = btrfs_device_id(eb, ptr);
5232 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5233 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5235 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5237 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot: cache
 * range comes from the key (objectid = start, offset = length), flags
 * from the item body.  Exits via the failure path when calloc fails.
 */
5244 struct block_group_record *
5245 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5248 struct btrfs_block_group_item *ptr;
5249 struct block_group_record *rec;
5251 rec = calloc(1, sizeof(*rec));
5253 fprintf(stderr, "memory allocation failed\n");
5257 rec->cache.start = key->objectid;
5258 rec->cache.size = key->offset;
5260 rec->generation = btrfs_header_generation(leaf);
5262 rec->objectid = key->objectid;
5263 rec->type = key->type;
5264 rec->offset = key->offset;
5266 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5267 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5269 INIT_LIST_HEAD(&rec->list);
/*
 * Parse one BLOCK_GROUP_ITEM into a record and insert it into
 * @block_group_cache; a duplicate range is reported as "existed".
 */
5274 static int process_block_group_item(struct block_group_tree *block_group_cache,
5275 struct btrfs_key *key,
5276 struct extent_buffer *eb, int slot)
5278 struct block_group_record *rec;
5281 rec = btrfs_new_block_group_record(eb, key, slot);
5282 ret = insert_block_group_record(block_group_cache, rec);
5284 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5285 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot.
 * The cache key is (devid, physical offset, length) — hence the extra
 * cache.objectid — since extents on different devices may share the
 * same start/size.  Exits via the failure path when calloc fails.
 */
5292 struct device_extent_record *
5293 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5294 struct btrfs_key *key, int slot)
5296 struct device_extent_record *rec;
5297 struct btrfs_dev_extent *ptr;
5299 rec = calloc(1, sizeof(*rec));
5301 fprintf(stderr, "memory allocation failed\n");
5305 rec->cache.objectid = key->objectid;
5306 rec->cache.start = key->offset;
5308 rec->generation = btrfs_header_generation(leaf);
5310 rec->objectid = key->objectid;
5311 rec->type = key->type;
5312 rec->offset = key->offset;
5314 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5315 rec->chunk_objecteid =
5316 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5318 btrfs_dev_extent_chunk_offset(leaf, ptr);
5319 rec->length = btrfs_dev_extent_length(leaf, ptr);
5320 rec->cache.size = rec->length;
5322 INIT_LIST_HEAD(&rec->chunk_list);
5323 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse one DEV_EXTENT item into a record and insert it into
 * @dev_extent_cache; a duplicate (devid, offset, length) triple is
 * reported as "existed".
 */
5329 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5330 struct btrfs_key *key, struct extent_buffer *eb,
5333 struct device_extent_record *rec;
5336 rec = btrfs_new_device_extent_record(eb, key, slot);
5337 ret = insert_device_extent_record(dev_extent_cache, rec);
5340 "Device extent[%llu, %llu, %llu] existed.\n",
5341 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM or METADATA_ITEM at @slot and feed the result
 * into @extent_cache.
 *
 * For METADATA_ITEM the size is the nodesize (key.offset is the level);
 * for EXTENT_ITEM it is key.offset.  Undersized items are decoded as
 * v0 extent items when compat support is built in.  After adding the
 * extent record, every inline backref is walked and registered as a
 * tree or data backref; an unknown inline ref type is reported as
 * corruption.
 */
5348 static int process_extent_item(struct btrfs_root *root,
5349 struct cache_tree *extent_cache,
5350 struct extent_buffer *eb, int slot)
5352 struct btrfs_extent_item *ei;
5353 struct btrfs_extent_inline_ref *iref;
5354 struct btrfs_extent_data_ref *dref;
5355 struct btrfs_shared_data_ref *sref;
5356 struct btrfs_key key;
5357 struct extent_record tmpl;
5361 u32 item_size = btrfs_item_size_nr(eb, slot);
5367 btrfs_item_key_to_cpu(eb, &key, slot);
5369 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5371 num_bytes = root->nodesize;
5373 num_bytes = key.offset;
/* Legacy (pre-inline-ref) extent item: only a ref count. */
5376 if (item_size < sizeof(*ei)) {
5377 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5378 struct btrfs_extent_item_v0 *ei0;
5379 BUG_ON(item_size != sizeof(*ei0));
5380 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5381 refs = btrfs_extent_refs_v0(eb, ei0);
5385 memset(&tmpl, 0, sizeof(tmpl));
5386 tmpl.start = key.objectid;
5387 tmpl.nr = num_bytes;
5388 tmpl.extent_item_refs = refs;
5389 tmpl.metadata = metadata;
5391 tmpl.max_size = num_bytes;
5393 return add_extent_rec(extent_cache, &tmpl);
5396 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5397 refs = btrfs_extent_refs(eb, ei);
5398 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5403 memset(&tmpl, 0, sizeof(tmpl));
5404 tmpl.start = key.objectid;
5405 tmpl.nr = num_bytes;
5406 tmpl.extent_item_refs = refs;
5407 tmpl.metadata = metadata;
5409 tmpl.max_size = num_bytes;
5410 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs that follow the extent item. */
5412 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a tree_block_info before the refs. */
5413 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5414 key.type == BTRFS_EXTENT_ITEM_KEY)
5415 ptr += sizeof(struct btrfs_tree_block_info);
5417 end = (unsigned long)ei + item_size;
5419 iref = (struct btrfs_extent_inline_ref *)ptr;
5420 type = btrfs_extent_inline_ref_type(eb, iref);
5421 offset = btrfs_extent_inline_ref_offset(eb, iref);
5423 case BTRFS_TREE_BLOCK_REF_KEY:
5424 add_tree_backref(extent_cache, key.objectid,
5427 case BTRFS_SHARED_BLOCK_REF_KEY:
5428 add_tree_backref(extent_cache, key.objectid,
5431 case BTRFS_EXTENT_DATA_REF_KEY:
5432 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5433 add_data_backref(extent_cache, key.objectid, 0,
5434 btrfs_extent_data_ref_root(eb, dref),
5435 btrfs_extent_data_ref_objectid(eb,
5437 btrfs_extent_data_ref_offset(eb, dref),
5438 btrfs_extent_data_ref_count(eb, dref),
5441 case BTRFS_SHARED_DATA_REF_KEY:
5442 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5443 add_data_backref(extent_cache, key.objectid, offset,
5445 btrfs_shared_data_ref_count(eb, sref),
5449 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5450 key.objectid, key.type, num_bytes);
5453 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache has an entry exactly
 * matching [offset, offset+bytes).
 *
 * Superblock mirror copies inside the range are first carved out via
 * btrfs_rmap_block (recursing on the left part when a super lands in
 * the middle), then the remaining range must match one free-space
 * entry in both offset and length; the entry is consumed on success.
 */
5460 static int check_cache_range(struct btrfs_root *root,
5461 struct btrfs_block_group_cache *cache,
5462 u64 offset, u64 bytes)
5464 struct btrfs_free_space *entry;
5470 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5471 bytenr = btrfs_sb_offset(i);
/* Map the superblock copy to logical addresses in this group. */
5472 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5473 cache->key.objectid, bytenr, 0,
5474 &logical, &nr, &stripe_len);
/* No overlap between this super stripe and our range. */
5479 if (logical[nr] + stripe_len <= offset)
5481 if (offset + bytes <= logical[nr])
/* Super stripe starts exactly at offset: trim the front. */
5483 if (logical[nr] == offset) {
5484 if (stripe_len >= bytes) {
5488 bytes -= stripe_len;
5489 offset += stripe_len;
/* Super stripe starts before offset: trim up to its end. */
5490 } else if (logical[nr] < offset) {
5491 if (logical[nr] + stripe_len >=
5496 bytes = (offset + bytes) -
5497 (logical[nr] + stripe_len);
5498 offset = logical[nr] + stripe_len;
5501 * Could be tricky, the super may land in the
5502 * middle of the area we're checking. First
5503 * check the easiest case, it's at the end.
5505 if (logical[nr] + stripe_len >=
5507 bytes = logical[nr] - offset;
5511 /* Check the left side */
5512 ret = check_cache_range(root, cache,
5514 logical[nr] - offset);
5520 /* Now we continue with the right side */
5521 bytes = (offset + bytes) -
5522 (logical[nr] + stripe_len);
5523 offset = logical[nr] + stripe_len;
/* The remaining range must be a single exact free-space entry. */
5530 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5532 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5533 offset, offset+bytes);
5537 if (entry->offset != offset) {
5538 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5543 if (entry->bytes != bytes) {
5544 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5545 bytes, entry->bytes, offset);
/* Consume the entry so leftover entries can be detected later. */
5549 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space cache of @cache against the
 * extent tree.
 *
 * Walks all EXTENT_ITEM/METADATA_ITEM keys inside the block group;
 * every gap between allocated extents must appear as free space, which
 * is verified (and consumed) via check_cache_range().  Any entries
 * still left in the free space ctl afterwards indicate a stale cache.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
5554 static int verify_space_cache(struct btrfs_root *root,
5555 			      struct btrfs_block_group_cache *cache)
5557 struct btrfs_path *path;
5558 struct extent_buffer *leaf;
5559 struct btrfs_key key;
5563 path = btrfs_alloc_path();
5567 root = root->fs_info->extent_root;
/* Never look below the primary superblock. */
5569 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5571 key.objectid = last;
5573 key.type = BTRFS_EXTENT_ITEM_KEY;
5575 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5580 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5581 ret = btrfs_next_leaf(root, path);
5589 leaf = path->nodes[0];
5590 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we are past the end of this block group. */
5591 if (key.objectid >= cache->key.offset + cache->key.objectid)
5593 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5594 key.type != BTRFS_METADATA_ITEM_KEY) {
5599 if (last == key.objectid) {
/* METADATA_ITEM offset is the level, not a length; use nodesize. */
5600 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5601 last = key.objectid + key.offset;
5603 last = key.objectid + root->nodesize;
/* The gap [last, key.objectid) must be free space. */
5608 ret = check_cache_range(root, cache, last,
5609 key.objectid - last);
5612 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5613 last = key.objectid + key.offset;
5615 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent must be free too. */
5619 if (last < cache->key.objectid + cache->key.offset)
5620 ret = check_cache_range(root, cache, last,
5621 cache->key.objectid +
5622 cache->key.offset - last);
5625 btrfs_free_path(path);
5628 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5629 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache (v1 file-based or v2 free space tree)
 * of every block group in the filesystem.
 *
 * Returns 0 on success, -EINVAL if any block group's cache failed
 * verification.  Note the cache is invalidated wholesale when the
 * cache generation in the super block does not match.
 */
5637 static int check_space_cache(struct btrfs_root *root)
5639 struct btrfs_block_group_cache *cache;
5640 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5644 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5645 btrfs_super_generation(root->fs_info->super_copy) !=
5646 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5647 printf("cache and super generation don't match, space cache "
5648 "will be invalidated\n");
5652 if (ctx.progress_enabled) {
5653 ctx.tp = TASK_FREE_SPACE;
5654 task_start(ctx.info);
/* Iterate block groups in bytenr order. */
5658 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5662 start = cache->key.objectid + cache->key.offset;
5663 if (!cache->free_space_ctl) {
5664 if (btrfs_init_free_space_ctl(cache,
5665 root->sectorsize)) {
5670 btrfs_remove_free_space_cache(cache);
/* v2: load from the free space tree, excluding super stripes. */
5673 if (btrfs_fs_compat_ro(root->fs_info,
5674 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5675 ret = exclude_super_stripes(root, cache);
5677 fprintf(stderr, "could not exclude super stripes: %s\n",
5682 ret = load_free_space_tree(root->fs_info, cache);
5683 free_excluded_extents(root, cache);
5685 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: load the file-based space cache. */
5692 ret = load_free_space_cache(root->fs_info, cache);
5697 ret = verify_space_cache(root, cache);
5699 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5700 cache->key.objectid);
5705 task_stop(ctx.info);
5707 return error ? -EINVAL : 0;
/*
 * Re-read the data extent at [bytenr, bytenr + num_bytes) and verify
 * each sector's checksum against the csum item stored in @eb at
 * @leaf_offset.  On a mismatch the same range is retried from the
 * next mirror until all copies (btrfs_num_copies()) are exhausted.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
5710 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5711 			      u64 num_bytes, unsigned long leaf_offset,
5712 			      struct extent_buffer *eb) {
5715 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5717 unsigned long csum_offset;
5721 u64 data_checked = 0;
/* csum items always cover whole sectors. */
5727 if (num_bytes % root->sectorsize)
5730 data = malloc(num_bytes);
5734 while (offset < num_bytes) {
5737 read_len = num_bytes - offset;
5738 /* read as much data as possible in one go */
5739 ret = read_extent_data(root, data + offset,
5740 bytenr + offset, &read_len, mirror);
5744 /* verify the checksum of each sector we just read */
5745 while (data_checked < read_len) {
5747 tmp = offset + data_checked;
5749 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5750 csum, root->sectorsize);
5751 btrfs_csum_final(csum, (char *)&csum);
5753 csum_offset = leaf_offset +
5754 tmp / root->sectorsize * csum_size;
5755 read_extent_buffer(eb, (char *)&csum_expected,
5756 csum_offset, csum_size);
5757 /* on mismatch, retry the range from another mirror */
5758 if (csum != csum_expected) {
5759 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5760 mirror, bytenr + tmp,
5761 csum, csum_expected);
5762 num_copies = btrfs_num_copies(
5763 &root->fs_info->mapping_tree,
5765 if (mirror < num_copies - 1) {
5770 data_checked += root->sectorsize;
/*
 * Check that the whole range [bytenr, bytenr + num_bytes) is covered
 * by EXTENT_ITEMs in the extent tree.
 *
 * Walks forward from the first extent at or before @bytenr, shrinking
 * the range as covering extents are found; if an extent ends in the
 * middle of the range, the right-hand remainder is checked by a
 * recursive call.  Reports any portion with no extent record.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
5779 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5782 struct btrfs_path *path;
5783 struct extent_buffer *leaf;
5784 struct btrfs_key key;
5787 path = btrfs_alloc_path();
5789 fprintf(stderr, "Error allocating path\n");
/* Search for the last possible key at this bytenr, then step back. */
5793 key.objectid = bytenr;
5794 key.type = BTRFS_EXTENT_ITEM_KEY;
5795 key.offset = (u64)-1;
5798 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5801 fprintf(stderr, "Error looking up extent record %d\n", ret);
5802 btrfs_free_path(path);
5805 if (path->slots[0] > 0) {
5808 ret = btrfs_prev_leaf(root, path);
5811 } else if (ret > 0) {
5818 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5821 * Block group items come before extent items if they have the same
5822 * bytenr, so walk back one more just in case. Dear future traveller,
5823 * first congrats on mastering time travel. Now if it's not too much
5824 * trouble could you go back to 2006 and tell Chris to make the
5825 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5826 * EXTENT_ITEM_KEY please?
5828 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5829 if (path->slots[0] > 0) {
5832 ret = btrfs_prev_leaf(root, path);
5835 } else if (ret > 0) {
5840 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5844 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5845 ret = btrfs_next_leaf(root, path);
5847 fprintf(stderr, "Error going to next leaf "
5849 btrfs_free_path(path);
5855 leaf = path->nodes[0];
5856 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5857 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before the range: skip forward. */
5861 if (key.objectid + key.offset < bytenr) {
/* Extent starts past the range: no more coverage possible. */
5865 if (key.objectid > bytenr + num_bytes)
5868 if (key.objectid == bytenr) {
5869 if (key.offset >= num_bytes) {
5873 num_bytes -= key.offset;
5874 bytenr += key.offset;
5875 } else if (key.objectid < bytenr) {
5876 if (key.objectid + key.offset >= bytenr + num_bytes) {
5880 num_bytes = (bytenr + num_bytes) -
5881 (key.objectid + key.offset);
5882 bytenr = key.objectid + key.offset;
5884 if (key.objectid + key.offset < bytenr + num_bytes) {
5885 u64 new_start = key.objectid + key.offset;
5886 u64 new_bytes = bytenr + num_bytes - new_start;
5889 * Weird case, the extent is in the middle of
5890 * our range, we'll have to search one side
5891 * and then the other. Not sure if this happens
5892 * in real life, but no harm in coding it up
5893 * anyway just in case.
5895 btrfs_release_path(path);
5896 ret = check_extent_exists(root, new_start,
5899 fprintf(stderr, "Right section didn't "
5903 num_bytes = key.objectid - bytenr;
5906 num_bytes = key.objectid - bytenr;
5913 if (num_bytes && !ret) {
5914 fprintf(stderr, "There are no extents for csum range "
5915 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5919 btrfs_free_path(path);
/*
 * Walk the whole csum tree.
 *
 * For each EXTENT_CSUM item: optionally (with --check-data-csum)
 * verify the data against the stored checksums via
 * check_extent_csums(); and whenever a gap appears between
 * consecutive csum ranges, verify that the previous contiguous csum
 * range is actually backed by extent records
 * (check_extent_exists()) — a csum without an extent is an error.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
5923 static int check_csums(struct btrfs_root *root)
5925 struct btrfs_path *path;
5926 struct extent_buffer *leaf;
5927 struct btrfs_key key;
5928 u64 offset = 0, num_bytes = 0;
5929 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5933 unsigned long leaf_offset;
5935 root = root->fs_info->csum_root;
5936 if (!extent_buffer_uptodate(root->node)) {
5937 fprintf(stderr, "No valid csum tree found\n");
5941 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5942 key.type = BTRFS_EXTENT_CSUM_KEY;
5945 path = btrfs_alloc_path();
5949 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5951 fprintf(stderr, "Error searching csum tree %d\n", ret);
5952 btrfs_free_path(path);
5956 if (ret > 0 && path->slots[0])
5961 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5962 ret = btrfs_next_leaf(root, path);
5964 fprintf(stderr, "Error going to next leaf "
5971 leaf = path->nodes[0];
5973 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5974 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes this csum item covers. */
5979 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5980 csum_size) * root->sectorsize;
5981 if (!check_data_csum)
5982 goto skip_csum_check;
5983 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5984 ret = check_extent_csums(root, key.offset, data_len,
5990 offset = key.offset;
/* Gap in csum coverage: close out the previous contiguous range. */
5991 } else if (key.offset != offset + num_bytes) {
5992 ret = check_extent_exists(root, offset, num_bytes);
5994 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5995 "there is no extent record\n",
5996 offset, offset+num_bytes);
5999 offset = key.offset;
6002 num_bytes += data_len;
6006 btrfs_free_path(path);
/*
 * Compare @key against @drop_key in (objectid, type, offset) order.
 * Used while walking a snapshot that is being deleted: keys below the
 * recorded drop progress have already been dropped and must be
 * skipped.  The elided branches presumably return nonzero for
 * "already dropped" — TODO confirm against the full source.
 */
6010 static int is_dropped_key(struct btrfs_key *key,
6011 		      struct btrfs_key *drop_key) {
6012 if (key->objectid < drop_key->objectid)
6014 else if (key->objectid == drop_key->objectid) {
6015 if (key->type < drop_key->type)
6017 else if (key->type == drop_key->type) {
6018 if (key->offset < drop_key->offset)
6026 * Here are the rules for FULL_BACKREF.
6028 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6029 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6031 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6032 * if it happened after the relocation occurred since we'll have dropped the
6033 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6034 * have no real way to know for sure.
6036 * We process the blocks one root at a time, and we start from the lowest root
6037 * objectid and go to the highest. So we can just lookup the owner backref for
6038 * the record and if we don't find it then we know it doesn't exist and we have
6041 * FIXME: if we ever start reclaiming root objectids then we need to fix this
6042 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6043 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether @buf's extent record should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, per the rules documented above, and
 * mark the record bad_full_backref when the previously-seen flag
 * state disagrees with the conclusion here.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6045 static int calc_extent_flag(struct btrfs_root *root,
6046 			   struct cache_tree *extent_cache,
6047 			   struct extent_buffer *buf,
6048 			   struct root_item_record *ri,
6051 struct extent_record *rec;
6052 struct cache_extent *cache;
6053 struct tree_backref *tback;
6056 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6057 /* we have added this extent before */
6059 rec = container_of(cache, struct extent_record, cache);
6062 * Except file/reloc tree, we can not have
6065 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree being walked: never full backref. */
6070 if (buf->start == ri->bytenr)
6073 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6076 owner = btrfs_header_owner(buf);
6077 if (owner == ri->objectid)
/* Owner no longer references it directly -> full backref expected. */
6080 tback = find_tree_backref(rec, 0, owner);
/* Flag state disagrees with what we saw earlier: record it as bad. */
6085 if (rec->flag_block_full_backref != FLAG_UNSET &&
6086 rec->flag_block_full_backref != 0)
6087 rec->bad_full_backref = 1;
6090 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6091 if (rec->flag_block_full_backref != FLAG_UNSET &&
6092 rec->flag_block_full_backref != 1)
6093 rec->bad_full_backref = 1;
/*
 * Core of the extent-tree scan: take the next pending tree block,
 * read it, validate its extent flags (FULL_BACKREF consistency), and
 * account every item it contains into the various caches
 * (extents, backrefs, chunks, devices, block groups, dev extents).
 * Interior node children are queued as new pending blocks.
 *
 * NOTE(review): this function is very long and interior lines are
 * elided in this chunk; comments describe only the visible code.
 */
6097 static int run_next_block(struct btrfs_root *root,
6098 			  struct block_info *bits,
6101 			  struct cache_tree *pending,
6102 			  struct cache_tree *seen,
6103 			  struct cache_tree *reada,
6104 			  struct cache_tree *nodes,
6105 			  struct cache_tree *extent_cache,
6106 			  struct cache_tree *chunk_cache,
6107 			  struct rb_root *dev_cache,
6108 			  struct block_group_tree *block_group_cache,
6109 			  struct device_extent_tree *dev_extent_cache,
6110 			  struct root_item_record *ri)
6112 struct extent_buffer *buf;
6113 struct extent_record *rec = NULL;
6124 struct btrfs_key key;
6125 struct cache_extent *cache;
/* Grab a batch of blocks and kick off readahead for them. */
6128 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6129 bits_nr, &reada_bits);
6134 for(i = 0; i < nritems; i++) {
6135 ret = add_cache_extent(reada, bits[i].start,
6140 /* fixme, get the parent transid */
6141 readahead_tree_block(root, bits[i].start,
6145 *last = bits[0].start;
6146 bytenr = bits[0].start;
6147 size = bits[0].size;
/* This block is being processed now; drop it from the queues. */
6149 cache = lookup_cache_extent(pending, bytenr, size);
6151 remove_cache_extent(pending, cache);
6154 cache = lookup_cache_extent(reada, bytenr, size);
6156 remove_cache_extent(reada, cache);
6159 cache = lookup_cache_extent(nodes, bytenr, size);
6161 remove_cache_extent(nodes, cache);
6164 cache = lookup_cache_extent(extent_cache, bytenr, size);
6166 rec = container_of(cache, struct extent_record, cache);
6167 gen = rec->parent_generation;
6170 /* fixme, get the real parent transid */
6171 buf = read_tree_block(root, bytenr, size, gen);
6172 if (!extent_buffer_uptodate(buf)) {
6173 record_bad_block_io(root->fs_info,
6174 extent_cache, bytenr, size);
6178 nritems = btrfs_header_nritems(buf);
/*
 * Get the extent flags from the extent tree, falling back to
 * computing them ourselves when the extent tree is untrusted.
 */
6181 if (!init_extent_tree) {
6182 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6183 btrfs_header_level(buf), 1, NULL,
6186 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6188 fprintf(stderr, "Couldn't calc extent flags\n");
6189 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6194 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6196 fprintf(stderr, "Couldn't calc extent flags\n");
6197 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6201 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6203 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6204 ri->objectid == btrfs_header_owner(buf)) {
6206 * Ok we got to this block from it's original owner and
6207 * we have FULL_BACKREF set. Relocation can leave
6208 * converted blocks over so this is altogether possible,
6209 * however it's not possible if the generation > the
6210 * last snapshot, so check for this case.
6212 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6213 btrfs_header_generation(buf) > ri->last_snapshot) {
6214 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6215 rec->bad_full_backref = 1;
6220 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6221 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6222 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6223 rec->bad_full_backref = 1;
6227 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6228 rec->flag_block_full_backref = 1;
6232 rec->flag_block_full_backref = 0;
6234 owner = btrfs_header_owner(buf);
6237 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account every item into the appropriate cache. */
6241 if (btrfs_is_leaf(buf)) {
6242 btree_space_waste += btrfs_leaf_free_space(root, buf);
6243 for (i = 0; i < nritems; i++) {
6244 struct btrfs_file_extent_item *fi;
6245 btrfs_item_key_to_cpu(buf, &key, i);
6246 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6247 process_extent_item(root, extent_cache, buf,
6251 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6252 process_extent_item(root, extent_cache, buf,
6256 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6258 btrfs_item_size_nr(buf, i);
6261 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6262 process_chunk_item(chunk_cache, &key, buf, i);
6265 if (key.type == BTRFS_DEV_ITEM_KEY) {
6266 process_device_item(dev_cache, &key, buf, i);
6269 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6270 process_block_group_item(block_group_cache,
6274 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6275 process_device_extent_item(dev_extent_cache,
6280 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6281 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6282 process_extent_ref_v0(extent_cache, buf, i);
6289 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6290 add_tree_backref(extent_cache, key.objectid, 0,
6294 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6295 add_tree_backref(extent_cache, key.objectid,
6299 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6300 struct btrfs_extent_data_ref *ref;
6301 ref = btrfs_item_ptr(buf, i,
6302 struct btrfs_extent_data_ref);
6303 add_data_backref(extent_cache,
6305 btrfs_extent_data_ref_root(buf, ref),
6306 btrfs_extent_data_ref_objectid(buf,
6308 btrfs_extent_data_ref_offset(buf, ref),
6309 btrfs_extent_data_ref_count(buf, ref),
6310 0, root->sectorsize);
6313 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6314 struct btrfs_shared_data_ref *ref;
6315 ref = btrfs_item_ptr(buf, i,
6316 struct btrfs_shared_data_ref);
6317 add_data_backref(extent_cache,
6318 key.objectid, key.offset, 0, 0, 0,
6319 btrfs_shared_data_ref_count(buf, ref),
6320 0, root->sectorsize);
/* Orphan items are queued for deletion during repair. */
6323 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6324 struct bad_item *bad;
6326 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6330 bad = malloc(sizeof(struct bad_item));
6333 INIT_LIST_HEAD(&bad->list);
6334 memcpy(&bad->key, &key,
6335 sizeof(struct btrfs_key));
6336 bad->root_id = owner;
6337 list_add_tail(&bad->list, &delete_items);
/* Regular file extents: record an implicit data backref. */
6340 if (key.type != BTRFS_EXTENT_DATA_KEY)
6342 fi = btrfs_item_ptr(buf, i,
6343 struct btrfs_file_extent_item);
6344 if (btrfs_file_extent_type(buf, fi) ==
6345 BTRFS_FILE_EXTENT_INLINE)
6347 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6350 data_bytes_allocated +=
6351 btrfs_file_extent_disk_num_bytes(buf, fi);
6352 if (data_bytes_allocated < root->sectorsize) {
6355 data_bytes_referenced +=
6356 btrfs_file_extent_num_bytes(buf, fi);
6357 add_data_backref(extent_cache,
6358 btrfs_file_extent_disk_bytenr(buf, fi),
6359 parent, owner, key.objectid, key.offset -
6360 btrfs_file_extent_offset(buf, fi), 1, 1,
6361 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: queue each child, skipping already-dropped key ranges. */
6365 struct btrfs_key first_key;
6367 first_key.objectid = 0;
6370 btrfs_item_key_to_cpu(buf, &first_key, 0);
6371 level = btrfs_header_level(buf);
6372 for (i = 0; i < nritems; i++) {
6373 struct extent_record tmpl;
6375 ptr = btrfs_node_blockptr(buf, i);
6376 size = root->nodesize;
6377 btrfs_node_key_to_cpu(buf, &key, i);
6379 if ((level == ri->drop_level)
6380 && is_dropped_key(&key, &ri->drop_key)) {
6385 memset(&tmpl, 0, sizeof(tmpl));
6386 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6387 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6392 tmpl.max_size = size;
6393 ret = add_extent_rec(extent_cache, &tmpl);
6396 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6399 add_pending(nodes, seen, ptr, size);
6401 add_pending(pending, seen, ptr, size);
6404 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6405 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final stats printout. */
6407 total_btree_bytes += buf->len;
6408 if (fs_root_objectid(btrfs_header_owner(buf)))
6409 total_fs_tree_bytes += buf->len;
6410 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6411 total_extent_tree_bytes += buf->len;
6412 if (!found_old_backref &&
6413 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6414 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6415 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6416 found_old_backref = 1;
6418 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root: queue @buf as pending (in the
 * nodes tree when it is an interior node, otherwise in pending),
 * record its extent, and add the implied tree backref — a shared
 * (parent-based) ref for reloc/old-format trees, an owner ref
 * otherwise.
 */
6422 static int add_root_to_pending(struct extent_buffer *buf,
6423 			       struct cache_tree *extent_cache,
6424 			       struct cache_tree *pending,
6425 			       struct cache_tree *seen,
6426 			       struct cache_tree *nodes,
6429 struct extent_record tmpl;
6431 if (btrfs_header_level(buf) > 0)
6432 add_pending(nodes, seen, buf->start, buf->len);
6434 add_pending(pending, seen, buf->start, buf->len);
6436 memset(&tmpl, 0, sizeof(tmpl));
6437 tmpl.start = buf->start;
6442 tmpl.max_size = buf->len;
6443 add_extent_rec(extent_cache, &tmpl);
6445 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6446 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6447 add_tree_backref(extent_cache, buf->start, buf->start,
6450 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6454 /* as we fix the tree, we might be deleting blocks that
6455 * we're tracking for repair. This hook makes sure we
6456 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when repair code frees an extent: keep the fsck extent
 * cache in sync by dropping the corresponding backref counts from the
 * extent record so we don't later "fix" state we just changed.
 *
 * @owner >= BTRFS_FIRST_FREE_OBJECTID distinguishes data from tree
 * backrefs.  Once a backref is no longer found in either the extent
 * tree or via a real ref, it is erased from the record's backref
 * tree; maybe_free_extent_rec() then frees the record itself when
 * nothing references it anymore.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6458 static int free_extent_hook(struct btrfs_trans_handle *trans,
6459 			    struct btrfs_root *root,
6460 			    u64 bytenr, u64 num_bytes, u64 parent,
6461 			    u64 root_objectid, u64 owner, u64 offset,
6464 struct extent_record *rec;
6465 struct cache_extent *cache;
6467 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6469 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6470 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6474 rec = container_of(cache, struct extent_record, cache);
6476 struct data_backref *back;
6477 back = find_data_backref(rec, parent, root_objectid, owner,
6478 offset, 1, bytenr, num_bytes);
6481 if (back->node.found_ref) {
6482 back->found_ref -= refs_to_drop;
6484 rec->refs -= refs_to_drop;
6486 if (back->node.found_extent_tree) {
6487 back->num_refs -= refs_to_drop;
6488 if (rec->extent_item_refs)
6489 rec->extent_item_refs -= refs_to_drop;
6491 if (back->found_ref == 0)
6492 back->node.found_ref = 0;
6493 if (back->num_refs == 0)
6494 back->node.found_extent_tree = 0;
/*
 * Only erase the backref once it is referenced neither from the
 * extent tree nor from a real ref.  (The visible code tested
 * "&& back->node.found_ref", which would erase a backref that is
 * still in use — fixed to match the data/tree symmetry.)
 */
6496 if (!back->node.found_extent_tree && !back->node.found_ref) {
6497 rb_erase(&back->node.node, &rec->backref_tree);
6501 struct tree_backref *back;
6502 back = find_tree_backref(rec, parent, root_objectid);
6505 if (back->node.found_ref) {
6508 back->node.found_ref = 0;
6510 if (back->node.found_extent_tree) {
6511 if (rec->extent_item_refs)
6512 rec->extent_item_refs--;
6513 back->node.found_extent_tree = 0;
6515 if (!back->node.found_extent_tree && !back->node.found_ref) {
6516 rb_erase(&back->node.node, &rec->backref_tree);
6520 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item belonging to @bytenr
 * (extent/metadata items and all flavours of backref items), walking
 * backwards through the possible key types.  For EXTENT_ITEM /
 * METADATA_ITEM deletions the block group accounting is adjusted via
 * btrfs_update_block_group().
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6525 static int delete_extent_records(struct btrfs_trans_handle *trans,
6526 				 struct btrfs_root *root,
6527 				 struct btrfs_path *path,
6528 				 u64 bytenr, u64 new_len)
6530 struct btrfs_key key;
6531 struct btrfs_key found_key;
6532 struct extent_buffer *leaf;
6537 key.objectid = bytenr;
6539 key.offset = (u64)-1;
6542 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6549 if (path->slots[0] == 0)
6555 leaf = path->nodes[0];
6556 slot = path->slots[0];
6558 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6559 if (found_key.objectid != bytenr)
/* Only extent items and backref items are fair game. */
6562 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6563 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6564 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6565 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6566 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6567 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6568 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6569 btrfs_release_path(path);
/* Step the search key backwards past the uninteresting item. */
6570 if (found_key.type == 0) {
6571 if (found_key.offset == 0)
6573 key.offset = found_key.offset - 1;
6574 key.type = found_key.type;
6576 key.type = found_key.type - 1;
6577 key.offset = (u64)-1;
6581 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6582 found_key.objectid, found_key.type, found_key.offset);
6584 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6587 btrfs_release_path(path);
/* Deleting the extent item itself: fix up block group usage. */
6589 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6590 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6591 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6592 found_key.offset : root->nodesize;
6594 ret = btrfs_update_block_group(trans, root, bytenr,
6601 btrfs_release_path(path);
6606 * for a single backref, this will allocate a new extent
6607 * and add the backref to it.
/*
 * Repair helper: (re)create the extent item for @rec in the extent
 * tree (when !allocated) and then insert the backref described by
 * @back — one btrfs_inc_extent_ref() per found real ref for data,
 * a single tree backref for metadata.
 *
 * Fixes in this revision: the "&copy_key" token had been corrupted
 * into the "(C)" mojibake character in four places (making the code
 * uncompilable), and a stray double semicolon after the copy_key
 * declaration is removed.  No logic change.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6609 static int record_extent(struct btrfs_trans_handle *trans,
6610 			 struct btrfs_fs_info *info,
6611 			 struct btrfs_path *path,
6612 			 struct extent_record *rec,
6613 			 struct extent_backref *back,
6614 			 int allocated, u64 flags)
6617 struct btrfs_root *extent_root = info->extent_root;
6618 struct extent_buffer *leaf;
6619 struct btrfs_key ins_key;
6620 struct btrfs_extent_item *ei;
6621 struct tree_backref *tback;
6622 struct data_backref *dback;
6623 struct btrfs_tree_block_info *bi;
6626 rec->max_size = max_t(u64, rec->max_size,
6627 info->extent_root->nodesize);
6630 u32 item_size = sizeof(*ei);
6633 item_size += sizeof(*bi);
6635 ins_key.objectid = rec->start;
6636 ins_key.offset = rec->max_size;
6637 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6639 ret = btrfs_insert_empty_item(trans, extent_root, path,
6640 &ins_key, item_size);
6644 leaf = path->nodes[0];
6645 ei = btrfs_item_ptr(leaf, path->slots[0],
6646 struct btrfs_extent_item);
6648 btrfs_set_extent_refs(leaf, ei, 0);
6649 btrfs_set_extent_generation(leaf, ei, rec->generation);
6651 if (back->is_data) {
6652 btrfs_set_extent_flags(leaf, ei,
6653 BTRFS_EXTENT_FLAG_DATA);
6655 struct btrfs_disk_key copy_key;
6657 tback = to_tree_backref(back);
6658 bi = (struct btrfs_tree_block_info *)(ei + 1);
6659 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* We don't know the real first key; record a zero key. */
6662 btrfs_set_disk_key_objectid(&copy_key,
6663 rec->info_objectid);
6664 btrfs_set_disk_key_type(&copy_key, 0);
6665 btrfs_set_disk_key_offset(&copy_key, 0);
6667 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6668 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6670 btrfs_set_extent_flags(leaf, ei,
6671 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6674 btrfs_mark_buffer_dirty(leaf);
6675 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6676 rec->max_size, 1, 0);
6679 btrfs_release_path(path);
6682 if (back->is_data) {
6686 dback = to_data_backref(back);
6687 if (back->full_backref)
6688 parent = dback->parent;
/* Insert one ref per real reference we found while scanning. */
6692 for (i = 0; i < dback->found_ref; i++) {
6693 /* if parent != 0, we're doing a full backref
6694 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6695 * just makes the backref allocator create a data
6698 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6699 rec->start, rec->max_size,
6703 BTRFS_FIRST_FREE_OBJECTID :
6709 fprintf(stderr, "adding new data backref"
6710 " on %llu %s %llu owner %llu"
6711 " offset %llu found %d\n",
6712 (unsigned long long)rec->start,
6713 back->full_backref ?
6715 back->full_backref ?
6716 (unsigned long long)parent :
6717 (unsigned long long)dback->root,
6718 (unsigned long long)dback->owner,
6719 (unsigned long long)dback->offset,
6724 tback = to_tree_backref(back);
6725 if (back->full_backref)
6726 parent = tback->parent;
6730 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6731 rec->start, rec->max_size,
6732 parent, tback->root, 0, 0);
6733 fprintf(stderr, "adding new tree backref on "
6734 "start %llu len %llu parent %llu root %llu\n",
6735 rec->start, rec->max_size, parent, tback->root);
6738 btrfs_release_path(path);
/*
 * Linear scan of @entries for the extent_entry matching both @bytenr
 * and @bytes.  The elided tail presumably returns the match or NULL —
 * TODO confirm against the full source.
 */
6742 static struct extent_entry *find_entry(struct list_head *entries,
6743 				       u64 bytenr, u64 bytes)
6745 struct extent_entry *entry = NULL;
6747 list_for_each_entry(entry, entries, list) {
6748 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Voting helper for backref repair: pick the extent_entry that the
 * most backrefs agree on.  Entries whose refs are all broken are
 * distrusted; when two candidates tie on count the search must keep
 * going, since "best" is then ambiguous.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6755 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6757 struct extent_entry *entry, *best = NULL, *prev = NULL;
6759 list_for_each_entry(entry, entries, list) {
6766 * If there are as many broken entries as entries then we know
6767 * not to trust this particular entry.
6769 if (entry->broken == entry->count)
6773 * If our current entry == best then we can't be sure our best
6774 * is really the best, so we need to keep searching.
6776 if (best && best->count == entry->count) {
6782 /* Prev == entry, not good enough, have to keep searching */
6783 if (!prev->broken && prev->count == entry->count)
6787 best = (prev->count > entry->count) ? prev : entry;
6788 else if (best->count < entry->count)
/*
 * Repair a file extent item whose disk_bytenr/disk_num_bytes disagree
 * with the agreed-upon extent @entry.
 *
 * Finds the file extent in the subvolume identified by dback->root,
 * then rewrites disk_bytenr, offset, disk_num_bytes (and ram_bytes
 * when uncompressed) so the ref matches @entry, committing the change
 * in its own transaction.  Cases the code refuses to handle (ref
 * extends outside the entry, compressed mismatch) only print a
 * request for a btrfs-image.
 *
 * NOTE(review): interior lines are elided in this chunk.
 */
6796 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6797 		      struct data_backref *dback, struct extent_entry *entry)
6799 struct btrfs_trans_handle *trans;
6800 struct btrfs_root *root;
6801 struct btrfs_file_extent_item *fi;
6802 struct extent_buffer *leaf;
6803 struct btrfs_key key;
6807 key.objectid = dback->root;
6808 key.type = BTRFS_ROOT_ITEM_KEY;
6809 key.offset = (u64)-1;
6810 root = btrfs_read_fs_root(info, &key);
6812 fprintf(stderr, "Couldn't find root for our ref\n");
6817 * The backref points to the original offset of the extent if it was
6818 * split, so we need to search down to the offset we have and then walk
6819 * forward until we find the backref we're looking for.
6821 key.objectid = dback->owner;
6822 key.type = BTRFS_EXTENT_DATA_KEY;
6823 key.offset = dback->offset;
6824 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6826 fprintf(stderr, "Error looking up ref %d\n", ret);
6831 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6832 ret = btrfs_next_leaf(root, path);
6834 fprintf(stderr, "Couldn't find our ref, next\n");
6838 leaf = path->nodes[0];
6839 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6840 if (key.objectid != dback->owner ||
6841 key.type != BTRFS_EXTENT_DATA_KEY) {
6842 fprintf(stderr, "Couldn't find our ref, search\n");
6845 fi = btrfs_item_ptr(leaf, path->slots[0],
6846 struct btrfs_file_extent_item);
6847 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6848 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the file extent that matches the broken backref. */
6850 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6855 btrfs_release_path(path);
6857 trans = btrfs_start_transaction(root, 1);
6859 return PTR_ERR(trans);
6862 * Ok we have the key of the file extent we want to fix, now we can cow
6863 * down to the thing and fix it.
6865 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6867 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6868 key.objectid, key.type, key.offset, ret);
6872 fprintf(stderr, "Well that's odd, we just found this key "
6873 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6878 leaf = path->nodes[0];
6879 fi = btrfs_item_ptr(leaf, path->slots[0],
6880 struct btrfs_file_extent_item);
6882 if (btrfs_file_extent_compression(leaf, fi) &&
6883 dback->disk_bytenr != entry->bytenr) {
6884 fprintf(stderr, "Ref doesn't match the record start and is "
6885 "compressed, please take a btrfs-image of this file "
6886 "system and send it to a btrfs developer so they can "
6887 "complete this functionality for bytenr %Lu\n",
6888 dback->disk_bytenr);
/* Broken ref: just point it at the agreed-upon bytenr. */
6893 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6894 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6895 } else if (dback->disk_bytenr > entry->bytenr) {
6896 u64 off_diff, offset;
6898 off_diff = dback->disk_bytenr - entry->bytenr;
6899 offset = btrfs_file_extent_offset(leaf, fi);
6900 if (dback->disk_bytenr + offset +
6901 btrfs_file_extent_num_bytes(leaf, fi) >
6902 entry->bytenr + entry->bytes) {
6903 fprintf(stderr, "Ref is past the entry end, please "
6904 "take a btrfs-image of this file system and "
6905 "send it to a btrfs developer, ref %Lu\n",
6906 dback->disk_bytenr);
/* Shift the in-extent offset so the same data is referenced. */
6911 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6912 btrfs_set_file_extent_offset(leaf, fi, offset);
6913 } else if (dback->disk_bytenr < entry->bytenr) {
6916 offset = btrfs_file_extent_offset(leaf, fi);
6917 if (dback->disk_bytenr + offset < entry->bytenr) {
6918 fprintf(stderr, "Ref is before the entry start, please"
6919 " take a btrfs-image of this file system and "
6920 "send it to a btrfs developer, ref %Lu\n",
6921 dback->disk_bytenr);
6926 offset += dback->disk_bytenr;
6927 offset -= entry->bytenr;
6928 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6929 btrfs_set_file_extent_offset(leaf, fi, offset);
6932 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6935 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6936 * only do this if we aren't using compression, otherwise it's a
6939 if (!btrfs_file_extent_compression(leaf, fi))
6940 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6942 printf("ram bytes may be wrong?\n");
6943 btrfs_mark_buffer_dirty(leaf);
6945 err = btrfs_commit_transaction(trans, root);
6946 btrfs_release_path(path);
6947 return ret ? ret : err;
/*
 * verify_backrefs - make all data backrefs of one extent record agree on
 * (bytenr, bytes), repairing the file extent items that disagree.
 *
 * Builds a list of candidate (bytenr, bytes) "extent_entry" pairs from the
 * data backrefs, picks the candidate with the most votes (falling back to
 * the extent record's own start/nr when the vote is split), then calls
 * repair_ref() on every backref that doesn't match the winner.
 *
 * NOTE(review): this chunk appears to have interior lines elided by
 * extraction (declarations such as ret/nr_entries/mismatch, several braces
 * and error checks are not visible); comments describe visible logic only.
 */
6950 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6951 struct extent_record *rec)
6953 struct extent_backref *back, *tmp;
6954 struct data_backref *dback;
6955 struct extent_entry *entry, *best = NULL;
6958 int broken_entries = 0;
6963 * Metadata is easy and the backrefs should always agree on bytenr and
6964 * size, if not we've got bigger issues.
/* Pass 1: tally one extent_entry per distinct (disk_bytenr, bytes) seen. */
6969 rbtree_postorder_for_each_entry_safe(back, tmp,
6970 &rec->backref_tree, node) {
6971 if (back->full_backref || !back->is_data)
6974 dback = to_data_backref(back);
6977 * We only pay attention to backrefs that we found a real
6980 if (dback->found_ref == 0)
6984 * For now we only catch when the bytes don't match, not the
6985 * bytenr. We can easily do this at the same time, but I want
6986 * to have a fs image to test on before we just add repair
6987 * functionality willy-nilly so we know we won't screw up the
6991 entry = find_entry(&entries, dback->disk_bytenr,
6994 entry = malloc(sizeof(struct extent_entry));
6999 memset(entry, 0, sizeof(*entry));
7000 entry->bytenr = dback->disk_bytenr;
7001 entry->bytes = dback->bytes;
7002 list_add_tail(&entry->list, &entries);
7007 * If we only have on entry we may think the entries agree when
7008 * in reality they don't so we have to do some extra checking.
7010 if (dback->disk_bytenr != rec->start ||
7011 dback->bytes != rec->nr || back->broken)
7022 /* Yay all the backrefs agree, carry on good sir */
7023 if (nr_entries <= 1 && !mismatch)
7026 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7027 "%Lu\n", rec->start);
7030 * First we want to see if the backrefs can agree amongst themselves who
7031 * is right, so figure out which one of the entries has the highest
7034 best = find_most_right_entry(&entries);
7037 * Ok so we may have an even split between what the backrefs think, so
7038 * this is where we use the extent ref to see what it thinks.
7041 entry = find_entry(&entries, rec->start, rec->nr);
7042 if (!entry && (!broken_entries || !rec->found_rec)) {
7043 fprintf(stderr, "Backrefs don't agree with each other "
7044 "and extent record doesn't agree with anybody,"
7045 " so we can't fix bytenr %Lu bytes %Lu\n",
7046 rec->start, rec->nr);
7049 } else if (!entry) {
7051 * Ok our backrefs were broken, we'll assume this is the
7052 * correct value and add an entry for this range.
7054 entry = malloc(sizeof(struct extent_entry));
7059 memset(entry, 0, sizeof(*entry));
7060 entry->bytenr = rec->start;
7061 entry->bytes = rec->nr;
7062 list_add_tail(&entry->list, &entries);
7066 best = find_most_right_entry(&entries);
7068 fprintf(stderr, "Backrefs and extent record evenly "
7069 "split on who is right, this is going to "
7070 "require user input to fix bytenr %Lu bytes "
7071 "%Lu\n", rec->start, rec->nr);
7078 * I don't think this can happen currently as we'll abort() if we catch
7079 * this case higher up, but in case somebody removes that we still can't
7080 * deal with it properly here yet, so just bail out of that's the case.
7082 if (best->bytenr != rec->start) {
7083 fprintf(stderr, "Extent start and backref starts don't match, "
7084 "please use btrfs-image on this file system and send "
7085 "it to a btrfs developer so they can make fsck fix "
7086 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7087 rec->start, rec->nr);
7093 * Ok great we all agreed on an extent record, let's go find the real
7094 * references and fix up the ones that don't match.
/* Pass 2: rewrite every ref whose (bytes, bytenr) differs from the winner. */
7096 rbtree_postorder_for_each_entry_safe(back, tmp,
7097 &rec->backref_tree, node) {
7098 if (back->full_backref || !back->is_data)
7101 dback = to_data_backref(back);
7104 * Still ignoring backrefs that don't have a real ref attached
7107 if (dback->found_ref == 0)
7110 if (dback->bytes == best->bytes &&
7111 dback->disk_bytenr == best->bytenr)
7114 ret = repair_ref(info, path, dback, best);
7120 * Ok we messed with the actual refs, which means we need to drop our
7121 * entire cache and go back and rescan. I know this is a huge pain and
7122 * adds a lot of extra work, but it's the only way to be safe. Once all
7123 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
7128 while (!list_empty(&entries)) {
7129 entry = list_entry(entries.next, struct extent_entry, list);
7130 list_del_init(&entry->list);
/*
 * process_duplicates - decide whether a duplicate extent record set really
 * needs deletion, or can be collapsed into a single good record.
 *
 * When rec has no found_rec and exactly one duplicate, the duplicate (the
 * one backed by a real EXTENT_ITEM) is promoted into the extent cache and
 * any further overlapping records are merged into it.  Per the visible
 * return, yields 0 when duplicates remain (delete path needed), 1 otherwise.
 *
 * NOTE(review): interior lines appear elided by extraction (loop framing
 * around the lookup_cache_extent retry is not visible).
 */
7136 static int process_duplicates(struct btrfs_root *root,
7137 struct cache_tree *extent_cache,
7138 struct extent_record *rec)
7140 struct extent_record *good, *tmp;
7141 struct cache_extent *cache;
7145 * If we found a extent record for this extent then return, or if we
7146 * have more than one duplicate we are likely going to need to delete
7149 if (rec->found_rec || rec->num_duplicates > 1)
7152 /* Shouldn't happen but just in case */
7153 BUG_ON(!rec->num_duplicates);
7156 * So this happens if we end up with a backref that doesn't match the
7157 * actual extent entry. So either the backref is bad or the extent
7158 * entry is bad. Either way we want to have the extent_record actually
7159 * reflect what we found in the extent_tree, so we need to take the
7160 * duplicate out and use that as the extent_record since the only way we
7161 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7163 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the sole duplicate to be the authoritative record. */
7165 good = to_extent_record(rec->dups.next);
7166 list_del_init(&good->list);
7167 INIT_LIST_HEAD(&good->backrefs);
7168 INIT_LIST_HEAD(&good->dups);
7169 good->cache.start = good->start;
7170 good->cache.size = good->nr;
7171 good->content_checked = 0;
7172 good->owner_ref_checked = 0;
7173 good->num_duplicates = 0;
7174 good->refs = rec->refs;
7175 list_splice_init(&rec->backrefs, &good->backrefs);
7177 cache = lookup_cache_extent(extent_cache, good->start,
7181 tmp = container_of(cache, struct extent_record, cache);
7184 * If we find another overlapping extent and it's found_rec is
7185 * set then it's a duplicate and we need to try and delete
7188 if (tmp->found_rec || tmp->num_duplicates > 0) {
7189 if (list_empty(&good->list))
7190 list_add_tail(&good->list,
7191 &duplicate_extents);
7192 good->num_duplicates += tmp->num_duplicates + 1;
7193 list_splice_init(&tmp->dups, &good->dups);
7194 list_del_init(&tmp->list);
7195 list_add_tail(&tmp->list, &good->dups);
7196 remove_cache_extent(extent_cache, &tmp->cache);
7201 * Ok we have another non extent item backed extent rec, so lets
7202 * just add it to this extent and carry on like we did above.
7204 good->refs += tmp->refs;
7205 list_splice_init(&tmp->backrefs, &good->backrefs);
7206 remove_cache_extent(extent_cache, &tmp->cache);
7209 ret = insert_cache_extent(extent_cache, &good->cache);
7212 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove redundant EXTENT_ITEM keys for a set of
 * overlapping extent records, keeping the record that covers all others.
 *
 * Returns the number of items deleted on success (visible as
 * "return ret ? ret : nr_del"), or a negative error.
 *
 * NOTE(review): interior lines appear elided by extraction (initialization
 * of "good", nr_del, the abort() path on partial overlap, and several error
 * gotos are not visible here).
 */
7215 static int delete_duplicate_records(struct btrfs_root *root,
7216 struct extent_record *rec)
7218 struct btrfs_trans_handle *trans;
7219 LIST_HEAD(delete_list);
7220 struct btrfs_path *path;
7221 struct extent_record *tmp, *good, *n;
7224 struct btrfs_key key;
7226 path = btrfs_alloc_path();
7233 /* Find the record that covers all of the duplicates. */
7234 list_for_each_entry(tmp, &rec->dups, list) {
7235 if (good->start < tmp->start)
7237 if (good->nr > tmp->nr)
7240 if (tmp->start + tmp->nr < good->start + good->nr) {
7241 fprintf(stderr, "Ok we have overlapping extents that "
7242 "aren't completely covered by each other, this "
7243 "is going to require more careful thought. "
7244 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7245 tmp->start, tmp->nr, good->start, good->nr);
7252 list_add_tail(&rec->list, &delete_list);
7254 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7257 list_move_tail(&tmp->list, &delete_list);
7260 root = root->fs_info->extent_root;
7261 trans = btrfs_start_transaction(root, 1);
7262 if (IS_ERR(trans)) {
7263 ret = PTR_ERR(trans);
/* Delete the EXTENT_ITEM for every record queued above. */
7267 list_for_each_entry(tmp, &delete_list, list) {
7268 if (tmp->found_rec == 0)
7270 key.objectid = tmp->start;
7271 key.type = BTRFS_EXTENT_ITEM_KEY;
7272 key.offset = tmp->nr;
7274 /* Shouldn't happen but just in case */
7275 if (tmp->metadata) {
7276 fprintf(stderr, "Well this shouldn't happen, extent "
7277 "record overlaps but is metadata? "
7278 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7282 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7288 ret = btrfs_del_item(trans, root, path);
7291 btrfs_release_path(path);
7294 err = btrfs_commit_transaction(trans, root);
/* Free both the delete list and any leftover dups. */
7298 while (!list_empty(&delete_list)) {
7299 tmp = to_extent_record(delete_list.next);
7300 list_del_init(&tmp->list);
7306 while (!list_empty(&rec->dups)) {
7307 tmp = to_extent_record(rec->dups.next);
7308 list_del_init(&tmp->list);
7312 btrfs_free_path(path);
7314 if (!ret && !nr_del)
7315 rec->num_duplicates = 0;
7317 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - for each data backref that was never matched to a
 * real file extent, look the file extent up in its owning fs root and fill
 * in the backref's disk_bytenr/bytes from what is actually on disk.
 *
 * Skips backrefs whose root no longer exists, and skips results whose bytenr
 * already has its own extent record in the cache (they belong elsewhere).
 *
 * NOTE(review): interior lines appear elided by extraction (u64
 * bytenr/bytes declarations, the "continue" statements after the guards,
 * and the line setting the "don't trust" flag around 7400 are not visible).
 */
7320 static int find_possible_backrefs(struct btrfs_fs_info *info,
7321 struct btrfs_path *path,
7322 struct cache_tree *extent_cache,
7323 struct extent_record *rec)
7325 struct btrfs_root *root;
7326 struct extent_backref *back, *tmp;
7327 struct data_backref *dback;
7328 struct cache_extent *cache;
7329 struct btrfs_file_extent_item *fi;
7330 struct btrfs_key key;
7334 rbtree_postorder_for_each_entry_safe(back, tmp,
7335 &rec->backref_tree, node) {
7336 /* Don't care about full backrefs (poor unloved backrefs) */
7337 if (back->full_backref || !back->is_data)
7340 dback = to_data_backref(back);
7342 /* We found this one, we don't need to do a lookup */
7343 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7346 key.objectid = dback->root;
7347 key.type = BTRFS_ROOT_ITEM_KEY;
7348 key.offset = (u64)-1;
7350 root = btrfs_read_fs_root(info, &key);
7352 /* No root, definitely a bad ref, skip */
7353 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7355 /* Other err, exit */
7357 return PTR_ERR(root);
/* Look up the EXTENT_DATA item the backref claims to come from. */
7359 key.objectid = dback->owner;
7360 key.type = BTRFS_EXTENT_DATA_KEY;
7361 key.offset = dback->offset;
7362 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7364 btrfs_release_path(path);
7367 /* Didn't find it, we can carry on */
7372 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7373 struct btrfs_file_extent_item);
7374 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7375 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7376 btrfs_release_path(path);
7377 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7379 struct extent_record *tmp;
7380 tmp = container_of(cache, struct extent_record, cache);
7383 * If we found an extent record for the bytenr for this
7384 * particular backref then we can't add it to our
7385 * current extent record. We only want to add backrefs
7386 * that don't have a corresponding extent item in the
7387 * extent tree since they likely belong to this record
7388 * and we need to fix it if it doesn't match bytenrs.
7394 dback->found_ref += 1;
7395 dback->disk_bytenr = bytenr;
7396 dback->bytes = bytes;
7399 * Set this so the verify backref code knows not to trust the
7400 * values in this backref.
/*
 * Record orphan data ref into corresponding root.
 *
 * (original return-contract comment kept below; the visible
 * "return !recorded_data_ref" matches it)
 */
7409 * Record orphan data ref into corresponding root.
7411 * Return 0 if the extent item contains data ref and recorded.
7412 * Return 1 if the extent item contains no useful data ref
7413 * On that case, it may contains only shared_dataref or metadata backref
7414 * or the file extent exists(this should be handled by the extent bytenr
7416 * Return <0 if something goes wrong.
7418 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7419 struct extent_record *rec)
7421 struct btrfs_key key;
7422 struct btrfs_root *dest_root;
7423 struct extent_backref *back, *tmp;
7424 struct data_backref *dback;
7425 struct orphan_data_extent *orphan;
7426 struct btrfs_path *path;
7427 int recorded_data_ref = 0;
7432 path = btrfs_alloc_path();
/* Walk data backrefs that the extent tree knows about but whose file
 * extent was never found, and queue them on the owning root. */
7435 rbtree_postorder_for_each_entry_safe(back, tmp,
7436 &rec->backref_tree, node) {
7437 if (back->full_backref || !back->is_data ||
7438 !back->found_extent_tree)
7440 dback = to_data_backref(back);
7441 if (dback->found_ref)
7443 key.objectid = dback->root;
7444 key.type = BTRFS_ROOT_ITEM_KEY;
7445 key.offset = (u64)-1;
7447 dest_root = btrfs_read_fs_root(fs_info, &key);
7449 /* For non-exist root we just skip it */
7450 if (IS_ERR(dest_root) || !dest_root)
7453 key.objectid = dback->owner;
7454 key.type = BTRFS_EXTENT_DATA_KEY;
7455 key.offset = dback->offset;
7457 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7459 * For ret < 0, it's OK since the fs-tree may be corrupted,
7460 * we need to record it for inode/file extent rebuild.
7461 * For ret > 0, we record it only for file extent rebuild.
7462 * For ret == 0, the file extent exists but only bytenr
7463 * mismatch, let the original bytenr fix routine to handle,
7469 orphan = malloc(sizeof(*orphan));
/* NOTE(review): the malloc-failure branch is not visible in this chunk
 * (lines elided); presumably it sets ret = -ENOMEM and breaks out. */
7474 INIT_LIST_HEAD(&orphan->list);
7475 orphan->root = dback->root;
7476 orphan->objectid = dback->owner;
7477 orphan->offset = dback->offset;
7478 orphan->disk_bytenr = rec->cache.start;
7479 orphan->disk_len = rec->cache.size;
7480 list_add(&dest_root->orphan_data_extents, &orphan->list);
7481 recorded_data_ref = 1;
7484 btrfs_free_path(path);
7486 return !recorded_data_ref;
/*
 * fixup_extent_refs - rebuild the extent tree entries for one extent record:
 * reconcile backrefs, delete the stale on-disk records, then re-insert the
 * references the tree scan actually found (skipping corrupt blocks).
 */
7492 * when an incorrect extent item is found, this will delete
7493 * all of the existing entries for it and recreate them
7494 * based on what the tree scan found.
7496 static int fixup_extent_refs(struct btrfs_fs_info *info,
7497 struct cache_tree *extent_cache,
7498 struct extent_record *rec)
7500 struct btrfs_trans_handle *trans = NULL;
7502 struct btrfs_path *path;
7503 struct cache_extent *cache;
7504 struct extent_backref *back, *tmp;
7508 if (rec->flag_block_full_backref)
7509 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7511 path = btrfs_alloc_path();
7515 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7517 * Sometimes the backrefs themselves are so broken they don't
7518 * get attached to any meaningful rec, so first go back and
7519 * check any of our backrefs that we couldn't find and throw
7520 * them into the list if we find the backref so that
7521 * verify_backrefs can figure out what to do.
7523 ret = find_possible_backrefs(info, path, extent_cache, rec);
7528 /* step one, make sure all of the backrefs agree */
7529 ret = verify_backrefs(info, path, rec);
7533 trans = btrfs_start_transaction(info->extent_root, 1);
7534 if (IS_ERR(trans)) {
7535 ret = PTR_ERR(trans);
7539 /* step two, delete all the existing records */
7540 ret = delete_extent_records(trans, info->extent_root, path,
7541 rec->start, rec->max_size);
7546 /* was this block corrupt? If so, don't add references to it */
7547 cache = lookup_cache_extent(info->corrupt_blocks,
7548 rec->start, rec->max_size);
7554 /* step three, recreate all the refs we did find */
7555 rbtree_postorder_for_each_entry_safe(back, tmp,
7556 &rec->backref_tree, node) {
7558 * if we didn't find any references, don't create a
7561 if (!back->found_ref)
7564 rec->bad_full_backref = 0;
7565 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* NOTE(review): the "allocated" bookkeeping and the out/error labels are
 * not visible in this chunk (lines elided by extraction). */
7573 int err = btrfs_commit_transaction(trans, info->extent_root);
7578 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * on-disk extent item for rec, according to rec->flag_block_full_backref,
 * inside its own transaction.  Returns the commit result, or a negative
 * error if the extent item cannot be found.
 */
7582 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7583 struct extent_record *rec)
7585 struct btrfs_trans_handle *trans;
7586 struct btrfs_root *root = fs_info->extent_root;
7587 struct btrfs_path *path;
7588 struct btrfs_extent_item *ei;
7589 struct btrfs_key key;
/* Metadata extents are keyed by level with METADATA_ITEM, data by size. */
7593 key.objectid = rec->start;
7594 if (rec->metadata) {
7595 key.type = BTRFS_METADATA_ITEM_KEY;
7596 key.offset = rec->info_level;
7598 key.type = BTRFS_EXTENT_ITEM_KEY;
7599 key.offset = rec->max_size;
7602 path = btrfs_alloc_path();
7606 trans = btrfs_start_transaction(root, 0);
7607 if (IS_ERR(trans)) {
7608 btrfs_free_path(path);
7609 return PTR_ERR(trans);
7612 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7614 btrfs_free_path(path);
7615 btrfs_commit_transaction(trans, root);
7618 fprintf(stderr, "Didn't find extent for %llu\n",
7619 (unsigned long long)rec->start);
7620 btrfs_free_path(path);
7621 btrfs_commit_transaction(trans, root);
/* NOTE(review): the return values for the two bail-out paths above are on
 * lines elided by extraction. */
7625 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7626 struct btrfs_extent_item);
7627 flags = btrfs_extent_flags(path->nodes[0], ei);
7628 if (rec->flag_block_full_backref) {
7629 fprintf(stderr, "setting full backref on %llu\n",
7630 (unsigned long long)key.objectid);
7631 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7633 fprintf(stderr, "clearing full backref on %llu\n",
7634 (unsigned long long)key.objectid);
7635 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7637 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7638 btrfs_mark_buffer_dirty(path->nodes[0]);
7639 btrfs_free_path(path);
7640 return btrfs_commit_transaction(trans, root);
/*
 * prune_one_block - delete the node pointer to one corrupt block from the
 * extent allocation tree.  Searches to the corrupt block's parent level,
 * tries the slot the search returned first, otherwise scans the parent for
 * the matching blockptr, then calls btrfs_del_ptr on it.
 */
7643 /* right now we only prune from the extent allocation tree */
7644 static int prune_one_block(struct btrfs_trans_handle *trans,
7645 struct btrfs_fs_info *info,
7646 struct btrfs_corrupt_block *corrupt)
7649 struct btrfs_path path;
7650 struct extent_buffer *eb;
7654 int level = corrupt->level + 1;
7656 btrfs_init_path(&path);
7658 /* we want to stop at the parent to our busted block */
7659 path.lowest_level = level;
7661 ret = btrfs_search_slot(trans, info->extent_root,
7662 &corrupt->key, &path, -1, 1);
7667 eb = path.nodes[level];
7674 * hopefully the search gave us the block we want to prune,
7675 * lets try that first
7677 slot = path.slots[level];
7678 found = btrfs_node_blockptr(eb, slot);
7679 if (found == corrupt->cache.start)
7682 nritems = btrfs_header_nritems(eb);
7684 /* the search failed, lets scan this node and hope we find it */
7685 for (slot = 0; slot < nritems; slot++) {
7686 found = btrfs_node_blockptr(eb, slot);
7687 if (found == corrupt->cache.start)
7691 * we couldn't find the bad block. TODO, search all the nodes for pointers
7694 if (eb == info->extent_root->node) {
/* NOTE(review): the retry-at-higher-level path and the del_ptr label are on
 * lines elided by extraction; the visible release/return below is the tail. */
7699 btrfs_release_path(&path);
7704 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7705 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7708 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - walk fs_info->corrupt_blocks, prune the pointer to
 * each corrupt block (prune_one_block) and drop it from the cache, all in a
 * single transaction which is committed at the end.  Returns the commit
 * result (or PTR_ERR if the transaction could not be started).
 */
7712 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7714 struct btrfs_trans_handle *trans = NULL;
7715 struct cache_extent *cache;
7716 struct btrfs_corrupt_block *corrupt;
/* NOTE(review): loop framing around the per-entry body is elided here. */
7719 cache = search_cache_extent(info->corrupt_blocks, 0);
7723 trans = btrfs_start_transaction(info->extent_root, 1);
7725 return PTR_ERR(trans);
7727 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7728 prune_one_block(trans, info, corrupt);
7729 remove_cache_extent(info->corrupt_blocks, cache);
7732 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the EXTENT_DIRTY ranges tracked in
 * fs_info->free_space_cache and step through the block groups they cover
 * (advancing start past each group), so free-space caching state is reset.
 * NOTE(review): the surrounding while-loops and the "cached = 0" reset seen
 * in comparable fsck code are on lines elided by extraction.
 */
7736 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7738 struct btrfs_block_group_cache *cache;
7743 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7744 &start, &end, EXTENT_DIRTY);
7747 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7753 cache = btrfs_lookup_first_block_group(fs_info, start);
7758 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - main per-extent verification/repair pass.
 *
 * In repair mode it first excludes every known extent and corrupt block
 * from allocation, prunes corrupt blocks and resets cached block groups,
 * then resolves duplicate extent records.  It then walks the extent cache
 * reporting (and, with repair, fixing) ref-count mismatches, backpointer
 * mismatches, owner-ref failures, bad full-backref flags, stripe-crossing
 * metadata and chunk-type mismatches, finally fixing block accounting in a
 * closing transaction.
 *
 * NOTE(review): many interior lines (declarations of ret/fixed/recorded/
 * cur_err, loop framing, error gotos) are elided by extraction; comments
 * describe visible logic only.
 */
7762 static int check_extent_refs(struct btrfs_root *root,
7763 struct cache_tree *extent_cache)
7765 struct extent_record *rec;
7766 struct cache_extent *cache;
7775 * if we're doing a repair, we have to make sure
7776 * we don't allocate from the problem extents.
7777 * In the worst case, this will be all the
7780 cache = search_cache_extent(extent_cache, 0);
7782 rec = container_of(cache, struct extent_record, cache);
7783 set_extent_dirty(root->fs_info->excluded_extents,
7785 rec->start + rec->max_size - 1,
7787 cache = next_cache_extent(cache);
7790 /* pin down all the corrupted blocks too */
7791 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7793 set_extent_dirty(root->fs_info->excluded_extents,
7795 cache->start + cache->size - 1,
7797 cache = next_cache_extent(cache);
7799 prune_corrupt_blocks(root->fs_info);
7800 reset_cached_block_groups(root->fs_info);
7803 reset_cached_block_groups(root->fs_info);
7806 * We need to delete any duplicate entries we find first otherwise we
7807 * could mess up the extent tree when we have backrefs that actually
7808 * belong to a different extent item and not the weird duplicate one.
7810 while (repair && !list_empty(&duplicate_extents)) {
7811 rec = to_extent_record(duplicate_extents.next);
7812 list_del_init(&rec->list);
7814 /* Sometimes we can find a backref before we find an actual
7815 * extent, so we need to process it a little bit to see if there
7816 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7817 * if this is a backref screwup. If we need to delete stuff
7818 * process_duplicates() will return 0, otherwise it will return
7821 if (process_duplicates(root, extent_cache, rec))
7823 ret = delete_duplicate_records(root, rec);
7827 * delete_duplicate_records will return the number of entries
7828 * deleted, so if it's greater than 0 then we know we actually
7829 * did something and we need to remove.
/* Main verification loop over every cached extent record. */
7843 cache = search_cache_extent(extent_cache, 0);
7846 rec = container_of(cache, struct extent_record, cache);
7847 if (rec->num_duplicates) {
7848 fprintf(stderr, "extent item %llu has multiple extent "
7849 "items\n", (unsigned long long)rec->start);
7854 if (rec->refs != rec->extent_item_refs) {
7855 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7856 (unsigned long long)rec->start,
7857 (unsigned long long)rec->nr);
7858 fprintf(stderr, "extent item %llu, found %llu\n",
7859 (unsigned long long)rec->extent_item_refs,
7860 (unsigned long long)rec->refs);
7861 ret = record_orphan_data_extents(root->fs_info, rec);
7868 * we can't use the extent to repair file
7869 * extent, let the fallback method handle it.
7871 if (!fixed && repair) {
7872 ret = fixup_extent_refs(
7883 if (all_backpointers_checked(rec, 1)) {
7884 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7885 (unsigned long long)rec->start,
7886 (unsigned long long)rec->nr);
7888 if (!fixed && !recorded && repair) {
7889 ret = fixup_extent_refs(root->fs_info,
7898 if (!rec->owner_ref_checked) {
7899 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7900 (unsigned long long)rec->start,
7901 (unsigned long long)rec->nr);
7902 if (!fixed && !recorded && repair) {
7903 ret = fixup_extent_refs(root->fs_info,
7912 if (rec->bad_full_backref) {
7913 fprintf(stderr, "bad full backref, on [%llu]\n",
7914 (unsigned long long)rec->start);
7916 ret = fixup_extent_flags(root->fs_info, rec);
7925 * Although it's not a extent ref's problem, we reuse this
7926 * routine for error reporting.
7927 * No repair function yet.
7929 if (rec->crossing_stripes) {
7931 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7932 rec->start, rec->start + rec->max_size);
7937 if (rec->wrong_chunk_type) {
7939 "bad extent [%llu, %llu), type mismatch with chunk\n",
7940 rec->start, rec->start + rec->max_size);
/* Each processed record is removed from the cache and, when clean or
 * fixed, un-excluded from allocation again. */
7945 remove_cache_extent(extent_cache, cache);
7946 free_all_extent_backrefs(rec);
7947 if (!init_extent_tree && repair && (!cur_err || fixed))
7948 clear_extent_dirty(root->fs_info->excluded_extents,
7950 rec->start + rec->max_size - 1,
7956 if (ret && ret != -EAGAIN) {
7957 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7960 struct btrfs_trans_handle *trans;
7962 root = root->fs_info->extent_root;
7963 trans = btrfs_start_transaction(root, 1);
7964 if (IS_ERR(trans)) {
7965 ret = PTR_ERR(trans);
7969 btrfs_fix_block_accounting(trans, root);
7970 ret = btrfs_commit_transaction(trans, root);
7975 fprintf(stderr, "repaired damaged extent references\n");
7981 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7985 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7986 stripe_size = length;
7987 stripe_size /= num_stripes;
7988 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7989 stripe_size = length * 2;
7990 stripe_size /= num_stripes;
7991 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7992 stripe_size = length;
7993 stripe_size /= (num_stripes - 1);
7994 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7995 stripe_size = length;
7996 stripe_size /= (num_stripes - 2);
7998 stripe_size = length;
/*
 * check_chunk_refs - cross-check one chunk record against its block group
 * record and every stripe's device extent record (original return contract
 * kept below).  Matching records are unlinked from the orphan lists and
 * attached to the chunk record (bg_rec / dextents).
 *
 * NOTE(review): interior lines (u64 devid/offset/length, ret/i declarations,
 * the metadump_v2 guards, "if (!silent)" framing around the fprintfs, and
 * several fprintf arguments) are elided by extraction.
 */
8004 * Check the chunk with its block group/dev list ref:
8005 * Return 0 if all refs seems valid.
8006 * Return 1 if part of refs seems valid, need later check for rebuild ref
8007 * like missing block group and needs to search extent tree to rebuild them.
8008 * Return -1 if essential refs are missing and unable to rebuild.
8010 static int check_chunk_refs(struct chunk_record *chunk_rec,
8011 struct block_group_tree *block_group_cache,
8012 struct device_extent_tree *dev_extent_cache,
8015 struct cache_extent *block_group_item;
8016 struct block_group_record *block_group_rec;
8017 struct cache_extent *dev_extent_item;
8018 struct device_extent_record *dev_extent_rec;
8022 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
8026 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8029 if (block_group_item) {
8030 block_group_rec = container_of(block_group_item,
8031 struct block_group_record,
8033 if (chunk_rec->length != block_group_rec->offset ||
8034 chunk_rec->offset != block_group_rec->objectid ||
8036 chunk_rec->type_flags != block_group_rec->flags)) {
8039 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8040 chunk_rec->objectid,
8045 chunk_rec->type_flags,
8046 block_group_rec->objectid,
8047 block_group_rec->type,
8048 block_group_rec->offset,
8049 block_group_rec->offset,
8050 block_group_rec->objectid,
8051 block_group_rec->flags);
8054 list_del_init(&block_group_rec->list);
8055 chunk_rec->bg_rec = block_group_rec;
8060 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8061 chunk_rec->objectid,
8066 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent of the expected
 * per-device length. */
8073 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8074 chunk_rec->num_stripes);
8075 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8076 devid = chunk_rec->stripes[i].devid;
8077 offset = chunk_rec->stripes[i].offset;
8078 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8079 devid, offset, length);
8080 if (dev_extent_item) {
8081 dev_extent_rec = container_of(dev_extent_item,
8082 struct device_extent_record,
8084 if (dev_extent_rec->objectid != devid ||
8085 dev_extent_rec->offset != offset ||
8086 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8087 dev_extent_rec->length != length) {
8090 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8091 chunk_rec->objectid,
8094 chunk_rec->stripes[i].devid,
8095 chunk_rec->stripes[i].offset,
8096 dev_extent_rec->objectid,
8097 dev_extent_rec->offset,
8098 dev_extent_rec->length);
8101 list_move(&dev_extent_rec->chunk_list,
8102 &chunk_rec->dextents);
8107 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8108 chunk_rec->objectid,
8111 chunk_rec->stripes[i].devid,
8112 chunk_rec->stripes[i].offset);
/*
 * check_chunks - verify every chunk record against block group and device
 * extent caches, sorting each chunk onto the good/rebuild/bad list that the
 * caller supplied, then report any block groups or device extents left
 * without a matching chunk.
 *
 * NOTE(review): interior lines (ret/err declarations, the "if (err)" error
 * accumulation, fprintf arguments and the final return) are elided by
 * extraction.
 */
8119 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8120 int check_chunks(struct cache_tree *chunk_cache,
8121 struct block_group_tree *block_group_cache,
8122 struct device_extent_tree *dev_extent_cache,
8123 struct list_head *good, struct list_head *bad,
8124 struct list_head *rebuild, int silent)
8126 struct cache_extent *chunk_item;
8127 struct chunk_record *chunk_rec;
8128 struct block_group_record *bg_rec;
8129 struct device_extent_record *dext_rec;
8133 chunk_item = first_cache_extent(chunk_cache);
8134 while (chunk_item) {
8135 chunk_rec = container_of(chunk_item, struct chunk_record,
8137 err = check_chunk_refs(chunk_rec, block_group_cache,
8138 dev_extent_cache, silent);
/* err == 0 -> good, err > 0 -> rebuildable, otherwise bad. */
8141 if (err == 0 && good)
8142 list_add_tail(&chunk_rec->list, good);
8143 if (err > 0 && rebuild)
8144 list_add_tail(&chunk_rec->list, rebuild);
8146 list_add_tail(&chunk_rec->list, bad);
8147 chunk_item = next_cache_extent(chunk_item);
8150 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8153 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8161 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8165 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - sum the lengths of all device extents belonging to
 * one device record and compare the total against the device item's
 * bytes_used field, reporting a mismatch.
 *
 * NOTE(review): total_byte's declaration/initialization, the loop framing,
 * and the return statements are on lines elided by extraction.
 */
8176 static int check_device_used(struct device_record *dev_rec,
8177 struct device_extent_tree *dext_cache)
8179 struct cache_extent *cache;
8180 struct device_extent_record *dev_extent_rec;
8183 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8185 dev_extent_rec = container_of(cache,
8186 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
8188 if (dev_extent_rec->objectid != dev_rec->devid)
8191 list_del_init(&dev_extent_rec->device_list);
8192 total_byte += dev_extent_rec->length;
8193 cache = next_cache_extent(cache);
8196 if (total_byte != dev_rec->byte_used) {
8198 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8199 total_byte, dev_rec->byte_used, dev_rec->objectid,
8200 dev_rec->type, dev_rec->offset);
/*
 * check_devices - run check_device_used() on every device record in the
 * rb-tree cache, then report device extents whose device was never found.
 *
 * NOTE(review): err/ret declarations, the error accumulation, loop framing
 * and the final return are on lines elided by extraction.
 */
8207 /* check btrfs_dev_item -> btrfs_dev_extent */
8208 static int check_devices(struct rb_root *dev_cache,
8209 struct device_extent_tree *dev_extent_cache)
8211 struct rb_node *dev_node;
8212 struct device_record *dev_rec;
8213 struct device_extent_record *dext_rec;
8217 dev_node = rb_first(dev_cache);
8219 dev_rec = container_of(dev_node, struct device_record, node);
8220 err = check_device_used(dev_rec, dev_extent_cache);
8224 dev_node = rb_next(dev_node);
8226 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8229 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8230 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8237 static int add_root_item_to_list(struct list_head *head,
8238 u64 objectid, u64 bytenr, u64 last_snapshot,
8239 u8 level, u8 drop_level,
8240 int level_size, struct btrfs_key *drop_key)
8243 struct root_item_record *ri_rec;
8244 ri_rec = malloc(sizeof(*ri_rec));
8247 ri_rec->bytenr = bytenr;
8248 ri_rec->objectid = objectid;
8249 ri_rec->level = level;
8250 ri_rec->level_size = level_size;
8251 ri_rec->drop_level = drop_level;
8252 ri_rec->last_snapshot = last_snapshot;
8254 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8255 list_add_tail(&ri_rec->list, head);
8260 static void free_root_item_list(struct list_head *list)
8262 struct root_item_record *ri_rec;
8264 while (!list_empty(list)) {
8265 ri_rec = list_first_entry(list, struct root_item_record,
8267 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan every root queued on @list: read each root
 * node, seed it into the pending/seen/nodes caches, drain run_next_block()
 * for it, then free the record; finally drain any remaining blocks with a
 * NULL root-item record.
 *
 * NOTE(review): bits_nr/last/ret declarations, the break conditions of the
 * two drain loops, the free of rec, and the final return are on lines
 * elided by extraction.
 */
8272 static int deal_root_from_list(struct list_head *list,
8273 struct btrfs_root *root,
8274 struct block_info *bits,
8276 struct cache_tree *pending,
8277 struct cache_tree *seen,
8278 struct cache_tree *reada,
8279 struct cache_tree *nodes,
8280 struct cache_tree *extent_cache,
8281 struct cache_tree *chunk_cache,
8282 struct rb_root *dev_cache,
8283 struct block_group_tree *block_group_cache,
8284 struct device_extent_tree *dev_extent_cache)
8289 while (!list_empty(list)) {
8290 struct root_item_record *rec;
8291 struct extent_buffer *buf;
8292 rec = list_entry(list->next,
8293 struct root_item_record, list);
8295 buf = read_tree_block(root->fs_info->tree_root,
8296 rec->bytenr, rec->level_size, 0);
8297 if (!extent_buffer_uptodate(buf)) {
8298 free_extent_buffer(buf);
8302 add_root_to_pending(buf, extent_cache, pending,
8303 seen, nodes, rec->objectid);
8305 * To rebuild extent tree, we need deal with snapshot
8306 * one by one, otherwise we deal with node firstly which
8307 * can maximize readahead.
8310 ret = run_next_block(root, bits, bits_nr, &last,
8311 pending, seen, reada, nodes,
8312 extent_cache, chunk_cache,
8313 dev_cache, block_group_cache,
8314 dev_extent_cache, rec);
8318 free_extent_buffer(buf);
8319 list_del(&rec->list);
/* Final drain with no specific root record. */
8325 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8326 reada, nodes, extent_cache, chunk_cache,
8327 dev_cache, block_group_cache,
8328 dev_extent_cache, NULL);
/*
 * Top-level driver for the extent/chunk phase of fsck: collect every root
 * item (normal trees plus partially-dropped snapshots), traverse all tree
 * blocks into a set of caches, then cross-check chunks, extent refs and
 * devices.
 *
 * Installs fsck hooks (fsck_extent_cache, free_extent_hook,
 * corrupt_blocks, excluded_extents) into fs_info for the duration and
 * removes them before returning.
 *
 * NOTE(review): this chunk has many elided lines (error gotos, labels,
 * return statements, loop headers) — treat it as an outline, not the
 * full control flow.
 */
8338 static int check_chunks_and_extents(struct btrfs_root *root)
8340 	struct rb_root dev_cache;
8341 	struct cache_tree chunk_cache;
8342 	struct block_group_tree block_group_cache;
8343 	struct device_extent_tree dev_extent_cache;
8344 	struct cache_tree extent_cache;
8345 	struct cache_tree seen;
8346 	struct cache_tree pending;
8347 	struct cache_tree reada;
8348 	struct cache_tree nodes;
8349 	struct extent_io_tree excluded_extents;
8350 	struct cache_tree corrupt_blocks;
8351 	struct btrfs_path path;
8352 	struct btrfs_key key;
8353 	struct btrfs_key found_key;
8355 	struct block_info *bits;
8357 	struct extent_buffer *leaf;
8359 	struct btrfs_root_item ri;
8360 	struct list_head dropping_trees;
8361 	struct list_head normal_trees;
8362 	struct btrfs_root *root1;
	/* Initialize all per-run caches before any traversal */
8367 	dev_cache = RB_ROOT;
8368 	cache_tree_init(&chunk_cache);
8369 	block_group_tree_init(&block_group_cache);
8370 	device_extent_tree_init(&dev_extent_cache);
8372 	cache_tree_init(&extent_cache);
8373 	cache_tree_init(&seen);
8374 	cache_tree_init(&pending);
8375 	cache_tree_init(&nodes);
8376 	cache_tree_init(&reada);
8377 	cache_tree_init(&corrupt_blocks);
8378 	extent_io_tree_init(&excluded_extents);
8379 	INIT_LIST_HEAD(&dropping_trees);
8380 	INIT_LIST_HEAD(&normal_trees);
	/* Hook fsck state into fs_info so lower layers can report into it */
8383 		root->fs_info->excluded_extents = &excluded_extents;
8384 	root->fs_info->fsck_extent_cache = &extent_cache;
8385 	root->fs_info->free_extent_hook = free_extent_hook;
8386 	root->fs_info->corrupt_blocks = &corrupt_blocks;
8390 	bits = malloc(bits_nr * sizeof(struct block_info));
8396 	if (ctx.progress_enabled) {
8397 		ctx.tp = TASK_EXTENTS;
8398 		task_start(ctx.info);
	/* Queue the tree root and chunk root explicitly; other roots come
	 * from scanning ROOT_ITEMs below. */
8402 	root1 = root->fs_info->tree_root;
8403 	level = btrfs_header_level(root1->node);
8404 	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8405 				    root1->node->start, 0, level, 0,
8406 				    root1->nodesize, NULL);
8409 	root1 = root->fs_info->chunk_root;
8410 	level = btrfs_header_level(root1->node);
8411 	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8412 				    root1->node->start, 0, level, 0,
8413 				    root1->nodesize, NULL);
8416 	btrfs_init_path(&path);
8419 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8420 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8425 		leaf = path.nodes[0];
8426 		slot = path.slots[0];
8427 		if (slot >= btrfs_header_nritems(path.nodes[0])) {
8428 			ret = btrfs_next_leaf(root, &path);
8431 			leaf = path.nodes[0];
8432 			slot = path.slots[0];
8434 		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8435 		if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8436 			unsigned long offset;
8439 			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8440 			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8441 			last_snapshot = btrfs_root_last_snapshot(&ri);
			/* drop_progress == 0 means the root is not mid-drop:
			 * queue it as a normal tree; otherwise queue it on
			 * dropping_trees with its resume key. */
8442 			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8443 				level = btrfs_root_level(&ri);
8444 				level_size = root->nodesize;
8445 				ret = add_root_item_to_list(&normal_trees,
8447 						btrfs_root_bytenr(&ri),
8448 						last_snapshot, level,
8449 						0, level_size, NULL);
8453 				level = btrfs_root_level(&ri);
8454 				level_size = root->nodesize;
8455 				objectid = found_key.objectid;
8456 				btrfs_disk_key_to_cpu(&found_key,
8458 				ret = add_root_item_to_list(&dropping_trees,
8460 						btrfs_root_bytenr(&ri),
8461 						last_snapshot, level,
8463 						level_size, &found_key);
8470 	btrfs_release_path(&path);
8473 	 * check_block can return -EAGAIN if it fixes something, please keep
8474 	 * this in mind when dealing with return values from these functions, if
8475 	 * we get -EAGAIN we want to fall through and restart the loop.
8477 	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8478 				  &seen, &reada, &nodes, &extent_cache,
8479 				  &chunk_cache, &dev_cache, &block_group_cache,
8486 	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8487 				  &pending, &seen, &reada, &nodes,
8488 				  &extent_cache, &chunk_cache, &dev_cache,
8489 				  &block_group_cache, &dev_extent_cache);
	/* Cross-check phase: chunks vs block groups vs device extents,
	 * then extent backrefs, then device items. */
8496 	ret = check_chunks(&chunk_cache, &block_group_cache,
8497 			   &dev_extent_cache, NULL, NULL, NULL, 0);
8504 	ret = check_extent_refs(root, &extent_cache);
8511 	ret = check_devices(&dev_cache, &dev_extent_cache);
8516 	task_stop(ctx.info);
	/* Normal-path teardown: unhook fsck state and free caches */
8518 	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8519 	extent_io_tree_cleanup(&excluded_extents);
8520 	root->fs_info->fsck_extent_cache = NULL;
8521 	root->fs_info->free_extent_hook = NULL;
8522 	root->fs_info->corrupt_blocks = NULL;
8523 	root->fs_info->excluded_extents = NULL;
8526 	free_chunk_cache_tree(&chunk_cache);
8527 	free_device_cache_tree(&dev_cache);
8528 	free_block_group_tree(&block_group_cache);
8529 	free_device_extent_tree(&dev_extent_cache);
8530 	free_extent_cache_tree(&seen);
8531 	free_extent_cache_tree(&pending);
8532 	free_extent_cache_tree(&reada);
8533 	free_extent_cache_tree(&nodes);
	/* Error-path teardown (presumably under a label elided from this
	 * chunk): frees everything, including the root item lists. */
8536 	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8537 	free_extent_cache_tree(&seen);
8538 	free_extent_cache_tree(&pending);
8539 	free_extent_cache_tree(&reada);
8540 	free_extent_cache_tree(&nodes);
8541 	free_chunk_cache_tree(&chunk_cache);
8542 	free_block_group_tree(&block_group_cache);
8543 	free_device_cache_tree(&dev_cache);
8544 	free_device_extent_tree(&dev_extent_cache);
8545 	free_extent_record_cache(root->fs_info, &extent_cache);
8546 	free_root_item_list(&normal_trees);
8547 	free_root_item_list(&dropping_trees);
8548 	extent_io_tree_cleanup(&excluded_extents);
 * Check backrefs of a tree block given by @bytenr or @eb.
 *
 * @root:	the root containing the @bytenr or @eb
 * @eb:	tree block extent buffer, can be NULL
 * @bytenr:	bytenr of the tree block to search
 * @level:	tree level of the tree block
 * @owner:	owner of the tree block
 *
 * Return >0 for any error found and output error message
 * Return 0 for no error found
 */
8564 static int check_tree_block_ref(struct btrfs_root *root,
8565 				struct extent_buffer *eb, u64 bytenr,
8566 				int level, u64 owner)
8568 	struct btrfs_key key;
8569 	struct btrfs_root *extent_root = root->fs_info->extent_root;
8570 	struct btrfs_path path;
8571 	struct btrfs_extent_item *ei;
8572 	struct btrfs_extent_inline_ref *iref;
8573 	struct extent_buffer *leaf;
8579 	u32 nodesize = root->nodesize;
	/* Key type depends on SKINNY_METADATA: METADATA_ITEM stores the
	 * level in key.offset, EXTENT_ITEM uses a tree_block_info. */
8586 	btrfs_init_path(&path);
8587 	key.objectid = bytenr;
8588 	if (btrfs_fs_incompat(root->fs_info,
8589 			      BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8590 		key.type = BTRFS_METADATA_ITEM_KEY;
8592 		key.type = BTRFS_EXTENT_ITEM_KEY;
8593 	key.offset = (u64)-1;
8595 	/* Search for the backref in extent tree */
8596 	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8598 		err |= BACKREF_MISSING;
8601 	ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8603 		err |= BACKREF_MISSING;
8607 	leaf = path.nodes[0];
8608 	slot = path.slots[0];
8609 	btrfs_item_key_to_cpu(leaf, &key, slot);
8611 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8613 	if (key.type == BTRFS_METADATA_ITEM_KEY) {
8614 		skinny_level = (int)key.offset;
8615 		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8617 		struct btrfs_tree_block_info *info;
8619 		info = (struct btrfs_tree_block_info *)(ei + 1);
8620 		skinny_level = btrfs_tree_block_level(leaf, info);
8621 		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	/* Sanity checks comparing the extent item against the header of
	 * @eb (only when @eb was provided): flags, generation, level,
	 * and refcount for non-fs trees. */
8628 		if (!(btrfs_extent_flags(leaf, ei) &
8629 		      BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8631 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8632 				key.objectid, nodesize,
8633 				BTRFS_EXTENT_FLAG_TREE_BLOCK);
8634 			err = BACKREF_MISMATCH;
8636 		header_gen = btrfs_header_generation(eb);
8637 		extent_gen = btrfs_extent_generation(leaf, ei);
8638 		if (header_gen != extent_gen) {
8640 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8641 				key.objectid, nodesize, header_gen,
8643 			err = BACKREF_MISMATCH;
8645 		if (level != skinny_level) {
8647 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8648 				key.objectid, nodesize, level, skinny_level);
8649 			err = BACKREF_MISMATCH;
8651 		if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8653 "extent[%llu %u] is referred by other roots than %llu",
8654 				key.objectid, nodesize, root->objectid);
8655 			err = BACKREF_MISMATCH;
	/*
8660 	 * Iterate the extent/metadata item to find the exact backref
	 */
8662 	item_size = btrfs_item_size_nr(leaf, slot);
8663 	ptr = (unsigned long)iref;
8664 	end = (unsigned long)ei + item_size;
8666 		iref = (struct btrfs_extent_inline_ref *)ptr;
8667 		type = btrfs_extent_inline_ref_type(leaf, iref);
8668 		offset = btrfs_extent_inline_ref_offset(leaf, iref);
8670 		if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8671 			(offset == root->objectid || offset == owner)) {
8673 		} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8674 			/* Check if the backref points to valid referencer */
8675 			found_ref = !check_tree_block_ref(root, NULL, offset,
8681 		ptr += btrfs_extent_inline_ref_size(type);
	/*
8685 	 * Inlined extent item doesn't have what we need, check
8686 	 * TREE_BLOCK_REF_KEY
	 */
8689 		btrfs_release_path(&path);
8690 		key.objectid = bytenr;
8691 		key.type = BTRFS_TREE_BLOCK_REF_KEY;
8692 		key.offset = root->objectid;
8694 		ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8699 		err |= BACKREF_MISSING;
8701 	btrfs_release_path(&path);
8702 	if (eb && (err & BACKREF_MISSING))
8703 		error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8704 		      bytenr, nodesize, owner, level);
 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
 *
 * Return >0 any error found and output error message
 * Return 0 for no error found
 */
8714 static int check_extent_data_item(struct btrfs_root *root,
8715 				  struct extent_buffer *eb, int slot)
8717 	struct btrfs_file_extent_item *fi;
8718 	struct btrfs_path path;
8719 	struct btrfs_root *extent_root = root->fs_info->extent_root;
8720 	struct btrfs_key fi_key;
8721 	struct btrfs_key dbref_key;
8722 	struct extent_buffer *leaf;
8723 	struct btrfs_extent_item *ei;
8724 	struct btrfs_extent_inline_ref *iref;
8725 	struct btrfs_extent_data_ref *dref;
8727 	u64 file_extent_gen;
8730 	u64 extent_num_bytes;
8738 	int found_dbackref = 0;
8742 	btrfs_item_key_to_cpu(eb, &fi_key, slot);
8743 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8744 	file_extent_gen = btrfs_file_extent_generation(eb, fi);
8746 	/* Nothing to check for hole and inline data extents */
8747 	if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8748 	    btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8751 	disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8752 	disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8753 	extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8755 	/* Check unaligned disk_num_bytes and num_bytes */
8756 	if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8758 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8759 			fi_key.objectid, fi_key.offset, disk_num_bytes,
8761 		err |= BYTES_UNALIGNED;
	/* Accumulate global accounting used for the final report */
8763 		data_bytes_allocated += disk_num_bytes;
8765 	if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8767 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8768 			fi_key.objectid, fi_key.offset, extent_num_bytes,
8770 		err |= BYTES_UNALIGNED;
8772 		data_bytes_referenced += extent_num_bytes;
8774 	owner = btrfs_header_owner(eb);
8776 	/* Check the extent item of the file extent in extent tree */
8777 	btrfs_init_path(&path);
8778 	dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8779 	dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8780 	dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8782 	ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8784 		err |= BACKREF_MISSING;
8788 	leaf = path.nodes[0];
8789 	slot = path.slots[0];
8790 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8792 	extent_flags = btrfs_extent_flags(leaf, ei);
8793 	extent_gen = btrfs_extent_generation(leaf, ei);
8795 	if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8797 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8798 			disk_bytenr, disk_num_bytes,
8799 			BTRFS_EXTENT_FLAG_DATA);
8800 		err |= BACKREF_MISMATCH;
	/* File extent generation can never be newer than the extent item */
8803 	if (file_extent_gen < extent_gen) {
8805 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8806 			disk_bytenr, disk_num_bytes, file_extent_gen,
8808 		err |= BACKREF_MISMATCH;
8811 	/* Check data backref inside that extent item */
8812 	item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8813 	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8814 	ptr = (unsigned long)iref;
8815 	end = (unsigned long)ei + item_size;
8817 		iref = (struct btrfs_extent_inline_ref *)ptr;
8818 		type = btrfs_extent_inline_ref_type(leaf, iref);
8819 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8821 		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8822 			ref_root = btrfs_extent_data_ref_root(leaf, dref);
8823 			if (ref_root == owner || ref_root == root->objectid)
8825 		} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
			/* Shared ref: validate the referencing tree block */
8826 			found_dbackref = !check_tree_block_ref(root, NULL,
8827 				btrfs_extent_inline_ref_offset(leaf, iref),
8833 		ptr += btrfs_extent_inline_ref_size(type);
8836 	/* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8837 	if (!found_dbackref) {
8838 		btrfs_release_path(&path);
8840 		btrfs_init_path(&path);
8841 		dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8842 		dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8843 		dbref_key.offset = hash_extent_data_ref(root->objectid,
8844 				fi_key.objectid, fi_key.offset);
8846 		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8847 					&dbref_key, &path, 0, 0);
8852 	if (!found_dbackref)
8853 		err |= BACKREF_MISSING;
8855 	btrfs_release_path(&path);
8856 	if (err & BACKREF_MISSING) {
8857 		error("data extent[%llu %llu] backref lost",
8858 		      disk_bytenr, disk_num_bytes);
 * Get real tree block level for the case like shared block
 * Return >= 0 as tree level
 * Return <0 for error
 */
8868 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8870 	struct extent_buffer *eb;
8871 	struct btrfs_path path;
8872 	struct btrfs_key key;
8873 	struct btrfs_extent_item *ei;
8876 	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8881 	/* Search extent tree for extent generation and level */
8882 	key.objectid = bytenr;
8883 	key.type = BTRFS_METADATA_ITEM_KEY;
8884 	key.offset = (u64)-1;
8886 	btrfs_init_path(&path);
8887 	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8890 	ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8898 	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8899 	ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8900 			    struct btrfs_extent_item);
8901 	flags = btrfs_extent_flags(path.nodes[0], ei);
8902 	if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8907 	/* Get transid for later read_tree_block() check */
8908 	transid = btrfs_extent_generation(path.nodes[0], ei);
8910 	/* Get backref level as one source */
8911 	if (key.type == BTRFS_METADATA_ITEM_KEY) {
8912 		backref_level = key.offset;
8914 		struct btrfs_tree_block_info *info;
8916 		info = (struct btrfs_tree_block_info *)(ei + 1);
8917 		backref_level = btrfs_tree_block_level(path.nodes[0], info);
8919 	btrfs_release_path(&path);
8921 	/* Get level from tree block as an alternative source */
8922 	eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8923 	if (!extent_buffer_uptodate(eb)) {
8924 		free_extent_buffer(eb);
	/* The two sources must agree before we trust the level */
8927 	header_level = btrfs_header_level(eb);
8928 	free_extent_buffer(eb);
8930 	if (header_level != backref_level)
8932 	return header_level;
	/* Error exit (label elided from this chunk) */
8935 	btrfs_release_path(&path);
/*
 * Replace @root's node with a freshly allocated empty one (or reuse the
 * old node when @overwrite — condition partially elided in this chunk),
 * then update the root item and add the root to the dirty list.
 * Used while reinitializing corrupted trees (extent tree, data reloc).
 */
8939 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8940 				  struct btrfs_root *root, int overwrite)
8942 	struct extent_buffer *c;
8943 	struct extent_buffer *old = root->node;
8946 	struct btrfs_disk_key disk_key = {0,0,0};
8952 		extent_buffer_get(c);
8955 	c = btrfs_alloc_free_block(trans, root,
8957 				   root->root_key.objectid,
8958 				   &disk_key, level, 0, 0);
8961 		extent_buffer_get(c);
	/* Initialize a pristine header for the new (empty) root node */
8965 	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8966 	btrfs_set_header_level(c, level);
8967 	btrfs_set_header_bytenr(c, c->start);
8968 	btrfs_set_header_generation(c, trans->transid);
8969 	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8970 	btrfs_set_header_owner(c, root->root_key.objectid);
8972 	write_extent_buffer(c, root->fs_info->fsid,
8973 			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
8975 	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8976 			    btrfs_header_chunk_tree_uuid(c),
8979 	btrfs_mark_buffer_dirty(c);
	/*
8981 	 * this case can happen in the following case:
	 *
8983 	 * 1.overwrite previous root.
	 *
8985 	 * 2.reinit reloc data root, this is because we skip pin
8986 	 * down reloc data tree before which means we can allocate
8987 	 * same block bytenr here.
	 */
8989 	if (old->start == c->start) {
8990 		btrfs_set_root_generation(&root->root_item,
8992 		root->root_item.level = btrfs_header_level(root->node);
8993 		ret = btrfs_update_root(trans, root->fs_info->tree_root,
8994 					&root->root_key, &root->root_item);
8996 			free_extent_buffer(c);
9000 	free_extent_buffer(old);
9002 	add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb so the allocator
 * cannot hand those bytes out while the extent tree is rebuilt.
 * When @tree_root, also descend into root items found in leaves (except
 * extent root and reloc roots, which are being recreated/skipped).
 */
9006 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9007 				struct extent_buffer *eb, int tree_root)
9009 	struct extent_buffer *tmp;
9010 	struct btrfs_root_item *ri;
9011 	struct btrfs_key key;
9014 	int level = btrfs_header_level(eb);
	/*
9020 	 * If we have pinned this block before, don't pin it again.
9021 	 * This can not only avoid forever loop with broken filesystem
9022 	 * but also give us some speedups.
	 */
9024 	if (test_range_bit(&fs_info->pinned_extents, eb->start,
9025 			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9028 	btrfs_pin_extent(fs_info, eb->start, eb->len);
9030 	nodesize = btrfs_super_nodesize(fs_info->super_copy);
9031 	nritems = btrfs_header_nritems(eb);
9032 	for (i = 0; i < nritems; i++) {
		/* Leaf-of-tree-root branch: follow ROOT_ITEMs */
9034 			btrfs_item_key_to_cpu(eb, &key, i);
9035 			if (key.type != BTRFS_ROOT_ITEM_KEY)
9037 			/* Skip the extent root and reloc roots */
9038 			if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9039 			    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9040 			    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9042 			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9043 			bytenr = btrfs_disk_root_bytenr(eb, ri);
			/*
9046 			 * If at any point we start needing the real root we
9047 			 * will have to build a stump root for the root we are
9048 			 * in, but for now this doesn't actually use the root so
9049 			 * just pass in extent_root.
			 */
9051 			tmp = read_tree_block(fs_info->extent_root, bytenr,
9053 			if (!extent_buffer_uptodate(tmp)) {
9054 				fprintf(stderr, "Error reading root block\n");
9057 			ret = pin_down_tree_blocks(fs_info, tmp, 0);
9058 			free_extent_buffer(tmp);
		/* Node branch: follow child block pointers */
9062 			bytenr = btrfs_node_blockptr(eb, i);
9064 			/* If we aren't the tree root don't read the block */
9065 			if (level == 1 && !tree_root) {
9066 				btrfs_pin_extent(fs_info, bytenr, nodesize);
9070 			tmp = read_tree_block(fs_info->extent_root, bytenr,
9072 			if (!extent_buffer_uptodate(tmp)) {
9073 				fprintf(stderr, "Error reading tree block\n");
9076 			ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9077 			free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (the tree root pass descends into subvolume roots via tree_root=1).
 */
9086 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9090 	ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9094 	return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree after
 * btrfs_free_block_groups(): create one block group per chunk item and
 * mark its range free, then iterate the new groups (tail loop partially
 * elided here — presumably marking them cached/ro; verify in full file).
 */
9097 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9099 	struct btrfs_block_group_cache *cache;
9100 	struct btrfs_path *path;
9101 	struct extent_buffer *leaf;
9102 	struct btrfs_chunk *chunk;
9103 	struct btrfs_key key;
9107 	path = btrfs_alloc_path();
9112 	key.type = BTRFS_CHUNK_ITEM_KEY;
9115 	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9117 		btrfs_free_path(path);
	/*
9122 	 * We do this in case the block groups were screwed up and had alloc
9123 	 * bits that aren't actually set on the chunks. This happens with
9124 	 * restored images every time and could happen in real life I guess.
	 */
9126 	fs_info->avail_data_alloc_bits = 0;
9127 	fs_info->avail_metadata_alloc_bits = 0;
9128 	fs_info->avail_system_alloc_bits = 0;
9130 	/* First we need to create the in-memory block groups */
9132 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9133 			ret = btrfs_next_leaf(fs_info->chunk_root, path);
9135 				btrfs_free_path(path);
9143 		leaf = path->nodes[0];
9144 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9145 		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9150 		chunk = btrfs_item_ptr(leaf, path->slots[0],
9151 				       struct btrfs_chunk);
9152 		btrfs_add_block_group(fs_info, 0,
9153 				      btrfs_chunk_type(leaf, chunk),
9154 				      key.objectid, key.offset,
9155 				      btrfs_chunk_length(leaf, chunk));
		/* Whole chunk range starts out free; pinning re-reserves it */
9156 		set_extent_dirty(&fs_info->free_space_cache, key.offset,
9157 				 key.offset + btrfs_chunk_length(leaf, chunk),
9163 		cache = btrfs_lookup_first_block_group(fs_info, start);
9167 		start = cache->key.objectid + cache->key.offset;
9170 	btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete all
 * TREE_RELOC root items from the tree root, then reinitialize the data
 * reloc tree and recreate its root dir.
 * NOTE(review): several loop bodies/conditions and error gotos are
 * elided in this chunk.
 */
9174 static int reset_balance(struct btrfs_trans_handle *trans,
9175 			 struct btrfs_fs_info *fs_info)
9177 	struct btrfs_root *root = fs_info->tree_root;
9178 	struct btrfs_path *path;
9179 	struct extent_buffer *leaf;
9180 	struct btrfs_key key;
9181 	int del_slot, del_nr = 0;
9185 	path = btrfs_alloc_path();
	/* Step 1: delete the balance item itself, if present */
9189 	key.objectid = BTRFS_BALANCE_OBJECTID;
9190 	key.type = BTRFS_BALANCE_ITEM_KEY;
9193 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9198 		goto reinit_data_reloc;
9203 	ret = btrfs_del_item(trans, root, path);
9206 	btrfs_release_path(path);
	/* Step 2: delete every TREE_RELOC root item, batching ranges */
9208 	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9209 	key.type = BTRFS_ROOT_ITEM_KEY;
9212 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9216 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9221 				ret = btrfs_del_items(trans, root, path,
9228 			btrfs_release_path(path);
9231 			ret = btrfs_search_slot(trans, root, &key, path,
9238 		leaf = path->nodes[0];
9239 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9240 		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9242 		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9247 			del_slot = path->slots[0];
9256 		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9260 	btrfs_release_path(path);
	/* Step 3 (reinit_data_reloc label elided): rebuild data reloc tree */
9263 	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9264 	key.type = BTRFS_ROOT_ITEM_KEY;
9265 	key.offset = (u64)-1;
9266 	root = btrfs_read_fs_root(fs_info, &key);
9268 		fprintf(stderr, "Error reading data reloc tree\n");
9269 		ret = PTR_ERR(root);
9272 	record_root_in_trans(trans, root);
9273 	ret = btrfs_fsck_reinit_root(trans, root, 0);
9276 	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9278 	btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: pin all metadata so its bytes
 * are not reallocated, drop and recreate the in-memory block groups,
 * reinit the extent root, re-insert block group items, and finally clear
 * any pending balance. Refuses mixed block groups (see comment below).
 */
9282 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9283 			      struct btrfs_fs_info *fs_info)
	/*
9289 	 * The only reason we don't do this is because right now we're just
9290 	 * walking the trees we find and pinning down their bytes, we don't look
9291 	 * at any of the leaves. In order to do mixed groups we'd have to check
9292 	 * the leaves of any fs roots and pin down the bytes for any file
9293 	 * extents we find. Not hard but why do it if we don't have to?
	 */
9295 	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9296 		fprintf(stderr, "We don't support re-initing the extent tree "
9297 			"for mixed block groups yet, please notify a btrfs "
9298 			"developer you want to do this so they can add this "
9299 			"functionality.\n");
	/*
9304 	 * first we need to walk all of the trees except the extent tree and pin
9305 	 * down the bytes that are in use so we don't overwrite any existing
	 */
9308 	ret = pin_metadata_blocks(fs_info);
9310 		fprintf(stderr, "error pinning down used bytes\n");
	/*
9315 	 * Need to drop all the block groups since we're going to recreate all
	 */
9318 	btrfs_free_block_groups(fs_info);
9319 	ret = reset_block_groups(fs_info);
9321 		fprintf(stderr, "error resetting the block groups\n");
9325 	/* Ok we can allocate now, reinit the extent root */
9326 	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9328 		fprintf(stderr, "extent root initialization failed\n");
		/*
9330 		 * When the transaction code is updated we should end the
9331 		 * transaction, but for now progs only knows about commit so
9332 		 * just return an error.
		 */
	/*
9338 	 * Now we have all the in-memory block groups setup so we can make
9339 	 * allocations properly, and the metadata we care about is safe since we
9340 	 * pinned all of it above.
	 */
9343 		struct btrfs_block_group_cache *cache;
9345 		cache = btrfs_lookup_first_block_group(fs_info, start);
9348 		start = cache->key.objectid + cache->key.offset;
9349 		ret = btrfs_insert_item(trans, fs_info->extent_root,
9350 					&cache->key, &cache->item,
9351 					sizeof(cache->item));
9353 			fprintf(stderr, "Error adding block group\n");
		/* Flush delayed refs so each insertion is accounted */
9356 		btrfs_extent_post_op(trans, fs_info->extent_root);
9359 	ret = reset_balance(trans, fs_info);
9361 		fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force COW of @eb by looking up its owner root and doing a write-mode
 * search (cow=1) down to @eb's level with its first key, then committing.
 * Returns the search result (or a negative errno on setup failure).
 */
9366 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9368 	struct btrfs_path *path;
9369 	struct btrfs_trans_handle *trans;
9370 	struct btrfs_key key;
9373 	printf("Recowing metadata block %llu\n", eb->start);
9374 	key.objectid = btrfs_header_owner(eb);
9375 	key.type = BTRFS_ROOT_ITEM_KEY;
9376 	key.offset = (u64)-1;
9378 	root = btrfs_read_fs_root(root->fs_info, &key);
9380 		fprintf(stderr, "Couldn't find owner root %llu\n",
9382 		return PTR_ERR(root);
9385 	path = btrfs_alloc_path();
9389 	trans = btrfs_start_transaction(root, 1);
9390 	if (IS_ERR(trans)) {
9391 		btrfs_free_path(path);
9392 		return PTR_ERR(trans);
	/* Search at eb's own level so the COW happens to eb itself */
9395 	path->lowest_level = btrfs_header_level(eb);
9396 	if (path->lowest_level)
9397 		btrfs_node_key_to_cpu(eb, &key, 0);
9399 		btrfs_item_key_to_cpu(eb, &key, 0);
9401 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9402 	btrfs_commit_transaction(trans, root);
9403 	btrfs_free_path(path);
/*
 * Delete the item recorded in @bad from its owning root: look up the
 * root by bad->root_id, search for bad->key in write mode and delete it
 * within a one-item transaction.
 */
9407 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9409 	struct btrfs_path *path;
9410 	struct btrfs_trans_handle *trans;
9411 	struct btrfs_key key;
9414 	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9415 	       bad->key.type, bad->key.offset);
9416 	key.objectid = bad->root_id;
9417 	key.type = BTRFS_ROOT_ITEM_KEY;
9418 	key.offset = (u64)-1;
9420 	root = btrfs_read_fs_root(root->fs_info, &key);
9422 		fprintf(stderr, "Couldn't find owner root %llu\n",
9424 		return PTR_ERR(root);
9427 	path = btrfs_alloc_path();
9431 	trans = btrfs_start_transaction(root, 1);
9432 	if (IS_ERR(trans)) {
9433 		btrfs_free_path(path);
9434 		return PTR_ERR(trans);
9437 	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9443 	ret = btrfs_del_item(trans, root, path);
	/* Cleanup path (label elided): commit and release resources */
9445 	btrfs_commit_transaction(trans, root);
9446 	btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock, then committing the transaction so the change hits disk.
 */
9450 static int zero_log_tree(struct btrfs_root *root)
9452 	struct btrfs_trans_handle *trans;
9455 	trans = btrfs_start_transaction(root, 1);
9456 	if (IS_ERR(trans)) {
9457 		ret = PTR_ERR(trans);
9460 	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9461 	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9462 	ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent at [start, start+len) one sector at a time into
 * @buf and insert a csum item for each sector into the csum tree.
 * NOTE(review): the csum_file_block call passes "start + len" as the
 * logical end — matches the historical btrfs-progs API; confirm against
 * the full file's btrfs_csum_file_block() signature.
 */
9466 static int populate_csum(struct btrfs_trans_handle *trans,
9467 			 struct btrfs_root *csum_root, char *buf, u64 start,
9474 	while (offset < len) {
9475 		sectorsize = csum_root->sectorsize;
9476 		ret = read_extent_data(csum_root, buf, start + offset,
9480 		ret = btrfs_csum_file_block(trans, csum_root, start + len,
9481 					    start + offset, buf, sectorsize);
9484 		offset += sectorsize;
/*
 * Walk all items of @cur_root and, for every regular (REG) EXTENT_DATA
 * item, recompute csums for its on-disk extent via populate_csum().
 * Prealloc/inline extents are skipped.
 */
9489 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9490 				      struct btrfs_root *csum_root,
9491 				      struct btrfs_root *cur_root)
9493 	struct btrfs_path *path;
9494 	struct btrfs_key key;
9495 	struct extent_buffer *node;
9496 	struct btrfs_file_extent_item *fi;
9503 	path = btrfs_alloc_path();
	/* One sector worth of scratch space for populate_csum() */
9506 	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9516 	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9519 	/* Iterate all regular file extents and fill its csum */
9521 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9523 		if (key.type != BTRFS_EXTENT_DATA_KEY)
9525 		node = path->nodes[0];
9526 		slot = path->slots[0];
9527 		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9528 		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9530 		start = btrfs_file_extent_disk_bytenr(node, fi);
9531 		len = btrfs_file_extent_disk_num_bytes(node, fi);
9533 		ret = populate_csum(trans, csum_root, buf, start, len);
		/*
9540 		 * TODO: if next leaf is corrupted, jump to nearest next valid
		 */
9543 		ret = btrfs_next_item(cur_root, path);
9553 	btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvolume ROOT_ITEM in the
 * tree root and delegating to fill_csum_tree_from_one_fs_root() for each.
 * Used when the extent tree cannot be trusted (e.g. after --init-extent-tree).
 */
9558 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9559 				  struct btrfs_root *csum_root)
9561 	struct btrfs_fs_info *fs_info = csum_root->fs_info;
9562 	struct btrfs_path *path;
9563 	struct btrfs_root *tree_root = fs_info->tree_root;
9564 	struct btrfs_root *cur_root;
9565 	struct extent_buffer *node;
9566 	struct btrfs_key key;
9570 	path = btrfs_alloc_path();
9574 	key.objectid = BTRFS_FS_TREE_OBJECTID;
9576 	key.type = BTRFS_ROOT_ITEM_KEY;
9578 	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9587 		node = path->nodes[0];
9588 		slot = path->slots[0];
9589 		btrfs_item_key_to_cpu(node, &key, slot);
9590 		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9592 		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* Only fs/subvolume trees carry file extents */
9594 		if (!is_fstree(key.objectid))
9596 		key.offset = (u64)-1;
9598 		cur_root = btrfs_read_fs_root(fs_info, &key);
9599 		if (IS_ERR(cur_root) || !cur_root) {
9600 			fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9604 		ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9609 		ret = btrfs_next_item(tree_root, path);
9619 	btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating all DATA-flagged EXTENT_ITEMs in
 * the extent tree and csumming each via populate_csum(). Faster than the
 * fs-tree walk, but requires a trustworthy extent tree.
 */
9623 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9624 				      struct btrfs_root *csum_root)
9626 	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9627 	struct btrfs_path *path;
9628 	struct btrfs_extent_item *ei;
9629 	struct extent_buffer *leaf;
9631 	struct btrfs_key key;
9634 	path = btrfs_alloc_path();
9639 	key.type = BTRFS_EXTENT_ITEM_KEY;
9642 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9644 		btrfs_free_path(path);
9648 	buf = malloc(csum_root->sectorsize);
9650 		btrfs_free_path(path);
9655 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9656 			ret = btrfs_next_leaf(extent_root, path);
9664 		leaf = path->nodes[0];
9666 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9667 		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
		/* Metadata extents carry no data csums — skip them */
9672 		ei = btrfs_item_ptr(leaf, path->slots[0],
9673 				    struct btrfs_extent_item);
9674 		if (!(btrfs_extent_flags(leaf, ei) &
9675 		      BTRFS_EXTENT_FLAG_DATA)) {
9680 		ret = populate_csum(trans, csum_root, buf, key.objectid,
9687 	btrfs_free_path(path);
 * Recalculate the csum and put it into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case, we
 * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
 * will use fs/subvol trees to init the csum tree.
 */
9699 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9700 			  struct btrfs_root *csum_root,
	/* Dispatcher: choose the data source based on the (elided)
	 * search_fs_tree flag — fs trees when set, extent tree otherwise. */
9704 		return fill_csum_tree_from_fs(trans, csum_root);
9706 		return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache():
 * remove and free every root_item_info entry, then free the cache tree
 * itself and reset the global pointer. Safe to call when never built.
 */
9709 static void free_roots_info_cache(void)
9711 	if (!roots_info_cache)
9714 	while (!cache_tree_empty(roots_info_cache)) {
9715 		struct cache_extent *entry;
9716 		struct root_item_info *rii;
9718 		entry = first_cache_extent(roots_info_cache);
9721 		remove_cache_extent(roots_info_cache, entry);
9722 		rii = container_of(entry, struct root_item_info, cache_extent);
9726 	free(roots_info_cache);
9727 	roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is a
 * TREE_BLOCK_REF (i.e. candidate root nodes) and cache, per root id, the
 * highest level seen plus that block's bytenr and generation. The cache
 * is later used to detect/repair stale root items.
 */
9730 static int build_roots_info_cache(struct btrfs_fs_info *info)
9733 	struct btrfs_key key;
9734 	struct extent_buffer *leaf;
9735 	struct btrfs_path *path;
9737 	if (!roots_info_cache) {
9738 		roots_info_cache = malloc(sizeof(*roots_info_cache));
9739 		if (!roots_info_cache)
9741 		cache_tree_init(roots_info_cache);
9744 	path = btrfs_alloc_path();
9749 	key.type = BTRFS_EXTENT_ITEM_KEY;
9752 	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9755 	leaf = path->nodes[0];
9758 		struct btrfs_key found_key;
9759 		struct btrfs_extent_item *ei;
9760 		struct btrfs_extent_inline_ref *iref;
9761 		int slot = path->slots[0];
9766 		struct cache_extent *entry;
9767 		struct root_item_info *rii;
9769 		if (slot >= btrfs_header_nritems(leaf)) {
9770 			ret = btrfs_next_leaf(info->extent_root, path);
9777 			leaf = path->nodes[0];
9778 			slot = path->slots[0];
9781 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9783 		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9784 		    found_key.type != BTRFS_METADATA_ITEM_KEY)
9787 		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9788 		flags = btrfs_extent_flags(leaf, ei);
		/* Data extents can never be roots — skip */
9790 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9791 		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		/* Level comes from key.offset (skinny) or tree_block_info */
9794 		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9795 			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9796 			level = found_key.offset;
9798 			struct btrfs_tree_block_info *binfo;
9800 			binfo = (struct btrfs_tree_block_info *)(ei + 1);
9801 			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9802 			level = btrfs_tree_block_level(leaf, binfo);
		/*
9806 		 * For a root extent, it must be of the following type and the
9807 		 * first (and only one) iref in the item.
		 */
9809 		type = btrfs_extent_inline_ref_type(leaf, iref);
9810 		if (type != BTRFS_TREE_BLOCK_REF_KEY)
9813 		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9814 		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		/* First sighting of this root id: insert a fresh record */
9816 			rii = malloc(sizeof(struct root_item_info));
9821 			rii->cache_extent.start = root_id;
9822 			rii->cache_extent.size = 1;
9823 			rii->level = (u8)-1;
9824 			entry = &rii->cache_extent;
9825 			ret = insert_cache_extent(roots_info_cache, entry);
9828 			rii = container_of(entry, struct root_item_info,
9832 		ASSERT(rii->cache_extent.start == root_id);
9833 		ASSERT(rii->cache_extent.size == 1);
		/* Track only the highest-level block per root (the real root) */
9835 		if (level > rii->level || rii->level == (u8)-1) {
9837 			rii->bytenr = found_key.objectid;
9838 			rii->gen = btrfs_extent_generation(leaf, ei);
9839 			rii->node_count = 1;
9840 		} else if (level == rii->level) {
9848 	btrfs_free_path(path);
/*
 * Compare the on-disk root item referenced by @path against the data
 * cached by build_roots_info_cache() for @root_key->objectid. When
 * bytenr/level/generation disagree, report it; if not in read-only mode,
 * also rewrite the root item in place with the cached values.
 *
 * NOTE(review): return statements and some error-path lines are elided in
 * this view; exact return-value semantics must be confirmed in full source.
 */
9853 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9854 struct btrfs_path *path,
9855 const struct btrfs_key *root_key,
9856 const int read_only_mode)
9858 const u64 root_id = root_key->objectid;
9859 struct cache_extent *entry;
9860 struct root_item_info *rii;
9861 struct btrfs_root_item ri;
9862 unsigned long offset;
/* No extent-tree record for this root is a hard error. */
9864 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9867 "Error: could not find extent items for root %llu\n",
9868 root_key->objectid);
9872 rii = container_of(entry, struct root_item_info, cache_extent);
9873 ASSERT(rii->cache_extent.start == root_id);
9874 ASSERT(rii->cache_extent.size == 1);
/* More than one candidate root node at the top level is ambiguous. */
9876 if (rii->node_count != 1) {
9878 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into a local so it can be inspected. */
9883 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9884 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9886 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9887 btrfs_root_level(&ri) != rii->level ||
9888 btrfs_root_generation(&ri) != rii->gen) {
9891 * If we're in repair mode but our caller told us to not update
9892 * the root item, i.e. just check if it needs to be updated, don't
9893 * print this message, since the caller will call us again shortly
9894 * for the same root item without read only mode (the caller will
9895 * open a transaction first).
9897 if (!(read_only_mode && repair))
9899 "%sroot item for root %llu,"
9900 " current bytenr %llu, current gen %llu, current level %u,"
9901 " new bytenr %llu, new gen %llu, new level %u\n",
9902 (read_only_mode ? "" : "fixing "),
9904 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9905 btrfs_root_level(&ri),
9906 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than in the found node is suspicious
 * (the cached node should be the latest). */
9908 if (btrfs_root_generation(&ri) > rii->gen) {
9910 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9911 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write mode: fix the item in place with the cached values. */
9915 if (!read_only_mode) {
9916 btrfs_set_root_bytenr(&ri, rii->bytenr);
9917 btrfs_set_root_level(&ri, rii->level);
9918 btrfs_set_root_generation(&ri, rii->gen);
9919 write_extent_buffer(path->nodes[0], &ri,
9920 offset, sizeof(ri));
9930 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9931 * caused read-only snapshots to be corrupted if they were created at a moment
9932 * when the source subvolume/snapshot had orphan items. The issue was that the
9933 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9934 * node instead of the post orphan cleanup root node.
9935 * So this function, and its callees, just detects and fixes those cases. Even
9936 * though the regression was for read-only snapshots, this function applies to
9937 * any snapshot/subvolume root.
9938 * This must be run before any other repair code - not doing so makes other
9939 * repair code delete or modify backrefs in the extent tree for example, which
9940 * will result in an inconsistent fs after repairing the root items.
/*
 * Driver for the root-item fixups explained in the block comment above
 * this function in the original file: build the roots_info_cache, then
 * walk all ROOT_ITEMs starting at BTRFS_FIRST_FREE_OBJECTID, running
 * maybe_repair_root_item() on each (tree-reloc roots skipped), and
 * open/commit tree_root transactions only when something needs fixing.
 *
 * NOTE(review): this chunk is elided — the search restart logic, the
 * fixed-roots counter and the final return are not all visible here.
 */
9942 static int repair_root_items(struct btrfs_fs_info *info)
9944 struct btrfs_path *path = NULL;
9945 struct btrfs_key key;
9946 struct extent_buffer *leaf;
9947 struct btrfs_trans_handle *trans = NULL;
9952 ret = build_roots_info_cache(info);
9956 path = btrfs_alloc_path();
/* Subvolume/snapshot roots start at BTRFS_FIRST_FREE_OBJECTID. */
9962 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9963 key.type = BTRFS_ROOT_ITEM_KEY;
9968 * Avoid opening and committing transactions if a leaf doesn't have
9969 * any root items that need to be fixed, so that we avoid rotating
9970 * backup roots unnecessarily.
9973 trans = btrfs_start_transaction(info->tree_root, 1);
9974 if (IS_ERR(trans)) {
9975 ret = PTR_ERR(trans);
9980 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9984 leaf = path->nodes[0];
9987 struct btrfs_key found_key;
/* Leaf exhausted: remember the next key, drop the path, and commit
 * the per-leaf transaction (if any) before moving on. */
9989 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9990 int no_more_keys = find_next_key(path, &key);
9992 btrfs_release_path(path);
9994 ret = btrfs_commit_transaction(trans,
10006 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10008 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Relocation roots are transient — never "repair" them. */
10010 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass per item is read-only when no transaction is open. */
10013 ret = maybe_repair_root_item(info, path, &found_key,
/* Item needs fixing and we may write: elided lines presumably open a
 * transaction and redo the item in write mode — confirm in full source. */
10018 if (!trans && repair) {
10021 btrfs_release_path(path);
10031 free_roots_info_cache();
10032 btrfs_free_path(path);
10034 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Help/usage text for `btrfs check`, printed via usage() from cmd_check()
 * on bad arguments. (The NULL terminator and closing brace of this array
 * are in lines elided from this view.)
 */
10041 const char * const cmd_check_usage[] = {
10042 "btrfs check [options] <device>",
10043 "Check structural integrity of a filesystem (unmounted).",
10044 "Check structural integrity of an unmounted filesystem. Verify internal",
10045 "trees' consistency and item connectivity. In the repair mode try to",
10046 "fix the problems found.",
10047 "WARNING: the repair mode is considered dangerous",
10049 "-s|--super <superblock> use this superblock copy",
10050 "-b|--backup use the first valid backup root copy",
10051 "--repair try to repair the filesystem",
10052 "--readonly run in read-only mode (default)",
10053 "--init-csum-tree create a new CRC tree",
10054 "--init-extent-tree create a new extent tree",
10055 "--check-data-csum verify checksums of data blocks",
10056 "-Q|--qgroup-report print a report on qgroup consistency",
10057 "-E|--subvol-extents <subvolid>",
10058 " print subvolume extents and sharing state",
10059 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
10060 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
10061 "-p|--progress indicate progress",
10065 int cmd_check(int argc, char **argv)
10067 struct cache_tree root_cache;
10068 struct btrfs_root *root;
10069 struct btrfs_fs_info *info;
10072 u64 tree_root_bytenr = 0;
10073 u64 chunk_root_bytenr = 0;
10074 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10077 int init_csum_tree = 0;
10079 int qgroup_report = 0;
10080 int qgroups_repaired = 0;
10081 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10085 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10086 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10087 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10088 static const struct option long_options[] = {
10089 { "super", required_argument, NULL, 's' },
10090 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10091 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10092 { "init-csum-tree", no_argument, NULL,
10093 GETOPT_VAL_INIT_CSUM },
10094 { "init-extent-tree", no_argument, NULL,
10095 GETOPT_VAL_INIT_EXTENT },
10096 { "check-data-csum", no_argument, NULL,
10097 GETOPT_VAL_CHECK_CSUM },
10098 { "backup", no_argument, NULL, 'b' },
10099 { "subvol-extents", required_argument, NULL, 'E' },
10100 { "qgroup-report", no_argument, NULL, 'Q' },
10101 { "tree-root", required_argument, NULL, 'r' },
10102 { "chunk-root", required_argument, NULL,
10103 GETOPT_VAL_CHUNK_TREE },
10104 { "progress", no_argument, NULL, 'p' },
10105 { NULL, 0, NULL, 0}
10108 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10112 case 'a': /* ignored */ break;
10114 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10117 num = arg_strtou64(optarg);
10118 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10120 "ERROR: super mirror should be less than: %d\n",
10121 BTRFS_SUPER_MIRROR_MAX);
10124 bytenr = btrfs_sb_offset(((int)num));
10125 printf("using SB copy %llu, bytenr %llu\n", num,
10126 (unsigned long long)bytenr);
10132 subvolid = arg_strtou64(optarg);
10135 tree_root_bytenr = arg_strtou64(optarg);
10137 case GETOPT_VAL_CHUNK_TREE:
10138 chunk_root_bytenr = arg_strtou64(optarg);
10141 ctx.progress_enabled = true;
10145 usage(cmd_check_usage);
10146 case GETOPT_VAL_REPAIR:
10147 printf("enabling repair mode\n");
10149 ctree_flags |= OPEN_CTREE_WRITES;
10151 case GETOPT_VAL_READONLY:
10154 case GETOPT_VAL_INIT_CSUM:
10155 printf("Creating a new CRC tree\n");
10156 init_csum_tree = 1;
10158 ctree_flags |= OPEN_CTREE_WRITES;
10160 case GETOPT_VAL_INIT_EXTENT:
10161 init_extent_tree = 1;
10162 ctree_flags |= (OPEN_CTREE_WRITES |
10163 OPEN_CTREE_NO_BLOCK_GROUPS);
10166 case GETOPT_VAL_CHECK_CSUM:
10167 check_data_csum = 1;
10172 if (check_argc_exact(argc - optind, 1))
10173 usage(cmd_check_usage);
10175 if (ctx.progress_enabled) {
10176 ctx.tp = TASK_NOTHING;
10177 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10180 /* This check is the only reason for --readonly to exist */
10181 if (readonly && repair) {
10182 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10187 cache_tree_init(&root_cache);
10189 if((ret = check_mounted(argv[optind])) < 0) {
10190 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10193 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10198 /* only allow partial opening under repair mode */
10200 ctree_flags |= OPEN_CTREE_PARTIAL;
10202 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10203 chunk_root_bytenr, ctree_flags);
10205 fprintf(stderr, "Couldn't open file system\n");
10210 global_info = info;
10211 root = info->fs_root;
10214 * repair mode will force us to commit transaction which
10215 * will make us fail to load log tree when mounting.
10217 if (repair && btrfs_super_log_root(info->super_copy)) {
10218 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10223 ret = zero_log_tree(root);
10225 fprintf(stderr, "fail to zero log tree\n");
10230 uuid_unparse(info->super_copy->fsid, uuidbuf);
10231 if (qgroup_report) {
10232 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10234 ret = qgroup_verify_all(info);
10240 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10241 subvolid, argv[optind], uuidbuf);
10242 ret = print_extent_state(info, subvolid);
10245 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10247 if (!extent_buffer_uptodate(info->tree_root->node) ||
10248 !extent_buffer_uptodate(info->dev_root->node) ||
10249 !extent_buffer_uptodate(info->chunk_root->node)) {
10250 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10255 if (init_extent_tree || init_csum_tree) {
10256 struct btrfs_trans_handle *trans;
10258 trans = btrfs_start_transaction(info->extent_root, 0);
10259 if (IS_ERR(trans)) {
10260 fprintf(stderr, "Error starting transaction\n");
10261 ret = PTR_ERR(trans);
10265 if (init_extent_tree) {
10266 printf("Creating a new extent tree\n");
10267 ret = reinit_extent_tree(trans, info);
10272 if (init_csum_tree) {
10273 fprintf(stderr, "Reinit crc root\n");
10274 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10276 fprintf(stderr, "crc root initialization failed\n");
10281 ret = fill_csum_tree(trans, info->csum_root,
10284 fprintf(stderr, "crc refilling failed\n");
10289 * Ok now we commit and run the normal fsck, which will add
10290 * extent entries for all of the items it finds.
10292 ret = btrfs_commit_transaction(trans, info->extent_root);
10296 if (!extent_buffer_uptodate(info->extent_root->node)) {
10297 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10301 if (!extent_buffer_uptodate(info->csum_root->node)) {
10302 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10307 if (!ctx.progress_enabled)
10308 fprintf(stderr, "checking extents\n");
10309 ret = check_chunks_and_extents(root);
10311 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10313 ret = repair_root_items(info);
10317 fprintf(stderr, "Fixed %d roots.\n", ret);
10319 } else if (ret > 0) {
10321 "Found %d roots with an outdated root item.\n",
10324 "Please run a filesystem check with the option --repair to fix them.\n");
10329 if (!ctx.progress_enabled) {
10330 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10331 fprintf(stderr, "checking free space tree\n");
10333 fprintf(stderr, "checking free space cache\n");
10335 ret = check_space_cache(root);
10340 * We used to have to have these hole extents in between our real
10341 * extents so if we don't have this flag set we need to make sure there
10342 * are no gaps in the file extents for inodes, otherwise we can just
10343 * ignore it when this happens.
10345 no_holes = btrfs_fs_incompat(root->fs_info,
10346 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10347 if (!ctx.progress_enabled)
10348 fprintf(stderr, "checking fs roots\n");
10349 ret = check_fs_roots(root, &root_cache);
10353 fprintf(stderr, "checking csums\n");
10354 ret = check_csums(root);
10358 fprintf(stderr, "checking root refs\n");
10359 ret = check_root_refs(root, &root_cache);
10363 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10364 struct extent_buffer *eb;
10366 eb = list_first_entry(&root->fs_info->recow_ebs,
10367 struct extent_buffer, recow);
10368 list_del_init(&eb->recow);
10369 ret = recow_extent_buffer(root, eb);
10374 while (!list_empty(&delete_items)) {
10375 struct bad_item *bad;
10377 bad = list_first_entry(&delete_items, struct bad_item, list);
10378 list_del_init(&bad->list);
10380 ret = delete_bad_item(root, bad);
10384 if (info->quota_enabled) {
10386 fprintf(stderr, "checking quota groups\n");
10387 err = qgroup_verify_all(info);
10391 err = repair_qgroups(info, &qgroups_repaired);
10396 if (!list_empty(&root->fs_info->recow_ebs)) {
10397 fprintf(stderr, "Transid errors in file system\n");
10401 /* Don't override original ret */
10402 if (!ret && qgroups_repaired)
10403 ret = qgroups_repaired;
10405 if (found_old_backref) { /*
10406 * there was a disk format change when mixed
10407 * backref was in testing tree. The old format
10408 * existed about one week.
10410 printf("\n * Found old mixed backref format. "
10411 "The old format is not supported! *"
10412 "\n * Please mount the FS in readonly mode, "
10413 "backup data and re-format the FS. *\n\n");
10416 printf("found %llu bytes used err is %d\n",
10417 (unsigned long long)bytes_used, ret);
10418 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10419 printf("total tree bytes: %llu\n",
10420 (unsigned long long)total_btree_bytes);
10421 printf("total fs tree bytes: %llu\n",
10422 (unsigned long long)total_fs_tree_bytes);
10423 printf("total extent tree bytes: %llu\n",
10424 (unsigned long long)total_extent_tree_bytes);
10425 printf("btree space waste bytes: %llu\n",
10426 (unsigned long long)btree_space_waste);
10427 printf("file data blocks allocated: %llu\n referenced %llu\n",
10428 (unsigned long long)data_bytes_allocated,
10429 (unsigned long long)data_bytes_referenced);
10431 free_qgroup_counts();
10432 free_root_recs_tree(&root_cache);
10436 if (ctx.progress_enabled)
10437 task_deinit(ctx.info);