2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/*
 * Identifies which phase of fsck the progress task is reporting on.
 * Values index task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Shared state between the main check and the progress-printer thread. */
struct task_ctx {
	int progress_enabled;	/* non-zero when --progress was requested */
	enum task_position tp;	/* current phase being reported */
	struct task_info *info;	/* periodic-wakeup helper from task-utils */
};
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
/* Error bits for inode/root backrefs (inode_backref::errors etc). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
/* Error bits for inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet. Just internal use for error
 * classification.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referenceer found but does not match */
/*
 * NOTE(review): shares bit 4 with REFERENCER_MISMATCH — the two errors
 * cannot be distinguished in a combined mask; confirm this is intended.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
453 static void *print_status_check(void *p)
455 struct task_ctx *priv = p;
456 const char work_indicator[] = { '.', 'o', 'O', 'o' };
458 static char *task_position_string[] = {
460 "checking free space cache",
464 task_period_start(priv->info, 1000 /* 1s */);
466 if (priv->tp == TASK_NOTHING)
470 printf("%s [%c]\r", task_position_string[priv->tp],
471 work_indicator[count % 4]);
474 task_period_wait(priv->info);
/* Task teardown hook: finish the progress line cleanly. */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);

	return 0;
}
487 /* Compatible function to allow reuse of old codes */
488 static u64 first_extent_gap(struct rb_root *holes)
490 struct file_extent_hole *hole;
492 if (RB_EMPTY_ROOT(holes))
495 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
499 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
501 struct file_extent_hole *hole1;
502 struct file_extent_hole *hole2;
504 hole1 = rb_entry(node1, struct file_extent_hole, node);
505 hole2 = rb_entry(node2, struct file_extent_hole, node);
507 if (hole1->start > hole2->start)
509 if (hole1->start < hole2->start)
511 /* Now hole1->start == hole2->start */
512 if (hole1->len >= hole2->len)
514 * Hole 1 will be merge center
515 * Same hole will be merged later
518 /* Hole 2 will be merge center */
523 * Add a hole to the record
525 * This will do hole merge for copy_file_extent_holes(),
526 * which will ensure there won't be continuous holes.
528 static int add_file_extent_hole(struct rb_root *holes,
531 struct file_extent_hole *hole;
532 struct file_extent_hole *prev = NULL;
533 struct file_extent_hole *next = NULL;
535 hole = malloc(sizeof(*hole));
540 /* Since compare will not return 0, no -EEXIST will happen */
541 rb_insert(holes, &hole->node, compare_hole);
543 /* simple merge with previous hole */
544 if (rb_prev(&hole->node))
545 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
547 if (prev && prev->start + prev->len >= hole->start) {
548 hole->len = hole->start + hole->len - prev->start;
549 hole->start = prev->start;
550 rb_erase(&prev->node, holes);
555 /* iterate merge with next holes */
557 if (!rb_next(&hole->node))
559 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
561 if (hole->start + hole->len >= next->start) {
562 if (hole->start + hole->len <= next->start + next->len)
563 hole->len = next->start + next->len -
565 rb_erase(&next->node, holes);
574 static int compare_hole_range(struct rb_node *node, void *data)
576 struct file_extent_hole *hole;
579 hole = (struct file_extent_hole *)data;
582 hole = rb_entry(node, struct file_extent_hole, node);
583 if (start < hole->start)
585 if (start >= hole->start && start < hole->start + hole->len)
591 * Delete a hole in the record
593 * This will do the hole split and is much restrict than add.
595 static int del_file_extent_hole(struct rb_root *holes,
598 struct file_extent_hole *hole;
599 struct file_extent_hole tmp;
604 struct rb_node *node;
611 node = rb_search(holes, &tmp, compare_hole_range, NULL);
614 hole = rb_entry(node, struct file_extent_hole, node);
615 if (start + len > hole->start + hole->len)
619 * Now there will be no overlap, delete the hole and re-add the
620 * split(s) if they exists.
622 if (start > hole->start) {
623 prev_start = hole->start;
624 prev_len = start - hole->start;
627 if (hole->start + hole->len > start + len) {
628 next_start = start + len;
629 next_len = hole->start + hole->len - start - len;
632 rb_erase(node, holes);
635 ret = add_file_extent_hole(holes, prev_start, prev_len);
640 ret = add_file_extent_hole(holes, next_start, next_len);
647 static int copy_file_extent_holes(struct rb_root *dst,
650 struct file_extent_hole *hole;
651 struct rb_node *node;
654 node = rb_first(src);
656 hole = rb_entry(node, struct file_extent_hole, node);
657 ret = add_file_extent_hole(dst, hole->start, hole->len);
660 node = rb_next(node);
665 static void free_file_extent_holes(struct rb_root *holes)
667 struct rb_node *node;
668 struct file_extent_hole *hole;
670 node = rb_first(holes);
672 hole = rb_entry(node, struct file_extent_hole, node);
673 rb_erase(node, holes);
675 node = rb_first(holes);
679 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
681 static void record_root_in_trans(struct btrfs_trans_handle *trans,
682 struct btrfs_root *root)
684 if (root->last_trans != trans->transid) {
685 root->track_dirty = 1;
686 root->last_trans = trans->transid;
687 root->commit_root = root->node;
688 extent_buffer_get(root->node);
692 static u8 imode_to_type(u32 imode)
695 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
696 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
697 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
698 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
699 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
700 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
701 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
702 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
705 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
709 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
711 struct device_record *rec1;
712 struct device_record *rec2;
714 rec1 = rb_entry(node1, struct device_record, node);
715 rec2 = rb_entry(node2, struct device_record, node);
716 if (rec1->devid > rec2->devid)
718 else if (rec1->devid < rec2->devid)
724 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
726 struct inode_record *rec;
727 struct inode_backref *backref;
728 struct inode_backref *orig;
729 struct inode_backref *tmp;
730 struct orphan_data_extent *src_orphan;
731 struct orphan_data_extent *dst_orphan;
735 rec = malloc(sizeof(*rec));
737 return ERR_PTR(-ENOMEM);
738 memcpy(rec, orig_rec, sizeof(*rec));
740 INIT_LIST_HEAD(&rec->backrefs);
741 INIT_LIST_HEAD(&rec->orphan_extents);
742 rec->holes = RB_ROOT;
744 list_for_each_entry(orig, &orig_rec->backrefs, list) {
745 size = sizeof(*orig) + orig->namelen + 1;
746 backref = malloc(size);
751 memcpy(backref, orig, size);
752 list_add_tail(&backref->list, &rec->backrefs);
754 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
755 dst_orphan = malloc(sizeof(*dst_orphan));
760 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
761 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
763 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
769 if (!list_empty(&rec->backrefs))
770 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
771 list_del(&orig->list);
775 if (!list_empty(&rec->orphan_extents))
776 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
777 list_del(&orig->list);
786 static void print_orphan_data_extents(struct list_head *orphan_extents,
789 struct orphan_data_extent *orphan;
791 if (list_empty(orphan_extents))
793 printf("The following data extent is lost in tree %llu:\n",
795 list_for_each_entry(orphan, orphan_extents, list) {
796 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
797 orphan->objectid, orphan->offset, orphan->disk_bytenr,
802 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
804 u64 root_objectid = root->root_key.objectid;
805 int errors = rec->errors;
809 /* reloc root errors, we print its corresponding fs root objectid*/
810 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
811 root_objectid = root->root_key.offset;
812 fprintf(stderr, "reloc");
814 fprintf(stderr, "root %llu inode %llu errors %x",
815 (unsigned long long) root_objectid,
816 (unsigned long long) rec->ino, rec->errors);
818 if (errors & I_ERR_NO_INODE_ITEM)
819 fprintf(stderr, ", no inode item");
820 if (errors & I_ERR_NO_ORPHAN_ITEM)
821 fprintf(stderr, ", no orphan item");
822 if (errors & I_ERR_DUP_INODE_ITEM)
823 fprintf(stderr, ", dup inode item");
824 if (errors & I_ERR_DUP_DIR_INDEX)
825 fprintf(stderr, ", dup dir index");
826 if (errors & I_ERR_ODD_DIR_ITEM)
827 fprintf(stderr, ", odd dir item");
828 if (errors & I_ERR_ODD_FILE_EXTENT)
829 fprintf(stderr, ", odd file extent");
830 if (errors & I_ERR_BAD_FILE_EXTENT)
831 fprintf(stderr, ", bad file extent");
832 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
833 fprintf(stderr, ", file extent overlap");
834 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
835 fprintf(stderr, ", file extent discount");
836 if (errors & I_ERR_DIR_ISIZE_WRONG)
837 fprintf(stderr, ", dir isize wrong");
838 if (errors & I_ERR_FILE_NBYTES_WRONG)
839 fprintf(stderr, ", nbytes wrong");
840 if (errors & I_ERR_ODD_CSUM_ITEM)
841 fprintf(stderr, ", odd csum item");
842 if (errors & I_ERR_SOME_CSUM_MISSING)
843 fprintf(stderr, ", some csum missing");
844 if (errors & I_ERR_LINK_COUNT_WRONG)
845 fprintf(stderr, ", link count wrong");
846 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
847 fprintf(stderr, ", orphan file extent");
848 fprintf(stderr, "\n");
849 /* Print the orphan extents if needed */
850 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
851 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
853 /* Print the holes if needed */
854 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
855 struct file_extent_hole *hole;
856 struct rb_node *node;
859 node = rb_first(&rec->holes);
860 fprintf(stderr, "Found file extent holes:\n");
863 hole = rb_entry(node, struct file_extent_hole, node);
864 fprintf(stderr, "\tstart: %llu, len: %llu\n",
865 hole->start, hole->len);
866 node = rb_next(node);
869 fprintf(stderr, "\tstart: 0, len: %llu\n",
870 round_up(rec->isize, root->sectorsize));
874 static void print_ref_error(int errors)
876 if (errors & REF_ERR_NO_DIR_ITEM)
877 fprintf(stderr, ", no dir item");
878 if (errors & REF_ERR_NO_DIR_INDEX)
879 fprintf(stderr, ", no dir index");
880 if (errors & REF_ERR_NO_INODE_REF)
881 fprintf(stderr, ", no inode ref");
882 if (errors & REF_ERR_DUP_DIR_ITEM)
883 fprintf(stderr, ", dup dir item");
884 if (errors & REF_ERR_DUP_DIR_INDEX)
885 fprintf(stderr, ", dup dir index");
886 if (errors & REF_ERR_DUP_INODE_REF)
887 fprintf(stderr, ", dup inode ref");
888 if (errors & REF_ERR_INDEX_UNMATCH)
889 fprintf(stderr, ", index mismatch");
890 if (errors & REF_ERR_FILETYPE_UNMATCH)
891 fprintf(stderr, ", filetype mismatch");
892 if (errors & REF_ERR_NAME_TOO_LONG)
893 fprintf(stderr, ", name too long");
894 if (errors & REF_ERR_NO_ROOT_REF)
895 fprintf(stderr, ", no root ref");
896 if (errors & REF_ERR_NO_ROOT_BACKREF)
897 fprintf(stderr, ", no root backref");
898 if (errors & REF_ERR_DUP_ROOT_REF)
899 fprintf(stderr, ", dup root ref");
900 if (errors & REF_ERR_DUP_ROOT_BACKREF)
901 fprintf(stderr, ", dup root backref");
902 fprintf(stderr, "\n");
905 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
908 struct ptr_node *node;
909 struct cache_extent *cache;
910 struct inode_record *rec = NULL;
913 cache = lookup_cache_extent(inode_cache, ino, 1);
915 node = container_of(cache, struct ptr_node, cache);
917 if (mod && rec->refs > 1) {
918 node->data = clone_inode_rec(rec);
919 if (IS_ERR(node->data))
925 rec = calloc(1, sizeof(*rec));
927 return ERR_PTR(-ENOMEM);
929 rec->extent_start = (u64)-1;
931 INIT_LIST_HEAD(&rec->backrefs);
932 INIT_LIST_HEAD(&rec->orphan_extents);
933 rec->holes = RB_ROOT;
935 node = malloc(sizeof(*node));
938 return ERR_PTR(-ENOMEM);
940 node->cache.start = ino;
941 node->cache.size = 1;
944 if (ino == BTRFS_FREE_INO_OBJECTID)
947 ret = insert_cache_extent(inode_cache, &node->cache);
949 return ERR_PTR(-EEXIST);
954 static void free_orphan_data_extents(struct list_head *orphan_extents)
956 struct orphan_data_extent *orphan;
958 while (!list_empty(orphan_extents)) {
959 orphan = list_entry(orphan_extents->next,
960 struct orphan_data_extent, list);
961 list_del(&orphan->list);
966 static void free_inode_rec(struct inode_record *rec)
968 struct inode_backref *backref;
973 while (!list_empty(&rec->backrefs)) {
974 backref = to_inode_backref(rec->backrefs.next);
975 list_del(&backref->list);
978 free_orphan_data_extents(&rec->orphan_extents);
979 free_file_extent_holes(&rec->holes);
983 static int can_free_inode_rec(struct inode_record *rec)
985 if (!rec->errors && rec->checked && rec->found_inode_item &&
986 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
991 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
992 struct inode_record *rec)
994 struct cache_extent *cache;
995 struct inode_backref *tmp, *backref;
996 struct ptr_node *node;
997 unsigned char filetype;
999 if (!rec->found_inode_item)
1002 filetype = imode_to_type(rec->imode);
1003 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1004 if (backref->found_dir_item && backref->found_dir_index) {
1005 if (backref->filetype != filetype)
1006 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1007 if (!backref->errors && backref->found_inode_ref &&
1008 rec->nlink == rec->found_link) {
1009 list_del(&backref->list);
1015 if (!rec->checked || rec->merging)
1018 if (S_ISDIR(rec->imode)) {
1019 if (rec->found_size != rec->isize)
1020 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1021 if (rec->found_file_extent)
1022 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1023 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1024 if (rec->found_dir_item)
1025 rec->errors |= I_ERR_ODD_DIR_ITEM;
1026 if (rec->found_size != rec->nbytes)
1027 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1028 if (rec->nlink > 0 && !no_holes &&
1029 (rec->extent_end < rec->isize ||
1030 first_extent_gap(&rec->holes) < rec->isize))
1031 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1034 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1035 if (rec->found_csum_item && rec->nodatasum)
1036 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1037 if (rec->some_csum_missing && !rec->nodatasum)
1038 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1041 BUG_ON(rec->refs != 1);
1042 if (can_free_inode_rec(rec)) {
1043 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1044 node = container_of(cache, struct ptr_node, cache);
1045 BUG_ON(node->data != rec);
1046 remove_cache_extent(inode_cache, &node->cache);
1048 free_inode_rec(rec);
1052 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1054 struct btrfs_path path;
1055 struct btrfs_key key;
1058 key.objectid = BTRFS_ORPHAN_OBJECTID;
1059 key.type = BTRFS_ORPHAN_ITEM_KEY;
1062 btrfs_init_path(&path);
1063 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1064 btrfs_release_path(&path);
1070 static int process_inode_item(struct extent_buffer *eb,
1071 int slot, struct btrfs_key *key,
1072 struct shared_node *active_node)
1074 struct inode_record *rec;
1075 struct btrfs_inode_item *item;
1077 rec = active_node->current;
1078 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1079 if (rec->found_inode_item) {
1080 rec->errors |= I_ERR_DUP_INODE_ITEM;
1083 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1084 rec->nlink = btrfs_inode_nlink(eb, item);
1085 rec->isize = btrfs_inode_size(eb, item);
1086 rec->nbytes = btrfs_inode_nbytes(eb, item);
1087 rec->imode = btrfs_inode_mode(eb, item);
1088 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1090 rec->found_inode_item = 1;
1091 if (rec->nlink == 0)
1092 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1093 maybe_free_inode_rec(&active_node->inode_cache, rec);
1097 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1099 int namelen, u64 dir)
1101 struct inode_backref *backref;
1103 list_for_each_entry(backref, &rec->backrefs, list) {
1104 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1106 if (backref->dir != dir || backref->namelen != namelen)
1108 if (memcmp(name, backref->name, namelen))
1113 backref = malloc(sizeof(*backref) + namelen + 1);
1116 memset(backref, 0, sizeof(*backref));
1118 backref->namelen = namelen;
1119 memcpy(backref->name, name, namelen);
1120 backref->name[namelen] = '\0';
1121 list_add_tail(&backref->list, &rec->backrefs);
1125 static int add_inode_backref(struct cache_tree *inode_cache,
1126 u64 ino, u64 dir, u64 index,
1127 const char *name, int namelen,
1128 int filetype, int itemtype, int errors)
1130 struct inode_record *rec;
1131 struct inode_backref *backref;
1133 rec = get_inode_rec(inode_cache, ino, 1);
1134 BUG_ON(IS_ERR(rec));
1135 backref = get_inode_backref(rec, name, namelen, dir);
1138 backref->errors |= errors;
1139 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1140 if (backref->found_dir_index)
1141 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1142 if (backref->found_inode_ref && backref->index != index)
1143 backref->errors |= REF_ERR_INDEX_UNMATCH;
1144 if (backref->found_dir_item && backref->filetype != filetype)
1145 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1147 backref->index = index;
1148 backref->filetype = filetype;
1149 backref->found_dir_index = 1;
1150 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1152 if (backref->found_dir_item)
1153 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1154 if (backref->found_dir_index && backref->filetype != filetype)
1155 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1157 backref->filetype = filetype;
1158 backref->found_dir_item = 1;
1159 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1160 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1161 if (backref->found_inode_ref)
1162 backref->errors |= REF_ERR_DUP_INODE_REF;
1163 if (backref->found_dir_index && backref->index != index)
1164 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 backref->index = index;
1168 backref->ref_type = itemtype;
1169 backref->found_inode_ref = 1;
1174 maybe_free_inode_rec(inode_cache, rec);
1178 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1179 struct cache_tree *dst_cache)
1181 struct inode_backref *backref;
1186 list_for_each_entry(backref, &src->backrefs, list) {
1187 if (backref->found_dir_index) {
1188 add_inode_backref(dst_cache, dst->ino, backref->dir,
1189 backref->index, backref->name,
1190 backref->namelen, backref->filetype,
1191 BTRFS_DIR_INDEX_KEY, backref->errors);
1193 if (backref->found_dir_item) {
1195 add_inode_backref(dst_cache, dst->ino,
1196 backref->dir, 0, backref->name,
1197 backref->namelen, backref->filetype,
1198 BTRFS_DIR_ITEM_KEY, backref->errors);
1200 if (backref->found_inode_ref) {
1201 add_inode_backref(dst_cache, dst->ino,
1202 backref->dir, backref->index,
1203 backref->name, backref->namelen, 0,
1204 backref->ref_type, backref->errors);
1208 if (src->found_dir_item)
1209 dst->found_dir_item = 1;
1210 if (src->found_file_extent)
1211 dst->found_file_extent = 1;
1212 if (src->found_csum_item)
1213 dst->found_csum_item = 1;
1214 if (src->some_csum_missing)
1215 dst->some_csum_missing = 1;
1216 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1217 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1222 BUG_ON(src->found_link < dir_count);
1223 dst->found_link += src->found_link - dir_count;
1224 dst->found_size += src->found_size;
1225 if (src->extent_start != (u64)-1) {
1226 if (dst->extent_start == (u64)-1) {
1227 dst->extent_start = src->extent_start;
1228 dst->extent_end = src->extent_end;
1230 if (dst->extent_end > src->extent_start)
1231 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1232 else if (dst->extent_end < src->extent_start) {
1233 ret = add_file_extent_hole(&dst->holes,
1235 src->extent_start - dst->extent_end);
1237 if (dst->extent_end < src->extent_end)
1238 dst->extent_end = src->extent_end;
1242 dst->errors |= src->errors;
1243 if (src->found_inode_item) {
1244 if (!dst->found_inode_item) {
1245 dst->nlink = src->nlink;
1246 dst->isize = src->isize;
1247 dst->nbytes = src->nbytes;
1248 dst->imode = src->imode;
1249 dst->nodatasum = src->nodatasum;
1250 dst->found_inode_item = 1;
1252 dst->errors |= I_ERR_DUP_INODE_ITEM;
1260 static int splice_shared_node(struct shared_node *src_node,
1261 struct shared_node *dst_node)
1263 struct cache_extent *cache;
1264 struct ptr_node *node, *ins;
1265 struct cache_tree *src, *dst;
1266 struct inode_record *rec, *conflict;
1267 u64 current_ino = 0;
1271 if (--src_node->refs == 0)
1273 if (src_node->current)
1274 current_ino = src_node->current->ino;
1276 src = &src_node->root_cache;
1277 dst = &dst_node->root_cache;
1279 cache = search_cache_extent(src, 0);
1281 node = container_of(cache, struct ptr_node, cache);
1283 cache = next_cache_extent(cache);
1286 remove_cache_extent(src, &node->cache);
1289 ins = malloc(sizeof(*ins));
1291 ins->cache.start = node->cache.start;
1292 ins->cache.size = node->cache.size;
1296 ret = insert_cache_extent(dst, &ins->cache);
1297 if (ret == -EEXIST) {
1298 conflict = get_inode_rec(dst, rec->ino, 1);
1299 BUG_ON(IS_ERR(conflict));
1300 merge_inode_recs(rec, conflict, dst);
1302 conflict->checked = 1;
1303 if (dst_node->current == conflict)
1304 dst_node->current = NULL;
1306 maybe_free_inode_rec(dst, conflict);
1307 free_inode_rec(rec);
1314 if (src == &src_node->root_cache) {
1315 src = &src_node->inode_cache;
1316 dst = &dst_node->inode_cache;
1320 if (current_ino > 0 && (!dst_node->current ||
1321 current_ino > dst_node->current->ino)) {
1322 if (dst_node->current) {
1323 dst_node->current->checked = 1;
1324 maybe_free_inode_rec(dst, dst_node->current);
1326 dst_node->current = get_inode_rec(dst, current_ino, 1);
1327 BUG_ON(IS_ERR(dst_node->current));
1332 static void free_inode_ptr(struct cache_extent *cache)
1334 struct ptr_node *node;
1335 struct inode_record *rec;
1337 node = container_of(cache, struct ptr_node, cache);
1339 free_inode_rec(rec);
1343 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1345 static struct shared_node *find_shared_node(struct cache_tree *shared,
1348 struct cache_extent *cache;
1349 struct shared_node *node;
1351 cache = lookup_cache_extent(shared, bytenr, 1);
1353 node = container_of(cache, struct shared_node, cache);
1359 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1362 struct shared_node *node;
1364 node = calloc(1, sizeof(*node));
1367 node->cache.start = bytenr;
1368 node->cache.size = 1;
1369 cache_tree_init(&node->root_cache);
1370 cache_tree_init(&node->inode_cache);
1373 ret = insert_cache_extent(shared, &node->cache);
/*
 * enter_shared_node - record entry into a shared tree block at @bytenr
 * while walking down.  If the block is not yet tracked it is added to
 * wc->shared and becomes the active node for @level; if it is already
 * tracked, its accumulated per-block records are spliced into the
 * current active node (or dropped entirely when the root is being
 * deleted, i.e. root_refs == 0).
 * NOTE(review): several lines (early returns, ret checks) are elided
 * in this excerpt.
 */
1378 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1379 			     struct walk_control *wc, int level)
1381 	struct shared_node *node;
1382 	struct shared_node *dest;
1385 	if (level == wc->active_node)
1388 	BUG_ON(wc->active_node <= level);
1389 	node = find_shared_node(&wc->shared, bytenr);
1391 		ret = add_shared_node(&wc->shared, bytenr, refs);
1393 		node = find_shared_node(&wc->shared, bytenr);
1394 		wc->nodes[level] = node;
1395 		wc->active_node = level;
1399 	if (wc->root_level == wc->active_node &&
1400 	    btrfs_root_refs(&root->root_item) == 0) {
/* Dead root: discard this block's records instead of merging them. */
1401 		if (--node->refs == 0) {
1402 			free_inode_recs_tree(&node->root_cache);
1403 			free_inode_recs_tree(&node->inode_cache);
1404 			remove_cache_extent(&wc->shared, &node->cache);
1410 	dest = wc->nodes[wc->active_node];
1411 	splice_shared_node(node, dest);
1412 	if (node->refs == 0) {
1413 		remove_cache_extent(&wc->shared, &node->cache);
/*
 * leave_shared_node - counterpart of enter_shared_node(), called when
 * the walk ascends past @level.  Finds the next populated level above
 * (the new active node) and splices the departing node's records into
 * it, unless the node is still referenced from elsewhere.
 * NOTE(review): loop body and trailing lines are elided in this
 * excerpt.
 */
1419 static int leave_shared_node(struct btrfs_root *root,
1420 			     struct walk_control *wc, int level)
1422 	struct shared_node *node;
1423 	struct shared_node *dest;
1426 	if (level == wc->root_level)
/* Locate the nearest tracked ancestor level to become active. */
1429 	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1433 	BUG_ON(i >= BTRFS_MAX_LEVEL);
1435 	node = wc->nodes[wc->active_node];
1436 	wc->nodes[wc->active_node] = NULL;
1437 	wc->active_node = i;
1439 	dest = wc->nodes[wc->active_node];
1440 	if (wc->active_node < wc->root_level ||
1441 	    btrfs_root_refs(&root->root_item) > 0) {
1442 		BUG_ON(node->refs <= 1);
1443 		splice_shared_node(node, dest);
1445 		BUG_ON(node->refs < 2);
/*
 * is_child_root - decide the parent relationship of @child_root_id.
 *
 * Returns:
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 *
 * Implementation: first probes for a ROOT_REF (parent -> child) item;
 * failing that, scans all ROOT_BACKREF items of the child looking for
 * @parent_root_id while tracking whether any parent exists at all.
 * NOTE(review): some error-handling/advance lines are elided in this
 * excerpt.
 */
1459 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1462 	struct btrfs_path path;
1463 	struct btrfs_key key;
1464 	struct extent_buffer *leaf;
1468 	btrfs_init_path(&path);
1470 	key.objectid = parent_root_id;
1471 	key.type = BTRFS_ROOT_REF_KEY;
1472 	key.offset = child_root_id;
1473 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1477 	btrfs_release_path(&path);
/* No direct ROOT_REF: walk the child's backrefs instead. */
1481 	key.objectid = child_root_id;
1482 	key.type = BTRFS_ROOT_BACKREF_KEY;
1484 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1490 		leaf = path.nodes[0];
1491 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1492 			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1495 			leaf = path.nodes[0];
1498 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1499 		if (key.objectid != child_root_id ||
1500 		    key.type != BTRFS_ROOT_BACKREF_KEY)
1505 		if (key.offset == parent_root_id) {
1506 			btrfs_release_path(&path);
1513 	btrfs_release_path(&path);
1516 	return has_parent ? 0 : 2;
/*
 * process_dir_item - account every btrfs_dir_item packed into leaf slot
 * @slot against the in-memory inode records of @active_node.
 *
 * For each entry the name is read (clamped to BTRFS_NAME_LEN with a
 * REF_ERR_NAME_TOO_LONG error when truncated) and a backref is added to
 * either the inode cache (INODE_ITEM target), the root cache (ROOT_ITEM
 * target, i.e. a subvolume), or recorded under BTRFS_MULTIPLE_OBJECTIDS
 * when the location type is invalid.  DIR_INDEX items must contain
 * exactly one entry; more than one is flagged as I_ERR_DUP_DIR_INDEX.
 * NOTE(review): loop-advance lines (cur += len etc.) are elided in this
 * excerpt.
 */
1519 static int process_dir_item(struct btrfs_root *root,
1520 			    struct extent_buffer *eb,
1521 			    int slot, struct btrfs_key *key,
1522 			    struct shared_node *active_node)
1532 	struct btrfs_dir_item *di;
1533 	struct inode_record *rec;
1534 	struct cache_tree *root_cache;
1535 	struct cache_tree *inode_cache;
1536 	struct btrfs_key location;
1537 	char namebuf[BTRFS_NAME_LEN];
1539 	root_cache = &active_node->root_cache;
1540 	inode_cache = &active_node->inode_cache;
1541 	rec = active_node->current;
1542 	rec->found_dir_item = 1;
1544 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1545 	total = btrfs_item_size_nr(eb, slot);
1546 	while (cur < total) {
1548 		btrfs_dir_item_key_to_cpu(eb, di, &location);
1549 		name_len = btrfs_dir_name_len(eb, di);
1550 		data_len = btrfs_dir_data_len(eb, di);
1551 		filetype = btrfs_dir_type(eb, di);
/* Directory i_size accounting: sum of entry name lengths. */
1553 		rec->found_size += name_len;
1554 		if (name_len <= BTRFS_NAME_LEN) {
1558 			len = BTRFS_NAME_LEN;
1559 			error = REF_ERR_NAME_TOO_LONG;
1561 		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1563 		if (location.type == BTRFS_INODE_ITEM_KEY) {
1564 			add_inode_backref(inode_cache, location.objectid,
1565 					  key->objectid, key->offset, namebuf,
1566 					  len, filetype, key->type, error);
1567 		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1568 			add_inode_backref(root_cache, location.objectid,
1569 					  key->objectid, key->offset,
1570 					  namebuf, len, filetype,
1573 			fprintf(stderr, "invalid location in dir item %u\n",
1575 			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1576 					  key->objectid, key->offset, namebuf,
1577 					  len, filetype, key->type, error);
/* Advance to the next packed dir item inside this leaf item. */
1580 		len = sizeof(*di) + name_len + data_len;
1581 		di = (struct btrfs_dir_item *)((char *)di + len);
1584 	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1585 		rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * process_inode_ref - walk every btrfs_inode_ref packed into leaf slot
 * @slot and add a backref (dir = key->offset, index from the ref) for
 * each name to the active node's inode cache.  Over-long names are
 * clamped to BTRFS_NAME_LEN and flagged REF_ERR_NAME_TOO_LONG.
 * NOTE(review): loop-advance (cur += len) lines are elided in this
 * excerpt.
 */
1590 static int process_inode_ref(struct extent_buffer *eb,
1591 			     int slot, struct btrfs_key *key,
1592 			     struct shared_node *active_node)
1600 	struct cache_tree *inode_cache;
1601 	struct btrfs_inode_ref *ref;
1602 	char namebuf[BTRFS_NAME_LEN];
1604 	inode_cache = &active_node->inode_cache;
1606 	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1607 	total = btrfs_item_size_nr(eb, slot);
1608 	while (cur < total) {
1609 		name_len = btrfs_inode_ref_name_len(eb, ref);
1610 		index = btrfs_inode_ref_index(eb, ref);
1611 		if (name_len <= BTRFS_NAME_LEN) {
1615 			len = BTRFS_NAME_LEN;
1616 			error = REF_ERR_NAME_TOO_LONG;
1618 		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1619 		add_inode_backref(inode_cache, key->objectid, key->offset,
1620 				  index, namebuf, len, 0, key->type, error);
1622 		len = sizeof(*ref) + name_len;
1623 		ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * process_inode_extref - like process_inode_ref() but for extended
 * inode refs, where the parent directory inode is stored inside the
 * extref itself rather than in the item key offset.
 * NOTE(review): loop-advance lines are elided in this excerpt.
 */
1629 static int process_inode_extref(struct extent_buffer *eb,
1630 				int slot, struct btrfs_key *key,
1631 				struct shared_node *active_node)
1640 	struct cache_tree *inode_cache;
1641 	struct btrfs_inode_extref *extref;
1642 	char namebuf[BTRFS_NAME_LEN];
1644 	inode_cache = &active_node->inode_cache;
1646 	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1647 	total = btrfs_item_size_nr(eb, slot);
1648 	while (cur < total) {
1649 		name_len = btrfs_inode_extref_name_len(eb, extref);
1650 		index = btrfs_inode_extref_index(eb, extref);
1651 		parent = btrfs_inode_extref_parent(eb, extref);
1652 		if (name_len <= BTRFS_NAME_LEN) {
1656 			len = BTRFS_NAME_LEN;
1657 			error = REF_ERR_NAME_TOO_LONG;
1659 		read_extent_buffer(eb, namebuf,
1660 				   (unsigned long)(extref + 1), len);
1661 		add_inode_backref(inode_cache, key->objectid, parent,
1662 				  index, namebuf, len, 0, key->type, error);
1664 		len = sizeof(*extref) + name_len;
1665 		extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * count_csum_range - count how many bytes of the logical range
 * [start, start + len) are covered by checksum items in the csum tree,
 * storing the covered byte count in *found.
 *
 * Starts from the item at or before @start (stepping back one slot when
 * the search lands past it), then walks forward leaf by leaf, clipping
 * each csum item's coverage (item_size / csum_size sectors) against the
 * requested range.
 * NOTE(review): several advance/accumulate lines (e.g. *found += size,
 * slot increments) are elided in this excerpt.
 */
1672 static int count_csum_range(struct btrfs_root *root, u64 start,
1673 			    u64 len, u64 *found)
1675 	struct btrfs_key key;
1676 	struct btrfs_path path;
1677 	struct extent_buffer *leaf;
1682 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1684 	btrfs_init_path(&path);
1686 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1688 	key.type = BTRFS_EXTENT_CSUM_KEY;
1690 	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* If we landed past @start, the previous item may still cover it. */
1694 	if (ret > 0 && path.slots[0] > 0) {
1695 		leaf = path.nodes[0];
1696 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1697 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1698 		    key.type == BTRFS_EXTENT_CSUM_KEY)
1703 		leaf = path.nodes[0];
1704 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1705 			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1710 			leaf = path.nodes[0];
1713 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1714 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1715 		    key.type != BTRFS_EXTENT_CSUM_KEY)
1718 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1719 		if (key.offset >= start + len)
1722 		if (key.offset > start)
/* Each csum item covers (item_size / csum_size) sectors. */
1725 		size = btrfs_item_size_nr(leaf, path.slots[0]);
1726 		csum_end = key.offset + (size / csum_size) * root->sectorsize;
1727 		if (csum_end > start) {
1728 			size = min(csum_end - start, len);
1737 	btrfs_release_path(&path);
/*
 * process_file_extent - validate one EXTENT_DATA item of the current
 * inode record and update its accounting.
 *
 * Checks performed:
 *  - extent ordering: overlap with the previous extent end sets
 *    I_ERR_FILE_EXTENT_OVERLAP; a gap records a file hole;
 *  - inline extents: size accounted, then rounded up to sector size;
 *  - regular/prealloc extents: size alignment, ram_bytes consistency,
 *    and (for prealloc) absence of compression/encryption/encoding;
 *  - csum coverage via count_csum_range(): REG extents note missing
 *    csums, PREALLOC extents must have none (I_ERR_ODD_CSUM_ITEM).
 * NOTE(review): several guard/return lines are elided in this excerpt.
 */
1743 static int process_file_extent(struct btrfs_root *root,
1744 				struct extent_buffer *eb,
1745 				int slot, struct btrfs_key *key,
1746 				struct shared_node *active_node)
1748 	struct inode_record *rec;
1749 	struct btrfs_file_extent_item *fi;
1751 	u64 disk_bytenr = 0;
1752 	u64 extent_offset = 0;
1753 	u64 mask = root->sectorsize - 1;
1757 	rec = active_node->current;
1758 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1759 	rec->found_file_extent = 1;
1761 	if (rec->extent_start == (u64)-1) {
1762 		rec->extent_start = key->offset;
1763 		rec->extent_end = key->offset;
1766 	if (rec->extent_end > key->offset)
1767 		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1768 	else if (rec->extent_end < key->offset) {
/* Gap between extents: remember it as a hole for later checks. */
1769 		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1770 					   key->offset - rec->extent_end);
1775 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1776 	extent_type = btrfs_file_extent_type(eb, fi);
1778 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1779 		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1781 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782 		rec->found_size += num_bytes;
/* Round inline length up to a full sector for extent_end math. */
1783 		num_bytes = (num_bytes + mask) & ~mask;
1784 	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1785 		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1786 		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1787 		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1788 		extent_offset = btrfs_file_extent_offset(eb, fi);
1789 		if (num_bytes == 0 || (num_bytes & mask))
1790 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1791 		if (num_bytes + extent_offset >
1792 		    btrfs_file_extent_ram_bytes(eb, fi))
1793 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1794 		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1795 		    (btrfs_file_extent_compression(eb, fi) ||
1796 		     btrfs_file_extent_encryption(eb, fi) ||
1797 		     btrfs_file_extent_other_encoding(eb, fi)))
1798 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent: no bytes on disk. */
1799 		if (disk_bytenr > 0)
1800 			rec->found_size += num_bytes;
1802 		rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 	rec->extent_end = key->offset + num_bytes;
1807 	 * The data reloc tree will copy full extents into its inode and then
1808 	 * copy the corresponding csums. Because the extent it copied could be
1809 	 * a preallocated extent that hasn't been written to yet there may be no
1810 	 * csums to copy, ergo we won't have csums for our file extent. This is
1811 	 * ok so just don't bother checking csums if the inode belongs to the
1814 	if (disk_bytenr > 0 &&
1815 	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over on-disk bytes. */
1817 		if (btrfs_file_extent_compression(eb, fi))
1818 			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1820 			disk_bytenr += extent_offset;
1822 		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1825 		if (extent_type == BTRFS_FILE_EXTENT_REG) {
1827 				rec->found_csum_item = 1;
1828 			if (found < num_bytes)
1829 				rec->some_csum_missing = 1;
1830 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1832 				rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * process_one_leaf - dispatch every item of fs-tree leaf @eb to the
 * matching process_* handler, maintaining active_node->current as the
 * inode record whose items are being accumulated (records are closed
 * out and possibly freed as the objectid advances).
 *
 * Skips free-space-cache objects and orphan items; roots being deleted
 * (root_refs == 0 at the active/root level) are not processed.
 * NOTE(review): break statements and some error-path lines are elided
 * in this excerpt.
 */
1838 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1839 			    struct walk_control *wc)
1841 	struct btrfs_key key;
1845 	struct cache_tree *inode_cache;
1846 	struct shared_node *active_node;
1848 	if (wc->root_level == wc->active_node &&
1849 	    btrfs_root_refs(&root->root_item) == 0)
1852 	active_node = wc->nodes[wc->active_node];
1853 	inode_cache = &active_node->inode_cache;
1854 	nritems = btrfs_header_nritems(eb);
1855 	for (i = 0; i < nritems; i++) {
1856 		btrfs_item_key_to_cpu(eb, &key, i);
1858 		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1860 		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moving to a new inode: finish and maybe free the previous record. */
1863 		if (active_node->current == NULL ||
1864 		    active_node->current->ino < key.objectid) {
1865 			if (active_node->current) {
1866 				active_node->current->checked = 1;
1867 				maybe_free_inode_rec(inode_cache,
1868 						     active_node->current);
1870 			active_node->current = get_inode_rec(inode_cache,
1872 			BUG_ON(IS_ERR(active_node->current));
1875 		case BTRFS_DIR_ITEM_KEY:
1876 		case BTRFS_DIR_INDEX_KEY:
1877 			ret = process_dir_item(root, eb, i, &key, active_node);
1879 		case BTRFS_INODE_REF_KEY:
1880 			ret = process_inode_ref(eb, i, &key, active_node);
1882 		case BTRFS_INODE_EXTREF_KEY:
1883 			ret = process_inode_extref(eb, i, &key, active_node);
1885 		case BTRFS_INODE_ITEM_KEY:
1886 			ret = process_inode_item(eb, i, &key, active_node);
1888 		case BTRFS_EXTENT_DATA_KEY:
1889 			ret = process_file_extent(root, eb, i, &key,
/*
 * reada_walk_down - issue readahead for every child block pointer of
 * @node from @slot onward, so the subsequent synchronous reads in
 * walk_down_tree() hit warm caches.
 * NOTE(review): the early-return for leaves (level check) is elided in
 * this excerpt.
 */
1899 static void reada_walk_down(struct btrfs_root *root,
1900 			    struct extent_buffer *node, int slot)
1909 	level = btrfs_header_level(node);
1913 	nritems = btrfs_header_nritems(node);
1914 	blocksize = root->nodesize;
1915 	for (i = slot; i < nritems; i++) {
1916 		bytenr = btrfs_node_blockptr(node, i);
1917 		ptr_gen = btrfs_node_ptr_generation(node, i);
1918 		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
/*
 * Check the child node/leaf by the following condition:
 * 1. the first item key of the node/leaf should be the same with the one
 *    in parent.
 * 2. block in parent node should match the child node/leaf.
 * 3. generation of parent node and child's header should be consistent.
 *
 * Or the child node/leaf pointed by the key in parent is not valid.
 *
 * We hope to check leaf owner too, but since subvol may share leaves,
 * which makes leaf owner check not so strong, key check should be
 * sufficient enough for that case.
 */
1935 static int check_child_node(struct btrfs_root *root,
1936 			    struct extent_buffer *parent, int slot,
1937 			    struct extent_buffer *child)
1939 	struct btrfs_key parent_key;
1940 	struct btrfs_key child_key;
1943 	btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys, internal nodes store node keys. */
1944 	if (btrfs_header_level(child) == 0)
1945 		btrfs_item_key_to_cpu(child, &child_key, 0);
1947 		btrfs_node_key_to_cpu(child, &child_key, 0);
1949 	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1952 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1953 			parent_key.objectid, parent_key.type, parent_key.offset,
1954 			child_key.objectid, child_key.type, child_key.offset);
1956 	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1958 		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1959 			btrfs_node_blockptr(parent, slot),
1960 			btrfs_header_bytenr(child));
1962 	if (btrfs_node_ptr_generation(parent, slot) !=
1963 	    btrfs_header_generation(child)) {
1965 		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1966 			btrfs_header_generation(child),
1967 			btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache of (block start, extent refcount) pairs used by
 * walk_down_tree() to skip repeated btrfs_lookup_extent_info() calls.
 * NOTE(review): the struct header line is elided in this excerpt.
 */
1973 	u64 bytenr[BTRFS_MAX_LEVEL];
1974 	u64 refs[BTRFS_MAX_LEVEL];
/*
 * walk_down_tree - descend one child at a time from the current path
 * position, validating each block on the way down.
 *
 * For every block it (a) resolves the extent refcount (cached in
 * @nrefs per level), (b) enters shared-node tracking when refs > 1,
 * (c) reads the child (with readahead), and (d) runs key/bytenr/
 * generation checks (check_child_node) plus btrfs_check_leaf/node
 * sanity checks before stepping down.  Leaves are handed to
 * process_one_leaf().
 * NOTE(review): many error-handling and ret-check lines are elided in
 * this excerpt.
 */
1977 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1978 			  struct walk_control *wc, int *level,
1979 			  struct node_refs *nrefs)
1981 	enum btrfs_tree_block_status status;
1984 	struct extent_buffer *next;
1985 	struct extent_buffer *cur;
1990 	WARN_ON(*level < 0);
1991 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when this level's block is unchanged. */
1993 	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1994 		refs = nrefs->refs[*level];
1997 		ret = btrfs_lookup_extent_info(NULL, root,
1998 					       path->nodes[*level]->start,
1999 					       *level, 1, &refs, NULL);
2004 		nrefs->bytenr[*level] = path->nodes[*level]->start;
2005 		nrefs->refs[*level] = refs;
2009 		ret = enter_shared_node(root, path->nodes[*level]->start,
2017 	while (*level >= 0) {
2018 		WARN_ON(*level < 0);
2019 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
2020 		cur = path->nodes[*level];
2022 		if (btrfs_header_level(cur) != *level)
2025 		if (path->slots[*level] >= btrfs_header_nritems(cur))
2028 			ret = process_one_leaf(root, cur, wc);
2033 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2034 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2035 		blocksize = root->nodesize;
2037 		if (bytenr == nrefs->bytenr[*level - 1]) {
2038 			refs = nrefs->refs[*level - 1];
2040 			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2041 					*level - 1, 1, &refs, NULL);
2045 				nrefs->bytenr[*level - 1] = bytenr;
2046 				nrefs->refs[*level - 1] = refs;
/* Shared child already visited via another path: skip descending. */
2051 			ret = enter_shared_node(root, bytenr, refs,
2054 				path->slots[*level]++;
2059 		next = btrfs_find_tree_block(root, bytenr, blocksize);
2060 		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2061 			free_extent_buffer(next);
2062 			reada_walk_down(root, cur, path->slots[*level]);
2063 			next = read_tree_block(root, bytenr, blocksize,
2065 			if (!extent_buffer_uptodate(next)) {
2066 				struct btrfs_key node_key;
2068 				btrfs_node_key_to_cpu(path->nodes[*level],
2070 						      path->slots[*level]);
2071 				btrfs_add_corrupt_extent_record(root->fs_info,
2073 						path->nodes[*level]->start,
2074 						root->nodesize, *level);
2080 		ret = check_child_node(root, cur, path->slots[*level], next);
2086 		if (btrfs_is_leaf(next))
2087 			status = btrfs_check_leaf(root, NULL, next);
2089 			status = btrfs_check_node(root, NULL, next);
2090 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
2091 			free_extent_buffer(next);
/* Step down into the freshly-validated child block. */
2096 		*level = *level - 1;
2097 		free_extent_buffer(path->nodes[*level]);
2098 		path->nodes[*level] = next;
2099 		path->slots[*level] = 0;
2102 	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * walk_up_tree - ascend the path after a subtree is finished: pop fully
 * consumed levels, leaving shared-node tracking as each tracked level
 * is left, and stop at the first level that still has unvisited slots.
 * NOTE(review): the slot-increment/return lines inside the loop are
 * elided in this excerpt.
 */
2106 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2107 			struct walk_control *wc, int *level)
2110 	struct extent_buffer *leaf;
2112 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2113 		leaf = path->nodes[i];
2114 		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2119 			free_extent_buffer(path->nodes[*level]);
2120 			path->nodes[*level] = NULL;
2121 			BUG_ON(*level > wc->active_node);
2122 			if (*level == wc->active_node)
2123 				leave_shared_node(root, wc, *level);
/*
 * check_root_dir - sanity-check the root directory inode record: it
 * must have a clean inode item, nlink 1 with no counted links, and a
 * single ".." inode-ref backref (index 0) with no dir item/index.
 * NOTE(review): the goto/return lines are elided in this excerpt.
 */
2130 static int check_root_dir(struct inode_record *rec)
2132 	struct inode_backref *backref;
2135 	if (!rec->found_inode_item || rec->errors)
2137 	if (rec->nlink != 1 || rec->found_link != 0)
2139 	if (list_empty(&rec->backrefs))
2141 	backref = to_inode_backref(rec->backrefs.next);
2142 	if (!backref->found_inode_ref)
2144 	if (backref->index != 0 || backref->namelen != 2 ||
2145 	    memcmp(backref->name, "..", 2))
2147 	if (backref->found_dir_index || backref->found_dir_item)
/*
 * repair_inode_isize - rewrite the on-disk i_size of a directory inode
 * to the size accumulated from its dir entries (rec->found_size), then
 * clear I_ERR_DIR_ISIZE_WRONG.  Searches with offset (u64)-1 and steps
 * back one slot to land on the INODE_ITEM.
 * NOTE(review): slot-decrement and error-path lines are elided in this
 * excerpt.
 */
2154 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2155 			      struct btrfs_root *root, struct btrfs_path *path,
2156 			      struct inode_record *rec)
2158 	struct btrfs_inode_item *ei;
2159 	struct btrfs_key key;
2162 	key.objectid = rec->ino;
2163 	key.type = BTRFS_INODE_ITEM_KEY;
2164 	key.offset = (u64)-1;
2166 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2170 	if (!path->slots[0]) {
2177 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2178 	if (key.objectid != rec->ino) {
2183 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2184 			    struct btrfs_inode_item);
2185 	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2186 	btrfs_mark_buffer_dirty(path->nodes[0]);
2187 	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2188 	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2189 	       root->root_key.objectid);
2191 	btrfs_release_path(path);
/*
 * repair_inode_orphan_item - insert the missing orphan item for
 * rec->ino and clear I_ERR_NO_ORPHAN_ITEM on success.
 * NOTE(review): the ret check guarding the error-flag clear is elided
 * in this excerpt.
 */
2195 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2196 				    struct btrfs_root *root,
2197 				    struct btrfs_path *path,
2198 				    struct inode_record *rec)
2202 	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2203 	btrfs_release_path(path);
2205 		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * repair_inode_nbytes - reset the inode item's nbytes field to the byte
 * count accumulated from its file extents (rec->found_size) and clear
 * I_ERR_FILE_NBYTES_WRONG.
 * NOTE(review): key.offset assignment and error-path lines are elided
 * in this excerpt.
 */
2209 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2210 			       struct btrfs_root *root,
2211 			       struct btrfs_path *path,
2212 			       struct inode_record *rec)
2214 	struct btrfs_inode_item *ei;
2215 	struct btrfs_key key;
2218 	key.objectid = rec->ino;
2219 	key.type = BTRFS_INODE_ITEM_KEY;
2222 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2229 	/* Since ret == 0, no need to check anything */
2230 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2231 			    struct btrfs_inode_item);
2232 	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2233 	btrfs_mark_buffer_dirty(path->nodes[0]);
2234 	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2235 	printf("reset nbytes for ino %llu root %llu\n",
2236 	       rec->ino, root->root_key.objectid);
2238 	btrfs_release_path(path);
/*
 * add_missing_dir_index - re-create a DIR_INDEX item for a backref that
 * has a dir item and inode ref but no index.  Inserts an empty item of
 * the right size, fills the dir_item header and name in place, commits
 * the transaction, then updates the parent directory's record so its
 * i_size accounting (and I_ERR_DIR_ISIZE_WRONG flag) stays consistent.
 * NOTE(review): allocation-failure and insert-error paths are elided in
 * this excerpt.
 */
2242 static int add_missing_dir_index(struct btrfs_root *root,
2243 				 struct cache_tree *inode_cache,
2244 				 struct inode_record *rec,
2245 				 struct inode_backref *backref)
2247 	struct btrfs_path *path;
2248 	struct btrfs_trans_handle *trans;
2249 	struct btrfs_dir_item *dir_item;
2250 	struct extent_buffer *leaf;
2251 	struct btrfs_key key;
2252 	struct btrfs_disk_key disk_key;
2253 	struct inode_record *dir_rec;
2254 	unsigned long name_ptr;
2255 	u32 data_size = sizeof(*dir_item) + backref->namelen;
2258 	path = btrfs_alloc_path();
2262 	trans = btrfs_start_transaction(root, 1);
2263 	if (IS_ERR(trans)) {
2264 		btrfs_free_path(path);
2265 		return PTR_ERR(trans);
2268 	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2269 		(unsigned long long)rec->ino);
2270 	key.objectid = backref->dir;
2271 	key.type = BTRFS_DIR_INDEX_KEY;
2272 	key.offset = backref->index;
2274 	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2277 	leaf = path->nodes[0];
2278 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The dir item's embedded key points at the child inode item. */
2280 	disk_key.objectid = cpu_to_le64(rec->ino);
2281 	disk_key.type = BTRFS_INODE_ITEM_KEY;
2282 	disk_key.offset = 0;
2284 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2285 	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2286 	btrfs_set_dir_data_len(leaf, dir_item, 0);
2287 	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2288 	name_ptr = (unsigned long)(dir_item + 1);
2289 	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2290 	btrfs_mark_buffer_dirty(leaf);
2291 	btrfs_free_path(path);
2292 	btrfs_commit_transaction(trans, root);
2294 	backref->found_dir_index = 1;
2295 	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2296 	BUG_ON(IS_ERR(dir_rec));
/* Keep the parent dir's isize accounting in sync with the new entry. */
2299 	dir_rec->found_size += backref->namelen;
2300 	if (dir_rec->found_size == dir_rec->isize &&
2301 	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2302 		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2303 	if (dir_rec->found_size != dir_rec->isize)
2304 		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * delete_dir_index - remove a bogus DIR_INDEX entry described by
 * @backref.  If the located dir item contains only this name the whole
 * item is deleted; otherwise just the one name is removed from it.
 * NOTE(review): the lookup-failure branch and final return lines are
 * elided in this excerpt.
 */
2309 static int delete_dir_index(struct btrfs_root *root,
2310 			    struct cache_tree *inode_cache,
2311 			    struct inode_record *rec,
2312 			    struct inode_backref *backref)
2314 	struct btrfs_trans_handle *trans;
2315 	struct btrfs_dir_item *di;
2316 	struct btrfs_path *path;
2319 	path = btrfs_alloc_path();
2323 	trans = btrfs_start_transaction(root, 1);
2324 	if (IS_ERR(trans)) {
2325 		btrfs_free_path(path);
2326 		return PTR_ERR(trans);
2330 	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2331 		(unsigned long long)backref->dir,
2332 		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2333 		(unsigned long long)root->objectid);
2335 	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2336 				    backref->name, backref->namelen,
2337 				    backref->index, -1);
2340 		btrfs_free_path(path);
2341 		btrfs_commit_transaction(trans, root);
/* Whole-item delete vs. single-name delete within a shared item. */
2348 		ret = btrfs_del_item(trans, root, path);
2350 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
2352 	btrfs_free_path(path);
2353 	btrfs_commit_transaction(trans, root);
/*
 * create_inode_item - rebuild a missing INODE_ITEM for rec->ino from
 * whatever was discovered while scanning (link count, sizes, dir vs
 * file evidence).  Mode falls back to 0755 dir/regular file; all
 * timestamps are set to "now" except otime (zeroed).  The resulting
 * inode may be incomplete - the user is warned to verify afterwards.
 * NOTE(review): the found_link==0 branch and root_dir handling lines
 * are elided in this excerpt.
 */
2357 static int create_inode_item(struct btrfs_root *root,
2358 			     struct inode_record *rec,
2359 			     struct inode_backref *backref, int root_dir)
2361 	struct btrfs_trans_handle *trans;
2362 	struct btrfs_inode_item inode_item;
2363 	time_t now = time(NULL);
2366 	trans = btrfs_start_transaction(root, 1);
2367 	if (IS_ERR(trans)) {
2368 		ret = PTR_ERR(trans);
2372 	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2373 		"be incomplete, please check permissions and content after "
2374 		"the fsck completes.\n", (unsigned long long)root->objectid,
2375 		(unsigned long long)rec->ino);
2377 	memset(&inode_item, 0, sizeof(inode_item));
2378 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2380 		btrfs_set_stack_inode_nlink(&inode_item, 1);
2382 		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2383 	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2384 	if (rec->found_dir_item) {
/* Conflicting evidence (dir item + extents): prefer directory. */
2385 		if (rec->found_file_extent)
2386 			fprintf(stderr, "root %llu inode %llu has both a dir "
2387 				"item and extents, unsure if it is a dir or a "
2388 				"regular file so setting it as a directory\n",
2389 				(unsigned long long)root->objectid,
2390 				(unsigned long long)rec->ino);
2391 		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2392 		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2393 	} else if (!rec->found_dir_item) {
2394 		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2395 		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2397 	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2398 	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2399 	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2400 	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2401 	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2402 	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2403 	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2404 	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2406 	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2408 	btrfs_commit_transaction(trans, root);
/*
 * repair_inode_backrefs - walk all backrefs of @rec and fix whichever
 * inconsistencies can be repaired mechanically:
 *  - recreate the inode item for the root directory if missing;
 *  - delete dir indexes with no inode ref or mismatched index;
 *  - add a missing DIR_INDEX when dir item + inode ref both exist;
 *  - insert a missing dir index/item pair when only the inode ref
 *    exists (after checking for name conflicts in the parent dir);
 *  - recreate the inode item when all three refs agree but the item
 *    is absent.
 * Returns the number of repairs performed, or a negative error.
 * NOTE(review): several ret checks, 'repaired++' counters and continue
 * statements are elided in this excerpt.
 */
2412 static int repair_inode_backrefs(struct btrfs_root *root,
2413 				 struct inode_record *rec,
2414 				 struct cache_tree *inode_cache,
2417 	struct inode_backref *tmp, *backref;
2418 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
2422 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2423 		if (!delete && rec->ino == root_dirid) {
2424 			if (!rec->found_inode_item) {
2425 				ret = create_inode_item(root, rec, backref, 1);
2432 		/* Index 0 for root dir's are special, don't mess with it */
2433 		if (rec->ino == root_dirid && backref->index == 0)
2437 		    ((backref->found_dir_index && !backref->found_inode_ref) ||
2438 		     (backref->found_dir_index && backref->found_inode_ref &&
2439 		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2440 			ret = delete_dir_index(root, inode_cache, rec, backref);
2444 			list_del(&backref->list);
2448 		if (!delete && !backref->found_dir_index &&
2449 		    backref->found_dir_item && backref->found_inode_ref) {
2450 			ret = add_missing_dir_index(root, inode_cache, rec,
2455 		if (backref->found_dir_item &&
2456 		    backref->found_dir_index &&
2457 		    backref->found_dir_index) {
2458 			if (!backref->errors &&
2459 			    backref->found_inode_ref) {
/* Fully consistent backref: nothing left to repair, drop it. */
2460 				list_del(&backref->list);
2466 		if (!delete && (!backref->found_dir_index &&
2467 				!backref->found_dir_item &&
2468 				backref->found_inode_ref)) {
2469 			struct btrfs_trans_handle *trans;
2470 			struct btrfs_key location;
2472 			ret = check_dir_conflict(root, backref->name,
2478 				 * let nlink fixing routine to handle it,
2479 				 * which can do it better.
2484 			location.objectid = rec->ino;
2485 			location.type = BTRFS_INODE_ITEM_KEY;
2486 			location.offset = 0;
2488 			trans = btrfs_start_transaction(root, 1);
2489 			if (IS_ERR(trans)) {
2490 				ret = PTR_ERR(trans);
2493 			fprintf(stderr, "adding missing dir index/item pair "
2495 				(unsigned long long)rec->ino);
2496 			ret = btrfs_insert_dir_item(trans, root, backref->name,
2498 						    backref->dir, &location,
2499 						    imode_to_type(rec->imode),
2502 			btrfs_commit_transaction(trans, root);
2506 		if (!delete && (backref->found_inode_ref &&
2507 				backref->found_dir_index &&
2508 				backref->found_dir_item &&
2509 				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2510 				!rec->found_inode_item)) {
2511 			ret = create_inode_item(root, rec, backref, 0);
2518 	return ret ? ret : repaired;
/*
 * To determine the file type for nlink/inode_item repair
 *
 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
 * Return -ENOENT if file type is not found.
 *
 * Prefers the recovered inode item's mode; otherwise falls back to the
 * filetype recorded on any dir item/index backref.
 */
2527 static int find_file_type(struct inode_record *rec, u8 *type)
2529 	struct inode_backref *backref;
2531 	/* For inode item recovered case */
2532 	if (rec->found_inode_item) {
2533 		*type = imode_to_type(rec->imode);
2537 	list_for_each_entry(backref, &rec->backrefs, list) {
2538 		if (backref->found_dir_index || backref->found_dir_item) {
2539 			*type = backref->filetype;
/*
 * To determine the file name for nlink repair
 *
 * Return 0 if file name is found, set name and namelen.
 * Return -ENOENT if file name is not found.
 *
 * Takes the name of the first backref that came from a dir item,
 * dir index, or inode ref.
 */
2552 static int find_file_name(struct inode_record *rec,
2553 			  char *name, int *namelen)
2555 	struct inode_backref *backref;
2557 	list_for_each_entry(backref, &rec->backrefs, list) {
2558 		if (backref->found_dir_index || backref->found_dir_item ||
2559 		    backref->found_inode_ref) {
2560 			memcpy(name, backref->name, backref->namelen);
2561 			*namelen = backref->namelen;
/*
 * Reset the nlink of the inode to the correct one.
 *
 * Strategy: unlink every recorded backref (dropping the invalid ones
 * from the record so they are not re-added), force the on-disk nlink
 * to 0, then re-add all remaining valid links via btrfs_add_link(),
 * which increments nlink as it goes - ending with a consistent count.
 * NOTE(review): several ret checks and goto-out lines are elided in
 * this excerpt.
 */
2569 static int reset_nlink(struct btrfs_trans_handle *trans,
2570 		       struct btrfs_root *root,
2571 		       struct btrfs_path *path,
2572 		       struct inode_record *rec)
2574 	struct inode_backref *backref;
2575 	struct inode_backref *tmp;
2576 	struct btrfs_key key;
2577 	struct btrfs_inode_item *inode_item;
2580 	/* We don't believe this either, reset it and iterate backref */
2581 	rec->found_link = 0;
2583 	/* Remove all backref including the valid ones */
2584 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2585 		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2586 				   backref->index, backref->name,
2587 				   backref->namelen, 0);
2591 		/* remove invalid backref, so it won't be added back */
2592 		if (!(backref->found_dir_index &&
2593 		      backref->found_dir_item &&
2594 		      backref->found_inode_ref)) {
2595 			list_del(&backref->list);
2602 	/* Set nlink to 0 */
2603 	key.objectid = rec->ino;
2604 	key.type = BTRFS_INODE_ITEM_KEY;
2606 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2613 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2614 				    struct btrfs_inode_item);
2615 	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2616 	btrfs_mark_buffer_dirty(path->nodes[0]);
2617 	btrfs_release_path(path);
2620 	 * Add back valid inode_ref/dir_item/dir_index,
2621 	 * add_link() will handle the nlink inc, so new nlink must be correct
2623 	list_for_each_entry(backref, &rec->backrefs, list) {
2624 		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2625 				     backref->name, backref->namelen,
2626 				     backref->filetype, &backref->index, 1);
2631 	btrfs_release_path(path);
/*
 * repair_inode_nlinks - fix a wrong link count on rec->ino.
 *
 * Recovers a usable name/type first (falling back to the decimal ino
 * as name and regular file as type), resets nlink via reset_nlink(),
 * and - when no valid link remains - relinks the inode into a
 * "lost+found" directory, appending ".INO" suffixes on name clashes.
 * Clears I_ERR_LINK_COUNT_WRONG unconditionally at the end so the
 * repair loop cannot revisit this inode forever.
 * NOTE(review): several ret checks, goto-out lines and the trailing
 * return are elided in this excerpt.
 */
2635 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2636 			       struct btrfs_root *root,
2637 			       struct btrfs_path *path,
2638 			       struct inode_record *rec)
2640 	char *dir_name = "lost+found";
2641 	char namebuf[BTRFS_NAME_LEN] = {0};
2646 	int name_recovered = 0;
2647 	int type_recovered = 0;
2651 	 * Get file name and type first before these invalid inode ref
2652 	 * are deleted by remove_all_invalid_backref()
2654 	name_recovered = !find_file_name(rec, namebuf, &namelen);
2655 	type_recovered = !find_file_type(rec, &type);
2657 	if (!name_recovered) {
2658 		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2659 		       rec->ino, rec->ino);
2660 		namelen = count_digits(rec->ino);
2661 		sprintf(namebuf, "%llu", rec->ino);
2664 	if (!type_recovered) {
2665 		printf("Can't get file type for inode %llu, using FILE as fallback\n",
2667 		type = BTRFS_FT_REG_FILE;
2671 	ret = reset_nlink(trans, root, path, rec);
2674 			"Failed to reset nlink for inode %llu: %s\n",
2675 			rec->ino, strerror(-ret));
2679 	if (rec->found_link == 0) {
2680 		lost_found_ino = root->highest_inode;
2681 		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2686 		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2687 				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2690 			fprintf(stderr, "Failed to create '%s' dir: %s\n",
2691 				dir_name, strerror(-ret));
2694 		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2695 				     namebuf, namelen, type, NULL, 1);
2697 		 * Add ".INO" suffix several times to handle case where
2698 		 * "FILENAME.INO" is already taken by another file.
2700 		while (ret == -EEXIST) {
2702 			 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2704 			if (namelen + count_digits(rec->ino) + 1 >
2709 			snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2711 			namelen += count_digits(rec->ino) + 1;
2712 			ret = btrfs_add_link(trans, root, rec->ino,
2713 					     lost_found_ino, namebuf,
2714 					     namelen, type, NULL, 1);
2718 				"Failed to link the inode %llu to %s dir: %s\n",
2719 				rec->ino, dir_name, strerror(-ret));
2723 		 * Just increase the found_link, don't actually add the
2724 		 * backref. This will make things easier and this inode
2725 		 * record will be freed after the repair is done.
2726 		 * So fsck will not report problem about this inode.
2729 		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2730 		       namelen, namebuf, dir_name);
2732 	printf("Fixed the nlink of inode %llu\n", rec->ino);
2735 	 * Clear the flag anyway, or we will loop forever for the same inode
2736 	 * as it will not be removed from the bad inode list and the dead loop
2739 	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2740 	btrfs_release_path(path);
2745 * Check if there is any normal (reg or prealloc) file extent for the given
2747 * This is used to determine the file type when neither its dir_index/item nor
2748 * inode_item exists.
2750 * This will *NOT* report an error; if any error happens, just consider it does
2751 * not have any normal file extent.
/*
 * Scan the EXTENT_DATA items of @ino looking for a non-inline (regular or
 * preallocated) file extent.
 * NOTE(review): interior lines appear elided in this copy (gaps in the
 * embedded numbering), so the control flow shown here is partial.
 */
2753 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2755 struct btrfs_path *path;
2756 struct btrfs_key key;
2757 struct btrfs_key found_key;
2758 struct btrfs_file_extent_item *fi;
2762 path = btrfs_alloc_path();
/* Start from the inode's first EXTENT_DATA item and walk forward. */
2766 key.type = BTRFS_EXTENT_DATA_KEY;
2769 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2774 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2775 ret = btrfs_next_leaf(root, path);
2782 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave this inode's EXTENT_DATA key range. */
2784 if (found_key.objectid != ino ||
2785 found_key.type != BTRFS_EXTENT_DATA_KEY)
2787 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2788 struct btrfs_file_extent_item);
2789 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any non-inline extent counts as a "normal" (reg/prealloc) extent. */
2790 if (type != BTRFS_FILE_EXTENT_INLINE) {
2796 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding S_IF*
 * mode bits via a designated-initializer lookup table.
 * NOTE(review): there is no visible bounds check on @type — this assumes
 * callers only pass valid BTRFS_FT_* values; confirm at call sites.
 */
2800 static u32 btrfs_type_to_imode(u8 type)
2802 static u32 imode_by_btrfs_type[] = {
2803 [BTRFS_FT_REG_FILE] = S_IFREG,
2804 [BTRFS_FT_DIR] = S_IFDIR,
2805 [BTRFS_FT_CHRDEV] = S_IFCHR,
2806 [BTRFS_FT_BLKDEV] = S_IFBLK,
2807 [BTRFS_FT_FIFO] = S_IFIFO,
2808 [BTRFS_FT_SOCK] = S_IFSOCK,
2809 [BTRFS_FT_SYMLINK] = S_IFLNK,
2812 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec->ino.
 *
 * The file type is recovered from whatever evidence survives: a normal
 * file extent implies a regular file, a dir_item implies a directory,
 * orphan extents imply a regular file, otherwise regular file is used
 * as the fallback. Only the inode item is rebuilt here — nlink repair
 * (e.g. moving to lost+found) is deliberately left to the nlink fixer,
 * which is forced to run by setting I_ERR_LINK_COUNT_WRONG below.
 */
2815 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2816 struct btrfs_root *root,
2817 struct btrfs_path *path,
2818 struct inode_record *rec)
2822 int type_recovered = 0;
2825 printf("Trying to rebuild inode:%llu\n", rec->ino);
2827 type_recovered = !find_file_type(rec, &filetype);
2830 * Try to determine inode type if type not found.
2832 * For found regular file extent, it must be FILE.
2833 * For found dir_item/index, it must be DIR.
2835 * For undetermined one, use FILE as fallback.
2838 * 1. If found backref(inode_index/item is already handled) to it,
2840 * Need new inode-inode ref structure to allow search for that.
2842 if (!type_recovered) {
2843 if (rec->found_file_extent &&
2844 find_normal_file_extent(root, rec->ino)) {
2846 filetype = BTRFS_FT_REG_FILE;
2847 } else if (rec->found_dir_item) {
2849 filetype = BTRFS_FT_DIR;
2850 } else if (!list_empty(&rec->orphan_extents)) {
2852 filetype = BTRFS_FT_REG_FILE;
2854 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2857 filetype = BTRFS_FT_REG_FILE;
2861 ret = btrfs_new_inode(trans, root, rec->ino,
2862 mode | btrfs_type_to_imode(filetype));
2867 * Here inode rebuild is done, we only rebuild the inode item,
2868 * don't repair the nlink(like move to lost+found).
2869 * That is the job of nlink repair.
2871 * We just fill the record and return
2873 rec->found_dir_item = 1;
2874 rec->imode = mode | btrfs_type_to_imode(filetype);
2876 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2877 /* Ensure the inode_nlinks repair function will be called */
2878 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent recorded on @rec to the file.
 *
 * An orphan that conflicts with an existing file extent is discarded by
 * freeing its data extent; the rest are inserted as plain file extents
 * (assumed uncompressed with no data offset, using disk_len as length,
 * since compression state is unknowable here). The record's size, hole
 * tree and error flags are updated as each orphan is consumed.
 */
2883 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2884 struct btrfs_root *root,
2885 struct btrfs_path *path,
2886 struct inode_record *rec)
2888 struct orphan_data_extent *orphan;
2889 struct orphan_data_extent *tmp;
2892 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2894 * Check for conflicting file extents
2896 * Here we don't know whether the extents is compressed or not,
2897 * so we can only assume it not compressed nor data offset,
2898 * and use its disk_len as extent length.
2900 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2901 orphan->offset, orphan->disk_len, 0);
2902 btrfs_release_path(path);
2907 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2908 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting range: drop the orphan's backing extent instead. */
2909 ret = btrfs_free_extent(trans,
2910 root->fs_info->extent_root,
2911 orphan->disk_bytenr, orphan->disk_len,
2912 0, root->objectid, orphan->objectid,
2917 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2918 orphan->offset, orphan->disk_bytenr,
2919 orphan->disk_len, orphan->disk_len);
2923 /* Update file size info */
2924 rec->found_size += orphan->disk_len;
2925 if (rec->found_size == rec->nbytes)
2926 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2928 /* Update the file extent hole info too */
2929 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2933 if (RB_EMPTY_ROOT(&rec->holes))
2934 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2936 list_del(&orphan->list);
/* All orphans processed: the ORPHAN error no longer applies. */
2939 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fix missing ("discount") file extents for @rec->ino by punching an
 * explicit hole extent for each entry in the record's hole rb-tree,
 * clearing I_ERR_FILE_EXTENT_DISCOUNT once the tree drains. A file that
 * lost all of its extents gets a single hole covering
 * [0, round_up(isize, sectorsize)).
 */
2944 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2945 struct btrfs_root *root,
2946 struct btrfs_path *path,
2947 struct inode_record *rec)
2949 struct rb_node *node;
2950 struct file_extent_hole *hole;
2954 node = rb_first(&rec->holes);
2958 hole = rb_entry(node, struct file_extent_hole, node);
2959 ret = btrfs_punch_hole(trans, root, rec->ino,
2960 hole->start, hole->len);
2963 ret = del_file_extent_hole(&rec->holes, hole->start,
2967 if (RB_EMPTY_ROOT(&rec->holes))
2968 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2969 node = rb_first(&rec->holes);
2971 /* special case for a file losing all its file extent */
2973 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2974 round_up(rec->isize, root->sectorsize));
2978 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2979 rec->ino, root->objectid);
/*
 * Dispatch all applicable inode repairs for @rec inside one transaction.
 *
 * Bails out early when none of the repairable error bits are set. The
 * individual fixers run in a fixed order, and each subsequent fixer is
 * skipped as soon as one fails (the `!ret &&` chaining below); the
 * missing-inode-item repair runs first.
 */
2984 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2986 struct btrfs_trans_handle *trans;
2987 struct btrfs_path *path;
2990 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2991 I_ERR_NO_ORPHAN_ITEM |
2992 I_ERR_LINK_COUNT_WRONG |
2993 I_ERR_NO_INODE_ITEM |
2994 I_ERR_FILE_EXTENT_ORPHAN |
2995 I_ERR_FILE_EXTENT_DISCOUNT|
2996 I_ERR_FILE_NBYTES_WRONG)))
2999 path = btrfs_alloc_path();
3004 * For nlink repair, it may create a dir and add link, so
3005 * 2 for parent(256)'s dir_index and dir_item
3006 * 2 for lost+found dir's inode_item and inode_ref
3007 * 1 for the new inode_ref of the file
3008 * 2 for lost+found dir's dir_index and dir_item for the file
3010 trans = btrfs_start_transaction(root, 7);
3011 if (IS_ERR(trans)) {
3012 btrfs_free_path(path);
3013 return PTR_ERR(trans);
3016 if (rec->errors & I_ERR_NO_INODE_ITEM)
3017 ret = repair_inode_no_item(trans, root, path, rec);
3018 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3019 ret = repair_inode_orphan_extent(trans, root, path, rec);
3020 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3021 ret = repair_inode_discount_extent(trans, root, path, rec);
3022 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3023 ret = repair_inode_isize(trans, root, path, rec);
3024 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3025 ret = repair_inode_orphan_item(trans, root, path, rec);
3026 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3027 ret = repair_inode_nlinks(trans, root, path, rec);
3028 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3029 ret = repair_inode_nbytes(trans, root, path, rec);
3030 btrfs_commit_transaction(trans, root);
3031 btrfs_free_path(path);
/*
 * Validate (and, in repair mode, fix) every inode record collected for
 * @root in @inode_cache.
 *
 * Order of operations: record the highest seen ino (so a later
 * lost+found creation picks an unused number), repair backrefs first
 * (they can clear other inode errors), verify/recreate the root dir,
 * then walk every record, attempting try_repair_inode() before
 * reporting unresolved errors. Returns -1 if any error remains, else 0.
 * NOTE(review): interior lines appear elided in this copy (gaps in the
 * embedded numbering).
 */
3035 static int check_inode_recs(struct btrfs_root *root,
3036 struct cache_tree *inode_cache)
3038 struct cache_extent *cache;
3039 struct ptr_node *node;
3040 struct inode_record *rec;
3041 struct inode_backref *backref;
3046 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs should have no inode records at all. */
3048 if (btrfs_root_refs(&root->root_item) == 0) {
3049 if (!cache_tree_empty(inode_cache))
3050 fprintf(stderr, "warning line %d\n", __LINE__);
3055 * We need to record the highest inode number for later 'lost+found'
3057 * We must select an ino not used/referred by any existing inode, or
3058 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3059 * this may cause 'lost+found' dir has wrong nlinks.
3061 cache = last_cache_extent(inode_cache);
3063 node = container_of(cache, struct ptr_node, cache);
3065 if (rec->ino > root->highest_inode)
3066 root->highest_inode = rec->ino;
3070 * We need to repair backrefs first because we could change some of the
3071 * errors in the inode recs.
3073 * We also need to go through and delete invalid backrefs first and then
3074 * add the correct ones second. We do this because we may get EEXIST
3075 * when adding back the correct index because we hadn't yet deleted the
3078 * For example, if we were missing a dir index then the directories
3079 * isize would be wrong, so if we fixed the isize to what we thought it
3080 * would be and then fixed the backref we'd still have a invalid fs, so
3081 * we need to add back the dir index and then check to see if the isize
3086 if (stage == 3 && !err)
3089 cache = search_cache_extent(inode_cache, 0);
3090 while (repair && cache) {
3091 node = container_of(cache, struct ptr_node, cache);
3093 cache = next_cache_extent(cache);
3095 /* Need to free everything up and rescan */
3097 remove_cache_extent(inode_cache, &node->cache);
3099 free_inode_rec(rec);
3103 if (list_empty(&rec->backrefs))
3106 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root dir sanity: it must exist and pass check_root_dir(). */
3120 rec = get_inode_rec(inode_cache, root_dirid, 0);
3121 BUG_ON(IS_ERR(rec));
3123 ret = check_root_dir(rec);
3125 fprintf(stderr, "root %llu root dir %llu error\n",
3126 (unsigned long long)root->root_key.objectid,
3127 (unsigned long long)root_dirid);
3128 print_inode_error(root, rec);
3133 struct btrfs_trans_handle *trans;
3135 trans = btrfs_start_transaction(root, 1);
3136 if (IS_ERR(trans)) {
3137 err = PTR_ERR(trans);
3142 "root %llu missing its root dir, recreating\n",
3143 (unsigned long long)root->objectid);
3145 ret = btrfs_make_root_dir(trans, root, root_dirid);
3148 btrfs_commit_transaction(trans, root);
3152 fprintf(stderr, "root %llu root dir %llu not found\n",
3153 (unsigned long long)root->root_key.objectid,
3154 (unsigned long long)root_dirid);
/* Main pass: drain the cache one record at a time. */
3158 cache = search_cache_extent(inode_cache, 0);
3161 node = container_of(cache, struct ptr_node, cache);
3163 remove_cache_extent(inode_cache, &node->cache);
3165 if (rec->ino == root_dirid ||
3166 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3167 free_inode_rec(rec);
3171 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3172 ret = check_orphan_item(root, rec->ino);
3174 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3175 if (can_free_inode_rec(rec)) {
3176 free_inode_rec(rec);
3181 if (!rec->found_inode_item)
3182 rec->errors |= I_ERR_NO_INODE_ITEM;
3183 if (rec->found_link != rec->nlink)
3184 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3186 ret = try_repair_inode(root, rec);
3187 if (ret == 0 && can_free_inode_rec(rec)) {
3188 free_inode_rec(rec);
3194 if (!(repair && ret == 0))
3196 print_inode_error(root, rec);
3197 list_for_each_entry(backref, &rec->backrefs, list) {
3198 if (!backref->found_dir_item)
3199 backref->errors |= REF_ERR_NO_DIR_ITEM;
3200 if (!backref->found_dir_index)
3201 backref->errors |= REF_ERR_NO_DIR_INDEX;
3202 if (!backref->found_inode_ref)
3203 backref->errors |= REF_ERR_NO_INODE_REF;
3204 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3205 " namelen %u name %s filetype %d errors %x",
3206 (unsigned long long)backref->dir,
3207 (unsigned long long)backref->index,
3208 backref->namelen, backref->name,
3209 backref->filetype, backref->errors);
3210 print_ref_error(backref->errors);
3212 free_inode_rec(rec);
3214 return (error > 0) ? -1 : 0;
3217 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3220 struct cache_extent *cache;
3221 struct root_record *rec = NULL;
3224 cache = lookup_cache_extent(root_cache, objectid, 1);
3226 rec = container_of(cache, struct root_record, cache);
3228 rec = calloc(1, sizeof(*rec));
3230 return ERR_PTR(-ENOMEM);
3231 rec->objectid = objectid;
3232 INIT_LIST_HEAD(&rec->backrefs);
3233 rec->cache.start = objectid;
3234 rec->cache.size = 1;
3236 ret = insert_cache_extent(root_cache, &rec->cache);
3238 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); when absent,
 * allocate a new one and append it to the record's backref list. The
 * extra `namelen + 1` in the calloc reserves room for the name, stored
 * inline and NUL-terminated.
 */
3243 static struct root_backref *get_root_backref(struct root_record *rec,
3244 u64 ref_root, u64 dir, u64 index,
3245 const char *name, int namelen)
3247 struct root_backref *backref;
3249 list_for_each_entry(backref, &rec->backrefs, list) {
3250 if (backref->ref_root != ref_root || backref->dir != dir ||
3251 backref->namelen != namelen)
3253 if (memcmp(name, backref->name, namelen))
/* No match found: build a fresh backref with the name inline. */
3258 backref = calloc(1, sizeof(*backref) + namelen + 1);
3261 backref->ref_root = ref_root;
3263 backref->index = index;
3264 backref->namelen = namelen;
3265 memcpy(backref->name, name, namelen);
3266 backref->name[namelen] = '\0';
3267 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Cache-tree destructor for a root_record: unlink and release every
 * queued backref before the record itself is freed. The macro below
 * instantiates free_root_recs_tree() using this callback.
 */
3271 static void free_root_record(struct cache_extent *cache)
3273 struct root_record *rec;
3274 struct root_backref *backref;
3276 rec = container_of(cache, struct root_record, cache);
3277 while (!list_empty(&rec->backrefs)) {
3278 backref = to_root_backref(rec->backrefs.next);
3279 list_del(&backref->list);
3286 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (@item_type: DIR_ITEM, DIR_INDEX,
 * ROOT_REF or ROOT_BACKREF key) about the link from @ref_root/@dir to
 * root @root_id, accumulating error bits for mismatched indexes and
 * duplicate refs. A backref becomes "reachable" once both the forward
 * root ref and the dir item have been seen.
 */
3288 static int add_root_backref(struct cache_tree *root_cache,
3289 u64 root_id, u64 ref_root, u64 dir, u64 index,
3290 const char *name, int namelen,
3291 int item_type, int errors)
3293 struct root_record *rec;
3294 struct root_backref *backref;
3296 rec = get_root_rec(root_cache, root_id);
3297 BUG_ON(IS_ERR(rec));
3298 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3301 backref->errors |= errors;
/* DIR_ITEM carries no index; everything else must agree on it. */
3303 if (item_type != BTRFS_DIR_ITEM_KEY) {
3304 if (backref->found_dir_index || backref->found_back_ref ||
3305 backref->found_forward_ref) {
3306 if (backref->index != index)
3307 backref->errors |= REF_ERR_INDEX_UNMATCH;
3309 backref->index = index;
3313 if (item_type == BTRFS_DIR_ITEM_KEY) {
3314 if (backref->found_forward_ref)
3316 backref->found_dir_item = 1;
3317 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3318 backref->found_dir_index = 1;
3319 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3320 if (backref->found_forward_ref)
3321 backref->errors |= REF_ERR_DUP_ROOT_REF;
3322 else if (backref->found_dir_item)
3324 backref->found_forward_ref = 1;
3325 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3326 if (backref->found_back_ref)
3327 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3328 backref->found_back_ref = 1;
3333 if (backref->found_forward_ref && backref->found_dir_item)
3334 backref->reachable = 1;
/*
 * Fold per-subvolume root records from @src_cache into the global
 * @dst_cache, converting each child root's dir backrefs into root
 * backrefs. Reloc-tree records are simply discarded. Records are
 * removed from @src_cache and freed as they are processed.
 */
3338 static int merge_root_recs(struct btrfs_root *root,
3339 struct cache_tree *src_cache,
3340 struct cache_tree *dst_cache)
3342 struct cache_extent *cache;
3343 struct ptr_node *node;
3344 struct inode_record *rec;
3345 struct inode_backref *backref;
/* Nothing to merge for the relocation tree: drop its records. */
3348 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3349 free_inode_recs_tree(src_cache);
3354 cache = search_cache_extent(src_cache, 0);
3357 node = container_of(cache, struct ptr_node, cache);
3359 remove_cache_extent(src_cache, &node->cache);
3362 ret = is_child_root(root, root->objectid, rec->ino);
3368 list_for_each_entry(backref, &rec->backrefs, list) {
3369 BUG_ON(backref->found_inode_ref);
3370 if (backref->found_dir_item)
3371 add_root_backref(dst_cache, rec->ino,
3372 root->root_key.objectid, backref->dir,
3373 backref->index, backref->name,
3374 backref->namelen, BTRFS_DIR_ITEM_KEY,
3376 if (backref->found_dir_index)
3377 add_root_backref(dst_cache, rec->ino,
3378 root->root_key.objectid, backref->dir,
3379 backref->index, backref->name,
3380 backref->namelen, BTRFS_DIR_INDEX_KEY,
3384 free_inode_rec(rec);
/*
 * Verify reachability and consistency of all subvolume roots in
 * @root_cache, starting from the top-level FS tree. Backrefs whose
 * referencing root is itself unreachable are iteratively cleared, then
 * every record is reported: unreferenced fs trees, trees referenced but
 * missing a root item, and unresolved backrefs. Returns 1 if any error
 * was found, 0 otherwise.
 */
3391 static int check_root_refs(struct btrfs_root *root,
3392 struct cache_tree *root_cache)
3394 struct root_record *rec;
3395 struct root_record *ref_root;
3396 struct root_backref *backref;
3397 struct cache_extent *cache;
3403 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3404 BUG_ON(IS_ERR(rec));
3407 /* fixme: this can not detect circular references */
/* Propagate unreachability until a fixed point is reached. */
3410 cache = search_cache_extent(root_cache, 0);
3414 rec = container_of(cache, struct root_record, cache);
3415 cache = next_cache_extent(cache);
3417 if (rec->found_ref == 0)
3420 list_for_each_entry(backref, &rec->backrefs, list) {
3421 if (!backref->reachable)
3424 ref_root = get_root_rec(root_cache,
3426 BUG_ON(IS_ERR(ref_root));
3427 if (ref_root->found_ref > 0)
3430 backref->reachable = 0;
3432 if (rec->found_ref == 0)
/* Reporting pass over the final state of every record. */
3438 cache = search_cache_extent(root_cache, 0);
3442 rec = container_of(cache, struct root_record, cache);
3443 cache = next_cache_extent(cache);
3445 if (rec->found_ref == 0 &&
3446 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3447 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3448 ret = check_orphan_item(root->fs_info->tree_root,
3454 * If we don't have a root item then we likely just have
3455 * a dir item in a snapshot for this root but no actual
3456 * ref key or anything so it's meaningless.
3458 if (!rec->found_root_item)
3461 fprintf(stderr, "fs tree %llu not referenced\n",
3462 (unsigned long long)rec->objectid);
3466 if (rec->found_ref > 0 && !rec->found_root_item)
3468 list_for_each_entry(backref, &rec->backrefs, list) {
3469 if (!backref->found_dir_item)
3470 backref->errors |= REF_ERR_NO_DIR_ITEM;
3471 if (!backref->found_dir_index)
3472 backref->errors |= REF_ERR_NO_DIR_INDEX;
3473 if (!backref->found_back_ref)
3474 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3475 if (!backref->found_forward_ref)
3476 backref->errors |= REF_ERR_NO_ROOT_REF;
3477 if (backref->reachable && backref->errors)
3484 fprintf(stderr, "fs tree %llu refs %u %s\n",
3485 (unsigned long long)rec->objectid, rec->found_ref,
3486 rec->found_root_item ? "" : "not found");
3488 list_for_each_entry(backref, &rec->backrefs, list) {
3489 if (!backref->reachable)
3491 if (!backref->errors && rec->found_root_item)
3493 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3494 " index %llu namelen %u name %s errors %x\n",
3495 (unsigned long long)backref->ref_root,
3496 (unsigned long long)backref->dir,
3497 (unsigned long long)backref->index,
3498 backref->namelen, backref->name,
3500 print_ref_error(backref->errors);
3503 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it
 * into the root cache. Over-long names are truncated to BTRFS_NAME_LEN
 * and flagged. The (root_id, ref_root) pair is taken from key offset /
 * objectid in opposite orders for the two key types.
 */
3506 static int process_root_ref(struct extent_buffer *eb, int slot,
3507 struct btrfs_key *key,
3508 struct cache_tree *root_cache)
3514 struct btrfs_root_ref *ref;
3515 char namebuf[BTRFS_NAME_LEN];
3518 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3520 dirid = btrfs_root_ref_dirid(eb, ref);
3521 index = btrfs_root_ref_sequence(eb, ref);
3522 name_len = btrfs_root_ref_name_len(eb, ref);
3524 if (name_len <= BTRFS_NAME_LEN) {
/* Name longer than the on-disk limit: clamp and record the error. */
3528 len = BTRFS_NAME_LEN;
3529 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes immediately follow the btrfs_root_ref struct. */
3531 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3533 if (key->type == BTRFS_ROOT_REF_KEY) {
3534 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3535 index, namebuf, len, key->type, error);
3537 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3538 index, namebuf, len, key->type, error);
/*
 * Cache-tree destructor for one btrfs_corrupt_block entry; the macro
 * below instantiates free_corrupt_blocks_tree() using it.
 */
3543 static void free_corrupt_block(struct cache_extent *cache)
3545 struct btrfs_corrupt_block *corrupt;
3547 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3551 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3554 * Repair the btree of the given root.
3556 * The fix is to remove the node key in the corrupt_blocks cache_tree
3557 * and rebalance the tree.
3558 * After the fix, the btree should be writable.
/*
 * Remove every corrupted node/leaf recorded in @corrupt_blocks from
 * @root's btree, then rebalance.
 *
 * Two passes: first each corrupt block's pointer is deleted from its
 * parent (searching with ins_len == 0 so no balancing touches corrupt
 * siblings) and its backing extent freed; then the tree is rebalanced
 * by re-running btrfs_search_slot() with ins_len == -1 for each key.
 */
3560 static int repair_btree(struct btrfs_root *root,
3561 struct cache_tree *corrupt_blocks)
3563 struct btrfs_trans_handle *trans;
3564 struct btrfs_path *path;
3565 struct btrfs_corrupt_block *corrupt;
3566 struct cache_extent *cache;
3567 struct btrfs_key key;
3572 if (cache_tree_empty(corrupt_blocks))
3575 path = btrfs_alloc_path();
3579 trans = btrfs_start_transaction(root, 1);
3580 if (IS_ERR(trans)) {
3581 ret = PTR_ERR(trans);
3582 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: delete each corrupt block's ptr and free its extent. */
3586 cache = first_cache_extent(corrupt_blocks);
3588 corrupt = container_of(cache, struct btrfs_corrupt_block,
3590 level = corrupt->level;
3591 path->lowest_level = level;
3592 key.objectid = corrupt->key.objectid;
3593 key.type = corrupt->key.type;
3594 key.offset = corrupt->key.offset;
3597 * Here we don't want to do any tree balance, since it may
3598 * cause a balance with corrupted brother leaf/node,
3599 * so ins_len set to 0 here.
3600 * Balance will be done after all corrupt node/leaf is deleted.
3602 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3605 offset = btrfs_node_blockptr(path->nodes[level],
3606 path->slots[level]);
3608 /* Remove the ptr */
3609 ret = btrfs_del_ptr(trans, root, path, level,
3610 path->slots[level]);
3614 * Remove the corresponding extent
3615 * return value is not concerned.
3617 btrfs_release_path(path);
3618 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3619 0, root->root_key.objectid,
3621 cache = next_cache_extent(cache);
3624 /* Balance the btree using btrfs_search_slot() */
3625 cache = first_cache_extent(corrupt_blocks);
3627 corrupt = container_of(cache, struct btrfs_corrupt_block,
3629 memcpy(&key, &corrupt->key, sizeof(key));
3630 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3633 /* return will always >0 since it won't find the item */
3635 btrfs_release_path(path);
3636 cache = next_cache_extent(cache);
3639 btrfs_commit_transaction(trans, root);
3641 btrfs_free_path(path);
/*
 * Check a single fs/subvolume tree: validate the root block, walk the
 * whole tree collecting inode/root records, report (and in repair mode
 * fix) corrupted tree blocks, then merge root records upward and check
 * all inode records.
 * NOTE(review): interior lines appear elided in this copy (gaps in the
 * embedded numbering).
 */
3645 static int check_fs_root(struct btrfs_root *root,
3646 struct cache_tree *root_cache,
3647 struct walk_control *wc)
3653 struct btrfs_path path;
3654 struct shared_node root_node;
3655 struct root_record *rec;
3656 struct btrfs_root_item *root_item = &root->root_item;
3657 struct cache_tree corrupt_blocks;
3658 struct orphan_data_extent *orphan;
3659 struct orphan_data_extent *tmp;
3660 enum btrfs_tree_block_status status;
3661 struct node_refs nrefs;
3664 * Reuse the corrupt_block cache tree to record corrupted tree block
3666 * Unlike the usage in extent tree check, here we do it in a per
3667 * fs/subvol tree base.
3669 cache_tree_init(&corrupt_blocks);
3670 root->fs_info->corrupt_blocks = &corrupt_blocks;
3672 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3673 rec = get_root_rec(root_cache, root->root_key.objectid);
3674 BUG_ON(IS_ERR(rec));
3675 if (btrfs_root_refs(root_item) > 0)
3676 rec->found_root_item = 1;
3679 btrfs_init_path(&path);
3680 memset(&root_node, 0, sizeof(root_node));
3681 cache_tree_init(&root_node.root_cache);
3682 cache_tree_init(&root_node.inode_cache);
3683 memset(&nrefs, 0, sizeof(nrefs));
3685 /* Move the orphan extent record to corresponding inode_record */
3686 list_for_each_entry_safe(orphan, tmp,
3687 &root->orphan_data_extents, list) {
3688 struct inode_record *inode;
3690 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3692 BUG_ON(IS_ERR(inode));
3693 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3694 list_move(&orphan->list, &inode->orphan_extents);
3697 level = btrfs_header_level(root->node);
3698 memset(wc->nodes, 0, sizeof(wc->nodes));
3699 wc->nodes[level] = &root_node;
3700 wc->active_node = level;
3701 wc->root_level = level;
3703 /* We may not have checked the root block, lets do that now */
3704 if (btrfs_is_leaf(root->node))
3705 status = btrfs_check_leaf(root, NULL, root->node);
3707 status = btrfs_check_node(root, NULL, root->node);
3708 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Start from the root block, or resume from drop_progress for a
 * partially-dropped (zero-ref) snapshot. */
3711 if (btrfs_root_refs(root_item) > 0 ||
3712 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3713 path.nodes[level] = root->node;
3714 extent_buffer_get(root->node);
3715 path.slots[level] = 0;
3717 struct btrfs_key key;
3718 struct btrfs_disk_key found_key;
3720 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3721 level = root_item->drop_level;
3722 path.lowest_level = level;
3723 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3726 btrfs_node_key(path.nodes[level], &found_key,
3728 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3729 sizeof(found_key)));
3733 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3739 wret = walk_up_tree(root, &path, wc, &level);
3746 btrfs_release_path(&path);
/* Report collected corrupt blocks; attempt repair_btree() if fixing. */
3748 if (!cache_tree_empty(&corrupt_blocks)) {
3749 struct cache_extent *cache;
3750 struct btrfs_corrupt_block *corrupt;
3752 printf("The following tree block(s) is corrupted in tree %llu:\n",
3753 root->root_key.objectid);
3754 cache = first_cache_extent(&corrupt_blocks);
3756 corrupt = container_of(cache,
3757 struct btrfs_corrupt_block,
3759 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3760 cache->start, corrupt->level,
3761 corrupt->key.objectid, corrupt->key.type,
3762 corrupt->key.offset);
3763 cache = next_cache_extent(cache);
3766 printf("Try to repair the btree for root %llu\n",
3767 root->root_key.objectid);
3768 ret = repair_btree(root, &corrupt_blocks);
3770 fprintf(stderr, "Failed to repair btree: %s\n",
3773 printf("Btree for root %llu is fixed\n",
3774 root->root_key.objectid);
3778 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3782 if (root_node.current) {
3783 root_node.current->checked = 1;
3784 maybe_free_inode_rec(&root_node.inode_cache,
3788 err = check_inode_recs(root, &root_node.inode_cache);
3792 free_corrupt_blocks_tree(&corrupt_blocks);
3793 root->fs_info->corrupt_blocks = NULL;
3794 free_orphan_data_extents(&root->orphan_data_extents);
3798 static int fs_root_objectid(u64 objectid)
3800 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3801 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3803 return is_fstree(objectid);
/*
 * Iterate the root tree and run check_fs_root() on every fs/subvolume
 * root item, and feed ROOT_REF/ROOT_BACKREF items into the root cache.
 * If the root tree node changes underneath us (a repair committed a
 * transaction) or a sub-check returns -EAGAIN, all collected records
 * are discarded and the scan restarts from the top.
 */
3806 static int check_fs_roots(struct btrfs_root *root,
3807 struct cache_tree *root_cache)
3809 struct btrfs_path path;
3810 struct btrfs_key key;
3811 struct walk_control wc;
3812 struct extent_buffer *leaf, *tree_node;
3813 struct btrfs_root *tmp_root;
3814 struct btrfs_root *tree_root = root->fs_info->tree_root;
3818 if (ctx.progress_enabled) {
3819 ctx.tp = TASK_FS_ROOTS;
3820 task_start(ctx.info);
3824 * Just in case we made any changes to the extent tree that weren't
3825 * reflected into the free space cache yet.
3828 reset_cached_block_groups(root->fs_info);
3829 memset(&wc, 0, sizeof(wc));
3830 cache_tree_init(&wc.shared);
3831 btrfs_init_path(&path);
3836 key.type = BTRFS_ROOT_ITEM_KEY;
3837 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect a COW beneath us. */
3842 tree_node = tree_root->node;
3844 if (tree_node != tree_root->node) {
3845 free_root_recs_tree(root_cache);
3846 btrfs_release_path(&path);
3849 leaf = path.nodes[0];
3850 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3851 ret = btrfs_next_leaf(tree_root, &path);
3857 leaf = path.nodes[0];
3859 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3860 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3861 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached and freed again below. */
3862 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3863 tmp_root = btrfs_read_fs_root_no_cache(
3864 root->fs_info, &key);
3866 key.offset = (u64)-1;
3867 tmp_root = btrfs_read_fs_root(
3868 root->fs_info, &key);
3870 if (IS_ERR(tmp_root)) {
3874 ret = check_fs_root(tmp_root, root_cache, &wc);
3875 if (ret == -EAGAIN) {
3876 free_root_recs_tree(root_cache);
3877 btrfs_release_path(&path);
3882 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3883 btrfs_free_fs_root(tmp_root);
3884 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3885 key.type == BTRFS_ROOT_BACKREF_KEY) {
3886 process_root_ref(leaf, path.slots[0], &key,
3893 btrfs_release_path(&path);
3895 free_extent_cache_tree(&wc.shared);
3896 if (!cache_tree_empty(&wc.shared))
3897 fprintf(stderr, "warning line %d\n", __LINE__);
3899 task_stop(ctx.info);
/*
 * Cross-check every backref of extent record @rec: each backref must
 * have been seen in the extent tree AND from the referencing side, data
 * backref counts/bytenr/bytes must match the record, and the summed
 * reference count must equal rec->refs. Returns nonzero when any
 * mismatch remains; with @print_errs each problem is reported to
 * stderr.
 */
3904 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3907 struct extent_backref *back;
3908 struct tree_backref *tback;
3909 struct data_backref *dback;
3913 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3914 back = rb_node_to_extent_backref(n);
/* Backref never matched against an extent-tree item. */
3915 if (!back->found_extent_tree) {
3919 if (back->is_data) {
3920 dback = to_data_backref(back);
3921 fprintf(stderr, "Backref %llu %s %llu"
3922 " owner %llu offset %llu num_refs %lu"
3923 " not found in extent tree\n",
3924 (unsigned long long)rec->start,
3925 back->full_backref ?
3927 back->full_backref ?
3928 (unsigned long long)dback->parent:
3929 (unsigned long long)dback->root,
3930 (unsigned long long)dback->owner,
3931 (unsigned long long)dback->offset,
3932 (unsigned long)dback->num_refs);
3934 tback = to_tree_backref(back);
3935 fprintf(stderr, "Backref %llu parent %llu"
3936 " root %llu not found in extent tree\n",
3937 (unsigned long long)rec->start,
3938 (unsigned long long)tback->parent,
3939 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never referenced. */
3942 if (!back->is_data && !back->found_ref) {
3946 tback = to_tree_backref(back);
3947 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3948 (unsigned long long)rec->start,
3949 back->full_backref ? "parent" : "root",
3950 back->full_backref ?
3951 (unsigned long long)tback->parent :
3952 (unsigned long long)tback->root, back);
3954 if (back->is_data) {
3955 dback = to_data_backref(back);
3956 if (dback->found_ref != dback->num_refs) {
3960 fprintf(stderr, "Incorrect local backref count"
3961 " on %llu %s %llu owner %llu"
3962 " offset %llu found %u wanted %u back %p\n",
3963 (unsigned long long)rec->start,
3964 back->full_backref ?
3966 back->full_backref ?
3967 (unsigned long long)dback->parent:
3968 (unsigned long long)dback->root,
3969 (unsigned long long)dback->owner,
3970 (unsigned long long)dback->offset,
3971 dback->found_ref, dback->num_refs, back);
3973 if (dback->disk_bytenr != rec->start) {
3977 fprintf(stderr, "Backref disk bytenr does not"
3978 " match extent record, bytenr=%llu, "
3979 "ref bytenr=%llu\n",
3980 (unsigned long long)rec->start,
3981 (unsigned long long)dback->disk_bytenr);
3984 if (dback->bytes != rec->nr) {
3988 fprintf(stderr, "Backref bytes do not match "
3989 "extent backref, bytenr=%llu, ref "
3990 "bytes=%llu, backref bytes=%llu\n",
3991 (unsigned long long)rec->start,
3992 (unsigned long long)rec->nr,
3993 (unsigned long long)dback->bytes);
/* Accumulate the total reference count seen across backrefs. */
3996 if (!back->is_data) {
3999 dback = to_data_backref(back);
4000 found += dback->found_ref;
4003 if (found != rec->refs) {
4007 fprintf(stderr, "Incorrect global backref count "
4008 "on %llu found %llu wanted %llu\n",
4009 (unsigned long long)rec->start,
4010 (unsigned long long)found,
4011 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: release the extent_backref embedding @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
/* Free every backref hanging off @rec's backref rb-tree. */
4024 static void free_all_extent_backrefs(struct extent_record *rec)
4026 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache: detach each extent_record and release its
 * backrefs (and the record itself) in turn.
 */
4029 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4030 struct cache_tree *extent_cache)
4032 struct cache_extent *cache;
4033 struct extent_record *rec;
4036 cache = first_cache_extent(extent_cache);
4039 rec = container_of(cache, struct extent_record, cache);
4040 remove_cache_extent(extent_cache, cache);
4041 free_all_extent_backrefs(rec);
/*
 * Release @rec early if it is fully verified: content and owner-ref
 * checked, ref counts consistent, no duplicates, all backpointers
 * resolved, and none of the structural error flags set. Otherwise the
 * record stays cached for later error reporting.
 */
4046 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4047 struct extent_record *rec)
4049 if (rec->content_checked && rec->owner_ref_checked &&
4050 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4051 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4052 !rec->bad_full_backref && !rec->crossing_stripes &&
4053 !rec->wrong_chunk_type) {
4054 remove_cache_extent(extent_cache, &rec->cache);
4055 free_all_extent_backrefs(rec);
4056 list_del_init(&rec->list);
/*
 * Verify the owner recorded in @buf's header against @rec's backrefs:
 * first look for a matching non-full tree backref, then fall back to
 * searching the owner's fs tree for a parent node that actually points
 * at @buf. Returns 0 when the ownership checks out, 1 otherwise.
 */
4062 static int check_owner_ref(struct btrfs_root *root,
4063 struct extent_record *rec,
4064 struct extent_buffer *buf)
4066 struct extent_backref *node, *tmp;
4067 struct tree_backref *back;
4068 struct btrfs_root *ref_root;
4069 struct btrfs_key key;
4070 struct btrfs_path path;
4071 struct extent_buffer *parent;
4076 rbtree_postorder_for_each_entry_safe(node, tmp,
4077 &rec->backref_tree, node) {
4080 if (!node->found_ref)
/* Full backrefs carry a parent bytenr, not a root id — skip. */
4082 if (node->full_backref)
4084 back = to_tree_backref(node);
4085 if (btrfs_header_owner(buf) == back->root)
4088 BUG_ON(rec->is_root);
4090 /* try to find the block by search corresponding fs tree */
4091 key.objectid = btrfs_header_owner(buf);
4092 key.type = BTRFS_ROOT_ITEM_KEY;
4093 key.offset = (u64)-1;
4095 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4096 if (IS_ERR(ref_root))
4099 level = btrfs_header_level(buf);
/* Use @buf's first key to locate its slot in the owner's tree. */
4101 btrfs_item_key_to_cpu(buf, &key, 0);
4103 btrfs_node_key_to_cpu(buf, &key, 0);
4105 btrfs_init_path(&path);
4106 path.lowest_level = level + 1;
4107 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4111 parent = path.nodes[level + 1];
4112 if (parent && buf->start == btrfs_node_blockptr(parent,
4113 path.slots[level + 1]))
4116 btrfs_release_path(&path);
4117 return found ? 0 : 1;
/*
 * Return whether @rec carries a (non-full) tree backref owned by the
 * extent tree — i.e. the block belongs to the extent tree itself.
 */
4120 static int is_extent_tree_record(struct extent_record *rec)
4122 struct extent_backref *ref, *tmp;
4123 struct tree_backref *back;
4126 rbtree_postorder_for_each_entry_safe(ref, tmp,
4127 &rec->backref_tree, node) {
4130 back = to_tree_backref(ref);
/* Full backrefs carry a parent bytenr, not a root id — skip them. */
4131 if (ref->full_backref)
4133 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * An I/O error was hit on [start, len).  If the extent record shows the
 * block belongs to the extent tree, queue it as a corrupt extent so the
 * repair pass can rebuild it; otherwise it is handled elsewhere.
 */
4140 static int record_bad_block_io(struct btrfs_fs_info *info,
4141 struct cache_tree *extent_cache,
4144 struct extent_record *rec;
4145 struct cache_extent *cache;
4146 struct btrfs_key key;
4148 cache = lookup_cache_extent(extent_cache, start, len);
4152 rec = container_of(cache, struct extent_record, cache);
4153 if (!is_extent_tree_record(rec))
4156 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4157 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 inside @buf to repair a bad key
 * ordering.  For interior nodes this swaps the two key pointers; for
 * leaves it swaps both the item headers (offset/size) and the item data,
 * then updates the path's item keys.  When slot 0 changes, the key in the
 * parent nodes must be fixed up as well.
 */
4160 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4161 struct extent_buffer *buf, int slot)
4163 if (btrfs_header_level(buf)) {
4164 struct btrfs_key_ptr ptr1, ptr2;
4166 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4167 sizeof(struct btrfs_key_ptr));
4168 read_extent_buffer(buf, &ptr2,
4169 btrfs_node_key_ptr_offset(slot + 1),
4170 sizeof(struct btrfs_key_ptr));
4171 write_extent_buffer(buf, &ptr1,
4172 btrfs_node_key_ptr_offset(slot + 1),
4173 sizeof(struct btrfs_key_ptr));
4174 write_extent_buffer(buf, &ptr2,
4175 btrfs_node_key_ptr_offset(slot),
4176 sizeof(struct btrfs_key_ptr));
4178 struct btrfs_disk_key key;
/* Slot 0 moved: propagate the new first key up to the ancestors. */
4179 btrfs_node_key(buf, &key, 0);
4180 btrfs_fixup_low_keys(root, path, &key,
4181 btrfs_header_level(buf) + 1);
4184 struct btrfs_item *item1, *item2;
4185 struct btrfs_key k1, k2;
4186 char *item1_data, *item2_data;
4187 u32 item1_offset, item2_offset, item1_size, item2_size;
4189 item1 = btrfs_item_nr(slot);
4190 item2 = btrfs_item_nr(slot + 1);
4191 btrfs_item_key_to_cpu(buf, &k1, slot);
4192 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4193 item1_offset = btrfs_item_offset(buf, item1);
4194 item2_offset = btrfs_item_offset(buf, item2);
4195 item1_size = btrfs_item_size(buf, item1);
4196 item2_size = btrfs_item_size(buf, item2);
4198 item1_data = malloc(item1_size);
4201 item2_data = malloc(item2_size);
/* Swap the item payloads through temporary copies. */
4207 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4208 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4210 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4211 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4215 btrfs_set_item_offset(buf, item1, item2_offset);
4216 btrfs_set_item_offset(buf, item2, item1_offset);
4217 btrfs_set_item_size(buf, item1, item2_size);
4218 btrfs_set_item_size(buf, item2, item1_size);
4220 path->slots[0] = slot;
4221 btrfs_set_item_key_unsafe(root, path, &k2);
4222 path->slots[0] = slot + 1;
4223 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair a block whose keys are out of order: walk adjacent key pairs at
 * path->lowest_level and swap any pair found in the wrong order, then
 * mark the buffer dirty so the fix is written out.
 */
4228 static int fix_key_order(struct btrfs_trans_handle *trans,
4229 struct btrfs_root *root,
4230 struct btrfs_path *path)
4232 struct extent_buffer *buf;
4233 struct btrfs_key k1, k2;
4235 int level = path->lowest_level;
4238 buf = path->nodes[level];
4239 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4241 btrfs_node_key_to_cpu(buf, &k1, i);
4242 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4244 btrfs_item_key_to_cpu(buf, &k1, i);
4245 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Pair already ordered — nothing to do for this slot. */
4247 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4249 ret = swap_values(root, path, buf, i);
4252 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf, but only when its key type is
 * one the checker knows how to recreate later (dir index, extent items,
 * backrefs).  The item header array is shifted down over the hole and the
 * leaf's nritems decremented; if slot 0 was removed the parent keys are
 * fixed up.
 */
4258 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4259 struct btrfs_root *root,
4260 struct btrfs_path *path,
4261 struct extent_buffer *buf, int slot)
4263 struct btrfs_key key;
4264 int nritems = btrfs_header_nritems(buf);
4266 btrfs_item_key_to_cpu(buf, &key, slot);
4268 /* These are all the keys we can deal with missing. */
4269 if (key.type != BTRFS_DIR_INDEX_KEY &&
4270 key.type != BTRFS_EXTENT_ITEM_KEY &&
4271 key.type != BTRFS_METADATA_ITEM_KEY &&
4272 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4273 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4276 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4277 (unsigned long long)key.objectid, key.type,
4278 (unsigned long long)key.offset, slot, buf->start);
4279 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4280 btrfs_item_nr_offset(slot + 1),
4281 sizeof(struct btrfs_item) *
4282 (nritems - slot - 1));
4283 btrfs_set_header_nritems(buf, nritems - 1);
4285 struct btrfs_disk_key disk_key;
4287 btrfs_item_key(buf, &disk_key, 0);
4288 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4290 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item offsets in a leaf.  Each item's data must end exactly
 * where the previous item's data starts (or at BTRFS_LEAF_DATA_SIZE for
 * slot 0).  Items that overhang are deleted via delete_bogus_item();
 * items that leave a gap are shifted up by the gap size.
 */
4294 static int fix_item_offset(struct btrfs_trans_handle *trans,
4295 struct btrfs_root *root,
4296 struct btrfs_path *path)
4298 struct extent_buffer *buf;
4302 /* We should only get this for leaves */
4303 BUG_ON(path->lowest_level);
4304 buf = path->nodes[0];
4306 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4307 unsigned int shift = 0, offset;
4309 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4310 BTRFS_LEAF_DATA_SIZE(root)) {
/* First item overhangs the leaf data area — unfixable by shifting. */
4311 if (btrfs_item_end_nr(buf, i) >
4312 BTRFS_LEAF_DATA_SIZE(root)) {
4313 ret = delete_bogus_item(trans, root, path,
4317 fprintf(stderr, "item is off the end of the "
4318 "leaf, can't fix\n");
4322 shift = BTRFS_LEAF_DATA_SIZE(root) -
4323 btrfs_item_end_nr(buf, i);
4324 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4325 btrfs_item_offset_nr(buf, i - 1)) {
/* Item data overlaps the previous item's data. */
4326 if (btrfs_item_end_nr(buf, i) >
4327 btrfs_item_offset_nr(buf, i - 1)) {
4328 ret = delete_bogus_item(trans, root, path,
4332 fprintf(stderr, "items overlap, can't fix\n");
4336 shift = btrfs_item_offset_nr(buf, i - 1) -
4337 btrfs_item_end_nr(buf, i);
4342 printf("Shifting item nr %d by %u bytes in block %llu\n",
4343 i, shift, (unsigned long long)buf->start);
4344 offset = btrfs_item_offset_nr(buf, i);
4345 memmove_extent_buffer(buf,
4346 btrfs_leaf_data(buf) + offset + shift,
4347 btrfs_leaf_data(buf) + offset,
4348 btrfs_item_size_nr(buf, i));
4349 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4351 btrfs_mark_buffer_dirty(buf);
4355 * We may have moved things, in which case we want to exit so we don't
4356 * write those changes out. Once we have proper abort functionality in
4357 * progs this can be changed to something nicer.
* Attempt to fix basic block failures. If we can't fix it for whatever reason
* then just return -EIO.
*
* Only bad key order and invalid item offsets are repairable.  For each
* root that references @buf (via btrfs_find_all_roots) we start a
* transaction, cow the path down to the block, and run the matching
* fixer (fix_key_order / fix_item_offset).
4367 static int try_to_fix_bad_block(struct btrfs_root *root,
4368 struct extent_buffer *buf,
4369 enum btrfs_tree_block_status status)
4371 struct btrfs_trans_handle *trans;
4372 struct ulist *roots;
4373 struct ulist_node *node;
4374 struct btrfs_root *search_root;
4375 struct btrfs_path *path;
4376 struct ulist_iterator iter;
4377 struct btrfs_key root_key, key;
4380 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4381 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4384 path = btrfs_alloc_path();
4388 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4391 btrfs_free_path(path);
4395 ULIST_ITER_INIT(&iter);
4396 while ((node = ulist_next(roots, &iter))) {
4397 root_key.objectid = node->val;
4398 root_key.type = BTRFS_ROOT_ITEM_KEY;
4399 root_key.offset = (u64)-1;
4401 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4408 trans = btrfs_start_transaction(search_root, 0);
4409 if (IS_ERR(trans)) {
4410 ret = PTR_ERR(trans);
/* Skip the tree checker: the block is known-bad, we want to reach it. */
4414 path->lowest_level = btrfs_header_level(buf);
4415 path->skip_check_block = 1;
4416 if (path->lowest_level)
4417 btrfs_node_key_to_cpu(buf, &key, 0);
4419 btrfs_item_key_to_cpu(buf, &key, 0);
4420 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4423 btrfs_commit_transaction(trans, search_root);
4426 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4427 ret = fix_key_order(trans, search_root, path);
4428 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4429 ret = fix_item_offset(trans, search_root, path);
4431 btrfs_commit_transaction(trans, search_root);
4434 btrfs_release_path(path);
4435 btrfs_commit_transaction(trans, search_root);
4438 btrfs_free_path(path);
/*
 * Validate one tree block @buf against its extent record: record its
 * generation and first key, run btrfs_check_leaf()/btrfs_check_node(),
 * attempt an automatic repair on failure, and mark the record's content
 * (and, depending on @flags, owner) as checked.
 */
4442 static int check_block(struct btrfs_root *root,
4443 struct cache_tree *extent_cache,
4444 struct extent_buffer *buf, u64 flags)
4446 struct extent_record *rec;
4447 struct cache_extent *cache;
4448 struct btrfs_key key;
4449 enum btrfs_tree_block_status status;
4453 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4456 rec = container_of(cache, struct extent_record, cache);
4457 rec->generation = btrfs_header_generation(buf);
4459 level = btrfs_header_level(buf);
4460 if (btrfs_header_nritems(buf) > 0) {
4463 btrfs_item_key_to_cpu(buf, &key, 0);
4465 btrfs_node_key_to_cpu(buf, &key, 0);
4467 rec->info_objectid = key.objectid;
4469 rec->info_level = level;
4471 if (btrfs_is_leaf(buf))
4472 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4474 status = btrfs_check_node(root, &rec->parent_key, buf);
4476 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4478 status = try_to_fix_bad_block(root, buf, status);
4479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4481 fprintf(stderr, "bad block %llu\n",
4482 (unsigned long long)buf->start);
4485 * Signal to callers we need to start the scan over
4486 * again since we'll have cowed blocks.
4491 rec->content_checked = 1;
/* Full-backref blocks carry no owner root, so there is nothing to check. */
4492 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4493 rec->owner_ref_checked = 1;
4495 ret = check_owner_ref(root, rec, buf);
4497 rec->owner_ref_checked = 1;
4501 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec.  Backrefs are keyed either by
 * @parent (full backrefs) or by @root; a match template is built
 * accordingly and searched in the record's rbtree.  Returns NULL when no
 * matching backref exists.
 */
4506 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4507 u64 parent, u64 root)
4509 struct rb_node *node;
4510 struct tree_backref *back = NULL;
4511 struct tree_backref match = {
4518 match.parent = parent;
4519 match.node.full_backref = 1;
4524 node = rb_search(&rec->backref_tree, &match.node.node,
4525 (rb_compare_keys)compare_extent_backref, NULL);
4527 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a new tree backref and insert it into @rec's backref rbtree.
 * A nonzero @parent makes it a full backref (keyed by parent bytenr);
 * otherwise it is keyed by @root.
 */
4532 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4533 u64 parent, u64 root)
4535 struct tree_backref *ref = malloc(sizeof(*ref));
4539 memset(&ref->node, 0, sizeof(ref->node));
4541 ref->parent = parent;
4542 ref->node.full_backref = 1;
4545 ref->node.full_backref = 0;
4547 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec matching either @parent (full
 * backref) or the (@root, @owner, @offset) triple, plus @found_ref,
 * @disk_bytenr and @bytes.  Returns NULL when none exists.
 */
4552 static struct data_backref *find_data_backref(struct extent_record *rec,
4553 u64 parent, u64 root,
4554 u64 owner, u64 offset,
4556 u64 disk_bytenr, u64 bytes)
4558 struct rb_node *node;
4559 struct data_backref *back = NULL;
4560 struct data_backref match = {
4567 .found_ref = found_ref,
4568 .disk_bytenr = disk_bytenr,
4572 match.parent = parent;
4573 match.node.full_backref = 1;
4578 node = rb_search(&rec->backref_tree, &match.node.node,
4579 (rb_compare_keys)compare_extent_backref, NULL);
4581 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a new data backref, keyed by @parent (full backref) or by the
 * (@root, @owner, @offset) triple, insert it into @rec's backref rbtree,
 * and grow the record's max_size if @max_size exceeds it.
 */
4586 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4587 u64 parent, u64 root,
4588 u64 owner, u64 offset,
4591 struct data_backref *ref = malloc(sizeof(*ref));
4595 memset(&ref->node, 0, sizeof(ref->node));
4596 ref->node.is_data = 1;
4599 ref->parent = parent;
4602 ref->node.full_backref = 1;
4606 ref->offset = offset;
4607 ref->node.full_backref = 0;
4609 ref->bytes = max_size;
4612 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4613 if (max_size > rec->max_size)
4614 rec->max_size = max_size;
4618 /* Check if the type of extent matches with its chunk */
4619 static void check_extent_type(struct extent_record *rec)
4621 struct btrfs_block_group_cache *bg_cache;
4623 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4627 /* data extent, check chunk directly*/
4628 if (!rec->metadata) {
4629 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4630 rec->wrong_chunk_type = 1;
4634 /* metadata extent, check the obvious case first */
4635 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4636 BTRFS_BLOCK_GROUP_METADATA))) {
4637 rec->wrong_chunk_type = 1;
4642 * Check SYSTEM extent, as it's also marked as metadata, we can only
4643 * make sure it's a SYSTEM extent by its backref
4645 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4646 struct extent_backref *node;
4647 struct tree_backref *tback;
4650 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4651 if (node->is_data) {
4652 /* tree block shouldn't have data backref */
4653 rec->wrong_chunk_type = 1;
/* Chunk-tree-owned blocks must live in a SYSTEM block group. */
4656 tback = container_of(node, struct tree_backref, node);
4658 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4659 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4661 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4662 if (!(bg_cache->flags & bg_type))
4663 rec->wrong_chunk_type = 1;
* Allocate a new extent record, fill default values from @tmpl and insert into
* @extent_cache. Caller is supposed to make sure the [start,nr) is not in
* the cache, otherwise it fails.
4672 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4673 struct extent_record *tmpl)
4675 struct extent_record *rec;
4678 rec = malloc(sizeof(*rec));
4681 rec->start = tmpl->start;
4682 rec->max_size = tmpl->max_size;
/* nr may be smaller than max_size when only a partial ref was seen. */
4683 rec->nr = max(tmpl->nr, tmpl->max_size);
4684 rec->found_rec = tmpl->found_rec;
4685 rec->content_checked = tmpl->content_checked;
4686 rec->owner_ref_checked = tmpl->owner_ref_checked;
4687 rec->num_duplicates = 0;
4688 rec->metadata = tmpl->metadata;
4689 rec->flag_block_full_backref = FLAG_UNSET;
4690 rec->bad_full_backref = 0;
4691 rec->crossing_stripes = 0;
4692 rec->wrong_chunk_type = 0;
4693 rec->is_root = tmpl->is_root;
4694 rec->refs = tmpl->refs;
4695 rec->extent_item_refs = tmpl->extent_item_refs;
4696 rec->parent_generation = tmpl->parent_generation;
4697 INIT_LIST_HEAD(&rec->backrefs);
4698 INIT_LIST_HEAD(&rec->dups);
4699 INIT_LIST_HEAD(&rec->list);
4700 rec->backref_tree = RB_ROOT;
4701 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4702 rec->cache.start = tmpl->start;
4703 rec->cache.size = tmpl->nr;
4704 ret = insert_cache_extent(extent_cache, &rec->cache);
4706 bytes_used += rec->nr;
/* Metadata must not straddle a stripe boundary or scrub can't handle it. */
4709 rec->crossing_stripes = check_crossing_stripes(rec->start,
4710 global_info->tree_root->nodesize);
4711 check_extent_type(rec);
* Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
* - refs - if found, increase refs
* - is_root - if found, set
* - content_checked - if found, set
* - owner_ref_checked - if found, set
*
* If not found, create a new one, initialize and insert.
4725 static int add_extent_rec(struct cache_tree *extent_cache,
4726 struct extent_record *tmpl)
4728 struct extent_record *rec;
4729 struct cache_extent *cache;
4733 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4735 rec = container_of(cache, struct extent_record, cache);
4739 rec->nr = max(tmpl->nr, tmpl->max_size);
4742 * We need to make sure to reset nr to whatever the extent
4743 * record says was the real size, this way we can compare it to
4746 if (tmpl->found_rec) {
4747 if (tmpl->start != rec->start || rec->found_rec) {
4748 struct extent_record *tmp;
/* Second EXTENT_ITEM for the same range: track as a duplicate. */
4751 if (list_empty(&rec->list))
4752 list_add_tail(&rec->list,
4753 &duplicate_extents);
4756 * We have to do this song and dance in case we
4757 * find an extent record that falls inside of
4758 * our current extent record but does not have
4759 * the same objectid.
4761 tmp = malloc(sizeof(*tmp));
4764 tmp->start = tmpl->start;
4765 tmp->max_size = tmpl->max_size;
4768 tmp->metadata = tmpl->metadata;
4769 tmp->extent_item_refs = tmpl->extent_item_refs;
4770 INIT_LIST_HEAD(&tmp->list);
4771 list_add_tail(&tmp->list, &rec->dups);
4772 rec->num_duplicates++;
4779 if (tmpl->extent_item_refs && !dup) {
4780 if (rec->extent_item_refs) {
4781 fprintf(stderr, "block %llu rec "
4782 "extent_item_refs %llu, passed %llu\n",
4783 (unsigned long long)tmpl->start,
4784 (unsigned long long)
4785 rec->extent_item_refs,
4786 (unsigned long long)tmpl->extent_item_refs);
4788 rec->extent_item_refs = tmpl->extent_item_refs;
4792 if (tmpl->content_checked)
4793 rec->content_checked = 1;
4794 if (tmpl->owner_ref_checked)
4795 rec->owner_ref_checked = 1;
4796 memcpy(&rec->parent_key, &tmpl->parent_key,
4797 sizeof(tmpl->parent_key));
4798 if (tmpl->parent_generation)
4799 rec->parent_generation = tmpl->parent_generation;
4800 if (rec->max_size < tmpl->max_size)
4801 rec->max_size = tmpl->max_size;
4804 * A metadata extent can't cross stripe_len boundary, otherwise
4805 * kernel scrub won't be able to handle it.
4806 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4810 rec->crossing_stripes = check_crossing_stripes(
4811 rec->start, global_info->tree_root->nodesize);
4812 check_extent_type(rec);
4813 maybe_free_extent_rec(extent_cache, rec);
/* Not cached yet: fall through to a fresh insert. */
4817 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the block at @bytenr.  If no extent record
 * exists yet a stub one is inserted first.  @found_ref distinguishes a
 * reference found by walking the tree (found_ref) from one found in the
 * extent tree itself (found_extent_tree); seeing either twice is
 * reported as a duplicate.
 */
4822 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4823 u64 parent, u64 root, int found_ref)
4825 struct extent_record *rec;
4826 struct tree_backref *back;
4827 struct cache_extent *cache;
4829 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4831 struct extent_record tmpl;
4833 memset(&tmpl, 0, sizeof(tmpl));
4834 tmpl.start = bytenr;
4838 add_extent_rec_nolookup(extent_cache, &tmpl);
/* Look the stub up again so we operate on the cached copy. */
4840 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4845 rec = container_of(cache, struct extent_record, cache);
4846 if (rec->start != bytenr) {
4850 back = find_tree_backref(rec, parent, root);
4852 back = alloc_tree_backref(rec, parent, root);
4857 if (back->node.found_ref) {
4858 fprintf(stderr, "Extent back ref already exists "
4859 "for %llu parent %llu root %llu \n",
4860 (unsigned long long)bytenr,
4861 (unsigned long long)parent,
4862 (unsigned long long)root);
4864 back->node.found_ref = 1;
4866 if (back->node.found_extent_tree) {
4867 fprintf(stderr, "Extent back ref already exists "
4868 "for %llu parent %llu root %llu \n",
4869 (unsigned long long)bytenr,
4870 (unsigned long long)parent,
4871 (unsigned long long)root);
4873 back->node.found_extent_tree = 1;
4875 check_extent_type(rec);
4876 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr, creating a stub
 * extent record first when none exists.  @found_ref marks refs found by
 * walking file trees (counted in back->found_ref); otherwise the ref
 * came from the extent tree and @num_refs is recorded there.
 */
4880 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4881 u64 parent, u64 root, u64 owner, u64 offset,
4882 u32 num_refs, int found_ref, u64 max_size)
4884 struct extent_record *rec;
4885 struct data_backref *back;
4886 struct cache_extent *cache;
4888 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4890 struct extent_record tmpl;
4892 memset(&tmpl, 0, sizeof(tmpl));
4893 tmpl.start = bytenr;
4895 tmpl.max_size = max_size;
4897 add_extent_rec_nolookup(extent_cache, &tmpl);
4899 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4904 rec = container_of(cache, struct extent_record, cache);
4905 if (rec->max_size < max_size)
4906 rec->max_size = max_size;
4909 * If found_ref is set then max_size is the real size and must match the
4910 * existing refs. So if we have already found a ref then we need to
4911 * make sure that this ref matches the existing one, otherwise we need
4912 * to add a new backref so we can notice that the backrefs don't match
4913 * and we need to figure out who is telling the truth. This is to
4914 * account for that awful fsync bug I introduced where we'd end up with
4915 * a btrfs_file_extent_item that would have its length include multiple
4916 * prealloc extents or point inside of a prealloc extent.
4918 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4921 back = alloc_data_backref(rec, parent, root, owner, offset,
4927 BUG_ON(num_refs != 1);
4928 if (back->node.found_ref)
4929 BUG_ON(back->bytes != max_size);
4930 back->node.found_ref = 1;
4931 back->found_ref += 1;
4932 back->bytes = max_size;
4933 back->disk_bytenr = bytenr;
/* Data blocks carry no header to verify, so both checks pass trivially. */
4935 rec->content_checked = 1;
4936 rec->owner_ref_checked = 1;
4938 if (back->node.found_extent_tree) {
4939 fprintf(stderr, "Extent back ref already exists "
4940 "for %llu parent %llu root %llu "
4941 "owner %llu offset %llu num_refs %lu\n",
4942 (unsigned long long)bytenr,
4943 (unsigned long long)parent,
4944 (unsigned long long)root,
4945 (unsigned long long)owner,
4946 (unsigned long long)offset,
4947 (unsigned long)num_refs);
4949 back->num_refs = num_refs;
4950 back->node.found_extent_tree = 1;
4952 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for scanning: add it to @seen first (which
 * fails if already recorded) and only then to @pending.
 */
4956 static int add_pending(struct cache_tree *pending,
4957 struct cache_tree *seen, u64 bytenr, u32 size)
4960 ret = add_cache_extent(seen, bytenr, size);
4963 add_cache_extent(pending, bytenr, size);
/*
 * Pick the next batch of block ranges (up to @bits_nr) to scan, filling
 * @bits.  Preference order: a pending readahead range, then cached
 * nodes near @last (starting 32K behind it to catch neighbours), then
 * plain pending ranges.  Nearby pending extents (within 32K of the end
 * of the batch) are appended opportunistically to keep I/O sequential.
 * Returns the number of entries written to @bits.
 */
4967 static int pick_next_pending(struct cache_tree *pending,
4968 struct cache_tree *reada,
4969 struct cache_tree *nodes,
4970 u64 last, struct block_info *bits, int bits_nr,
4973 unsigned long node_start = last;
4974 struct cache_extent *cache;
4977 cache = search_cache_extent(reada, 0);
4979 bits[0].start = cache->start;
4980 bits[0].size = cache->size;
4985 if (node_start > 32768)
4986 node_start -= 32768;
4988 cache = search_cache_extent(nodes, node_start);
4990 cache = search_cache_extent(nodes, 0);
4993 cache = search_cache_extent(pending, 0);
4998 bits[ret].start = cache->start;
4999 bits[ret].size = cache->size;
5000 cache = next_cache_extent(cache);
5002 } while (cache && ret < bits_nr);
5008 bits[ret].start = cache->start;
5009 bits[ret].size = cache->size;
5010 cache = next_cache_extent(cache);
5012 } while (cache && ret < bits_nr);
/* Spare slots left: pull in pending extents adjacent to the batch. */
5014 if (bits_nr - ret > 8) {
5015 u64 lookup = bits[0].start + bits[0].size;
5016 struct cache_extent *next;
5017 next = search_cache_extent(pending, lookup);
5019 if (next->start - lookup > 32768)
5021 bits[ret].start = next->start;
5022 bits[ret].size = next->size;
5023 lookup = next->start + next->size;
5027 next = next_cache_extent(next);
/* cache_tree_free_extents() callback: free one chunk_record. */
5035 static void free_chunk_record(struct cache_extent *cache)
5037 struct chunk_record *rec;
5039 rec = container_of(cache, struct chunk_record, cache);
5040 list_del_init(&rec->list);
5041 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
5045 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5047 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rbtree free callback: free one device_record. */
5050 static void free_device_record(struct rb_node *node)
5052 struct device_record *rec;
5054 rec = container_of(node, struct device_record, node);
5058 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's cache and, on success, link it onto the
 * tree's block_groups list.  Returns the insert_cache_extent() result.
 */
5060 int insert_block_group_record(struct block_group_tree *tree,
5061 struct block_group_record *bg_rec)
5065 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5069 list_add_tail(&bg_rec->list, &tree->block_groups);
/* cache_tree_free_extents() callback: free one block_group_record. */
5073 static void free_block_group_record(struct cache_extent *cache)
5075 struct block_group_record *rec;
5077 rec = container_of(cache, struct block_group_record, cache);
5078 list_del_init(&rec->list);
/* Free every block group record held in @tree. */
5082 void free_block_group_tree(struct block_group_tree *tree)
5084 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree and link it onto both orphan lists; the
 * chunk/device matching passes later unlink it once its owners are found.
 */
5087 int insert_device_extent_record(struct device_extent_tree *tree,
5088 struct device_extent_record *de_rec)
5093 * Device extent is a bit different from the other extents, because
5094 * the extents which belong to the different devices may have the
5095 * same start and size, so we need use the special extent cache
5096 * search/insert functions.
5098 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5102 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5103 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink one device_extent_record
 * from any orphan lists it is still on, then free it.
 */
5107 static void free_device_extent_record(struct cache_extent *cache)
5109 struct device_extent_record *rec;
5111 rec = container_of(cache, struct device_extent_record, cache);
5112 if (!list_empty(&rec->chunk_list))
5113 list_del_init(&rec->chunk_list);
5114 if (!list_empty(&rec->device_list))
5115 list_del_init(&rec->device_list);
/* Free every device extent record held in @tree. */
5119 void free_device_extent_tree(struct device_extent_tree *tree)
5121 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5124 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent backref item into the modern backref records:
 * owner objectids below FIRST_FREE are tree roots (tree backref),
 * anything else is file data (data backref).
 */
5125 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5126 struct extent_buffer *leaf, int slot)
5128 struct btrfs_extent_ref_v0 *ref0;
5129 struct btrfs_key key;
5131 btrfs_item_key_to_cpu(leaf, &key, slot);
5132 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5133 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5134 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5136 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5137 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf,
 * including one stripe entry (devid, offset, dev uuid) per stripe.
 * Exits the program on allocation failure.
 */
5143 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5144 struct btrfs_key *key,
5147 struct btrfs_chunk *ptr;
5148 struct chunk_record *rec;
5151 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5152 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size is variable: header plus num_stripes stripe entries. */
5154 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5156 fprintf(stderr, "memory allocation failed\n");
5160 INIT_LIST_HEAD(&rec->list);
5161 INIT_LIST_HEAD(&rec->dextents);
5164 rec->cache.start = key->offset;
5165 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5167 rec->generation = btrfs_header_generation(leaf);
5169 rec->objectid = key->objectid;
5170 rec->type = key->type;
5171 rec->offset = key->offset;
5173 rec->length = rec->cache.size;
5174 rec->owner = btrfs_chunk_owner(leaf, ptr);
5175 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5176 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5177 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5178 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5179 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5180 rec->num_stripes = num_stripes;
5181 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5183 for (i = 0; i < rec->num_stripes; ++i) {
5184 rec->stripes[i].devid =
5185 btrfs_stripe_devid_nr(leaf, ptr, i);
5186 rec->stripes[i].offset =
5187 btrfs_stripe_offset_nr(leaf, ptr, i);
5188 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5189 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the CHUNK_ITEM at @slot and insert the resulting record into
 * @chunk_cache; complains when the chunk range already exists.
 */
5196 static int process_chunk_item(struct cache_tree *chunk_cache,
5197 struct btrfs_key *key, struct extent_buffer *eb,
5200 struct chunk_record *rec;
5203 rec = btrfs_new_chunk_record(eb, key, slot);
5204 ret = insert_cache_extent(chunk_cache, &rec->cache);
5206 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5207 rec->offset, rec->length);
/*
 * Parse the DEV_ITEM at @slot into a device_record and insert it into
 * @dev_cache keyed by devid; complains when the device already exists.
 */
5214 static int process_device_item(struct rb_root *dev_cache,
5215 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5217 struct btrfs_dev_item *ptr;
5218 struct device_record *rec;
5221 ptr = btrfs_item_ptr(eb,
5222 slot, struct btrfs_dev_item);
5224 rec = malloc(sizeof(*rec));
5226 fprintf(stderr, "memory allocation failed\n");
5230 rec->devid = key->offset;
5231 rec->generation = btrfs_header_generation(eb);
5233 rec->objectid = key->objectid;
5234 rec->type = key->type;
5235 rec->offset = key->offset;
/* Authoritative devid comes from the item body, not the key. */
5237 rec->devid = btrfs_device_id(eb, ptr);
5238 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5239 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5241 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5243 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot of @leaf.  Exits the program on allocation failure.
 */
5250 struct block_group_record *
5251 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5254 struct btrfs_block_group_item *ptr;
5255 struct block_group_record *rec;
5257 rec = calloc(1, sizeof(*rec));
5259 fprintf(stderr, "memory allocation failed\n");
5263 rec->cache.start = key->objectid;
5264 rec->cache.size = key->offset;
5266 rec->generation = btrfs_header_generation(leaf);
5268 rec->objectid = key->objectid;
5269 rec->type = key->type;
5270 rec->offset = key->offset;
5272 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5273 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5275 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the BLOCK_GROUP_ITEM at @slot and insert the resulting record
 * into @block_group_cache; complains when the range already exists.
 */
5280 static int process_block_group_item(struct block_group_tree *block_group_cache,
5281 struct btrfs_key *key,
5282 struct extent_buffer *eb, int slot)
5284 struct block_group_record *rec;
5287 rec = btrfs_new_block_group_record(eb, key, slot);
5288 ret = insert_block_group_record(block_group_cache, rec);
5290 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5291 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot of @leaf.  Exits the program on allocation failure.
 */
5298 struct device_extent_record *
5299 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5300 struct btrfs_key *key, int slot)
5302 struct device_extent_record *rec;
5303 struct btrfs_dev_extent *ptr;
5305 rec = calloc(1, sizeof(*rec));
5307 fprintf(stderr, "memory allocation failed\n");
/* Keyed by (devid, physical offset) — hence the two cache fields. */
5311 rec->cache.objectid = key->objectid;
5312 rec->cache.start = key->offset;
5314 rec->generation = btrfs_header_generation(leaf);
5316 rec->objectid = key->objectid;
5317 rec->type = key->type;
5318 rec->offset = key->offset;
5320 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5321 rec->chunk_objecteid =
5322 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5324 btrfs_dev_extent_chunk_offset(leaf, ptr);
5325 rec->length = btrfs_dev_extent_length(leaf, ptr);
5326 rec->cache.size = rec->length;
5328 INIT_LIST_HEAD(&rec->chunk_list);
5329 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the DEV_EXTENT item at @slot and insert the resulting record
 * into @dev_extent_cache; complains when the extent already exists.
 */
5335 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5336 struct btrfs_key *key, struct extent_buffer *eb,
5339 struct device_extent_record *rec;
5342 rec = btrfs_new_device_extent_record(eb, key, slot);
5343 ret = insert_device_extent_record(dev_extent_cache, rec);
5346 "Device extent[%llu, %llu, %llu] existed.\n",
5347 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot: add/update the extent
 * record, then walk the inline references and register a tree or data
 * backref for each.  Handles the pre-2.6.29 v0 format when compiled with
 * BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5354 static int process_extent_item(struct btrfs_root *root,
5355 struct cache_tree *extent_cache,
5356 struct extent_buffer *eb, int slot)
5358 struct btrfs_extent_item *ei;
5359 struct btrfs_extent_inline_ref *iref;
5360 struct btrfs_extent_data_ref *dref;
5361 struct btrfs_shared_data_ref *sref;
5362 struct btrfs_key key;
5363 struct extent_record tmpl;
5367 u32 item_size = btrfs_item_size_nr(eb, slot);
5373 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys store the level in offset; size is one node. */
5375 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5377 num_bytes = root->nodesize;
5379 num_bytes = key.offset;
5382 if (item_size < sizeof(*ei)) {
5383 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5384 struct btrfs_extent_item_v0 *ei0;
5385 BUG_ON(item_size != sizeof(*ei0));
5386 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5387 refs = btrfs_extent_refs_v0(eb, ei0);
5391 memset(&tmpl, 0, sizeof(tmpl));
5392 tmpl.start = key.objectid;
5393 tmpl.nr = num_bytes;
5394 tmpl.extent_item_refs = refs;
5395 tmpl.metadata = metadata;
5397 tmpl.max_size = num_bytes;
5399 return add_extent_rec(extent_cache, &tmpl);
5402 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5403 refs = btrfs_extent_refs(eb, ei);
5404 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5409 memset(&tmpl, 0, sizeof(tmpl));
5410 tmpl.start = key.objectid;
5411 tmpl.nr = num_bytes;
5412 tmpl.extent_item_refs = refs;
5413 tmpl.metadata = metadata;
5415 tmpl.max_size = num_bytes;
5416 add_extent_rec(extent_cache, &tmpl);
5418 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks have a tree_block_info before the refs. */
5419 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5420 key.type == BTRFS_EXTENT_ITEM_KEY)
5421 ptr += sizeof(struct btrfs_tree_block_info);
5423 end = (unsigned long)ei + item_size;
5425 iref = (struct btrfs_extent_inline_ref *)ptr;
5426 type = btrfs_extent_inline_ref_type(eb, iref);
5427 offset = btrfs_extent_inline_ref_offset(eb, iref);
5429 case BTRFS_TREE_BLOCK_REF_KEY:
5430 add_tree_backref(extent_cache, key.objectid,
5433 case BTRFS_SHARED_BLOCK_REF_KEY:
5434 add_tree_backref(extent_cache, key.objectid,
5437 case BTRFS_EXTENT_DATA_REF_KEY:
5438 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5439 add_data_backref(extent_cache, key.objectid, 0,
5440 btrfs_extent_data_ref_root(eb, dref),
5441 btrfs_extent_data_ref_objectid(eb,
5443 btrfs_extent_data_ref_offset(eb, dref),
5444 btrfs_extent_data_ref_count(eb, dref),
5447 case BTRFS_SHARED_DATA_REF_KEY:
5448 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5449 add_data_backref(extent_cache, key.objectid, offset,
5451 btrfs_shared_data_ref_count(eb, sref),
5455 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5456 key.objectid, key.type, num_bytes);
5459 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache has an entry exactly covering
 * [offset, offset+bytes).  Superblock mirror copies that overlap the range
 * are carved out first (they are never free space), recursing when a super
 * stripe lands in the middle of the range.  The matching entry is unlinked
 * from the free space ctl so the caller can later check nothing is left over.
 * NOTE(review): many interior lines (declarations, error paths, returns) are
 * elided in this view — claims below are limited to what is visible.
 */
5466 static int check_cache_range(struct btrfs_root *root,
5467 struct btrfs_block_group_cache *cache,
5468 u64 offset, u64 bytes)
5470 struct btrfs_free_space *entry;
/* Walk every superblock mirror and trim its stripes out of the range. */
5476 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5477 bytenr = btrfs_sb_offset(i);
5478 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5479 cache->key.objectid, bytenr, 0,
5480 &logical, &nr, &stripe_len);
/* Stripe entirely before or after our range: nothing to carve out. */
5485 if (logical[nr] + stripe_len <= offset)
5487 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at our offset: skip over it. */
5489 if (logical[nr] == offset) {
5490 if (stripe_len >= bytes) {
5494 bytes -= stripe_len;
5495 offset += stripe_len;
/* Stripe starts before our offset: advance past its end. */
5496 } else if (logical[nr] < offset) {
5497 if (logical[nr] + stripe_len >=
5502 bytes = (offset + bytes) -
5503 (logical[nr] + stripe_len);
5504 offset = logical[nr] + stripe_len;
5507 * Could be tricky, the super may land in the
5508 * middle of the area we're checking. First
5509 * check the easiest case, it's at the end.
5511 if (logical[nr] + stripe_len >=
5513 bytes = logical[nr] - offset;
5517 /* Check the left side */
5518 ret = check_cache_range(root, cache,
5520 logical[nr] - offset);
5526 /* Now we continue with the right side */
5527 bytes = (offset + bytes) -
5528 (logical[nr] + stripe_len);
5529 offset = logical[nr] + stripe_len;
/* The cache must contain an entry matching the trimmed range exactly. */
5536 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5538 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5539 offset, offset+bytes);
5543 if (entry->offset != offset) {
5544 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5549 if (entry->bytes != bytes) {
5550 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5551 bytes, entry->bytes, offset);
/* Consume the entry; verify_space_cache() checks the ctl ends up empty. */
5555 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check a block group's loaded free space cache against the extent
 * tree: every gap between allocated extents inside the block group must be
 * backed by a matching free space entry (checked via check_cache_range()),
 * and afterwards no entries may remain in the free space ctl.
 * NOTE(review): interior lines (loop framing, error handling, return) are
 * elided in this view.
 */
5560 static int verify_space_cache(struct btrfs_root *root,
5561 struct btrfs_block_group_cache *cache)
5563 struct btrfs_path *path;
5564 struct extent_buffer *leaf;
5565 struct btrfs_key key;
5569 path = btrfs_alloc_path();
/* Scan the extent tree, starting at the block group (or past the sb). */
5573 root = root->fs_info->extent_root;
5575 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5577 key.objectid = last;
5579 key.type = BTRFS_EXTENT_ITEM_KEY;
5581 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5586 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5587 ret = btrfs_next_leaf(root, path);
5595 leaf = path->nodes[0];
5596 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5597 if (key.objectid >= cache->key.offset + cache->key.objectid)
5599 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5600 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Contiguous with the previous extent: just advance 'last'.  METADATA_ITEM
 * keys store the level in offset, so the length is nodesize instead.
 */
5605 if (last == key.objectid) {
5606 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5607 last = key.objectid + key.offset;
5609 last = key.objectid + root->nodesize;
/* Hole between 'last' and this extent: it must be cached free space. */
5614 ret = check_cache_range(root, cache, last,
5615 key.objectid - last);
5618 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5619 last = key.objectid + key.offset;
5621 last = key.objectid + root->nodesize;
/* Trailing free space after the final extent in the block group. */
5625 if (last < cache->key.objectid + cache->key.offset)
5626 ret = check_cache_range(root, cache, last,
5627 cache->key.objectid +
5628 cache->key.offset - last);
5631 btrfs_free_path(path);
/* Any leftover entries mean the cache claimed space that isn't free. */
5634 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5635 fprintf(stderr, "There are still entries left in the space "
/*
 * Top-level free space check: iterate all block groups, load each one's
 * free space source (free space tree if the compat_ro feature is set,
 * otherwise the v1 space cache) and verify it with verify_space_cache().
 * Returns -EINVAL if any block group failed verification, 0 otherwise.
 * NOTE(review): interior lines (loop framing, 'error' accounting) are
 * elided in this view.
 */
5643 static int check_space_cache(struct btrfs_root *root)
5645 struct btrfs_block_group_cache *cache;
5646 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation that doesn't match the super means a stale cache. */
5650 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5651 btrfs_super_generation(root->fs_info->super_copy) !=
5652 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5653 printf("cache and super generation don't match, space cache "
5654 "will be invalidated\n");
5658 if (ctx.progress_enabled) {
5659 ctx.tp = TASK_FREE_SPACE;
5660 task_start(ctx.info);
5664 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5668 start = cache->key.objectid + cache->key.offset;
5669 if (!cache->free_space_ctl) {
5670 if (btrfs_init_free_space_ctl(cache,
5671 root->sectorsize)) {
/* Reset any previously loaded cache before re-populating it. */
5676 btrfs_remove_free_space_cache(cache);
5679 if (btrfs_fs_compat_ro(root->fs_info,
5680 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
/* Free space tree path: super stripes are excluded while loading. */
5681 ret = exclude_super_stripes(root, cache);
5683 fprintf(stderr, "could not exclude super stripes: %s\n",
5688 ret = load_free_space_tree(root->fs_info, cache);
5689 free_excluded_extents(root, cache);
5691 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 space cache path. */
5698 ret = load_free_space_cache(root->fs_info, cache);
5703 ret = verify_space_cache(root, cache);
5705 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5706 cache->key.objectid);
5711 task_stop(ctx.info);
5713 return error ? -EINVAL : 0;
/*
 * Read the data extent at [bytenr, bytenr+num_bytes) and verify every
 * sectorsize chunk against the on-disk csums stored in @eb starting at
 * @leaf_offset.  On mismatch, retries the remaining mirrors before giving
 * up.  num_bytes must be sectorsize aligned.
 * NOTE(review): declarations, mirror-retry control flow, and the
 * cleanup/return path are elided in this view.
 */
5716 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5717 u64 num_bytes, unsigned long leaf_offset,
5718 struct extent_buffer *eb) {
5721 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5723 unsigned long csum_offset;
5727 u64 data_checked = 0;
5733 if (num_bytes % root->sectorsize)
/* One buffer for the whole extent; freed on the (elided) exit path. */
5736 data = malloc(num_bytes);
5740 while (offset < num_bytes) {
5743 read_len = num_bytes - offset;
5744 /* read as much space once a time */
5745 ret = read_extent_data(root, data + offset,
5746 bytenr + offset, &read_len, mirror);
5750 /* verify every 4k data's checksum */
5751 while (data_checked < read_len) {
5753 tmp = offset + data_checked;
5755 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5756 csum, root->sectorsize);
5757 btrfs_csum_final(csum, (char *)&csum);
/* Locate the expected csum for this sector inside the csum item. */
5759 csum_offset = leaf_offset +
5760 tmp / root->sectorsize * csum_size;
5761 read_extent_buffer(eb, (char *)&csum_expected,
5762 csum_offset, csum_size);
5763 /* try another mirror */
5764 if (csum != csum_expected) {
5765 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5766 mirror, bytenr + tmp,
5767 csum, csum_expected);
5768 num_copies = btrfs_num_copies(
5769 &root->fs_info->mapping_tree,
5771 if (mirror < num_copies - 1) {
5776 data_checked += root->sectorsize;
/*
 * Verify that the whole byte range [bytenr, bytenr+num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.  Walks backwards first (an extent may
 * start before bytenr but still cover it), then forwards, shrinking the
 * remaining uncovered range as extents are found; recurses for the odd case
 * of an extent splitting the range in the middle.  Reports any uncovered
 * remainder.  NOTE(review): parameter list tail, loop framing, and several
 * error/return lines are elided in this view.
 */
5788 struct btrfs_path *path;
5789 struct extent_buffer *leaf;
5790 struct btrfs_key key;
5793 path = btrfs_alloc_path();
5795 fprintf(stderr, "Error allocating path\n");
/* Search for the last extent item at or before bytenr. */
5799 key.objectid = bytenr;
5800 key.type = BTRFS_EXTENT_ITEM_KEY;
5801 key.offset = (u64)-1;
5804 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5807 fprintf(stderr, "Error looking up extent record %d\n", ret);
5808 btrfs_free_path(path);
5811 if (path->slots[0] > 0) {
5814 ret = btrfs_prev_leaf(root, path);
5817 } else if (ret > 0) {
5824 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5827 * Block group items come before extent items if they have the same
5828 * bytenr, so walk back one more just in case. Dear future traveller,
5829 * first congrats on mastering time travel. Now if it's not too much
5830 * trouble could you go back to 2006 and tell Chris to make the
5831 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5832 * EXTENT_ITEM_KEY please?
5834 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5835 if (path->slots[0] > 0) {
5838 ret = btrfs_prev_leaf(root, path);
5841 } else if (ret > 0) {
5846 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward scan: consume extent items until the range is accounted for. */
5850 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5851 ret = btrfs_next_leaf(root, path);
5853 fprintf(stderr, "Error going to next leaf "
5855 btrfs_free_path(path);
5861 leaf = path->nodes[0];
5862 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5863 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range starts: irrelevant, keep scanning. */
5867 if (key.objectid + key.offset < bytenr) {
5871 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at bytenr: trim the front of the range. */
5874 if (key.objectid == bytenr) {
5875 if (key.offset >= num_bytes) {
5879 num_bytes -= key.offset;
5880 bytenr += key.offset;
/* Extent starts before bytenr: trim up to its end. */
5881 } else if (key.objectid < bytenr) {
5882 if (key.objectid + key.offset >= bytenr + num_bytes) {
5886 num_bytes = (bytenr + num_bytes) -
5887 (key.objectid + key.offset);
5888 bytenr = key.objectid + key.offset;
/* Extent starts inside the range: possibly splits it in two. */
5890 if (key.objectid + key.offset < bytenr + num_bytes) {
5891 u64 new_start = key.objectid + key.offset;
5892 u64 new_bytes = bytenr + num_bytes - new_start;
5895 * Weird case, the extent is in the middle of
5896 * our range, we'll have to search one side
5897 * and then the other. Not sure if this happens
5898 * in real life, but no harm in coding it up
5899 * anyway just in case.
5901 btrfs_release_path(path);
5902 ret = check_extent_exists(root, new_start,
5905 fprintf(stderr, "Right section didn't "
5909 num_bytes = key.objectid - bytenr;
5912 num_bytes = key.objectid - bytenr;
/* Anything left uncovered at this point is a missing extent record. */
5919 if (num_bytes && !ret) {
5920 fprintf(stderr, "There are no extents for csum range "
5921 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5925 btrfs_free_path(path);
/*
 * Walk the entire csum tree.  For each csum item: optionally verify the
 * actual data checksums (when check_data_csum is set), and track contiguous
 * csum coverage so that whenever coverage jumps, the previous run can be
 * checked for a backing extent record via check_extent_exists().
 * NOTE(review): loop framing, iteration advance, and the final return are
 * elided in this view.
 */
5929 static int check_csums(struct btrfs_root *root)
5931 struct btrfs_path *path;
5932 struct extent_buffer *leaf;
5933 struct btrfs_key key;
5934 u64 offset = 0, num_bytes = 0;
5935 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5939 unsigned long leaf_offset;
5941 root = root->fs_info->csum_root;
5942 if (!extent_buffer_uptodate(root->node)) {
5943 fprintf(stderr, "No valid csum tree found\n");
/* All data csums live under this single objectid/type. */
5947 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5948 key.type = BTRFS_EXTENT_CSUM_KEY;
5951 path = btrfs_alloc_path();
5955 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5957 fprintf(stderr, "Error searching csum tree %d\n", ret);
5958 btrfs_free_path(path);
5962 if (ret > 0 && path->slots[0])
5967 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5968 ret = btrfs_next_leaf(root, path);
5970 fprintf(stderr, "Error going to next leaf "
5977 leaf = path->nodes[0];
5979 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5980 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Item size / csum_size gives the number of sectors this item covers. */
5985 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5986 csum_size) * root->sectorsize;
5987 if (!check_data_csum)
5988 goto skip_csum_check;
5989 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5990 ret = check_extent_csums(root, key.offset, data_len,
/* Coverage gap: the previous contiguous run must map to real extents. */
5996 offset = key.offset;
5997 } else if (key.offset != offset + num_bytes) {
5998 ret = check_extent_exists(root, offset, num_bytes);
6000 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6001 "there is no extent record\n",
6002 offset, offset+num_bytes);
6005 offset = key.offset;
6008 num_bytes += data_len;
6012 btrfs_free_path(path);
/*
 * Return whether @key sorts strictly before @drop_key (objectid, then type,
 * then offset) — i.e. whether the key falls in the already-dropped part of
 * a partially deleted snapshot.  NOTE(review): the return statements are
 * elided in this view.
 */
6016 static int is_dropped_key(struct btrfs_key *key,
6017 struct btrfs_key *drop_key) {
6018 if (key->objectid < drop_key->objectid)
6020 else if (key->objectid == drop_key->objectid) {
6021 if (key->type < drop_key->type)
6023 else if (key->type == drop_key->type) {
6024 if (key->offset < drop_key->offset)
6032 * Here are the rules for FULL_BACKREF.
6034 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6035 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6037 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6038 * if it happened after the relocation occurred since we'll have dropped the
6039 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6040 * have no real way to know for sure.
6042 * We process the blocks one root at a time, and we start from the lowest root
6043 * objectid and go to the highest. So we can just lookup the owner backref for
6044 * the record and if we don't find it then we know it doesn't exist and we have
6047 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6048 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6049 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF (stored
 * into *flags) using the rules above, and mark the extent record's
 * bad_full_backref when the recorded state contradicts the decision.
 * NOTE(review): several early-out lines are elided in this view.
 */
6051 static int calc_extent_flag(struct btrfs_root *root,
6052 struct cache_tree *extent_cache,
6053 struct extent_buffer *buf,
6054 struct root_item_record *ri,
6057 struct extent_record *rec;
6058 struct cache_extent *cache;
6059 struct tree_backref *tback;
6062 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6063 /* we have added this extent before */
6065 rec = container_of(cache, struct extent_record, cache);
6068 * Except file/reloc tree, we can not have
6071 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree being walked: no FULL_BACKREF question arises. */
6076 if (buf->start == ri->bytenr)
6079 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6082 owner = btrfs_header_owner(buf);
6083 if (owner == ri->objectid)
/* Owner mismatch: check whether the owner's backref was ever recorded. */
6086 tback = find_tree_backref(rec, 0, owner);
/* Recorded flag says FULL_BACKREF but we decided otherwise: flag it. */
6091 if (rec->flag_block_full_backref != FLAG_UNSET &&
6092 rec->flag_block_full_backref != 0)
6093 rec->bad_full_backref = 1;
6096 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Recorded flag says no FULL_BACKREF but we decided it is needed. */
6097 if (rec->flag_block_full_backref != FLAG_UNSET &&
6098 rec->flag_block_full_backref != 1)
6099 rec->bad_full_backref = 1;
/*
 * Core of the breadth-ish metadata scan: pick the next pending tree block,
 * read it, validate/derive its FULL_BACKREF flag, sanity-check it, then
 * process its items — recording chunks, devices, block groups, dev extents,
 * extent items, backrefs and csum bytes for leaves, or queueing child
 * blocks (and their extent records/backrefs) for interior nodes.  Also
 * accumulates the global byte/space-waste statistics.
 * NOTE(review): some parameters, locals, early returns, and error paths are
 * elided in this view; comments below describe only the visible logic.
 */
6103 static int run_next_block(struct btrfs_root *root,
6104 struct block_info *bits,
6107 struct cache_tree *pending,
6108 struct cache_tree *seen,
6109 struct cache_tree *reada,
6110 struct cache_tree *nodes,
6111 struct cache_tree *extent_cache,
6112 struct cache_tree *chunk_cache,
6113 struct rb_root *dev_cache,
6114 struct block_group_tree *block_group_cache,
6115 struct device_extent_tree *dev_extent_cache,
6116 struct root_item_record *ri)
6118 struct extent_buffer *buf;
6119 struct extent_record *rec = NULL;
6130 struct btrfs_key key;
6131 struct cache_extent *cache;
/* Choose the next batch of blocks to visit and kick off readahead. */
6134 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6135 bits_nr, &reada_bits);
6140 for(i = 0; i < nritems; i++) {
6141 ret = add_cache_extent(reada, bits[i].start,
6146 /* fixme, get the parent transid */
6147 readahead_tree_block(root, bits[i].start,
6151 *last = bits[0].start;
6152 bytenr = bits[0].start;
6153 size = bits[0].size;
/* Drop this block from the pending/reada/nodes trackers. */
6155 cache = lookup_cache_extent(pending, bytenr, size);
6157 remove_cache_extent(pending, cache);
6160 cache = lookup_cache_extent(reada, bytenr, size);
6162 remove_cache_extent(reada, cache);
6165 cache = lookup_cache_extent(nodes, bytenr, size);
6167 remove_cache_extent(nodes, cache);
6170 cache = lookup_cache_extent(extent_cache, bytenr, size);
6172 rec = container_of(cache, struct extent_record, cache);
6173 gen = rec->parent_generation;
6176 /* fixme, get the real parent transid */
6177 buf = read_tree_block(root, bytenr, size, gen);
6178 if (!extent_buffer_uptodate(buf)) {
6179 record_bad_block_io(root->fs_info,
6180 extent_cache, bytenr, size);
6184 nritems = btrfs_header_nritems(buf);
/*
 * Get the block's flags from the extent tree when it is trusted,
 * falling back to computing them from the backref rules.
 */
6187 if (!init_extent_tree) {
6188 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6189 btrfs_header_level(buf), 1, NULL,
6192 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6194 fprintf(stderr, "Couldn't calc extent flags\n");
6195 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6202 fprintf(stderr, "Couldn't calc extent flags\n");
6203 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6207 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6209 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6210 ri->objectid == btrfs_header_owner(buf)) {
6212 * Ok we got to this block from it's original owner and
6213 * we have FULL_BACKREF set. Relocation can leave
6214 * converted blocks over so this is altogether possible,
6215 * however it's not possible if the generation > the
6216 * last snapshot, so check for this case.
6218 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6219 btrfs_header_generation(buf) > ri->last_snapshot) {
6220 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6221 rec->bad_full_backref = 1;
6226 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6227 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6228 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6229 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
6233 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6234 rec->flag_block_full_backref = 1;
6238 rec->flag_block_full_backref = 0;
6240 owner = btrfs_header_owner(buf);
6243 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item to the appropriate recorder. */
6247 if (btrfs_is_leaf(buf)) {
6248 btree_space_waste += btrfs_leaf_free_space(root, buf);
6249 for (i = 0; i < nritems; i++) {
6250 struct btrfs_file_extent_item *fi;
6251 btrfs_item_key_to_cpu(buf, &key, i);
6252 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6253 process_extent_item(root, extent_cache, buf,
6257 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6258 process_extent_item(root, extent_cache, buf,
6262 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6264 btrfs_item_size_nr(buf, i);
6267 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6268 process_chunk_item(chunk_cache, &key, buf, i);
6271 if (key.type == BTRFS_DEV_ITEM_KEY) {
6272 process_device_item(dev_cache, &key, buf, i);
6275 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6276 process_block_group_item(block_group_cache,
6280 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6281 process_device_extent_item(dev_extent_cache,
6286 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6287 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6288 process_extent_ref_v0(extent_cache, buf, i);
6295 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6296 add_tree_backref(extent_cache, key.objectid, 0,
6300 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6301 add_tree_backref(extent_cache, key.objectid,
6305 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6306 struct btrfs_extent_data_ref *ref;
6307 ref = btrfs_item_ptr(buf, i,
6308 struct btrfs_extent_data_ref);
6309 add_data_backref(extent_cache,
6311 btrfs_extent_data_ref_root(buf, ref),
6312 btrfs_extent_data_ref_objectid(buf,
6314 btrfs_extent_data_ref_offset(buf, ref),
6315 btrfs_extent_data_ref_count(buf, ref),
6316 0, root->sectorsize);
6319 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6320 struct btrfs_shared_data_ref *ref;
6321 ref = btrfs_item_ptr(buf, i,
6322 struct btrfs_shared_data_ref);
6323 add_data_backref(extent_cache,
6324 key.objectid, key.offset, 0, 0, 0,
6325 btrfs_shared_data_ref_count(buf, ref),
6326 0, root->sectorsize);
/* Orphan items (except the orphan root itself) are queued for deletion. */
6329 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6330 struct bad_item *bad;
6332 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6336 bad = malloc(sizeof(struct bad_item));
6339 INIT_LIST_HEAD(&bad->list);
6340 memcpy(&bad->key, &key,
6341 sizeof(struct btrfs_key));
6342 bad->root_id = owner;
6343 list_add_tail(&bad->list, &delete_items);
/* File extents: account allocated/referenced bytes and record backrefs. */
6346 if (key.type != BTRFS_EXTENT_DATA_KEY)
6348 fi = btrfs_item_ptr(buf, i,
6349 struct btrfs_file_extent_item);
6350 if (btrfs_file_extent_type(buf, fi) ==
6351 BTRFS_FILE_EXTENT_INLINE)
6353 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6356 data_bytes_allocated +=
6357 btrfs_file_extent_disk_num_bytes(buf, fi);
6358 if (data_bytes_allocated < root->sectorsize) {
6361 data_bytes_referenced +=
6362 btrfs_file_extent_num_bytes(buf, fi);
6363 add_data_backref(extent_cache,
6364 btrfs_file_extent_disk_bytenr(buf, fi),
6365 parent, owner, key.objectid, key.offset -
6366 btrfs_file_extent_offset(buf, fi), 1, 1,
6367 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: queue each child and record its extent/backref. */
6371 struct btrfs_key first_key;
6373 first_key.objectid = 0;
6376 btrfs_item_key_to_cpu(buf, &first_key, 0);
6377 level = btrfs_header_level(buf);
6378 for (i = 0; i < nritems; i++) {
6379 struct extent_record tmpl;
6381 ptr = btrfs_node_blockptr(buf, i);
6382 size = root->nodesize;
6383 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already removed by an in-progress snapshot drop. */
6385 if ((level == ri->drop_level)
6386 && is_dropped_key(&key, &ri->drop_key)) {
6391 memset(&tmpl, 0, sizeof(tmpl));
6392 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6393 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6398 tmpl.max_size = size;
6399 ret = add_extent_rec(extent_cache, &tmpl);
6402 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6405 add_pending(nodes, seen, ptr, size);
6407 add_pending(pending, seen, ptr, size);
6410 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6411 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final report. */
6413 total_btree_bytes += buf->len;
6414 if (fs_root_objectid(btrfs_header_owner(buf)))
6415 total_fs_tree_bytes += buf->len;
6416 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6417 total_extent_tree_bytes += buf->len;
6418 if (!found_old_backref &&
6419 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6420 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6421 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6422 found_old_backref = 1;
6424 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block: queue it on the nodes (interior) or
 * pending (leaf) list, create its extent record, and add the appropriate
 * tree backref — a shared (parent-based) backref for reloc trees and
 * pre-mixed-backref filesystems, otherwise an owner-root backref.
 * NOTE(review): some tmpl field assignments and the return are elided in
 * this view.
 */
6428 static int add_root_to_pending(struct extent_buffer *buf,
6429 struct cache_tree *extent_cache,
6430 struct cache_tree *pending,
6431 struct cache_tree *seen,
6432 struct cache_tree *nodes,
6435 struct extent_record tmpl;
6437 if (btrfs_header_level(buf) > 0)
6438 add_pending(nodes, seen, buf->start, buf->len);
6440 add_pending(pending, seen, buf->start, buf->len);
6442 memset(&tmpl, 0, sizeof(tmpl));
6443 tmpl.start = buf->start;
6448 tmpl.max_size = buf->len;
6449 add_extent_rec(extent_cache, &tmpl);
6451 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6452 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6453 add_tree_backref(extent_cache, buf->start, buf->start,
6456 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6460 /* as we fix the tree, we might be deleting blocks that
6461 * we're tracking for repair. This hook makes sure we
6462 * remove any backrefs for blocks as we are fixing them.
/*
 * Called when an extent is freed during repair: decrement the matching
 * data or tree backref counters on the tracked extent_record, clear the
 * found_ref/found_extent_tree bits when they hit zero, erase fully dead
 * backrefs from the record's tree, and let maybe_free_extent_rec() reap
 * the record if nothing references it anymore.
 * NOTE(review): early returns and the final return are elided in this view.
 */
6464 static int free_extent_hook(struct btrfs_trans_handle *trans,
6465 struct btrfs_root *root,
6466 u64 bytenr, u64 num_bytes, u64 parent,
6467 u64 root_objectid, u64 owner, u64 offset,
6470 struct extent_record *rec;
6471 struct cache_extent *cache;
6473 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents have owners >= FIRST_FREE_OBJECTID (inode numbers). */
6475 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6476 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6480 rec = container_of(cache, struct extent_record, cache);
6482 struct data_backref *back;
6483 back = find_data_backref(rec, parent, root_objectid, owner,
6484 offset, 1, bytenr, num_bytes);
6487 if (back->node.found_ref) {
6488 back->found_ref -= refs_to_drop;
6490 rec->refs -= refs_to_drop;
6492 if (back->node.found_extent_tree) {
6493 back->num_refs -= refs_to_drop;
6494 if (rec->extent_item_refs)
6495 rec->extent_item_refs -= refs_to_drop;
6497 if (back->found_ref == 0)
6498 back->node.found_ref = 0;
6499 if (back->num_refs == 0)
6500 back->node.found_extent_tree = 0;
/* Backref no longer seen anywhere: drop it from the record's tree. */
6502 if (!back->node.found_extent_tree && back->node.found_ref) {
6503 rb_erase(&back->node.node, &rec->backref_tree);
6507 struct tree_backref *back;
6508 back = find_tree_backref(rec, parent, root_objectid);
6511 if (back->node.found_ref) {
6514 back->node.found_ref = 0;
6516 if (back->node.found_extent_tree) {
6517 if (rec->extent_item_refs)
6518 rec->extent_item_refs--;
6519 back->node.found_extent_tree = 0;
6521 if (!back->node.found_extent_tree && back->node.found_ref) {
6522 rb_erase(&back->node.node, &rec->backref_tree);
6526 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items and
 * all backref item types) whose objectid equals @bytenr, walking backwards
 * from (bytenr, max, max).  For EXTENT/METADATA items also reverses the
 * block group accounting for the freed bytes.
 * NOTE(review): loop framing, error handling, and the return are elided in
 * this view.
 */
6531 static int delete_extent_records(struct btrfs_trans_handle *trans,
6532 struct btrfs_root *root,
6533 struct btrfs_path *path,
6534 u64 bytenr, u64 new_len)
6536 struct btrfs_key key;
6537 struct btrfs_key found_key;
6538 struct extent_buffer *leaf;
/* Start past all possible items for this bytenr and step backwards. */
6543 key.objectid = bytenr;
6545 key.offset = (u64)-1;
6548 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6555 if (path->slots[0] == 0)
6561 leaf = path->nodes[0];
6562 slot = path->slots[0];
6564 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6565 if (found_key.objectid != bytenr)
/* Only extent items and backref items are candidates for deletion. */
6568 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6569 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6570 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6571 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6572 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6573 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6574 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not deletable: step the search key just before this item and retry. */
6575 btrfs_release_path(path);
6576 if (found_key.type == 0) {
6577 if (found_key.offset == 0)
6579 key.offset = found_key.offset - 1;
6580 key.type = found_key.type;
6582 key.type = found_key.type - 1;
6583 key.offset = (u64)-1;
6587 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6588 found_key.objectid, found_key.type, found_key.offset);
6590 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6593 btrfs_release_path(path);
/* Undo the block group usage for the extent we just removed. */
6595 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6596 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6597 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6598 found_key.offset : root->nodesize;
6600 ret = btrfs_update_block_group(trans, root, bytenr,
6607 btrfs_release_path(path);
6612 * for a single backref, this will allocate a new extent
6613 * and add the backref to it.
/*
 * Repair helper: (re)insert an EXTENT_ITEM for @rec if not yet @allocated —
 * including the tree_block_info for metadata extents — update block group
 * accounting, then add the backref described by @back via
 * btrfs_inc_extent_ref() (once per found reference for data backrefs).
 * NOTE(review): error handling / returns between the visible lines are
 * elided in this view.
 */
6615 static int record_extent(struct btrfs_trans_handle *trans,
6616 struct btrfs_fs_info *info,
6617 struct btrfs_path *path,
6618 struct extent_record *rec,
6619 struct extent_backref *back,
6620 int allocated, u64 flags)
6623 struct btrfs_root *extent_root = info->extent_root;
6624 struct extent_buffer *leaf;
6625 struct btrfs_key ins_key;
6626 struct btrfs_extent_item *ei;
6627 struct tree_backref *tback;
6628 struct data_backref *dback;
6629 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one nodesize. */
6632 rec->max_size = max_t(u64, rec->max_size,
6633 info->extent_root->nodesize);
/* Metadata (non-skinny) extent items carry a tree_block_info tail. */
6636 u32 item_size = sizeof(*ei);
6639 item_size += sizeof(*bi);
6641 ins_key.objectid = rec->start;
6642 ins_key.offset = rec->max_size;
6643 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6645 ret = btrfs_insert_empty_item(trans, extent_root, path,
6646 &ins_key, item_size);
6650 leaf = path->nodes[0];
6651 ei = btrfs_item_ptr(leaf, path->slots[0],
6652 struct btrfs_extent_item);
/* Refs start at 0; btrfs_inc_extent_ref() below bumps them. */
6654 btrfs_set_extent_refs(leaf, ei, 0);
6655 btrfs_set_extent_generation(leaf, ei, rec->generation);
6657 if (back->is_data) {
6658 btrfs_set_extent_flags(leaf, ei,
6659 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray double semicolon below — harmless, left as-is. */
6661 struct btrfs_disk_key copy_key;;
6663 tback = to_tree_backref(back);
6664 bi = (struct btrfs_tree_block_info *)(ei + 1);
6665 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* The real first key is unknown here; write a zeroed placeholder key. */
6668 btrfs_set_disk_key_objectid(&copy_key,
6669 rec->info_objectid);
6670 btrfs_set_disk_key_type(&copy_key, 0);
6671 btrfs_set_disk_key_offset(&copy_key, 0);
6673 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6674 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6676 btrfs_set_extent_flags(leaf, ei,
6677 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6680 btrfs_mark_buffer_dirty(leaf);
6681 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6682 rec->max_size, 1, 0);
6685 btrfs_release_path(path);
6688 if (back->is_data) {
6692 dback = to_data_backref(back);
6693 if (back->full_backref)
6694 parent = dback->parent;
/* Add one ref per reference actually found in the fs trees. */
6698 for (i = 0; i < dback->found_ref; i++) {
6699 /* if parent != 0, we're doing a full backref
6700 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6701 * just makes the backref allocator create a data
6704 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6705 rec->start, rec->max_size,
6709 BTRFS_FIRST_FREE_OBJECTID :
6715 fprintf(stderr, "adding new data backref"
6716 " on %llu %s %llu owner %llu"
6717 " offset %llu found %d\n",
6718 (unsigned long long)rec->start,
6719 back->full_backref ?
6721 back->full_backref ?
6722 (unsigned long long)parent :
6723 (unsigned long long)dback->root,
6724 (unsigned long long)dback->owner,
6725 (unsigned long long)dback->offset,
6730 tback = to_tree_backref(back);
6731 if (back->full_backref)
6732 parent = tback->parent;
6736 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6737 rec->start, rec->max_size,
6738 parent, tback->root, 0, 0);
6739 fprintf(stderr, "adding new tree backref on "
6740 "start %llu len %llu parent %llu root %llu\n",
6741 rec->start, rec->max_size, parent, tback->root);
6744 btrfs_release_path(path);
/*
 * Linear search @entries for an extent_entry with exactly the given bytenr
 * and length.  NOTE(review): the found/not-found returns are elided in this
 * view — presumably returns the match or NULL; confirm against full source.
 */
6748 static struct extent_entry *find_entry(struct list_head *entries,
6749 u64 bytenr, u64 bytes)
6751 struct extent_entry *entry = NULL;
6753 list_for_each_entry(entry, entries, list) {
6754 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from @entries: the one with the
 * highest count of agreeing backrefs, ignoring entries whose every
 * occurrence was marked broken.  A tie in counts means no clear winner and
 * the search must continue.
 * NOTE(review): continue/return lines between conditions are elided in this
 * view.
 */
6761 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6763 struct extent_entry *entry, *best = NULL, *prev = NULL;
6765 list_for_each_entry(entry, entries, list) {
6772 * If there are as many broken entries as entries then we know
6773 * not to trust this particular entry.
6775 if (entry->broken == entry->count)
6779 * If our current entry == best then we can't be sure our best
6780 * is really the best, so we need to keep searching.
6782 if (best && best->count == entry->count) {
6788 /* Prev == entry, not good enough, have to keep searching */
6789 if (!prev->broken && prev->count == entry->count)
6793 best = (prev->count > entry->count) ? prev : entry;
6794 else if (best->count < entry->count)
/*
 * Repair a file extent item whose disk_bytenr/disk_num_bytes disagree with
 * the agreed-upon extent @entry: locate the referencing EXTENT_DATA item in
 * the owning fs root, then rewrite its disk_bytenr, offset, disk_num_bytes
 * (and ram_bytes when uncompressed) inside a transaction so they match the
 * entry.  Compressed refs that don't start at the entry are punted to the
 * developer with a request for a btrfs-image.
 * NOTE(review): several error returns and loop-advance lines are elided in
 * this view.
 */
6802 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6803 struct data_backref *dback, struct extent_entry *entry)
6805 struct btrfs_trans_handle *trans;
6806 struct btrfs_root *root;
6807 struct btrfs_file_extent_item *fi;
6808 struct extent_buffer *leaf;
6809 struct btrfs_key key;
/* Resolve the fs root that owns the broken reference. */
6813 key.objectid = dback->root;
6814 key.type = BTRFS_ROOT_ITEM_KEY;
6815 key.offset = (u64)-1;
6816 root = btrfs_read_fs_root(info, &key);
6818 fprintf(stderr, "Couldn't find root for our ref\n");
6823 * The backref points to the original offset of the extent if it was
6824 * split, so we need to search down to the offset we have and then walk
6825 * forward until we find the backref we're looking for.
6827 key.objectid = dback->owner;
6828 key.type = BTRFS_EXTENT_DATA_KEY;
6829 key.offset = dback->offset;
6830 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6832 fprintf(stderr, "Error looking up ref %d\n", ret);
6837 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6838 ret = btrfs_next_leaf(root, path);
6840 fprintf(stderr, "Couldn't find our ref, next\n");
6844 leaf = path->nodes[0];
6845 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6846 if (key.objectid != dback->owner ||
6847 key.type != BTRFS_EXTENT_DATA_KEY) {
6848 fprintf(stderr, "Couldn't find our ref, search\n");
6851 fi = btrfs_item_ptr(leaf, path->slots[0],
6852 struct btrfs_file_extent_item);
6853 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6854 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once the file extent matching this backref is found. */
6856 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6861 btrfs_release_path(path);
6863 trans = btrfs_start_transaction(root, 1);
6865 return PTR_ERR(trans);
6868 * Ok we have the key of the file extent we want to fix, now we can cow
6869 * down to the thing and fix it.
6871 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6873 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6874 key.objectid, key.type, key.offset, ret);
6878 fprintf(stderr, "Well that's odd, we just found this key "
6879 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6884 leaf = path->nodes[0];
6885 fi = btrfs_item_ptr(leaf, path->slots[0],
6886 struct btrfs_file_extent_item);
/* Compression means offsets can't be rewritten safely here. */
6888 if (btrfs_file_extent_compression(leaf, fi) &&
6889 dback->disk_bytenr != entry->bytenr) {
6890 fprintf(stderr, "Ref doesn't match the record start and is "
6891 "compressed, please take a btrfs-image of this file "
6892 "system and send it to a btrfs developer so they can "
6893 "complete this functionality for bytenr %Lu\n",
6894 dback->disk_bytenr);
/* Broken ref: just repoint it at the entry's bytenr. */
6899 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6900 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts inside the entry: shift the diff into the file offset. */
6901 } else if (dback->disk_bytenr > entry->bytenr) {
6902 u64 off_diff, offset;
6904 off_diff = dback->disk_bytenr - entry->bytenr;
6905 offset = btrfs_file_extent_offset(leaf, fi);
6906 if (dback->disk_bytenr + offset +
6907 btrfs_file_extent_num_bytes(leaf, fi) >
6908 entry->bytenr + entry->bytes) {
6909 fprintf(stderr, "Ref is past the entry end, please "
6910 "take a btrfs-image of this file system and "
6911 "send it to a btrfs developer, ref %Lu\n",
6912 dback->disk_bytenr);
6917 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6918 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry: fold the gap out of the offset. */
6919 } else if (dback->disk_bytenr < entry->bytenr) {
6922 offset = btrfs_file_extent_offset(leaf, fi);
6923 if (dback->disk_bytenr + offset < entry->bytenr) {
6924 fprintf(stderr, "Ref is before the entry start, please"
6925 " take a btrfs-image of this file system and "
6926 "send it to a btrfs developer, ref %Lu\n",
6927 dback->disk_bytenr);
6932 offset += dback->disk_bytenr;
6933 offset -= entry->bytenr;
6934 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6935 btrfs_set_file_extent_offset(leaf, fi, offset);
6938 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6941 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6942 * only do this if we aren't using compression, otherwise it's a
6945 if (!btrfs_file_extent_compression(leaf, fi))
6946 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6948 printf("ram bytes may be wrong?\n");
6949 btrfs_mark_buffer_dirty(leaf);
6951 err = btrfs_commit_transaction(trans, root);
6952 btrfs_release_path(path);
6953 return ret ? ret : err;
/*
 * Make all of the data backrefs attached to @rec agree on the extent's
 * (disk bytenr, byte count).  Every distinct (bytenr, bytes) pair seen in
 * the backrefs is tallied into an extent_entry bucket; the bucket with the
 * most votes wins (the extent record itself is consulted as a tie
 * breaker), and each file extent that disagrees is rewritten through
 * repair_ref().
 *
 * NOTE(review): several statements of this function are elided in this
 * view (continue/error paths, the final return); confirm the exact
 * return-value contract against the full file.
 */
6956 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6957 struct extent_record *rec)
6959 struct extent_backref *back, *tmp;
6960 struct data_backref *dback;
6961 struct extent_entry *entry, *best = NULL;
6964 int broken_entries = 0;
6969 * Metadata is easy and the backrefs should always agree on bytenr and
6970 * size, if not we've got bigger issues.
/* Pass 1: bucket every real data backref by its (disk_bytenr, bytes). */
6975 rbtree_postorder_for_each_entry_safe(back, tmp,
6976 &rec->backref_tree, node) {
6977 if (back->full_backref || !back->is_data)
6980 dback = to_data_backref(back);
6983 * We only pay attention to backrefs that we found a real
6986 if (dback->found_ref == 0)
6990 * For now we only catch when the bytes don't match, not the
6991 * bytenr. We can easily do this at the same time, but I want
6992 * to have a fs image to test on before we just add repair
6993 * functionality willy-nilly so we know we won't screw up the
6997 entry = find_entry(&entries, dback->disk_bytenr,
7000 entry = malloc(sizeof(struct extent_entry));
7005 memset(entry, 0, sizeof(*entry));
7006 entry->bytenr = dback->disk_bytenr;
7007 entry->bytes = dback->bytes;
7008 list_add_tail(&entry->list, &entries);
7013 * If we only have one entry we may think the entries agree when
7014 * in reality they don't so we have to do some extra checking.
7016 if (dback->disk_bytenr != rec->start ||
7017 dback->bytes != rec->nr || back->broken)
7028 /* Yay all the backrefs agree, carry on good sir */
7029 if (nr_entries <= 1 && !mismatch)
7032 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7033 "%Lu\n", rec->start);
7036 * First we want to see if the backrefs can agree amongst themselves who
7037 * is right, so figure out which one of the entries has the highest
7040 best = find_most_right_entry(&entries);
7043 * Ok so we may have an even split between what the backrefs think, so
7044 * this is where we use the extent ref to see what it thinks.
7047 entry = find_entry(&entries, rec->start, rec->nr);
7048 if (!entry && (!broken_entries || !rec->found_rec)) {
7049 fprintf(stderr, "Backrefs don't agree with each other "
7050 "and extent record doesn't agree with anybody,"
7051 " so we can't fix bytenr %Lu bytes %Lu\n",
7052 rec->start, rec->nr);
7055 } else if (!entry) {
7057 * Ok our backrefs were broken, we'll assume this is the
7058 * correct value and add an entry for this range.
7060 entry = malloc(sizeof(struct extent_entry));
7065 memset(entry, 0, sizeof(*entry));
7066 entry->bytenr = rec->start;
7067 entry->bytes = rec->nr;
7068 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent record has been added as a ballot. */
7072 best = find_most_right_entry(&entries);
7074 fprintf(stderr, "Backrefs and extent record evenly "
7075 "split on who is right, this is going to "
7076 "require user input to fix bytenr %Lu bytes "
7077 "%Lu\n", rec->start, rec->nr);
7084 * I don't think this can happen currently as we'll abort() if we catch
7085 * this case higher up, but in case somebody removes that we still can't
7086 * deal with it properly here yet, so just bail out if that's the case.
7088 if (best->bytenr != rec->start) {
7089 fprintf(stderr, "Extent start and backref starts don't match, "
7090 "please use btrfs-image on this file system and send "
7091 "it to a btrfs developer so they can make fsck fix "
7092 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7093 rec->start, rec->nr);
7099 * Ok great we all agreed on an extent record, let's go find the real
7100 * references and fix up the ones that don't match.
/* Pass 2: rewrite each dissenting file extent to match the winner. */
7102 rbtree_postorder_for_each_entry_safe(back, tmp,
7103 &rec->backref_tree, node) {
7104 if (back->full_backref || !back->is_data)
7107 dback = to_data_backref(back);
7110 * Still ignoring backrefs that don't have a real ref attached
7113 if (dback->found_ref == 0)
7116 if (dback->bytes == best->bytes &&
7117 dback->disk_bytenr == best->bytenr)
7120 ret = repair_ref(info, path, dback, best);
7126 * Ok we messed with the actual refs, which means we need to drop our
7127 * entire cache and go back and rescan. I know this is a huge pain and
7128 * adds a lot of extra work, but it's the only way to be safe. Once all
7129 * the backrefs agree we may not need to do anything to the extent
/* Drain the temporary entry list before returning. */
7134 while (!list_empty(&entries)) {
7135 entry = list_entry(entries.next, struct extent_entry, list);
7136 list_del_init(&entry->list);
/*
 * Collapse duplicate extent records for the same range into a single one.
 * The survivor ("good") is taken from rec->dups, i.e. it was backed by a
 * real EXTENT_ITEM; any overlapping cached records (and their backrefs and
 * dups) are folded into it.  Returns 1 when no duplicates remain, 0 when
 * the caller still needs to delete duplicate items on disk.
 */
7142 static int process_duplicates(struct btrfs_root *root,
7143 struct cache_tree *extent_cache,
7144 struct extent_record *rec)
7146 struct extent_record *good, *tmp;
7147 struct cache_extent *cache;
7151 * If we found an extent record for this extent then return, or if we
7152 * have more than one duplicate we are likely going to need to delete
7155 if (rec->found_rec || rec->num_duplicates > 1)
7158 /* Shouldn't happen but just in case */
7159 BUG_ON(!rec->num_duplicates);
7162 * So this happens if we end up with a backref that doesn't match the
7163 * actual extent entry. So either the backref is bad or the extent
7164 * entry is bad. Either way we want to have the extent_record actually
7165 * reflect what we found in the extent_tree, so we need to take the
7166 * duplicate out and use that as the extent_record since the only way we
7167 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7169 remove_cache_extent(extent_cache, &rec->cache);
7171 good = to_extent_record(rec->dups.next);
7172 list_del_init(&good->list);
/* Reset the survivor so it can take over as the canonical record. */
7173 INIT_LIST_HEAD(&good->backrefs);
7174 INIT_LIST_HEAD(&good->dups);
7175 good->cache.start = good->start;
7176 good->cache.size = good->nr;
7177 good->content_checked = 0;
7178 good->owner_ref_checked = 0;
7179 good->num_duplicates = 0;
7180 good->refs = rec->refs;
7181 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb every cached record that overlaps the survivor's range. */
7183 cache = lookup_cache_extent(extent_cache, good->start,
7187 tmp = container_of(cache, struct extent_record, cache);
7190 * If we find another overlapping extent and its found_rec is
7191 * set then it's a duplicate and we need to try and delete
7194 if (tmp->found_rec || tmp->num_duplicates > 0) {
7195 if (list_empty(&good->list))
7196 list_add_tail(&good->list,
7197 &duplicate_extents);
7198 good->num_duplicates += tmp->num_duplicates + 1;
7199 list_splice_init(&tmp->dups, &good->dups);
7200 list_del_init(&tmp->list);
7201 list_add_tail(&tmp->list, &good->dups);
7202 remove_cache_extent(extent_cache, &tmp->cache);
7207 * Ok we have another non extent item backed extent rec, so lets
7208 * just add it to this extent and carry on like we did above.
7210 good->refs += tmp->refs;
7211 list_splice_init(&tmp->backrefs, &good->backrefs);
7212 remove_cache_extent(extent_cache, &tmp->cache);
7215 ret = insert_cache_extent(extent_cache, &good->cache);
7218 return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk EXTENT_ITEMs for every duplicate record except the
 * one ("good") whose range covers all of the others.  Returns the number
 * of deleted entries on success (so callers can tell whether the extent
 * tree changed) or a negative errno on failure.
 */
7221 static int delete_duplicate_records(struct btrfs_root *root,
7222 struct extent_record *rec)
7224 struct btrfs_trans_handle *trans;
7225 LIST_HEAD(delete_list);
7226 struct btrfs_path *path;
7227 struct extent_record *tmp, *good, *n;
7230 struct btrfs_key key;
7232 path = btrfs_alloc_path();
7239 /* Find the record that covers all of the duplicates. */
7240 list_for_each_entry(tmp, &rec->dups, list) {
7241 if (good->start < tmp->start)
7243 if (good->nr > tmp->nr)
/* The duplicates must nest inside "good"; bail if they only overlap. */
7246 if (tmp->start + tmp->nr < good->start + good->nr) {
7247 fprintf(stderr, "Ok we have overlapping extents that "
7248 "aren't completely covered by each other, this "
7249 "is going to require more careful thought. "
7250 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7251 tmp->start, tmp->nr, good->start, good->nr);
7258 list_add_tail(&rec->list, &delete_list);
7260 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7263 list_move_tail(&tmp->list, &delete_list);
7266 root = root->fs_info->extent_root;
7267 trans = btrfs_start_transaction(root, 1);
7268 if (IS_ERR(trans)) {
7269 ret = PTR_ERR(trans);
/* Delete each queued duplicate's EXTENT_ITEM from the extent tree. */
7273 list_for_each_entry(tmp, &delete_list, list) {
7274 if (tmp->found_rec == 0)
7276 key.objectid = tmp->start;
7277 key.type = BTRFS_EXTENT_ITEM_KEY;
7278 key.offset = tmp->nr;
7280 /* Shouldn't happen but just in case */
7281 if (tmp->metadata) {
7282 fprintf(stderr, "Well this shouldn't happen, extent "
7283 "record overlaps but is metadata? "
7284 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7288 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7294 ret = btrfs_del_item(trans, root, path);
7297 btrfs_release_path(path);
7300 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records we queued for deletion. */
7304 while (!list_empty(&delete_list)) {
7305 tmp = to_extent_record(delete_list.next);
7306 list_del_init(&tmp->list);
7312 while (!list_empty(&rec->dups)) {
7313 tmp = to_extent_record(rec->dups.next);
7314 list_del_init(&tmp->list);
7318 btrfs_free_path(path);
7320 if (!ret && !nr_del)
7321 rec->num_duplicates = 0;
7323 return ret ? ret : nr_del;
/*
 * For data backrefs that were never matched to an on-disk file extent,
 * look the file extent up directly in the owning fs tree.  If the bytenr
 * it points at has no extent record of its own, adopt the on-disk
 * (disk_bytenr, bytes) values into the backref so verify_backrefs() can
 * weigh it when voting on the correct extent location.
 */
7326 static int find_possible_backrefs(struct btrfs_fs_info *info,
7327 struct btrfs_path *path,
7328 struct cache_tree *extent_cache,
7329 struct extent_record *rec)
7331 struct btrfs_root *root;
7332 struct extent_backref *back, *tmp;
7333 struct data_backref *dback;
7334 struct cache_extent *cache;
7335 struct btrfs_file_extent_item *fi;
7336 struct btrfs_key key;
7340 rbtree_postorder_for_each_entry_safe(back, tmp,
7341 &rec->backref_tree, node) {
7342 /* Don't care about full backrefs (poor unloved backrefs) */
7343 if (back->full_backref || !back->is_data)
7346 dback = to_data_backref(back);
7348 /* We found this one, we don't need to do a lookup */
7349 if (dback->found_ref)
7352 key.objectid = dback->root;
7353 key.type = BTRFS_ROOT_ITEM_KEY;
7354 key.offset = (u64)-1;
7356 root = btrfs_read_fs_root(info, &key);
7358 /* No root, definitely a bad ref, skip */
7359 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7361 /* Other err, exit */
7363 return PTR_ERR(root);
/* Look up the file extent this backref claims to describe. */
7365 key.objectid = dback->owner;
7366 key.type = BTRFS_EXTENT_DATA_KEY;
7367 key.offset = dback->offset;
7368 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7370 btrfs_release_path(path);
7373 /* Didn't find it, we can carry on */
7378 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7379 struct btrfs_file_extent_item);
7380 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7381 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7382 btrfs_release_path(path);
7383 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7385 struct extent_record *tmp;
7386 tmp = container_of(cache, struct extent_record, cache);
7389 * If we found an extent record for the bytenr for this
7390 * particular backref then we can't add it to our
7391 * current extent record. We only want to add backrefs
7392 * that don't have a corresponding extent item in the
7393 * extent tree since they likely belong to this record
7394 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values; count the ref as found. */
7400 dback->found_ref += 1;
7401 dback->disk_bytenr = bytenr;
7402 dback->bytes = bytes;
7405 * Set this so the verify backref code knows not to trust the
7406 * values in this backref.
7415 * Record orphan data ref into corresponding root.
7417 * Return 0 if the extent item contains data ref and recorded.
7418 * Return 1 if the extent item contains no useful data ref
7419 * In that case, it may contain only a shared_dataref or metadata backref,
7420 * or the file extent may already exist (this should be handled by the extent bytenr
7422 * Return <0 if something goes wrong.
/*
 * Walk @rec's backrefs and, for each data ref whose file extent could not
 * be found, queue an orphan_data_extent on the owning fs root so the
 * inode/file-extent rebuild code can recreate it later.  See the comment
 * block directly above for the 0 / 1 / <0 return contract.
 */
7424 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7425 struct extent_record *rec)
7427 struct btrfs_key key;
7428 struct btrfs_root *dest_root;
7429 struct extent_backref *back, *tmp;
7430 struct data_backref *dback;
7431 struct orphan_data_extent *orphan;
7432 struct btrfs_path *path;
7433 int recorded_data_ref = 0;
7438 path = btrfs_alloc_path();
7441 rbtree_postorder_for_each_entry_safe(back, tmp,
7442 &rec->backref_tree, node) {
/* Only data backrefs that exist in the extent tree are candidates. */
7443 if (back->full_backref || !back->is_data ||
7444 !back->found_extent_tree)
7446 dback = to_data_backref(back);
7447 if (dback->found_ref)
7449 key.objectid = dback->root;
7450 key.type = BTRFS_ROOT_ITEM_KEY;
7451 key.offset = (u64)-1;
7453 dest_root = btrfs_read_fs_root(fs_info, &key);
7455 /* For non-exist root we just skip it */
7456 if (IS_ERR(dest_root) || !dest_root)
7459 key.objectid = dback->owner;
7460 key.type = BTRFS_EXTENT_DATA_KEY;
7461 key.offset = dback->offset;
7463 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7465 * For ret < 0, it's OK since the fs-tree may be corrupted,
7466 * we need to record it for inode/file extent rebuild.
7467 * For ret > 0, we record it only for file extent rebuild.
7468 * For ret == 0, the file extent exists but only bytenr
7469 * mismatch, let the original bytenr fix routine to handle,
7475 orphan = malloc(sizeof(*orphan));
7480 INIT_LIST_HEAD(&orphan->list);
7481 orphan->root = dback->root;
7482 orphan->objectid = dback->owner;
7483 orphan->offset = dback->offset;
7484 orphan->disk_bytenr = rec->cache.start;
7485 orphan->disk_len = rec->cache.size;
7486 list_add(&dest_root->orphan_data_extents, &orphan->list);
7487 recorded_data_ref = 1;
7490 btrfs_free_path(path);
/* 0 when at least one orphan ref was recorded, 1 otherwise. */
7492 return !recorded_data_ref;
7498 * when an incorrect extent item is found, this will delete
7499 * all of the existing entries for it and recreate them
7500 * based on what the tree scan found.
/*
 * Steps: (0) hunt down unmatched backrefs, (1) make the backrefs agree
 * via verify_backrefs(), (2) delete every existing extent record for the
 * range, (3) re-insert one ref per backref that was actually observed —
 * unless the block itself is known corrupt.
 */
7502 static int fixup_extent_refs(struct btrfs_fs_info *info,
7503 struct cache_tree *extent_cache,
7504 struct extent_record *rec)
7506 struct btrfs_trans_handle *trans = NULL;
7508 struct btrfs_path *path;
7509 struct cache_extent *cache;
7510 struct extent_backref *back, *tmp;
7514 if (rec->flag_block_full_backref)
7515 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7517 path = btrfs_alloc_path();
7521 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7523 * Sometimes the backrefs themselves are so broken they don't
7524 * get attached to any meaningful rec, so first go back and
7525 * check any of our backrefs that we couldn't find and throw
7526 * them into the list if we find the backref so that
7527 * verify_backrefs can figure out what to do.
7529 ret = find_possible_backrefs(info, path, extent_cache, rec);
7534 /* step one, make sure all of the backrefs agree */
7535 ret = verify_backrefs(info, path, rec);
7539 trans = btrfs_start_transaction(info->extent_root, 1);
7540 if (IS_ERR(trans)) {
7541 ret = PTR_ERR(trans);
7545 /* step two, delete all the existing records */
7546 ret = delete_extent_records(trans, info->extent_root, path,
7547 rec->start, rec->max_size);
7552 /* was this block corrupt? If so, don't add references to it */
7553 cache = lookup_cache_extent(info->corrupt_blocks,
7554 rec->start, rec->max_size);
7560 /* step three, recreate all the refs we did find */
7561 rbtree_postorder_for_each_entry_safe(back, tmp,
7562 &rec->backref_tree, node) {
7564 * if we didn't find any references, don't create a
7567 if (!back->found_ref)
7570 rec->bad_full_backref = 0;
7571 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7579 int err = btrfs_commit_transaction(trans, info->extent_root);
7584 btrfs_free_path(path);
/*
 * Rewrite the flags field of @rec's extent item so the FULL_BACKREF bit
 * matches what the tree scan observed (rec->flag_block_full_backref).
 * Metadata extents are keyed by level (METADATA_ITEM), data extents by
 * size (EXTENT_ITEM).  Returns the commit result or a negative errno.
 */
7588 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7589 struct extent_record *rec)
7591 struct btrfs_trans_handle *trans;
7592 struct btrfs_root *root = fs_info->extent_root;
7593 struct btrfs_path *path;
7594 struct btrfs_extent_item *ei;
7595 struct btrfs_key key;
7599 key.objectid = rec->start;
7600 if (rec->metadata) {
7601 key.type = BTRFS_METADATA_ITEM_KEY;
7602 key.offset = rec->info_level;
7604 key.type = BTRFS_EXTENT_ITEM_KEY;
7605 key.offset = rec->max_size;
7608 path = btrfs_alloc_path();
7612 trans = btrfs_start_transaction(root, 0);
7613 if (IS_ERR(trans)) {
7614 btrfs_free_path(path);
7615 return PTR_ERR(trans);
7618 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7620 btrfs_free_path(path);
7621 btrfs_commit_transaction(trans, root);
7624 fprintf(stderr, "Didn't find extent for %llu\n",
7625 (unsigned long long)rec->start);
7626 btrfs_free_path(path);
7627 btrfs_commit_transaction(trans, root);
7631 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7632 struct btrfs_extent_item);
/* Toggle only the FULL_BACKREF bit; leave all other flags intact. */
7633 flags = btrfs_extent_flags(path->nodes[0], ei);
7634 if (rec->flag_block_full_backref) {
7635 fprintf(stderr, "setting full backref on %llu\n",
7636 (unsigned long long)key.objectid);
7637 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7639 fprintf(stderr, "clearing full backref on %llu\n",
7640 (unsigned long long)key.objectid);
7641 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7643 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7644 btrfs_mark_buffer_dirty(path->nodes[0]);
7645 btrfs_free_path(path);
7646 return btrfs_commit_transaction(trans, root);
7649 /* right now we only prune from the extent allocation tree */
/*
 * Detach one corrupt block from the extent tree by deleting its parent's
 * pointer to it.  We search only down to the parent level
 * (corrupt->level + 1); if the search slot does not already point at the
 * corrupt block, the whole parent node is scanned for it.
 */
7650 static int prune_one_block(struct btrfs_trans_handle *trans,
7651 struct btrfs_fs_info *info,
7652 struct btrfs_corrupt_block *corrupt)
7655 struct btrfs_path path;
7656 struct extent_buffer *eb;
7660 int level = corrupt->level + 1;
7662 btrfs_init_path(&path);
7664 /* we want to stop at the parent to our busted block */
7665 path.lowest_level = level;
7667 ret = btrfs_search_slot(trans, info->extent_root,
7668 &corrupt->key, &path, -1, 1);
7673 eb = path.nodes[level];
7680 * hopefully the search gave us the block we want to prune,
7681 * lets try that first
7683 slot = path.slots[level];
7684 found = btrfs_node_blockptr(eb, slot);
7685 if (found == corrupt->cache.start)
7688 nritems = btrfs_header_nritems(eb);
7690 /* the search failed, lets scan this node and hope we find it */
7691 for (slot = 0; slot < nritems; slot++) {
7692 found = btrfs_node_blockptr(eb, slot);
7693 if (found == corrupt->cache.start)
7697 * we couldn't find the bad block. TODO, search all the nodes for pointers
7700 if (eb == info->extent_root->node) {
7705 btrfs_release_path(&path);
7710 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7711 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7714 btrfs_release_path(&path);
/*
 * Drop the pointers to every block recorded in info->corrupt_blocks.
 * The transaction is started lazily when the first corrupt block is
 * found, then everything is committed in one go.
 */
7718 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7720 struct btrfs_trans_handle *trans = NULL;
7721 struct cache_extent *cache;
7722 struct btrfs_corrupt_block *corrupt;
7725 cache = search_cache_extent(info->corrupt_blocks, 0);
7729 trans = btrfs_start_transaction(info->extent_root, 1);
7731 return PTR_ERR(trans);
7733 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7734 prune_one_block(trans, info, corrupt);
7735 remove_cache_extent(info->corrupt_blocks, cache);
7738 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear every EXTENT_DIRTY range from the in-memory free space cache and
 * walk the covered block groups so they are re-read from disk on next
 * use.  NOTE(review): the loop structure and the "cached" reset are
 * partially elided in this view — confirm against the full file.
 */
7742 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7744 struct btrfs_block_group_cache *cache;
7749 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7750 &start, &end, EXTENT_DIRTY);
7753 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7759 cache = btrfs_lookup_first_block_group(fs_info, start);
7764 start = cache->key.objectid + cache->key.offset;
/*
 * Final pass over the extent cache.  Reports (and, with repair enabled,
 * attempts to fix) records whose reference counts, backpointers, owner
 * refs or FULL_BACKREF flags disagree with what the tree scan found,
 * plus stripe-crossing metadata and chunk-type mismatches (report only).
 * Duplicate records are resolved first so backrefs cannot be attached to
 * the wrong extent item.  On any repair, block accounting is fixed up in
 * a final transaction.
 */
7768 static int check_extent_refs(struct btrfs_root *root,
7769 struct cache_tree *extent_cache)
7771 struct extent_record *rec;
7772 struct cache_extent *cache;
7781 * if we're doing a repair, we have to make sure
7782 * we don't allocate from the problem extents.
7783 * In the worst case, this will be all the
7786 cache = search_cache_extent(extent_cache, 0);
7788 rec = container_of(cache, struct extent_record, cache);
7789 set_extent_dirty(root->fs_info->excluded_extents,
7791 rec->start + rec->max_size - 1,
7793 cache = next_cache_extent(cache);
7796 /* pin down all the corrupted blocks too */
7797 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7799 set_extent_dirty(root->fs_info->excluded_extents,
7801 cache->start + cache->size - 1,
7803 cache = next_cache_extent(cache);
7805 prune_corrupt_blocks(root->fs_info);
7806 reset_cached_block_groups(root->fs_info);
7809 reset_cached_block_groups(root->fs_info);
7812 * We need to delete any duplicate entries we find first otherwise we
7813 * could mess up the extent tree when we have backrefs that actually
7814 * belong to a different extent item and not the weird duplicate one.
7816 while (repair && !list_empty(&duplicate_extents)) {
7817 rec = to_extent_record(duplicate_extents.next);
7818 list_del_init(&rec->list);
7820 /* Sometimes we can find a backref before we find an actual
7821 * extent, so we need to process it a little bit to see if there
7822 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7823 * if this is a backref screwup. If we need to delete stuff
7824 * process_duplicates() will return 0, otherwise it will return
7827 if (process_duplicates(root, extent_cache, rec))
7829 ret = delete_duplicate_records(root, rec);
7833 * delete_duplicate_records will return the number of entries
7834 * deleted, so if it's greater than 0 then we know we actually
7835 * did something and we need to remove.
/* Main verification loop: walk every cached extent record in order. */
7849 cache = search_cache_extent(extent_cache, 0);
7852 rec = container_of(cache, struct extent_record, cache);
7853 if (rec->num_duplicates) {
7854 fprintf(stderr, "extent item %llu has multiple extent "
7855 "items\n", (unsigned long long)rec->start);
7860 if (rec->refs != rec->extent_item_refs) {
7861 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7862 (unsigned long long)rec->start,
7863 (unsigned long long)rec->nr);
7864 fprintf(stderr, "extent item %llu, found %llu\n",
7865 (unsigned long long)rec->extent_item_refs,
7866 (unsigned long long)rec->refs);
7867 ret = record_orphan_data_extents(root->fs_info, rec);
7874 * we can't use the extent to repair file
7875 * extent, let the fallback method handle it.
7877 if (!fixed && repair) {
7878 ret = fixup_extent_refs(
7889 if (all_backpointers_checked(rec, 1)) {
7890 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7891 (unsigned long long)rec->start,
7892 (unsigned long long)rec->nr);
7894 if (!fixed && !recorded && repair) {
7895 ret = fixup_extent_refs(root->fs_info,
7904 if (!rec->owner_ref_checked) {
7905 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7906 (unsigned long long)rec->start,
7907 (unsigned long long)rec->nr);
7908 if (!fixed && !recorded && repair) {
7909 ret = fixup_extent_refs(root->fs_info,
7918 if (rec->bad_full_backref) {
7919 fprintf(stderr, "bad full backref, on [%llu]\n",
7920 (unsigned long long)rec->start);
7922 ret = fixup_extent_flags(root->fs_info, rec);
7931 * Although it's not a extent ref's problem, we reuse this
7932 * routine for error reporting.
7933 * No repair function yet.
7935 if (rec->crossing_stripes) {
7937 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7938 rec->start, rec->start + rec->max_size);
7943 if (rec->wrong_chunk_type) {
7945 "bad extent [%llu, %llu), type mismatch with chunk\n",
7946 rec->start, rec->start + rec->max_size);
7951 remove_cache_extent(extent_cache, cache);
7952 free_all_extent_backrefs(rec);
7953 if (!init_extent_tree && repair && (!cur_err || fixed))
7954 clear_extent_dirty(root->fs_info->excluded_extents,
7956 rec->start + rec->max_size - 1,
7962 if (ret && ret != -EAGAIN) {
7963 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7966 struct btrfs_trans_handle *trans;
7968 root = root->fs_info->extent_root;
7969 trans = btrfs_start_transaction(root, 1);
7970 if (IS_ERR(trans)) {
7971 ret = PTR_ERR(trans);
/* Repairs changed allocation; recompute the superblock byte counts. */
7975 btrfs_fix_block_accounting(trans, root);
7976 ret = btrfs_commit_transaction(trans, root);
7981 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Convert a chunk's logical @length into the number of bytes each stripe
 * occupies on disk for the given RAID profile: RAID0 divides evenly,
 * RAID10 stores every byte twice, RAID5/6 exclude one/two parity stripes,
 * and everything else (single/DUP/RAID1) uses the full length per stripe.
 */
7987 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7991 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7992 stripe_size = length;
7993 stripe_size /= num_stripes;
7994 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7995 stripe_size = length * 2;
7996 stripe_size /= num_stripes;
7997 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7998 stripe_size = length;
7999 stripe_size /= (num_stripes - 1);
8000 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8001 stripe_size = length;
8002 stripe_size /= (num_stripes - 2);
8004 stripe_size = length;
8010 * Check the chunk with its block group/dev list ref:
8011 * Return 0 if all refs seems valid.
8012 * Return 1 if part of refs seems valid, need later check for rebuild ref
8013 * like missing block group and needs to search extent tree to rebuild them.
8014 * Return -1 if essential refs are missing and unable to rebuild.
8016 static int check_chunk_refs(struct chunk_record *chunk_rec,
8017 struct block_group_tree *block_group_cache,
8018 struct device_extent_tree *dev_extent_cache,
8021 struct cache_extent *block_group_item;
8022 struct block_group_record *block_group_rec;
8023 struct cache_extent *dev_extent_item;
8024 struct device_extent_record *dev_extent_rec;
8028 int metadump_v2 = 0;
/* First cross-check the chunk against its BLOCK_GROUP_ITEM. */
8032 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8035 if (block_group_item) {
8036 block_group_rec = container_of(block_group_item,
8037 struct block_group_record,
8039 if (chunk_rec->length != block_group_rec->offset ||
8040 chunk_rec->offset != block_group_rec->objectid ||
/* NOTE(review): the flags check looks gated on !metadump_v2 -- elided here. */
8042 chunk_rec->type_flags != block_group_rec->flags)) {
8045 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8046 chunk_rec->objectid,
8051 chunk_rec->type_flags,
8052 block_group_rec->objectid,
8053 block_group_rec->type,
8054 block_group_rec->offset,
8055 block_group_rec->offset,
8056 block_group_rec->objectid,
8057 block_group_rec->flags);
8060 list_del_init(&block_group_rec->list);
8061 chunk_rec->bg_rec = block_group_rec;
8066 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8067 chunk_rec->objectid,
8072 chunk_rec->type_flags);
/* Then check one DEV_EXTENT per stripe, sized per the RAID profile. */
8079 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8080 chunk_rec->num_stripes);
8081 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8082 devid = chunk_rec->stripes[i].devid;
8083 offset = chunk_rec->stripes[i].offset;
8084 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8085 devid, offset, length);
8086 if (dev_extent_item) {
8087 dev_extent_rec = container_of(dev_extent_item,
8088 struct device_extent_record,
8090 if (dev_extent_rec->objectid != devid ||
8091 dev_extent_rec->offset != offset ||
8092 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8093 dev_extent_rec->length != length) {
8096 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8097 chunk_rec->objectid,
8100 chunk_rec->stripes[i].devid,
8101 chunk_rec->stripes[i].offset,
8102 dev_extent_rec->objectid,
8103 dev_extent_rec->offset,
8104 dev_extent_rec->length);
8107 list_move(&dev_extent_rec->chunk_list,
8108 &chunk_rec->dextents);
8113 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8114 chunk_rec->objectid,
8117 chunk_rec->stripes[i].devid,
8118 chunk_rec->stripes[i].offset);
8125 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Sort every cached chunk into @good, @bad or @rebuild according to
 * check_chunk_refs(), then report any block groups and device extents
 * that never matched a chunk.  @silent suppresses the per-item messages.
 */
8126 int check_chunks(struct cache_tree *chunk_cache,
8127 struct block_group_tree *block_group_cache,
8128 struct device_extent_tree *dev_extent_cache,
8129 struct list_head *good, struct list_head *bad,
8130 struct list_head *rebuild, int silent)
8132 struct cache_extent *chunk_item;
8133 struct chunk_record *chunk_rec;
8134 struct block_group_record *bg_rec;
8135 struct device_extent_record *dext_rec;
8139 chunk_item = first_cache_extent(chunk_cache);
8140 while (chunk_item) {
8141 chunk_rec = container_of(chunk_item, struct chunk_record,
8143 err = check_chunk_refs(chunk_rec, block_group_cache,
8144 dev_extent_cache, silent);
8147 if (err == 0 && good)
8148 list_add_tail(&chunk_rec->list, good);
8149 if (err > 0 && rebuild)
8150 list_add_tail(&chunk_rec->list, rebuild);
8152 list_add_tail(&chunk_rec->list, bad);
8153 chunk_item = next_cache_extent(chunk_item);
/* Anything left on these lists never matched a chunk: report it. */
8156 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8159 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8167 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8171 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents recorded for @dev_rec's devid and
 * compare the total against the device item's byte_used; report (and
 * return non-zero for) a mismatch.
 */
8182 static int check_device_used(struct device_record *dev_rec,
8183 struct device_extent_tree *dext_cache)
8185 struct cache_extent *cache;
8186 struct device_extent_record *dev_extent_rec;
8189 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8191 dev_extent_rec = container_of(cache,
8192 struct device_extent_record,
/* The tree is shared across devices; stop at the next devid. */
8194 if (dev_extent_rec->objectid != dev_rec->devid)
8197 list_del_init(&dev_extent_rec->device_list);
8198 total_byte += dev_extent_rec->length;
8199 cache = next_cache_extent(cache);
8202 if (total_byte != dev_rec->byte_used) {
8204 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8205 total_byte, dev_rec->byte_used, dev_rec->objectid,
8206 dev_rec->type, dev_rec->offset);
8213 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Validate every device item against its recorded device extents via
 * check_device_used(), then report device extents whose device was never
 * found at all.
 */
8214 static int check_devices(struct rb_root *dev_cache,
8215 struct device_extent_tree *dev_extent_cache)
8217 struct rb_node *dev_node;
8218 struct device_record *dev_rec;
8219 struct device_extent_record *dext_rec;
8223 dev_node = rb_first(dev_cache);
8225 dev_rec = container_of(dev_node, struct device_record, node);
8226 err = check_device_used(dev_rec, dev_extent_cache);
8230 dev_node = rb_next(dev_node);
8232 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8235 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8236 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root to scan and append
 * it to @head.  @drop_key/@drop_level describe the resume point of a
 * partially dropped snapshot; @drop_key may be NULL for normal roots.
 */
8243 static int add_root_item_to_list(struct list_head *head,
8244 u64 objectid, u64 bytenr, u64 last_snapshot,
8245 u8 level, u8 drop_level,
8246 int level_size, struct btrfs_key *drop_key)
8249 struct root_item_record *ri_rec;
8250 ri_rec = malloc(sizeof(*ri_rec));
8253 ri_rec->bytenr = bytenr;
8254 ri_rec->objectid = objectid;
8255 ri_rec->level = level;
8256 ri_rec->level_size = level_size;
8257 ri_rec->drop_level = drop_level;
8258 ri_rec->last_snapshot = last_snapshot;
8260 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8261 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on @list. */
8266 static void free_root_item_list(struct list_head *list)
8268 struct root_item_record *ri_rec;
8270 while (!list_empty(list)) {
8271 ri_rec = list_first_entry(list, struct root_item_record,
8273 list_del_init(&ri_rec->list);
/*
 * Scan each queued tree root: read its root node, add it to the pending
 * set, then drive run_next_block() until the traversal is exhausted.
 * Snapshots being dropped are processed one at a time (see the inline
 * comment) so readahead stays effective for the normal case.
 */
8278 static int deal_root_from_list(struct list_head *list,
8279 struct btrfs_root *root,
8280 struct block_info *bits,
8282 struct cache_tree *pending,
8283 struct cache_tree *seen,
8284 struct cache_tree *reada,
8285 struct cache_tree *nodes,
8286 struct cache_tree *extent_cache,
8287 struct cache_tree *chunk_cache,
8288 struct rb_root *dev_cache,
8289 struct block_group_tree *block_group_cache,
8290 struct device_extent_tree *dev_extent_cache)
8295 while (!list_empty(list)) {
8296 struct root_item_record *rec;
8297 struct extent_buffer *buf;
8298 rec = list_entry(list->next,
8299 struct root_item_record, list);
8301 buf = read_tree_block(root->fs_info->tree_root,
8302 rec->bytenr, rec->level_size, 0);
8303 if (!extent_buffer_uptodate(buf)) {
8304 free_extent_buffer(buf);
8308 add_root_to_pending(buf, extent_cache, pending,
8309 seen, nodes, rec->objectid);
8311 * To rebuild extent tree, we need deal with snapshot
8312 * one by one, otherwise we deal with node firstly which
8313 * can maximize readahead.
8316 ret = run_next_block(root, bits, bits_nr, &last,
8317 pending, seen, reada, nodes,
8318 extent_cache, chunk_cache,
8319 dev_cache, block_group_cache,
8320 dev_extent_cache, rec);
8324 free_extent_buffer(buf);
8325 list_del(&rec->list);
/* Drain whatever remains pending after all roots have been queued. */
8331 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8332 reada, nodes, extent_cache, chunk_cache,
8333 dev_cache, block_group_cache,
8334 dev_extent_cache, NULL);
/*
 * Top-level pass of the original-mode check: walk every tree to collect
 * chunk, block group, device, device-extent and extent records, then
 * cross-check the caches against each other (check_chunks(),
 * check_extent_refs(), check_devices()).
 *
 * Returns 0 if everything is consistent, non-zero otherwise.
 */
static int check_chunks_and_extents(struct btrfs_root *root)
	struct rb_root dev_cache;
	struct cache_tree chunk_cache;
	struct block_group_tree block_group_cache;
	struct device_extent_tree dev_extent_cache;
	struct cache_tree extent_cache;
	struct cache_tree seen;
	struct cache_tree pending;
	struct cache_tree reada;
	struct cache_tree nodes;
	struct extent_io_tree excluded_extents;
	struct cache_tree corrupt_blocks;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct block_info *bits;
	struct extent_buffer *leaf;
	struct btrfs_root_item ri;
	struct list_head dropping_trees;
	struct list_head normal_trees;
	struct btrfs_root *root1;
	dev_cache = RB_ROOT;
	cache_tree_init(&chunk_cache);
	block_group_tree_init(&block_group_cache);
	device_extent_tree_init(&dev_extent_cache);
	cache_tree_init(&extent_cache);
	cache_tree_init(&seen);
	cache_tree_init(&pending);
	cache_tree_init(&nodes);
	cache_tree_init(&reada);
	cache_tree_init(&corrupt_blocks);
	extent_io_tree_init(&excluded_extents);
	INIT_LIST_HEAD(&dropping_trees);
	INIT_LIST_HEAD(&normal_trees);
	/* Hook our caches into fs_info so extent frees are tracked while
	 * the scan runs; cleared again on exit below. */
	root->fs_info->excluded_extents = &excluded_extents;
	root->fs_info->fsck_extent_cache = &extent_cache;
	root->fs_info->free_extent_hook = free_extent_hook;
	root->fs_info->corrupt_blocks = &corrupt_blocks;
	bits = malloc(bits_nr * sizeof(struct block_info));
	if (ctx.progress_enabled) {
		ctx.tp = TASK_EXTENTS;
		task_start(ctx.info);
	/* The tree root and chunk root are not recorded as ROOT_ITEMs, so
	 * queue them explicitly. */
	root1 = root->fs_info->tree_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	root1 = root->fs_info->chunk_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	/* Iterate all ROOT_ITEMs in the tree root, classifying each root as
	 * "normal" or "dropping" (snapshot deletion in progress). */
	btrfs_init_path(&path);
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
	leaf = path.nodes[0];
	slot = path.slots[0];
	if (slot >= btrfs_header_nritems(path.nodes[0])) {
		ret = btrfs_next_leaf(root, &path);
		leaf = path.nodes[0];
		slot = path.slots[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
	if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
		unsigned long offset;
		offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
		read_extent_buffer(leaf, &ri, offset, sizeof(ri));
		last_snapshot = btrfs_root_last_snapshot(&ri);
		/* drop_progress objectid == 0 means no drop in progress. */
		if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
			level = btrfs_root_level(&ri);
			level_size = root->nodesize;
			ret = add_root_item_to_list(&normal_trees,
						    btrfs_root_bytenr(&ri),
						    last_snapshot, level,
						    0, level_size, NULL);
			level = btrfs_root_level(&ri);
			level_size = root->nodesize;
			objectid = found_key.objectid;
			btrfs_disk_key_to_cpu(&found_key,
			ret = add_root_item_to_list(&dropping_trees,
						    btrfs_root_bytenr(&ri),
						    last_snapshot, level,
						    level_size, &found_key);
	btrfs_release_path(&path);
	 * check_block can return -EAGAIN if it fixes something, please keep
	 * this in mind when dealing with return values from these functions, if
	 * we get -EAGAIN we want to fall through and restart the loop.
	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
				  &seen, &reada, &nodes, &extent_cache,
				  &chunk_cache, &dev_cache, &block_group_cache,
	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
				  &pending, &seen, &reada, &nodes,
				  &extent_cache, &chunk_cache, &dev_cache,
				  &block_group_cache, &dev_extent_cache);
	/* Cross-check the collected caches. */
	ret = check_chunks(&chunk_cache, &block_group_cache,
			   &dev_extent_cache, NULL, NULL, NULL, 0);
	ret = check_extent_refs(root, &extent_cache);
	ret = check_devices(&dev_cache, &dev_extent_cache);
	task_stop(ctx.info);
	/* Normal exit: unhook from fs_info and free all caches. */
	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
	extent_io_tree_cleanup(&excluded_extents);
	root->fs_info->fsck_extent_cache = NULL;
	root->fs_info->free_extent_hook = NULL;
	root->fs_info->corrupt_blocks = NULL;
	root->fs_info->excluded_extents = NULL;
	free_chunk_cache_tree(&chunk_cache);
	free_device_cache_tree(&dev_cache);
	free_block_group_tree(&block_group_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	/* Error/restart path: same teardown, plus the root item lists.
	 * NOTE(review): presumably reached via a label for the -EAGAIN
	 * restart case — confirm against the full source. */
	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	free_chunk_cache_tree(&chunk_cache);
	free_block_group_tree(&block_group_cache);
	free_device_cache_tree(&dev_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_record_cache(root->fs_info, &extent_cache);
	free_root_item_list(&normal_trees);
	free_root_item_list(&dropping_trees);
	extent_io_tree_cleanup(&excluded_extents);
8559 * Check backrefs of a tree block given by @bytenr or @eb.
8561 * @root: the root containing the @bytenr or @eb
8562 * @eb: tree block extent buffer, can be NULL
8563 * @bytenr: bytenr of the tree block to search
8564 * @level: tree level of the tree block
8565 * @owner: owner of the tree block
8567 * Return >0 for any error found and output error message
8568 * Return 0 for no error found
static int check_tree_block_ref(struct btrfs_root *root,
				struct extent_buffer *eb, u64 bytenr,
				int level, u64 owner)
	struct btrfs_key key;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path path;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct extent_buffer *leaf;
	u32 nodesize = root->nodesize;
	btrfs_init_path(&path);
	key.objectid = bytenr;
	/* Skinny metadata stores the level in key.offset of a METADATA_ITEM
	 * instead of a separate tree_block_info. */
	if (btrfs_fs_incompat(root->fs_info,
			      BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;
	/* Search for the backref in extent tree */
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
		err |= BACKREF_MISSING;
	/* offset was (u64)-1, so step back to the item for this bytenr. */
	ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
		err |= BACKREF_MISSING;
	leaf = path.nodes[0];
	slot = path.slots[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		skinny_level = (int)key.offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
		struct btrfs_tree_block_info *info;
		info = (struct btrfs_tree_block_info *)(ei + 1);
		skinny_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	/* A tree block's extent item must carry the TREE_BLOCK flag. */
	if (!(btrfs_extent_flags(leaf, ei) &
	      BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
		"extent[%llu %u] backref type mismatch, missing bit: %llx",
			key.objectid, nodesize,
			BTRFS_EXTENT_FLAG_TREE_BLOCK);
		err = BACKREF_MISMATCH;
	header_gen = btrfs_header_generation(eb);
	extent_gen = btrfs_extent_generation(leaf, ei);
	if (header_gen != extent_gen) {
		"extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
			key.objectid, nodesize, header_gen,
		err = BACKREF_MISMATCH;
	if (level != skinny_level) {
		"extent[%llu %u] level mismatch, wanted: %u, have: %u",
			key.objectid, nodesize, level, skinny_level);
		err = BACKREF_MISMATCH;
	/* Non-fs trees (root/extent/chunk/...) are never shared, so the
	 * refcount must be exactly 1. */
	if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
		"extent[%llu %u] is referred by other roots than %llu",
			key.objectid, nodesize, root->objectid);
		err = BACKREF_MISMATCH;
	 * Iterate the extent/metadata item to find the exact backref
	item_size = btrfs_item_size_nr(leaf, slot);
	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		if (type == BTRFS_TREE_BLOCK_REF_KEY &&
		    (offset == root->objectid || offset == owner)) {
		} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
			/* Check if the backref points to valid referencer */
			found_ref = !check_tree_block_ref(root, NULL, offset,
		ptr += btrfs_extent_inline_ref_size(type);
	 * Inlined extent item doesn't have what we need, check
	 * TREE_BLOCK_REF_KEY
	btrfs_release_path(&path);
	key.objectid = bytenr;
	key.type = BTRFS_TREE_BLOCK_REF_KEY;
	key.offset = root->objectid;
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
		err |= BACKREF_MISSING;
	btrfs_release_path(&path);
	/* Only report when called with the actual buffer (top-level call,
	 * not a recursive shared-backref probe). */
	if (eb && (err & BACKREF_MISSING))
		error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
			bytenr, nodesize, owner, level);
8715 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
 * Return >0 for any error found and output error message
8718 * Return 0 for no error found
static int check_extent_data_item(struct btrfs_root *root,
				  struct extent_buffer *eb, int slot)
	struct btrfs_file_extent_item *fi;
	struct btrfs_path path;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_key fi_key;
	struct btrfs_key dbref_key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	u64 file_extent_gen;
	u64 extent_num_bytes;
	int found_dbackref = 0;
	btrfs_item_key_to_cpu(eb, &fi_key, slot);
	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	file_extent_gen = btrfs_file_extent_generation(eb, fi);
	/* Nothing to check for hole and inline data extents */
	if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
	    btrfs_file_extent_disk_bytenr(eb, fi) == 0)
	disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
	disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
	extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	/* Check unaligned disk_num_bytes and num_bytes */
	if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
		"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
			fi_key.objectid, fi_key.offset, disk_num_bytes,
		err |= BYTES_UNALIGNED;
	/* Global accounting, reported in the final fsck summary. */
	data_bytes_allocated += disk_num_bytes;
	if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
		"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
			fi_key.objectid, fi_key.offset, extent_num_bytes,
		err |= BYTES_UNALIGNED;
	data_bytes_referenced += extent_num_bytes;
	owner = btrfs_header_owner(eb);
	/* Check the extent item of the file extent in extent tree */
	btrfs_init_path(&path);
	dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
	dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
	dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
	ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
		err |= BACKREF_MISSING;
	leaf = path.nodes[0];
	slot = path.slots[0];
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	extent_flags = btrfs_extent_flags(leaf, ei);
	extent_gen = btrfs_extent_generation(leaf, ei);
	/* A data extent's item must carry the DATA flag. */
	if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
		"extent[%llu %llu] backref type mismatch, wanted bit: %llx",
			disk_bytenr, disk_num_bytes,
			BTRFS_EXTENT_FLAG_DATA);
		err |= BACKREF_MISMATCH;
	/* The file extent can never be older than the extent it points to. */
	if (file_extent_gen < extent_gen) {
		"extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
			disk_bytenr, disk_num_bytes, file_extent_gen,
		err |= BACKREF_MISMATCH;
	/* Check data backref inside that extent item */
	item_size = btrfs_item_size_nr(leaf, path.slots[0]);
	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref_root = btrfs_extent_data_ref_root(leaf, dref);
			if (ref_root == owner || ref_root == root->objectid)
		} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
			/* Shared ref: verify the parent tree block exists. */
			found_dbackref = !check_tree_block_ref(root, NULL,
				btrfs_extent_inline_ref_offset(leaf, iref),
		ptr += btrfs_extent_inline_ref_size(type);
	/* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
	if (!found_dbackref) {
		btrfs_release_path(&path);
		btrfs_init_path(&path);
		dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
		dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
		dbref_key.offset = hash_extent_data_ref(root->objectid,
				fi_key.objectid, fi_key.offset);
		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
					&dbref_key, &path, 0, 0);
	if (!found_dbackref)
		err |= BACKREF_MISSING;
	btrfs_release_path(&path);
	if (err & BACKREF_MISSING) {
		error("data extent[%llu %llu] backref lost",
			disk_bytenr, disk_num_bytes);
8870 * Get real tree block level for the case like shared block
8871 * Return >= 0 as tree level
8872 * Return <0 for error
static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
	struct extent_buffer *eb;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_extent_item *ei;
	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
	/* Search extent tree for extent generation and level */
	key.objectid = bytenr;
	key.type = BTRFS_METADATA_ITEM_KEY;
	key.offset = (u64)-1;
	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
	/* offset was (u64)-1, step back to the item covering @bytenr. */
	ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
			    struct btrfs_extent_item);
	flags = btrfs_extent_flags(path.nodes[0], ei);
	/* Must be a tree block, not a data extent. */
	if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
	/* Get transid for later read_tree_block() check */
	transid = btrfs_extent_generation(path.nodes[0], ei);
	/* Get backref level as one source */
	if (key.type == BTRFS_METADATA_ITEM_KEY) {
		/* Skinny metadata: the level lives in key.offset. */
		backref_level = key.offset;
		struct btrfs_tree_block_info *info;
		info = (struct btrfs_tree_block_info *)(ei + 1);
		backref_level = btrfs_tree_block_level(path.nodes[0], info);
	btrfs_release_path(&path);
	/* Get level from tree block as an alternative source */
	eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
	if (!extent_buffer_uptodate(eb)) {
		free_extent_buffer(eb);
	header_level = btrfs_header_level(eb);
	free_extent_buffer(eb);
	/* The two sources must agree before we trust the level. */
	if (header_level != backref_level)
	return header_level;
	btrfs_release_path(&path);
8946 * Check if a tree block backref is valid (points to a valid tree block)
8947 * if level == -1, level will be resolved
8948 * Return >0 for any error found and print error message
static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
				    u64 bytenr, int level)
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *eb;
	struct extent_buffer *node;
	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
	/* Query level for level == -1 special case */
		level = query_tree_block_level(fs_info, bytenr);
		err |= REFERENCER_MISSING;
	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
		err |= REFERENCER_MISSING;
	/* Read out the tree block to get item/node key */
	eb = read_tree_block(root, bytenr, root->nodesize, 0);
	if (!extent_buffer_uptodate(eb)) {
		err |= REFERENCER_MISSING;
		free_extent_buffer(eb);
	/* Empty tree, no need to check key */
	if (!btrfs_header_nritems(eb) && !level) {
		free_extent_buffer(eb);
		btrfs_node_key_to_cpu(eb, &key, 0);
		btrfs_item_key_to_cpu(eb, &key, 0);
	free_extent_buffer(eb);
	btrfs_init_path(&path);
	/* Search with the first key, to ensure we can reach it */
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		err |= REFERENCER_MISSING;
	/* The block reached at @level must be the very block we started
	 * from, otherwise the backref points into the wrong tree. */
	node = path.nodes[level];
	if (btrfs_header_bytenr(node) != bytenr) {
		"extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
			bytenr, nodesize, bytenr,
			btrfs_header_bytenr(node));
		err |= REFERENCER_MISMATCH;
	if (btrfs_header_level(node) != level) {
		"extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
			bytenr, nodesize, level,
			btrfs_header_level(node));
		err |= REFERENCER_MISMATCH;
	btrfs_release_path(&path);
	if (err & REFERENCER_MISSING) {
		error("extent [%llu %d] lost referencer (owner: %llu)",
			bytenr, nodesize, root_id);
		"extent [%llu %d] lost referencer (owner: %llu, level: %u)",
			bytenr, nodesize, root_id, level);
9042 * Check referencer for shared block backref
9043 * If level == -1, this function will resolve the level.
static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
				      u64 parent, u64 bytenr, int level)
	struct extent_buffer *eb;
	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
	int found_parent = 0;
	eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
	if (!extent_buffer_uptodate(eb))
		level = query_tree_block_level(fs_info, bytenr);
	/* The claimed parent must sit exactly one level above the child. */
	if (level + 1 != btrfs_header_level(eb))
	/* Scan the parent's block pointers for one pointing at @bytenr. */
	nr = btrfs_header_nritems(eb);
	for (i = 0; i < nr; i++) {
		if (bytenr == btrfs_node_blockptr(eb, i)) {
	free_extent_buffer(eb);
	if (!found_parent) {
		"shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
			bytenr, nodesize, parent, level);
		return REFERENCER_MISSING;
9085 * Check referencer for normal (inlined) data ref
9086 * If len == 0, it will be resolved by searching in extent tree
static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
				     u64 root_id, u64 objectid, u64 offset,
				     u64 bytenr, u64 len, u32 count)
	struct btrfs_root *root;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	u32 found_count = 0;
	/* First confirm an EXTENT_ITEM actually exists for @bytenr. */
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;
	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
	ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	if (key.objectid != bytenr ||
	    key.type != BTRFS_EXTENT_ITEM_KEY)
	btrfs_release_path(&path);
	/* Then walk the owning fs root to count matching file extents. */
	key.objectid = root_id;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	key.offset = (u64)-1;
	btrfs_init_path(&path);
	root = btrfs_read_fs_root(fs_info, &key);
	key.objectid = objectid;
	key.type = BTRFS_EXTENT_DATA_KEY;
	 * It can be nasty as data backref offset is
	 * file offset - file extent offset, which is smaller or
	 * equal to original backref offset. The only special case is
	 * overflow. So we need to special check and do further search.
	key.offset = offset & (1ULL << 63) ? 0 : offset;
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	 * Search afterwards to get correct one
	 * NOTE: As we must do a comprehensive check on the data backref to
	 * make sure the dref count also matches, we must iterate all file
	 * extents for that inode.
		leaf = path.nodes[0];
		slot = path.slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
		 * Except normal disk bytenr and disk num bytes, we still
		 * need to do extra check on dbackref offset as
		 * dbackref offset = file_offset - file_extent_offset
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
		    btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
		    (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
		ret = btrfs_next_item(root, &path);
	btrfs_release_path(&path);
	/* The number of matching file extents must equal the backref count. */
	if (found_count != count) {
		"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
			bytenr, len, root_id, objectid, offset, count, found_count);
		return REFERENCER_MISSING;
9185 * Check if the referencer of a shared data backref exists
static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
				     u64 parent, u64 bytenr)
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
	int found_parent = 0;
	eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
	if (!extent_buffer_uptodate(eb))
	/* Scan every EXTENT_DATA item in the parent leaf for one whose
	 * on-disk bytenr matches @bytenr. */
	nr = btrfs_header_nritems(eb);
	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		/* Inline extents have no separate on-disk extent. */
		if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
		if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
	free_extent_buffer(eb);
	if (!found_parent) {
		error("shared extent %llu referencer lost (parent: %llu)",
		return REFERENCER_MISSING;
9229 * This function will check a given extent item, including its backref and
9230 * itself (like crossing stripe boundary and type)
9232 * Since we don't use extent_record anymore, introduce new error bit
static int check_extent_item(struct btrfs_fs_info *fs_info,
			     struct extent_buffer *eb, int slot)
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
	u32 item_size = btrfs_item_size_nr(eb, slot);
	struct btrfs_key key;
	btrfs_item_key_to_cpu(eb, &key, slot);
	/* Global accounting: EXTENT_ITEM key.offset is the extent length,
	 * METADATA_ITEM length is always nodesize. */
	if (key.type == BTRFS_EXTENT_ITEM_KEY)
		bytes_used += key.offset;
		bytes_used += nodesize;
	if (item_size < sizeof(*ei)) {
		/*
		 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
		 * old thing when on disk format is still un-determined.
		 * No need to care about it anymore
		 */
		error("unsupported COMPAT_EXTENT_TREE_V0 detected");
	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(eb, ei);
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
	if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
		error("bad metadata [%llu, %llu) crossing stripe boundary",
			key.objectid, key.objectid + nodesize);
		err |= CROSSING_STRIPE_BOUNDARY;
	ptr = (unsigned long)(ei + 1);
	if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* Old EXTENT_ITEM metadata */
		struct btrfs_tree_block_info *info;
		info = (struct btrfs_tree_block_info *)ptr;
		level = btrfs_tree_block_level(eb, info);
		ptr += sizeof(struct btrfs_tree_block_info);
		/* New METADATA_ITEM */
	end = (unsigned long)ei + item_size;
		err |= ITEM_SIZE_MISMATCH;
	/* Now check every backref in this extent item */
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(eb, iref);
		offset = btrfs_extent_inline_ref_offset(eb, iref);
	case BTRFS_TREE_BLOCK_REF_KEY:
		ret = check_tree_block_backref(fs_info, offset, key.objectid,
	case BTRFS_SHARED_BLOCK_REF_KEY:
		ret = check_shared_block_backref(fs_info, offset, key.objectid,
	case BTRFS_EXTENT_DATA_REF_KEY:
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		ret = check_extent_data_backref(fs_info,
				btrfs_extent_data_ref_root(eb, dref),
				btrfs_extent_data_ref_objectid(eb, dref),
				btrfs_extent_data_ref_offset(eb, dref),
				key.objectid, key.offset,
				btrfs_extent_data_ref_count(eb, dref));
	case BTRFS_SHARED_DATA_REF_KEY:
		ret = check_shared_data_backref(fs_info, offset, key.objectid);
		error("extent[%llu %d %llu] has unknown ref type: %d",
			key.objectid, key.type, key.offset, type);
		err |= UNKNOWN_TYPE;
	ptr += btrfs_extent_inline_ref_size(type);
9346 * Check if a dev extent item is referred correctly by its chunk
static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
				 struct extent_buffer *eb, int slot)
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	struct btrfs_dev_extent *ptr;
	struct btrfs_path path;
	struct btrfs_key chunk_key;
	struct btrfs_key devext_key;
	struct btrfs_chunk *chunk;
	struct extent_buffer *l;
	int found_chunk = 0;
	btrfs_item_key_to_cpu(eb, &devext_key, slot);
	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
	length = btrfs_dev_extent_length(eb, ptr);
	/* Look up the chunk this dev extent claims to belong to. */
	chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
	chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
	chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
	/* Chunk and dev extent lengths must match. */
	if (btrfs_chunk_length(l, chunk) != length)
	/* One of the chunk's stripes must point back at this dev extent
	 * (same devid and physical offset). */
	num_stripes = btrfs_chunk_num_stripes(l, chunk);
	for (i = 0; i < num_stripes; i++) {
		u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
		u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
		if (devid == devext_key.objectid &&
		    offset == devext_key.offset) {
	btrfs_release_path(&path);
		"device extent[%llu, %llu, %llu] did not find the related chunk",
		devext_key.objectid, devext_key.offset, length);
	return REFERENCER_MISSING;
9405 * Check if the used space is correct with the dev item
static int check_dev_item(struct btrfs_fs_info *fs_info,
			  struct extent_buffer *eb, int slot)
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_dev_item *dev_item;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_dev_extent *ptr;
	dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
	dev_id = btrfs_device_id(eb, dev_item);
	used = btrfs_device_bytes_used(eb, dev_item);
	key.objectid = dev_id;
	key.type = BTRFS_DEV_EXTENT_KEY;
	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
		btrfs_item_key_to_cpu(eb, &key, slot);
		error("cannot find any related dev extent for dev[%llu, %u, %llu]",
			key.objectid, key.type, key.offset);
		btrfs_release_path(&path);
		return REFERENCER_MISSING;
	/* Iterate dev_extents to calculate the used space of a device */
		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
		if (key.objectid > dev_id)
		if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
		ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
				     struct btrfs_dev_extent);
		total += btrfs_dev_extent_length(path.nodes[0], ptr);
		ret = btrfs_next_item(dev_root, &path);
	btrfs_release_path(&path);
	/* Summed dev extent lengths must equal bytes_used in the dev item. */
	if (used != total) {
		btrfs_item_key_to_cpu(eb, &key, slot);
		"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
			total, used, BTRFS_ROOT_TREE_OBJECTID,
			BTRFS_DEV_EXTENT_KEY, dev_id);
		return ACCOUNTING_MISMATCH;
/*
 * Replace @root's node with a freshly initialized empty node, used when
 * rebuilding a corrupted tree (e.g. extent tree re-init).  With @overwrite
 * the existing root node buffer is reused in place; otherwise a new block
 * is allocated and the header rewritten from scratch.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, int overwrite)
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	struct btrfs_disk_key disk_key = {0,0,0};
		extent_buffer_get(c);
	c = btrfs_alloc_free_block(trans, root,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
		extent_buffer_get(c);
	/* Re-create the block header for an empty node at @level. */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);
	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),
	btrfs_mark_buffer_dirty(c);
	/*
	 * this case can happen in the following case:
	 * 1.overwrite previous root.
	 * 2.reinit reloc data root, this is because we skip pin
	 * down reloc data tree before which means we can allocate
	 * same block bytenr here.
	 */
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
		root->root_item.level = btrfs_header_level(root->node);
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
			free_extent_buffer(c);
	free_extent_buffer(old);
	add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the extent
 * allocator will not hand those bytes out while the extent tree is being
 * rebuilt.  When walking the tree root (@tree_root != 0), ROOT_ITEMs are
 * followed into the subtrees they point at.
 */
static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
				struct extent_buffer *eb, int tree_root)
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	int level = btrfs_header_level(eb);
	/*
	 * If we have pinned this block before, don't pin it again.
	 * This can not only avoid forever loop with broken filesystem
	 * but also give us some speedups.
	 */
	if (test_range_bit(&fs_info->pinned_extents, eb->start,
			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
	btrfs_pin_extent(fs_info, eb->start, eb->len);
	nodesize = btrfs_super_nodesize(fs_info->super_copy);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
			btrfs_item_key_to_cpu(eb, &key, i);
			if (key.type != BTRFS_ROOT_ITEM_KEY)
			/* Skip the extent root and reloc roots */
			if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
			    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
			    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
			bytenr = btrfs_disk_root_bytenr(eb, ri);
			/*
			 * If at any point we start needing the real root we
			 * will have to build a stump root for the root we are
			 * in, but for now this doesn't actually use the root so
			 * just pass in extent_root.
			 */
			tmp = read_tree_block(fs_info->extent_root, bytenr,
			if (!extent_buffer_uptodate(tmp)) {
				fprintf(stderr, "Error reading root block\n");
			ret = pin_down_tree_blocks(fs_info, tmp, 0);
			free_extent_buffer(tmp);
			bytenr = btrfs_node_blockptr(eb, i);
			/* If we aren't the tree root don't read the block */
			if (level == 1 && !tree_root) {
				btrfs_pin_extent(fs_info, bytenr, nodesize);
			tmp = read_tree_block(fs_info->extent_root, bytenr,
			if (!extent_buffer_uptodate(tmp)) {
				fprintf(stderr, "Error reading tree block\n");
			ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
			free_extent_buffer(tmp);
/*
 * Pin all in-use metadata before rebuilding the extent tree: first the
 * chunk tree, then the tree of tree roots (tree_root=1 so the walk also
 * recurses into every subvolume tree referenced by ROOT_ITEMs).
 */
9615 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9619 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9623 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Re-create the in-memory block group cache purely from CHUNK_ITEMs in
 * the chunk tree (used by --init-extent-tree after the stale block
 * groups have been dropped). Also clears the cached avail_*_alloc_bits
 * and marks each chunk's logical range dirty in the free space cache.
 */
9626 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9628 struct btrfs_block_group_cache *cache;
9629 struct btrfs_path *path;
9630 struct extent_buffer *leaf;
9631 struct btrfs_chunk *chunk;
9632 struct btrfs_key key;
9636 path = btrfs_alloc_path();
9641 key.type = BTRFS_CHUNK_ITEM_KEY;
9644 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9646 btrfs_free_path(path);
9651 * We do this in case the block groups were screwed up and had alloc
9652 * bits that aren't actually set on the chunks. This happens with
9653 * restored images every time and could happen in real life I guess.
9655 fs_info->avail_data_alloc_bits = 0;
9656 fs_info->avail_metadata_alloc_bits = 0;
9657 fs_info->avail_system_alloc_bits = 0;
9659 /* First we need to create the in-memory block groups */
9661 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9662 ret = btrfs_next_leaf(fs_info->chunk_root, path);
9664 btrfs_free_path(path);
9672 leaf = path->nodes[0];
9673 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9674 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* key.offset of a CHUNK_ITEM is the chunk's logical start address */
9679 chunk = btrfs_item_ptr(leaf, path->slots[0],
9680 struct btrfs_chunk);
9681 btrfs_add_block_group(fs_info, 0,
9682 btrfs_chunk_type(leaf, chunk),
9683 key.objectid, key.offset,
9684 btrfs_chunk_length(leaf, chunk));
9685 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9686 key.offset + btrfs_chunk_length(leaf, chunk),
/*
 * Second pass over the freshly created block groups; presumably marks
 * them cached/usable - TODO confirm, loop body not fully visible here.
 */
9692 cache = btrfs_lookup_first_block_group(fs_info, start);
9696 start = cache->key.objectid + cache->key.offset;
9699 btrfs_free_path(path);
/*
 * Discard any pending balance state so the rebuilt extent tree is not
 * left referencing half-finished relocation: delete the BALANCE_ITEM,
 * batch-delete every TREE_RELOC root item, then re-initialize the data
 * reloc tree and give it a fresh top-level directory.
 */
9703 static int reset_balance(struct btrfs_trans_handle *trans,
9704 struct btrfs_fs_info *fs_info)
9706 struct btrfs_root *root = fs_info->tree_root;
9707 struct btrfs_path *path;
9708 struct extent_buffer *leaf;
9709 struct btrfs_key key;
9710 int del_slot, del_nr = 0;
9714 path = btrfs_alloc_path();
9718 key.objectid = BTRFS_BALANCE_OBJECTID;
9719 key.type = BTRFS_BALANCE_ITEM_KEY;
9722 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item on disk: jump straight to the data reloc rebuild */
9727 goto reinit_data_reloc;
9732 ret = btrfs_del_item(trans, root, path);
9735 btrfs_release_path(path);
/* Delete all TREE_RELOC_OBJECTID root items, batching adjacent slots */
9737 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9738 key.type = BTRFS_ROOT_ITEM_KEY;
9741 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9745 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9750 ret = btrfs_del_items(trans, root, path,
9757 btrfs_release_path(path);
9760 ret = btrfs_search_slot(trans, root, &key, path,
9767 leaf = path->nodes[0];
9768 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9769 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9771 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9776 del_slot = path->slots[0];
9785 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9789 btrfs_release_path(path);
/* Re-create the data reloc tree from scratch */
9792 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9793 key.type = BTRFS_ROOT_ITEM_KEY;
9794 key.offset = (u64)-1;
9795 root = btrfs_read_fs_root(fs_info, &key);
9797 fprintf(stderr, "Error reading data reloc tree\n");
9798 ret = PTR_ERR(root);
9801 record_root_in_trans(trans, root);
9802 ret = btrfs_fsck_reinit_root(trans, root, 0);
9805 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9807 btrfs_free_path(path);
/*
 * Top-level driver for --init-extent-tree: pin all metadata still in
 * use, rebuild the in-memory block groups from the chunk tree, reinit
 * the extent root, re-insert the BLOCK_GROUP_ITEMs and reset any
 * pending balance. Refuses to run on mixed block group filesystems.
 */
9811 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9812 struct btrfs_fs_info *fs_info)
9818 * The only reason we don't do this is because right now we're just
9819 * walking the trees we find and pinning down their bytes, we don't look
9820 * at any of the leaves. In order to do mixed groups we'd have to check
9821 * the leaves of any fs roots and pin down the bytes for any file
9822 * extents we find. Not hard but why do it if we don't have to?
9824 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9825 fprintf(stderr, "We don't support re-initing the extent tree "
9826 "for mixed block groups yet, please notify a btrfs "
9827 "developer you want to do this so they can add this "
9828 "functionality.\n");
9833 * first we need to walk all of the trees except the extent tree and pin
9834 * down the bytes that are in use so we don't overwrite any existing
9837 ret = pin_metadata_blocks(fs_info);
9839 fprintf(stderr, "error pinning down used bytes\n");
9844 * Need to drop all the block groups since we're going to recreate all
9847 btrfs_free_block_groups(fs_info);
9848 ret = reset_block_groups(fs_info);
9850 fprintf(stderr, "error resetting the block groups\n");
9854 /* Ok we can allocate now, reinit the extent root */
9855 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9857 fprintf(stderr, "extent root initialization failed\n");
9859 * When the transaction code is updated we should end the
9860 * transaction, but for now progs only knows about commit so
9861 * just return an error.
9867 * Now we have all the in-memory block groups setup so we can make
9868 * allocations properly, and the metadata we care about is safe since we
9869 * pinned all of it above.
9872 struct btrfs_block_group_cache *cache;
/* Re-insert one BLOCK_GROUP_ITEM per in-memory block group */
9874 cache = btrfs_lookup_first_block_group(fs_info, start);
9877 start = cache->key.objectid + cache->key.offset;
9878 ret = btrfs_insert_item(trans, fs_info->extent_root,
9879 &cache->key, &cache->item,
9880 sizeof(cache->item));
9882 fprintf(stderr, "Error adding block group\n");
9885 btrfs_extent_post_op(trans, fs_info->extent_root);
9888 ret = reset_balance(trans, fs_info);
9890 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of @eb: look up its owning root from the header
 * owner field, then do a write-mode search (cow=1) down to eb's level
 * and first key inside a transaction, which rewrites the path to the
 * block. Commits the transaction before returning.
 */
9895 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9897 struct btrfs_path *path;
9898 struct btrfs_trans_handle *trans;
9899 struct btrfs_key key;
9902 printf("Recowing metadata block %llu\n", eb->start);
9903 key.objectid = btrfs_header_owner(eb);
9904 key.type = BTRFS_ROOT_ITEM_KEY;
9905 key.offset = (u64)-1;
9907 root = btrfs_read_fs_root(root->fs_info, &key);
9909 fprintf(stderr, "Couldn't find owner root %llu\n",
9911 return PTR_ERR(root);
9914 path = btrfs_alloc_path();
9918 trans = btrfs_start_transaction(root, 1);
9919 if (IS_ERR(trans)) {
9920 btrfs_free_path(path);
9921 return PTR_ERR(trans);
/* Search at eb's own level so the search terminates at eb itself */
9924 path->lowest_level = btrfs_header_level(eb);
9925 if (path->lowest_level)
9926 btrfs_node_key_to_cpu(eb, &key, 0);
9928 btrfs_item_key_to_cpu(eb, &key, 0);
9930 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9931 btrfs_commit_transaction(trans, root);
9932 btrfs_free_path(path);
/*
 * Delete one item previously recorded as bad (on the delete_items
 * list): read the root identified by bad->root_id, search for bad->key
 * in delete mode and remove it, committing the transaction afterwards.
 */
9936 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9938 struct btrfs_path *path;
9939 struct btrfs_trans_handle *trans;
9940 struct btrfs_key key;
9943 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9944 bad->key.type, bad->key.offset);
9945 key.objectid = bad->root_id;
9946 key.type = BTRFS_ROOT_ITEM_KEY;
9947 key.offset = (u64)-1;
9949 root = btrfs_read_fs_root(root->fs_info, &key);
9951 fprintf(stderr, "Couldn't find owner root %llu\n",
9953 return PTR_ERR(root);
9956 path = btrfs_alloc_path();
9960 trans = btrfs_start_transaction(root, 1);
9961 if (IS_ERR(trans)) {
9962 btrfs_free_path(path);
9963 return PTR_ERR(trans);
9966 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9972 ret = btrfs_del_item(trans, root, path);
9974 btrfs_commit_transaction(trans, root);
9975 btrfs_free_path(path);
/*
 * Clear the log tree pointer and level in the superblock and commit,
 * so a stale/unwanted log tree is never replayed on the next mount.
 */
9979 static int zero_log_tree(struct btrfs_root *root)
9981 struct btrfs_trans_handle *trans;
9984 trans = btrfs_start_transaction(root, 1);
9985 if (IS_ERR(trans)) {
9986 ret = PTR_ERR(trans);
9989 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9990 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9991 ret = btrfs_commit_transaction(trans, root);
/*
 * Read back the data extent at disk bytenr @start, length @len, one
 * sector at a time into @buf and insert a csum item for each sector
 * into the csum tree. @buf must hold at least one sector.
 */
9995 static int populate_csum(struct btrfs_trans_handle *trans,
9996 struct btrfs_root *csum_root, char *buf, u64 start,
10003 while (offset < len) {
10004 sectorsize = csum_root->sectorsize;
10005 ret = read_extent_data(csum_root, buf, start + offset,
/* start + len marks the extent end here - NOTE(review): confirm against
 * btrfs_csum_file_block()'s expectations for its second bytenr arg. */
10009 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10010 start + offset, buf, sectorsize);
10013 offset += sectorsize;
/*
 * Walk one fs/subvolume tree and (re)generate csums for every regular
 * (REG) file extent it references; inline and other extent types are
 * skipped. -EEXIST from populate_csum is tolerated since shared extents
 * may be visited more than once.
 */
10018 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10019 struct btrfs_root *csum_root,
10020 struct btrfs_root *cur_root)
10022 struct btrfs_path *path;
10023 struct btrfs_key key;
10024 struct extent_buffer *node;
10025 struct btrfs_file_extent_item *fi;
10032 path = btrfs_alloc_path();
/* One sector's worth of scratch space for populate_csum() */
10035 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10045 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10048 /* Iterate all regular file extents and fill its csum */
10050 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10052 if (key.type != BTRFS_EXTENT_DATA_KEY)
10054 node = path->nodes[0];
10055 slot = path->slots[0];
10056 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10057 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10059 start = btrfs_file_extent_disk_bytenr(node, fi);
10060 len = btrfs_file_extent_disk_num_bytes(node, fi);
10062 ret = populate_csum(trans, csum_root, buf, start, len);
10063 if (ret == -EEXIST)
10069 * TODO: if next leaf is corrupted, jump to nearest next valid
10072 ret = btrfs_next_item(cur_root, path);
10082 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree: iterate
 * ROOT_ITEMs in the tree of tree roots, keep only fstree objectids,
 * and run fill_csum_tree_from_one_fs_root() on each. Used when the
 * extent tree cannot be trusted (e.g. after --init-extent-tree).
 */
10087 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10088 struct btrfs_root *csum_root)
10090 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10091 struct btrfs_path *path;
10092 struct btrfs_root *tree_root = fs_info->tree_root;
10093 struct btrfs_root *cur_root;
10094 struct extent_buffer *node;
10095 struct btrfs_key key;
10099 path = btrfs_alloc_path();
10103 key.objectid = BTRFS_FS_TREE_OBJECTID;
10105 key.type = BTRFS_ROOT_ITEM_KEY;
10107 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10116 node = path->nodes[0];
10117 slot = path->slots[0];
10118 btrfs_item_key_to_cpu(node, &key, slot);
10119 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10121 if (key.type != BTRFS_ROOT_ITEM_KEY)
10123 if (!is_fstree(key.objectid))
10125 key.offset = (u64)-1;
10127 cur_root = btrfs_read_fs_root(fs_info, &key);
10128 if (IS_ERR(cur_root) || !cur_root) {
10129 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10133 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10138 ret = btrfs_next_item(tree_root, path);
10148 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree: for every
 * EXTENT_ITEM flagged BTRFS_EXTENT_FLAG_DATA, recompute and insert the
 * csums of that extent. Faster than the fs-tree walk, but only valid
 * when the extent tree itself is trustworthy.
 */
10152 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10153 struct btrfs_root *csum_root)
10155 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10156 struct btrfs_path *path;
10157 struct btrfs_extent_item *ei;
10158 struct extent_buffer *leaf;
10160 struct btrfs_key key;
10163 path = btrfs_alloc_path();
10168 key.type = BTRFS_EXTENT_ITEM_KEY;
10171 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10173 btrfs_free_path(path);
/* One sector's worth of scratch space for populate_csum() */
10177 buf = malloc(csum_root->sectorsize);
10179 btrfs_free_path(path);
10184 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10185 ret = btrfs_next_leaf(extent_root, path);
10193 leaf = path->nodes[0];
10195 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10196 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Metadata extents carry no data csums; skip anything not DATA */
10201 ei = btrfs_item_ptr(leaf, path->slots[0],
10202 struct btrfs_extent_item);
10203 if (!(btrfs_extent_flags(leaf, ei) &
10204 BTRFS_EXTENT_FLAG_DATA)) {
10209 ret = populate_csum(trans, csum_root, buf, key.objectid,
10216 btrfs_free_path(path);
10222 * Recalculate the csum and put it into the csum tree.
10224 * Extent tree init will wipe out all the extent info, so in that case, we
10225 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10226 * will use fs/subvol trees to init the csum tree.
10228 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10229 struct btrfs_root *csum_root,
10230 int search_fs_tree)
/* Dispatch: fs-tree walk (extent tree untrusted) vs extent-tree walk */
10232 if (search_fs_tree)
10233 return fill_csum_tree_from_fs(trans, csum_root);
10235 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by
 * build_roots_info_cache(): free every root_item_info entry, then the
 * cache tree itself, and reset the global pointer to NULL.
 */
10238 static void free_roots_info_cache(void)
10240 if (!roots_info_cache)
10243 while (!cache_tree_empty(roots_info_cache)) {
10244 struct cache_extent *entry;
10245 struct root_item_info *rii;
10247 entry = first_cache_extent(roots_info_cache);
10250 remove_cache_extent(roots_info_cache, entry);
10251 rii = container_of(entry, struct root_item_info, cache_extent);
10255 free(roots_info_cache);
10256 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree
 * block backed by a TREE_BLOCK_REF - i.e. the actual on-disk root node
 * (bytenr/generation/level) of every subvolume. The result is stored in
 * the global roots_info_cache, keyed by root id, and later compared
 * against the ROOT_ITEMs by maybe_repair_root_item().
 */
10259 static int build_roots_info_cache(struct btrfs_fs_info *info)
10262 struct btrfs_key key;
10263 struct extent_buffer *leaf;
10264 struct btrfs_path *path;
10266 if (!roots_info_cache) {
10267 roots_info_cache = malloc(sizeof(*roots_info_cache));
10268 if (!roots_info_cache)
10270 cache_tree_init(roots_info_cache);
10273 path = btrfs_alloc_path();
10278 key.type = BTRFS_EXTENT_ITEM_KEY;
10281 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10284 leaf = path->nodes[0];
10287 struct btrfs_key found_key;
10288 struct btrfs_extent_item *ei;
10289 struct btrfs_extent_inline_ref *iref;
10290 int slot = path->slots[0];
10295 struct cache_extent *entry;
10296 struct root_item_info *rii;
10298 if (slot >= btrfs_header_nritems(leaf)) {
10299 ret = btrfs_next_leaf(info->extent_root, path);
10306 leaf = path->nodes[0];
10307 slot = path->slots[0];
10310 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree blocks are interesting; data extents are skipped */
10312 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10313 found_key.type != BTRFS_METADATA_ITEM_KEY)
10316 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10317 flags = btrfs_extent_flags(leaf, ei);
10319 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10320 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEMs (skinny metadata) carry the level in key.offset;
 * classic EXTENT_ITEMs embed a btrfs_tree_block_info before the
 * inline refs.
 */
10323 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10324 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10325 level = found_key.offset;
10327 struct btrfs_tree_block_info *binfo;
10329 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10330 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10331 level = btrfs_tree_block_level(leaf, binfo);
10335 * For a root extent, it must be of the following type and the
10336 * first (and only one) iref in the item.
10338 type = btrfs_extent_inline_ref_type(leaf, iref);
10339 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10342 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10343 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First block seen for this root id: create a fresh info entry */
10345 rii = malloc(sizeof(struct root_item_info));
10350 rii->cache_extent.start = root_id;
10351 rii->cache_extent.size = 1;
10352 rii->level = (u8)-1;
10353 entry = &rii->cache_extent;
10354 ret = insert_cache_extent(roots_info_cache, entry);
10357 rii = container_of(entry, struct root_item_info,
10361 ASSERT(rii->cache_extent.start == root_id);
10362 ASSERT(rii->cache_extent.size == 1);
/*
 * Keep the deepest (highest-level) block as the candidate root;
 * node_count tracks how many blocks share that level - a real
 * root must be unique at its level.
 */
10364 if (level > rii->level || rii->level == (u8)-1) {
10365 rii->level = level;
10366 rii->bytenr = found_key.objectid;
10367 rii->gen = btrfs_extent_generation(leaf, ei);
10368 rii->node_count = 1;
10369 } else if (level == rii->level) {
10377 btrfs_free_path(path);
/*
 * Compare the on-disk ROOT_ITEM at path->nodes[0]/slots[0] against the
 * bytenr/level/generation recorded in roots_info_cache for @root_key's
 * objectid. In read-only mode only reports a mismatch; otherwise
 * rewrites the root item in place to match the real root node found in
 * the extent tree. Refuses to "fix" when the root item's generation is
 * newer than the found root node's.
 */
10382 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10383 struct btrfs_path *path,
10384 const struct btrfs_key *root_key,
10385 const int read_only_mode)
10387 const u64 root_id = root_key->objectid;
10388 struct cache_extent *entry;
10389 struct root_item_info *rii;
10390 struct btrfs_root_item ri;
10391 unsigned long offset;
10393 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10396 "Error: could not find extent items for root %llu\n",
10397 root_key->objectid);
10401 rii = container_of(entry, struct root_item_info, cache_extent);
10402 ASSERT(rii->cache_extent.start == root_id);
10403 ASSERT(rii->cache_extent.size == 1);
/* Ambiguous: more than one block at the top level, no unique root */
10405 if (rii->node_count != 1) {
10407 "Error: could not find btree root extent for root %llu\n",
10412 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10413 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10415 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10416 btrfs_root_level(&ri) != rii->level ||
10417 btrfs_root_generation(&ri) != rii->gen) {
10420 * If we're in repair mode but our caller told us to not update
10421 * the root item, i.e. just check if it needs to be updated, don't
10422 * print this message, since the caller will call us again shortly
10423 * for the same root item without read only mode (the caller will
10424 * open a transaction first).
10426 if (!(read_only_mode && repair))
10428 "%sroot item for root %llu,"
10429 " current bytenr %llu, current gen %llu, current level %u,"
10430 " new bytenr %llu, new gen %llu, new level %u\n",
10431 (read_only_mode ? "" : "fixing "),
10433 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10434 btrfs_root_level(&ri),
10435 rii->bytenr, rii->gen, rii->level);
10437 if (btrfs_root_generation(&ri) > rii->gen) {
10439 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10440 root_id, btrfs_root_generation(&ri), rii->gen);
10444 if (!read_only_mode) {
10445 btrfs_set_root_bytenr(&ri, rii->bytenr);
10446 btrfs_set_root_level(&ri, rii->level);
10447 btrfs_set_root_generation(&ri, rii->gen);
10448 write_extent_buffer(path->nodes[0], &ri,
10449 offset, sizeof(ri));
10459 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10460 * caused read-only snapshots to be corrupted if they were created at a moment
10461 * when the source subvolume/snapshot had orphan items. The issue was that the
10462 * on-disk root items became incorrect, referring to the pre orphan cleanup root
10463 * node instead of the post orphan cleanup root node.
10464 * So this function, and its callees, just detects and fixes those cases. Even
10465 * though the regression was for read-only snapshots, this function applies to
10466 * any snapshot/subvolume root.
10467 * This must be run before any other repair code - not doing it so, makes other
10468 * repair code delete or modify backrefs in the extent tree for example, which
10469 * will result in an inconsistent fs after repairing the root items.
10471 static int repair_root_items(struct btrfs_fs_info *info)
10473 struct btrfs_path *path = NULL;
10474 struct btrfs_key key;
10475 struct extent_buffer *leaf;
10476 struct btrfs_trans_handle *trans = NULL;
10479 int need_trans = 0;
/* Build the root-id -> real root node map first (extent tree scan) */
10481 ret = build_roots_info_cache(info);
10485 path = btrfs_alloc_path();
10491 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10492 key.type = BTRFS_ROOT_ITEM_KEY;
10497 * Avoid opening and committing transactions if a leaf doesn't have
10498 * any root items that need to be fixed, so that we avoid rotating
10499 * backup roots unnecessarily.
10502 trans = btrfs_start_transaction(info->tree_root, 1);
10503 if (IS_ERR(trans)) {
10504 ret = PTR_ERR(trans);
10509 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10513 leaf = path->nodes[0];
10516 struct btrfs_key found_key;
10518 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10519 int no_more_keys = find_next_key(path, &key);
10521 btrfs_release_path(path);
10523 ret = btrfs_commit_transaction(trans,
10535 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10537 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10539 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* First pass per leaf is read-only; a second pass fixes under trans */
10542 ret = maybe_repair_root_item(info, path, &found_key,
10547 if (!trans && repair) {
10550 btrfs_release_path(path);
10560 free_roots_info_cache();
10561 btrfs_free_path(path);
10563 btrfs_commit_transaction(trans, info->tree_root);
/* Help text for `btrfs check`, consumed by usage(); one line per entry. */
10570 const char * const cmd_check_usage[] = {
10571 "btrfs check [options] <device>",
10572 "Check structural integrity of a filesystem (unmounted).",
10573 "Check structural integrity of an unmounted filesystem. Verify internal",
10574 "trees' consistency and item connectivity. In the repair mode try to",
10575 "fix the problems found.",
10576 "WARNING: the repair mode is considered dangerous",
10578 "-s|--super <superblock> use this superblock copy",
10579 "-b|--backup use the first valid backup root copy",
10580 "--repair try to repair the filesystem",
10581 "--readonly run in read-only mode (default)",
10582 "--init-csum-tree create a new CRC tree",
10583 "--init-extent-tree create a new extent tree",
10584 "--check-data-csum verify checksums of data blocks",
10585 "-Q|--qgroup-report print a report on qgroup consistency",
10586 "-E|--subvol-extents <subvolid>",
10587 " print subvolume extents and sharing state",
10588 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
10589 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
10590 "-p|--progress indicate progress",
10594 int cmd_check(int argc, char **argv)
10596 struct cache_tree root_cache;
10597 struct btrfs_root *root;
10598 struct btrfs_fs_info *info;
10601 u64 tree_root_bytenr = 0;
10602 u64 chunk_root_bytenr = 0;
10603 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10606 int init_csum_tree = 0;
10608 int qgroup_report = 0;
10609 int qgroups_repaired = 0;
10610 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10614 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10615 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10616 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10617 static const struct option long_options[] = {
10618 { "super", required_argument, NULL, 's' },
10619 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10620 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10621 { "init-csum-tree", no_argument, NULL,
10622 GETOPT_VAL_INIT_CSUM },
10623 { "init-extent-tree", no_argument, NULL,
10624 GETOPT_VAL_INIT_EXTENT },
10625 { "check-data-csum", no_argument, NULL,
10626 GETOPT_VAL_CHECK_CSUM },
10627 { "backup", no_argument, NULL, 'b' },
10628 { "subvol-extents", required_argument, NULL, 'E' },
10629 { "qgroup-report", no_argument, NULL, 'Q' },
10630 { "tree-root", required_argument, NULL, 'r' },
10631 { "chunk-root", required_argument, NULL,
10632 GETOPT_VAL_CHUNK_TREE },
10633 { "progress", no_argument, NULL, 'p' },
10634 { NULL, 0, NULL, 0}
10637 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10641 case 'a': /* ignored */ break;
10643 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10646 num = arg_strtou64(optarg);
10647 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10649 "ERROR: super mirror should be less than: %d\n",
10650 BTRFS_SUPER_MIRROR_MAX);
10653 bytenr = btrfs_sb_offset(((int)num));
10654 printf("using SB copy %llu, bytenr %llu\n", num,
10655 (unsigned long long)bytenr);
10661 subvolid = arg_strtou64(optarg);
10664 tree_root_bytenr = arg_strtou64(optarg);
10666 case GETOPT_VAL_CHUNK_TREE:
10667 chunk_root_bytenr = arg_strtou64(optarg);
10670 ctx.progress_enabled = true;
10674 usage(cmd_check_usage);
10675 case GETOPT_VAL_REPAIR:
10676 printf("enabling repair mode\n");
10678 ctree_flags |= OPEN_CTREE_WRITES;
10680 case GETOPT_VAL_READONLY:
10683 case GETOPT_VAL_INIT_CSUM:
10684 printf("Creating a new CRC tree\n");
10685 init_csum_tree = 1;
10687 ctree_flags |= OPEN_CTREE_WRITES;
10689 case GETOPT_VAL_INIT_EXTENT:
10690 init_extent_tree = 1;
10691 ctree_flags |= (OPEN_CTREE_WRITES |
10692 OPEN_CTREE_NO_BLOCK_GROUPS);
10695 case GETOPT_VAL_CHECK_CSUM:
10696 check_data_csum = 1;
10701 if (check_argc_exact(argc - optind, 1))
10702 usage(cmd_check_usage);
10704 if (ctx.progress_enabled) {
10705 ctx.tp = TASK_NOTHING;
10706 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10709 /* This check is the only reason for --readonly to exist */
10710 if (readonly && repair) {
10711 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10716 cache_tree_init(&root_cache);
10718 if((ret = check_mounted(argv[optind])) < 0) {
10719 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10722 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10727 /* only allow partial opening under repair mode */
10729 ctree_flags |= OPEN_CTREE_PARTIAL;
10731 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10732 chunk_root_bytenr, ctree_flags);
10734 fprintf(stderr, "Couldn't open file system\n");
10739 global_info = info;
10740 root = info->fs_root;
10743 * repair mode will force us to commit transaction which
10744 * will make us fail to load log tree when mounting.
10746 if (repair && btrfs_super_log_root(info->super_copy)) {
10747 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10752 ret = zero_log_tree(root);
10754 fprintf(stderr, "fail to zero log tree\n");
10759 uuid_unparse(info->super_copy->fsid, uuidbuf);
10760 if (qgroup_report) {
10761 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10763 ret = qgroup_verify_all(info);
10769 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10770 subvolid, argv[optind], uuidbuf);
10771 ret = print_extent_state(info, subvolid);
10774 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10776 if (!extent_buffer_uptodate(info->tree_root->node) ||
10777 !extent_buffer_uptodate(info->dev_root->node) ||
10778 !extent_buffer_uptodate(info->chunk_root->node)) {
10779 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10784 if (init_extent_tree || init_csum_tree) {
10785 struct btrfs_trans_handle *trans;
10787 trans = btrfs_start_transaction(info->extent_root, 0);
10788 if (IS_ERR(trans)) {
10789 fprintf(stderr, "Error starting transaction\n");
10790 ret = PTR_ERR(trans);
10794 if (init_extent_tree) {
10795 printf("Creating a new extent tree\n");
10796 ret = reinit_extent_tree(trans, info);
10801 if (init_csum_tree) {
10802 fprintf(stderr, "Reinit crc root\n");
10803 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10805 fprintf(stderr, "crc root initialization failed\n");
10810 ret = fill_csum_tree(trans, info->csum_root,
10813 fprintf(stderr, "crc refilling failed\n");
10818 * Ok now we commit and run the normal fsck, which will add
10819 * extent entries for all of the items it finds.
10821 ret = btrfs_commit_transaction(trans, info->extent_root);
10825 if (!extent_buffer_uptodate(info->extent_root->node)) {
10826 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10830 if (!extent_buffer_uptodate(info->csum_root->node)) {
10831 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10836 if (!ctx.progress_enabled)
10837 fprintf(stderr, "checking extents\n");
10838 ret = check_chunks_and_extents(root);
10840 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10842 ret = repair_root_items(info);
10846 fprintf(stderr, "Fixed %d roots.\n", ret);
10848 } else if (ret > 0) {
10850 "Found %d roots with an outdated root item.\n",
10853 "Please run a filesystem check with the option --repair to fix them.\n");
10858 if (!ctx.progress_enabled) {
10859 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10860 fprintf(stderr, "checking free space tree\n");
10862 fprintf(stderr, "checking free space cache\n");
10864 ret = check_space_cache(root);
10869 * We used to have to have these hole extents in between our real
10870 * extents so if we don't have this flag set we need to make sure there
10871 * are no gaps in the file extents for inodes, otherwise we can just
10872 * ignore it when this happens.
10874 no_holes = btrfs_fs_incompat(root->fs_info,
10875 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10876 if (!ctx.progress_enabled)
10877 fprintf(stderr, "checking fs roots\n");
10878 ret = check_fs_roots(root, &root_cache);
10882 fprintf(stderr, "checking csums\n");
10883 ret = check_csums(root);
10887 fprintf(stderr, "checking root refs\n");
10888 ret = check_root_refs(root, &root_cache);
10892 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10893 struct extent_buffer *eb;
10895 eb = list_first_entry(&root->fs_info->recow_ebs,
10896 struct extent_buffer, recow);
10897 list_del_init(&eb->recow);
10898 ret = recow_extent_buffer(root, eb);
10903 while (!list_empty(&delete_items)) {
10904 struct bad_item *bad;
10906 bad = list_first_entry(&delete_items, struct bad_item, list);
10907 list_del_init(&bad->list);
10909 ret = delete_bad_item(root, bad);
10913 if (info->quota_enabled) {
10915 fprintf(stderr, "checking quota groups\n");
10916 err = qgroup_verify_all(info);
10920 err = repair_qgroups(info, &qgroups_repaired);
10925 if (!list_empty(&root->fs_info->recow_ebs)) {
10926 fprintf(stderr, "Transid errors in file system\n");
10930 /* Don't override original ret */
10931 if (!ret && qgroups_repaired)
10932 ret = qgroups_repaired;
10934 if (found_old_backref) { /*
10935 * there was a disk format change when mixed
10936 * backref was in testing tree. The old format
10937 * existed about one week.
10939 printf("\n * Found old mixed backref format. "
10940 "The old format is not supported! *"
10941 "\n * Please mount the FS in readonly mode, "
10942 "backup data and re-format the FS. *\n\n");
10945 printf("found %llu bytes used err is %d\n",
10946 (unsigned long long)bytes_used, ret);
10947 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10948 printf("total tree bytes: %llu\n",
10949 (unsigned long long)total_btree_bytes);
10950 printf("total fs tree bytes: %llu\n",
10951 (unsigned long long)total_fs_tree_bytes);
10952 printf("total extent tree bytes: %llu\n",
10953 (unsigned long long)total_extent_tree_bytes);
10954 printf("btree space waste bytes: %llu\n",
10955 (unsigned long long)btree_space_waste);
10956 printf("file data blocks allocated: %llu\n referenced %llu\n",
10957 (unsigned long long)data_bytes_allocated,
10958 (unsigned long long)data_bytes_referenced);
10960 free_qgroup_counts();
10961 free_root_recs_tree(&root_cache);
10965 if (ctx.progress_enabled)
10966 task_deinit(ctx.info);