2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits reported while checking individual fs/root tree items.
 * Each flag occupies its own bit; the lowest bit used here is bit 1.
 */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1<<18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1<<19)	/* DIR_INDEX found but not match */
#define DIR_COUNT_AGAIN		(1<<20)	/* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1<<21)	/* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and changed to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
/*
 * Explicit "not yet determined" value for
 * extent_record::flag_block_full_backref, which is a 2-bit field and can
 * therefore hold this value next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Error bits accumulated in the 'errors' field of a backref
 * (see print_ref_error() for the human readable form of each bit).
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Error bits accumulated in inode_record::errors
 * (see print_inode_error() for the human readable form of each bit).
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally to classify errors.
 *
 * Every flag must own a distinct bit.  CROSSING_STRIPE_BOUNDARY used to
 * share (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the next
 * free bit while all other values stay unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatibility function to allow reuse of old code */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will do the hole split and is much more restrictive than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exist.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* For reloc root errors, print the corresponding fs root objectid */
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Merge the inode records cached on @src_node into @dst_node.
 *
 * One reference on @src_node is dropped first. The entries of
 * src_node->root_cache are walked and re-inserted into
 * dst_node->root_cache, then the same pass is done from inode_cache to
 * inode_cache. When the destination already holds an entry for the same
 * range, the two records are combined with merge_inode_recs() and the
 * source record is freed.
 *
 * Finally, if @src_node had a current inode and the destination either has
 * no current record or one with a smaller inode number, the destination's
 * current record is marked checked and replaced by the record for the
 * source's inode number.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
/* Drop one reference on the source node before moving its records. */
1323 if (--src_node->refs == 0)
/* Remember which inode the source was working on; used after the merge. */
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
/* First pass works on the root caches; the inode caches follow below. */
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry may be unlinked. */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
/* Build a new ptr_node covering the same range for the destination tree. */
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* Destination already tracks this range: merge the records instead. */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
/* maybe_free_inode_rec() is called on conflict below, so unhook it first. */
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* Root caches done: switch both cursors to the inode caches. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
/* Carry the source's current inode over when it is ahead of the destination. */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for the inode record caches: recovers the ptr_node
 * that embeds @cache and releases an inode record with free_inode_rec().
 * NOTE(review): rec is presumably taken from the ptr_node; confirm.
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Instantiates a tree-wide destructor from free_inode_ptr.
 * NOTE(review): presumably this defines free_inode_recs_tree(), which
 * enter_shared_node() calls; confirm against the macro definition.
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node that tracks the tree block at bytenr in @shared.
 * The lookup uses a 1-byte range starting at bytenr, which matches how
 * add_shared_node() keys its entries.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
/* The cache_extent is embedded in the shared_node; recover the container. */
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared_node for the tree block at @bytenr,
 * initialise its root and inode record caches, and insert it into @shared
 * keyed by a 1-byte range starting at @bytenr.
 * NOTE(review): @refs is presumably stored in node->refs; confirm.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping done when the tree walk reaches the block at @bytenr on
 * @level (the callers in walk_down_tree() pass the block's extent reference
 * count as @refs).
 *
 * If the block is not tracked yet, a shared_node is added for it and @level
 * becomes the active node. Otherwise the records already collected for the
 * block are either discarded (active node is the root level and the root
 * item has zero refs) or spliced into the currently active node. In both
 * cases the shared_node is removed from wc->shared once its reference
 * count reaches zero.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* A shared block is only expected below the currently active level. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* NOTE(review): presumably only reached when the lookup above found nothing. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/* Root with zero refs: drop our reference and free the records at zero. */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the collected records to the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
/* splice_shared_node() dropped one reference; unlink the node at zero. */
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), run for @level when it is not the
 * root level. A level index i above @level is selected as the new active
 * node, the previously active shared_node is detached from wc->nodes, and
 * its records are spliced into the new active node when that node is below
 * the root level or the root item still has references.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Scan the levels above @level for the next active node. */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and make level i the active one. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
/* The splice drops one reference; the node must survive it. */
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root child_root_id relates to root @parent_root_id by
 * searching the tree root of @root's filesystem.
 *
 * A ROOT_REF item keyed (parent_root_id, ROOT_REF, child_root_id) is
 * searched for first. Then the ROOT_BACKREF items of child_root_id are
 * iterated, comparing each key offset with @parent_root_id. When that
 * iteration finishes without a match, the result is 0 if has_parent is set
 * and 2 otherwise.
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Step 1: direct ROOT_REF lookup from the parent's side. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Step 2: walk the child's ROOT_BACKREF items. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Ran off the end of this leaf: move on to the next one. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items for a different root or of another type end the backref run. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF keys the offset is compared against the parent root id. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX item at @slot of leaf @eb on behalf of
 * the inode currently tracked by @active_node.
 *
 * The item may pack several btrfs_dir_item entries back to back. For each
 * entry the name is copied into a local buffer (clamped when the recorded
 * length is over BTRFS_NAME_LEN or crosses the item end), the name hash is
 * verified for DIR_ITEM keys, and a backref is recorded with
 * add_inode_backref() in either the inode cache or the root cache,
 * depending on the type of the entry's location key.
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
/* cur is compared against the item size; one entry is handled per pass. */
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
/* The recorded name length is accounted before it is validated. */
1604 rec->found_size += name_len;
/* Name must fit both the item and BTRFS_NAME_LEN; otherwise flag and clamp. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header. */
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
/* Entry points at an inode: record the backref in the inode cache. */
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
/* Entry points at a root item: record it in the root cache. */
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1639 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1640 location.type, key->objectid, key->offset);
/* Unknown location types are filed under BTRFS_MULTIPLE_OBJECTIDS. */
1641 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1642 key->objectid, key->offset, namebuf,
1643 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1646 len = sizeof(*di) + name_len + data_len;
1647 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a DIR_INDEX key is flagged as a duplicate. */
1650 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1651 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at @slot of leaf @eb. The item may hold
 * several btrfs_inode_ref entries; each one is turned into a backref in
 * @active_node's inode cache with key->objectid as the inode and
 * key->offset as the directory. Names that are over BTRFS_NAME_LEN or
 * cross the item end are flagged with REF_ERR_NAME_TOO_LONG and clamped.
 */
1656 static int process_inode_ref(struct extent_buffer *eb,
1657 int slot, struct btrfs_key *key,
1658 struct shared_node *active_node)
1666 struct cache_tree *inode_cache;
1667 struct btrfs_inode_ref *ref;
1668 char namebuf[BTRFS_NAME_LEN];
1670 inode_cache = &active_node->inode_cache;
1672 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1673 total = btrfs_item_size_nr(eb, slot);
1674 while (cur < total) {
1675 name_len = btrfs_inode_ref_name_len(eb, ref);
1676 index = btrfs_inode_ref_index(eb, ref);
1678 /* inode_ref + namelen should not cross item boundary */
1679 if (cur + sizeof(*ref) + name_len > total ||
1680 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item. */
1681 if (total < cur + sizeof(*ref))
1684 /* Still try to read out the remaining part */
1685 len = min_t(u32, total - cur - sizeof(*ref),
1687 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1693 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1694 add_inode_backref(inode_cache, key->objectid, key->offset,
1695 index, namebuf, len, 0, key->type, error);
/* Advance by the recorded name length, not the clamped one. */
1697 len = sizeof(*ref) + name_len;
1698 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at @slot of leaf @eb. Works like
 * process_inode_ref(), except that the parent directory is read from each
 * extref entry instead of from the key offset.
 *
 * NOTE(review): unlike process_inode_ref(), the visible checks only compare
 * name_len with BTRFS_NAME_LEN and not with the item end; confirm whether
 * that is intentional.
 */
1704 static int process_inode_extref(struct extent_buffer *eb,
1705 int slot, struct btrfs_key *key,
1706 struct shared_node *active_node)
1715 struct cache_tree *inode_cache;
1716 struct btrfs_inode_extref *extref;
1717 char namebuf[BTRFS_NAME_LEN];
1719 inode_cache = &active_node->inode_cache;
1721 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1722 total = btrfs_item_size_nr(eb, slot);
1723 while (cur < total) {
1724 name_len = btrfs_inode_extref_name_len(eb, extref);
1725 index = btrfs_inode_extref_index(eb, extref);
1726 parent = btrfs_inode_extref_parent(eb, extref);
1727 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only BTRFS_NAME_LEN bytes and flag the backref. */
1731 len = BTRFS_NAME_LEN;
1732 error = REF_ERR_NAME_TOO_LONG;
1734 read_extent_buffer(eb, namebuf,
1735 (unsigned long)(extref + 1), len);
1736 add_inode_backref(inode_cache, key->objectid, parent,
1737 index, namebuf, len, 0, key->type, error);
/* Advance by the recorded name length, not the clamped one. */
1739 len = sizeof(*extref) + name_len;
1740 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len). For every item the covered end offset is derived
 * from the item size, the per-sector checksum size and the sector size.
 * NOTE(review): the overlap size is presumably accumulated into *@found;
 * confirm.
 */
1747 static int count_csum_range(struct btrfs_root *root, u64 start,
1748 u64 len, u64 *found)
1750 struct btrfs_key key;
1751 struct btrfs_path path;
1752 struct extent_buffer *leaf;
1757 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1759 btrfs_init_path(&path);
1761 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1763 key.type = BTRFS_EXTENT_CSUM_KEY;
1765 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding csum item is inspected as well. */
1769 if (ret > 0 && path.slots[0] > 0) {
1770 leaf = path.nodes[0];
1771 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1772 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1773 key.type == BTRFS_EXTENT_CSUM_KEY)
1778 leaf = path.nodes[0];
/* Past the last slot of this leaf: continue in the next leaf. */
1779 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1780 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1785 leaf = path.nodes[0];
1788 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1790 key.type != BTRFS_EXTENT_CSUM_KEY)
1793 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item begins at or after the end of the requested range. */
1794 if (key.offset >= start + len)
1797 if (key.offset > start)
/* size / csum_size checksums, each covering one sector. */
1800 size = btrfs_item_size_nr(leaf, path.slots[0]);
1801 csum_end = key.offset + (size / csum_size) *
1802 root->fs_info->sectorsize;
1803 if (csum_end > start) {
1804 size = min(csum_end - start, len);
1813 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at @slot of leaf @eb for the inode currently
 * tracked by @active_node.
 *
 * Updates the inode record's extent range (recording holes and overlaps),
 * validates the extent item fields for inline, regular and preallocated
 * extents, adds to the found size, and, for extents with a disk bytenr that
 * do not belong to the data reloc tree, counts the checksums covering the
 * extent to set the csum related flags and errors.
 */
1819 static int process_file_extent(struct btrfs_root *root,
1820 struct extent_buffer *eb,
1821 int slot, struct btrfs_key *key,
1822 struct shared_node *active_node)
1824 struct inode_record *rec;
1825 struct btrfs_file_extent_item *fi;
1827 u64 disk_bytenr = 0;
1828 u64 extent_offset = 0;
/* Low-bit mask used for sector alignment checks and round-up. */
1829 u64 mask = root->fs_info->sectorsize - 1;
1833 rec = active_node->current;
1834 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1835 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1837 if (rec->extent_start == (u64)-1) {
1838 rec->extent_start = key->offset;
1839 rec->extent_end = key->offset;
/* Starts before the previous extent ended: overlap. Starts after: hole. */
1842 if (rec->extent_end > key->offset)
1843 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1844 else if (rec->extent_end < key->offset) {
1845 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1846 key->offset - rec->extent_end);
1851 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1852 extent_type = btrfs_file_extent_type(eb, fi);
1854 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1855 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1857 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1858 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1859 num_bytes = (num_bytes + mask) & ~mask;
1860 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1861 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1862 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1863 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1864 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1865 if (num_bytes == 0 || (num_bytes & mask))
1866 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside ram_bytes. */
1867 if (num_bytes + extent_offset >
1868 btrfs_file_extent_ram_bytes(eb, fi))
1869 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1870 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1871 (btrfs_file_extent_compression(eb, fi) ||
1872 btrfs_file_extent_encryption(eb, fi) ||
1873 btrfs_file_extent_other_encoding(eb, fi)))
1874 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk bytenr add to the found size. */
1875 if (disk_bytenr > 0)
1876 rec->found_size += num_bytes;
1878 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1880 rec->extent_end = key->offset + num_bytes;
1883 * The data reloc tree will copy full extents into its inode and then
1884 * copy the corresponding csums. Because the extent it copied could be
1885 * a preallocated extent that hasn't been written to yet there may be no
1886 * csums to copy, ergo we won't have csums for our file extent. This is
1887 * ok so just don't bother checking csums if the inode belongs to the
1890 if (disk_bytenr > 0 &&
1891 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is used for the csum range. */
1893 if (btrfs_file_extent_compression(eb, fi))
1894 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1896 disk_bytenr += extent_offset;
1898 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1901 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1903 rec->found_csum_item = 1;
/* Fewer checksummed bytes than extent bytes: some csums are missing. */
1904 if (found < num_bytes)
1905 rec->some_csum_missing = 1;
1906 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1908 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching process_*() helper, switching
 * the active node's current inode record whenever an item with a larger
 * objectid is reached. Items with the free space objectid and orphan items
 * get dedicated checks before the inode record handling.
 */
1914 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1915 struct walk_control *wc)
1917 struct btrfs_key key;
1921 struct cache_tree *inode_cache;
1922 struct shared_node *active_node;
/* Special case: the active node is the root level and the root has zero refs. */
1924 if (wc->root_level == wc->active_node &&
1925 btrfs_root_refs(&root->root_item) == 0)
1928 active_node = wc->nodes[wc->active_node];
1929 inode_cache = &active_node->inode_cache;
1930 nritems = btrfs_header_nritems(eb);
1931 for (i = 0; i < nritems; i++) {
1932 btrfs_item_key_to_cpu(eb, &key, i);
1934 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1936 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* New, larger inode number: retire the previous record and fetch the next. */
1939 if (active_node->current == NULL ||
1940 active_node->current->ino < key.objectid) {
1941 if (active_node->current) {
1942 active_node->current->checked = 1;
1943 maybe_free_inode_rec(inode_cache,
1944 active_node->current);
1946 active_node->current = get_inode_rec(inode_cache,
1948 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item key type. */
1951 case BTRFS_DIR_ITEM_KEY:
1952 case BTRFS_DIR_INDEX_KEY:
1953 ret = process_dir_item(eb, i, &key, active_node);
1955 case BTRFS_INODE_REF_KEY:
1956 ret = process_inode_ref(eb, i, &key, active_node);
1958 case BTRFS_INODE_EXTREF_KEY:
1959 ret = process_inode_extref(eb, i, &key, active_node);
1961 case BTRFS_INODE_ITEM_KEY:
1962 ret = process_inode_item(eb, i, &key, active_node);
1964 case BTRFS_EXTENT_DATA_KEY:
1965 ret = process_file_extent(root, eb, i, &key,
/* bytenr of the tree block most recently recorded for each level */
1976 u64 bytenr[BTRFS_MAX_LEVEL];
/* extent reference count looked up for that block */
1977 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the current root should check the block at this level */
1978 int need_check[BTRFS_MAX_LEVEL];
1979 /* field for checking all trees */
1980 int checked[BTRFS_MAX_LEVEL];
1981 /* the corresponding extent should be marked as full backref or not */
1982 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations needed by process_one_leaf_v2() below. */
1985 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1986 struct extent_buffer *eb, struct node_refs *nrefs,
1987 u64 level, int check_all);
1988 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1989 unsigned int ext_ref);
1992 * Returns >0 Found error, not fatal, should continue
1993 * Returns <0 Fatal error, must exit the whole check
1994 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first INODE_ITEM or the first change of
 * inode number, and check_inode_item() is run on the path. Because that
 * call may move @path to another leaf, the path is re-walked from the root
 * level down afterwards, refreshing @nrefs for every block that changed and
 * releasing the lower path nodes when a level does not need checking.
 */
1996 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1997 struct node_refs *nrefs, int *level, int ext_ref)
1999 struct extent_buffer *cur = path->nodes[0];
2000 struct btrfs_key key;
2004 int root_level = btrfs_header_level(root->node);
2006 int ret = 0; /* Final return value */
2007 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf so a later leaf switch can be detected. */
2009 cur_bytenr = cur->start;
2011 /* skip to first inode item or the first inode number change */
2012 nritems = btrfs_header_nritems(cur);
2013 for (i = 0; i < nritems; i++) {
2014 btrfs_item_key_to_cpu(cur, &key, i);
2016 first_ino = key.objectid;
2017 if (key.type == BTRFS_INODE_ITEM_KEY ||
2018 (first_ino && first_ino != key.objectid))
2022 path->slots[0] = nritems;
2028 err |= check_inode_item(root, path, ext_ref);
2030 /* modify cur since check_inode_item may change path */
2031 cur = path->nodes[0];
2033 if (err & LAST_ITEM)
2036 /* still have inode items in this leaf */
2037 if (cur->start == cur_bytenr)
2041 * we have switched to another leaf, above nodes may
2042 * have changed, here walk down the path, if a node
2043 * or leaf is shared, check whether we can skip this
2046 for (i = root_level; i >= 0; i--) {
/* Compare this level's block with the one already recorded in nrefs. */
2047 if (path->nodes[i]->start == nrefs->bytenr[i])
2050 ret = update_nodes_refs(root, path->nodes[i]->start,
2051 path->nodes[i], nrefs, i, 0);
2055 if (!nrefs->need_check[i]) {
/* Release every path node below *level. */
2061 for (i = 0; i < *level; i++) {
2062 free_extent_buffer(path->nodes[i]);
2063 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * pointer index @slot and continuing to the last pointer in the node.
 */
2072 static void reada_walk_down(struct btrfs_root *root,
2073 struct extent_buffer *node, int slot)
2075 struct btrfs_fs_info *fs_info = root->fs_info;
2082 level = btrfs_header_level(node);
2086 nritems = btrfs_header_nritems(node);
2087 for (i = slot; i < nritems; i++) {
/* Each pointer carries the child's bytenr and its expected generation. */
2088 bytenr = btrfs_node_blockptr(node, i);
2089 ptr_gen = btrfs_node_ptr_generation(node, i);
2090 readahead_tree_block(fs_info, bytenr, ptr_gen);
2095 * Check the child node/leaf by the following condition:
2096 * 1. the first item key of the node/leaf should be the same with the one
2098 * 2. block in parent node should match the child node/leaf.
2099 * 3. generation of parent node and child's header should be consistent.
2101 * Or the child node/leaf pointed by the key in parent is not valid.
2103 * We hope to check leaf owner too, but since subvol may share leaves,
2104 * which makes leaf owner check not so strong, key check should be
2105 * sufficient enough for that case.
2107 static int check_child_node(struct extent_buffer *parent, int slot,
2108 struct extent_buffer *child)
2110 struct btrfs_key parent_key;
2111 struct btrfs_key child_key;
2114 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2115 if (btrfs_header_level(child) == 0)
2116 btrfs_item_key_to_cpu(child, &child_key, 0);
2118 btrfs_node_key_to_cpu(child, &child_key, 0);
2120 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2123 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2124 parent_key.objectid, parent_key.type, parent_key.offset,
2125 child_key.objectid, child_key.type, child_key.offset);
2127 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2129 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2130 btrfs_node_blockptr(parent, slot),
2131 btrfs_header_bytenr(child));
2133 if (btrfs_node_ptr_generation(parent, slot) !=
2134 btrfs_header_generation(child)) {
2136 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2137 btrfs_header_generation(child),
2138 btrfs_node_ptr_generation(parent, slot));
2144 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2145 * in every fs or file tree check. Here we find all its root ids, and only check
2146 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root should check a tree block, given @roots, a ulist of
 * root ids. Lists with zero or one entry are handled separately; otherwise
 * @root's objectid is compared with the value of the first node of the
 * list's rb-tree.
 */
2148 static int need_check(struct btrfs_root *root, struct ulist *roots)
2150 struct rb_node *node;
2151 struct ulist_node *u;
2154 * @roots can be empty if it belongs to tree reloc tree
2155 * In that case, we should always check the leaf, as we can't use
2156 * the tree owner to ensure some other root will check it.
2158 if (roots->nnodes == 1 || roots->nnodes == 0)
/* The first rb-tree node holds the value compared against this root's id. */
2161 node = rb_first(&roots->root);
2162 u = rb_entry(node, struct ulist_node, rb_node);
2164 * current root id is not smallest, we skip it and let it be checked
2165 * in the fs or file tree who hash the smallest root id.
2167 if (root->objectid != u->val)
/*
 * Work out whether the extent of tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, setting or clearing that bit in the
 * caller's flags word (*flags_ret).
 *
 * Several shortcut tests run first (root objectid below
 * BTRFS_FIRST_FREE_OBJECTID, @eb being the root block, the RELOC header
 * flag, header owner equal to the root). If none decides, the block's
 * extent item is looked up in the extent tree, its flags are inspected,
 * and its inline refs are scanned for a TREE_BLOCK_REF whose offset equals
 * the header owner.
 */
2173 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2176 struct btrfs_root *extent_root = root->fs_info->extent_root;
2177 struct btrfs_root_item *ri = &root->root_item;
2178 struct btrfs_extent_inline_ref *iref;
2179 struct btrfs_extent_item *ei;
2180 struct btrfs_key key;
2181 struct btrfs_path *path = NULL;
2192 * Except file/reloc tree, we can not have FULL BACKREF MODE
2194 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Shortcut: @eb is the block the root item points at. */
2198 if (eb->start == btrfs_root_bytenr(ri))
2201 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
/* Shortcut: the block is owned by the root being checked. */
2204 owner = btrfs_header_owner(eb);
2205 if (owner == root->objectid)
2208 path = btrfs_alloc_path();
/* Search past any item for this bytenr, then step back to its extent item. */
2212 key.objectid = btrfs_header_bytenr(eb);
2214 key.offset = (u64)-1;
2216 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2223 ret = btrfs_previous_extent_item(extent_root, path,
2229 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent tree leaf, not the checked block. */
2231 eb = path->nodes[0];
2232 slot = path->slots[0];
2233 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2235 flags = btrfs_extent_flags(eb, ei);
2236 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item, up to the end of the item. */
2239 ptr = (unsigned long)(ei + 1);
2240 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys additionally carry a tree_block_info before the refs. */
2242 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2243 ptr += sizeof(struct btrfs_tree_block_info);
2246 /* Reached extent item ends normally */
2250 /* Beyond extent item end, wrong item size */
2252 error("extent item at bytenr %llu slot %d has wrong size",
2257 iref = (struct btrfs_extent_inline_ref *)ptr;
2258 offset = btrfs_extent_inline_ref_offset(eb, iref);
2259 type = btrfs_extent_inline_ref_type(eb, iref);
/* Look for a TREE_BLOCK_REF naming the header owner. */
2261 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2263 ptr += btrfs_extent_inline_ref_size(type);
2267 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2271 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2273 btrfs_free_path(path);
2278 * for a tree node or leaf, we record its reference count, so later if we still
2279 * process this node or leaf, don't need to compute its reference count again.
2281 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the nrefs slot for @level with information about the tree block
 * at @bytenr.
 *
 * When @bytenr is not (u64)-1, the block's extent info is looked up and
 * bytenr/refs are stored while full_backref/checked are reset for the
 * level; need_check is then derived from the set of roots referencing the
 * block, forced to 1, or inherited from the level above. When @check_all
 * is set and @eb is given, full_backref is recomputed with
 * calc_extent_flag_v2().
 */
2283 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2284 struct extent_buffer *eb, struct node_refs *nrefs,
2285 u64 level, int check_all)
2287 struct ulist *roots;
2290 int root_level = btrfs_header_level(root->node);
/* The slot for this level already records the same block. */
2294 if (nrefs->bytenr[level] == bytenr)
2297 if (bytenr != (u64)-1) {
2298 /* the return value of this function seems a mistake */
2299 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2300 level, 1, &refs, &flags);
2302 if (ret < 0 && !check_all)
2305 nrefs->bytenr[level] = bytenr;
2306 nrefs->refs[level] = refs;
2307 nrefs->full_backref[level] = 0;
2308 nrefs->checked[level] = 0;
/* Collect the roots referencing the block to decide who checks it. */
2311 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2316 check = need_check(root, roots);
2318 nrefs->need_check[level] = check;
2321 nrefs->need_check[level] = 1;
/* The root block itself is always checked. */
2323 if (level == root_level) {
2324 nrefs->need_check[level] = 1;
2327 * The node refs may have not been
2328 * updated if upper needs checking (the
2329 * lowest root_objectid) the node can
2332 nrefs->need_check[level] =
2333 nrefs->need_check[level + 1];
2339 if (check_all && eb) {
2340 calc_extent_flag_v2(root, eb, &flags);
2341 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2342 nrefs->full_backref[level] = 1;
2349 * @level if @level == -1 means extent data item
2350 * else normal tree block.
/*
 * Decide how strictly the extent at @level should be checked, based on the
 * recorded reference counts of the levels above it. Levels outside
 * [-1, root_level] and the root level itself are handled up front; for
 * anything lower, every level above @level up to the root level is tested
 * for a reference count greater than one.
 */
2352 static int should_check_extent_strictly(struct btrfs_root *root,
2353 struct node_refs *nrefs, int level)
2355 int root_level = btrfs_header_level(root->node);
2357 if (level > root_level || level < -1)
2359 if (level == root_level)
2362 * if the upper node is marked full backref, it should contain shared
2363 * backref of the parent (except owner == root->objectid).
/* Pre-increment: the scan starts one level above the given one. */
2365 while (++level <= root_level)
2366 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * At each step the extent reference count of the block is taken from
 * @nrefs when the bytenr matches, or looked up and cached there otherwise.
 * Leaves are handed to process_one_leaf(). For interior nodes the child at
 * the current slot is read (with readahead of its siblings), validated
 * against its parent pointer and by the generic leaf/node checker, and
 * then installed one level down in @path.
 */
2372 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2373 struct walk_control *wc, int *level,
2374 struct node_refs *nrefs)
2376 enum btrfs_tree_block_status status;
2379 struct btrfs_fs_info *fs_info = root->fs_info;
2380 struct extent_buffer *next;
2381 struct extent_buffer *cur;
2385 WARN_ON(*level < 0);
2386 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block when available. */
2388 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2389 refs = nrefs->refs[*level];
2392 ret = btrfs_lookup_extent_info(NULL, root,
2393 path->nodes[*level]->start,
2394 *level, 1, &refs, NULL);
2399 nrefs->bytenr[*level] = path->nodes[*level]->start;
2400 nrefs->refs[*level] = refs;
2404 ret = enter_shared_node(root, path->nodes[*level]->start,
2412 while (*level >= 0) {
2413 WARN_ON(*level < 0);
2414 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2415 cur = path->nodes[*level];
/* The header level must agree with the level the path claims. */
2417 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2420 if (path->slots[*level] >= btrfs_header_nritems(cur))
2423 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the child pointer at the current slot. */
2428 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2429 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2431 if (bytenr == nrefs->bytenr[*level - 1]) {
2432 refs = nrefs->refs[*level - 1];
2434 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2435 *level - 1, 1, &refs, NULL);
2439 nrefs->bytenr[*level - 1] = bytenr;
2440 nrefs->refs[*level - 1] = refs;
2445 ret = enter_shared_node(root, bytenr, refs,
2448 path->slots[*level]++;
/* Try the cached buffer first; fall back to reading it. */
2453 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2454 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2455 free_extent_buffer(next);
2456 reada_walk_down(root, cur, path->slots[*level]);
2457 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record the parent block as a corrupt extent. */
2458 if (!extent_buffer_uptodate(next)) {
2459 struct btrfs_key node_key;
2461 btrfs_node_key_to_cpu(path->nodes[*level],
2463 path->slots[*level]);
2464 btrfs_add_corrupt_extent_record(root->fs_info,
2466 path->nodes[*level]->start,
2467 root->fs_info->nodesize,
2474 ret = check_child_node(cur, path->slots[*level], next);
2476 free_extent_buffer(next);
2481 if (btrfs_is_leaf(next))
2482 status = btrfs_check_leaf(root, NULL, next);
2484 status = btrfs_check_node(root, NULL, next);
2485 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2486 free_extent_buffer(next);
/* Descend: install the child one level down, starting at slot 0. */
2491 *level = *level - 1;
2492 free_extent_buffer(path->nodes[*level]);
2493 path->nodes[*level] = next;
2494 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2497 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; used by account_bytes() below. */
2501 static int fs_root_objectid(u64 objectid);
2504 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global statistics
 * counters: total btree bytes, fs tree bytes (when fs_root_objectid()
 * accepts the root's objectid), extent tree bytes (by header owner), and
 * wasted space, taken from the leaf free space or from the number of unused
 * key pointers.
 */
2506 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2510 struct extent_buffer *eb = path->nodes[level];
2512 total_btree_bytes += eb->len;
2513 if (fs_root_objectid(root->objectid))
2514 total_fs_tree_bytes += eb->len;
2515 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2516 total_extent_tree_bytes += eb->len;
/* Waste is the unused space reported for the leaf. */
2519 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Waste is the number of unused key pointer slots times their size. */
2521 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2522 btrfs_header_nritems(eb));
2523 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2528 * This function only handles BACKREF_MISSING,
2529 * If corresponding extent item exists, increase the ref, else insert an extent
2532 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node (only BACKREF_MISSING in
 * @err is handled).
 *
 * The extent tree is searched for the block's extent item. If one has to be
 * inserted, an empty item with zero refs is created (METADATA_ITEM with
 * skinny metadata, otherwise EXTENT_ITEM followed by a tree_block_info) and
 * the block group accounting is updated. A reference is then added with
 * btrfs_inc_extent_ref(), using the upper level's bytenr as parent when that
 * level is marked full backref and the header owner differs from the root.
 *
 * @nrefs is kept in sync (refs and full_backref for @level), and
 * BACKREF_MISSING is cleared from @err on success.
 *
 * NOTE(review): nrefs arrays have BTRFS_MAX_LEVEL entries, so the
 * WARN_ON(level > BTRFS_MAX_LEVEL) below lets level == BTRFS_MAX_LEVEL
 * through; other checks in this file use >=. Confirm and tighten.
 */
2534 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2535 struct btrfs_root *root,
2536 struct extent_buffer *node,
2537 struct node_refs *nrefs, int level, int err)
2539 struct btrfs_fs_info *fs_info = root->fs_info;
2540 struct btrfs_root *extent_root = fs_info->extent_root;
2541 struct btrfs_path path;
2542 struct btrfs_extent_item *ei;
2543 struct btrfs_tree_block_info *bi;
2544 struct btrfs_key key;
2545 struct extent_buffer *eb;
2546 u32 size = sizeof(*ei);
2547 u32 node_size = root->fs_info->nodesize;
2548 int insert_extent = 0;
2549 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2550 int root_level = btrfs_header_level(root->node);
2555 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* Only BACKREF_MISSING is handled by this function. */
2558 if ((err & BACKREF_MISSING) == 0)
2561 WARN_ON(level > BTRFS_MAX_LEVEL);
2564 btrfs_init_path(&path);
2565 bytenr = btrfs_header_bytenr(node);
2566 owner = btrfs_header_owner(node);
2567 generation = btrfs_header_generation(node);
2569 key.objectid = bytenr;
2571 key.offset = (u64)-1;
2573 /* Search for the extent item */
2574 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2580 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2584 /* calculate if the extent item flag is full backref or not */
2585 if (nrefs->full_backref[level] != 0)
2586 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2588 /* insert an extent item */
2589 if (insert_extent) {
2590 struct btrfs_disk_key copy_key;
2592 generation = btrfs_header_generation(node);
/* Parent is full backref and the block belongs to another root. */
2594 if (level < root_level && nrefs->full_backref[level + 1] &&
2595 owner != root->objectid) {
2596 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2599 key.objectid = bytenr;
/* Non-skinny layout needs room for the trailing tree_block_info. */
2600 if (!skinny_metadata) {
2601 key.type = BTRFS_EXTENT_ITEM_KEY;
2602 key.offset = node_size;
2603 size += sizeof(*bi);
2605 key.type = BTRFS_METADATA_ITEM_KEY;
2609 btrfs_release_path(&path);
2610 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2616 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start with zero refs; the reference itself is added further down. */
2618 btrfs_set_extent_refs(eb, ei, 0);
2619 btrfs_set_extent_generation(eb, ei, generation);
2620 btrfs_set_extent_flags(eb, ei, flags);
2622 if (!skinny_metadata) {
2623 bi = (struct btrfs_tree_block_info *)(ei + 1);
2624 memset_extent_buffer(eb, 0, (unsigned long)bi,
/*
 * NOTE(review): the token "©_key" in the four calls below looks like a
 * mis-encoded "&copy_key" (copy_key is declared above); confirm and fix.
 */
2626 btrfs_set_disk_key_objectid(©_key, root->objectid);
2627 btrfs_set_disk_key_type(©_key, 0);
2628 btrfs_set_disk_key_offset(©_key, 0);
2630 btrfs_set_tree_block_level(eb, bi, level);
2631 btrfs_set_tree_block_key(eb, bi, ©_key);
2633 btrfs_mark_buffer_dirty(eb);
2634 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2635 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
/* Mirror the freshly inserted item in the cached per-level state. */
2637 nrefs->refs[level] = 0;
2638 nrefs->full_backref[level] =
2639 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640 btrfs_release_path(&path);
/* Use a shared (parent based) ref when the upper level is full backref. */
2643 if (level < root_level && nrefs->full_backref[level + 1] &&
2644 owner != root->objectid)
2645 parent = nrefs->bytenr[level + 1];
2647 /* increase the ref */
2648 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649 parent, root->objectid, level, 0);
2651 nrefs->refs[level]++;
2653 btrfs_release_path(&path);
2656 "failed to repair tree block ref start %llu root %llu due to %s",
2657 bytenr, root->objectid, strerror(-ret));
2659 printf("Added one tree block ref start %llu %s %llu\n",
2660 bytenr, parent ? "parent" : "root",
2661 parent ? parent : root->objectid);
/* Repair done: drop the error bit reported to the caller. */
2662 err &= ~BACKREF_MISSING;
/* Forward declarations of helpers called by walk_down_tree_v2() below. */
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669 unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671 struct extent_buffer *eb, u64 bytenr,
2672 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674 struct btrfs_root *root, struct btrfs_path *path,
2675 struct node_refs *nrefs, int account_bytes);
2678 * @trans just for lowmem repair mode
2679 * @check_all if not 0 then check all tree block backrefs and items
2680 * 0 then just check relationship of items in fs tree(s)
2682 * Returns >0 Found error, should continue
2683 * Returns <0 Fatal error, must exit the whole check
2684 * Returns 0 No errors found
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687 struct btrfs_root *root, struct btrfs_path *path,
2688 int *level, struct node_refs *nrefs, int ext_ref,
2692 enum btrfs_tree_block_status status;
2695 struct btrfs_fs_info *fs_info = root->fs_info;
2696 struct extent_buffer *next;
2697 struct extent_buffer *cur;
2701 int account_file_data = 0;
2703 WARN_ON(*level < 0);
2704 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2706 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707 path->nodes[*level], nrefs, *level, check_all);
2711 while (*level >= 0) {
2712 WARN_ON(*level < 0);
2713 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714 cur = path->nodes[*level];
2715 bytenr = btrfs_header_bytenr(cur);
2716 check = nrefs->need_check[*level];
2718 if (btrfs_header_level(cur) != *level)
2721 * Update bytes accounting and check tree block ref
2722 * NOTE: Doing accounting and check before checking nritems
2723 * is necessary because of empty node/leaf.
2725 if ((check_all && !nrefs->checked[*level]) ||
2726 (!check_all && nrefs->need_check[*level])) {
2727 ret = check_tree_block_ref(root, cur,
2728 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729 btrfs_header_owner(cur), nrefs);
2732 ret = repair_tree_block_ref(trans, root,
2733 path->nodes[*level], nrefs, *level, ret);
2736 if (check_all && nrefs->need_check[*level] &&
2737 nrefs->refs[*level]) {
2738 account_bytes(root, path, *level);
2739 account_file_data = 1;
2741 nrefs->checked[*level] = 1;
2744 if (path->slots[*level] >= btrfs_header_nritems(cur))
2747 /* Don't forgot to check leaf/node validation */
2749 /* skip duplicate check */
2750 if (check || !check_all) {
2751 ret = btrfs_check_leaf(root, NULL, cur);
2752 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2760 ret = process_one_leaf_v2(root, path, nrefs,
2763 ret = check_leaf_items(trans, root, path,
2764 nrefs, account_file_data);
2768 if (check || !check_all) {
2769 ret = btrfs_check_node(root, NULL, cur);
2770 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2777 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2780 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2785 * check all trees in check_chunks_and_extent_v2
2786 * check shared node once in check_fs_roots
2788 if (!check_all && !nrefs->need_check[*level - 1]) {
2789 path->slots[*level]++;
2793 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795 free_extent_buffer(next);
2796 reada_walk_down(root, cur, path->slots[*level]);
2797 next = read_tree_block(fs_info, bytenr, ptr_gen);
2798 if (!extent_buffer_uptodate(next)) {
2799 struct btrfs_key node_key;
2801 btrfs_node_key_to_cpu(path->nodes[*level],
2803 path->slots[*level]);
2804 btrfs_add_corrupt_extent_record(fs_info,
2805 &node_key, path->nodes[*level]->start,
2806 fs_info->nodesize, *level);
2812 ret = check_child_node(cur, path->slots[*level], next);
2817 if (btrfs_is_leaf(next))
2818 status = btrfs_check_leaf(root, NULL, next);
2820 status = btrfs_check_node(root, NULL, next);
2821 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822 free_extent_buffer(next);
2827 *level = *level - 1;
2828 free_extent_buffer(path->nodes[*level]);
2829 path->nodes[*level] = next;
2830 path->slots[*level] = 0;
2831 account_file_data = 0;
2833 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Walk back up the path from *level. Each level is tested for a slot after
 * the current one; for an exhausted level the buffer at path->nodes[*level]
 * is released and cleared, and leave_shared_node() is called when that level
 * is wc->active_node.
 */
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839 struct walk_control *wc, int *level)
2842 struct extent_buffer *leaf;
2844 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, leaf is simply the block at level i. */
2845 leaf = path->nodes[i];
/* True when this level still has a slot after the current one. */
2846 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2851 free_extent_buffer(path->nodes[*level]);
2852 path->nodes[*level] = NULL;
/* The level being left must not be above the active shared node. */
2853 BUG_ON(*level > wc->active_node);
2854 if (*level == wc->active_node)
2855 leave_shared_node(root, wc, *level);
/*
 * Variant of the upward walk without a walk_control: each level from *level
 * upwards is tested for a slot after the current one, and for an exhausted
 * level the buffer at path->nodes[*level] is released and cleared.
 */
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2866 struct extent_buffer *leaf;
2868 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, leaf is simply the block at level i. */
2869 leaf = path->nodes[i];
/* True when this level still has a slot after the current one. */
2870 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2875 free_extent_buffer(path->nodes[*level]);
2876 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. Each test below matches
 * a condition that a well-formed root dir record is not expected to meet.
 * NOTE(review): the return convention is not visible in this block.
 */
2883 static int check_root_dir(struct inode_record *rec)
2885 struct inode_backref *backref;
/* Must have an inode item and no recorded errors. */
2888 if (!rec->found_inode_item || rec->errors)
/* Expected link count is exactly 1 with no links found. */
2890 if (rec->nlink != 1 || rec->found_link != 0)
2892 if (list_empty(&rec->backrefs))
/* Only the first backref is inspected. */
2894 backref = to_inode_backref(rec->backrefs.next);
2895 if (!backref->found_inode_ref)
/* The backref must be index 0 with the two-byte name "..". */
2897 if (backref->index != 0 || backref->namelen != 2 ||
2898 memcmp(backref->name, "..", 2))
/* It must not have a dir index or dir item. */
2900 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino to rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record. The path is released before
 * returning.
 */
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root, struct btrfs_path *path,
2909 struct inode_record *rec)
2911 struct btrfs_inode_item *ei;
2912 struct btrfs_key key;
/* Search with the largest possible offset for (ino, INODE_ITEM). */
2915 key.objectid = rec->ino;
2916 key.type = BTRFS_INODE_ITEM_KEY;
2917 key.offset = (u64)-1;
2919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is tested separately before the found key is read. */
2923 if (!path->slots[0]) {
/* Confirm the item found really belongs to this inode. */
2930 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931 if (key.objectid != rec->ino) {
2936 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937 struct btrfs_inode_item);
2938 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939 btrfs_mark_buffer_dirty(path->nodes[0]);
2940 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942 root->root_key.objectid);
2944 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino via btrfs_add_orphan_item(), release the
 * path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949 struct btrfs_root *root,
2950 struct btrfs_path *path,
2951 struct inode_record *rec)
2955 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956 btrfs_release_path(path);
2958 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino and clear
 * I_ERR_FILE_NBYTES_WRONG on the record. The path is released before
 * returning.
 */
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963 struct btrfs_root *root,
2964 struct btrfs_path *path,
2965 struct inode_record *rec)
2967 struct btrfs_inode_item *ei;
2968 struct btrfs_key key;
2971 key.objectid = rec->ino;
2972 key.type = BTRFS_INODE_ITEM_KEY;
2975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2982 /* Since ret == 0, no need to check anything */
2983 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984 struct btrfs_inode_item);
/* The new nbytes value is taken from rec->found_size. */
2985 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986 btrfs_mark_buffer_dirty(path->nodes[0]);
2987 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988 printf("reset nbytes for ino %llu root %llu\n",
2989 rec->ino, root->root_key.objectid);
2991 btrfs_release_path(path);
/*
 * Insert a DIR_INDEX item (backref->dir, backref->index) that names inode
 * rec->ino, in its own transaction, then update the in-memory records: the
 * backref is marked as having a dir index and the parent directory's
 * found_size / I_ERR_DIR_ISIZE_WRONG state is recomputed.
 */
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996 struct cache_tree *inode_cache,
2997 struct inode_record *rec,
2998 struct inode_backref *backref)
3000 struct btrfs_path path;
3001 struct btrfs_trans_handle *trans;
3002 struct btrfs_dir_item *dir_item;
3003 struct extent_buffer *leaf;
3004 struct btrfs_key key;
3005 struct btrfs_disk_key disk_key;
3006 struct inode_record *dir_rec;
3007 unsigned long name_ptr;
/* Item payload: the dir item header followed directly by the name bytes. */
3008 u32 data_size = sizeof(*dir_item) + backref->namelen;
3011 trans = btrfs_start_transaction(root, 1);
3013 return PTR_ERR(trans);
3015 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016 (unsigned long long)rec->ino);
3018 btrfs_init_path(&path);
3019 key.objectid = backref->dir;
3020 key.type = BTRFS_DIR_INDEX_KEY;
3021 key.offset = backref->index;
3022 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3025 leaf = path.nodes[0];
3026 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
3028 disk_key.objectid = cpu_to_le64(rec->ino);
3029 disk_key.type = BTRFS_INODE_ITEM_KEY;
3030 disk_key.offset = 0;
3032 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034 btrfs_set_dir_data_len(leaf, dir_item, 0);
3035 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written immediately after the dir item header. */
3036 name_ptr = (unsigned long)(dir_item + 1);
3037 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038 btrfs_mark_buffer_dirty(leaf);
3039 btrfs_release_path(&path);
3040 btrfs_commit_transaction(trans, root);
3042 backref->found_dir_index = 1;
3043 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the parent's found size and re-evaluate the isize error bit. */
3047 dir_rec->found_size += backref->namelen;
3048 if (dir_rec->found_size == dir_rec->isize &&
3049 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051 if (dir_rec->found_size != dir_rec->isize)
3052 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Look up the dir index entry described by @backref (dir, name, index) and
 * delete it, in its own transaction. The visible code has two deletion calls:
 * btrfs_del_item() and btrfs_delete_one_dir_name().
 */
3057 static int delete_dir_index(struct btrfs_root *root,
3058 struct inode_backref *backref)
3060 struct btrfs_trans_handle *trans;
3061 struct btrfs_dir_item *di;
3062 struct btrfs_path path;
3065 trans = btrfs_start_transaction(root, 1);
3067 return PTR_ERR(trans);
3069 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070 (unsigned long long)backref->dir,
3071 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072 (unsigned long long)root->objectid);
3074 btrfs_init_path(&path);
3075 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076 backref->name, backref->namelen,
3077 backref->index, -1);
/* One exit path releases the path and commits without deleting anything. */
3080 btrfs_release_path(&path);
3081 btrfs_commit_transaction(trans, root);
/* Either the whole item or only the matching name inside it is removed. */
3088 ret = btrfs_del_item(trans, root, &path);
3090 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3092 btrfs_release_path(&path);
3093 btrfs_commit_transaction(trans, root);
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root, u64 ino, u64 size,
3099 u64 nbytes, u64 nlink, u32 mode)
3101 struct btrfs_inode_item ii;
3102 time_t now = time(NULL);
3105 btrfs_set_stack_inode_size(&ii, size);
3106 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107 btrfs_set_stack_inode_nlink(&ii, nlink);
3108 btrfs_set_stack_inode_mode(&ii, mode);
3109 btrfs_set_stack_inode_generation(&ii, trans->transid);
3110 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3118 ret = btrfs_insert_inode(trans, root, ino, &ii);
3121 warning("root %llu inode %llu recreating inode item, this may "
3122 "be incomplete, please check permissions and content after "
3123 "the fsck completes.\n", (unsigned long long)root->objectid,
3124 (unsigned long long)ino);
/*
 * Thin wrapper around __create_inode_item(): inserts an inode item for @ino
 * with size, nbytes and nlink all 0. The mode is S_IFDIR when filetype is
 * BTRFS_FT_DIR and S_IFREG otherwise, with permission bits 0755.
 */
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130 struct btrfs_root *root, u64 ino,
3133 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3135 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec in its own transaction. The record decides
 * the shape of the new inode: with a dir item it becomes a directory sized
 * rec->found_size, otherwise a regular file sized rec->extent_end.
 */
3138 static int create_inode_item(struct btrfs_root *root,
3139 struct inode_record *rec, int root_dir)
3141 struct btrfs_trans_handle *trans;
3147 trans = btrfs_start_transaction(root, 1);
3148 if (IS_ERR(trans)) {
3149 ret = PTR_ERR(trans);
/* A root directory always gets nlink 1; other inodes use the number of links found. */
3153 nlink = root_dir ? 1 : rec->found_link;
3154 if (rec->found_dir_item) {
/* Ambiguous evidence (dir item and file extents): warn and pick directory. */
3155 if (rec->found_file_extent)
3156 fprintf(stderr, "root %llu inode %llu has both a dir "
3157 "item and extents, unsure if it is a dir or a "
3158 "regular file so setting it as a directory\n",
3159 (unsigned long long)root->objectid,
3160 (unsigned long long)rec->ino);
3161 mode = S_IFDIR | 0755;
3162 size = rec->found_size;
/* NOTE(review): this condition is the exact negation of the one above, so a plain else would do. */
3163 } else if (!rec->found_dir_item) {
3164 size = rec->extent_end;
3165 mode = S_IFREG | 0755;
3168 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3170 btrfs_commit_transaction(trans, root);
/*
 * Go through every backref of @rec and repair the cases recognised below:
 * recreate a missing root dir inode item, delete a bad dir index, add a
 * missing dir index, add a missing dir index/item pair, or recreate a missing
 * inode item. The `delete` flag selects between the deletion case and the
 * add/create cases.
 *
 * Returns ret if it is non-zero, otherwise the `repaired` counter.
 */
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175 struct inode_record *rec,
3176 struct cache_tree *inode_cache,
3179 struct inode_backref *tmp, *backref;
3180 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because entries are unlinked inside the loop. */
3184 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate it as a root dir. */
3185 if (!delete && rec->ino == root_dirid) {
3186 if (!rec->found_inode_item) {
3187 ret = create_inode_item(root, rec, 1);
3194 /* Index 0 for root dir's are special, don't mess with it */
3195 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without an inode ref, or with a mismatching index: delete it. */
3199 ((backref->found_dir_index && !backref->found_inode_ref) ||
3200 (backref->found_dir_index && backref->found_inode_ref &&
3201 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202 ret = delete_dir_index(root, backref);
3206 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
3211 if (!delete && !backref->found_dir_index &&
3212 backref->found_dir_item && backref->found_inode_ref) {
3213 ret = add_missing_dir_index(root, inode_cache, rec,
/* Once the backref is complete and error-free it is dropped from the list. */
3218 if (backref->found_dir_item &&
3219 backref->found_dir_index) {
3220 if (!backref->errors &&
3221 backref->found_inode_ref) {
3222 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
3229 if (!delete && (!backref->found_dir_index &&
3230 !backref->found_dir_item &&
3231 backref->found_inode_ref)) {
3232 struct btrfs_trans_handle *trans;
3233 struct btrfs_key location;
3235 ret = check_dir_conflict(root, backref->name,
3241 * let nlink fixing routine to handle it,
3242 * which can do it better.
3247 location.objectid = rec->ino;
3248 location.type = BTRFS_INODE_ITEM_KEY;
3249 location.offset = 0;
3251 trans = btrfs_start_transaction(root, 1);
3252 if (IS_ERR(trans)) {
3253 ret = PTR_ERR(trans);
3256 fprintf(stderr, "adding missing dir index/item pair "
3258 (unsigned long long)rec->ino);
3259 ret = btrfs_insert_dir_item(trans, root, backref->name,
3261 backref->dir, &location,
3262 imode_to_type(rec->imode),
3265 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it. */
3269 if (!delete && (backref->found_inode_ref &&
3270 backref->found_dir_index &&
3271 backref->found_dir_item &&
3272 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273 !rec->found_inode_item)) {
3274 ret = create_inode_item(root, rec, 0);
3281 return ret ? ret : repaired;
/*
 * Two sources are tried in order: the mode of an existing inode item, then
 * the filetype of the first backref that has a dir index or dir item.
 */
3285 * To determine the file type for nlink/inode_item repair
3287 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288 * Return -ENOENT if file type is not found.
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3292 struct inode_backref *backref;
3294 /* For inode item recovered case */
3295 if (rec->found_inode_item) {
3296 *type = imode_to_type(rec->imode);
/* Otherwise take the type recorded by a directory entry pointing at the inode. */
3300 list_for_each_entry(backref, &rec->backrefs, list) {
3301 if (backref->found_dir_index || backref->found_dir_item) {
3302 *type = backref->filetype;
/*
 * The name comes from the first backref that has any of dir index, dir item
 * or inode ref. Exactly backref->namelen bytes are copied and no NUL
 * terminator is appended here.
 * NOTE(review): @name must be able to hold backref->namelen bytes; no bound
 * is checked in this function.
 */
3310 * To determine the file name for nlink repair
3312 * Return 0 if file name is found, set name and namelen.
3313 * Return -ENOENT if file name is not found.
3315 static int find_file_name(struct inode_record *rec,
3316 char *name, int *namelen)
3318 struct inode_backref *backref;
3320 list_for_each_entry(backref, &rec->backrefs, list) {
3321 if (backref->found_dir_index || backref->found_dir_item ||
3322 backref->found_inode_ref) {
3323 memcpy(name, backref->name, backref->namelen);
3324 *namelen = backref->namelen;
/*
 * Three phases: unlink every backref of the inode, force the on-disk nlink
 * to 0, then re-add the backrefs that are still on the list.
 */
3331 /* Reset the nlink of the inode to the correct one */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root,
3334 struct btrfs_path *path,
3335 struct inode_record *rec)
3337 struct inode_backref *backref;
3338 struct inode_backref *tmp;
3339 struct btrfs_key key;
3340 struct btrfs_inode_item *inode_item;
3343 /* We don't believe this either, reset it and iterate backref */
3344 rec->found_link = 0;
3346 /* Remove all backref including the valid ones */
3347 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349 backref->index, backref->name,
3350 backref->namelen, 0);
3354 /* remove invalid backref, so it won't be added back */
/* "Invalid" here means lacking any one of dir index, dir item or inode ref. */
3355 if (!(backref->found_dir_index &&
3356 backref->found_dir_item &&
3357 backref->found_inode_ref)) {
3358 list_del(&backref->list);
3365 /* Set nlink to 0 */
3366 key.objectid = rec->ino;
3367 key.type = BTRFS_INODE_ITEM_KEY;
3369 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3376 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377 struct btrfs_inode_item);
3378 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379 btrfs_mark_buffer_dirty(path->nodes[0]);
3380 btrfs_release_path(path);
3383 * Add back valid inode_ref/dir_item/dir_index,
3384 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only backrefs that survived the removal loop are re-added. */
3386 list_for_each_entry(backref, &rec->backrefs, list) {
3387 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388 backref->name, backref->namelen,
3389 backref->filetype, &backref->index, 1, 0);
3394 btrfs_release_path(path);
/*
 * Find the highest inode number in @root by searching for an INODE_ITEM key
 * at BTRFS_LAST_FREE_OBJECTID and reading the objectid of the item in the
 * slot just before the one the search stopped at. The path is initialised
 * here and released before returning.
 * NOTE(review): the item is read at path->slots[0] - 1; confirm that slot 0
 * cannot occur on this path.
 */
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399 struct btrfs_root *root,
3400 struct btrfs_path *path,
3403 struct btrfs_key key, found_key;
3406 btrfs_init_path(path);
3407 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3409 key.type = BTRFS_INODE_ITEM_KEY;
3410 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3412 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413 path->slots[0] - 1);
3414 *highest_ino = found_key.objectid;
/* An inode number at or beyond BTRFS_LAST_FREE_OBJECTID leaves no room for a new one. */
3417 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3419 btrfs_release_path(path);
/*
 * Create the 'lost+found' directory under BTRFS_FIRST_FREE_OBJECTID via
 * btrfs_mkdir() and link @ino into it under @namebuf. On a name collision
 * (-EEXIST) a ".<ino>" suffix is appended to @namebuf and the link is
 * retried, as long as the result still fits in BTRFS_NAME_LEN.
 * NOTE(review): @namebuf is modified in place and is assumed to be at least
 * BTRFS_NAME_LEN bytes; verify against callers.
 */
3424 * Link inode to dir 'lost+found'. Increase @ref_count.
3426 * Returns 0 means success.
3427 * Returns <0 means failure.
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430 struct btrfs_root *root,
3431 struct btrfs_path *path,
3432 u64 ino, char *namebuf, u32 name_len,
3433 u8 filetype, u64 *ref_count)
3435 char *dir_name = "lost+found";
3440 btrfs_release_path(path);
/* The highest existing inode number is looked up into lost_found_ino first. */
3441 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3446 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3450 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3453 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454 namebuf, name_len, filetype, NULL, 1, 0);
3456 * Add ".INO" suffix several times to handle case where
3457 * "FILENAME.INO" is already taken by another file.
3459 while (ret == -EEXIST) {
3461 * Conflicting file name, add ".INO" as suffix * +1 for '.'
/* Stop retrying once the suffixed name would exceed BTRFS_NAME_LEN. */
3463 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3467 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3469 name_len += count_digits(ino) + 1;
3470 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471 name_len, filetype, NULL, 1, 0);
3474 error("failed to link the inode %llu to %s dir: %s",
3475 ino, dir_name, strerror(-ret));
3480 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481 name_len, namebuf, dir_name);
3483 btrfs_release_path(path);
3485 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count: remember a usable name and file type, rebuild
 * the links with reset_nlink(), and if no link was found afterwards move the
 * inode into lost+found. I_ERR_LINK_COUNT_WRONG is cleared in any case.
 */
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491 struct btrfs_root *root,
3492 struct btrfs_path *path,
3493 struct inode_record *rec)
3495 char namebuf[BTRFS_NAME_LEN] = {0};
3498 int name_recovered = 0;
3499 int type_recovered = 0;
3503 * Get file name and type first before these invalid inode ref
3504 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3506 name_recovered = !find_file_name(rec, namebuf, &namelen);
3507 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the decimal inode number. */
3509 if (!name_recovered) {
3510 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511 rec->ino, rec->ino);
3512 namelen = count_digits(rec->ino);
3513 sprintf(namebuf, "%llu", rec->ino);
/* Fallback type: regular file. */
3516 if (!type_recovered) {
3517 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3519 type = BTRFS_FT_REG_FILE;
3523 ret = reset_nlink(trans, root, path, rec);
3526 "Failed to reset nlink for inode %llu: %s\n",
3527 rec->ino, strerror(-ret));
/* No link was found for the inode: give it a home in lost+found. */
3531 if (rec->found_link == 0) {
3532 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533 namebuf, namelen, type,
/* NOTE(review): the cast assumes rec->found_link is as wide as u64; confirm against struct inode_record. */
3534 (u64 *)&rec->found_link);
3538 printf("Fixed the nlink of inode %llu\n", rec->ino);
3541 * Clear the flag anyway, or we will loop forever for the same inode
3542 * as it will not be removed from the bad inode list and the dead loop
3545 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546 btrfs_release_path(path);
/*
 * Read-only lookup (NULL transaction, no COW): the search is positioned on an
 * EXTENT_DATA key and the item found is tested for being a non-inline file
 * extent of @ino.
 */
3551 * Check if there is any normal(reg or prealloc) file extent for given
3553 * This is used to determine the file type when neither its dir_index/item or
3554 * inode_item exists.
3556 * This will *NOT* report error, if any error happens, just consider it does
3557 * not have any normal file extent.
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3561 struct btrfs_path path;
3562 struct btrfs_key key;
3563 struct btrfs_key found_key;
3564 struct btrfs_file_extent_item *fi;
3568 btrfs_init_path(&path);
3570 key.type = BTRFS_EXTENT_DATA_KEY;
3573 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of the leaf: move to the next leaf. */
3578 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579 ret = btrfs_next_leaf(root, &path);
3586 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* The item must be an EXTENT_DATA item of this very inode. */
3588 if (found_key.objectid != ino ||
3589 found_key.type != BTRFS_EXTENT_DATA_KEY)
3591 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592 struct btrfs_file_extent_item);
3593 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything other than an inline extent counts as a normal file extent. */
3594 if (type != BTRFS_FILE_EXTENT_INLINE) {
3600 btrfs_release_path(&path);
3604 static u32 btrfs_type_to_imode(u8 type)
3606 static u32 imode_by_btrfs_type[] = {
3607 [BTRFS_FT_REG_FILE] = S_IFREG,
3608 [BTRFS_FT_DIR] = S_IFDIR,
3609 [BTRFS_FT_CHRDEV] = S_IFCHR,
3610 [BTRFS_FT_BLKDEV] = S_IFBLK,
3611 [BTRFS_FT_FIFO] = S_IFIFO,
3612 [BTRFS_FT_SOCK] = S_IFSOCK,
3613 [BTRFS_FT_SYMLINK] = S_IFLNK,
3616 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec->ino. The file type is taken from
 * find_file_type() when available and guessed from the record otherwise; the
 * inode is created with btrfs_new_inode() and the record is updated so that
 * the nlink repair runs afterwards.
 */
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620 struct btrfs_root *root,
3621 struct btrfs_path *path,
3622 struct inode_record *rec)
3626 int type_recovered = 0;
3629 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3631 type_recovered = !find_file_type(rec, &filetype);
3634 * Try to determine inode type if type not found.
3636 * For found regular file extent, it must be FILE.
3637 * For found dir_item/index, it must be DIR.
3639 * For undetermined one, use FILE as fallback.
3642 * 1. If found backref(inode_index/item is already handled) to it,
3644 * Need new inode-inode ref structure to allow search for that.
3646 if (!type_recovered) {
3647 if (rec->found_file_extent &&
3648 find_normal_file_extent(root, rec->ino)) {
3650 filetype = BTRFS_FT_REG_FILE;
3651 } else if (rec->found_dir_item) {
3653 filetype = BTRFS_FT_DIR;
/* Orphan data extents also point to a regular file. */
3654 } else if (!list_empty(&rec->orphan_extents)) {
3656 filetype = BTRFS_FT_REG_FILE;
3658 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3661 filetype = BTRFS_FT_REG_FILE;
3665 ret = btrfs_new_inode(trans, root, rec->ino,
3666 mode | btrfs_type_to_imode(filetype));
3671 * Here inode rebuild is done, we only rebuild the inode item,
3672 * don't repair the nlink(like move to lost+found).
3673 * That is the job of nlink repair.
3675 * We just fill the record and return
3677 rec->found_dir_item = 1;
3678 rec->imode = mode | btrfs_type_to_imode(filetype);
3680 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681 /* Ensure the inode_nlinks repair function will be called */
3682 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every orphan data extent attached to @rec: an extent that conflicts
 * with an existing file extent is freed, otherwise a file extent item is
 * inserted for it and the record's size and hole bookkeeping is updated.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688 struct btrfs_root *root,
3689 struct btrfs_path *path,
3690 struct inode_record *rec)
3692 struct orphan_data_extent *orphan;
3693 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are unlinked inside the loop. */
3696 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3698 * Check for conflicting file extents
3700 * Here we don't know whether the extents is compressed or not,
3701 * so we can only assume it not compressed nor data offset,
3702 * and use its disk_len as extent length.
3704 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705 orphan->offset, orphan->disk_len, 0);
3706 btrfs_release_path(path);
3711 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712 orphan->disk_bytenr, orphan->disk_len);
3713 ret = btrfs_free_extent(trans,
3714 root->fs_info->extent_root,
3715 orphan->disk_bytenr, orphan->disk_len,
3716 0, root->objectid, orphan->objectid,
/* disk_len is passed twice, as the last two length arguments. */
3721 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722 orphan->offset, orphan->disk_bytenr,
3723 orphan->disk_len, orphan->disk_len);
3727 /* Update file size info */
3728 rec->found_size += orphan->disk_len;
3729 if (rec->found_size == rec->nbytes)
3730 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3732 /* Update the file extent hole info too */
3733 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3737 if (RB_EMPTY_ROOT(&rec->holes))
3738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3740 list_del(&orphan->list);
3743 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes: each hole is punched with
 * btrfs_punch_hole() and then removed from the tree. A separate call punches
 * one hole from offset 0 up to isize rounded up to the sector size.
 */
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749 struct btrfs_root *root,
3750 struct btrfs_path *path,
3751 struct inode_record *rec)
3753 struct rb_node *node;
3754 struct file_extent_hole *hole;
/* Always work on the first hole; it is deleted from the tree after each step. */
3758 node = rb_first(&rec->holes);
3762 hole = rb_entry(node, struct file_extent_hole, node);
3763 ret = btrfs_punch_hole(trans, root, rec->ino,
3764 hole->start, hole->len);
3767 ret = del_file_extent_hole(&rec->holes, hole->start,
/* The error bit is cleared as soon as the hole tree becomes empty. */
3771 if (RB_EMPTY_ROOT(&rec->holes))
3772 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3773 node = rb_first(&rec->holes);
3775 /* special case for a file losing all its file extent */
3777 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778 round_up(rec->isize,
3779 root->fs_info->sectorsize));
3783 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784 rec->ino, root->objectid);
/*
 * Run the individual inode repair helpers for every repairable error bit set
 * in rec->errors, inside a single transaction. Each helper after the first
 * runs only while ret is still 0, so one failure skips the remaining helpers.
 */
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3791 struct btrfs_trans_handle *trans;
3792 struct btrfs_path path;
/* Test whether any of the error bits handled by this function is set. */
3795 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796 I_ERR_NO_ORPHAN_ITEM |
3797 I_ERR_LINK_COUNT_WRONG |
3798 I_ERR_NO_INODE_ITEM |
3799 I_ERR_FILE_EXTENT_ORPHAN |
3800 I_ERR_FILE_EXTENT_DISCOUNT|
3801 I_ERR_FILE_NBYTES_WRONG)))
3805 * For nlink repair, it may create a dir and add link, so
3806 * 2 for parent(256)'s dir_index and dir_item
3807 * 2 for lost+found dir's inode_item and inode_ref
3808 * 1 for the new inode_ref of the file
3809 * 2 for lost+found dir's dir_index and dir_item for the file
3811 trans = btrfs_start_transaction(root, 7);
3813 return PTR_ERR(trans);
3815 btrfs_init_path(&path);
/* The order matters: the inode item is rebuilt first, nbytes is fixed last. */
3816 if (rec->errors & I_ERR_NO_INODE_ITEM)
3817 ret = repair_inode_no_item(trans, root, &path, rec);
3818 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823 ret = repair_inode_isize(trans, root, &path, rec);
3824 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827 ret = repair_inode_nlinks(trans, root, &path, rec);
3828 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829 ret = repair_inode_nbytes(trans, root, &path, rec);
/* The transaction is committed regardless of the helpers' result. */
3830 btrfs_commit_transaction(trans, root);
3831 btrfs_release_path(&path);
/*
 * Check, and in repair mode try to fix, every inode record collected for
 * @root. Backrefs are repaired first, then the root directory record is
 * verified (and recreated if missing), and finally each remaining record is
 * checked, repaired if possible, reported and freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
3835 static int check_inode_recs(struct btrfs_root *root,
3836 struct cache_tree *inode_cache)
3838 struct cache_extent *cache;
3839 struct ptr_node *node;
3840 struct inode_record *rec;
3841 struct inode_backref *backref;
3846 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs: only warn if it still has inode records. */
3848 if (btrfs_root_refs(&root->root_item) == 0) {
3849 if (!cache_tree_empty(inode_cache))
3850 fprintf(stderr, "warning line %d\n", __LINE__);
3855 * We need to repair backrefs first because we could change some of the
3856 * errors in the inode recs.
3858 * We also need to go through and delete invalid backrefs first and then
3859 * add the correct ones second. We do this because we may get EEXIST
3860 * when adding back the correct index because we hadn't yet deleted the
3863 * For example, if we were missing a dir index then the directories
3864 * isize would be wrong, so if we fixed the isize to what we thought it
3865 * would be and then fixed the backref we'd still have a invalid fs, so
3866 * we need to add back the dir index and then check to see if the isize
3871 if (stage == 3 && !err)
/* The backref repair pass only runs in repair mode. */
3874 cache = search_cache_extent(inode_cache, 0);
3875 while (repair && cache) {
3876 node = container_of(cache, struct ptr_node, cache);
3878 cache = next_cache_extent(cache);
3880 /* Need to free everything up and rescan */
3882 remove_cache_extent(inode_cache, &node->cache);
3884 free_inode_rec(rec);
3888 if (list_empty(&rec->backrefs))
3891 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Look up the root directory's record and validate it. */
3905 rec = get_inode_rec(inode_cache, root_dirid, 0);
3906 BUG_ON(IS_ERR(rec));
3908 ret = check_root_dir(rec);
3910 fprintf(stderr, "root %llu root dir %llu error\n",
3911 (unsigned long long)root->root_key.objectid,
3912 (unsigned long long)root_dirid);
3913 print_inode_error(root, rec);
/* Recreate a missing root directory in its own transaction. */
3918 struct btrfs_trans_handle *trans;
3920 trans = btrfs_start_transaction(root, 1);
3921 if (IS_ERR(trans)) {
3922 err = PTR_ERR(trans);
3927 "root %llu missing its root dir, recreating\n",
3928 (unsigned long long)root->objectid);
3930 ret = btrfs_make_root_dir(trans, root, root_dirid);
3933 btrfs_commit_transaction(trans, root);
3937 fprintf(stderr, "root %llu root dir %llu not found\n",
3938 (unsigned long long)root->root_key.objectid,
3939 (unsigned long long)root_dirid);
/* Main pass: each record is removed from the cache as it is handled. */
3943 cache = search_cache_extent(inode_cache, 0);
3946 node = container_of(cache, struct ptr_node, cache);
3948 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan objectid are not checked here; their records are just freed. */
3950 if (rec->ino == root_dirid ||
3951 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952 free_inode_rec(rec);
/* Re-check a "no orphan item" error against the tree before trusting it. */
3956 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957 ret = check_orphan_item(root, rec->ino);
3959 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960 if (can_free_inode_rec(rec)) {
3961 free_inode_rec(rec);
/* Derive the remaining error bits from the record itself. */
3966 if (!rec->found_inode_item)
3967 rec->errors |= I_ERR_NO_INODE_ITEM;
3968 if (rec->found_link != rec->nlink)
3969 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3971 ret = try_repair_inode(root, rec);
3972 if (ret == 0 && can_free_inode_rec(rec)) {
3973 free_inode_rec(rec);
3979 if (!(repair && ret == 0))
3981 print_inode_error(root, rec);
/* Report every backref still attached to the record, flagging what it lacks. */
3982 list_for_each_entry(backref, &rec->backrefs, list) {
3983 if (!backref->found_dir_item)
3984 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985 if (!backref->found_dir_index)
3986 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987 if (!backref->found_inode_ref)
3988 backref->errors |= REF_ERR_NO_INODE_REF;
3989 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990 " namelen %u name %s filetype %d errors %x",
3991 (unsigned long long)backref->dir,
3992 (unsigned long long)backref->index,
3993 backref->namelen, backref->name,
3994 backref->filetype, backref->errors);
3995 print_ref_error(backref->errors);
3997 free_inode_rec(rec);
3999 return (error > 0) ? -1 : 0;
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4005 struct cache_extent *cache;
4006 struct root_record *rec = NULL;
4009 cache = lookup_cache_extent(root_cache, objectid, 1);
4011 rec = container_of(cache, struct root_record, cache);
4013 rec = calloc(1, sizeof(*rec));
4015 return ERR_PTR(-ENOMEM);
4016 rec->objectid = objectid;
4017 INIT_LIST_HEAD(&rec->backrefs);
4018 rec->cache.start = objectid;
4019 rec->cache.size = 1;
4021 ret = insert_cache_extent(root_cache, &rec->cache);
4023 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (ref_root, dir, name); @index is not part
 * of the comparison. If none matches, a new zeroed backref is allocated with
 * room for the name plus a NUL terminator, filled in and appended to
 * rec->backrefs.
 */
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029 u64 ref_root, u64 dir, u64 index,
4030 const char *name, int namelen)
4032 struct root_backref *backref;
4034 list_for_each_entry(backref, &rec->backrefs, list) {
/* Cheap field comparisons first, then the name bytes. */
4035 if (backref->ref_root != ref_root || backref->dir != dir ||
4036 backref->namelen != namelen)
4038 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, with one extra byte for the terminator. */
4043 backref = calloc(1, sizeof(*backref) + namelen + 1);
4046 backref->ref_root = ref_root;
4048 backref->index = index;
4049 backref->namelen = namelen;
4050 memcpy(backref->name, name, namelen);
4051 backref->name[namelen] = '\0';
4052 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root record: maps the cache extent back to its
 * struct root_record and unlinks every backref from rec->backrefs.
 */
4056 static void free_root_record(struct cache_extent *cache)
4058 struct root_record *rec;
4059 struct root_backref *backref;
4061 rec = container_of(cache, struct root_record, cache);
/* Pop entries from the head until the list is empty. */
4062 while (!list_empty(&rec->backrefs)) {
4063 backref = to_root_backref(rec->backrefs.next);
4064 list_del(&backref->list);
/* Macro instantiation pairing the name root_recs with free_root_record as the per-entry callback. */
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen.
 * The matching backref is looked up or created, @errors is OR-ed into it, the
 * found_* flag for the item type is set, and index mismatches and duplicate
 * refs are flagged. A backref with both a forward ref and a dir item is
 * marked reachable.
 */
4073 static int add_root_backref(struct cache_tree *root_cache,
4074 u64 root_id, u64 ref_root, u64 dir, u64 index,
4075 const char *name, int namelen,
4076 int item_type, int errors)
4078 struct root_record *rec;
4079 struct root_backref *backref;
4081 rec = get_root_rec(root_cache, root_id);
4082 BUG_ON(IS_ERR(rec));
4083 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4086 backref->errors |= errors;
/* Index handling is skipped for DIR_ITEM_KEY. */
4088 if (item_type != BTRFS_DIR_ITEM_KEY) {
/* An index was already recorded by an earlier item: it must agree. */
4089 if (backref->found_dir_index || backref->found_back_ref ||
4090 backref->found_forward_ref) {
4091 if (backref->index != index)
4092 backref->errors |= REF_ERR_INDEX_UNMATCH;
4094 backref->index = index;
4098 if (item_type == BTRFS_DIR_ITEM_KEY) {
4099 if (backref->found_forward_ref)
4101 backref->found_dir_item = 1;
4102 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103 backref->found_dir_index = 1;
4104 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Seeing a second ROOT_REF for the same backref is an error. */
4105 if (backref->found_forward_ref)
4106 backref->errors |= REF_ERR_DUP_ROOT_REF;
4107 else if (backref->found_dir_item)
4109 backref->found_forward_ref = 1;
4110 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
/* Likewise for a second ROOT_BACKREF. */
4111 if (backref->found_back_ref)
4112 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113 backref->found_back_ref = 1;
4118 if (backref->found_forward_ref && backref->found_dir_item)
4119 backref->reachable = 1;
/*
 * Drain the inode records of @src_cache and convert the dir item/dir index
 * backrefs they carry into root backrefs in @dst_cache, recorded against this
 * root's objectid.  For the tree-reloc root the records are freed instead.
 */
4123 static int merge_root_recs(struct btrfs_root *root,
4124 struct cache_tree *src_cache,
4125 struct cache_tree *dst_cache)
4127 struct cache_extent *cache;
4128 struct ptr_node *node;
4129 struct inode_record *rec;
4130 struct inode_backref *backref;
4133 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134 free_inode_recs_tree(src_cache);
/* Each record is detached from @src_cache before it is processed and freed. */
4139 cache = search_cache_extent(src_cache, 0);
4142 node = container_of(cache, struct ptr_node, cache);
4144 remove_cache_extent(src_cache, &node->cache);
/* NOTE(review): how the is_child_root() result is acted on is not visible in this excerpt. */
4147 ret = is_child_root(root, root->objectid, rec->ino);
4153 list_for_each_entry(backref, &rec->backrefs, list) {
4154 BUG_ON(backref->found_inode_ref);
4155 if (backref->found_dir_item)
4156 add_root_backref(dst_cache, rec->ino,
4157 root->root_key.objectid, backref->dir,
4158 backref->index, backref->name,
4159 backref->namelen, BTRFS_DIR_ITEM_KEY,
4161 if (backref->found_dir_index)
4162 add_root_backref(dst_cache, rec->ino,
4163 root->root_key.objectid, backref->dir,
4164 backref->index, backref->name,
4165 backref->namelen, BTRFS_DIR_INDEX_KEY,
4169 free_inode_rec(rec);
/*
 * Cross-check the collected root records in two passes over @root_cache:
 * first clear the reachable flag of backrefs whose referring root is itself
 * unreferenced, then report fs trees that are unreferenced or whose backrefs
 * carry errors.  Returns 1 when at least one error was counted, else 0.
 */
4176 static int check_root_refs(struct btrfs_root *root,
4177 struct cache_tree *root_cache)
4179 struct root_record *rec;
4180 struct root_record *ref_root;
4181 struct root_backref *backref;
4182 struct cache_extent *cache;
/* NOTE(review): what is done with the top-level fs tree record is not visible in this excerpt. */
4188 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189 BUG_ON(IS_ERR(rec));
4192 /* fixme: this can not detect circular references */
/* Pass 1: re-evaluate reachability of every backref. */
4195 cache = search_cache_extent(root_cache, 0);
4199 rec = container_of(cache, struct root_record, cache);
4200 cache = next_cache_extent(cache);
4202 if (rec->found_ref == 0)
4205 list_for_each_entry(backref, &rec->backrefs, list) {
4206 if (!backref->reachable)
4209 ref_root = get_root_rec(root_cache,
4211 BUG_ON(IS_ERR(ref_root));
4212 if (ref_root->found_ref > 0)
4215 backref->reachable = 0;
4217 if (rec->found_ref == 0)
/* Pass 2: report problems for each root record. */
4223 cache = search_cache_extent(root_cache, 0);
4227 rec = container_of(cache, struct root_record, cache);
4228 cache = next_cache_extent(cache);
/* Unreferenced roots in the regular objectid range are looked up as orphans first. */
4230 if (rec->found_ref == 0 &&
4231 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233 ret = check_orphan_item(root->fs_info->tree_root,
4239 * If we don't have a root item then we likely just have
4240 * a dir item in a snapshot for this root but no actual
4241 * ref key or anything so it's meaningless.
4243 if (!rec->found_root_item)
4246 fprintf(stderr, "fs tree %llu not referenced\n",
4247 (unsigned long long)rec->objectid);
4251 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag each of the four items (dir item, dir index, root backref, root ref) that was never seen. */
4253 list_for_each_entry(backref, &rec->backrefs, list) {
4254 if (!backref->found_dir_item)
4255 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256 if (!backref->found_dir_index)
4257 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258 if (!backref->found_back_ref)
4259 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260 if (!backref->found_forward_ref)
4261 backref->errors |= REF_ERR_NO_ROOT_REF;
4262 if (backref->reachable && backref->errors)
4269 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270 (unsigned long long)rec->objectid, rec->found_ref,
4271 rec->found_root_item ? "" : "not found");
/* Per-backref detail lines for the root reported above. */
4273 list_for_each_entry(backref, &rec->backrefs, list) {
4274 if (!backref->reachable)
4276 if (!backref->errors && rec->found_root_item)
4278 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279 " index %llu namelen %u name %s errors %x\n",
4280 (unsigned long long)backref->ref_root,
4281 (unsigned long long)backref->dir,
4282 (unsigned long long)backref->index,
4283 backref->namelen, backref->name,
4285 print_ref_error(backref->errors);
4288 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF/ROOT_BACKREF item at @slot of @eb and pass it to
 * add_root_backref().  At most BTRFS_NAME_LEN name bytes are read; the
 * over-long branch records REF_ERR_NAME_TOO_LONG.
 */
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292 struct btrfs_key *key,
4293 struct cache_tree *root_cache)
4299 struct btrfs_root_ref *ref;
4300 char namebuf[BTRFS_NAME_LEN];
4303 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4305 dirid = btrfs_root_ref_dirid(eb, ref);
4306 index = btrfs_root_ref_sequence(eb, ref);
4307 name_len = btrfs_root_ref_name_len(eb, ref);
/* len is the number of bytes actually copied into namebuf. */
4309 if (name_len <= BTRFS_NAME_LEN) {
4313 len = BTRFS_NAME_LEN;
4314 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly after the fixed-size root_ref header. */
4316 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/*
 * For ROOT_REF the referenced root is key->offset and the referrer is
 * key->objectid; the other call passes the two the other way round.
 */
4318 if (key->type == BTRFS_ROOT_REF_KEY) {
4319 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320 index, namebuf, len, key->type, error);
4322 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for a btrfs_corrupt_block stored in a cache tree.
 * NOTE(review): the statement that releases @corrupt is not visible here.
 */
4328 static void free_corrupt_block(struct cache_extent *cache)
4330 struct btrfs_corrupt_block *corrupt;
4332 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiate the whole-tree destructor for corrupt_blocks; presumably this
 * defines free_corrupt_blocks_tree(), which check_fs_root() calls.
 */
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4339 * Repair the btree of the given root.
4341 * The fix is to remove the node key in corrupt_blocks cache_tree.
4342 * and rebalance the tree.
4343 * After the fix, the btree should be writeable.
4345 static int repair_btree(struct btrfs_root *root,
4346 struct cache_tree *corrupt_blocks)
4348 struct btrfs_trans_handle *trans;
4349 struct btrfs_path path;
4350 struct btrfs_corrupt_block *corrupt;
4351 struct cache_extent *cache;
4352 struct btrfs_key key;
/* Nothing recorded means nothing to repair. */
4357 if (cache_tree_empty(corrupt_blocks))
/* Both passes below run inside this single transaction. */
4360 trans = btrfs_start_transaction(root, 1);
4361 if (IS_ERR(trans)) {
4362 ret = PTR_ERR(trans);
4363 fprintf(stderr, "Error starting transaction: %s\n",
4367 btrfs_init_path(&path);
/* Pass 1: for every recorded block, drop the parent pointer and free the extent. */
4368 cache = first_cache_extent(corrupt_blocks);
4370 corrupt = container_of(cache, struct btrfs_corrupt_block,
4372 level = corrupt->level;
/* Stop the search at the level recorded for the corrupted block. */
4373 path.lowest_level = level;
4374 key.objectid = corrupt->key.objectid;
4375 key.type = corrupt->key.type;
4376 key.offset = corrupt->key.offset;
4379 * Here we don't want to do any tree balance, since it may
4380 * cause a balance with corrupted brother leaf/node,
4381 * so ins_len set to 0 here.
4382 * Balance will be done after all corrupt node/leaf is deleted.
4384 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4387 offset = btrfs_node_blockptr(path.nodes[level],
4390 /* Remove the ptr */
4391 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4395 * Remove the corresponding extent
4396 * return value is not concerned.
4398 btrfs_release_path(&path);
4399 ret = btrfs_free_extent(trans, root, offset,
4400 root->fs_info->nodesize, 0,
4401 root->root_key.objectid, level - 1, 0);
4402 cache = next_cache_extent(cache);
4405 /* Balance the btree using btrfs_search_slot() */
4406 cache = first_cache_extent(corrupt_blocks);
4408 corrupt = container_of(cache, struct btrfs_corrupt_block,
4410 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len is -1 here, unlike the 0 used in the first pass. */
4411 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4414 /* The return value is always >0 because the deleted item cannot be found */
4416 btrfs_release_path(&path);
4417 cache = next_cache_extent(cache);
4420 btrfs_commit_transaction(trans, root);
4421 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: verify the root block, walk the tree with
 * walk_down_tree()/walk_up_tree(), report (and in the visible code attempt to
 * repair) recorded corrupted blocks, then merge the collected root records
 * into @root_cache and check the inode records.
 * NOTE(review): the return paths and error propagation are not visible in
 * this excerpt.
 */
4425 static int check_fs_root(struct btrfs_root *root,
4426 struct cache_tree *root_cache,
4427 struct walk_control *wc)
4433 struct btrfs_path path;
4434 struct shared_node root_node;
4435 struct root_record *rec;
4436 struct btrfs_root_item *root_item = &root->root_item;
4437 struct cache_tree corrupt_blocks;
4438 struct orphan_data_extent *orphan;
4439 struct orphan_data_extent *tmp;
4440 enum btrfs_tree_block_status status;
4441 struct node_refs nrefs;
4444 * Reuse the corrupt_block cache tree to record corrupted tree block
4446 * Unlike the usage in extent tree check, here we do it in a per
4447 * fs/subvol tree base.
4449 cache_tree_init(&corrupt_blocks);
4450 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Every non-reloc root gets a root record; it is marked found when the root item has refs. */
4452 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453 rec = get_root_rec(root_cache, root->root_key.objectid);
4454 BUG_ON(IS_ERR(rec));
4455 if (btrfs_root_refs(root_item) > 0)
4456 rec->found_root_item = 1;
4459 btrfs_init_path(&path);
4460 memset(&root_node, 0, sizeof(root_node));
4461 cache_tree_init(&root_node.root_cache);
4462 cache_tree_init(&root_node.inode_cache);
4463 memset(&nrefs, 0, sizeof(nrefs));
4465 /* Move the orphan extent record to corresponding inode_record */
4466 list_for_each_entry_safe(orphan, tmp,
4467 &root->orphan_data_extents, list) {
4468 struct inode_record *inode;
4470 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4472 BUG_ON(IS_ERR(inode));
4473 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474 list_move(&orphan->list, &inode->orphan_extents);
/* Point the walk control at root_node for the root block's level. */
4477 level = btrfs_header_level(root->node);
4478 memset(wc->nodes, 0, sizeof(wc->nodes));
4479 wc->nodes[level] = &root_node;
4480 wc->active_node = level;
4481 wc->root_level = level;
4483 /* We may not have checked the root block, lets do that now */
4484 if (btrfs_is_leaf(root->node))
4485 status = btrfs_check_leaf(root, NULL, root->node);
4487 status = btrfs_check_node(root, NULL, root->node);
4488 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Start at the root block when the root still has refs or no drop progress
 * is recorded; otherwise resume from the drop_progress key at drop_level.
 */
4491 if (btrfs_root_refs(root_item) > 0 ||
4492 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493 path.nodes[level] = root->node;
4494 extent_buffer_get(root->node);
4495 path.slots[level] = 0;
4497 struct btrfs_key key;
4498 struct btrfs_disk_key found_key;
4500 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501 level = root_item->drop_level;
4502 path.lowest_level = level;
4503 if (level > btrfs_header_level(root->node) ||
4504 level >= BTRFS_MAX_LEVEL) {
4505 error("ignoring invalid drop level: %u", level);
4508 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4511 btrfs_node_key(path.nodes[level], &found_key,
4513 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514 sizeof(found_key)));
4518 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4524 wret = walk_up_tree(root, &path, wc, &level);
4531 btrfs_release_path(&path);
/* List every tree block recorded as corrupted during the walk. */
4533 if (!cache_tree_empty(&corrupt_blocks)) {
4534 struct cache_extent *cache;
4535 struct btrfs_corrupt_block *corrupt;
4537 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538 root->root_key.objectid);
4539 cache = first_cache_extent(&corrupt_blocks);
4541 corrupt = container_of(cache,
4542 struct btrfs_corrupt_block,
4544 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545 cache->start, corrupt->level,
4546 corrupt->key.objectid, corrupt->key.type,
4547 corrupt->key.offset);
4548 cache = next_cache_extent(cache);
/* NOTE(review): any condition gating the repair attempt is not visible in this excerpt. */
4551 printf("Try to repair the btree for root %llu\n",
4552 root->root_key.objectid);
4553 ret = repair_btree(root, &corrupt_blocks);
4555 fprintf(stderr, "Failed to repair btree: %s\n",
4558 printf("Btree for root %llu is fixed\n",
4559 root->root_key.objectid);
4563 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still in progress as checked before the final inode check. */
4567 if (root_node.current) {
4568 root_node.current->checked = 1;
4569 maybe_free_inode_rec(&root_node.inode_cache,
4573 err = check_inode_recs(root, &root_node.inode_cache);
/* Detach the on-stack corrupt block tree from fs_info before it goes out of scope. */
4577 free_corrupt_blocks_tree(&corrupt_blocks);
4578 root->fs_info->corrupt_blocks = NULL;
4579 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Objectid filter used by check_fs_roots(): the tree-reloc and data-reloc
 * objectids are tested explicitly (the value returned for them is not visible
 * in this excerpt); every other objectid defers to is_fstree().
 */
4583 static int fs_root_objectid(u64 objectid)
4585 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4588 return is_fstree(objectid);
/*
 * Iterate the items of the tree root: every ROOT_ITEM accepted by
 * fs_root_objectid() is read and handed to check_fs_root(), and every
 * ROOT_REF/ROOT_BACKREF item is handed to process_root_ref().
 * NOTE(review): loop structure, restart labels and return values are not
 * visible in this excerpt.
 */
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592 struct cache_tree *root_cache)
4594 struct btrfs_path path;
4595 struct btrfs_key key;
4596 struct walk_control wc;
4597 struct extent_buffer *leaf, *tree_node;
4598 struct btrfs_root *tmp_root;
4599 struct btrfs_root *tree_root = fs_info->tree_root;
/* Optional progress reporting for this phase. */
4603 if (ctx.progress_enabled) {
4604 ctx.tp = TASK_FS_ROOTS;
4605 task_start(ctx.info);
4609 * Just in case we made any changes to the extent tree that weren't
4610 * reflected into the free space cache yet.
4613 reset_cached_block_groups(fs_info);
4614 memset(&wc, 0, sizeof(wc));
4615 cache_tree_init(&wc.shared);
4616 btrfs_init_path(&path);
4621 key.type = BTRFS_ROOT_ITEM_KEY;
4622 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/*
 * tree_node remembers the tree root node seen at the start; if it later
 * differs from tree_root->node the collected root records are dropped
 * (presumably the scan is then restarted).
 */
4627 tree_node = tree_root->node;
4629 if (tree_node != tree_root->node) {
4630 free_root_recs_tree(root_cache);
4631 btrfs_release_path(&path);
4634 leaf = path.nodes[0];
4635 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636 ret = btrfs_next_leaf(tree_root, &path);
4642 leaf = path.nodes[0];
4644 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646 fs_root_objectid(key.objectid)) {
/* Reloc roots are read with the _no_cache variant; other roots with offset (u64)-1. */
4647 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648 tmp_root = btrfs_read_fs_root_no_cache(
4651 key.offset = (u64)-1;
4652 tmp_root = btrfs_read_fs_root(
4655 if (IS_ERR(tmp_root)) {
4659 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN drops the collected root records and the path as well. */
4660 if (ret == -EAGAIN) {
4661 free_root_recs_tree(root_cache);
4662 btrfs_release_path(&path);
/* Only the reloc root, read without the cache, is freed here. */
4667 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668 btrfs_free_fs_root(tmp_root);
4669 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670 key.type == BTRFS_ROOT_BACKREF_KEY) {
4671 process_root_ref(leaf, path.slots[0], &key,
4678 btrfs_release_path(&path);
/* Sanity check: the shared-node cache should be empty after being freed. */
4680 free_extent_cache_tree(&wc.shared);
4681 if (!cache_tree_empty(&wc.shared))
4682 fprintf(stderr, "warning line %d\n", __LINE__);
4684 task_stop(ctx.info);
4690 * Find the @index according to @ino and name.
4691 * Notice: time efficiency is O(N)
4693 * @root: the root of the fs/file tree
4694 * @index_ret: the index as return value
4695 * @namebuf: the name to match
4696 * @name_len: the length of name to match
4697 * @file_type: the file_type of INODE_ITEM to match
4699 * Returns 0 if found and *@index_ret will be modified with right value
4700 * Returns <0 if not found and *@index_ret will be (u64)-1
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703 u64 *index_ret, char *namebuf, u32 name_len,
4706 struct btrfs_path path;
4707 struct extent_buffer *node;
4708 struct btrfs_dir_item *di;
4709 struct btrfs_key key;
4710 struct btrfs_key location;
4711 char name[BTRFS_NAME_LEN] = {0};
4723 /* search from the last index */
4724 key.objectid = dirid;
4725 key.offset = (u64)-1;
4726 key.type = BTRFS_DIR_INDEX_KEY;
4728 btrfs_init_path(&path);
4729 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Step backwards through the DIR_INDEX items of @dirid. */
4734 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Report the "not found" sentinel.  This must be (u64)-1: the previous
 * "(64)-1" evaluated to the integer 63, a plausible real index.
 */
4737 *index_ret = (u64)-1;
4740 /* Check whether inode_id/filetype/name match */
4741 node = path.nodes[0];
4742 slot = path.slots[0];
4743 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744 total = btrfs_item_size_nr(node, slot);
4745 while (cur < total) {
4747 len = btrfs_dir_name_len(node, di);
4748 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of @location_id. */
4750 btrfs_dir_item_key_to_cpu(node, di, &location);
4751 if (location.objectid != location_id ||
4752 location.type != BTRFS_INODE_ITEM_KEY ||
4753 location.offset != 0)
4756 filetype = btrfs_dir_type(node, di);
4757 if (file_type != filetype)
/* Never copy more than the local name buffer can hold. */
4760 if (len > BTRFS_NAME_LEN)
4761 len = BTRFS_NAME_LEN;
4763 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the index is the offset of the DIR_INDEX key. */
4767 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768 *index_ret = key.offset;
/* Advance to the next dir entry packed in the same item. */
4772 len += sizeof(*di) + data_len;
4773 di = (struct btrfs_dir_item *)((char *)di + len);
4779 btrfs_release_path(&path);
4784 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785 * INODE_REF/INODE_EXTREF match.
4787 * @root: the root of the fs/file tree
4788 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789 * value while find index
4790 * @location_key: location key of the struct btrfs_dir_item to match
4791 * @name: the name to match
4792 * @namelen: the length of name
4793 * @file_type: the type of file to match
4795 * Return 0 if no error occurred.
4796 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797 * DIR_ITEM/DIR_INDEX
4798 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799 * and DIR_ITEM/DIR_INDEX mismatch
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802 struct btrfs_key *location_key, char *name,
4803 u32 namelen, u8 file_type)
4805 struct btrfs_path path;
4806 struct extent_buffer *node;
4807 struct btrfs_dir_item *di;
4808 struct btrfs_key location;
4809 char namebuf[BTRFS_NAME_LEN] = {0};
4818 /* get the index by traversing all index */
/* An offset of (u64)-1 on a DIR_INDEX key asks for the index to be looked up; key->offset is overwritten. */
4819 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820 ret = find_dir_index(root, key->objectid,
4821 location_key->objectid, &key->offset,
4822 name, namelen, file_type);
4824 ret = DIR_INDEX_MISSING;
4828 btrfs_init_path(&path);
4829 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4831 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4836 /* Check whether inode_id/filetype/name match */
4837 node = path.nodes[0];
4838 slot = path.slots[0];
4839 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840 total = btrfs_item_size_nr(node, slot);
4841 while (cur < total) {
/* ret is preset to the MISMATCH code for the key type on every iteration. */
4842 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4845 len = btrfs_dir_name_len(node, di);
4846 data_len = btrfs_dir_data_len(node, di);
/* All three fields of the entry's location key must equal @location_key. */
4848 btrfs_dir_item_key_to_cpu(node, di, &location);
4849 if (location.objectid != location_key->objectid ||
4850 location.type != location_key->type ||
4851 location.offset != location_key->offset)
4854 filetype = btrfs_dir_type(node, di);
4855 if (file_type != filetype)
/* NOTE(review): len is clamped before the warning, so the message prints BTRFS_NAME_LEN, not the on-disk length. */
4858 if (len > BTRFS_NAME_LEN) {
4859 len = BTRFS_NAME_LEN;
4860 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4862 key->type == BTRFS_DIR_ITEM_KEY ?
4863 "DIR_ITEM" : "DIR_INDEX",
4864 key->objectid, key->offset, len);
4866 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4868 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next dir entry packed in the same item. */
4874 len += sizeof(*di) + data_len;
4875 di = (struct btrfs_dir_item *)((char *)di + len);
4880 btrfs_release_path(&path);
4885 * Prints inode ref error message
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888 u64 index, const char *namebuf, int name_len,
4889 u8 filetype, int err)
4894 /* root dir error */
4895 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4897 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898 root->objectid, key->objectid, key->offset, namebuf);
/* The DIR_ITEM key is reported as (parent dir, hash of the name). */
4903 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905 root->objectid, key->offset,
4906 btrfs_name_hash(namebuf, name_len),
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
/*
 * The DIR_INDEX wording must be chosen from the DIR_INDEX bits; testing
 * DIR_ITEM_MISMATCH here reported the wrong state whenever the two
 * differed.
 */
4909 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911 root->objectid, key->offset, index,
4912 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4917 * Insert the missing inode item.
4919 * Returns 0 means success.
4920 * Returns <0 means error.
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4925 struct btrfs_key key;
4926 struct btrfs_trans_handle *trans;
4927 struct btrfs_path path;
4931 key.type = BTRFS_INODE_ITEM_KEY;
4934 btrfs_init_path(&path);
4935 trans = btrfs_start_transaction(root, 1);
4936 if (IS_ERR(trans)) {
/*
 * Search for the inode item inside the transaction; a negative result and an
 * exact match (ret == 0) share one branch, whose target is not visible in
 * this excerpt.
 */
4941 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4942 if (ret < 0 || !ret)
4945 /* insert inode item */
4946 create_inode_item_lowmem(trans, root, ino, filetype);
4949 btrfs_commit_transaction(trans, root);
4952 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953 root->objectid, ino);
4954 btrfs_release_path(&path);
4959 * The ternary means dir item, dir index and relative inode ref.
4960 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4963 * If two of three is missing or mismatched, delete the existing one.
4964 * If one of three is missing or mismatched, add the missing one.
4966 * returns 0 means success.
4967 * returns non-zero on error.
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970 u64 index, char *name, int name_len, u8 filetype,
4973 struct btrfs_trans_handle *trans;
4978 * stage shall be one of following valild values:
4979 * 0: Fine, nothing to do.
4980 * 1: One of three is wrong, so add missing one.
4981 * 2: Two of three is wrong, so delete existed one.
4983 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4985 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4987 if (err & (INODE_REF_MISSING))
4990 /* stage must be smaller than 3 */
/* NOTE(review): no IS_ERR(trans) check is visible between this call and the first use of trans - confirm. */
4993 trans = btrfs_start_transaction(root, 1);
/* The visible calls are btrfs_unlink() and btrfs_add_link(); the final message prints "Delete" for stage 2 and "Add" otherwise. */
4995 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
5000 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001 filetype, &index, 1, 1);
5005 btrfs_commit_transaction(trans, root);
5008 error("fail to repair inode %llu name %s filetype %u",
5009 ino, name, filetype);
5011 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012 stage == 2 ? "Delete" : "Add",
5013 ino, name, filetype);
5019 * Traverse the given INODE_REF and call find_dir_item() to find related
5020 * DIR_ITEM/DIR_INDEX.
5022 * @root: the root of the fs/file tree
5023 * @ref_key: the key of the INODE_REF
5024 * @path: the path provides node and slot
5025 * @refs_ret: the count of INODE_REF
5026 * @mode: the st_mode of INODE_ITEM
5027 * @name_ret: returns with the first ref's name
5028 * @namelen_ret: len of the name_ret
5030 * Return 0 if no error occurred.
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033 struct btrfs_path *path, char *name_ret,
5034 u32 *namelen_ret, u64 *refs_ret, int mode)
5036 struct btrfs_key key;
5037 struct btrfs_key location;
5038 struct btrfs_inode_ref *ref;
5039 struct extent_buffer *node;
5040 char namebuf[BTRFS_NAME_LEN] = {0};
5050 int need_research = 0;
5058 /* since after repair, path and the dir item may be changed */
5059 if (need_research) {
5061 btrfs_release_path(path);
5062 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063 /* the item was deleted, let path point to the last checked item */
5065 if (path->slots[0] == 0)
5066 btrfs_prev_leaf(root, path);
/* Every name in this INODE_REF must be backed by dir entries pointing at this inode item. */
5074 location.objectid = ref_key->objectid;
5075 location.type = BTRFS_INODE_ITEM_KEY;
5076 location.offset = 0;
5077 node = path->nodes[0];
5078 slot = path->slots[0];
/* Clear the whole name buffer before reading the item. */
5080 memset(namebuf, 0, sizeof(namebuf));
5081 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082 total = btrfs_item_size_nr(node, slot);
5085 /* Update inode ref count */
5088 index = btrfs_inode_ref_index(node, ref);
5089 name_len = btrfs_inode_ref_name_len(node, ref);
/* len is the number of name bytes actually held in namebuf (at most BTRFS_NAME_LEN). */
5091 if (name_len <= BTRFS_NAME_LEN) {
5094 len = BTRFS_NAME_LEN;
5095 warning("root %llu INODE_REF[%llu %llu] name too long",
5096 root->objectid, ref_key->objectid, ref_key->offset);
5099 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5101 /* copy the first name found to name_ret */
5102 if (refs == 1 && name_ret) {
5103 memcpy(name_ret, namebuf, len);
5107 /* Check root dir ref */
5108 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109 if (index != 0 || len != strlen("..") ||
5110 strncmp("..", namebuf, len) ||
5111 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112 /* set err bits then repair will delete the ref */
5113 err |= DIR_INDEX_MISSING;
5114 err |= DIR_ITEM_MISSING;
5119 /* Find related DIR_INDEX */
5120 key.objectid = ref_key->offset;
5121 key.type = BTRFS_DIR_INDEX_KEY;
5123 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124 imode_to_type(mode));
5126 /* Find related dir_item */
5127 key.objectid = ref_key->offset;
5128 key.type = BTRFS_DIR_ITEM_KEY;
5129 key.offset = btrfs_name_hash(namebuf, len);
5130 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131 imode_to_type(mode));
/*
 * Pass the clamped length: namebuf only holds len bytes, so handing the
 * on-disk name_len to the repair/print helpers would make them read past
 * the buffer when the item's name is over-long.
 */
5133 if (tmp_err && repair) {
5134 ret = repair_ternary_lowmem(root, ref_key->offset,
5135 ref_key->objectid, index, namebuf,
5136 len, imode_to_type(mode),
5143 print_inode_ref_err(root, ref_key, index, namebuf, len,
5144 imode_to_type(mode), tmp_err);
/* Advance by the on-disk name length to reach the next packed ref. */
5146 len = sizeof(*ref) + name_len;
5147 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5158 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159 * DIR_ITEM/DIR_INDEX.
5161 * @root: the root of the fs/file tree
5162 * @ref_key: the key of the INODE_EXTREF
5163 * @refs: the count of INODE_EXTREF
5164 * @mode: the st_mode of INODE_ITEM
5166 * Return 0 if no error occurred.
5168 static int check_inode_extref(struct btrfs_root *root,
5169 struct btrfs_key *ref_key,
5170 struct extent_buffer *node, int slot, u64 *refs,
5173 struct btrfs_key key;
5174 struct btrfs_key location;
5175 struct btrfs_inode_extref *extref;
5176 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every name in this INODE_EXTREF must be backed by dir entries pointing at this inode item. */
5186 location.objectid = ref_key->objectid;
5187 location.type = BTRFS_INODE_ITEM_KEY;
5188 location.offset = 0;
5190 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191 total = btrfs_item_size_nr(node, slot);
5194 /* update inode ref count */
5196 name_len = btrfs_inode_extref_name_len(node, extref);
5197 index = btrfs_inode_extref_index(node, extref);
5198 parent = btrfs_inode_extref_parent(node, extref);
/* len is the number of name bytes actually held in namebuf (at most BTRFS_NAME_LEN). */
5199 if (name_len <= BTRFS_NAME_LEN) {
5202 len = BTRFS_NAME_LEN;
5203 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204 root->objectid, ref_key->objectid, ref_key->offset);
5206 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5208 /* Check root dir ref name */
5209 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211 root->objectid, ref_key->objectid, ref_key->offset,
5213 err |= ROOT_DIR_ERROR;
/*
 * find_dir_item() takes a u8 dir-entry file type, while @mode is the
 * inode's st_mode; convert it with imode_to_type() exactly as
 * check_inode_ref() does, otherwise the type comparison cannot match.
 */
5216 /* find related dir_index */
5217 key.objectid = parent;
5218 key.type = BTRFS_DIR_INDEX_KEY;
5220 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
5223 /* find related dir_item */
5224 key.objectid = parent;
5225 key.type = BTRFS_DIR_ITEM_KEY;
5226 key.offset = btrfs_name_hash(namebuf, len);
5227 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance by the on-disk name length to reach the next packed extref. */
5230 len = sizeof(*extref) + name_len;
5231 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5241 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242 * DIR_ITEM/DIR_INDEX match.
5243 * Return with @index_ret.
5245 * @root: the root of the fs/file tree
5246 * @key: the key of the INODE_REF/INODE_EXTREF
5247 * @name: the name in the INODE_REF/INODE_EXTREF
5248 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5249 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5250 * value (u64)-1 means do not check index
5251 * @ext_ref: the EXTENDED_IREF feature
5253 * Return 0 if no error occurred.
5254 * Return >0 for error bitmap
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257 char *name, int namelen, u64 *index_ret,
5258 unsigned int ext_ref)
5260 struct btrfs_path path;
5261 struct btrfs_inode_ref *ref;
5262 struct btrfs_inode_extref *extref;
5263 struct extent_buffer *node;
5264 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First attempt: look the key up as passed in by the caller. */
5277 btrfs_init_path(&path);
5278 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5280 ret = INODE_REF_MISSING;
5284 node = path.nodes[0];
5285 slot = path.slots[0];
5287 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288 total = btrfs_item_size_nr(node, slot);
5290 /* Iterate all entry of INODE_REF */
5291 while (cur < total) {
5292 ret = INODE_REF_MISSING;
5294 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295 ref_index = btrfs_inode_ref_index(node, ref);
/* The index is only compared when the caller supplied one ((u64)-1 disables the check). */
5296 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Guard against a name that overruns the item or exceeds BTRFS_NAME_LEN. */
5299 if (cur + sizeof(*ref) + ref_namelen > total ||
5300 ref_namelen > BTRFS_NAME_LEN) {
5301 warning("root %llu INODE %s[%llu %llu] name too long",
5303 key->type == BTRFS_INODE_REF_KEY ?
5305 key->objectid, key->offset);
5307 if (cur + sizeof(*ref) > total)
5309 len = min_t(u32, total - cur - sizeof(*ref),
5315 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5318 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Match: hand the index found on disk back to the caller. */
5321 *index_ret = ref_index;
5325 len = sizeof(*ref) + ref_namelen;
5326 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5331 /* Skip if not support EXTENDED_IREF feature */
5335 btrfs_release_path(&path);
5336 btrfs_init_path(&path);
/*
 * Second attempt via INODE_EXTREF.  Note that this rewrites the caller's
 * @key in place: type becomes INODE_EXTREF and offset the extref hash of
 * (parent dir id, name).
 */
5338 dir_id = key->offset;
5339 key->type = BTRFS_INODE_EXTREF_KEY;
5340 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5342 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5344 ret = INODE_REF_MISSING;
5348 node = path.nodes[0];
5349 slot = path.slots[0];
5351 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5353 total = btrfs_item_size_nr(node, slot);
5355 /* Iterate all entry of INODE_EXTREF */
5356 while (cur < total) {
5357 ret = INODE_REF_MISSING;
5359 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360 ref_index = btrfs_inode_extref_index(node, extref);
5361 parent = btrfs_inode_extref_parent(node, extref);
5362 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* The extref's parent must be the directory the key originally named. */
5365 if (parent != dir_id)
5368 if (ref_namelen <= BTRFS_NAME_LEN) {
5371 len = BTRFS_NAME_LEN;
/* NOTE(review): key->type was set to INODE_EXTREF above, so the INODE_REF test below cannot be true here. */
5372 warning("root %llu INODE %s[%llu %llu] name too long",
5374 key->type == BTRFS_INODE_REF_KEY ?
5376 key->objectid, key->offset);
5378 read_extent_buffer(node, ref_namebuf,
5379 (unsigned long)(extref + 1), len);
5381 if (len != namelen || strncmp(ref_namebuf, name, len))
5384 *index_ret = ref_index;
5389 len = sizeof(*extref) + ref_namelen;
5390 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5395 btrfs_release_path(&path);
/*
 * Print one error line for each class of problem set in @err for the dir
 * entry (@key, @index, @namebuf) that refers to inode @ino.
 */
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400 u64 ino, u64 index, const char *namebuf,
5401 int name_len, u8 filetype, int err)
5403 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405 root->objectid, key->objectid, key->offset, namebuf,
5407 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
/*
 * The DIR_INDEX wording must come from the DIR_INDEX bits; the old test of
 * DIR_ITEM_MISMATCH printed the wrong state whenever the two differed.
 */
5410 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412 root->objectid, key->objectid, index, namebuf, filetype,
5413 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5416 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5418 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419 root->objectid, ino, index, namebuf, filetype,
5420 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5423 if (err & INODE_REF_MISSING)
5425 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426 root->objectid, ino, key->objectid, namebuf, filetype);
5431 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5433 * Returns error after repair
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* A missing inode item is repaired first. */
5441 if (err & INODE_ITEM_MISSING) {
5442 ret = repair_inode_item_missing(root, ino, filetype);
/* NOTE(review): presumably cleared only when the repair succeeded; the guarding check is not visible here. */
5444 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Any bit other than the two inode-item bits goes to the ternary repair. */
5447 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449 name_len, filetype, err);
5451 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type belonging to directory @ino, starting
 * from the highest key offset and moving backwards with
 * btrfs_previous_item(), and iterate the dir entries packed in each item.
 * NOTE(review): the statement that accumulates the size and the output
 * parameter are not visible in this excerpt.
 */
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5462 struct btrfs_key key;
5463 struct btrfs_path path;
5465 struct btrfs_dir_item *di;
5475 key.offset = (u64)-1;
5477 btrfs_init_path(&path);
5478 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5483 /* if found, go to special case */
5488 ret = btrfs_previous_item(root, &path, ino, type);
5496 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5498 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5500 while (cur < total) {
/* Name lengths above BTRFS_NAME_LEN are clamped. */
5501 len = btrfs_dir_name_len(path.nodes[0], di);
5502 if (len > BTRFS_NAME_LEN)
5503 len = BTRFS_NAME_LEN;
/* The entry's data length is added before stepping to the next packed entry. */
5506 len += btrfs_dir_data_len(path.nodes[0], di);
5508 di = (struct btrfs_dir_item *)((char *)di + len);
5514 btrfs_release_path(&path);
/*
 * Compute the size of directory inode @ino in @root as the sum of what
 * __count_dir_isize() reports for its DIR_ITEM items and for its DIR_INDEX
 * items, and store it in @size.
 *
 * The error() message is printed on a failure path; the conditions that lead
 * there are on lines not visible here.
 */
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5525 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5529 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5533 *size = item_size + index_size;
5537 error("failed to count root %llu INODE[%llu] root size",
5538 root->objectid, ino);
5543 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5546 * @root: the root of the fs/file tree
5547 * @di_key: the key of the DIR_ITEM/DIR_INDEX being checked
5549 * @size: the st_size of the INODE_ITEM
5550 * @ext_ref: the EXTENDED_IREF feature
5552 * Return 0 if no error occurred.
5553 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry packed in the DIR_ITEM/DIR_INDEX item that @path points
 * at. For each entry this verifies data_len, the name length, the name hash
 * (DIR_ITEM only), the INODE_ITEM the entry points to and its file type, the
 * matching INODE_REF/INODE_EXTREF, and the counterpart DIR_INDEX or DIR_ITEM.
 * With repair enabled, detected problems are handed to repair_dir_item().
 *
 * @root:    the fs/file tree being checked
 * @di_key:  key of the DIR_ITEM/DIR_INDEX item
 * @path:    path pointing at the item; it is released and re-searched for
 *           @di_key before returning
 * @size:    every entry's on-disk name length is added to *size
 * @ext_ref: forwarded to find_inode_ref()
 *
 * Problems are accumulated in an error bitmap; DIR_COUNT_AGAIN is set when
 * the item had to be looked up again after a repair.
 */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556 struct btrfs_path *path, u64 *size,
5557 unsigned int ext_ref)
5559 struct btrfs_dir_item *di;
5560 struct btrfs_inode_item *ii;
5561 struct btrfs_key key;
5562 struct btrfs_key location;
5563 struct extent_buffer *node;
5565 char namebuf[BTRFS_NAME_LEN] = {0};
5577 int need_research = 0;
5580 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581 * ignore index check.
5583 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584 index = di_key->offset;
5591 /* since after repair, path and the dir item may be changed */
5592 if (need_research) {
5594 err |= DIR_COUNT_AGAIN;
5595 btrfs_release_path(path);
5596 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597 /* the item was deleted, let path point the last checked item */
5599 if (path->slots[0] == 0)
5600 btrfs_prev_leaf(root, path);
5608 node = path->nodes[0];
5609 slot = path->slots[0];
5611 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612 total = btrfs_item_size_nr(node, slot);
5613 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5615 while (cur < total) {
5616 data_len = btrfs_dir_data_len(node, di);
5619 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5621 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622 di_key->objectid, di_key->offset, data_len);
5624 name_len = btrfs_dir_name_len(node, di);
5625 if (name_len <= BTRFS_NAME_LEN) {
5628 len = BTRFS_NAME_LEN;
5629 warning("root %llu %s[%llu %llu] name too long",
5631 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632 di_key->objectid, di_key->offset);
5634 (*size) += name_len;
5635 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5637 filetype = btrfs_dir_type(node, di);
5639 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640 di_key->offset != btrfs_name_hash(namebuf, len)) {
5642 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643 root->objectid, di_key->objectid, di_key->offset,
5644 namebuf, len, filetype, di_key->offset,
5645 btrfs_name_hash(namebuf, len));
5648 btrfs_dir_item_key_to_cpu(node, di, &location);
5649 /* Ignore related ROOT_ITEM check */
5650 if (location.type == BTRFS_ROOT_ITEM_KEY)
5653 btrfs_release_path(path);
5654 /* Check relative INODE_ITEM(existence/filetype) */
5655 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5657 tmp_err |= INODE_ITEM_MISSING;
5661 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662 struct btrfs_inode_item);
5663 mode = btrfs_inode_mode(path->nodes[0], ii);
5664 if (imode_to_type(mode) != filetype) {
5665 tmp_err |= INODE_ITEM_MISMATCH;
5669 /* Check relative INODE_REF/INODE_EXTREF */
5670 key.objectid = location.objectid;
5671 key.type = BTRFS_INODE_REF_KEY;
5672 key.offset = di_key->objectid;
5673 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5676 /* check relative INDEX/ITEM */
5677 key.objectid = di_key->objectid;
/*
 * Choose the counterpart from the type of the item being checked. @key was
 * just reused for the INODE_REF lookup, so testing key.type here could never
 * match DIR_ITEM and the DIR_INDEX counterpart would never be looked up.
 */
5678 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5679 key.type = BTRFS_DIR_INDEX_KEY;
5682 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): this hashes name_len bytes, while namebuf only holds
 * BTRFS_NAME_LEN bytes and the hash check above uses the clamped len;
 * confirm the behaviour wanted for over-long names.
 */
5683 key.offset = btrfs_name_hash(namebuf, name_len);
5686 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687 name_len, filetype);
5688 /* find_dir_item may find index */
5689 if (key.type == BTRFS_DIR_INDEX_KEY)
5693 if (tmp_err && repair) {
5694 ret = repair_dir_item(root, di_key->objectid,
5695 location.objectid, index,
5696 imode_to_type(mode), namebuf,
5698 if (ret != tmp_err) {
5703 btrfs_release_path(path);
5704 print_dir_item_err(root, di_key, location.objectid, index,
5705 namebuf, name_len, filetype, tmp_err);
5707 len = sizeof(*di) + name_len + data_len;
5708 di = (struct btrfs_dir_item *)((char *)di + len);
5711 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713 root->objectid, di_key->objectid,
/* Leave @path pointing at @di_key again for the caller. */
5720 btrfs_release_path(path);
5721 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5723 err |= ret > 0 ? -ENOENT : ret;
5728 * Wrapper function of btrfs_punch_hole.
5730 * Returns 0 means success.
5731 * Returns not 0 means error.
/*
 * Start a transaction on @root, call btrfs_punch_hole() for inode @ino with
 * the given @start and length, report the outcome with error()/printf(), and
 * commit the transaction.
 *
 * If the transaction cannot be started, PTR_ERR() of the handle is returned
 * before anything is modified.
 */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5736 struct btrfs_trans_handle *trans;
5739 trans = btrfs_start_transaction(root, 1);
5741 return PTR_ERR(trans);
5743 ret = btrfs_punch_hole(trans, root, ino, start, len);
5745 error("failed to add hole [%llu, %llu] in inode [%llu]",
5748 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5751 btrfs_commit_transaction(trans, root);
5756 * Check file extent datasum/hole, update the size of the file extents,
5757 * check and update the last offset of the file extent.
5759 * @root: the root of fs/file tree.
5760 * @fkey: the key of the file extent.
5761 * @nodatasum: INODE_NODATASUM feature.
5762 * @size: the sum of all EXTENT_DATA items size for this inode.
5763 * @end: the offset of the last extent.
5765 * Return 0 if no error occurred.
/*
 * Validate the EXTENT_DATA item at @slot of @node.
 *
 * Inline extents are checked for an empty payload and, when uncompressed, for
 * a payload length that differs from the item's inline length. Regular and
 * prealloc extents are checked against the csum tree via count_csum_range()
 * and for a gap between *end and @fkey->offset. Problems are OR-ed into an
 * error bitmap (FILE_EXTENT_ERROR, ODD_CSUM_ITEM, CSUM_ITEM_MISSING).
 * *size and *end are advanced by the extent's byte count.
 */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768 struct extent_buffer *node, int slot,
5769 unsigned int nodatasum, u64 *size, u64 *end)
5771 struct btrfs_file_extent_item *fi;
5774 u64 extent_num_bytes;
5776 u64 csum_found; /* In byte size, sectorsize aligned */
5777 u64 search_start; /* Logical range start we search for csum */
5778 u64 search_len; /* Logical range len we search for csum */
5779 unsigned int extent_type;
5780 unsigned int is_hole;
5785 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5787 /* Check inline extent */
5788 extent_type = btrfs_file_extent_type(node, fi);
5789 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790 struct btrfs_item *e = btrfs_item_nr(slot);
5791 u32 item_inline_len;
5793 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795 compressed = btrfs_file_extent_compression(node, fi);
5796 if (extent_num_bytes == 0) {
5798 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799 root->objectid, fkey->objectid, fkey->offset);
5800 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline data must match the item's inline length. */
5802 if (!compressed && extent_num_bytes != item_inline_len) {
5804 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805 root->objectid, fkey->objectid, fkey->offset,
5806 extent_num_bytes, item_inline_len);
5807 err |= FILE_EXTENT_ERROR;
5809 *end += extent_num_bytes;
5810 *size += extent_num_bytes;
5814 /* Check extent type */
5815 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817 err |= FILE_EXTENT_ERROR;
5818 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819 root->objectid, fkey->objectid, fkey->offset);
5823 /* Check REG_EXTENT/PREALLOC_EXTENT */
5824 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827 extent_offset = btrfs_file_extent_offset(node, fi);
5828 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor a disk length is a hole. */
5829 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5832 * Check EXTENT_DATA csum
5834 * For plain (uncompressed) extent, we should only check the range
5835 * we're referring to, as it's possible that part of prealloc extent
5836 * has been written, and has csum:
5838 * |<--- Original large preallocated extent A ---->|
5839 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5842 * For compressed extent, we should check the whole range.
/*
 * NOTE(review): the condition choosing between the two range assignments
 * below is on a line not visible here; going by the comment above it is
 * presumably the compression flag -- confirm.
 */
5845 search_start = disk_bytenr + extent_offset;
5846 search_len = extent_num_bytes;
5848 search_start = disk_bytenr;
5849 search_len = disk_num_bytes;
5851 ret = count_csum_range(root, search_start, search_len, &csum_found);
5852 if (csum_found > 0 && nodatasum) {
5853 err |= ODD_CSUM_ITEM;
5854 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855 root->objectid, fkey->objectid, fkey->offset);
5856 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857 !is_hole && (ret < 0 || csum_found < search_len)) {
5858 err |= CSUM_ITEM_MISSING;
5859 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860 root->objectid, fkey->objectid, fkey->offset,
5861 csum_found, search_len);
5862 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863 err |= ODD_CSUM_ITEM;
5864 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865 root->objectid, fkey->objectid, fkey->offset, csum_found);
5868 /* Check EXTENT_DATA hole */
/* Without the no_holes feature the previous extent must end where this one starts. */
5869 if (!no_holes && *end != fkey->offset) {
5871 ret = punch_extent_hole(root, fkey->objectid,
5872 *end, fkey->offset - *end);
5873 if (!repair || ret) {
5874 err |= FILE_EXTENT_ERROR;
5876 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5877 root->objectid, fkey->objectid, fkey->offset,
5878 fkey->objectid, *end);
5882 *end += extent_num_bytes;
5884 *size += extent_num_bytes;
5890 * Set inode item nbytes to @nbytes
5892 * Returns 0 on success
5893 * Returns != 0 on error
/*
 * Rewrite the nbytes field of inode @ino's INODE_ITEM in @root to @nbytes
 * inside a new transaction.
 *
 * The key @path points at on entry is remembered and, once the transaction
 * has been committed, @path is released and searched back to that key so the
 * caller can keep iterating from where it was.
 *
 * @root:   tree that holds the inode
 * @path:   caller's path; reused for the modifying search and then restored
 * @ino:    inode number whose INODE_ITEM is updated
 * @nbytes: new value for the nbytes field
 */
5895 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5896 struct btrfs_path *path,
5897 u64 ino, u64 nbytes)
5899 struct btrfs_trans_handle *trans;
5900 struct btrfs_inode_item *ii;
5901 struct btrfs_key key;
5902 struct btrfs_key research_key;
/* Remember where the caller's path points so it can be restored at the end. */
5906 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5909 key.type = BTRFS_INODE_ITEM_KEY;
5912 trans = btrfs_start_transaction(root, 1);
5913 if (IS_ERR(trans)) {
5914 ret = PTR_ERR(trans);
5919 btrfs_release_path(path);
/* cow=1: the leaf is going to be modified. */
5920 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5928 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5929 struct btrfs_inode_item);
5930 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5931 btrfs_mark_buffer_dirty(path->nodes[0]);
5933 btrfs_commit_transaction(trans, root);
5936 error("failed to set nbytes in inode %llu root %llu",
5937 ino, root->root_key.objectid);
/* Newline belongs at the end of the message, not in the middle of it. */
5939 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5940 root->root_key.objectid, nbytes);
5943 btrfs_release_path(path);
5944 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5951 * Set directory inode isize to @isize.
5953 * Returns 0 on success.
5954 * Returns != 0 on error.
/*
 * Rewrite the size field of an inode's INODE_ITEM in @root to isize inside a
 * new transaction.
 *
 * The key @path points at on entry is remembered and, once the transaction
 * has been committed, @path is released and searched back to that key.
 */
5956 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5957 struct btrfs_path *path,
5960 struct btrfs_trans_handle *trans;
5961 struct btrfs_inode_item *ii;
5962 struct btrfs_key key;
5963 struct btrfs_key research_key;
/* Remember where the caller's path points so it can be restored at the end. */
5967 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5970 key.type = BTRFS_INODE_ITEM_KEY;
5973 trans = btrfs_start_transaction(root, 1);
5974 if (IS_ERR(trans)) {
5975 ret = PTR_ERR(trans);
5980 btrfs_release_path(path);
/* cow=1: the leaf is going to be modified. */
5981 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5989 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5990 struct btrfs_inode_item);
5991 btrfs_set_inode_size(path->nodes[0], ii, isize);
5992 btrfs_mark_buffer_dirty(path->nodes[0]);
5994 btrfs_commit_transaction(trans, root);
5997 error("failed to set isize in inode %llu root %llu",
5998 ino, root->root_key.objectid);
6000 printf("Set isize in inode %llu root %llu to %llu\n",
6001 ino, root->root_key.objectid, isize);
6003 btrfs_release_path(path);
6004 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6011 * Wrapper function for btrfs_add_orphan_item().
6013 * Returns 0 on success.
6014 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino in @root via btrfs_add_orphan_item(),
 * inside a new transaction.
 *
 * The key @path points at on entry is remembered and, once the transaction
 * has been committed, @path is released and searched back to that key.
 */
6016 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6017 struct btrfs_path *path, u64 ino)
6019 struct btrfs_trans_handle *trans;
6020 struct btrfs_key research_key;
/* Remember where the caller's path points so it can be restored at the end. */
6024 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6026 trans = btrfs_start_transaction(root, 1);
6027 if (IS_ERR(trans)) {
6028 ret = PTR_ERR(trans);
6033 btrfs_release_path(path);
6034 ret = btrfs_add_orphan_item(trans, root, path, ino);
6036 btrfs_commit_transaction(trans, root);
6039 error("failed to add inode %llu as orphan item root %llu",
6040 ino, root->root_key.objectid);
6042 printf("Added inode %llu as orphan item root %llu\n",
6043 ino, root->root_key.objectid);
6045 btrfs_release_path(path);
6046 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6052 /* Set inode_item nlink to @ref_count.
6053 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6055 * Returns 0 on success
/*
 * Set the nlink field of inode @ino's INODE_ITEM to @ref_count inside a new
 * transaction. When @ref_count is 0 the inode is first linked into lost+found
 * through link_inode_to_lostfound(), which receives a pointer to the local
 * ref_count.
 *
 * @root:      tree that holds the inode
 * @path:      caller's path; released, reused, and finally searched back to
 *             the key it pointed at on entry
 * @ino:       inode number to repair
 * @name:      name to use for the lost+found link; when NULL or @namelen is
 *             0 the decimal inode number is used instead
 * @namelen:   length of @name, at most BTRFS_NAME_LEN (asserted)
 * @ref_count: number of references found for the inode
 * @filetype:  file type passed on to link_inode_to_lostfound()
 * @nlink:     not written by the lines visible here; presumably receives the
 *             new link count on success -- TODO confirm
 */
6057 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6058 struct btrfs_path *path, u64 ino,
6059 const char *name, u32 namelen,
6060 u64 ref_count, u8 filetype, u64 *nlink)
6062 struct btrfs_trans_handle *trans;
6063 struct btrfs_inode_item *ii;
6064 struct btrfs_key key;
6065 struct btrfs_key old_key;
6066 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember where the caller's path points so it can be restored at the end. */
6072 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6074 if (name && namelen) {
6075 ASSERT(namelen <= BTRFS_NAME_LEN);
6076 memcpy(namebuf, name, namelen);
/* No usable name: fall back to the inode number written in decimal. */
6079 sprintf(namebuf, "%llu", ino);
6080 name_len = count_digits(ino);
6081 printf("Can't find file name for inode %llu, use %s instead\n",
6085 trans = btrfs_start_transaction(root, 1);
6086 if (IS_ERR(trans)) {
6087 ret = PTR_ERR(trans);
6091 btrfs_release_path(path);
6092 /* if refs is 0, put it into lostfound */
6093 if (ref_count == 0) {
6094 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6095 name_len, filetype, &ref_count);
6100 /* reset inode_item's nlink to ref_count */
6102 key.type = BTRFS_INODE_ITEM_KEY;
6105 btrfs_release_path(path);
/* cow=1: the leaf is going to be modified. */
6106 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6112 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6113 struct btrfs_inode_item);
6114 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6115 btrfs_mark_buffer_dirty(path->nodes[0]);
6120 btrfs_commit_transaction(trans, root);
/* Argument order follows the format: inode number first, then root id. */
6124 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6125 ino, root->objectid, namebuf, filetype);
6127 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6128 ino, root->objectid, namebuf, filetype);
6131 btrfs_release_path(path);
6132 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6139 * Check INODE_ITEM and related ITEMs (the same inode number)
6140 * 1. check link count
6141 * 2. check inode ref/extref
6142 * 3. check dir item/index
6144 * @ext_ref: the EXTENDED_IREF feature
6146 * Return 0 if no error occurred.
6147 * Return >0 (an error bitmap) if an error was found or the end of the traversal was hit
/*
 * Check the INODE_ITEM that @path points at together with all following
 * items that carry the same objectid, dispatching on key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() and
 * check_file_extent(). Afterwards the collected counters are compared with
 * the nlink, size and nbytes fields read from the inode item; with repair
 * enabled the matching repair_*_lowmem() helper or punch_extent_hole() is
 * called for a mismatch.
 *
 * Problems are accumulated in an error bitmap; LAST_ITEM in that bitmap is
 * also used to decide how @path is repositioned.
 */
6149 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6150 unsigned int ext_ref)
6152 struct extent_buffer *node;
6153 struct btrfs_inode_item *ii;
6154 struct btrfs_key key;
6155 struct btrfs_key last_key;
6164 u64 extent_size = 0;
6166 unsigned int nodatasum;
6170 char namebuf[BTRFS_NAME_LEN] = {0};
6173 node = path->nodes[0];
6174 slot = path->slots[0];
6176 btrfs_item_key_to_cpu(node, &key, slot);
6177 inode_id = key.objectid;
/* The orphan objectid is not a real inode: just step to the next item. */
6179 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6180 ret = btrfs_next_item(root, path);
/* Snapshot the inode item fields that the later checks compare against. */
6186 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6187 isize = btrfs_inode_size(node, ii);
6188 nbytes = btrfs_inode_nbytes(node, ii);
6189 mode = btrfs_inode_mode(node, ii);
6190 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6191 nlink = btrfs_inode_nlink(node, ii);
6192 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the current key before advancing to the next item. */
6195 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6196 ret = btrfs_next_item(root, path);
6198 /* out will fill 'err' using current statistics */
6200 } else if (ret > 0) {
6205 node = path->nodes[0];
6206 slot = path->slots[0];
6207 btrfs_item_key_to_cpu(node, &key, slot);
6208 if (key.objectid != inode_id)
6212 case BTRFS_INODE_REF_KEY:
6213 ret = check_inode_ref(root, &key, path, namebuf,
6214 &name_len, &refs, mode);
6217 case BTRFS_INODE_EXTREF_KEY:
6218 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6219 warning("root %llu EXTREF[%llu %llu] isn't supported",
6220 root->objectid, key.objectid,
6222 ret = check_inode_extref(root, &key, node, slot, &refs,
6226 case BTRFS_DIR_ITEM_KEY:
6227 case BTRFS_DIR_INDEX_KEY:
6229 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6230 root->objectid, inode_id,
6231 imode_to_type(mode), key.objectid,
6234 ret = check_dir_item(root, &key, path, &size, ext_ref);
6237 case BTRFS_EXTENT_DATA_KEY:
6239 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6240 root->objectid, inode_id, key.objectid,
6243 ret = check_file_extent(root, &key, node, slot,
6244 nodatasum, &extent_size,
6248 case BTRFS_XATTR_ITEM_KEY:
6251 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6252 key.objectid, key.type, key.offset);
/* When LAST_ITEM is set, point @path back at the last key seen. */
6257 if (err & LAST_ITEM) {
6258 btrfs_release_path(path);
6259 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6264 /* verify INODE_ITEM nlink/isize/nbytes */
/* check_dir_item() asked for a recount: recompute the directory size. */
6266 if (repair && (err & DIR_COUNT_AGAIN)) {
6267 err &= ~DIR_COUNT_AGAIN;
6268 count_dir_isize(root, inode_id, &size);
6271 if ((nlink != 1 || refs != 1) && repair) {
6272 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6273 namebuf, name_len, refs, imode_to_type(mode),
6278 err |= LINK_COUNT_ERROR;
6279 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6280 root->objectid, inode_id, nlink);
6284 * Just a warning, as dir inode nbytes is just an
6285 * instructive value.
6287 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6288 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6289 root->objectid, inode_id,
6290 root->fs_info->nodesize);
6293 if (isize != size) {
6295 ret = repair_dir_isize_lowmem(root, path,
6297 if (!repair || ret) {
6300 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6301 root->objectid, inode_id, isize, size);
6305 if (nlink != refs) {
6307 ret = repair_inode_nlinks_lowmem(root, path,
6308 inode_id, namebuf, name_len, refs,
6309 imode_to_type(mode), &nlink);
6310 if (!repair || ret) {
6311 err |= LINK_COUNT_ERROR;
6313 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6314 root->objectid, inode_id, nlink, refs);
6316 } else if (!nlink) {
6318 ret = repair_inode_orphan_item_lowmem(root,
6320 if (!repair || ret) {
6322 error("root %llu INODE[%llu] is orphan item",
6323 root->objectid, inode_id);
6327 if (!nbytes && !no_holes && extent_end < isize) {
6329 ret = punch_extent_hole(root, inode_id,
6330 extent_end, isize - extent_end);
6331 if (!repair || ret) {
6332 err |= NBYTES_ERROR;
6334 "root %llu INODE[%llu] size %llu should have a file extent hole",
6335 root->objectid, inode_id, isize);
6339 if (nbytes != extent_size) {
6341 ret = repair_inode_nbytes_lowmem(root, path,
6342 inode_id, extent_size);
6343 if (!repair || ret) {
6344 err |= NBYTES_ERROR;
6346 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6347 root->objectid, inode_id, nbytes,
6353 if (err & LAST_ITEM)
6354 btrfs_next_item(root, path);
6359 * Insert the missing inode item and inode ref.
6361 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6362 * Root dir should be handled specially because root dir is the root of fs.
6364 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root
 * according to the bits set in @err.
 *
 * INODE_REF_MISSING: insert an INODE_REF named ".." that points from the
 * inode to itself, inside a new transaction.
 * INODE_ITEM_MISSING: call repair_inode_item_missing() with file type
 * BTRFS_FT_DIR.
 *
 * NOTE(review): the return-value checks guarding the bit clearing and the
 * messages are on lines not visible here.
 */
6366 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6368 struct btrfs_trans_handle *trans;
6369 struct btrfs_key key;
6370 struct btrfs_path path;
6371 int filetype = BTRFS_FT_DIR;
6374 btrfs_init_path(&path);
6376 if (err & INODE_REF_MISSING) {
6377 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6378 key.type = BTRFS_INODE_REF_KEY;
6379 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6381 trans = btrfs_start_transaction(root, 1);
6382 if (IS_ERR(trans)) {
6383 ret = PTR_ERR(trans);
6387 btrfs_release_path(&path);
6388 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6392 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6393 BTRFS_FIRST_FREE_OBJECTID,
6394 BTRFS_FIRST_FREE_OBJECTID, 0);
6398 printf("Add INODE_REF[%llu %llu] name %s\n",
6399 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6401 err &= ~INODE_REF_MISSING;
6404 error("fail to insert first inode's ref");
6405 btrfs_commit_transaction(trans, root);
6408 if (err & INODE_ITEM_MISSING) {
6409 ret = repair_inode_item_missing(root,
6410 BTRFS_FIRST_FREE_OBJECTID, filetype);
6413 err &= ~INODE_ITEM_MISSING;
6417 error("fail to repair first inode");
6418 btrfs_release_path(&path);
6423 * check first root dir's inode_item and inode_ref
6425 * returns 0 means no error
6426 * returns >0 means error
6427 * returns <0 means fatal error
/*
 * Check that the first inode (objectid BTRFS_FIRST_FREE_OBJECTID) of @root
 * has an INODE_ITEM whose mode is a directory and an INODE_REF named "..".
 * The check is bypassed for a root that is being dropped. Detected problems
 * are passed to repair_fs_first_inode() and whatever remains is reported
 * with error().
 *
 * Returns the negative value of the last lookup if it failed, otherwise the
 * error bitmap.
 */
6429 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6431 struct btrfs_path path;
6432 struct btrfs_key key;
6433 struct btrfs_inode_item *ii;
6439 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6440 key.type = BTRFS_INODE_ITEM_KEY;
6443 /* For root being dropped, we don't need to check first inode */
6444 if (btrfs_root_refs(&root->root_item) == 0 &&
6445 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6446 BTRFS_FIRST_FREE_OBJECTID)
6449 btrfs_init_path(&path);
6450 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6455 err |= INODE_ITEM_MISSING;
6457 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6458 struct btrfs_inode_item);
6459 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The first inode of a tree must be a directory. */
6460 if (imode_to_type(mode) != BTRFS_FT_DIR)
6461 err |= INODE_ITEM_MISMATCH;
6464 /* lookup first inode ref */
6465 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6466 key.type = BTRFS_INODE_REF_KEY;
6467 /* special index value */
6470 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6476 btrfs_release_path(&path);
6479 err = repair_fs_first_inode(root, err);
6481 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6482 error("root dir INODE_ITEM is %s",
6483 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6484 if (err & INODE_REF_MISSING)
6485 error("root dir INODE_REF is missing");
6487 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec by building a temporary match key and
 * searching rec->backref_tree with compare_extent_backref().
 *
 * NOTE(review): the initializer of the match key and the condition under
 * which parent/full_backref are filled in are on lines not visible here;
 * presumably the parent form is used when @parent is non-zero -- confirm.
 */
6490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6491 u64 parent, u64 root)
6493 struct rb_node *node;
6494 struct tree_backref *back = NULL;
6495 struct tree_backref match = {
6502 match.parent = parent;
6503 match.node.full_backref = 1;
6508 node = rb_search(&rec->backref_tree, &match.node.node,
6509 (rb_compare_keys)compare_extent_backref, NULL);
6511 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec by building a temporary match key (which
 * includes found_ref and disk_bytenr) and searching rec->backref_tree with
 * compare_extent_backref().
 *
 * NOTE(review): part of the initializer and the condition under which
 * parent/full_backref are filled in are on lines not visible here.
 */
6516 static struct data_backref *find_data_backref(struct extent_record *rec,
6517 u64 parent, u64 root,
6518 u64 owner, u64 offset,
6520 u64 disk_bytenr, u64 bytes)
6522 struct rb_node *node;
6523 struct data_backref *back = NULL;
6524 struct data_backref match = {
6531 .found_ref = found_ref,
6532 .disk_bytenr = disk_bytenr,
6536 match.parent = parent;
6537 match.node.full_backref = 1;
6542 node = rb_search(&rec->backref_tree, &match.node.node,
6543 (rb_compare_keys)compare_extent_backref, NULL);
6545 back = to_data_backref(rb_node_to_extent_backref(node));
6550 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6551 * blocks and integrity of fs tree items.
6553 * @root: the root of the tree to be checked.
6554 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6555 * @check_all: if not 0, also check the tree's tree blocks;
6556 * otherwise only check the relationships between fs tree items, and
6557 * @root MUST be a fs tree root.
6558 * Returns 0 represents OK.
6559 * Returns not 0 represents error.
/*
 * Check one tree: first check_fs_first_inode(), then repeated
 * walk_down_tree_v2() / walk_up_tree_v2() calls over a path that starts
 * either at the root node or, for a root with no refs and a non-zero
 * drop_progress key, at the position recorded in drop_progress/drop_level.
 *
 * NOTE(review): the loop around the walk calls and the handling of their
 * return values are on lines not visible here.
 */
6561 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6562 struct btrfs_root *root, unsigned int ext_ref,
6566 struct btrfs_path path;
6567 struct node_refs nrefs;
6568 struct btrfs_root_item *root_item = &root->root_item;
6573 memset(&nrefs, 0, sizeof(nrefs));
6576 * We need to manually check the first inode item (256)
6577 * As the following traversal function will only start from
6578 * the first inode item in the leaf, if inode item (256) is
6579 * missing we will skip it forever.
6581 ret = check_fs_first_inode(root, ext_ref);
6587 level = btrfs_header_level(root->node);
6588 btrfs_init_path(&path);
/* Live root, or a drop that has not progressed: start from the root node. */
6590 if (btrfs_root_refs(root_item) > 0 ||
6591 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6592 path.nodes[level] = root->node;
6593 path.slots[level] = 0;
6594 extent_buffer_get(root->node);
6596 struct btrfs_key key;
/* Otherwise start from the key and level recorded in the root item. */
6598 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6599 level = root_item->drop_level;
6600 path.lowest_level = level;
6601 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6608 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6609 ext_ref, check_all);
6613 /* if ret is negative, walk shall stop */
6619 ret = walk_up_tree_v2(root, &path, &level);
6621 /* Normal exit, reset ret to err */
6628 btrfs_release_path(&path);
6633 * Iterate all items in the tree and call check_inode_item() to check.
6635 * @root: the root of the tree to be checked.
6636 * @ext_ref: the EXTENDED_IREF feature
6638 * Return 0 if no error found.
6639 * Return <0 for error.
/*
 * Reset the cached block groups of the root's fs_info, then run
 * check_btrfs_root() on @root with no transaction and 0 as the last argument.
 */
6641 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6643 reset_cached_block_groups(root->fs_info);
6644 return check_btrfs_root(NULL, root, ext_ref, 0);
6648 * Find the relative ref for root_ref and root_backref.
6650 * @root: the root of the root tree.
6651 * @ref_key: the key of the root ref.
6653 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF/ROOT_BACKREF item at @slot of @node has a
 * counterpart of the opposite type with objectid and offset swapped, and
 * that both agree on dirid, sequence, name length and name.
 *
 * ROOT_REF_MISSING is set when the counterpart cannot be found and
 * ROOT_REF_MISMATCH when the two items disagree.
 */
6655 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6656 struct extent_buffer *node, int slot)
6658 struct btrfs_path path;
6659 struct btrfs_key key;
6660 struct btrfs_root_ref *ref;
6661 struct btrfs_root_ref *backref;
6662 char ref_name[BTRFS_NAME_LEN] = {0};
6663 char backref_name[BTRFS_NAME_LEN] = {0};
6669 u32 backref_namelen;
6674 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6675 ref_dirid = btrfs_root_ref_dirid(node, ref);
6676 ref_seq = btrfs_root_ref_sequence(node, ref);
6677 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Never read more than BTRFS_NAME_LEN bytes of name into the local buffer. */
6679 if (ref_namelen <= BTRFS_NAME_LEN) {
6682 len = BTRFS_NAME_LEN;
6683 warning("%s[%llu %llu] ref_name too long",
6684 ref_key->type == BTRFS_ROOT_REF_KEY ?
6685 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6688 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6690 /* Find relative root_ref */
6691 key.objectid = ref_key->offset;
/* Adding both type constants and subtracting the current one yields the other type. */
6692 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6693 key.offset = ref_key->objectid;
6695 btrfs_init_path(&path);
6696 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6698 err |= ROOT_REF_MISSING;
6699 error("%s[%llu %llu] couldn't find relative ref",
6700 ref_key->type == BTRFS_ROOT_REF_KEY ?
6701 "ROOT_REF" : "ROOT_BACKREF",
6702 ref_key->objectid, ref_key->offset);
6706 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6707 struct btrfs_root_ref);
6708 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6709 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6710 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6712 if (backref_namelen <= BTRFS_NAME_LEN) {
6713 len = backref_namelen;
6715 len = BTRFS_NAME_LEN;
6716 warning("%s[%llu %llu] ref_name too long",
6717 key.type == BTRFS_ROOT_REF_KEY ?
6718 "ROOT_REF" : "ROOT_BACKREF",
6719 key.objectid, key.offset);
6721 read_extent_buffer(path.nodes[0], backref_name,
6722 (unsigned long)(backref + 1), len);
/* The name comparison is only reached when the two name lengths are equal. */
6724 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6725 ref_namelen != backref_namelen ||
6726 strncmp(ref_name, backref_name, len)) {
6727 err |= ROOT_REF_MISMATCH;
6728 error("%s[%llu %llu] mismatch relative ref",
6729 ref_key->type == BTRFS_ROOT_REF_KEY ?
6730 "ROOT_REF" : "ROOT_BACKREF",
6731 ref_key->objectid, ref_key->offset);
6734 btrfs_release_path(&path);
6739 * Check all fs/file tree in low_memory mode.
6741 * 1. for fs tree root item, call check_fs_root_v2()
6742 * 2. for fs tree root ref/backref, call check_root_ref()
6744 * Return 0 if no error occurred.
/*
 * Iterate over the root tree starting at BTRFS_FS_TREE_OBJECTID and stopping
 * once the objectid exceeds BTRFS_LAST_FREE_OBJECTID. Each ROOT_ITEM of an
 * fs tree is read and checked with check_fs_root_v2(); each
 * ROOT_REF/ROOT_BACKREF is checked with check_root_ref().
 *
 * A tree-reloc root is read without the root cache and freed again after
 * the check.
 */
6746 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6748 struct btrfs_root *tree_root = fs_info->tree_root;
6749 struct btrfs_root *cur_root = NULL;
6750 struct btrfs_path path;
6751 struct btrfs_key key;
6752 struct extent_buffer *node;
6753 unsigned int ext_ref;
6758 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6760 btrfs_init_path(&path);
6761 key.objectid = BTRFS_FS_TREE_OBJECTID;
6763 key.type = BTRFS_ROOT_ITEM_KEY;
6765 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6769 } else if (ret > 0) {
6775 node = path.nodes[0];
6776 slot = path.slots[0];
6777 btrfs_item_key_to_cpu(node, &key, slot);
6778 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6780 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6781 fs_root_objectid(key.objectid)) {
6782 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6783 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6786 key.offset = (u64)-1;
6787 cur_root = btrfs_read_fs_root(fs_info, &key);
6790 if (IS_ERR(cur_root)) {
6791 error("Fail to read fs/subvol tree: %lld",
6797 ret = check_fs_root_v2(cur_root, ext_ref);
/* The reloc root was read without the cache, so it is freed here. */
6800 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6801 btrfs_free_fs_root(cur_root);
6802 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6803 key.type == BTRFS_ROOT_BACKREF_KEY) {
6804 ret = check_root_ref(tree_root, &key, node, slot);
6808 ret = btrfs_next_item(tree_root, &path);
6818 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check: prints a banner to stderr unless
 * progress reporting is enabled, then dispatches on the global check_mode to
 * check_fs_roots_v2() for CHECK_MODE_LOWMEM or to check_fs_roots() otherwise.
 */
6822 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6823 struct cache_tree *root_cache)
6827 if (!ctx.progress_enabled)
6828 fprintf(stderr, "checking fs roots\n");
6829 if (check_mode == CHECK_MODE_LOWMEM)
6830 ret = check_fs_roots_v2(fs_info);
6832 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref in rec->backref_tree and look for inconsistencies:
 * a backref not found in the extent tree, a tree backref that was never
 * referenced back, a data backref whose found_ref differs from num_refs or
 * whose disk_bytenr/bytes differ from the extent record, and finally a total
 * found count that differs from rec->refs. Each case has a diagnostic that
 * is written to stderr.
 *
 * NOTE(review): how @print_errs gates the messages, how the result is
 * recorded and what tree backrefs add to the found count are on lines not
 * visible here.
 */
6837 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6839 struct extent_backref *back, *tmp;
6840 struct tree_backref *tback;
6841 struct data_backref *dback;
6845 rbtree_postorder_for_each_entry_safe(back, tmp,
6846 &rec->backref_tree, node) {
6847 if (!back->found_extent_tree) {
6851 if (back->is_data) {
6852 dback = to_data_backref(back);
6853 fprintf(stderr, "Data backref %llu %s %llu"
6854 " owner %llu offset %llu num_refs %lu"
6855 " not found in extent tree\n",
6856 (unsigned long long)rec->start,
6857 back->full_backref ?
6859 back->full_backref ?
6860 (unsigned long long)dback->parent:
6861 (unsigned long long)dback->root,
6862 (unsigned long long)dback->owner,
6863 (unsigned long long)dback->offset,
6864 (unsigned long)dback->num_refs);
6866 tback = to_tree_backref(back);
6867 fprintf(stderr, "Tree backref %llu parent %llu"
6868 " root %llu not found in extent tree\n",
6869 (unsigned long long)rec->start,
6870 (unsigned long long)tback->parent,
6871 (unsigned long long)tback->root);
6874 if (!back->is_data && !back->found_ref) {
6878 tback = to_tree_backref(back);
6879 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6880 (unsigned long long)rec->start,
6881 back->full_backref ? "parent" : "root",
6882 back->full_backref ?
6883 (unsigned long long)tback->parent :
6884 (unsigned long long)tback->root, back);
6886 if (back->is_data) {
6887 dback = to_data_backref(back);
6888 if (dback->found_ref != dback->num_refs) {
6892 fprintf(stderr, "Incorrect local backref count"
6893 " on %llu %s %llu owner %llu"
6894 " offset %llu found %u wanted %u back %p\n",
6895 (unsigned long long)rec->start,
6896 back->full_backref ?
6898 back->full_backref ?
6899 (unsigned long long)dback->parent:
6900 (unsigned long long)dback->root,
6901 (unsigned long long)dback->owner,
6902 (unsigned long long)dback->offset,
6903 dback->found_ref, dback->num_refs, back);
6905 if (dback->disk_bytenr != rec->start) {
6909 fprintf(stderr, "Backref disk bytenr does not"
6910 " match extent record, bytenr=%llu, "
6911 "ref bytenr=%llu\n",
6912 (unsigned long long)rec->start,
6913 (unsigned long long)dback->disk_bytenr);
6916 if (dback->bytes != rec->nr) {
6920 fprintf(stderr, "Backref bytes do not match "
6921 "extent backref, bytenr=%llu, ref "
6922 "bytes=%llu, backref bytes=%llu\n",
6923 (unsigned long long)rec->start,
6924 (unsigned long long)rec->nr,
6925 (unsigned long long)dback->bytes);
6928 if (!back->is_data) {
6931 dback = to_data_backref(back);
/* Data backrefs contribute their found_ref count to the global total. */
6932 found += dback->found_ref;
6935 if (found != rec->refs) {
6939 fprintf(stderr, "Incorrect global backref count "
6940 "on %llu found %llu wanted %llu\n",
6941 (unsigned long long)rec->start,
6942 (unsigned long long)found,
6943 (unsigned long long)rec->refs);
/*
 * Per-node destructor passed to rb_free_nodes(): maps @node back to the
 * extent_backref that embeds it.
 * NOTE(review): the statement releasing that backref is presumably on a line
 * not shown here.
 */
6949 static void __free_one_backref(struct rb_node *node)
6951 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down rec->backref_tree by handing every node to __free_one_backref()
 * through rb_free_nodes().
 */
6956 static void free_all_extent_backrefs(struct extent_record *rec)
6958 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, recover the
 * extent_record that embeds it, unlink it from the cache and release all of
 * its backrefs.
 * NOTE(review): the surrounding loop and the release of the record itself are
 * presumably on lines not shown here.
 */
6961 static void free_extent_record_cache(struct cache_tree *extent_cache)
6963 struct cache_extent *cache;
6964 struct extent_record *rec;
6967 cache = first_cache_extent(extent_cache);
6970 rec = container_of(cache, struct extent_record, cache);
6971 remove_cache_extent(extent_cache, cache);
6972 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it remains to be verified.
 *
 * The record is considered finished only when all of these hold:
 *  - its content and owner reference have both been checked
 *  - the extent item's ref count equals the counted refs, and is non-zero
 *  - it has no duplicate records
 *  - all_backpointers_checked(rec, 0) returns 0
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set
 * In that case it is unlinked from the cache, its backrefs are freed and it
 * is removed from whatever list rec->list is on.
 */
6977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6978 struct extent_record *rec)
6980 if (rec->content_checked && rec->owner_ref_checked &&
6981 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6982 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6983 !rec->bad_full_backref && !rec->crossing_stripes &&
6984 !rec->wrong_chunk_type) {
6985 remove_cache_extent(extent_cache, &rec->cache);
6986 free_all_extent_backrefs(rec);
6987 list_del_init(&rec->list);
/*
 * Check that tree block @buf is really referenced from the tree named by its
 * header owner.
 *
 * Two strategies are visible:
 *  1. Scan @rec's backrefs for a tree backref whose root equals
 *     btrfs_header_owner(buf).
 *  2. Otherwise read the owner's root, search it for the first key of @buf
 *     with path.lowest_level set one level above @buf, and test whether the
 *     parent node's block pointer at the resulting slot equals buf->start.
 *
 * Returns 0 when the reference was found and 1 when it was not.
 * NOTE(review): what happens when btrfs_read_fs_root() or btrfs_search_slot()
 * fails is on lines not shown here.
 */
6993 static int check_owner_ref(struct btrfs_root *root,
6994 struct extent_record *rec,
6995 struct extent_buffer *buf)
6997 struct extent_backref *node, *tmp;
6998 struct tree_backref *back;
6999 struct btrfs_root *ref_root;
7000 struct btrfs_key key;
7001 struct btrfs_path path;
7002 struct extent_buffer *parent;
7007 rbtree_postorder_for_each_entry_safe(node, tmp,
7008 &rec->backref_tree, node) {
7011 if (!node->found_ref)
7013 if (node->full_backref)
7015 back = to_tree_backref(node);
7016 if (btrfs_header_owner(buf) == back->root)
/* A root block is not expected to reach the slow path below. */
7019 BUG_ON(rec->is_root);
7021 /* try to find the block by search corresponding fs tree */
7022 key.objectid = btrfs_header_owner(buf);
7023 key.type = BTRFS_ROOT_ITEM_KEY;
7024 key.offset = (u64)-1;
7026 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7027 if (IS_ERR(ref_root))
7030 level = btrfs_header_level(buf);
/* Use the first key stored in @buf as the search target. */
7032 btrfs_item_key_to_cpu(buf, &key, 0);
7034 btrfs_node_key_to_cpu(buf, &key, 0);
7036 btrfs_init_path(&path);
/* Stop the search at the would-be parent of @buf. */
7037 path.lowest_level = level + 1;
7038 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7042 parent = path.nodes[level + 1];
7043 if (parent && buf->start == btrfs_node_blockptr(parent,
7044 path.slots[level + 1]))
7047 btrfs_release_path(&path);
7048 return found ? 0 : 1;
/*
 * Inspect @rec's backrefs, looking at each one as a tree backref, and test
 * whether its root is BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the values returned for a full backref, for a matching root
 * and for the fall-through case are on lines not shown here; the caller in
 * record_bad_block_io() treats a zero result as "not an extent tree record".
 */
7051 static int is_extent_tree_record(struct extent_record *rec)
7053 struct extent_backref *node, *tmp;
7054 struct tree_backref *back;
7057 rbtree_postorder_for_each_entry_safe(node, tmp,
7058 &rec->backref_tree, node) {
7061 back = to_tree_backref(node);
7062 if (node->full_backref)
7064 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * The extent record covering the range (start, len) is looked up in
 * @extent_cache and tested with is_extent_tree_record(). The record's
 * parent_key is then converted to CPU byte order and passed, together with
 * the range, to btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): the early exits for "no cache entry" and "not an extent tree
 * record" are on lines not shown here.
 */
7071 static int record_bad_block_io(struct btrfs_fs_info *info,
7072 struct cache_tree *extent_cache,
7075 struct extent_record *rec;
7076 struct cache_extent *cache;
7077 struct btrfs_key key;
7079 cache = lookup_cache_extent(extent_cache, start, len);
7083 rec = container_of(cache, struct extent_record, cache);
7084 if (!is_extent_tree_record(rec))
7087 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7088 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 inside @buf.
 *
 * Node (btrfs_header_level(buf) != 0): the two btrfs_key_ptr structures are
 * read out and written back in each other's position. The node's first key is
 * also pushed to the parents via btrfs_fixup_low_keys().
 * NOTE(review): that fixup is presumably limited to the slot == 0 case.
 *
 * Leaf: both items' data are copied into temporary malloc() buffers and
 * written back crosswise, the item offsets and sizes are exchanged, and the
 * keys are rewritten with btrfs_set_item_key_unsafe() by pointing
 * path->slots[0] at each slot in turn.
 *
 * NOTE(review): in the leaf path item1_data (item1_size bytes) is written
 * with length item2_size and item2_data with length item1_size. When the two
 * items differ in size this reads past the smaller temporary buffer -- verify
 * whether callers guarantee equal sizes.
 */
7091 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7092 struct extent_buffer *buf, int slot)
7094 if (btrfs_header_level(buf)) {
7095 struct btrfs_key_ptr ptr1, ptr2;
7097 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7098 sizeof(struct btrfs_key_ptr));
7099 read_extent_buffer(buf, &ptr2,
7100 btrfs_node_key_ptr_offset(slot + 1),
7101 sizeof(struct btrfs_key_ptr));
7102 write_extent_buffer(buf, &ptr1,
7103 btrfs_node_key_ptr_offset(slot + 1),
7104 sizeof(struct btrfs_key_ptr));
7105 write_extent_buffer(buf, &ptr2,
7106 btrfs_node_key_ptr_offset(slot),
7107 sizeof(struct btrfs_key_ptr));
7109 struct btrfs_disk_key key;
7110 btrfs_node_key(buf, &key, 0);
7111 btrfs_fixup_low_keys(root, path, &key,
7112 btrfs_header_level(buf) + 1);
7115 struct btrfs_item *item1, *item2;
7116 struct btrfs_key k1, k2;
7117 char *item1_data, *item2_data;
7118 u32 item1_offset, item2_offset, item1_size, item2_size;
7120 item1 = btrfs_item_nr(slot);
7121 item2 = btrfs_item_nr(slot + 1);
7122 btrfs_item_key_to_cpu(buf, &k1, slot);
7123 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7124 item1_offset = btrfs_item_offset(buf, item1);
7125 item2_offset = btrfs_item_offset(buf, item2);
7126 item1_size = btrfs_item_size(buf, item1);
7127 item2_size = btrfs_item_size(buf, item2);
7129 item1_data = malloc(item1_size);
7132 item2_data = malloc(item2_size);
7138 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7139 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7141 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7142 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7146 btrfs_set_item_offset(buf, item1, item2_offset);
7147 btrfs_set_item_offset(buf, item2, item1_offset);
7148 btrfs_set_item_size(buf, item1, item2_size);
7149 btrfs_set_item_size(buf, item2, item1_size);
/* Keys are swapped through the path so each slot ends up with the other's key. */
7151 path->slots[0] = slot;
7152 btrfs_set_item_key_unsafe(root, path, &k2);
7153 path->slots[0] = slot + 1;
7154 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block found at path->nodes[lowest_level].
 *
 * Adjacent key pairs (i, i + 1) are read as node keys or item keys, compared
 * with btrfs_comp_cpu_keys(), and handed to swap_values(); the buffer is then
 * marked dirty.
 * NOTE(review): pairs that already compare "< 0" are presumably skipped, and
 * the choice between node and item accessors presumably depends on level.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; if that
 * accessor returns an unsigned type, a block with zero items makes the bound
 * wrap instead of terminating the loop immediately -- confirm.
 */
7159 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7161 struct extent_buffer *buf;
7162 struct btrfs_key k1, k2;
7164 int level = path->lowest_level;
7167 buf = path->nodes[level];
7168 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7170 btrfs_node_key_to_cpu(buf, &k1, i);
7171 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7173 btrfs_item_key_to_cpu(buf, &k1, i);
7174 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7176 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7178 ret = swap_values(root, path, buf, i);
7181 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is first tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF, the types the existing
 * comment says can be dealt with when missing. The deletion is announced on
 * stdout, the item headers after @slot are moved down by one with
 * memmove_extent_buffer(), nritems is decremented and the buffer is marked
 * dirty. The leaf's first key is also propagated with btrfs_fixup_low_keys().
 * Only the item header array is shifted here; no item data is moved by the
 * visible code.
 * NOTE(review): any other key type presumably makes the function bail out,
 * and the low-key fixup is presumably limited to slot == 0.
 */
7187 static int delete_bogus_item(struct btrfs_root *root,
7188 struct btrfs_path *path,
7189 struct extent_buffer *buf, int slot)
7191 struct btrfs_key key;
7192 int nritems = btrfs_header_nritems(buf);
7194 btrfs_item_key_to_cpu(buf, &key, slot);
7196 /* These are all the keys we can deal with missing. */
7197 if (key.type != BTRFS_DIR_INDEX_KEY &&
7198 key.type != BTRFS_EXTENT_ITEM_KEY &&
7199 key.type != BTRFS_METADATA_ITEM_KEY &&
7200 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7201 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7204 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7205 (unsigned long long)key.objectid, key.type,
7206 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
7207 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7208 btrfs_item_nr_offset(slot + 1),
7209 sizeof(struct btrfs_item) *
7210 (nritems - slot - 1));
7211 btrfs_set_header_nritems(buf, nritems - 1);
7213 struct btrfs_disk_key disk_key;
7215 btrfs_item_key(buf, &disk_key, 0);
7216 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7218 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout enforced by the checks below:
 *  - item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(fs_info)
 *  - item i (i > 0) must end exactly where item i - 1 begins
 * When an item ends beyond its expected boundary, delete_bogus_item() is
 * tried on it. When it ends short of the boundary, the gap is computed as
 * "shift" and the item's data is moved up by that many bytes.
 * NOTE(review): the "can't fix" messages presumably apply when the deletion
 * does not succeed, and the item's offset is presumably bumped by the same
 * shift -- the argument line of btrfs_set_item_offset() is not shown.
 */
7222 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7224 struct extent_buffer *buf;
7228 /* We should only get this for leaves */
7229 BUG_ON(path->lowest_level);
7230 buf = path->nodes[0];
7232 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7233 unsigned int shift = 0, offset;
7235 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7236 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
7237 if (btrfs_item_end_nr(buf, i) >
7238 BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
7239 ret = delete_bogus_item(root, path, buf, i);
7242 fprintf(stderr, "item is off the end of the "
7243 "leaf, can't fix\n");
7247 shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
7248 btrfs_item_end_nr(buf, i);
7249 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7250 btrfs_item_offset_nr(buf, i - 1)) {
7251 if (btrfs_item_end_nr(buf, i) >
7252 btrfs_item_offset_nr(buf, i - 1)) {
7253 ret = delete_bogus_item(root, path, buf, i);
7256 fprintf(stderr, "items overlap, can't fix\n");
7260 shift = btrfs_item_offset_nr(buf, i - 1) -
7261 btrfs_item_end_nr(buf, i);
7266 printf("Shifting item nr %d by %u bytes in block %llu\n",
7267 i, shift, (unsigned long long)buf->start);
7268 offset = btrfs_item_offset_nr(buf, i);
/* Source and destination may overlap, hence the memmove variant. */
7269 memmove_extent_buffer(buf,
7270 btrfs_leaf_data(buf) + offset + shift,
7271 btrfs_leaf_data(buf) + offset,
7272 btrfs_item_size_nr(buf, i));
7273 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7275 btrfs_mark_buffer_dirty(buf);
7279 * We may have moved things, in which case we want to exit so we don't
7280 * write those changes out. Once we have proper abort functionality in
7281 * progs this can be changed to something nicer.
7288 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7289 * then just return -EIO.
/*
 * @root:   any root of the filesystem; only root->fs_info is used here
 * @buf:    the tree block that failed validation
 * @status: failure reported by the block checker; only
 *          BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 *          BTRFS_TREE_BLOCK_INVALID_OFFSETS are handled
 *
 * Every root referencing @buf is obtained with btrfs_find_all_roots(). For
 * each one the root is read, a transaction is started, and the block is
 * located again with btrfs_search_slot() at @buf's own level with
 * skip_check_block set; the matching fixer (fix_key_order() or
 * fix_item_offset()) is then run on the resulting path and the transaction
 * is committed.
 * NOTE(review): the final argument 1 to btrfs_search_slot() is presumably the
 * COW flag, so the fixers would operate on a copy of the block.
 */
7291 static int try_to_fix_bad_block(struct btrfs_root *root,
7292 struct extent_buffer *buf,
7293 enum btrfs_tree_block_status status)
7295 struct btrfs_trans_handle *trans;
7296 struct ulist *roots;
7297 struct ulist_node *node;
7298 struct btrfs_root *search_root;
7299 struct btrfs_path path;
7300 struct ulist_iterator iter;
7301 struct btrfs_key root_key, key;
7304 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7305 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7308 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7312 btrfs_init_path(&path);
7313 ULIST_ITER_INIT(&iter);
7314 while ((node = ulist_next(roots, &iter))) {
/* node->val holds the objectid of a root that references @buf. */
7315 root_key.objectid = node->val;
7316 root_key.type = BTRFS_ROOT_ITEM_KEY;
7317 root_key.offset = (u64)-1;
7319 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7326 trans = btrfs_start_transaction(search_root, 0);
7327 if (IS_ERR(trans)) {
7328 ret = PTR_ERR(trans);
7332 path.lowest_level = btrfs_header_level(buf);
/* The block is known to be bad; do not let the search reject it. */
7333 path.skip_check_block = 1;
7334 if (path.lowest_level)
7335 btrfs_node_key_to_cpu(buf, &key, 0);
7337 btrfs_item_key_to_cpu(buf, &key, 0);
7338 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7341 btrfs_commit_transaction(trans, search_root);
7344 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7345 ret = fix_key_order(search_root, &path);
7346 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7347 ret = fix_item_offset(search_root, &path);
7349 btrfs_commit_transaction(trans, search_root);
7352 btrfs_release_path(&path);
7353 btrfs_commit_transaction(trans, search_root);
7356 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record for [buf->start, buf->len) is looked up in @extent_cache and
 * stamped with the block's generation, level and the objectid of its first
 * key (when the block has items). The block is then validated with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key; a
 * non-clean status is passed to try_to_fix_bad_block(), and a block that is
 * still bad is reported on stderr. A block that passes is marked
 * content_checked, and its owner reference is either accepted outright when
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or verified with
 * check_owner_ref(). Finally maybe_free_extent_rec() gets a chance to retire
 * the record.
 * NOTE(review): the repair attempt is presumably conditional on a repair
 * mode; the guarding lines and the return values are not shown here.
 */
7360 static int check_block(struct btrfs_root *root,
7361 struct cache_tree *extent_cache,
7362 struct extent_buffer *buf, u64 flags)
7364 struct extent_record *rec;
7365 struct cache_extent *cache;
7366 struct btrfs_key key;
7367 enum btrfs_tree_block_status status;
7371 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7374 rec = container_of(cache, struct extent_record, cache);
7375 rec->generation = btrfs_header_generation(buf);
7377 level = btrfs_header_level(buf);
7378 if (btrfs_header_nritems(buf) > 0) {
7381 btrfs_item_key_to_cpu(buf, &key, 0);
7383 btrfs_node_key_to_cpu(buf, &key, 0);
7385 rec->info_objectid = key.objectid;
7387 rec->info_level = level;
7389 if (btrfs_is_leaf(buf))
7390 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7392 status = btrfs_check_node(root, &rec->parent_key, buf);
7394 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7396 status = try_to_fix_bad_block(root, buf, status);
7397 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7399 fprintf(stderr, "bad block %llu\n",
7400 (unsigned long long)buf->start);
7403 * Signal to callers we need to start the scan over
7404 * again since we'll have cowed blocks.
7409 rec->content_checked = 1;
7410 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7411 rec->owner_ref_checked = 1;
7413 ret = check_owner_ref(root, rec, buf);
7415 rec->owner_ref_checked = 1;
7419 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look for an existing tree backref of @rec matching (@parent, @root).
 *
 * The walk goes over the rec->backrefs list and compares back->parent with
 * @parent and back->root with @root, taking node->full_backref into account.
 * NOTE(review): presumably a non-zero @parent selects the full-backref
 * comparison on parent and a zero @parent the comparison on root -- the
 * branch lines are not shown here.
 *
 * NOTE(review): this lookup uses the rec->backrefs list, whereas
 * add_tree_backref() inserts new entries into rec->backref_tree -- confirm
 * that both containers are populated consistently.
 */
7424 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7425 u64 parent, u64 root)
7427 struct list_head *cur = rec->backrefs.next;
7428 struct extent_backref *node;
7429 struct tree_backref *back;
7431 while(cur != &rec->backrefs) {
7432 node = to_extent_backref(cur);
7436 back = to_tree_backref(node);
7438 if (!node->full_backref)
7440 if (parent == back->parent)
7443 if (node->full_backref)
7445 if (back->root == root)
/*
 * Allocate a tree backref for @rec with a zeroed embedded extent_backref
 * node. Two initialisations are visible: one stores @parent and sets
 * full_backref to 1, the other sets full_backref to 0.
 * NOTE(review): the selection between them (presumably "parent != 0"), the
 * store of @root, the malloc() failure check and the return statement are on
 * lines not shown here.
 */
7453 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7454 u64 parent, u64 root)
7456 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared; the remaining fields are set below. */
7460 memset(&ref->node, 0, sizeof(ref->node));
7462 ref->parent = parent;
7463 ref->node.full_backref = 1;
7466 ref->node.full_backref = 0;
/*
 * Look for an existing data backref of @rec.
 *
 * Two comparisons are visible while walking the rec->backrefs list: one on
 * back->parent against @parent, and one on the (@root, @owner, @offset)
 * triple. For the triple, an entry that was already found (node->found_ref)
 * is additionally tested for a different byte count or disk_bytenr when the
 * caller passes found_ref.
 * NOTE(review): such a mismatching entry is presumably skipped so that the
 * caller allocates a separate backref.
 *
 * NOTE(review): this lookup uses the rec->backrefs list, whereas
 * add_data_backref() inserts new entries into rec->backref_tree -- confirm
 * that both containers are populated consistently.
 */
7473 static struct data_backref *find_data_backref(struct extent_record *rec,
7474 u64 parent, u64 root,
7475 u64 owner, u64 offset,
7477 u64 disk_bytenr, u64 bytes)
7479 struct list_head *cur = rec->backrefs.next;
7480 struct extent_backref *node;
7481 struct data_backref *back;
7483 while(cur != &rec->backrefs) {
7484 node = to_extent_backref(cur);
7488 back = to_data_backref(node);
7490 if (!node->full_backref)
7492 if (parent == back->parent)
7495 if (node->full_backref)
7497 if (back->root == root && back->owner == owner &&
7498 back->offset == offset) {
7499 if (found_ref && node->found_ref &&
7500 (back->bytes != bytes ||
7501 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec. The embedded node is zeroed and flagged
 * is_data. Two initialisations are visible: one stores @parent and sets
 * full_backref to 1, the other stores @offset and sets full_backref to 0.
 * The byte count is initialised from max_size, and rec->max_size is raised
 * to max_size when that is larger.
 * NOTE(review): the selection between the two initialisations, the malloc()
 * failure check and the return statement are on lines not shown here.
 */
7511 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7512 u64 parent, u64 root,
7513 u64 owner, u64 offset,
7516 struct data_backref *ref = malloc(sizeof(*ref));
7520 memset(&ref->node, 0, sizeof(ref->node));
7521 ref->node.is_data = 1;
7524 ref->parent = parent;
7527 ref->node.full_backref = 1;
7531 ref->offset = offset;
7532 ref->node.full_backref = 0;
7534 ref->bytes = max_size;
/* Keep the record's size estimate at least as large as this reference. */
7537 if (max_size > rec->max_size)
7538 rec->max_size = max_size;
7542 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group containing rec->start
 * (looked up through global_info) has flags that do not fit the extent:
 *  - a data extent must live in a BTRFS_BLOCK_GROUP_DATA group
 *  - a metadata extent must live in a SYSTEM or METADATA group
 *  - for metadata with at least one backref, the first backref decides the
 *    exact type: root == BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, anything
 *    else requires METADATA; a data backref on a tree block is itself an error
 */
7543 static void check_extent_type(struct extent_record *rec)
7545 struct btrfs_block_group_cache *bg_cache;
7547 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7551 /* data extent, check chunk directly*/
7552 if (!rec->metadata) {
7553 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7554 rec->wrong_chunk_type = 1;
7558 /* metadata extent, check the obvious case first */
7559 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7560 BTRFS_BLOCK_GROUP_METADATA))) {
7561 rec->wrong_chunk_type = 1;
7566 * Check SYSTEM extent, as it's also marked as metadata, we can only
7567 * make sure it's a SYSTEM extent by its backref
7569 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7570 struct extent_backref *node;
7571 struct tree_backref *tback;
7574 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7575 if (node->is_data) {
7576 /* tree block shouldn't have data backref */
7577 rec->wrong_chunk_type = 1;
7580 tback = container_of(node, struct tree_backref, node);
7582 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7583 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7585 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7586 if (!(bg_cache->flags & bg_type))
7587 rec->wrong_chunk_type = 1;
7592 * Allocate a new extent record, fill default values from @tmpl and insert into
7593 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7594 * the cache, otherwise it fails.
/*
 * Field handling worth knowing when building @tmpl:
 *  - tmpl->max_size must be non-zero (BUG_ON otherwise)
 *  - rec->nr becomes max(tmpl->nr, tmpl->max_size), while the cache entry
 *    is sized with tmpl->nr as given
 *  - num_duplicates, bad_full_backref, crossing_stripes and wrong_chunk_type
 *    start at 0; flag_block_full_backref starts as FLAG_UNSET
 *  - the global bytes_used counter grows by rec->nr
 * After insertion the record is tested for stripe crossing (using
 * global_info->nodesize as the length) and for chunk type.
 * NOTE(review): the stripe-crossing check is presumably limited to metadata
 * records, and the handling of malloc()/insert failures is on lines not
 * shown here.
 */
7596 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7597 struct extent_record *tmpl)
7599 struct extent_record *rec;
7602 BUG_ON(tmpl->max_size == 0);
7603 rec = malloc(sizeof(*rec));
7606 rec->start = tmpl->start;
7607 rec->max_size = tmpl->max_size;
7608 rec->nr = max(tmpl->nr, tmpl->max_size);
7609 rec->found_rec = tmpl->found_rec;
7610 rec->content_checked = tmpl->content_checked;
7611 rec->owner_ref_checked = tmpl->owner_ref_checked;
7612 rec->num_duplicates = 0;
7613 rec->metadata = tmpl->metadata;
7614 rec->flag_block_full_backref = FLAG_UNSET;
7615 rec->bad_full_backref = 0;
7616 rec->crossing_stripes = 0;
7617 rec->wrong_chunk_type = 0;
7618 rec->is_root = tmpl->is_root;
7619 rec->refs = tmpl->refs;
7620 rec->extent_item_refs = tmpl->extent_item_refs;
7621 rec->parent_generation = tmpl->parent_generation;
7622 INIT_LIST_HEAD(&rec->backrefs);
7623 INIT_LIST_HEAD(&rec->dups);
7624 INIT_LIST_HEAD(&rec->list);
7625 rec->backref_tree = RB_ROOT;
7626 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7627 rec->cache.start = tmpl->start;
7628 rec->cache.size = tmpl->nr;
7629 ret = insert_cache_extent(extent_cache, &rec->cache);
7634 bytes_used += rec->nr;
7637 rec->crossing_stripes = check_crossing_stripes(global_info,
7638 rec->start, global_info->nodesize);
7639 check_extent_type(rec);
7644 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7646 * - refs - if found, increase refs
7647 * - is_root - if found, set
7648 * - content_checked - if found, set
7649 * - owner_ref_checked - if found, set
7651 * If not found, create a new one, initialize and insert.
/*
 * Implementation notes for the "found" path (a record overlapping
 * [tmpl->start, tmpl->nr) already exists in @extent_cache):
 *  - rec->nr is reset to max(tmpl->nr, tmpl->max_size)
 *  - if @tmpl comes from an extent item (found_rec) and either starts at a
 *    different bytenr or the record already had an extent item, the record is
 *    queued on the global duplicate_extents list and a new duplicate entry
 *    (start, max_size, metadata, extent_item_refs) is appended to rec->dups
 *  - extent_item_refs is taken from @tmpl unless this was a duplicate; an
 *    already set value triggers a diagnostic on stderr
 *  - content_checked / owner_ref_checked are only ever raised, never cleared
 *  - parent_key is always overwritten; parent_generation only when non-zero
 *  - max_size only grows
 * The record is then re-tested for stripe crossing and chunk type, and
 * offered to maybe_free_extent_rec().
 *
 * When no record exists, add_extent_rec_nolookup() creates one.
 */
7653 static int add_extent_rec(struct cache_tree *extent_cache,
7654 struct extent_record *tmpl)
7656 struct extent_record *rec;
7657 struct cache_extent *cache;
7661 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7663 rec = container_of(cache, struct extent_record, cache);
7667 rec->nr = max(tmpl->nr, tmpl->max_size);
7670 * We need to make sure to reset nr to whatever the extent
7671 * record says was the real size, this way we can compare it to
7674 if (tmpl->found_rec) {
7675 if (tmpl->start != rec->start || rec->found_rec) {
7676 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
7679 if (list_empty(&rec->list))
7680 list_add_tail(&rec->list,
7681 &duplicate_extents);
7684 * We have to do this song and dance in case we
7685 * find an extent record that falls inside of
7686 * our current extent record but does not have
7687 * the same objectid.
7689 tmp = malloc(sizeof(*tmp));
7692 tmp->start = tmpl->start;
7693 tmp->max_size = tmpl->max_size;
7696 tmp->metadata = tmpl->metadata;
7697 tmp->extent_item_refs = tmpl->extent_item_refs;
7698 INIT_LIST_HEAD(&tmp->list);
7699 list_add_tail(&tmp->list, &rec->dups);
7700 rec->num_duplicates++;
7707 if (tmpl->extent_item_refs && !dup) {
7708 if (rec->extent_item_refs) {
7709 fprintf(stderr, "block %llu rec "
7710 "extent_item_refs %llu, passed %llu\n",
7711 (unsigned long long)tmpl->start,
7712 (unsigned long long)
7713 rec->extent_item_refs,
7714 (unsigned long long)tmpl->extent_item_refs);
7716 rec->extent_item_refs = tmpl->extent_item_refs;
7720 if (tmpl->content_checked)
7721 rec->content_checked = 1;
7722 if (tmpl->owner_ref_checked)
7723 rec->owner_ref_checked = 1;
7724 memcpy(&rec->parent_key, &tmpl->parent_key,
7725 sizeof(tmpl->parent_key));
7726 if (tmpl->parent_generation)
7727 rec->parent_generation = tmpl->parent_generation;
7728 if (rec->max_size < tmpl->max_size)
7729 rec->max_size = tmpl->max_size;
7732 * A metadata extent can't cross stripe_len boundary, otherwise
7733 * kernel scrub won't be able to handle it.
7734 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7738 rec->crossing_stripes = check_crossing_stripes(
7739 global_info, rec->start,
7740 global_info->nodesize);
7741 check_extent_type(rec);
7742 maybe_free_extent_rec(extent_cache, rec);
7746 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a zeroed template starting there is
 * inserted with add_extent_rec_nolookup() and looked up again. An existing
 * backref is searched with find_tree_backref() and a new one is created with
 * alloc_tree_backref(). Two flags can be raised on it: node.found_ref and
 * node.found_extent_tree; a flag that is already set produces an "already
 * exists" diagnostic on stderr. The backref is inserted into
 * rec->backref_tree with compare_extent_backref as the ordering, after which
 * the record's chunk type is re-checked and maybe_free_extent_rec() is given
 * a chance to retire it.
 * NOTE(review): @found_ref presumably selects which flag is raised (non-zero
 * for a reference seen in a tree, zero for an extent tree item), and the
 * rb_insert() is presumably limited to newly allocated backrefs (see the
 * "insert" flag).
 */
7751 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7752 u64 parent, u64 root, int found_ref)
7754 struct extent_record *rec;
7755 struct tree_backref *back;
7756 struct cache_extent *cache;
7758 bool insert = false;
7760 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7762 struct extent_record tmpl;
7764 memset(&tmpl, 0, sizeof(tmpl));
7765 tmpl.start = bytenr;
7770 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7774 /* really a bug in cache_extent implement now */
7775 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7780 rec = container_of(cache, struct extent_record, cache);
7781 if (rec->start != bytenr) {
7783 * Several cause, from unaligned bytenr to over lapping extents
7788 back = find_tree_backref(rec, parent, root);
7790 back = alloc_tree_backref(rec, parent, root);
7797 if (back->node.found_ref) {
7798 fprintf(stderr, "Extent back ref already exists "
7799 "for %llu parent %llu root %llu \n",
7800 (unsigned long long)bytenr,
7801 (unsigned long long)parent,
7802 (unsigned long long)root);
7804 back->node.found_ref = 1;
7806 if (back->node.found_extent_tree) {
7807 fprintf(stderr, "Extent back ref already exists "
7808 "for %llu parent %llu root %llu \n",
7809 (unsigned long long)bytenr,
7810 (unsigned long long)parent,
7811 (unsigned long long)root);
7813 back->node.found_extent_tree = 1;
7816 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7817 compare_extent_backref));
7818 check_extent_type(rec);
7819 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a zeroed template with the given start
 * and @max_size is inserted with add_extent_rec_nolookup(). The record's
 * max_size is raised to @max_size when smaller.
 *
 * Two kinds of update are visible on the backref:
 *  - reference side: @num_refs must be 1 (BUG_ON), the per-backref found_ref
 *    counter is incremented, bytes/disk_bytenr are refreshed (erasing the
 *    node from the tree so it can be re-inserted), and the record is marked
 *    content_checked and owner_ref_checked
 *  - extent tree side: num_refs is stored and found_extent_tree is set, with
 *    an "already exists" diagnostic when it was set before
 * NOTE(review): the selection between the two sides presumably follows
 * @found_ref; the branch lines are not shown here.
 */
7823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7824 u64 parent, u64 root, u64 owner, u64 offset,
7825 u32 num_refs, int found_ref, u64 max_size)
7827 struct extent_record *rec;
7828 struct data_backref *back;
7829 struct cache_extent *cache;
7831 bool insert = false;
7833 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7835 struct extent_record tmpl;
7837 memset(&tmpl, 0, sizeof(tmpl));
7838 tmpl.start = bytenr;
7840 tmpl.max_size = max_size;
7842 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7846 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7851 rec = container_of(cache, struct extent_record, cache);
7852 if (rec->max_size < max_size)
7853 rec->max_size = max_size;
7856 * If found_ref is set then max_size is the real size and must match the
7857 * existing refs. So if we have already found a ref then we need to
7858 * make sure that this ref matches the existing one, otherwise we need
7859 * to add a new backref so we can notice that the backrefs don't match
7860 * and we need to figure out who is telling the truth. This is to
7861 * account for that awful fsync bug I introduced where we'd end up with
7862 * a btrfs_file_extent_item that would have its length include multiple
7863 * prealloc extents or point inside of a prealloc extent.
7865 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7868 back = alloc_data_backref(rec, parent, root, owner, offset,
7875 BUG_ON(num_refs != 1);
7876 if (back->node.found_ref)
7877 BUG_ON(back->bytes != max_size);
7878 back->node.found_ref = 1;
7879 back->found_ref += 1;
7880 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7881 back->bytes = max_size;
7882 back->disk_bytenr = bytenr;
7884 /* Need to reinsert if not already in the tree */
7886 rb_erase(&back->node.node, &rec->backref_tree);
7891 rec->content_checked = 1;
7892 rec->owner_ref_checked = 1;
7894 if (back->node.found_extent_tree) {
7895 fprintf(stderr, "Extent back ref already exists "
7896 "for %llu parent %llu root %llu "
7897 "owner %llu offset %llu num_refs %lu\n",
7898 (unsigned long long)bytenr,
7899 (unsigned long long)parent,
7900 (unsigned long long)root,
7901 (unsigned long long)owner,
7902 (unsigned long long)offset,
7903 (unsigned long)num_refs);
7905 back->num_refs = num_refs;
7906 back->node.found_extent_tree = 1;
7909 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7910 compare_extent_backref));
7912 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for processing: it is added to @seen
 * first and then to @pending.
 * NOTE(review): a failure to add to @seen (presumably "already seen")
 * presumably returns early without touching @pending. The result of the
 * second add_cache_extent() is not checked.
 */
7916 static int add_pending(struct cache_tree *pending,
7917 struct cache_tree *seen, u64 bytenr, u32 size)
7920 ret = add_cache_extent(seen, bytenr, size);
7923 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources are consulted in this order: @reada (a single entry goes into
 * bits[0]), then @nodes searched from slightly before @last (backing up
 * 32768 bytes when possible, else from 0), then @pending. Consecutive cache
 * extents are copied until the array is full or the tree ends. When more
 * than 8 slots remain, entries from @pending whose start lies within 32768
 * bytes after the previous range are appended as well.
 *
 * NOTE(review): node_start is declared unsigned long but initialised from
 * the u64 @last; on targets where unsigned long is 32 bits this truncates
 * large byte numbers -- confirm whether that is acceptable.
 */
7927 static int pick_next_pending(struct cache_tree *pending,
7928 struct cache_tree *reada,
7929 struct cache_tree *nodes,
7930 u64 last, struct block_info *bits, int bits_nr,
7933 unsigned long node_start = last;
7934 struct cache_extent *cache;
7937 cache = search_cache_extent(reada, 0);
7939 bits[0].start = cache->start;
7940 bits[0].size = cache->size;
7945 if (node_start > 32768)
7946 node_start -= 32768;
7948 cache = search_cache_extent(nodes, node_start);
7950 cache = search_cache_extent(nodes, 0);
7953 cache = search_cache_extent(pending, 0);
7958 bits[ret].start = cache->start;
7959 bits[ret].size = cache->size;
7960 cache = next_cache_extent(cache);
7962 } while (cache && ret < bits_nr);
7968 bits[ret].start = cache->start;
7969 bits[ret].size = cache->size;
7970 cache = next_cache_extent(cache);
7972 } while (cache && ret < bits_nr);
7974 if (bits_nr - ret > 8) {
7975 u64 lookup = bits[0].start + bits[0].size;
7976 struct cache_extent *next;
7977 next = search_cache_extent(pending, lookup);
7979 if (next->start - lookup > 32768)
7981 bits[ret].start = next->start;
7982 bits[ret].size = next->size;
7983 lookup = next->start + next->size;
7987 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: recovers the
 * chunk_record embedding @cache and unlinks it from its list and dextents
 * list heads.
 * NOTE(review): the release of the record itself is presumably on a line not
 * shown here.
 */
7995 static void free_chunk_record(struct cache_extent *cache)
7997 struct chunk_record *rec;
7999 rec = container_of(cache, struct chunk_record, cache);
8000 list_del_init(&rec->list);
8001 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry destructor.
 */
8005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8007 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device cache: recovers the device_record
 * embedding @node.
 * NOTE(review): the release of the record is presumably on a line not shown
 * here.
 */
8010 static void free_device_record(struct rb_node *node)
8012 struct device_record *rec;
8014 rec = container_of(node, struct device_record, node);
/*
 * Instantiates FREE_RB_BASED_TREE for device_cache with free_device_record()
 * as the per-node destructor.
 * NOTE(review): presumably this expands to the definition of the device
 * cache's tree-freeing function -- confirm against the macro definition.
 */
8018 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: the record's cache extent is inserted into
 * tree->tree and the record is appended to the tree->block_groups list.
 * NOTE(review): a failed insert presumably returns before the list append.
 */
8020 int insert_block_group_record(struct block_group_tree *tree,
8021 struct block_group_record *bg_rec)
8025 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8029 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: recovers the
 * block_group_record embedding @cache and unlinks it from its list.
 * NOTE(review): the release of the record itself is presumably on a line not
 * shown here.
 */
8033 static void free_block_group_record(struct cache_extent *cache)
8035 struct block_group_record *rec;
8037 rec = container_of(cache, struct block_group_record, cache);
8038 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree, using free_block_group_record() as the
 * per-entry destructor.
 */
8042 void free_block_group_tree(struct block_group_tree *tree)
8044 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree using insert_cache_extent2() (see the comment below
 * for why the plain insert is not used). The record is also appended to both
 * orphan lists, no_chunk_orphans and no_device_orphans.
 * NOTE(review): a failed insert presumably returns before the list appends.
 */
8047 int insert_device_extent_record(struct device_extent_tree *tree,
8048 struct device_extent_record *de_rec)
8053 * Device extent is a bit different from the other extents, because
8054 * the extents which belong to the different devices may have the
8055 * same start and size, so we need use the special extent cache
8056 * search/insert functions.
8058 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8062 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8063 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: recovers the
 * device_extent_record embedding @cache and removes it from chunk_list and
 * device_list when it is still linked on them.
 * NOTE(review): the release of the record itself is presumably on a line not
 * shown here.
 */
8067 static void free_device_extent_record(struct cache_extent *cache)
8069 struct device_extent_record *rec;
8071 rec = container_of(cache, struct device_extent_record, cache);
8072 if (!list_empty(&rec->chunk_list))
8073 list_del_init(&rec->chunk_list);
8074 if (!list_empty(&rec->device_list))
8075 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree, using free_device_extent_record() as
 * the per-entry destructor.
 */
8079 void free_device_extent_tree(struct device_extent_tree *tree)
8081 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Converts a v0 extent ref item at @slot of @leaf into a backref: refs whose
 * objectid is below BTRFS_FIRST_FREE_OBJECTID are added as tree backrefs,
 * with key.objectid as the bytenr and key.offset as the parent. The data
 * backref call uses the same bytenr and parent, zero root/owner/offset and
 * the item's v0 ref count.
 * NOTE(review): the data backref call is presumably the else branch.
 */
8084 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8085 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8086 struct extent_buffer *leaf, int slot)
8088 struct btrfs_extent_ref_v0 *ref0;
8089 struct btrfs_key key;
8092 btrfs_item_key_to_cpu(leaf, &key, slot);
8093 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8094 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8095 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8098 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8099 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the item's stripe count
 * (btrfs_chunk_record_size(num_stripes)). The cache extent is keyed by
 * key->offset with the chunk length as its size; the key triple, the leaf's
 * generation and the chunk's owner, stripe_len, type flags, io width/align,
 * sector size and stripe counts are copied in, followed by devid, offset and
 * device UUID of every stripe.
 * NOTE(review): an allocation failure prints "memory allocation failed"; what
 * follows that message is on a line not shown here.
 */
8105 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8106 struct btrfs_key *key,
8109 struct btrfs_chunk *ptr;
8110 struct chunk_record *rec;
8113 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8114 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8116 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8118 fprintf(stderr, "memory allocation failed\n");
8122 INIT_LIST_HEAD(&rec->list);
8123 INIT_LIST_HEAD(&rec->dextents);
8126 rec->cache.start = key->offset;
8127 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8129 rec->generation = btrfs_header_generation(leaf);
8131 rec->objectid = key->objectid;
8132 rec->type = key->type;
8133 rec->offset = key->offset;
8135 rec->length = rec->cache.size;
8136 rec->owner = btrfs_chunk_owner(leaf, ptr);
8137 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8138 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8139 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8140 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8141 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8142 rec->num_stripes = num_stripes;
8143 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8145 for (i = 0; i < rec->num_stripes; ++i) {
8146 rec->stripes[i].devid =
8147 btrfs_stripe_devid_nr(leaf, ptr, i);
8148 rec->stripes[i].offset =
8149 btrfs_stripe_offset_nr(leaf, ptr, i);
8150 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8151 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first run through btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error(). A chunk record is then built with
 * btrfs_new_chunk_record() and inserted; a failed insert is reported on
 * stderr as "existed".
 * NOTE(review): an invalid chunk is presumably not inserted, as the "ignore
 * it" message suggests; the disposal of a record that failed to insert is on
 * lines not shown here.
 */
8158 static int process_chunk_item(struct cache_tree *chunk_cache,
8159 struct btrfs_key *key, struct extent_buffer *eb,
8162 struct chunk_record *rec;
8163 struct btrfs_chunk *chunk;
8166 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8168 * Do extra check for this chunk item,
8170 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8171 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8172 * and owner<->key_type check.
8174 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8177 error("chunk(%llu, %llu) is not valid, ignore it",
8178 key->offset, btrfs_chunk_length(eb, chunk));
8181 rec = btrfs_new_chunk_record(eb, key, slot);
8182 ret = insert_cache_extent(chunk_cache, &rec->cache);
8184 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8185 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * @dev_cache, ordered by device_record_compare. A failed insert is reported
 * on stderr as "existed".
 *
 * rec->devid is assigned twice: first from key->offset, then overwritten with
 * the id stored in the item itself.
 *
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * below (or on lines not shown here) is left uninitialised -- confirm that
 * every field read later is covered.
 */
8192 static int process_device_item(struct rb_root *dev_cache,
8193 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8195 struct btrfs_dev_item *ptr;
8196 struct device_record *rec;
8199 ptr = btrfs_item_ptr(eb,
8200 slot, struct btrfs_dev_item);
8202 rec = malloc(sizeof(*rec));
8204 fprintf(stderr, "memory allocation failed\n");
8208 rec->devid = key->offset;
8209 rec->generation = btrfs_header_generation(eb);
8211 rec->objectid = key->objectid;
8212 rec->type = key->type;
8213 rec->offset = key->offset;
8215 rec->devid = btrfs_device_id(eb, ptr);
8216 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8217 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8219 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8221 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record describing the BLOCK_GROUP_ITEM at
 * @slot of @leaf.
 *
 * The cache range is taken from the key (start = key->objectid,
 * size = key->offset), the generation from the leaf header, the key fields
 * are copied verbatim, and the flags are read from the on-disk item.  The
 * embedded list head is initialised empty.
 *
 * NOTE(review): the handling after the allocation-failure message and the
 * final return are not visible in this listing; presumably the new record
 * is returned to the caller - confirm.
 */
8228 struct block_group_record *
8229 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8232 struct btrfs_block_group_item *ptr;
8233 struct block_group_record *rec;
8235 rec = calloc(1, sizeof(*rec));
8237 fprintf(stderr, "memory allocation failed\n");
/* The key of a block group item encodes its byte range. */
8241 rec->cache.start = key->objectid;
8242 rec->cache.size = key->offset;
8244 rec->generation = btrfs_header_generation(leaf);
8246 rec->objectid = key->objectid;
8247 rec->type = key->type;
8248 rec->offset = key->offset;
8250 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8251 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8253 INIT_LIST_HEAD(&rec->list);
/*
 * Record the BLOCK_GROUP_ITEM at @slot of @eb in @block_group_cache.
 *
 * A record is built with btrfs_new_block_group_record() and handed to
 * insert_block_group_record().  A message is written to stderr for a block
 * group that already existed.
 *
 * NOTE(review): the condition guarding the message and the return
 * statements are not visible in this listing.
 */
8258 static int process_block_group_item(struct block_group_tree *block_group_cache,
8259 struct btrfs_key *key,
8260 struct extent_buffer *eb, int slot)
8262 struct block_group_record *rec;
8265 rec = btrfs_new_block_group_record(eb, key, slot);
8266 ret = insert_block_group_record(block_group_cache, rec);
8268 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8269 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record describing the DEV_EXTENT item at
 * @slot of @leaf.
 *
 * The cache entry is keyed by key->objectid with start = key->offset and
 * size = the extent length read from the item.  The key fields and the
 * leaf header generation are copied, the owning chunk objectid and chunk
 * offset are read from the item, and both embedded list heads are
 * initialised empty.
 *
 * NOTE(review): the left-hand side of the chunk offset assignment, the
 * handling after the allocation-failure message and the final return are
 * not visible in this listing.
 */
8276 struct device_extent_record *
8277 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8278 struct btrfs_key *key, int slot)
8280 struct device_extent_record *rec;
8281 struct btrfs_dev_extent *ptr;
8283 rec = calloc(1, sizeof(*rec));
8285 fprintf(stderr, "memory allocation failed\n");
8289 rec->cache.objectid = key->objectid;
8290 rec->cache.start = key->offset;
8292 rec->generation = btrfs_header_generation(leaf);
8294 rec->objectid = key->objectid;
8295 rec->type = key->type;
8296 rec->offset = key->offset;
8298 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8299 rec->chunk_objecteid =
8300 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8302 btrfs_dev_extent_chunk_offset(leaf, ptr);
8303 rec->length = btrfs_dev_extent_length(leaf, ptr);
/* The cache size mirrors the on-disk extent length. */
8304 rec->cache.size = rec->length;
8306 INIT_LIST_HEAD(&rec->chunk_list);
8307 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the DEV_EXTENT item at @slot of @eb in @dev_extent_cache.
 *
 * A record is built with btrfs_new_device_extent_record() and handed to
 * insert_device_extent_record().  A message is written for a device extent
 * that already existed.
 *
 * NOTE(review): the return type line, the condition guarding the message
 * and the return statements are not visible in this listing.
 */
8313 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8314 struct btrfs_key *key, struct extent_buffer *eb,
8317 struct device_extent_record *rec;
8320 rec = btrfs_new_device_extent_record(eb, key, slot);
8321 ret = insert_device_extent_record(dev_extent_cache, rec);
8324 "Device extent[%llu, %llu, %llu] existed.\n",
8325 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM or METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with every inline backref stored inside the item.
 *
 * Visible steps:
 *  - the extent length is the nodesize for METADATA_ITEM keys and
 *    key.offset otherwise;
 *  - an extent whose bytenr is not sectorsize aligned is reported;
 *  - an item smaller than struct btrfs_extent_item is handled on the v0
 *    compatibility path (only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined)
 *    and recorded straight away with add_extent_rec();
 *  - otherwise the ref count and the TREE_BLOCK flag are read, the length
 *    is validated (nodesize for metadata, sectorsize aligned for data) and
 *    the extent is recorded with add_extent_rec();
 *  - the inline refs that follow the extent item are walked up to the end
 *    of the item and turned into tree or data backrefs according to their
 *    type; an unknown type is reported as a corrupt extent record.
 *
 * NOTE(review): several conditions, the loop header of the inline ref walk
 * and the return statements are not visible in this listing.
 */
8332 static int process_extent_item(struct btrfs_root *root,
8333 struct cache_tree *extent_cache,
8334 struct extent_buffer *eb, int slot)
8336 struct btrfs_extent_item *ei;
8337 struct btrfs_extent_inline_ref *iref;
8338 struct btrfs_extent_data_ref *dref;
8339 struct btrfs_shared_data_ref *sref;
8340 struct btrfs_key key;
8341 struct extent_record tmpl;
8346 u32 item_size = btrfs_item_size_nr(eb, slot);
8352 btrfs_item_key_to_cpu(eb, &key, slot);
8354 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8356 num_bytes = root->fs_info->nodesize;
8358 num_bytes = key.offset;
8361 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8362 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8363 key.objectid, root->fs_info->sectorsize);
/* Items shorter than the current extent item are the old v0 format. */
8366 if (item_size < sizeof(*ei)) {
8367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8368 struct btrfs_extent_item_v0 *ei0;
8369 if (item_size != sizeof(*ei0)) {
8371 "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8372 key.objectid, key.type, key.offset,
8373 btrfs_header_bytenr(eb), slot);
8376 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8377 refs = btrfs_extent_refs_v0(eb, ei0);
8381 memset(&tmpl, 0, sizeof(tmpl));
8382 tmpl.start = key.objectid;
8383 tmpl.nr = num_bytes;
8384 tmpl.extent_item_refs = refs;
8385 tmpl.metadata = metadata;
8387 tmpl.max_size = num_bytes;
8389 return add_extent_rec(extent_cache, &tmpl);
8392 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8393 refs = btrfs_extent_refs(eb, ei);
8394 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8398 if (metadata && num_bytes != root->fs_info->nodesize) {
8399 error("ignore invalid metadata extent, length %llu does not equal to %u",
8400 num_bytes, root->fs_info->nodesize);
8403 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8404 error("ignore invalid data extent, length %llu is not aligned to %u",
8405 num_bytes, root->fs_info->sectorsize);
8409 memset(&tmpl, 0, sizeof(tmpl));
8410 tmpl.start = key.objectid;
8411 tmpl.nr = num_bytes;
8412 tmpl.extent_item_refs = refs;
8413 tmpl.metadata = metadata;
8415 tmpl.max_size = num_bytes;
/*
 * NOTE(review): unlike the v0 path above, the result of add_extent_rec()
 * is not captured here - confirm that ignoring a failure is intended.
 */
8416 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item ... */
8418 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info for tree blocks keyed as EXTENT_ITEM. */
8419 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8420 key.type == BTRFS_EXTENT_ITEM_KEY)
8421 ptr += sizeof(struct btrfs_tree_block_info);
8423 end = (unsigned long)ei + item_size;
8425 iref = (struct btrfs_extent_inline_ref *)ptr;
8426 type = btrfs_extent_inline_ref_type(eb, iref);
8427 offset = btrfs_extent_inline_ref_offset(eb, iref);
8429 case BTRFS_TREE_BLOCK_REF_KEY:
8430 ret = add_tree_backref(extent_cache, key.objectid,
8434 "add_tree_backref failed (extent items tree block): %s",
8437 case BTRFS_SHARED_BLOCK_REF_KEY:
8438 ret = add_tree_backref(extent_cache, key.objectid,
8442 "add_tree_backref failed (extent items shared block): %s",
8445 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref body overlays the offset field of the inline ref. */
8446 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8447 add_data_backref(extent_cache, key.objectid, 0,
8448 btrfs_extent_data_ref_root(eb, dref),
8449 btrfs_extent_data_ref_objectid(eb,
8451 btrfs_extent_data_ref_offset(eb, dref),
8452 btrfs_extent_data_ref_count(eb, dref),
8455 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref body follows the inline ref header. */
8456 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8457 add_data_backref(extent_cache, key.objectid, offset,
8459 btrfs_shared_data_ref_count(eb, sref),
8463 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8464 key.objectid, key.type, num_bytes);
/* Advance by the size of the ref type that was just decoded. */
8467 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the byte range [@offset, @offset + @bytes) of block group
 * @cache is described by exactly one free space entry, then unlink that
 * entry from cache->free_space_ctl.
 *
 * Before the lookup, the range is trimmed around every super block mirror
 * stripe that btrfs_rmap_block() maps into this block group: a stripe at
 * the start or covering the start moves @offset forward, a stripe reaching
 * the end shortens @bytes, and a stripe in the middle makes the function
 * call itself for the left part and continue with the right part.
 *
 * A message is written to stderr when no entry is found or when the entry
 * found has a different offset or length than requested.
 *
 * NOTE(review): the inner loop over the mapped stripes, several branch
 * bodies and the return statements are not visible in this listing.
 */
8474 static int check_cache_range(struct btrfs_root *root,
8475 struct btrfs_block_group_cache *cache,
8476 u64 offset, u64 bytes)
8478 struct btrfs_free_space *entry;
8484 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8485 bytenr = btrfs_sb_offset(i);
8486 ret = btrfs_rmap_block(root->fs_info,
8487 cache->key.objectid, bytenr, 0,
8488 &logical, &nr, &stripe_len);
8493 if (logical[nr] + stripe_len <= offset)
8495 if (offset + bytes <= logical[nr])
8497 if (logical[nr] == offset) {
8498 if (stripe_len >= bytes) {
8502 bytes -= stripe_len;
8503 offset += stripe_len;
8504 } else if (logical[nr] < offset) {
8505 if (logical[nr] + stripe_len >=
8510 bytes = (offset + bytes) -
8511 (logical[nr] + stripe_len);
8512 offset = logical[nr] + stripe_len;
8515 * Could be tricky, the super may land in the
8516 * middle of the area we're checking. First
8517 * check the easiest case, it's at the end.
8519 if (logical[nr] + stripe_len >=
8521 bytes = logical[nr] - offset;
8525 /* Check the left side */
8526 ret = check_cache_range(root, cache,
8528 logical[nr] - offset);
8534 /* Now we continue with the right side */
8535 bytes = (offset + bytes) -
8536 (logical[nr] + stripe_len);
8537 offset = logical[nr] + stripe_len;
8544 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8546 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8547 offset, offset+bytes);
8551 if (entry->offset != offset) {
8552 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8557 if (entry->bytes != bytes) {
8558 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8559 bytes, entry->bytes, offset);
8563 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from the start of the block group (but never
 * below BTRFS_SUPER_INFO_OFFSET).  @last tracks the end of the most recent
 * extent: for every EXTENT_ITEM or METADATA_ITEM inside the block group,
 * the gap between @last and the extent start is handed to
 * check_cache_range(), and @last then moves past the extent (key.offset
 * bytes for EXTENT_ITEM, one nodesize for METADATA_ITEM).  A final gap up
 * to the end of the block group is checked after the walk.  A message is
 * written to stderr when entries remain in the free space rb-tree.
 *
 * NOTE(review): the loop header, the handling of non-extent keys, the
 * first operand of the final condition and the return statements are not
 * visible in this listing.
 */
8568 static int verify_space_cache(struct btrfs_root *root,
8569 struct btrfs_block_group_cache *cache)
8571 struct btrfs_path path;
8572 struct extent_buffer *leaf;
8573 struct btrfs_key key;
/* All lookups below go through the extent tree root. */
8577 root = root->fs_info->extent_root;
8579 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8581 btrfs_init_path(&path);
8582 key.objectid = last;
8584 key.type = BTRFS_EXTENT_ITEM_KEY;
8585 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8590 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8591 ret = btrfs_next_leaf(root, &path);
8599 leaf = path.nodes[0];
8600 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Compare against the end of the block group. */
8601 if (key.objectid >= cache->key.offset + cache->key.objectid)
8603 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8604 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
8609 if (last == key.objectid) {
8610 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8611 last = key.objectid + key.offset;
8613 last = key.objectid + root->fs_info->nodesize;
/* Otherwise the bytes between last and this extent should be free space. */
8618 ret = check_cache_range(root, cache, last,
8619 key.objectid - last);
8622 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8623 last = key.objectid + key.offset;
8625 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap between the last extent and the end of the block group. */
8629 if (last < cache->key.objectid + cache->key.offset)
8630 ret = check_cache_range(root, cache, last,
8631 cache->key.objectid +
8632 cache->key.offset - last);
8635 btrfs_release_path(&path);
8638 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8639 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * The function first compares the super block generation with the cache
 * generation and prints a notice when they differ (and the cache
 * generation is not -1).  It then iterates over the block groups starting
 * just past the primary super block, makes sure each one has a
 * free_space_ctl, drops any previously loaded entries, loads the free
 * space either from the free space tree (when the FREE_SPACE_TREE
 * compat_ro feature is set, with super stripes excluded during the load)
 * or from the free space cache, and runs verify_space_cache() on it.
 *
 * Progress reporting is started and stopped through the global ctx.
 * Returns -EINVAL when the error counter is non-zero, 0 otherwise.
 *
 * NOTE(review): the loop header, the code that updates the error counter
 * and several branch bodies are not visible in this listing.
 */
8647 static int check_space_cache(struct btrfs_root *root)
8649 struct btrfs_block_group_cache *cache;
8650 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8654 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8655 btrfs_super_generation(root->fs_info->super_copy) !=
8656 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8657 printf("cache and super generation don't match, space cache "
8658 "will be invalidated\n");
8662 if (ctx.progress_enabled) {
8663 ctx.tp = TASK_FREE_SPACE;
8664 task_start(ctx.info);
8668 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup resumes right after this block group. */
8672 start = cache->key.objectid + cache->key.offset;
8673 if (!cache->free_space_ctl) {
8674 if (btrfs_init_free_space_ctl(cache,
8675 root->fs_info->sectorsize)) {
8680 btrfs_remove_free_space_cache(cache);
8683 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8684 ret = exclude_super_stripes(root, cache);
8686 fprintf(stderr, "could not exclude super stripes: %s\n",
8691 ret = load_free_space_tree(root->fs_info, cache);
8692 free_excluded_extents(root, cache);
8694 fprintf(stderr, "could not load free space tree: %s\n",
8701 ret = load_free_space_cache(root->fs_info, cache);
8706 ret = verify_space_cache(root, cache);
8708 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8709 cache->key.objectid);
8714 task_stop(ctx.info);
8716 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at logical address @bytenr and compare
 * the checksum of every sectorsize block with the expected checksum stored
 * in @eb at @leaf_offset.
 *
 * The data is read into a malloc()ed buffer with read_extent_data().  For
 * each sector the checksum is computed with btrfs_csum_data() and
 * btrfs_csum_final(), and the expected value is read from @eb at
 * leaf_offset + (sector index * csum_size).  A mismatch is reported on
 * stderr together with the mirror number; the number of copies is then
 * queried with btrfs_num_copies() and compared with the current mirror.
 *
 * NOTE(review): the handling of a length that is not a multiple of the
 * sectorsize, the allocation failure path, the mirror retry logic, the
 * release of the buffer and the return statements are not visible in this
 * listing.
 */
8719 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8720 u64 num_bytes, unsigned long leaf_offset,
8721 struct extent_buffer *eb) {
8723 struct btrfs_fs_info *fs_info = root->fs_info;
8725 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8727 unsigned long csum_offset;
8731 u64 data_checked = 0;
8737 if (num_bytes % fs_info->sectorsize)
8740 data = malloc(num_bytes);
8744 while (offset < num_bytes) {
8747 read_len = num_bytes - offset;
8748 /* read as much space once a time */
8749 ret = read_extent_data(fs_info, data + offset,
8750 bytenr + offset, &read_len, mirror);
8754 /* verify every 4k data's checksum */
8755 while (data_checked < read_len) {
8757 tmp = offset + data_checked;
8759 csum = btrfs_csum_data((char *)data + tmp,
8760 csum, fs_info->sectorsize);
8761 btrfs_csum_final(csum, (u8 *)&csum);
8763 csum_offset = leaf_offset +
8764 tmp / fs_info->sectorsize * csum_size;
8765 read_extent_buffer(eb, (char *)&csum_expected,
8766 csum_offset, csum_size);
8767 /* try another mirror */
8768 if (csum != csum_expected) {
8769 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8770 mirror, bytenr + tmp,
8771 csum, csum_expected);
8772 num_copies = btrfs_num_copies(root->fs_info,
8774 if (mirror < num_copies - 1) {
8779 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to an
 * earlier item, also skipping back over keys whose type sorts after
 * EXTENT_ITEM.  The following items are then walked forward and each
 * overlapping extent is subtracted from the range: an extent at or before
 * the start moves @bytenr forward, an extent reaching the end shortens
 * @num_bytes, and an extent strictly inside the range makes the function
 * call itself for the right part before continuing with the left part.
 * When bytes remain uncovered and no error occurred, a message naming the
 * csum range is written to stderr.
 *
 * NOTE(review): the loop headers, several branch bodies, the value
 * assigned after the final message and the return statements are not
 * visible in this listing.
 */
8788 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8791 struct btrfs_path path;
8792 struct extent_buffer *leaf;
8793 struct btrfs_key key;
8796 btrfs_init_path(&path);
8797 key.objectid = bytenr;
8798 key.type = BTRFS_EXTENT_ITEM_KEY;
8799 key.offset = (u64)-1;
8802 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8805 fprintf(stderr, "Error looking up extent record %d\n", ret);
8806 btrfs_release_path(&path);
8809 if (path.slots[0] > 0) {
8812 ret = btrfs_prev_leaf(root, &path);
8815 } else if (ret > 0) {
8822 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8825 * Block group items come before extent items if they have the same
8826 * bytenr, so walk back one more just in case. Dear future traveller,
8827 * first congrats on mastering time travel. Now if it's not too much
8828 * trouble could you go back to 2006 and tell Chris to make the
8829 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8830 * EXTENT_ITEM_KEY please?
8832 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8833 if (path.slots[0] > 0) {
8836 ret = btrfs_prev_leaf(root, &path);
8839 } else if (ret > 0) {
8844 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8848 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8849 ret = btrfs_next_leaf(root, &path);
8851 fprintf(stderr, "Error going to next leaf "
8853 btrfs_release_path(&path);
8859 leaf = path.nodes[0];
8860 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8861 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8865 if (key.objectid + key.offset < bytenr) {
8869 if (key.objectid > bytenr + num_bytes)
8872 if (key.objectid == bytenr) {
8873 if (key.offset >= num_bytes) {
8877 num_bytes -= key.offset;
8878 bytenr += key.offset;
8879 } else if (key.objectid < bytenr) {
8880 if (key.objectid + key.offset >= bytenr + num_bytes) {
8884 num_bytes = (bytenr + num_bytes) -
8885 (key.objectid + key.offset);
8886 bytenr = key.objectid + key.offset;
8888 if (key.objectid + key.offset < bytenr + num_bytes) {
8889 u64 new_start = key.objectid + key.offset;
8890 u64 new_bytes = bytenr + num_bytes - new_start;
8893 * Weird case, the extent is in the middle of
8894 * our range, we'll have to search one side
8895 * and then the other. Not sure if this happens
8896 * in real life, but no harm in coding it up
8897 * anyway just in case.
8899 btrfs_release_path(&path);
8900 ret = check_extent_exists(root, new_start,
8903 fprintf(stderr, "Right section didn't "
8907 num_bytes = key.objectid - bytenr;
8910 num_bytes = key.objectid - bytenr;
8917 if (num_bytes && !ret) {
8918 fprintf(stderr, "There are no extents for csum range "
8919 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8923 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every run of checksummed bytes is
 * backed by extent records; optionally verify the data checksums too.
 *
 * The walk starts at the first (EXTENT_CSUM_OBJECTID, EXTENT_CSUM_KEY)
 * item of fs_info->csum_root.  For each csum item the covered data length
 * is (item size / csum size) * sectorsize.  When the global
 * check_data_csum is set, the data itself is verified through
 * check_extent_csums(); otherwise control jumps to skip_csum_check.
 * Consecutive items are merged into one run described by @offset and
 * @num_bytes; when an item does not continue the current run, the run is
 * passed to check_extent_exists() and a message is written to stderr when
 * a csum exists without an extent record.
 *
 * NOTE(review): the loop header, the skip_csum_check label, the reset of
 * the run length, the error accounting and the return statements are not
 * visible in this listing.
 */
8927 static int check_csums(struct btrfs_root *root)
8929 struct btrfs_path path;
8930 struct extent_buffer *leaf;
8931 struct btrfs_key key;
8932 u64 offset = 0, num_bytes = 0;
8933 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8937 unsigned long leaf_offset;
/* From here on root refers to the csum tree. */
8939 root = root->fs_info->csum_root;
8940 if (!extent_buffer_uptodate(root->node)) {
8941 fprintf(stderr, "No valid csum tree found\n");
8945 btrfs_init_path(&path);
8946 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8947 key.type = BTRFS_EXTENT_CSUM_KEY;
8949 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8951 fprintf(stderr, "Error searching csum tree %d\n", ret);
8952 btrfs_release_path(&path);
8956 if (ret > 0 && path.slots[0])
8961 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8962 ret = btrfs_next_leaf(root, &path);
8964 fprintf(stderr, "Error going to next leaf "
8971 leaf = path.nodes[0];
8973 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8974 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One checksum of csum_size bytes covers one sector of data. */
8979 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8980 csum_size) * root->fs_info->sectorsize;
8981 if (!check_data_csum)
8982 goto skip_csum_check;
8983 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8984 ret = check_extent_csums(root, key.offset, data_len,
8990 offset = key.offset;
/* This item does not continue the current run: check the finished run. */
8991 } else if (key.offset != offset + num_bytes) {
8992 ret = check_extent_exists(root, offset, num_bytes);
8994 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8995 "there is no extent record\n",
8996 offset, offset+num_bytes);
8999 offset = key.offset;
9002 num_bytes += data_len;
9006 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type
 * when the objectids are equal, then offset when the types are equal too.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the function returns non-zero when @key sorts before
 * @drop_key and zero otherwise - confirm against the full file.
 */
9010 static int is_dropped_key(struct btrfs_key *key,
9011 struct btrfs_key *drop_key) {
9012 if (key->objectid < drop_key->objectid)
9014 else if (key->objectid == drop_key->objectid) {
9015 if (key->type < drop_key->type)
9017 else if (key->type == drop_key->type) {
9018 if (key->offset < drop_key->offset)
9026 * Here are the rules for FULL_BACKREF.
9028 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9029 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
 * FULL_BACKREF set.
9031 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9032 * if it happened after the relocation occurred since we'll have dropped the
9033 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9034 * have no real way to know for sure.
9036 * We process the blocks one root at a time, and we start from the lowest root
9037 * objectid and go to the highest. So we can just lookup the owner backref for
9038 * the record and if we don't find it then we know it doesn't exist and we have
 * a FULL BACKREF.
9041 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9042 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9043 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and store the result in *@flags.
 *
 * The extent record of @buf is looked up in @extent_cache.  The visible
 * tests look at: whether the root objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node recorded in
 * @ri, whether the header has BTRFS_HEADER_FLAG_RELOC set, whether the
 * header owner equals the root objectid, and whether a tree backref for
 * the owner exists on the record.
 *
 * The function ends on one of two paths.  One leaves the flag clear and
 * the other sets FULL_BACKREF in *@flags; each marks the record with
 * bad_full_backref when a previously stored flag_block_full_backref
 * disagrees with the new decision.
 *
 * NOTE(review): the jump targets of the individual tests and the return
 * statements are not visible in this listing, so which test leads to which
 * path has to be confirmed against the full file.
 */
9045 static int calc_extent_flag(struct cache_tree *extent_cache,
9046 struct extent_buffer *buf,
9047 struct root_item_record *ri,
9050 struct extent_record *rec;
9051 struct cache_extent *cache;
9052 struct tree_backref *tback;
9055 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9056 /* we have added this extent before */
9060 rec = container_of(cache, struct extent_record, cache);
9063 * Except file/reloc tree, we can not have
9066 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9071 if (buf->start == ri->bytenr)
9074 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9077 owner = btrfs_header_owner(buf);
9078 if (owner == ri->objectid)
9081 tback = find_tree_backref(rec, 0, owner);
/* Path without FULL_BACKREF: a stored non-zero decision is a conflict. */
9086 if (rec->flag_block_full_backref != FLAG_UNSET &&
9087 rec->flag_block_full_backref != 0)
9088 rec->bad_full_backref = 1;
/* Path with FULL_BACKREF: a stored decision other than 1 is a conflict. */
9091 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9092 if (rec->flag_block_full_backref != FLAG_UNSET &&
9093 rec->flag_block_full_backref != 1)
9094 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr naming a key type that was found
 * in a root where it does not belong.  The key type is rendered by
 * print_key_type() and the root by print_objectid().
 */
9098 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9100 fprintf(stderr, "Invalid key type(");
9101 print_key_type(stderr, 0, key_type);
9102 fprintf(stderr, ") found in root(");
9103 print_objectid(stderr, rootid, 0);
9104 fprintf(stderr, ")\n");
9108 * Check if the key is valid with its extent buffer.
9110 * This is an early check in case an invalid key exists in an extent buffer.
9111 * This is not comprehensive yet, but should prevent a wrong key/item from
 * being passed further.
/*
 * Check that @key_type is allowed in the tree identified by @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM: chunk tree only;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM: extent tree only;
 *  - ROOT_ITEM: root tree only;
 *  - DEV_EXTENT: device tree only.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the branch bodies and the return
 * statements are not visible in this listing.
 */
9114 static int check_type_with_root(u64 rootid, u8 key_type)
9117 /* Only valid in chunk tree */
9118 case BTRFS_DEV_ITEM_KEY:
9119 case BTRFS_CHUNK_ITEM_KEY:
9120 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9123 /* valid in csum and log tree */
/*
 * NOTE(review): this label is a tree objectid constant while every other
 * label in this function is a *_KEY key type constant.  The csum key type
 * (BTRFS_EXTENT_CSUM_KEY) may have been intended - confirm.
 */
9124 case BTRFS_CSUM_TREE_OBJECTID:
9125 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9129 case BTRFS_EXTENT_ITEM_KEY:
9130 case BTRFS_METADATA_ITEM_KEY:
9131 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9132 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9135 case BTRFS_ROOT_ITEM_KEY:
9136 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9139 case BTRFS_DEV_EXTENT_KEY:
9140 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9146 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the current root and feed what it
 * contains into the check caches.
 *
 * Visible phases:
 *  1. pick_next_pending() fills @bits with candidate blocks; each one is
 *     added to @reada and passed to readahead_tree_block().
 *  2. bits[0] becomes the block to process.  It is stored in *@last and
 *     its entries are removed from @pending, @reada and @nodes.
 *  3. The parent generation is taken from the extent record in
 *     @extent_cache and the block is read with read_tree_block(); a block
 *     that is not uptodate is recorded through record_bad_block_io().
 *  4. The block flags come from btrfs_lookup_extent_info() unless the
 *     global init_extent_tree is set, with calc_extent_flag() as the other
 *     source; when that fails a message is printed and FULL_BACKREF is
 *     assumed.  The flag is then reconciled with the header owner, the
 *     RELOC header flag and ri->last_snapshot, and disagreements are
 *     marked on the record as bad_full_backref.
 *  5. check_block() validates the block.
 *  6. For a leaf, every item is checked with check_type_with_root() and
 *     dispatched by key type: extent and metadata items, csum items, chunk,
 *     device, block group and device extent items, the standalone backref
 *     key types, orphan items (queued on delete_items) and file extent
 *     items, whose disk and referenced byte counts are accumulated and
 *     which add a data backref.
 *     For a node, every child pointer gets an extent record, a tree
 *     backref, and is queued on @nodes or @pending.
 *  7. The global btree statistics are updated and the buffer is released.
 *
 * NOTE(review): many conditions, the else branches, the error paths and
 * the return statements are not visible in this listing.
 * NOTE(review): rec starts as NULL and is only assigned after the
 * @extent_cache lookup; confirm that the elided conditions guard every
 * later rec-> dereference.
 * NOTE(review): in the file extent handling the sectorsize comparison is
 * made against the running total data_bytes_allocated rather than the
 * size of the current extent - confirm this is intended.
 */
9150 static int run_next_block(struct btrfs_root *root,
9151 struct block_info *bits,
9154 struct cache_tree *pending,
9155 struct cache_tree *seen,
9156 struct cache_tree *reada,
9157 struct cache_tree *nodes,
9158 struct cache_tree *extent_cache,
9159 struct cache_tree *chunk_cache,
9160 struct rb_root *dev_cache,
9161 struct block_group_tree *block_group_cache,
9162 struct device_extent_tree *dev_extent_cache,
9163 struct root_item_record *ri)
9165 struct btrfs_fs_info *fs_info = root->fs_info;
9166 struct extent_buffer *buf;
9167 struct extent_record *rec = NULL;
9178 struct btrfs_key key;
9179 struct cache_extent *cache;
9182 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9183 bits_nr, &reada_bits);
9188 for(i = 0; i < nritems; i++) {
9189 ret = add_cache_extent(reada, bits[i].start,
9194 /* fixme, get the parent transid */
9195 readahead_tree_block(fs_info, bits[i].start, 0);
/* The first candidate is the block processed in this call. */
9198 *last = bits[0].start;
9199 bytenr = bits[0].start;
9200 size = bits[0].size;
9202 cache = lookup_cache_extent(pending, bytenr, size);
9204 remove_cache_extent(pending, cache);
9207 cache = lookup_cache_extent(reada, bytenr, size);
9209 remove_cache_extent(reada, cache);
9212 cache = lookup_cache_extent(nodes, bytenr, size);
9214 remove_cache_extent(nodes, cache);
9217 cache = lookup_cache_extent(extent_cache, bytenr, size);
9219 rec = container_of(cache, struct extent_record, cache);
9220 gen = rec->parent_generation;
9223 /* fixme, get the real parent transid */
9224 buf = read_tree_block(root->fs_info, bytenr, gen);
9225 if (!extent_buffer_uptodate(buf)) {
9226 record_bad_block_io(root->fs_info,
9227 extent_cache, bytenr, size);
9231 nritems = btrfs_header_nritems(buf);
9234 if (!init_extent_tree) {
9235 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9236 btrfs_header_level(buf), 1, NULL,
9239 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9241 fprintf(stderr, "Couldn't calc extent flags\n");
9242 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9247 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9249 fprintf(stderr, "Couldn't calc extent flags\n");
9250 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9254 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9256 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9257 ri->objectid == btrfs_header_owner(buf)) {
9259 * Ok we got to this block from it's original owner and
9260 * we have FULL_BACKREF set. Relocation can leave
9261 * converted blocks over so this is altogether possible,
9262 * however it's not possible if the generation > the
9263 * last snapshot, so check for this case.
9265 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9266 btrfs_header_generation(buf) > ri->last_snapshot) {
9267 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9268 rec->bad_full_backref = 1;
9273 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9274 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9275 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9276 rec->bad_full_backref = 1;
9280 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9281 rec->flag_block_full_backref = 1;
9285 rec->flag_block_full_backref = 0;
9287 owner = btrfs_header_owner(buf);
9290 ret = check_block(root, extent_cache, buf, flags);
9294 if (btrfs_is_leaf(buf)) {
9295 btree_space_waste += btrfs_leaf_free_space(root, buf);
9296 for (i = 0; i < nritems; i++) {
9297 struct btrfs_file_extent_item *fi;
9298 btrfs_item_key_to_cpu(buf, &key, i);
9300 * Check key type against the leaf owner.
9301 * Could filter quite a lot of early error if
9304 if (check_type_with_root(btrfs_header_owner(buf),
9306 fprintf(stderr, "ignoring invalid key\n");
9309 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9310 process_extent_item(root, extent_cache, buf,
9314 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9315 process_extent_item(root, extent_cache, buf,
9319 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9321 btrfs_item_size_nr(buf, i);
9324 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9325 process_chunk_item(chunk_cache, &key, buf, i);
9328 if (key.type == BTRFS_DEV_ITEM_KEY) {
9329 process_device_item(dev_cache, &key, buf, i);
9332 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9333 process_block_group_item(block_group_cache,
9337 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9338 process_device_extent_item(dev_extent_cache,
9343 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9344 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9345 process_extent_ref_v0(extent_cache, buf, i);
9352 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9353 ret = add_tree_backref(extent_cache,
9354 key.objectid, 0, key.offset, 0);
9357 "add_tree_backref failed (leaf tree block): %s",
9361 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9362 ret = add_tree_backref(extent_cache,
9363 key.objectid, key.offset, 0, 0);
9366 "add_tree_backref failed (leaf shared block): %s",
9370 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9371 struct btrfs_extent_data_ref *ref;
9372 ref = btrfs_item_ptr(buf, i,
9373 struct btrfs_extent_data_ref);
9374 add_data_backref(extent_cache,
9376 btrfs_extent_data_ref_root(buf, ref),
9377 btrfs_extent_data_ref_objectid(buf,
9379 btrfs_extent_data_ref_offset(buf, ref),
9380 btrfs_extent_data_ref_count(buf, ref),
9381 0, root->fs_info->sectorsize);
9384 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9385 struct btrfs_shared_data_ref *ref;
9386 ref = btrfs_item_ptr(buf, i,
9387 struct btrfs_shared_data_ref);
9388 add_data_backref(extent_cache,
9389 key.objectid, key.offset, 0, 0, 0,
9390 btrfs_shared_data_ref_count(buf, ref),
9391 0, root->fs_info->sectorsize);
9394 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9395 struct bad_item *bad;
9397 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9401 bad = malloc(sizeof(struct bad_item));
9404 INIT_LIST_HEAD(&bad->list);
9405 memcpy(&bad->key, &key,
9406 sizeof(struct btrfs_key));
9407 bad->root_id = owner;
9408 list_add_tail(&bad->list, &delete_items);
9411 if (key.type != BTRFS_EXTENT_DATA_KEY)
9413 fi = btrfs_item_ptr(buf, i,
9414 struct btrfs_file_extent_item);
9415 if (btrfs_file_extent_type(buf, fi) ==
9416 BTRFS_FILE_EXTENT_INLINE)
9418 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9421 data_bytes_allocated +=
9422 btrfs_file_extent_disk_num_bytes(buf, fi);
9423 if (data_bytes_allocated < root->fs_info->sectorsize) {
9426 data_bytes_referenced +=
9427 btrfs_file_extent_num_bytes(buf, fi);
9428 add_data_backref(extent_cache,
9429 btrfs_file_extent_disk_bytenr(buf, fi),
9430 parent, owner, key.objectid, key.offset -
9431 btrfs_file_extent_offset(buf, fi), 1, 1,
9432 btrfs_file_extent_disk_num_bytes(buf, fi));
9436 struct btrfs_key first_key;
9438 first_key.objectid = 0;
9441 btrfs_item_key_to_cpu(buf, &first_key, 0);
9442 level = btrfs_header_level(buf);
9443 for (i = 0; i < nritems; i++) {
9444 struct extent_record tmpl;
9446 ptr = btrfs_node_blockptr(buf, i);
9447 size = root->fs_info->nodesize;
9448 btrfs_node_key_to_cpu(buf, &key, i);
9450 if ((level == ri->drop_level)
9451 && is_dropped_key(&key, &ri->drop_key)) {
9456 memset(&tmpl, 0, sizeof(tmpl));
9457 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9458 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9463 tmpl.max_size = size;
9464 ret = add_extent_rec(extent_cache, &tmpl);
9468 ret = add_tree_backref(extent_cache, ptr, parent,
9472 "add_tree_backref failed (non-leaf block): %s",
9478 add_pending(nodes, seen, ptr, size);
9480 add_pending(pending, seen, ptr, size);
9483 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9484 nritems) * sizeof(struct btrfs_key_ptr);
9486 total_btree_bytes += buf->len;
9487 if (fs_root_objectid(btrfs_header_owner(buf)))
9488 total_fs_tree_bytes += buf->len;
9489 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9490 total_extent_tree_bytes += buf->len;
9492 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and seed its extent
 * record and tree backref.
 *
 * The block is queued through add_pending() on @nodes when its header
 * level is above zero; the other add_pending() call targets @pending.  An
 * extent record starting at buf->start with max_size buf->len is added to
 * @extent_cache.  The tree backref is added with the block itself as
 * parent when the tree is the reloc tree or the header backref revision is
 * older than BTRFS_MIXED_BACKREF_REV, and with the root objectid
 * otherwise.
 *
 * NOTE(review): the last parameter(s), the else keywords, the remaining
 * extent record fields and the return statement are not visible in this
 * listing.
 */
9496 static int add_root_to_pending(struct extent_buffer *buf,
9497 struct cache_tree *extent_cache,
9498 struct cache_tree *pending,
9499 struct cache_tree *seen,
9500 struct cache_tree *nodes,
9503 struct extent_record tmpl;
9506 if (btrfs_header_level(buf) > 0)
9507 add_pending(nodes, seen, buf->start, buf->len);
9509 add_pending(pending, seen, buf->start, buf->len);
9511 memset(&tmpl, 0, sizeof(tmpl));
9512 tmpl.start = buf->start;
9517 tmpl.max_size = buf->len;
9518 add_extent_rec(extent_cache, &tmpl);
/* Full backref form: the block is recorded as its own parent. */
9520 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9521 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9522 ret = add_tree_backref(extent_cache, buf->start, buf->start,
/* Normal form: no parent, keyed by the owning root objectid. */
9525 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9530 /* As we fix the tree, we might be deleting blocks that
9531 * we're tracking for repair. This hook makes sure we
9532 * remove any backrefs for blocks as we are fixing them. */
/*
 * Hook called when an extent reference is dropped, keeping the in-memory
 * extent records of fs_info->fsck_extent_cache in step.
 *
 * The reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID.  The extent record covering
 * [@bytenr, @bytenr + @num_bytes) is looked up, the matching data or tree
 * backref is located, its counters and found_ref / found_extent_tree
 * markers are reduced, and the record counters (refs, extent_item_refs)
 * are reduced with them.  Finally maybe_free_extent_rec() is given the
 * chance to release the record.
 *
 * NOTE(review): the last parameter(s), the handling of a failed lookup,
 * what follows rb_erase() and the return statements are not visible in
 * this listing.
 * NOTE(review): both rb_erase() calls require found_ref to still be set
 * while found_extent_tree is clear.  In the tree backref branch found_ref
 * has just been cleared by the visible code, so that erase looks
 * unreachable - confirm whether !found_ref was intended.
 */
9534 static int free_extent_hook(struct btrfs_trans_handle *trans,
9535 struct btrfs_root *root,
9536 u64 bytenr, u64 num_bytes, u64 parent,
9537 u64 root_objectid, u64 owner, u64 offset,
9540 struct extent_record *rec;
9541 struct cache_extent *cache;
9543 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9545 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9546 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9550 rec = container_of(cache, struct extent_record, cache);
9552 struct data_backref *back;
9553 back = find_data_backref(rec, parent, root_objectid, owner,
9554 offset, 1, bytenr, num_bytes);
/* Drop the references that were found by walking the trees. */
9557 if (back->node.found_ref) {
9558 back->found_ref -= refs_to_drop;
9560 rec->refs -= refs_to_drop;
/* Drop the references that were found in the extent tree. */
9562 if (back->node.found_extent_tree) {
9563 back->num_refs -= refs_to_drop;
9564 if (rec->extent_item_refs)
9565 rec->extent_item_refs -= refs_to_drop;
9567 if (back->found_ref == 0)
9568 back->node.found_ref = 0;
9569 if (back->num_refs == 0)
9570 back->node.found_extent_tree = 0;
9572 if (!back->node.found_extent_tree && back->node.found_ref) {
9573 rb_erase(&back->node.node, &rec->backref_tree);
9577 struct tree_backref *back;
9578 back = find_tree_backref(rec, parent, root_objectid);
9581 if (back->node.found_ref) {
9584 back->node.found_ref = 0;
9586 if (back->node.found_extent_tree) {
9587 if (rec->extent_item_refs)
9588 rec->extent_item_refs--;
9589 back->node.found_extent_tree = 0;
9591 if (!back->node.found_extent_tree && back->node.found_ref) {
9592 rb_erase(&back->node.node, &rec->backref_tree);
9596 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at @bytenr.
 *
 * The extent tree is searched from (bytenr, <type>, -1) and the items with
 * objectid @bytenr are visited.  An item whose type is not one of the
 * extent or backref key types is skipped by releasing the path and moving
 * the search key just below it: one offset lower for type 0, otherwise one
 * type lower with offset -1.  Every other item is reported on stderr and
 * removed with btrfs_del_item().  After deleting an EXTENT_ITEM or
 * METADATA_ITEM the block group accounting is updated through
 * btrfs_update_block_group() using key.offset or the nodesize as the byte
 * count.
 *
 * NOTE(review): the last parameter(s), the initial key type, the loop
 * header, the stepping of the path, the exit conditions and the return
 * statements are not visible in this listing.
 */
9601 static int delete_extent_records(struct btrfs_trans_handle *trans,
9602 struct btrfs_root *root,
9603 struct btrfs_path *path,
9606 struct btrfs_key key;
9607 struct btrfs_key found_key;
9608 struct extent_buffer *leaf;
9613 key.objectid = bytenr;
9615 key.offset = (u64)-1;
9618 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9625 if (path->slots[0] == 0)
9631 leaf = path->nodes[0];
9632 slot = path->slots[0];
9634 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9635 if (found_key.objectid != bytenr)
/* Items of any other type are left in place and skipped. */
9638 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9639 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9640 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9641 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9642 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9643 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9644 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9645 btrfs_release_path(path);
9646 if (found_key.type == 0) {
9647 if (found_key.offset == 0)
9649 key.offset = found_key.offset - 1;
9650 key.type = found_key.type;
9652 key.type = found_key.type - 1;
9653 key.offset = (u64)-1;
9657 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9658 found_key.objectid, found_key.type, found_key.offset);
9660 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9663 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
9665 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9666 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9667 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9668 found_key.offset : root->fs_info->nodesize;
9670 ret = btrfs_update_block_group(root, bytenr,
9677 btrfs_release_path(path);
9682 * for a single backref, this will allocate a new extent
9683 * and add the backref to it.
/*
 * Recreate extent tree state for one backref of @rec.
 *
 * Visible flow:
 *  - an empty item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size)
 *    is inserted into the extent root, its ref count is set to 0 and its
 *    generation to rec->generation;
 *  - for a data backref the item gets BTRFS_EXTENT_FLAG_DATA, otherwise the
 *    trailing btrfs_tree_block_info is zeroed and filled with
 *    rec->info_level and a key, and the item gets
 *    BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags;
 *  - block group accounting is updated for [rec->start, rec->max_size];
 *  - references are then added with btrfs_inc_extent_ref(): once per
 *    dback->found_ref for a data backref, once for a tree backref.
 *
 * NOTE(review): the condition that selects the item insertion (presumably
 * tied to @allocated) and the return statements are outside the visible
 * lines - confirm against the full source.
 */
9685 static int record_extent(struct btrfs_trans_handle *trans,
9686 struct btrfs_fs_info *info,
9687 struct btrfs_path *path,
9688 struct extent_record *rec,
9689 struct extent_backref *back,
9690 int allocated, u64 flags)
9693 struct btrfs_root *extent_root = info->extent_root;
9694 struct extent_buffer *leaf;
9695 struct btrfs_key ins_key;
9696 struct btrfs_extent_item *ei;
9697 struct data_backref *dback;
9698 struct btrfs_tree_block_info *bi;
9701 rec->max_size = max_t(u64, rec->max_size,
9705 u32 item_size = sizeof(*ei);
9708 item_size += sizeof(*bi);
9710 ins_key.objectid = rec->start;
9711 ins_key.offset = rec->max_size;
9712 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9714 ret = btrfs_insert_empty_item(trans, extent_root, path,
9715 &ins_key, item_size);
9719 leaf = path->nodes[0];
9720 ei = btrfs_item_ptr(leaf, path->slots[0],
9721 struct btrfs_extent_item);
9723 btrfs_set_extent_refs(leaf, ei, 0);
9724 btrfs_set_extent_generation(leaf, ei, rec->generation);
9726 if (back->is_data) {
9727 btrfs_set_extent_flags(leaf, ei,
9728 BTRFS_EXTENT_FLAG_DATA);
/* Scratch disk key: only the objectid is meaningful, type and offset are 0. */
9730 struct btrfs_disk_key copy_key;
9732 bi = (struct btrfs_tree_block_info *)(ei + 1);
9733 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9736 btrfs_set_disk_key_objectid(&copy_key,
9737 rec->info_objectid);
9738 btrfs_set_disk_key_type(&copy_key, 0);
9739 btrfs_set_disk_key_offset(&copy_key, 0);
9741 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9742 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9744 btrfs_set_extent_flags(leaf, ei,
9745 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9748 btrfs_mark_buffer_dirty(leaf);
9749 ret = btrfs_update_block_group(extent_root, rec->start,
9750 rec->max_size, 1, 0);
9753 btrfs_release_path(path);
9756 if (back->is_data) {
9760 dback = to_data_backref(back);
9761 if (back->full_backref)
9762 parent = dback->parent;
9766 for (i = 0; i < dback->found_ref; i++) {
9767 /* if parent != 0, we're doing a full backref
9768 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9769 * just makes the backref allocator create a data
9772 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9773 rec->start, rec->max_size,
9777 BTRFS_FIRST_FREE_OBJECTID :
9783 fprintf(stderr, "adding new data backref"
9784 " on %llu %s %llu owner %llu"
9785 " offset %llu found %d\n",
9786 (unsigned long long)rec->start,
9787 back->full_backref ?
9789 back->full_backref ?
9790 (unsigned long long)parent :
9791 (unsigned long long)dback->root,
9792 (unsigned long long)dback->owner,
9793 (unsigned long long)dback->offset,
9797 struct tree_backref *tback;
9799 tback = to_tree_backref(back);
9800 if (back->full_backref)
9801 parent = tback->parent;
9805 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9806 rec->start, rec->max_size,
9807 parent, tback->root, 0, 0);
9808 fprintf(stderr, "adding new tree backref on "
9809 "start %llu len %llu parent %llu root %llu\n",
9810 rec->start, rec->max_size, parent, tback->root);
9813 btrfs_release_path(path);
/*
 * Scan the extent_entry list @entries for an entry whose bytenr and bytes
 * both equal the requested values.
 *
 * NOTE(review): the return statements fall outside the visible lines;
 * presumably the matching entry is returned, or NULL when nothing matches.
 */
9817 static struct extent_entry *find_entry(struct list_head *entries,
9818 u64 bytenr, u64 bytes)
9820 struct extent_entry *entry = NULL;
9822 list_for_each_entry(entry, entries, list) {
9823 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry on @entries that the largest number of backrefs agree on,
 * judged by comparing the ->count fields.
 *
 * An entry whose ->broken equals its ->count is tested first and, per the
 * comment, is not trusted. When the current candidate and the entry under
 * inspection have equal counts the result is treated as inconclusive and
 * the search goes on; otherwise the entry with the larger count becomes the
 * candidate.
 *
 * NOTE(review): the return statement is outside the visible lines;
 * presumably the candidate ("best") is returned, NULL when there is no
 * clear winner - confirm.
 */
9830 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9832 struct extent_entry *entry, *best = NULL, *prev = NULL;
9834 list_for_each_entry(entry, entries, list) {
9836 * If there are as many broken entries as entries then we know
9837 * not to trust this particular entry.
9839 if (entry->broken == entry->count)
9843 * Special case, when there are only two entries and 'best' is
9853 * If our current entry == best then we can't be sure our best
9854 * is really the best, so we need to keep searching.
9856 if (best && best->count == entry->count) {
9862 /* Prev == entry, not good enough, have to keep searching */
9863 if (!prev->broken && prev->count == entry->count)
9867 best = (prev->count > entry->count) ? prev : entry;
9868 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref @dback so that it agrees
 * with the extent described by @entry.
 *
 * Visible flow:
 *  - read the fs root named by dback->root;
 *  - search (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) without a
 *    transaction and inspect file extent items until one has
 *    disk_bytenr/disk_num_bytes equal to dback->disk_bytenr/dback->bytes;
 *  - start a transaction and search for that key again with cow enabled;
 *  - print a message and stop for a compressed extent whose bytenr differs
 *    from entry->bytenr, for a ref reaching past the end of the entry, and
 *    for a ref lying before the start of the entry;
 *  - otherwise point disk_bytenr at entry->bytenr (updating the file extent
 *    offset in the branches where the bytenr moved), set disk_num_bytes to
 *    entry->bytes, set ram_bytes as well when the extent is not compressed,
 *    and mark the leaf dirty;
 *  - commit the transaction and release @path.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started. The final
 * return yields ret when it is non-zero, otherwise the commit result.
 */
9876 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9877 struct data_backref *dback, struct extent_entry *entry)
9879 struct btrfs_trans_handle *trans;
9880 struct btrfs_root *root;
9881 struct btrfs_file_extent_item *fi;
9882 struct extent_buffer *leaf;
9883 struct btrfs_key key;
9887 key.objectid = dback->root;
9888 key.type = BTRFS_ROOT_ITEM_KEY;
9889 key.offset = (u64)-1;
9890 root = btrfs_read_fs_root(info, &key);
9892 fprintf(stderr, "Couldn't find root for our ref\n");
9897 * The backref points to the original offset of the extent if it was
9898 * split, so we need to search down to the offset we have and then walk
9899 * forward until we find the backref we're looking for.
9901 key.objectid = dback->owner;
9902 key.type = BTRFS_EXTENT_DATA_KEY;
9903 key.offset = dback->offset;
9904 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9906 fprintf(stderr, "Error looking up ref %d\n", ret);
9911 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9912 ret = btrfs_next_leaf(root, path);
9914 fprintf(stderr, "Couldn't find our ref, next\n");
9918 leaf = path->nodes[0];
9919 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9920 if (key.objectid != dback->owner ||
9921 key.type != BTRFS_EXTENT_DATA_KEY) {
9922 fprintf(stderr, "Couldn't find our ref, search\n");
9925 fi = btrfs_item_ptr(leaf, path->slots[0],
9926 struct btrfs_file_extent_item);
9927 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9928 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9930 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9935 btrfs_release_path(path);
9937 trans = btrfs_start_transaction(root, 1);
9939 return PTR_ERR(trans);
9942 * Ok we have the key of the file extent we want to fix, now we can cow
9943 * down to the thing and fix it.
9945 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9947 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9948 key.objectid, key.type, key.offset, ret);
9952 fprintf(stderr, "Well that's odd, we just found this key "
9953 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9958 leaf = path->nodes[0];
9959 fi = btrfs_item_ptr(leaf, path->slots[0],
9960 struct btrfs_file_extent_item);
9962 if (btrfs_file_extent_compression(leaf, fi) &&
9963 dback->disk_bytenr != entry->bytenr) {
9964 fprintf(stderr, "Ref doesn't match the record start and is "
9965 "compressed, please take a btrfs-image of this file "
9966 "system and send it to a btrfs developer so they can "
9967 "complete this functionality for bytenr %Lu\n",
9968 dback->disk_bytenr);
9973 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9974 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9975 } else if (dback->disk_bytenr > entry->bytenr) {
9976 u64 off_diff, offset;
9978 off_diff = dback->disk_bytenr - entry->bytenr;
9979 offset = btrfs_file_extent_offset(leaf, fi);
9980 if (dback->disk_bytenr + offset +
9981 btrfs_file_extent_num_bytes(leaf, fi) >
9982 entry->bytenr + entry->bytes) {
9983 fprintf(stderr, "Ref is past the entry end, please "
9984 "take a btrfs-image of this file system and "
9985 "send it to a btrfs developer, ref %Lu\n",
9986 dback->disk_bytenr);
9991 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9992 btrfs_set_file_extent_offset(leaf, fi, offset);
9993 } else if (dback->disk_bytenr < entry->bytenr) {
9996 offset = btrfs_file_extent_offset(leaf, fi);
9997 if (dback->disk_bytenr + offset < entry->bytenr) {
9998 fprintf(stderr, "Ref is before the entry start, please"
9999 " take a btrfs-image of this file system and "
10000 "send it to a btrfs developer, ref %Lu\n",
10001 dback->disk_bytenr);
10006 offset += dback->disk_bytenr;
10007 offset -= entry->bytenr;
10008 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10009 btrfs_set_file_extent_offset(leaf, fi, offset);
10012 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10015 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10016 * only do this if we aren't using compression, otherwise it's a
10019 if (!btrfs_file_extent_compression(leaf, fi))
10020 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10022 printf("ram bytes may be wrong?\n");
10023 btrfs_mark_buffer_dirty(leaf);
10025 err = btrfs_commit_transaction(trans, root);
10026 btrfs_release_path(path);
10027 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (bytenr, bytes) pair.
 *
 * First pass: every backref that is data, not a full backref and has
 * found_ref set is grouped into a local list of extent_entry items keyed by
 * (dback->disk_bytenr, dback->bytes); a backref that differs from
 * (rec->start, rec->nr) or is marked broken is tested for separately.
 *
 * If there is at most one entry and no mismatch nothing needs repairing.
 * Otherwise find_most_right_entry() picks a winner. Per the in-code
 * comments, an even split is settled by consulting the extent record
 * itself: the (rec->start, rec->nr) entry is looked up, and when it is
 * missing it is either added as a new entry or the repair is abandoned with
 * a message (when there were no broken entries or rec->found_rec is unset).
 * Repair is also refused when the winner does not start at rec->start.
 *
 * Second pass: repair_ref() is called for each qualifying backref whose
 * bytes/disk_bytenr differ from the winner. The local list is emptied
 * before leaving.
 *
 * NOTE(review): the return statements are outside the visible lines. The
 * comment ahead of the cleanup loop says the cache must be dropped and the
 * scan redone once refs were modified - confirm the value used to signal
 * that to the caller.
 */
10030 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10031 struct extent_record *rec)
10033 struct extent_backref *back, *tmp;
10034 struct data_backref *dback;
10035 struct extent_entry *entry, *best = NULL;
10036 LIST_HEAD(entries);
10037 int nr_entries = 0;
10038 int broken_entries = 0;
10040 short mismatch = 0;
10043 * Metadata is easy and the backrefs should always agree on bytenr and
10044 * size, if not we've got bigger issues.
10049 rbtree_postorder_for_each_entry_safe(back, tmp,
10050 &rec->backref_tree, node) {
10051 if (back->full_backref || !back->is_data)
10054 dback = to_data_backref(back);
10057 * We only pay attention to backrefs that we found a real
10060 if (dback->found_ref == 0)
10064 * For now we only catch when the bytes don't match, not the
10065 * bytenr. We can easily do this at the same time, but I want
10066 * to have a fs image to test on before we just add repair
10067 * functionality willy-nilly so we know we won't screw up the
10071 entry = find_entry(&entries, dback->disk_bytenr,
10074 entry = malloc(sizeof(struct extent_entry));
10079 memset(entry, 0, sizeof(*entry));
10080 entry->bytenr = dback->disk_bytenr;
10081 entry->bytes = dback->bytes;
10082 list_add_tail(&entry->list, &entries);
10087 * If we only have on entry we may think the entries agree when
10088 * in reality they don't so we have to do some extra checking.
10090 if (dback->disk_bytenr != rec->start ||
10091 dback->bytes != rec->nr || back->broken)
10094 if (back->broken) {
10102 /* Yay all the backrefs agree, carry on good sir */
10103 if (nr_entries <= 1 && !mismatch)
10106 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10107 "%Lu\n", rec->start);
10110 * First we want to see if the backrefs can agree amongst themselves who
10111 * is right, so figure out which one of the entries has the highest
10114 best = find_most_right_entry(&entries);
10117 * Ok so we may have an even split between what the backrefs think, so
10118 * this is where we use the extent ref to see what it thinks.
10121 entry = find_entry(&entries, rec->start, rec->nr);
10122 if (!entry && (!broken_entries || !rec->found_rec)) {
10123 fprintf(stderr, "Backrefs don't agree with each other "
10124 "and extent record doesn't agree with anybody,"
10125 " so we can't fix bytenr %Lu bytes %Lu\n",
10126 rec->start, rec->nr);
10129 } else if (!entry) {
10131 * Ok our backrefs were broken, we'll assume this is the
10132 * correct value and add an entry for this range.
10134 entry = malloc(sizeof(struct extent_entry));
10139 memset(entry, 0, sizeof(*entry));
10140 entry->bytenr = rec->start;
10141 entry->bytes = rec->nr;
10142 list_add_tail(&entry->list, &entries);
10146 best = find_most_right_entry(&entries);
10148 fprintf(stderr, "Backrefs and extent record evenly "
10149 "split on who is right, this is going to "
10150 "require user input to fix bytenr %Lu bytes "
10151 "%Lu\n", rec->start, rec->nr);
10158 * I don't think this can happen currently as we'll abort() if we catch
10159 * this case higher up, but in case somebody removes that we still can't
10160 * deal with it properly here yet, so just bail out of that's the case.
10162 if (best->bytenr != rec->start) {
10163 fprintf(stderr, "Extent start and backref starts don't match, "
10164 "please use btrfs-image on this file system and send "
10165 "it to a btrfs developer so they can make fsck fix "
10166 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10167 rec->start, rec->nr);
10173 * Ok great we all agreed on an extent record, let's go find the real
10174 * references and fix up the ones that don't match.
10176 rbtree_postorder_for_each_entry_safe(back, tmp,
10177 &rec->backref_tree, node) {
10178 if (back->full_backref || !back->is_data)
10181 dback = to_data_backref(back);
10184 * Still ignoring backrefs that don't have a real ref attached
10187 if (dback->found_ref == 0)
10190 if (dback->bytes == best->bytes &&
10191 dback->disk_bytenr == best->bytenr)
10194 ret = repair_ref(info, path, dback, best);
10200 * Ok we messed with the actual refs, which means we need to drop our
10201 * entire cache and go back and rescan. I know this is a huge pain and
10202 * adds a lot of extra work, but it's the only way to be safe. Once all
10203 * the backrefs agree we may not need to do anything to the extent
10208 while (!list_empty(&entries)) {
10209 entry = list_entry(entries.next, struct extent_entry, list);
10210 list_del_init(&entry->list);
/*
 * Swap @rec, a record that was created from backrefs only, for the
 * duplicate that came from a real extent item.
 *
 * Per the in-code comment the function returns early when rec->found_rec is
 * set or when there is more than one duplicate; a record without any
 * duplicate trips BUG_ON(). Otherwise @rec is removed from @extent_cache
 * and the first entry on rec->dups ("good") is re-initialised: its lists
 * are reset, its cache range is set from good->start/good->nr, its check
 * flags and duplicate count are cleared, and it takes over rec->refs and
 * rec->backrefs.
 *
 * Cached records returned by the lookup at good->start are then absorbed:
 * one with found_rec set or with duplicates of its own is queued on
 * good->dups (putting "good" on duplicate_extents if it is not on a list
 * yet), any other one only contributes its refs and backrefs. Finally
 * "good" is inserted into @extent_cache.
 *
 * The last return yields 0 when "good" ended up with duplicates and 1
 * otherwise.
 */
10216 static int process_duplicates(struct cache_tree *extent_cache,
10217 struct extent_record *rec)
10219 struct extent_record *good, *tmp;
10220 struct cache_extent *cache;
10224 * If we found a extent record for this extent then return, or if we
10225 * have more than one duplicate we are likely going to need to delete
10228 if (rec->found_rec || rec->num_duplicates > 1)
10231 /* Shouldn't happen but just in case */
10232 BUG_ON(!rec->num_duplicates);
10235 * So this happens if we end up with a backref that doesn't match the
10236 * actual extent entry. So either the backref is bad or the extent
10237 * entry is bad. Either way we want to have the extent_record actually
10238 * reflect what we found in the extent_tree, so we need to take the
10239 * duplicate out and use that as the extent_record since the only way we
10240 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10242 remove_cache_extent(extent_cache, &rec->cache);
10244 good = to_extent_record(rec->dups.next);
10245 list_del_init(&good->list);
10246 INIT_LIST_HEAD(&good->backrefs);
10247 INIT_LIST_HEAD(&good->dups);
10248 good->cache.start = good->start;
10249 good->cache.size = good->nr;
10250 good->content_checked = 0;
10251 good->owner_ref_checked = 0;
10252 good->num_duplicates = 0;
10253 good->refs = rec->refs;
10254 list_splice_init(&rec->backrefs, &good->backrefs);
10256 cache = lookup_cache_extent(extent_cache, good->start,
10260 tmp = container_of(cache, struct extent_record, cache);
10263 * If we find another overlapping extent and it's found_rec is
10264 * set then it's a duplicate and we need to try and delete
10267 if (tmp->found_rec || tmp->num_duplicates > 0) {
10268 if (list_empty(&good->list))
10269 list_add_tail(&good->list,
10270 &duplicate_extents);
10271 good->num_duplicates += tmp->num_duplicates + 1;
10272 list_splice_init(&tmp->dups, &good->dups);
10273 list_del_init(&tmp->list);
10274 list_add_tail(&tmp->list, &good->dups);
10275 remove_cache_extent(extent_cache, &tmp->cache);
10280 * Ok we have another non extent item backed extent rec, so lets
10281 * just add it to this extent and carry on like we did above.
10283 good->refs += tmp->refs;
10284 list_splice_init(&tmp->backrefs, &good->backrefs);
10285 remove_cache_extent(extent_cache, &tmp->cache);
10288 ret = insert_cache_extent(extent_cache, &good->cache);
10291 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates of @rec that are covered by a
 * larger record.
 *
 * The rec->dups list is scanned for the record that covers all the others;
 * a pair that overlaps without one fully covering the other is reported and
 * not handled. @rec and the entries of rec->dups are then moved to a local
 * delete_list. Inside one transaction on the extent root, entries are
 * tested for found_rec and the item keyed
 * (tmp->start, BTRFS_EXTENT_ITEM_KEY, tmp->nr) is searched for and deleted;
 * a metadata record is reported instead. The transaction is committed and
 * both delete_list and rec->dups are drained.
 *
 * The last return yields ret when it is non-zero, otherwise nr_del.
 * rec->num_duplicates is reset to 0 when ret and nr_del are both zero.
 *
 * NOTE(review): the lines that choose which entries go on delete_list and
 * that update nr_del are not visible here - confirm against the full
 * source.
 */
10294 static int delete_duplicate_records(struct btrfs_root *root,
10295 struct extent_record *rec)
10297 struct btrfs_trans_handle *trans;
10298 LIST_HEAD(delete_list);
10299 struct btrfs_path path;
10300 struct extent_record *tmp, *good, *n;
10303 struct btrfs_key key;
10305 btrfs_init_path(&path);
10308 /* Find the record that covers all of the duplicates. */
10309 list_for_each_entry(tmp, &rec->dups, list) {
10310 if (good->start < tmp->start)
10312 if (good->nr > tmp->nr)
10315 if (tmp->start + tmp->nr < good->start + good->nr) {
10316 fprintf(stderr, "Ok we have overlapping extents that "
10317 "aren't completely covered by each other, this "
10318 "is going to require more careful thought. "
10319 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10320 tmp->start, tmp->nr, good->start, good->nr);
10327 list_add_tail(&rec->list, &delete_list);
10329 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10332 list_move_tail(&tmp->list, &delete_list);
10335 root = root->fs_info->extent_root;
10336 trans = btrfs_start_transaction(root, 1);
10337 if (IS_ERR(trans)) {
10338 ret = PTR_ERR(trans);
10342 list_for_each_entry(tmp, &delete_list, list) {
10343 if (tmp->found_rec == 0)
10345 key.objectid = tmp->start;
10346 key.type = BTRFS_EXTENT_ITEM_KEY;
10347 key.offset = tmp->nr;
10349 /* Shouldn't happen but just in case */
10350 if (tmp->metadata) {
10351 fprintf(stderr, "Well this shouldn't happen, extent "
10352 "record overlaps but is metadata? "
10353 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10357 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10363 ret = btrfs_del_item(trans, root, &path);
10366 btrfs_release_path(&path);
10369 err = btrfs_commit_transaction(trans, root);
10373 while (!list_empty(&delete_list)) {
10374 tmp = to_extent_record(delete_list.next);
10375 list_del_init(&tmp->list);
10381 while (!list_empty(&rec->dups)) {
10382 tmp = to_extent_record(rec->dups.next);
10383 list_del_init(&tmp->list);
10387 btrfs_release_path(&path);
10389 if (!ret && !nr_del)
10390 rec->num_duplicates = 0;
10392 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec that were never matched to a
 * real reference.
 *
 * For each backref that is data, not a full backref and has found_ref == 0:
 *  - the fs root dback->root is read; a missing root (-ENOENT) is treated
 *    as a bad ref, any other error is returned as PTR_ERR(root);
 *  - (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched for
 *    in that root without a transaction;
 *  - the disk_bytenr/disk_num_bytes of the file extent found are read and
 *    @extent_cache is probed at that bytenr; per the comment, a cached
 *    record with found_rec set means the backref belongs elsewhere;
 *  - otherwise dback->found_ref is incremented and dback->disk_bytenr and
 *    dback->bytes are overwritten with the values read from the file
 *    extent.
 *
 * NOTE(review): the statement announced by the trailing "Set this so the
 * verify backref code knows ..." comment and the final return are outside
 * the visible lines.
 */
10395 static int find_possible_backrefs(struct btrfs_fs_info *info,
10396 struct btrfs_path *path,
10397 struct cache_tree *extent_cache,
10398 struct extent_record *rec)
10400 struct btrfs_root *root;
10401 struct extent_backref *back, *tmp;
10402 struct data_backref *dback;
10403 struct cache_extent *cache;
10404 struct btrfs_file_extent_item *fi;
10405 struct btrfs_key key;
10409 rbtree_postorder_for_each_entry_safe(back, tmp,
10410 &rec->backref_tree, node) {
10411 /* Don't care about full backrefs (poor unloved backrefs) */
10412 if (back->full_backref || !back->is_data)
10415 dback = to_data_backref(back);
10417 /* We found this one, we don't need to do a lookup */
10418 if (dback->found_ref)
10421 key.objectid = dback->root;
10422 key.type = BTRFS_ROOT_ITEM_KEY;
10423 key.offset = (u64)-1;
10425 root = btrfs_read_fs_root(info, &key);
10427 /* No root, definitely a bad ref, skip */
10428 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10430 /* Other err, exit */
10432 return PTR_ERR(root);
10434 key.objectid = dback->owner;
10435 key.type = BTRFS_EXTENT_DATA_KEY;
10436 key.offset = dback->offset;
10437 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10439 btrfs_release_path(path);
10442 /* Didn't find it, we can carry on */
10447 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10448 struct btrfs_file_extent_item);
10449 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10450 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10451 btrfs_release_path(path);
10452 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10454 struct extent_record *tmp;
10455 tmp = container_of(cache, struct extent_record, cache);
10458 * If we found an extent record for the bytenr for this
10459 * particular backref then we can't add it to our
10460 * current extent record. We only want to add backrefs
10461 * that don't have a corresponding extent item in the
10462 * extent tree since they likely belong to this record
10463 * and we need to fix it if it doesn't match bytenrs.
10465 if (tmp->found_rec)
10469 dback->found_ref += 1;
10470 dback->disk_bytenr = bytenr;
10471 dback->bytes = bytes;
10474 * Set this so the verify backref code knows not to trust the
10475 * values in this backref.
10484 * Record orphan data ref into corresponding root.
10486 * Return 0 if the extent item contains data ref and recorded.
10487 * Return 1 if the extent item contains no useful data ref
10488 * In that case, it may contain only a shared_dataref or metadata backref,
10489 * or the file extent exists (this should be handled by the extent bytenr
10490 * recovery routine).
10491 * Return <0 if something goes wrong.
/*
 * Queue an orphan_data_extent on the owning root for every data backref of
 * @rec that the extent tree knows about but no file extent references.
 *
 * Only backrefs that are data, not full backrefs, have found_extent_tree
 * set and have found_ref == 0 are considered. The root dback->root is read
 * (a missing or unreadable root is skipped) and
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched for in
 * it; the in-code comment explains how the search result decides whether
 * the orphan is recorded. A recorded orphan stores the root, owner and
 * offset of the backref together with rec->cache.start/size.
 *
 * The last return yields 0 when at least one orphan was recorded and 1
 * otherwise.
 */
10493 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10494 struct extent_record *rec)
10496 struct btrfs_key key;
10497 struct btrfs_root *dest_root;
10498 struct extent_backref *back, *tmp;
10499 struct data_backref *dback;
10500 struct orphan_data_extent *orphan;
10501 struct btrfs_path path;
10502 int recorded_data_ref = 0;
10507 btrfs_init_path(&path);
10508 rbtree_postorder_for_each_entry_safe(back, tmp,
10509 &rec->backref_tree, node) {
10510 if (back->full_backref || !back->is_data ||
10511 !back->found_extent_tree)
10513 dback = to_data_backref(back);
10514 if (dback->found_ref)
10516 key.objectid = dback->root;
10517 key.type = BTRFS_ROOT_ITEM_KEY;
10518 key.offset = (u64)-1;
10520 dest_root = btrfs_read_fs_root(fs_info, &key);
10522 /* For non-exist root we just skip it */
10523 if (IS_ERR(dest_root) || !dest_root)
10526 key.objectid = dback->owner;
10527 key.type = BTRFS_EXTENT_DATA_KEY;
10528 key.offset = dback->offset;
10530 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10531 btrfs_release_path(&path);
10533 * For ret < 0, it's OK since the fs-tree may be corrupted,
10534 * we need to record it for inode/file extent rebuild.
10535 * For ret > 0, we record it only for file extent rebuild.
10536 * For ret == 0, the file extent exists but only bytenr
10537 * mismatch, let the original bytenr fix routine to handle,
10543 orphan = malloc(sizeof(*orphan));
10548 INIT_LIST_HEAD(&orphan->list);
10549 orphan->root = dback->root;
10550 orphan->objectid = dback->owner;
10551 orphan->offset = dback->offset;
10552 orphan->disk_bytenr = rec->cache.start;
10553 orphan->disk_len = rec->cache.size;
/* list_add(new, head): the orphan is the new node, the root's list is the head. */
10554 list_add(&orphan->list, &dest_root->orphan_data_extents);
10555 recorded_data_ref = 1;
10558 btrfs_release_path(&path);
10560 return !recorded_data_ref;
10566 * when an incorrect extent item is found, this will delete
10567 * all of the existing entries for it and recreate them
10568 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries of @rec from the backrefs collected
 * during the tree scan.
 *
 * Visible flow:
 *  - BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags when
 *    rec->flag_block_full_backref is set;
 *  - for a non-metadata record whose refs differ from extent_item_refs,
 *    find_possible_backrefs() and then verify_backrefs() are run first;
 *  - a transaction is started on the extent root and
 *    delete_extent_records() removes the existing records;
 *  - info->corrupt_blocks is probed for the range
 *    [rec->start, rec->max_size]; per the comment no references are added
 *    to a corrupt block;
 *  - every backref is tested for found_ref and handed to record_extent(),
 *    clearing rec->bad_full_backref on the way;
 *  - the transaction is committed, a "Repaired extent references" message
 *    is printed and the path is released.
 *
 * NOTE(review): error handling and the return statements are outside the
 * visible lines.
 */
10570 static int fixup_extent_refs(struct btrfs_fs_info *info,
10571 struct cache_tree *extent_cache,
10572 struct extent_record *rec)
10574 struct btrfs_trans_handle *trans = NULL;
10576 struct btrfs_path path;
10577 struct cache_extent *cache;
10578 struct extent_backref *back, *tmp;
10582 if (rec->flag_block_full_backref)
10583 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10585 btrfs_init_path(&path);
10586 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10588 * Sometimes the backrefs themselves are so broken they don't
10589 * get attached to any meaningful rec, so first go back and
10590 * check any of our backrefs that we couldn't find and throw
10591 * them into the list if we find the backref so that
10592 * verify_backrefs can figure out what to do.
10594 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10599 /* step one, make sure all of the backrefs agree */
10600 ret = verify_backrefs(info, &path, rec);
10604 trans = btrfs_start_transaction(info->extent_root, 1);
10605 if (IS_ERR(trans)) {
10606 ret = PTR_ERR(trans);
10610 /* step two, delete all the existing records */
10611 ret = delete_extent_records(trans, info->extent_root, &path,
10617 /* was this block corrupt? If so, don't add references to it */
10618 cache = lookup_cache_extent(info->corrupt_blocks,
10619 rec->start, rec->max_size);
10625 /* step three, recreate all the refs we did find */
10626 rbtree_postorder_for_each_entry_safe(back, tmp,
10627 &rec->backref_tree, node) {
10629 * if we didn't find any references, don't create a
10630 * new extent record
10632 if (!back->found_ref)
10635 rec->bad_full_backref = 0;
10636 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10644 int err = btrfs_commit_transaction(trans, info->extent_root);
10650 fprintf(stderr, "Repaired extent references for %llu\n",
10651 (unsigned long long)rec->start);
10653 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root with key
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for a metadata
 * record, and with BTRFS_EXTENT_ITEM_KEY / rec->max_size in the other
 * visible assignment. The search runs inside a transaction with cow
 * enabled; the flags are read, adjusted, written back, the leaf is marked
 * dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): on the two lookup-failure paths the result of
 * btrfs_commit_transaction() is not captured; the remaining return
 * statements are outside the visible lines.
 */
10657 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10658 struct extent_record *rec)
10660 struct btrfs_trans_handle *trans;
10661 struct btrfs_root *root = fs_info->extent_root;
10662 struct btrfs_path path;
10663 struct btrfs_extent_item *ei;
10664 struct btrfs_key key;
10668 key.objectid = rec->start;
10669 if (rec->metadata) {
10670 key.type = BTRFS_METADATA_ITEM_KEY;
10671 key.offset = rec->info_level;
10673 key.type = BTRFS_EXTENT_ITEM_KEY;
10674 key.offset = rec->max_size;
10677 trans = btrfs_start_transaction(root, 0);
10679 return PTR_ERR(trans);
10681 btrfs_init_path(&path);
10682 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10684 btrfs_release_path(&path);
10685 btrfs_commit_transaction(trans, root);
10688 fprintf(stderr, "Didn't find extent for %llu\n",
10689 (unsigned long long)rec->start);
10690 btrfs_release_path(&path);
10691 btrfs_commit_transaction(trans, root);
10695 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10696 struct btrfs_extent_item);
10697 flags = btrfs_extent_flags(path.nodes[0], ei);
10698 if (rec->flag_block_full_backref) {
10699 fprintf(stderr, "setting full backref on %llu\n",
10700 (unsigned long long)key.objectid);
10701 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10703 fprintf(stderr, "clearing full backref on %llu\n",
10704 (unsigned long long)key.objectid);
10705 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10707 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10708 btrfs_mark_buffer_dirty(path.nodes[0]);
10709 btrfs_release_path(&path);
10710 ret = btrfs_commit_transaction(trans, root);
10712 fprintf(stderr, "Repaired extent flags for %llu\n",
10713 (unsigned long long)rec->start);
10718 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to the corrupt block @corrupt from its parent node in
 * the extent tree.
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1). The slot returned by the search
 * is compared against corrupt->cache.start first; failing that, every slot
 * of the node is scanned for that block pointer. The pointer found is
 * deleted with btrfs_del_ptr() after printing a message.
 *
 * NOTE(review): what happens when the pointer is not found (including the
 * check against the extent root node) and the return statements are outside
 * the visible lines.
 */
10719 static int prune_one_block(struct btrfs_trans_handle *trans,
10720 struct btrfs_fs_info *info,
10721 struct btrfs_corrupt_block *corrupt)
10724 struct btrfs_path path;
10725 struct extent_buffer *eb;
10729 int level = corrupt->level + 1;
10731 btrfs_init_path(&path);
10733 /* we want to stop at the parent to our busted block */
10734 path.lowest_level = level;
10736 ret = btrfs_search_slot(trans, info->extent_root,
10737 &corrupt->key, &path, -1, 1);
10742 eb = path.nodes[level];
10749 * hopefully the search gave us the block we want to prune,
10750 * lets try that first
10752 slot = path.slots[level];
10753 found = btrfs_node_blockptr(eb, slot);
10754 if (found == corrupt->cache.start)
10757 nritems = btrfs_header_nritems(eb);
10759 /* the search failed, lets scan this node and hope we find it */
10760 for (slot = 0; slot < nritems; slot++) {
10761 found = btrfs_node_blockptr(eb, slot);
10762 if (found == corrupt->cache.start)
10766 * we couldn't find the bad block. TODO, search all the nodes for pointers
10769 if (eb == info->extent_root->node) {
10774 btrfs_release_path(&path);
10779 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10780 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10783 btrfs_release_path(&path);
/*
 * Drain info->corrupt_blocks: each cached corrupt block is passed to
 * prune_one_block() and then removed from the cache, using a transaction
 * started on the extent root. When the final return is reached the result
 * of btrfs_commit_transaction() is returned; a failure to start the
 * transaction returns PTR_ERR(trans).
 *
 * NOTE(review): the return value of prune_one_block() is not captured, so a
 * failed prune goes unnoticed here. The loop structure and the condition
 * guarding the transaction start are outside the visible lines.
 */
10787 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10789 struct btrfs_trans_handle *trans = NULL;
10790 struct cache_extent *cache;
10791 struct btrfs_corrupt_block *corrupt;
10794 cache = search_cache_extent(info->corrupt_blocks, 0);
10798 trans = btrfs_start_transaction(info->extent_root, 1);
10800 return PTR_ERR(trans);
10802 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10803 prune_one_block(trans, info, corrupt);
10804 remove_cache_extent(info->corrupt_blocks, cache);
10807 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache and
 * then step through the block groups, advancing start to the end of each
 * group found (key.objectid + key.offset).
 *
 * NOTE(review): the loop headers and whatever is reset on each block group
 * are outside the visible lines - confirm against the full source.
 */
10811 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10813 struct btrfs_block_group_cache *cache;
10818 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10819 &start, &end, EXTENT_DIRTY);
10822 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10827 cache = btrfs_lookup_first_block_group(fs_info, start);
10832 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every record in @extent_cache, report the inconsistencies found and,
 * when repairing, try to fix them.
 *
 * Visible flow:
 *  - for a repair run the range [start, start + max_size - 1] of every
 *    record and the range of every corrupt block are marked dirty in
 *    fs_info->excluded_extents, corrupt blocks are pruned and the cached
 *    block groups are reset;
 *  - while repairing, duplicate_extents is drained through
 *    process_duplicates() and delete_duplicate_records();
 *  - each record is then checked and messages are printed for: multiple
 *    extent items, a ref count that differs from extent_item_refs (orphan
 *    data extents are recorded in that case), a backpointer mismatch, a
 *    failed owner ref check, a bad full backref, metadata crossing a stripe
 *    boundary, and a type mismatch with the chunk;
 *  - fixup_extent_refs() is called under "repair && fix" and
 *    fixup_extent_flags() in the bad full backref branch;
 *  - the record is removed from the cache, its backrefs are freed and, when
 *    "!init_extent_tree && repair && (!cur_err || fix)" holds, its range is
 *    cleared from excluded_extents again;
 *  - at the end an error other than -EAGAIN prints a failure message; a
 *    further branch runs btrfs_fix_block_accounting() inside a transaction
 *    on the extent root and commits it.
 *
 * NOTE(review): most branch conditions and all return statements are
 * outside the visible lines.
 */
10836 static int check_extent_refs(struct btrfs_root *root,
10837 struct cache_tree *extent_cache)
10839 struct extent_record *rec;
10840 struct cache_extent *cache;
10847 * if we're doing a repair, we have to make sure
10848 * we don't allocate from the problem extents.
10849 * In the worst case, this will be all the
10850 * extents in the FS
10852 cache = search_cache_extent(extent_cache, 0);
10854 rec = container_of(cache, struct extent_record, cache);
10855 set_extent_dirty(root->fs_info->excluded_extents,
10857 rec->start + rec->max_size - 1);
10858 cache = next_cache_extent(cache);
10861 /* pin down all the corrupted blocks too */
10862 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10864 set_extent_dirty(root->fs_info->excluded_extents,
10866 cache->start + cache->size - 1);
10867 cache = next_cache_extent(cache);
10869 prune_corrupt_blocks(root->fs_info);
10870 reset_cached_block_groups(root->fs_info);
10873 reset_cached_block_groups(root->fs_info);
10876 * We need to delete any duplicate entries we find first otherwise we
10877 * could mess up the extent tree when we have backrefs that actually
10878 * belong to a different extent item and not the weird duplicate one.
10880 while (repair && !list_empty(&duplicate_extents)) {
10881 rec = to_extent_record(duplicate_extents.next);
10882 list_del_init(&rec->list);
10884 /* Sometimes we can find a backref before we find an actual
10885 * extent, so we need to process it a little bit to see if there
10886 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10887 * if this is a backref screwup. If we need to delete stuff
10888 * process_duplicates() will return 0, otherwise it will return
10891 if (process_duplicates(extent_cache, rec))
10893 ret = delete_duplicate_records(root, rec);
10897 * delete_duplicate_records will return the number of entries
10898 * deleted, so if it's greater than 0 then we know we actually
10899 * did something and we need to remove.
10912 cache = search_cache_extent(extent_cache, 0);
10915 rec = container_of(cache, struct extent_record, cache);
10916 if (rec->num_duplicates) {
10917 fprintf(stderr, "extent item %llu has multiple extent "
10918 "items\n", (unsigned long long)rec->start);
10922 if (rec->refs != rec->extent_item_refs) {
10923 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10924 (unsigned long long)rec->start,
10925 (unsigned long long)rec->nr);
10926 fprintf(stderr, "extent item %llu, found %llu\n",
10927 (unsigned long long)rec->extent_item_refs,
10928 (unsigned long long)rec->refs);
10929 ret = record_orphan_data_extents(root->fs_info, rec);
10935 if (all_backpointers_checked(rec, 1)) {
10936 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10937 (unsigned long long)rec->start,
10938 (unsigned long long)rec->nr);
10942 if (!rec->owner_ref_checked) {
10943 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10944 (unsigned long long)rec->start,
10945 (unsigned long long)rec->nr);
10950 if (repair && fix) {
10951 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10957 if (rec->bad_full_backref) {
10958 fprintf(stderr, "bad full backref, on [%llu]\n",
10959 (unsigned long long)rec->start);
10961 ret = fixup_extent_flags(root->fs_info, rec);
10969 * Although it's not a extent ref's problem, we reuse this
10970 * routine for error reporting.
10971 * No repair function yet.
10973 if (rec->crossing_stripes) {
10975 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10976 rec->start, rec->start + rec->max_size);
10980 if (rec->wrong_chunk_type) {
10982 "bad extent [%llu, %llu), type mismatch with chunk\n",
10983 rec->start, rec->start + rec->max_size);
10988 remove_cache_extent(extent_cache, cache);
10989 free_all_extent_backrefs(rec);
10990 if (!init_extent_tree && repair && (!cur_err || fix))
10991 clear_extent_dirty(root->fs_info->excluded_extents,
10993 rec->start + rec->max_size - 1);
10998 if (ret && ret != -EAGAIN) {
10999 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
11002 struct btrfs_trans_handle *trans;
11004 root = root->fs_info->extent_root;
11005 trans = btrfs_start_transaction(root, 1);
11006 if (IS_ERR(trans)) {
11007 ret = PTR_ERR(trans);
11011 ret = btrfs_fix_block_accounting(trans, root);
11014 ret = btrfs_commit_transaction(trans, root);
/*
 * Derive the per-stripe length of a chunk of @length bytes that is spread
 * over @num_stripes stripes, based on the profile bits in @type:
 *   BTRFS_BLOCK_GROUP_RAID0   length / num_stripes
 *   BTRFS_BLOCK_GROUP_RAID10  length * 2 / num_stripes
 *   BTRFS_BLOCK_GROUP_RAID5   length / (num_stripes - 1)
 *   BTRFS_BLOCK_GROUP_RAID6   length / (num_stripes - 2)
 * The remaining assignment uses length unchanged (presumably the fallback
 * for every other profile). The computed stripe_size is returned.
 *
 * NOTE(review): nothing visible guards against a zero divisor (num_stripes
 * of 0, of 1 with RAID5, or of 2 with RAID6); callers presumably validate
 * num_stripes beforehand - confirm.
 */
11026 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11030 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11031 stripe_size = length;
11032 stripe_size /= num_stripes;
11033 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11034 stripe_size = length * 2;
11035 stripe_size /= num_stripes;
11036 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11037 stripe_size = length;
11038 stripe_size /= (num_stripes - 1);
11039 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11040 stripe_size = length;
11041 stripe_size /= (num_stripes - 2);
11043 stripe_size = length;
11045 return stripe_size;
11049 * Check the chunk with its block group/dev list ref:
11050 * Return 0 if all refs seem valid.
11051 * Return 1 if only part of the refs seem valid and a later check is needed to
11052 * rebuild refs, e.g. a missing block group that requires searching the extent tree.
11053 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache: the chunk is looked up among the block groups and compared
 * on length/offset/flags, then every stripe is looked up among the device
 * extents and compared on devid/offset/chunk offset/length.
 * NOTE(review): the last parameter and the return statements are not visible
 * in this view; check_chunks() passes its "silent" flag in that position.
 */
11055 static int check_chunk_refs(struct chunk_record *chunk_rec,
11056 struct block_group_tree *block_group_cache,
11057 struct device_extent_tree *dev_extent_cache,
11060 struct cache_extent *block_group_item;
11061 struct block_group_record *block_group_rec;
11062 struct cache_extent *dev_extent_item;
11063 struct device_extent_record *dev_extent_rec;
11067 int metadump_v2 = 0;
/* Look the chunk up in the block group tree. */
11071 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11073 chunk_rec->length);
11074 if (block_group_item) {
/* Chunk length/offset are compared with the block group's offset/objectid. */
11075 block_group_rec = container_of(block_group_item,
11076 struct block_group_record,
11078 if (chunk_rec->length != block_group_rec->offset ||
11079 chunk_rec->offset != block_group_rec->objectid ||
11081 chunk_rec->type_flags != block_group_rec->flags)) {
11084 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11085 chunk_rec->objectid,
11090 chunk_rec->type_flags,
11091 block_group_rec->objectid,
11092 block_group_rec->type,
11093 block_group_rec->offset,
11094 block_group_rec->offset,
11095 block_group_rec->objectid,
11096 block_group_rec->flags);
/* Unlink the block group from its list and remember it on the chunk. */
11099 list_del_init(&block_group_rec->list);
11100 chunk_rec->bg_rec = block_group_rec;
11105 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11106 chunk_rec->objectid,
11111 chunk_rec->type_flags);
/* Length each stripe's device extent is expected to have. */
11118 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11119 chunk_rec->num_stripes);
11120 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11121 devid = chunk_rec->stripes[i].devid;
11122 offset = chunk_rec->stripes[i].offset;
/* Find the device extent for this stripe by (devid, offset, length). */
11123 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11124 devid, offset, length);
11125 if (dev_extent_item) {
11126 dev_extent_rec = container_of(dev_extent_item,
11127 struct device_extent_record,
11129 if (dev_extent_rec->objectid != devid ||
11130 dev_extent_rec->offset != offset ||
11131 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11132 dev_extent_rec->length != length) {
11135 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11136 chunk_rec->objectid,
11139 chunk_rec->stripes[i].devid,
11140 chunk_rec->stripes[i].offset,
11141 dev_extent_rec->objectid,
11142 dev_extent_rec->offset,
11143 dev_extent_rec->length);
/* Move the device extent onto this chunk's dextents list. */
11146 list_move(&dev_extent_rec->chunk_list,
11147 &chunk_rec->dextents);
11152 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11153 chunk_rec->objectid,
11156 chunk_rec->stripes[i].devid,
11157 chunk_rec->stripes[i].offset);
11164 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort each chunk
 * onto @good (result == 0), @rebuild (result > 0) or @bad (result < 0); a
 * list pointer that is NULL is simply not filled.  Afterwards the entries
 * still linked on block_group_cache->block_groups and on
 * dev_extent_cache->no_chunk_orphans are walked, with messages saying they
 * "didn't find the relative chunk".
 * NOTE(review): how the return value is built is not visible in this view.
 */
11165 int check_chunks(struct cache_tree *chunk_cache,
11166 struct block_group_tree *block_group_cache,
11167 struct device_extent_tree *dev_extent_cache,
11168 struct list_head *good, struct list_head *bad,
11169 struct list_head *rebuild, int silent)
11171 struct cache_extent *chunk_item;
11172 struct chunk_record *chunk_rec;
11173 struct block_group_record *bg_rec;
11174 struct device_extent_record *dext_rec;
/* Iterate over the chunk cache in order. */
11178 chunk_item = first_cache_extent(chunk_cache);
11179 while (chunk_item) {
11180 chunk_rec = container_of(chunk_item, struct chunk_record,
11182 err = check_chunk_refs(chunk_rec, block_group_cache,
11183 dev_extent_cache, silent);
/* Classify the chunk by the sign of err; each output list is optional. */
11186 if (err == 0 && good)
11187 list_add_tail(&chunk_rec->list, good);
11188 if (err > 0 && rebuild)
11189 list_add_tail(&chunk_rec->list, rebuild);
11190 if (err < 0 && bad)
11191 list_add_tail(&chunk_rec->list, bad);
11192 chunk_item = next_cache_extent(chunk_item);
/* Block groups left on this list were not claimed by any chunk above. */
11195 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11198 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11206 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11210 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11211 dext_rec->objectid,
/*
 * Add up the lengths of the device extents found for dev_rec->devid,
 * unlinking each visited extent from its device_list, and compare the sum
 * with the device record's byte_used.  A mismatch has a diagnostic message.
 * NOTE(review): the loop header, the reaction to a foreign objectid and the
 * return values are not visible in this view.
 */
11221 static int check_device_used(struct device_record *dev_rec,
11222 struct device_extent_tree *dext_cache)
11224 struct cache_extent *cache;
11225 struct device_extent_record *dev_extent_rec;
11226 u64 total_byte = 0;
/* Start at the first cached extent for this devid (search offset 0). */
11228 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11230 dev_extent_rec = container_of(cache,
11231 struct device_extent_record,
11233 if (dev_extent_rec->objectid != dev_rec->devid)
11236 list_del_init(&dev_extent_rec->device_list);
11237 total_byte += dev_extent_rec->length;
11238 cache = next_cache_extent(cache);
/* The summed extent lengths must equal the device's byte_used. */
11241 if (total_byte != dev_rec->byte_used) {
11243 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11244 total_byte, dev_rec->byte_used, dev_rec->objectid,
11245 dev_rec->type, dev_rec->offset);
11253 * Extra (optional) check for dev_item size to report possible problem on a new
11256 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11258 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11260 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11261 devid, total_bytes, sectorsize);
11263 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11264 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11269 * Unlike device size alignment check above, some super total_bytes check
11270 * failure can lead to mount failure for newer kernel.
11272 * So this function will return the error for a fatal super total_bytes problem.
/*
 * Compare the superblock's total_bytes with the sum of total_bytes over all
 * devices.  A superblock value smaller than the sum is reported with
 * error().  Further down, a value that is unaligned to the sector size or
 * that differs from the sum only produces warnings; that part is preceded
 * by a test of the METADUMP/METADUMP_V2 super flags.
 * NOTE(review): the return statements are not visible in this view.
 */
11274 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11276 struct btrfs_device *dev;
11277 struct list_head *dev_list = &fs_info->fs_devices->devices;
11278 u64 total_bytes = 0;
11279 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
/* Accumulate the size of every device on the fs_devices list. */
11281 list_for_each_entry(dev, dev_list, dev_list)
11282 total_bytes += dev->total_bytes;
11284 /* Important check, which can cause unmountable fs */
11285 if (super_bytes < total_bytes) {
11286 error("super total bytes %llu smaller than real device(s) size %llu",
11287 super_bytes, total_bytes);
11288 error("mounting this fs may fail for newer kernels");
11289 error("this can be fixed by 'btrfs rescue fix-device-size'");
11294 * Optional check, just to make everything aligned and match with each
11297 * For a btrfs-image restored fs, we don't need to check it anyway.
11299 if (btrfs_super_flags(fs_info->super_copy) &
11300 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11302 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11303 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11304 super_bytes != total_bytes) {
11305 warning("minor unaligned/mismatch device size detected");
11307 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11312 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk every device record in the @dev_cache rb-tree, run
 * check_device_used() and the advisory size-alignment check on it, then
 * walk the device extents left on dev_extent_cache->no_device_orphans,
 * which have a "didn't find its device" message.
 * NOTE(review): how err is folded into the return value is not visible here.
 */
11313 static int check_devices(struct rb_root *dev_cache,
11314 struct device_extent_tree *dev_extent_cache)
11316 struct rb_node *dev_node;
11317 struct device_record *dev_rec;
11318 struct device_extent_record *dext_rec;
/* In-order walk over the device records. */
11322 dev_node = rb_first(dev_cache);
11324 dev_rec = container_of(dev_node, struct device_record, node);
11325 err = check_device_used(dev_rec, dev_extent_cache);
/* The sector size is taken from the file-scope global_info. */
11329 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11330 global_info->sectorsize);
11331 dev_node = rb_next(dev_node);
/* Whatever is still on this list was not unlinked by check_device_used(). */
11333 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11336 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11337 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 * NOTE(review): handling of a failed malloc() and the return value are not
 * visible in this view.
 */
11344 static int add_root_item_to_list(struct list_head *head,
11345 u64 objectid, u64 bytenr, u64 last_snapshot,
11346 u8 level, u8 drop_level,
11347 struct btrfs_key *drop_key)
11350 struct root_item_record *ri_rec;
11351 ri_rec = malloc(sizeof(*ri_rec));
11354 ri_rec->bytenr = bytenr;
11355 ri_rec->objectid = objectid;
11356 ri_rec->level = level;
11357 ri_rec->drop_level = drop_level;
11358 ri_rec->last_snapshot = last_snapshot;
/*
 * NOTE(review): callers in this file pass drop_key == NULL for some roots;
 * the guard around this copy is not visible here - confirm it exists.  When
 * the copy is skipped, ri_rec->drop_key keeps whatever malloc() returned.
 */
11360 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11361 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: repeatedly take the first root_item_record and unlink it
 * until the list is empty.
 * NOTE(review): the release of each unlinked record is not visible here.
 */
11366 static void free_root_item_list(struct list_head *list)
11368 struct root_item_record *ri_rec;
11370 while (!list_empty(list)) {
11371 ri_rec = list_first_entry(list, struct root_item_record,
11373 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list one at a time: read the
 * tree block at the record's bytenr, register it with add_root_to_pending(),
 * call run_next_block() with the record, then free the buffer and dequeue
 * the record.  After the loop run_next_block() is called again with a NULL
 * record.
 * NOTE(review): error handling, the conditions around the run_next_block()
 * calls and the return value are not visible in this view.
 */
11378 static int deal_root_from_list(struct list_head *list,
11379 struct btrfs_root *root,
11380 struct block_info *bits,
11382 struct cache_tree *pending,
11383 struct cache_tree *seen,
11384 struct cache_tree *reada,
11385 struct cache_tree *nodes,
11386 struct cache_tree *extent_cache,
11387 struct cache_tree *chunk_cache,
11388 struct rb_root *dev_cache,
11389 struct block_group_tree *block_group_cache,
11390 struct device_extent_tree *dev_extent_cache)
11395 while (!list_empty(list)) {
11396 struct root_item_record *rec;
11397 struct extent_buffer *buf;
/* Take the record at the head of the list. */
11398 rec = list_entry(list->next,
11399 struct root_item_record, list);
/* Read the tree block the record points at (transid argument 0). */
11401 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11402 if (!extent_buffer_uptodate(buf)) {
11403 free_extent_buffer(buf);
/* Queue the block as pending, tagged with the owning root's objectid. */
11407 ret = add_root_to_pending(buf, extent_cache, pending,
11408 seen, nodes, rec->objectid);
11412 * To rebuild extent tree, we need deal with snapshot
11413 * one by one, otherwise we deal with node firstly which
11414 * can maximize readahead.
11417 ret = run_next_block(root, bits, bits_nr, &last,
11418 pending, seen, reada, nodes,
11419 extent_cache, chunk_cache,
11420 dev_cache, block_group_cache,
11421 dev_extent_cache, rec);
/* Done with this record: drop the buffer and take it off the list. */
11425 free_extent_buffer(buf);
11426 list_del(&rec->list);
/* Same call as above but without a root record (last argument NULL). */
11432 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11433 reada, nodes, extent_cache, chunk_cache,
11434 dev_cache, block_group_cache,
11435 dev_extent_cache, NULL);
/*
 * Driver for the chunk/extent checks.  The visible steps are:
 *  - initialise the local caches and publish some of them through fs_info;
 *  - queue the tree root, the chunk root and every ROOT_ITEM found in the
 *    tree root, split between normal_trees and dropping_trees depending on
 *    the root item's drop_progress;
 *  - process both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - tear everything down again.
 * -EAGAIN is tested after most steps (see the comment further down).
 * NOTE(review): labels, gotos, error checks and the return statement are not
 * visible in this view.
 */
11445 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11447 struct rb_root dev_cache;
11448 struct cache_tree chunk_cache;
11449 struct block_group_tree block_group_cache;
11450 struct device_extent_tree dev_extent_cache;
11451 struct cache_tree extent_cache;
11452 struct cache_tree seen;
11453 struct cache_tree pending;
11454 struct cache_tree reada;
11455 struct cache_tree nodes;
11456 struct extent_io_tree excluded_extents;
11457 struct cache_tree corrupt_blocks;
11458 struct btrfs_path path;
11459 struct btrfs_key key;
11460 struct btrfs_key found_key;
11462 struct block_info *bits;
11464 struct extent_buffer *leaf;
11466 struct btrfs_root_item ri;
11467 struct list_head dropping_trees;
11468 struct list_head normal_trees;
11469 struct btrfs_root *root1;
11470 struct btrfs_root *root;
/* "root" is the fs root; it is what gets passed to the helpers below. */
11474 root = fs_info->fs_root;
11475 dev_cache = RB_ROOT;
11476 cache_tree_init(&chunk_cache);
11477 block_group_tree_init(&block_group_cache);
11478 device_extent_tree_init(&dev_extent_cache);
11480 cache_tree_init(&extent_cache);
11481 cache_tree_init(&seen);
11482 cache_tree_init(&pending);
11483 cache_tree_init(&nodes);
11484 cache_tree_init(&reada);
11485 cache_tree_init(&corrupt_blocks);
11486 extent_io_tree_init(&excluded_extents);
11487 INIT_LIST_HEAD(&dropping_trees);
11488 INIT_LIST_HEAD(&normal_trees);
/* Publish the stack-local trees and the free-extent hook through fs_info. */
11491 fs_info->excluded_extents = &excluded_extents;
11492 fs_info->fsck_extent_cache = &extent_cache;
11493 fs_info->free_extent_hook = free_extent_hook;
11494 fs_info->corrupt_blocks = &corrupt_blocks;
/* Scratch array handed to deal_root_from_list()/run_next_block(). */
11498 bits = malloc(bits_nr * sizeof(struct block_info));
11504 if (ctx.progress_enabled) {
11505 ctx.tp = TASK_EXTENTS;
11506 task_start(ctx.info);
/* Queue the tree root, then the chunk root, as normal trees. */
11510 root1 = fs_info->tree_root;
11511 level = btrfs_header_level(root1->node);
11512 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11513 root1->node->start, 0, level, 0, NULL);
11516 root1 = fs_info->chunk_root;
11517 level = btrfs_header_level(root1->node);
11518 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11519 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM entries. */
11522 btrfs_init_path(&path);
11525 key.type = BTRFS_ROOT_ITEM_KEY;
11526 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11530 leaf = path.nodes[0];
11531 slot = path.slots[0];
11532 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/*
 * NOTE(review): the search above ran on fs_info->tree_root, but "root" here
 * is fs_info->fs_root - confirm passing it to btrfs_next_leaf() is intended.
 */
11533 ret = btrfs_next_leaf(root, &path);
11536 leaf = path.nodes[0];
11537 slot = path.slots[0];
11539 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11540 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11541 unsigned long offset;
11544 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11545 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11546 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A zero drop_progress objectid queues the root on normal_trees; the other
 * visible call queues on dropping_trees together with drop_level and the
 * drop_progress key converted to CPU order.
 */
11547 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11548 level = btrfs_root_level(&ri);
11549 ret = add_root_item_to_list(&normal_trees,
11550 found_key.objectid,
11551 btrfs_root_bytenr(&ri),
11552 last_snapshot, level,
11557 level = btrfs_root_level(&ri);
11558 objectid = found_key.objectid;
11559 btrfs_disk_key_to_cpu(&found_key,
11560 &ri.drop_progress);
11561 ret = add_root_item_to_list(&dropping_trees,
11563 btrfs_root_bytenr(&ri),
11564 last_snapshot, level,
11565 ri.drop_level, &found_key);
11572 btrfs_release_path(&path);
11575 * check_block can return -EAGAIN if it fixes something, please keep
11576 * this in mind when dealing with return values from these functions, if
11577 * we get -EAGAIN we want to fall through and restart the loop.
11579 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11580 &seen, &reada, &nodes, &extent_cache,
11581 &chunk_cache, &dev_cache, &block_group_cache,
11582 &dev_extent_cache);
11584 if (ret == -EAGAIN)
11588 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11589 &pending, &seen, &reada, &nodes,
11590 &extent_cache, &chunk_cache, &dev_cache,
11591 &block_group_cache, &dev_extent_cache);
11593 if (ret == -EAGAIN)
11598 ret = check_chunks(&chunk_cache, &block_group_cache,
11599 &dev_extent_cache, NULL, NULL, NULL, 0);
11601 if (ret == -EAGAIN)
11606 ret = check_extent_refs(root, &extent_cache);
11608 if (ret == -EAGAIN)
11613 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Stop the progress task started above. */
11618 task_stop(ctx.info);
/* First teardown sequence: free the caches and clear the fs_info hooks. */
11620 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11621 extent_io_tree_cleanup(&excluded_extents);
11622 fs_info->fsck_extent_cache = NULL;
11623 fs_info->free_extent_hook = NULL;
11624 fs_info->corrupt_blocks = NULL;
11625 fs_info->excluded_extents = NULL;
11628 free_chunk_cache_tree(&chunk_cache);
11629 free_device_cache_tree(&dev_cache);
11630 free_block_group_tree(&block_group_cache);
11631 free_device_extent_tree(&dev_extent_cache);
11632 free_extent_cache_tree(&seen);
11633 free_extent_cache_tree(&pending);
11634 free_extent_cache_tree(&reada);
11635 free_extent_cache_tree(&nodes);
11636 free_root_item_list(&normal_trees);
11637 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence.  Unlike the first it also frees the extent
 * record cache and does not reset the fs_info pointers.
 * NOTE(review): the label or branch that selects it is not visible here;
 * presumably this is the -EAGAIN restart path - confirm.
 */
11640 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11641 free_extent_cache_tree(&seen);
11642 free_extent_cache_tree(&pending);
11643 free_extent_cache_tree(&reada);
11644 free_extent_cache_tree(&nodes);
11645 free_chunk_cache_tree(&chunk_cache);
11646 free_block_group_tree(&block_group_cache);
11647 free_device_cache_tree(&dev_cache);
11648 free_device_extent_tree(&dev_extent_cache);
11649 free_extent_record_cache(&extent_cache);
11650 free_root_item_list(&normal_trees);
11651 free_root_item_list(&dropping_trees);
11652 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of the inline extent ref @iref stored in @eb.  Four ref
 * key types are listed as known cases; for anything else an error naming
 * the extent @key and the type is printed and ret is set to UNKNOWN_TYPE.
 * NOTE(review): the result for the known types and the return statement are
 * not visible in this view.
 */
11656 static int check_extent_inline_ref(struct extent_buffer *eb,
11657 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11660 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11663 case BTRFS_TREE_BLOCK_REF_KEY:
11664 case BTRFS_EXTENT_DATA_REF_KEY:
11665 case BTRFS_SHARED_BLOCK_REF_KEY:
11666 case BTRFS_SHARED_DATA_REF_KEY:
11670 error("extent[%llu %u %llu] has unknown ref type: %d",
11671 key->objectid, key->type, key->offset, type);
11672 ret = UNKNOWN_TYPE;
11680 * Check backrefs of a tree block given by @bytenr or @eb.
11682 * @root: the root containing the @bytenr or @eb
11683 * @eb: tree block extent buffer, can be NULL
11684 * @bytenr: bytenr of the tree block to search
11685 * @level: tree level of the tree block
11686 * @owner: owner of the tree block
11688 * Return >0 for any error found and output error message
11689 * Return 0 for no error found
/*
 * Look for a backref of the tree block at @bytenr in the extent tree: first
 * among the inline refs of its extent/metadata item, then as a keyed
 * TREE_BLOCK_REF item, finally as a SHARED_BLOCK_REF item.  Mismatches in
 * flags, generation, level and ref count found on the way set
 * BACKREF_MISMATCH; failed lookups set BACKREF_MISSING.
 * NOTE(review): many branch conditions, gotos and the return statement are
 * not visible in this view.
 */
11691 static int check_tree_block_ref(struct btrfs_root *root,
11692 struct extent_buffer *eb, u64 bytenr,
11693 int level, u64 owner, struct node_refs *nrefs)
11695 struct btrfs_key key;
11696 struct btrfs_root *extent_root = root->fs_info->extent_root;
11697 struct btrfs_path path;
11698 struct btrfs_extent_item *ei;
11699 struct btrfs_extent_inline_ref *iref;
11700 struct extent_buffer *leaf;
11705 int root_level = btrfs_header_level(root->node);
11707 u32 nodesize = root->fs_info->nodesize;
/*
 * Search key: METADATA_ITEM when the fs has SKINNY_METADATA, EXTENT_ITEM
 * otherwise.  The offset is (u64)-1, and btrfs_previous_extent_item() is
 * then used to step back to the item for @bytenr.
 */
11716 btrfs_init_path(&path);
11717 key.objectid = bytenr;
11718 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11719 key.type = BTRFS_METADATA_ITEM_KEY;
11721 key.type = BTRFS_EXTENT_ITEM_KEY;
11722 key.offset = (u64)-1;
11724 /* Search for the backref in extent tree */
11725 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11727 err |= BACKREF_MISSING;
11730 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11732 err |= BACKREF_MISSING;
/* Re-read the key of the item actually found. */
11736 leaf = path.nodes[0];
11737 slot = path.slots[0];
11738 btrfs_item_key_to_cpu(leaf, &key, slot);
11740 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly.  Otherwise a btrfs_tree_block_info holding the level
 * sits between the extent item and the inline refs.
 */
11742 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11743 skinny_level = (int)key.offset;
11744 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11746 struct btrfs_tree_block_info *info;
11748 info = (struct btrfs_tree_block_info *)(ei + 1);
11749 skinny_level = btrfs_tree_block_level(leaf, info);
11750 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11759 * Due to the feature of shared tree blocks, if the upper node
11760 * is a fs root or shared node, the extent of checked node may
11761 * not be updated until the next CoW.
11764 strict = should_check_extent_strictly(root, nrefs,
11766 if (!(btrfs_extent_flags(leaf, ei) &
11767 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11769 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11770 key.objectid, nodesize,
11771 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11772 err = BACKREF_MISMATCH;
11774 header_gen = btrfs_header_generation(eb);
11775 extent_gen = btrfs_extent_generation(leaf, ei);
11776 if (header_gen != extent_gen) {
11778 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11779 key.objectid, nodesize, header_gen,
11781 err = BACKREF_MISMATCH;
11783 if (level != skinny_level) {
11785 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11786 key.objectid, nodesize, level, skinny_level);
11787 err = BACKREF_MISMATCH;
11789 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11791 "extent[%llu %u] is referred by other roots than %llu",
11792 key.objectid, nodesize, root->objectid);
11793 err = BACKREF_MISMATCH;
11798 * Iterate the extent/metadata item to find the exact backref
11800 item_size = btrfs_item_size_nr(leaf, slot);
11801 ptr = (unsigned long)iref;
11802 end = (unsigned long)ei + item_size;
/* Walk the inline refs from iref up to the end of the item. */
11804 while (ptr < end) {
11805 iref = (struct btrfs_extent_inline_ref *)ptr;
11806 type = btrfs_extent_inline_ref_type(leaf, iref);
11807 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11809 ret = check_extent_inline_ref(leaf, &key, iref);
11814 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11815 if (offset == root->objectid)
11817 if (!strict && owner == offset)
11819 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11821 * Backref of tree reloc root points to itself, no need
11822 * to check backref any more.
11824 * This may be an error of loop backref, but extent tree
11825 * checker should have already handled it.
11826 * Here we only need to avoid infinite iteration.
11828 if (offset == bytenr) {
11832 * Check if the backref points to valid
11835 found_ref = !check_tree_block_ref( root, NULL,
11836 offset, level + 1, owner,
11843 ptr += btrfs_extent_inline_ref_size(type);
11847 * Inlined extent item doesn't have what we need, check
11848 * TREE_BLOCK_REF_KEY
11851 btrfs_release_path(&path);
11852 key.objectid = bytenr;
11853 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11854 key.offset = root->objectid;
11856 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11861 * Finally check SHARED BLOCK REF, any found will be good
11862 * Here we're not doing comprehensive extent backref checking,
11863 * only need to ensure there is some extent referring to this
11867 btrfs_release_path(&path);
11868 key.objectid = bytenr;
11869 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11870 key.offset = (u64)-1;
11872 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11874 err |= BACKREF_MISSING;
11877 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11879 err |= BACKREF_MISSING;
11885 err |= BACKREF_MISSING;
/*
 * Common exit: release the path.  When a missing backref is reported for
 * @eb, the message names either the parent block taken from @nrefs or the
 * root's objectid.
 */
11887 btrfs_release_path(&path);
11888 if (nrefs && strict &&
11889 level < root_level && nrefs->full_backref[level + 1])
11890 parent = nrefs->bytenr[level + 1];
11891 if (eb && (err & BACKREF_MISSING))
11893 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11894 bytenr, nodesize, owner, level,
11895 parent ? "parent" : "root",
11896 parent ? parent : root->objectid);
11901 * If @err contains BACKREF_MISSING then add extent of the
11902 * file_extent_data_item.
11904 * Returns error bits after repair.
/*
 * Repair a file extent item whose data backref is missing.  The file extent
 * at @pathp is decoded, its EXTENT_ITEM is searched for in the extent tree
 * (and an empty one inserted by the visible insert sequence), then a
 * reference is added with btrfs_inc_extent_ref().  On the path that reaches
 * it, the BACKREF_MISSING bit is cleared from err.
 * NOTE(review): the conditions guarding each step, one parameter and the
 * return statement are not visible in this view.
 */
11906 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11907 struct btrfs_root *root,
11908 struct btrfs_path *pathp,
11909 struct node_refs *nrefs,
11912 struct btrfs_file_extent_item *fi;
11913 struct btrfs_key fi_key;
11914 struct btrfs_key key;
11915 struct btrfs_extent_item *ei;
11916 struct btrfs_path path;
11917 struct btrfs_root *extent_root = root->fs_info->extent_root;
11918 struct extent_buffer *eb;
/* Decode the file extent item the caller's path points at. */
11930 eb = pathp->nodes[0];
11931 slot = pathp->slots[0];
11932 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11933 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11935 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11936 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11939 file_offset = fi_key.offset;
11940 generation = btrfs_file_extent_generation(eb, fi);
11941 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11942 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11943 extent_offset = btrfs_file_extent_offset(eb, fi);
11944 offset = file_offset - extent_offset;
11946 /* now repair only adds backref */
11947 if ((err & BACKREF_MISSING) == 0)
11950 /* search extent item */
11951 key.objectid = disk_bytenr;
11952 key.type = BTRFS_EXTENT_ITEM_KEY;
11953 key.offset = num_bytes;
11955 btrfs_init_path(&path);
11956 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11962 /* insert an extent item */
11964 key.objectid = disk_bytenr;
11965 key.type = BTRFS_EXTENT_ITEM_KEY;
11966 key.offset = num_bytes;
11967 size = sizeof(*ei);
11969 btrfs_release_path(&path);
11970 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11974 eb = path.nodes[0];
11975 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11977 btrfs_set_extent_refs(eb, ei, 0);
11978 btrfs_set_extent_generation(eb, ei, generation);
11979 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11981 btrfs_mark_buffer_dirty(eb);
11982 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11984 btrfs_release_path(&path);
/*
 * NOTE(review): in the insert sequence above eb is re-pointed at the
 * extent-tree leaf (path.nodes[0]) and that path is then released.  Confirm
 * that eb still refers to the file extent's leaf when its bytenr is used as
 * the parent below.
 */
11987 if (nrefs->full_backref[0])
11988 parent = btrfs_header_bytenr(eb);
11992 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11994 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11998 "failed to increase extent data backref[%llu %llu] root %llu",
11999 disk_bytenr, num_bytes, root->objectid);
12002 printf("Add one extent data backref [%llu %llu]\n",
12003 disk_bytenr, num_bytes);
/* Clear the BACKREF_MISSING bit from the error bits. */
12006 err &= ~BACKREF_MISSING;
12009 error("can't repair root %llu extent data item[%llu %llu]",
12010 root->objectid, disk_bytenr, num_bytes);
12015 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12017 * Return >0 for any error found and output error message
12018 * Return 0 for no error found
/*
 * Check the file extent item at @pathp: the alignment of its disk_num_bytes
 * and num_bytes to the sector size (accounting the sizes into the global
 * byte counters when @account_bytes is set), the DATA flag of its extent
 * item, and the presence of a data backref.  The backref is looked for
 * among the inline refs, then as a keyed EXTENT_DATA_REF item, then as a
 * SHARED_DATA_REF item keyed by this leaf's start.
 * NOTE(review): several branch conditions and the return statement are not
 * visible in this view.
 */
12020 static int check_extent_data_item(struct btrfs_root *root,
12021 struct btrfs_path *pathp,
12022 struct node_refs *nrefs, int account_bytes)
12024 struct btrfs_file_extent_item *fi;
12025 struct extent_buffer *eb = pathp->nodes[0];
12026 struct btrfs_path path;
12027 struct btrfs_root *extent_root = root->fs_info->extent_root;
12028 struct btrfs_key fi_key;
12029 struct btrfs_key dbref_key;
12030 struct extent_buffer *leaf;
12031 struct btrfs_extent_item *ei;
12032 struct btrfs_extent_inline_ref *iref;
12033 struct btrfs_extent_data_ref *dref;
12036 u64 disk_num_bytes;
12037 u64 extent_num_bytes;
12044 int found_dbackref = 0;
12045 int slot = pathp->slots[0];
/* Decode the file extent item under the caller's path. */
12050 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12051 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12053 /* Nothing to check for hole and inline data extents */
12054 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12055 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12058 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12059 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12060 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12061 offset = btrfs_file_extent_offset(eb, fi);
12063 /* Check unaligned disk_num_bytes and num_bytes */
12064 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12066 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12067 fi_key.objectid, fi_key.offset, disk_num_bytes,
12068 root->fs_info->sectorsize);
12069 err |= BYTES_UNALIGNED;
12070 } else if (account_bytes) {
12071 data_bytes_allocated += disk_num_bytes;
12073 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12075 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12076 fi_key.objectid, fi_key.offset, extent_num_bytes,
12077 root->fs_info->sectorsize);
12078 err |= BYTES_UNALIGNED;
12079 } else if (account_bytes) {
12080 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is used for the non-strict match. */
12082 owner = btrfs_header_owner(eb);
12084 /* Check the extent item of the file extent in extent tree */
12085 btrfs_init_path(&path);
12086 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12087 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12088 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12090 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12094 leaf = path.nodes[0];
12095 slot = path.slots[0];
12096 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12098 extent_flags = btrfs_extent_flags(leaf, ei);
12100 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12102 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12103 disk_bytenr, disk_num_bytes,
12104 BTRFS_EXTENT_FLAG_DATA);
12105 err |= BACKREF_MISMATCH;
12108 /* Check data backref inside that extent item */
12109 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12110 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12111 ptr = (unsigned long)iref;
12112 end = (unsigned long)ei + item_size;
12113 strict = should_check_extent_strictly(root, nrefs, -1);
/* Walk the inline refs that follow the extent item. */
12115 while (ptr < end) {
12119 bool match = false;
12121 iref = (struct btrfs_extent_inline_ref *)ptr;
12122 type = btrfs_extent_inline_ref_type(leaf, iref);
12123 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12125 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
/*
 * Keyed data ref: objectid and offset are compared with the file extent
 * (key offset minus extent offset); the ref root must then be this root, or
 * the leaf owner when the check is not strict.
 */
12130 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12131 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12132 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12133 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
12135 if (ref_objectid == fi_key.objectid &&
12136 ref_offset == fi_key.offset - offset)
12138 if (ref_root == root->objectid && match)
12139 found_dbackref = 1;
12140 else if (!strict && owner == ref_root && match)
12141 found_dbackref = 1;
12142 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12143 found_dbackref = !check_tree_block_ref(root, NULL,
12144 btrfs_extent_inline_ref_offset(leaf, iref),
12148 if (found_dbackref)
12150 ptr += btrfs_extent_inline_ref_size(type);
12153 if (!found_dbackref) {
12154 btrfs_release_path(&path);
12156 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12157 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12158 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12159 dbref_key.offset = hash_extent_data_ref(root->objectid,
12160 fi_key.objectid, fi_key.offset - offset);
12162 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12163 &dbref_key, &path, 0, 0);
12165 found_dbackref = 1;
12169 btrfs_release_path(&path);
12172 * Neither inlined nor EXTENT_DATA_REF found, try
12173 * SHARED_DATA_REF as last chance.
12175 dbref_key.objectid = disk_bytenr;
12176 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12177 dbref_key.offset = eb->start;
12179 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12180 &dbref_key, &path, 0, 0);
12182 found_dbackref = 1;
/* No lookup produced a backref: record it as missing and report it. */
12188 if (!found_dbackref)
12189 err |= BACKREF_MISSING;
12190 btrfs_release_path(&path);
12191 if (err & BACKREF_MISSING) {
12192 error("data extent[%llu %llu] backref lost",
12193 disk_bytenr, disk_num_bytes);
12199 * Get real tree block level for the case like shared block
12200 * Return >= 0 as tree level
12201 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources: the
 * extent tree (the key offset of a METADATA_ITEM, or the
 * btrfs_tree_block_info that follows the extent item) and the header of the
 * block itself, read with the generation taken from the extent item.  The
 * two levels are compared and the header level is returned.
 * NOTE(review): the error paths and what happens when the two levels differ
 * are not visible in this view.
 */
12203 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12205 struct extent_buffer *eb;
12206 struct btrfs_path path;
12207 struct btrfs_key key;
12208 struct btrfs_extent_item *ei;
12215 /* Search extent tree for extent generation and level */
12216 key.objectid = bytenr;
12217 key.type = BTRFS_METADATA_ITEM_KEY;
12218 key.offset = (u64)-1;
12220 btrfs_init_path(&path);
12221 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12224 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
/* Re-read the key of the item found and check it describes a tree block. */
12232 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12233 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12234 struct btrfs_extent_item);
12235 flags = btrfs_extent_flags(path.nodes[0], ei);
12236 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12241 /* Get transid for later read_tree_block() check */
12242 transid = btrfs_extent_generation(path.nodes[0], ei);
12244 /* Get backref level as one source */
12245 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12246 backref_level = key.offset;
12248 struct btrfs_tree_block_info *info;
12250 info = (struct btrfs_tree_block_info *)(ei + 1);
12251 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12253 btrfs_release_path(&path);
12255 /* Get level from tree block as an alternative source */
12256 eb = read_tree_block(fs_info, bytenr, transid);
12257 if (!extent_buffer_uptodate(eb)) {
12258 free_extent_buffer(eb);
12261 header_level = btrfs_header_level(eb);
12262 free_extent_buffer(eb);
/* Compare the two sources of the level. */
12264 if (header_level != backref_level)
12266 return header_level;
12269 btrfs_release_path(&path);
12274 * Check if a tree block backref is valid (points to a valid tree block)
12275 * if level == -1, level will be resolved
12276 * Return >0 for any error found and print error message
/*
 * Verify that the tree identified by @root_id really references the tree
 * block at @bytenr: the root is loaded, the block's first key is read, and a
 * search for that key limited to @level must land on a node whose bytenr and
 * level match.  Lookup failures set REFERENCER_MISSING, differences set
 * REFERENCER_MISMATCH.
 * NOTE(review): the branch conditions, gotos and the return statement are
 * not visible in this view.
 */
12278 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12279 u64 bytenr, int level)
12281 struct btrfs_root *root;
12282 struct btrfs_key key;
12283 struct btrfs_path path;
12284 struct extent_buffer *eb;
12285 struct extent_buffer *node;
12286 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12290 /* Query level for level == -1 special case */
12292 level = query_tree_block_level(fs_info, bytenr);
12294 err |= REFERENCER_MISSING;
/* Load the root identified by root_id. */
12298 key.objectid = root_id;
12299 key.type = BTRFS_ROOT_ITEM_KEY;
12300 key.offset = (u64)-1;
12302 root = btrfs_read_fs_root(fs_info, &key);
12303 if (IS_ERR(root)) {
12304 err |= REFERENCER_MISSING;
12308 /* Read out the tree block to get item/node key */
12309 eb = read_tree_block(fs_info, bytenr, 0);
12310 if (!extent_buffer_uptodate(eb)) {
12311 err |= REFERENCER_MISSING;
12312 free_extent_buffer(eb);
12316 /* Empty tree, no need to check key */
12317 if (!btrfs_header_nritems(eb) && !level) {
12318 free_extent_buffer(eb);
12323 btrfs_node_key_to_cpu(eb, &key, 0);
12325 btrfs_item_key_to_cpu(eb, &key, 0);
12327 free_extent_buffer(eb);
/* lowest_level is set to @level for the search below. */
12329 btrfs_init_path(&path);
12330 path.lowest_level = level;
12331 /* Search with the first key, to ensure we can reach it */
12332 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12334 err |= REFERENCER_MISSING;
/* The node reached at @level must be the block at @bytenr itself. */
12338 node = path.nodes[level];
12339 if (btrfs_header_bytenr(node) != bytenr) {
12341 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12342 bytenr, nodesize, bytenr,
12343 btrfs_header_bytenr(node));
12344 err |= REFERENCER_MISMATCH;
12346 if (btrfs_header_level(node) != level) {
12348 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12349 bytenr, nodesize, level,
12350 btrfs_header_level(node));
12351 err |= REFERENCER_MISMATCH;
/* Common exit: release the path, then report a missing referencer. */
12355 btrfs_release_path(&path);
12357 if (err & REFERENCER_MISSING) {
12359 error("extent [%llu %d] lost referencer (owner: %llu)",
12360 bytenr, nodesize, root_id);
12363 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12364 bytenr, nodesize, root_id, level);
12371 * Check if tree block @eb is tree reloc root.
12372 * Return 0 if it's not or any problem happens
12373 * Return 1 if it's a tree reloc root
/*
 * Tell whether @eb is the root node of the tree reloc root that belongs to
 * the owner recorded in the header of @eb.
 *
 * The reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner), read without
 * caching, compared by bytenr against @eb and freed again.
 */
12375 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12376 struct extent_buffer *eb)
12378 struct btrfs_root *tree_reloc_root;
12379 struct btrfs_key key;
12380 u64 bytenr = btrfs_header_bytenr(eb);
12381 u64 owner = btrfs_header_owner(eb);
12384 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12385 key.offset = owner;
12386 key.type = BTRFS_ROOT_ITEM_KEY;
12388 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12389 if (IS_ERR(tree_reloc_root))
/* It is a reloc root only if @eb is the root node of that tree */
12392 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12394 btrfs_free_fs_root(tree_reloc_root);
12399 * Check referencer for shared block backref
12400 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must sit one
 * level above @level and hold a block pointer equal to @bytenr.
 *
 * @level may be passed unresolved, in which case it is looked up with
 * query_tree_block_level().  A backref whose parent equals bytenr is
 * checked with is_tree_reloc_root(); NOTE(review): that case is presumably
 * accepted as found - confirm.
 *
 * Returns REFERENCER_MISSING after printing an error when the parent does
 * not reference the block.
 */
12402 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12403 u64 parent, u64 bytenr, int level)
12405 struct extent_buffer *eb;
12407 int found_parent = 0;
12410 eb = read_tree_block(fs_info, parent, 0);
12411 if (!extent_buffer_uptodate(eb))
12415 level = query_tree_block_level(fs_info, bytenr);
12419 /* It's possible it's a tree reloc root */
12420 if (parent == bytenr) {
12421 if (is_tree_reloc_root(fs_info, eb))
12426 if (level + 1 != btrfs_header_level(eb))
12429 nr = btrfs_header_nritems(eb);
12430 for (i = 0; i < nr; i++) {
12431 if (bytenr == btrfs_node_blockptr(eb, i)) {
12437 free_extent_buffer(eb);
12438 if (!found_parent) {
12440 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12441 bytenr, fs_info->nodesize, parent, level);
12442 return REFERENCER_MISSING;
12448 * Check referencer for normal (inlined) data ref
12449 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref (root @root_id, inode @objectid, @offset) of
 * the data extent at @bytenr.
 *
 * The EXTENT_DATA items of the inode are walked in root @root_id.  A file
 * extent counts as a referencer when its disk bytenr and disk num bytes
 * match, when (file offset - extent offset) equals @offset and when the
 * leaf holding it is owned by @root_id.  The number of referencers found
 * must equal @count.
 *
 * The extent tree is searched first for the EXTENT_ITEM at @bytenr;
 * presumably this is where @len gets resolved when the caller passes 0 -
 * confirm.
 *
 * Returns REFERENCER_MISSING after printing an error on a count mismatch.
 */
12451 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12452 u64 root_id, u64 objectid, u64 offset,
12453 u64 bytenr, u64 len, u32 count)
12455 struct btrfs_root *root;
12456 struct btrfs_root *extent_root = fs_info->extent_root;
12457 struct btrfs_key key;
12458 struct btrfs_path path;
12459 struct extent_buffer *leaf;
12460 struct btrfs_file_extent_item *fi;
12461 u32 found_count = 0;
12466 key.objectid = bytenr;
12467 key.type = BTRFS_EXTENT_ITEM_KEY;
12468 key.offset = (u64)-1;
12470 btrfs_init_path(&path);
12471 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12474 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12477 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12478 if (key.objectid != bytenr ||
12479 key.type != BTRFS_EXTENT_ITEM_KEY)
12482 btrfs_release_path(&path);
/* Switch from the extent tree to the referencing root */
12484 key.objectid = root_id;
12485 key.type = BTRFS_ROOT_ITEM_KEY;
12486 key.offset = (u64)-1;
12487 btrfs_init_path(&path);
12489 root = btrfs_read_fs_root(fs_info, &key);
12493 key.objectid = objectid;
12494 key.type = BTRFS_EXTENT_DATA_KEY;
12496 * It can be nasty as data backref offset is
12497 * file offset - file extent offset, which is smaller or
12498 * equal to original backref offset. The only special case is
12499 * overflow. So we need to special check and do further search.
12501 key.offset = offset & (1ULL << 63) ? 0 : offset;
12503 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12508 * Search afterwards to get correct one
12509 * NOTE: As we must do a comprehensive check on the data backref to
12510 * make sure the dref count also matches, we must iterate all file
12511 * extents for that inode.
12514 leaf = path.nodes[0];
12515 slot = path.slots[0];
12517 if (slot >= btrfs_header_nritems(leaf) ||
12518 btrfs_header_owner(leaf) != root_id)
12520 btrfs_item_key_to_cpu(leaf, &key, slot);
12521 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12525 * Except normal disk bytenr and disk num bytes, we still
12526 * need to do extra check on dbackref offset as
12527 * dbackref offset = file_offset - file_extent_offset
12529 * Also, we must check the leaf owner.
12530 * In case of shared tree blocks (snapshots) we can inherit
12531 * leaves from source snapshot.
12532 * In that case, reference from source snapshot should not
12535 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12536 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12537 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12538 offset && btrfs_header_owner(leaf) == root_id)
12542 ret = btrfs_next_item(root, &path);
12547 btrfs_release_path(&path);
12548 if (found_count != count) {
12550 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12551 bytenr, len, root_id, objectid, offset, count, found_count);
12552 return REFERENCER_MISSING;
12558 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must contain an
 * EXTENT_DATA item whose disk bytenr equals @bytenr.  Items of other key
 * types and inline file extents are tested for and presumably skipped.
 *
 * Returns REFERENCER_MISSING after printing an error when no such item
 * is found.
 */
12560 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12561 u64 parent, u64 bytenr)
12563 struct extent_buffer *eb;
12564 struct btrfs_key key;
12565 struct btrfs_file_extent_item *fi;
12567 int found_parent = 0;
12570 eb = read_tree_block(fs_info, parent, 0);
12571 if (!extent_buffer_uptodate(eb))
12574 nr = btrfs_header_nritems(eb);
12575 for (i = 0; i < nr; i++) {
12576 btrfs_item_key_to_cpu(eb, &key, i);
12577 if (key.type != BTRFS_EXTENT_DATA_KEY)
12580 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12581 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12584 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12591 free_extent_buffer(eb);
12592 if (!found_parent) {
12593 error("shared extent %llu referencer lost (parent: %llu)",
12595 return REFERENCER_MISSING;
12601 * Only delete backref if REFERENCER_MISSING now
12603 * Returns <0 the extent was deleted
12604 * Returns >0 the backref was deleted but extent still exists, returned value
12605 * means error after repair
12606 * Returns 0 nothing happened
/*
 * Drop one backref of the extent [@bytenr, @num_bytes] when @err carries
 * REFERENCER_MISSING or REFERENCER_MISMATCH.
 *
 * The backref described by @parent, @root_objectid, @owner and @offset is
 * removed with btrfs_free_extent().  The REFERENCER_MISSING bit is cleared
 * from err and a message is printed; NOTE(review): presumably only when the
 * removal succeeded - confirm.  Since freeing may delete the whole
 * extent item, @path is released and searched again with the key it
 * pointed at on entry.
 */
12608 static int repair_extent_item(struct btrfs_trans_handle *trans,
12609 struct btrfs_root *root, struct btrfs_path *path,
12610 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12611 u64 owner, u64 offset, int err)
12613 struct btrfs_key old_key;
/* Remember the current position so the path can be restored afterwards */
12617 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12619 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12620 /* delete the backref */
12621 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12622 num_bytes, parent, root_objectid, owner, offset);
12625 err &= ~REFERENCER_MISSING;
12626 printf("Delete backref in extent [%llu %llu]\n",
12627 bytenr, num_bytes);
12629 error("fail to delete backref in extent [%llu %llu]",
12630 bytenr, num_bytes);
12634 /* btrfs_free_extent may delete the extent */
12635 btrfs_release_path(path);
12636 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12646 * This function will check a given extent item, including its backref and
12647 * itself (like crossing stripe boundary and type)
12649 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM or METADATA_ITEM that @path points at.
 *
 * The extent size is added to bytes_used, metadata is checked for crossing
 * a stripe boundary, and then the inline backrefs are walked.  Each one is
 * validated with the check_*_backref() helper that matches its type.
 * Unknown ref types are reported as UNKNOWN_TYPE and a walk that runs past
 * the end of the item as ITEM_SIZE_MISMATCH.
 *
 * With repair enabled repair_extent_item() is called on a bad backref.
 * Items smaller than struct btrfs_extent_item are rejected as the
 * unsupported v0 extent format.
 */
12651 static int check_extent_item(struct btrfs_trans_handle *trans,
12652 struct btrfs_fs_info *fs_info,
12653 struct btrfs_path *path)
12655 struct btrfs_extent_item *ei;
12656 struct btrfs_extent_inline_ref *iref;
12657 struct btrfs_extent_data_ref *dref;
12658 struct extent_buffer *eb = path->nodes[0];
12661 int slot = path->slots[0];
12663 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12664 u32 item_size = btrfs_item_size_nr(eb, slot);
12674 struct btrfs_key key;
12678 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset, otherwise nodesize is used */
12679 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12680 bytes_used += key.offset;
12681 num_bytes = key.offset;
12683 bytes_used += nodesize;
12684 num_bytes = nodesize;
12687 if (item_size < sizeof(*ei)) {
12689 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12690 * old thing when on disk format is still un-determined.
12691 * No need to care about it anymore
12693 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12697 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12698 flags = btrfs_extent_flags(eb, ei);
12700 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12702 if (metadata && check_crossing_stripes(global_info, key.objectid,
12704 error("bad metadata [%llu, %llu) crossing stripe boundary",
12705 key.objectid, key.objectid + nodesize);
12706 err |= CROSSING_STRIPE_BOUNDARY;
12709 ptr = (unsigned long)(ei + 1);
12711 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12712 /* Old EXTENT_ITEM metadata */
12713 struct btrfs_tree_block_info *info;
12715 info = (struct btrfs_tree_block_info *)ptr;
12716 level = btrfs_tree_block_level(eb, info);
12717 ptr += sizeof(struct btrfs_tree_block_info);
12719 /* New METADATA_ITEM */
12720 level = key.offset;
12722 end = (unsigned long)ei + item_size;
12725 /* Reached extent item end normally */
12729 /* Beyond extent item end, wrong item size */
12731 err |= ITEM_SIZE_MISMATCH;
12732 error("extent item at bytenr %llu slot %d has wrong size",
12741 /* Now check every backref in this extent item */
12742 iref = (struct btrfs_extent_inline_ref *)ptr;
12743 type = btrfs_extent_inline_ref_type(eb, iref);
12744 offset = btrfs_extent_inline_ref_offset(eb, iref);
12746 case BTRFS_TREE_BLOCK_REF_KEY:
12747 root_objectid = offset;
12749 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12753 case BTRFS_SHARED_BLOCK_REF_KEY:
12755 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12759 case BTRFS_EXTENT_DATA_REF_KEY:
12760 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12761 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12762 owner = btrfs_extent_data_ref_objectid(eb, dref);
12763 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12764 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12765 owner_offset, key.objectid, key.offset,
12766 btrfs_extent_data_ref_count(eb, dref));
12769 case BTRFS_SHARED_DATA_REF_KEY:
12771 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12775 error("extent[%llu %d %llu] has unknown ref type: %d",
12776 key.objectid, key.type, key.offset, type);
12777 ret = UNKNOWN_TYPE;
12782 if (err && repair) {
12783 ret = repair_extent_item(trans, fs_info->extent_root, path,
12784 key.objectid, num_bytes, parent, root_objectid,
12785 owner, owner_offset, ret);
12794 ptr += btrfs_extent_inline_ref_size(type);
12802 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent in @slot of @eb is referenced by its chunk.
 *
 * The chunk item named by the dev extent (chunk objectid, chunk offset) is
 * looked up in the chunk tree and validated with btrfs_check_chunk_valid().
 * Its stripe length is compared with the dev extent length, and its
 * stripes are scanned for one whose devid and offset equal the objectid
 * and offset of the dev extent key.
 *
 * Returns REFERENCER_MISSING after printing an error when no matching
 * chunk stripe was found.
 */
12804 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12805 struct extent_buffer *eb, int slot)
12807 struct btrfs_root *chunk_root = fs_info->chunk_root;
12808 struct btrfs_dev_extent *ptr;
12809 struct btrfs_path path;
12810 struct btrfs_key chunk_key;
12811 struct btrfs_key devext_key;
12812 struct btrfs_chunk *chunk;
12813 struct extent_buffer *l;
12817 int found_chunk = 0;
12820 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12821 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12822 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
12824 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12825 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12826 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12828 btrfs_init_path(&path);
12829 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12834 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12835 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12840 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12843 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12844 for (i = 0; i < num_stripes; i++) {
12845 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12846 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12848 if (devid == devext_key.objectid &&
12849 offset == devext_key.offset) {
12855 btrfs_release_path(&path);
12856 if (!found_chunk) {
12858 "device extent[%llu, %llu, %llu] did not find the related chunk",
12859 devext_key.objectid, devext_key.offset, length);
12860 return REFERENCER_MISSING;
12866 * Check if the used space is correct with the dev item
/*
 * Check that bytes_used of the dev item in @slot of @eb equals the summed
 * length of all dev extents of that device in the dev tree.
 *
 * Returns REFERENCER_MISSING when no dev extent can be looked up for the
 * device and ACCOUNTING_MISMATCH when the two totals differ.  When they
 * agree the device total_bytes is passed to check_dev_size_alignment().
 */
12868 static int check_dev_item(struct btrfs_fs_info *fs_info,
12869 struct extent_buffer *eb, int slot)
12871 struct btrfs_root *dev_root = fs_info->dev_root;
12872 struct btrfs_dev_item *dev_item;
12873 struct btrfs_path path;
12874 struct btrfs_key key;
12875 struct btrfs_dev_extent *ptr;
12882 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12883 dev_id = btrfs_device_id(eb, dev_item);
12884 used = btrfs_device_bytes_used(eb, dev_item);
12885 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12887 key.objectid = dev_id;
12888 key.type = BTRFS_DEV_EXTENT_KEY;
12891 btrfs_init_path(&path);
12892 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12894 btrfs_item_key_to_cpu(eb, &key, slot);
12895 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12896 key.objectid, key.type, key.offset);
12897 btrfs_release_path(&path);
12898 return REFERENCER_MISSING;
12901 /* Iterate dev_extents to calculate the used space of a device */
12903 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12906 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12907 if (key.objectid > dev_id)
12909 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12912 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12913 struct btrfs_dev_extent);
12914 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12916 ret = btrfs_next_item(dev_root, &path);
12920 btrfs_release_path(&path);
/* bytes_used recorded in the dev item must match the dev extent total */
12922 if (used != total) {
12923 btrfs_item_key_to_cpu(eb, &key, slot);
12925 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12926 total, used, BTRFS_ROOT_TREE_OBJECTID,
12927 BTRFS_DEV_EXTENT_KEY, dev_id);
12928 return ACCOUNTING_MISMATCH;
12930 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12936 * Check a block group item with its referencer (chunk) and its used space
12937 * with extent/metadata item
/*
 * Check the block group item in @slot of @eb.
 *
 * 1) The chunk tree is searched for the chunk item at the block group
 *    start.  A failed lookup is reported as REFERENCER_MISSING, a chunk
 *    length mismatch as REFERENCER_MISMATCH.
 * 2) The extent tree is walked from the block group start.  The sizes of
 *    the EXTENT_ITEM and METADATA_ITEM entries inside the block group are
 *    summed and compared with the used value stored in the item
 *    (BG_ACCOUNTING_ERROR on mismatch).
 * 3) Each extent must match the block group type: data extents need a
 *    DATA block group, tree blocks a SYSTEM or METADATA one
 *    (CHUNK_TYPE_MISMATCH otherwise).
 */
12939 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12940 struct extent_buffer *eb, int slot)
12942 struct btrfs_root *extent_root = fs_info->extent_root;
12943 struct btrfs_root *chunk_root = fs_info->chunk_root;
12944 struct btrfs_block_group_item *bi;
12945 struct btrfs_block_group_item bg_item;
12946 struct btrfs_path path;
12947 struct btrfs_key bg_key;
12948 struct btrfs_key chunk_key;
12949 struct btrfs_key extent_key;
12950 struct btrfs_chunk *chunk;
12951 struct extent_buffer *leaf;
12952 struct btrfs_extent_item *ei;
12953 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12961 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12962 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
12963 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12964 used = btrfs_block_group_used(&bg_item);
12965 bg_flags = btrfs_block_group_flags(&bg_item);
12967 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12968 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12969 chunk_key.offset = bg_key.objectid;
12971 btrfs_init_path(&path);
12972 /* Search for the referencer chunk */
12973 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12976 "block group[%llu %llu] did not find the related chunk item",
12977 bg_key.objectid, bg_key.offset);
12978 err |= REFERENCER_MISSING;
12980 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12981 struct btrfs_chunk);
12982 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12985 "block group[%llu %llu] related chunk item length does not match",
12986 bg_key.objectid, bg_key.offset);
12987 err |= REFERENCER_MISMATCH;
12990 btrfs_release_path(&path);
12992 /* Search from the block group bytenr */
12993 extent_key.objectid = bg_key.objectid;
12994 extent_key.type = 0;
12995 extent_key.offset = 0;
12997 btrfs_init_path(&path);
12998 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
13002 /* Iterate extent tree to account used space */
13004 leaf = path.nodes[0];
13006 /* Search slot can point to the last item beyond leaf nritems */
13007 if (path.slots[0] >= btrfs_header_nritems(leaf))
13010 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
13011 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
13014 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13015 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13017 if (extent_key.objectid < bg_key.objectid)
13020 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13023 total += extent_key.offset;
13025 ei = btrfs_item_ptr(leaf, path.slots[0],
13026 struct btrfs_extent_item);
13027 flags = btrfs_extent_flags(leaf, ei);
13028 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13029 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13031 "bad extent[%llu, %llu) type mismatch with chunk",
13032 extent_key.objectid,
13033 extent_key.objectid + extent_key.offset);
13034 err |= CHUNK_TYPE_MISMATCH;
13036 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13037 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13038 BTRFS_BLOCK_GROUP_METADATA))) {
13040 "bad extent[%llu, %llu) type mismatch with chunk",
13041 extent_key.objectid,
13042 extent_key.objectid + nodesize);
13043 err |= CHUNK_TYPE_MISMATCH;
13047 ret = btrfs_next_item(extent_root, &path);
13053 btrfs_release_path(&path);
13055 if (total != used) {
13057 "block group[%llu %llu] used %llu but extent items used %llu",
13058 bg_key.objectid, bg_key.offset, used, total);
13059 err |= BG_ACCOUNTING_ERROR;
13065 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13066 * FIXME: We still need to repair error of dev_item.
13068 * Returns error after repair.
/*
 * Recreate the block group of the chunk item at @path when @err contains
 * REFERENCER_MISSING.
 *
 * The block group is made with btrfs_make_block_group() from the chunk
 * type, the chunk start (key offset) and the chunk length.  The
 * REFERENCER_MISSING bit is cleared from err once the block group has been
 * added; a failure is reported through error().  Items that are not chunk
 * items are tested for up front and presumably left alone.
 */
13070 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13071 struct btrfs_root *chunk_root,
13072 struct btrfs_path *path, int err)
13074 struct btrfs_chunk *chunk;
13075 struct btrfs_key chunk_key;
13076 struct extent_buffer *eb = path->nodes[0];
13078 int slot = path->slots[0];
13082 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13083 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13085 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13086 type = btrfs_chunk_type(path->nodes[0], chunk);
13087 length = btrfs_chunk_length(eb, chunk);
13089 if (err & REFERENCER_MISSING) {
13090 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13091 type, chunk_key.offset, length);
13093 error("fail to add block group item[%llu %llu]",
13094 chunk_key.offset, length);
13097 err &= ~REFERENCER_MISSING;
13098 printf("Added block group item[%llu %llu]\n",
13099 chunk_key.offset, length);
13108 * Check a chunk item.
13109 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item in @slot of @eb.
 *
 * - The chunk itself is validated with btrfs_check_chunk_valid(); an
 *   invalid chunk sets BYTES_UNALIGNED | UNKNOWN_TYPE.
 * - A BLOCK_GROUP_ITEM keyed by (chunk start, chunk length) must exist in
 *   the extent tree and its flags must equal the chunk type; both a failed
 *   lookup and a flags mismatch set REFERENCER_MISSING.
 * - Every stripe must have a dev extent in the dev tree keyed by
 *   (devid, stripe offset) that points back to this chunk and has the
 *   stripe length; otherwise BACKREF_MISSING is set.
 */
13111 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13112 struct extent_buffer *eb, int slot)
13114 struct btrfs_root *extent_root = fs_info->extent_root;
13115 struct btrfs_root *dev_root = fs_info->dev_root;
13116 struct btrfs_path path;
13117 struct btrfs_key chunk_key;
13118 struct btrfs_key bg_key;
13119 struct btrfs_key devext_key;
13120 struct btrfs_chunk *chunk;
13121 struct extent_buffer *leaf;
13122 struct btrfs_block_group_item *bi;
13123 struct btrfs_block_group_item bg_item;
13124 struct btrfs_dev_extent *ptr;
13136 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13137 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13138 length = btrfs_chunk_length(eb, chunk);
13139 chunk_end = chunk_key.offset + length;
13140 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13143 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13145 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13148 type = btrfs_chunk_type(eb, chunk);
/* The block group item shares start and length with the chunk */
13150 bg_key.objectid = chunk_key.offset;
13151 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13152 bg_key.offset = length;
13154 btrfs_init_path(&path);
13155 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13158 "chunk[%llu %llu) did not find the related block group item",
13159 chunk_key.offset, chunk_end);
13160 err |= REFERENCER_MISSING;
13162 leaf = path.nodes[0];
13163 bi = btrfs_item_ptr(leaf, path.slots[0],
13164 struct btrfs_block_group_item);
13165 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13167 if (btrfs_block_group_flags(&bg_item) != type) {
13169 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13170 chunk_key.offset, chunk_end, type,
13171 btrfs_block_group_flags(&bg_item));
13172 err |= REFERENCER_MISSING;
13176 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13177 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13178 for (i = 0; i < num_stripes; i++) {
13179 btrfs_release_path(&path);
13180 btrfs_init_path(&path);
13181 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13182 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13183 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13185 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13188 goto not_match_dev;
13190 leaf = path.nodes[0];
13191 ptr = btrfs_item_ptr(leaf, path.slots[0],
13192 struct btrfs_dev_extent);
13193 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13194 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and span one stripe */
13195 if (objectid != chunk_key.objectid ||
13196 offset != chunk_key.offset ||
13197 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13198 goto not_match_dev;
13201 err |= BACKREF_MISSING;
/* NOTE(review): this message prints chunk_key.objectid while the other chunk messages print chunk_key.offset - confirm which is intended */
13203 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13204 chunk_key.objectid, chunk_end, i);
13207 btrfs_release_path(&path);
/*
 * Delete the item that @path points at from @root.
 *
 * The key of the item is saved, the path is released and the item is
 * searched again inside the transaction (ins_len = -1, cow = 1) so that
 * btrfs_del_item() can remove it.  When the path is left at slot 0 it is
 * moved to the previous leaf.  The outcome is reported with error() or
 * printf().
 */
13212 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13213 struct btrfs_root *root,
13214 struct btrfs_path *path)
13216 struct btrfs_key key;
13219 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13220 btrfs_release_path(path);
13221 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13227 ret = btrfs_del_item(trans, root, path);
13231 if (path->slots[0] == 0)
13232 btrfs_prev_leaf(root, path);
13237 error("failed to delete root %llu item[%llu, %u, %llu]",
13238 root->objectid, key.objectid, key.type, key.offset);
13240 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13241 root->objectid, key.objectid, key.type, key.offset);
13246 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the item at @path to the checker that matches its key type.
 *
 * Where a repair path exists the matching helper is called afterwards:
 * repair_extent_data_item(), repair_chunk_item(), or
 * delete_extent_tree_item() for block group items and keyed backref items
 * whose referencer is missing or mismatched.  EXTENT_CSUM items only add
 * their item size to total_csum_bytes.
 *
 * A slot at or beyond the number of items in the leaf is reported as an
 * empty leaf.
 */
13248 static int check_leaf_items(struct btrfs_trans_handle *trans,
13249 struct btrfs_root *root, struct btrfs_path *path,
13250 struct node_refs *nrefs, int account_bytes)
13252 struct btrfs_fs_info *fs_info = root->fs_info;
13253 struct btrfs_key key;
13254 struct extent_buffer *eb;
13257 struct btrfs_extent_data_ref *dref;
13262 eb = path->nodes[0];
13263 slot = path->slots[0];
13264 if (slot >= btrfs_header_nritems(eb)) {
13266 error("empty leaf [%llu %u] root %llu", eb->start,
13267 root->fs_info->nodesize, root->objectid);
13273 btrfs_item_key_to_cpu(eb, &key, slot);
13277 case BTRFS_EXTENT_DATA_KEY:
13278 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13280 ret = repair_extent_data_item(trans, root, path, nrefs,
13284 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13285 ret = check_block_group_item(fs_info, eb, slot);
13287 ret & REFERENCER_MISSING)
13288 ret = delete_extent_tree_item(trans, root, path);
13291 case BTRFS_DEV_ITEM_KEY:
13292 ret = check_dev_item(fs_info, eb, slot);
13295 case BTRFS_CHUNK_ITEM_KEY:
13296 ret = check_chunk_item(fs_info, eb, slot);
13298 ret = repair_chunk_item(trans, root, path, ret);
13301 case BTRFS_DEV_EXTENT_KEY:
13302 ret = check_dev_extent_item(fs_info, eb, slot);
13305 case BTRFS_EXTENT_ITEM_KEY:
13306 case BTRFS_METADATA_ITEM_KEY:
13307 ret = check_extent_item(trans, fs_info, path);
13310 case BTRFS_EXTENT_CSUM_KEY:
13311 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The remaining cases are keyed (non-inline) backref items */
13314 case BTRFS_TREE_BLOCK_REF_KEY:
13315 ret = check_tree_block_backref(fs_info, key.offset,
13318 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13319 ret = delete_extent_tree_item(trans, root, path);
13322 case BTRFS_EXTENT_DATA_REF_KEY:
13323 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13324 ret = check_extent_data_backref(fs_info,
13325 btrfs_extent_data_ref_root(eb, dref),
13326 btrfs_extent_data_ref_objectid(eb, dref),
13327 btrfs_extent_data_ref_offset(eb, dref),
13329 btrfs_extent_data_ref_count(eb, dref));
13331 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13332 ret = delete_extent_tree_item(trans, root, path);
13335 case BTRFS_SHARED_BLOCK_REF_KEY:
13336 ret = check_shared_block_backref(fs_info, key.offset,
13339 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13340 ret = delete_extent_tree_item(trans, root, path);
13343 case BTRFS_SHARED_DATA_REF_KEY:
13344 ret = check_shared_data_backref(fs_info, key.offset,
13347 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13348 ret = delete_extent_tree_item(trans, root, path);
13362 * Low memory usage version of check_chunks_and_extents.
/*
 * Low memory mode check of chunks and extents.
 *
 * The chunk root and the tree root are checked with check_btrfs_root().
 * Then the ROOT_ITEM entries of the tree root are iterated, starting at
 * the extent tree, and every tree found is checked the same way.  Tree
 * reloc roots are read without caching and freed after their check.
 *
 * A transaction is started on the extent root (NOTE(review): presumably
 * only in repair mode - confirm), block accounting is fixed with
 * btrfs_fix_block_accounting(), which clears BG_ACCOUNTING_ERROR from
 * err, and the transaction is committed.
 */
13364 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13366 struct btrfs_trans_handle *trans = NULL;
13367 struct btrfs_path path;
13368 struct btrfs_key old_key;
13369 struct btrfs_key key;
13370 struct btrfs_root *root1;
13371 struct btrfs_root *root;
13372 struct btrfs_root *cur_root;
13376 root = fs_info->fs_root;
13379 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13380 if (IS_ERR(trans)) {
13381 error("failed to start transaction before check");
13382 return PTR_ERR(trans);
13386 root1 = root->fs_info->chunk_root;
13387 ret = check_btrfs_root(trans, root1, 0, 1);
13390 root1 = root->fs_info->tree_root;
13391 ret = check_btrfs_root(trans, root1, 0, 1);
/* Walk the root items of the tree root, beginning with the extent tree */
13394 btrfs_init_path(&path);
13395 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13397 key.type = BTRFS_ROOT_ITEM_KEY;
13399 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13401 error("cannot find extent tree in tree_root");
13406 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13407 if (key.type != BTRFS_ROOT_ITEM_KEY)
13410 key.offset = (u64)-1;
13412 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13413 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13416 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13417 if (IS_ERR(cur_root) || !cur_root) {
13418 error("failed to read tree: %lld", key.objectid);
13422 ret = check_btrfs_root(trans, cur_root, 0, 1);
13425 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13426 btrfs_free_fs_root(cur_root);
/* Re-search with the saved key before advancing; presumably the check above may have changed the tree - confirm */
13428 btrfs_release_path(&path);
13429 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13430 &old_key, &path, 0, 0);
13434 ret = btrfs_next_item(root1, &path);
13440 /* if repair, update block accounting */
13442 ret = btrfs_fix_block_accounting(trans, root);
13446 err &= ~BG_ACCOUNTING_ERROR;
13450 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13452 btrfs_release_path(&path);
/*
 * Run the chunk and extent check that matches check_mode: the low memory
 * variant for CHECK_MODE_LOWMEM, the original one otherwise.
 *
 * In repair mode, when the check returned 0, device and super block sizes
 * are fixed as well with btrfs_fix_device_and_super_size().
 */
13457 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
/* The plain status line is only printed when progress reporting is off */
13461 if (!ctx.progress_enabled)
13462 fprintf(stderr, "checking extents\n");
13463 if (check_mode == CHECK_MODE_LOWMEM)
13464 ret = check_chunks_and_extents_v2(fs_info);
13466 ret = check_chunks_and_extents(fs_info);
13468 /* Also repair device size related problems */
13469 if (repair && !ret) {
13470 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Reinitialize the root node of @root as an empty tree block.
 *
 * A block is obtained either by reusing the current root node
 * (NOTE(review): presumably when @overwrite is set - confirm) or by
 * allocating a fresh one with btrfs_alloc_free_block().  Its header is
 * zeroed and refilled (level, bytenr, generation, backref revision, owner,
 * fsid, chunk tree uuid) and the buffer is marked dirty.
 *
 * When the new block has the same start as the old root node, the root
 * item generation and level are updated and written with
 * btrfs_update_root().  Finally the root is put on the dirty list.
 */
13477 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13478 struct btrfs_root *root, int overwrite)
13480 struct extent_buffer *c;
13481 struct extent_buffer *old = root->node;
13484 struct btrfs_disk_key disk_key = {0,0,0};
13490 extent_buffer_get(c);
13493 c = btrfs_alloc_free_block(trans, root,
13494 root->fs_info->nodesize,
13495 root->root_key.objectid,
13496 &disk_key, level, 0, 0);
13499 extent_buffer_get(c);
/* Wipe the header and rebuild it from scratch */
13503 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13504 btrfs_set_header_level(c, level);
13505 btrfs_set_header_bytenr(c, c->start);
13506 btrfs_set_header_generation(c, trans->transid);
13507 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13508 btrfs_set_header_owner(c, root->root_key.objectid);
13510 write_extent_buffer(c, root->fs_info->fsid,
13511 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13513 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13514 btrfs_header_chunk_tree_uuid(c),
13517 btrfs_mark_buffer_dirty(c);
13519 * this case can happen in the following case:
13521 * 1.overwrite previous root.
13523 * 2.reinit reloc data root, this is because we skip pin
13524 * down reloc data tree before which means we can allocate
13525 * same block bytenr here.
13527 if (old->start == c->start) {
13528 btrfs_set_root_generation(&root->root_item,
13530 root->root_item.level = btrfs_header_level(root->node);
13531 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13532 &root->root_key, &root->root_item);
13534 free_extent_buffer(c);
13538 free_extent_buffer(old);
13540 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable below it.
 *
 * A block whose range is already marked in fs_info->pinned_extents is not
 * processed again.  ROOT_ITEM entries are followed into the root block
 * they point at, except for the extent root and the reloc roots;
 * NOTE(review): presumably only in leaves and only when @tree_root is
 * set - confirm.  Block pointers are read and recursed into, with one
 * exception: at level 1 of a tree other than the tree root the children
 * are pinned by bytenr without being read.
 */
13544 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13545 struct extent_buffer *eb, int tree_root)
13547 struct extent_buffer *tmp;
13548 struct btrfs_root_item *ri;
13549 struct btrfs_key key;
13551 int level = btrfs_header_level(eb);
13557 * If we have pinned this block before, don't pin it again.
13558 * This can not only avoid forever loop with broken filesystem
13559 * but also give us some speedups.
13561 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13562 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13565 btrfs_pin_extent(fs_info, eb->start, eb->len);
13567 nritems = btrfs_header_nritems(eb);
13568 for (i = 0; i < nritems; i++) {
13570 btrfs_item_key_to_cpu(eb, &key, i);
13571 if (key.type != BTRFS_ROOT_ITEM_KEY)
13573 /* Skip the extent root and reloc roots */
13574 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13575 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13576 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13578 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13579 bytenr = btrfs_disk_root_bytenr(eb, ri);
13582 * If at any point we start needing the real root we
13583 * will have to build a stump root for the root we are
13584 * in, but for now this doesn't actually use the root so
13585 * just pass in extent_root.
13587 tmp = read_tree_block(fs_info, bytenr, 0);
13588 if (!extent_buffer_uptodate(tmp)) {
13589 fprintf(stderr, "Error reading root block\n");
13592 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13593 free_extent_buffer(tmp);
13597 bytenr = btrfs_node_blockptr(eb, i);
13599 /* If we aren't the tree root don't read the block */
13600 if (level == 1 && !tree_root) {
13601 btrfs_pin_extent(fs_info, bytenr,
13602 fs_info->nodesize);
13606 tmp = read_tree_block(fs_info, bytenr, 0);
13607 if (!extent_buffer_uptodate(tmp)) {
13608 fprintf(stderr, "Error reading tree block\n");
13611 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13612 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root node (tree_root = 0) and
 * then those reachable from the tree root node (tree_root = 1).
 *
 * The value of the tree-root walk is returned directly.
 * NOTE(review): confirm how a failure of the chunk-root walk is propagated.
 */
13621 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13625 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13629 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the CHUNK_ITEM entries found in
 * the chunk tree.
 *
 * The avail_{data,metadata,system}_alloc_bits of @fs_info are cleared first.
 * For each chunk item a block group is added with the chunk's type and
 * length, starting at the key offset, and the same range is marked dirty in
 * fs_info->free_space_cache. Afterwards the block groups are visited in
 * order via btrfs_lookup_first_block_group().
 */
13632 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13634 struct btrfs_block_group_cache *cache;
13635 struct btrfs_path path;
13636 struct extent_buffer *leaf;
13637 struct btrfs_chunk *chunk;
13638 struct btrfs_key key;
13642 btrfs_init_path(&path);
13644 key.type = BTRFS_CHUNK_ITEM_KEY;
13646 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13648 btrfs_release_path(&path);
13653 * We do this in case the block groups were screwed up and had alloc
13654 * bits that aren't actually set on the chunks. This happens with
13655 * restored images every time and could happen in real life I guess.
13657 fs_info->avail_data_alloc_bits = 0;
13658 fs_info->avail_metadata_alloc_bits = 0;
13659 fs_info->avail_system_alloc_bits = 0;
13661 /* First we need to create the in-memory block groups */
13663 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13664 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13666 btrfs_release_path(&path);
13674 leaf = path.nodes[0];
13675 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13676 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13681 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* One block group per chunk: type and length from the item, start from the key */
13682 btrfs_add_block_group(fs_info, 0,
13683 btrfs_chunk_type(leaf, chunk), key.offset,
13684 btrfs_chunk_length(leaf, chunk));
13685 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13686 key.offset + btrfs_chunk_length(leaf, chunk));
13691 cache = btrfs_lookup_first_block_group(fs_info, start);
13695 start = cache->key.objectid + cache->key.offset;
13698 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance from the tree root.
 *
 * Within @trans this looks up and deletes the balance item, deletes the root
 * items whose objectid is BTRFS_TREE_RELOC_OBJECTID, and finally re-reads the
 * data reloc tree, re-initializes its root and recreates its root directory.
 */
13702 static int reset_balance(struct btrfs_trans_handle *trans,
13703 struct btrfs_fs_info *fs_info)
13705 struct btrfs_root *root = fs_info->tree_root;
13706 struct btrfs_path path;
13707 struct extent_buffer *leaf;
13708 struct btrfs_key key;
13709 int del_slot, del_nr = 0;
13713 btrfs_init_path(&path);
/* Step 1: find the balance item in the tree root so it can be deleted */
13714 key.objectid = BTRFS_BALANCE_OBJECTID;
13715 key.type = BTRFS_BALANCE_ITEM_KEY;
13717 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13722 goto reinit_data_reloc;
13727 ret = btrfs_del_item(trans, root, &path);
13730 btrfs_release_path(&path);
/* Step 2: delete the TREE_RELOC root items, batching adjacent slots */
13732 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13733 key.type = BTRFS_ROOT_ITEM_KEY;
13735 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13739 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13744 ret = btrfs_del_items(trans, root, &path,
13751 btrfs_release_path(&path);
13754 ret = btrfs_search_slot(trans, root, &key, &path,
13761 leaf = path.nodes[0];
13762 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13763 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13765 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13770 del_slot = path.slots[0];
13779 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13783 btrfs_release_path(&path);
/* Step 3: re-initialize the data reloc tree and recreate its root directory */
13786 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13787 key.type = BTRFS_ROOT_ITEM_KEY;
13788 key.offset = (u64)-1;
13789 root = btrfs_read_fs_root(fs_info, &key);
13790 if (IS_ERR(root)) {
13791 fprintf(stderr, "Error reading data reloc tree\n");
13792 ret = PTR_ERR(root);
13795 record_root_in_trans(trans, root);
13796 ret = btrfs_fsck_reinit_root(trans, root, 0);
13799 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13801 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Sequence, as performed below:
 *   1. refuse filesystems with the MIXED_GROUPS incompat flag;
 *   2. pin all metadata blocks that are in use (pin_metadata_blocks());
 *   3. free the block groups and recreate them (reset_block_groups());
 *   4. re-initialize the extent root;
 *   5. insert an item for each block group into the new extent root;
 *   6. reset any pending balance (reset_balance()).
 *
 * Each failing step prints a message to stderr.
 */
13805 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13806 struct btrfs_fs_info *fs_info)
13812 * The only reason we don't do this is because right now we're just
13813 * walking the trees we find and pinning down their bytes, we don't look
13814 * at any of the leaves. In order to do mixed groups we'd have to check
13815 * the leaves of any fs roots and pin down the bytes for any file
13816 * extents we find. Not hard but why do it if we don't have to?
13818 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13819 fprintf(stderr, "We don't support re-initing the extent tree "
13820 "for mixed block groups yet, please notify a btrfs "
13821 "developer you want to do this so they can add this "
13822 "functionality.\n");
13827 * first we need to walk all of the trees except the extent tree and pin
13828 * down the bytes that are in use so we don't overwrite any existing
13831 ret = pin_metadata_blocks(fs_info);
13833 fprintf(stderr, "error pinning down used bytes\n");
13838 * Need to drop all the block groups since we're going to recreate all
13841 btrfs_free_block_groups(fs_info);
13842 ret = reset_block_groups(fs_info);
13844 fprintf(stderr, "error resetting the block groups\n");
13848 /* Ok we can allocate now, reinit the extent root */
13849 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13851 fprintf(stderr, "extent root initialization failed\n");
13853 * When the transaction code is updated we should end the
13854 * transaction, but for now progs only knows about commit so
13855 * just return an error.
13861 * Now we have all the in-memory block groups setup so we can make
13862 * allocations properly, and the metadata we care about is safe since we
13863 * pinned all of it above.
13866 struct btrfs_block_group_cache *cache;
13868 cache = btrfs_lookup_first_block_group(fs_info, start);
13871 start = cache->key.objectid + cache->key.offset;
/* Write this block group's item into the freshly initialized extent root */
13872 ret = btrfs_insert_item(trans, fs_info->extent_root,
13873 &cache->key, &cache->item,
13874 sizeof(cache->item));
13876 fprintf(stderr, "Error adding block group\n");
13879 btrfs_extent_post_op(trans, fs_info->extent_root);
/* Last step: drop any pending balance state */
13882 ret = reset_balance(trans, fs_info);
13884 fprintf(stderr, "error resetting the pending balance\n");
13889 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13891 struct btrfs_path path;
13892 struct btrfs_trans_handle *trans;
13893 struct btrfs_key key;
13896 printf("Recowing metadata block %llu\n", eb->start);
13897 key.objectid = btrfs_header_owner(eb);
13898 key.type = BTRFS_ROOT_ITEM_KEY;
13899 key.offset = (u64)-1;
13901 root = btrfs_read_fs_root(root->fs_info, &key);
13902 if (IS_ERR(root)) {
13903 fprintf(stderr, "Couldn't find owner root %llu\n",
13905 return PTR_ERR(root);
13908 trans = btrfs_start_transaction(root, 1);
13910 return PTR_ERR(trans);
13912 btrfs_init_path(&path);
13913 path.lowest_level = btrfs_header_level(eb);
13914 if (path.lowest_level)
13915 btrfs_node_key_to_cpu(eb, &key, 0);
13917 btrfs_item_key_to_cpu(eb, &key, 0);
13919 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13920 btrfs_commit_transaction(trans, root);
13921 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by bad->root_id.
 *
 * @root: any root of the filesystem; only root->fs_info is used, to read the
 *        owner root
 * @bad:  key and owning root id of the item to remove
 *
 * The deletion runs in its own transaction, which is committed before the
 * function finishes.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * here, so a failed commit would go unreported - confirm whether it should be.
 */
13925 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13927 struct btrfs_path path;
13928 struct btrfs_trans_handle *trans;
13929 struct btrfs_key key;
13932 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13933 bad->key.type, bad->key.offset);
13934 key.objectid = bad->root_id;
13935 key.type = BTRFS_ROOT_ITEM_KEY;
13936 key.offset = (u64)-1;
13938 root = btrfs_read_fs_root(root->fs_info, &key);
13939 if (IS_ERR(root)) {
13940 fprintf(stderr, "Couldn't find owner root %llu\n",
13942 return PTR_ERR(root);
13945 trans = btrfs_start_transaction(root, 1);
13947 return PTR_ERR(trans);
13949 btrfs_init_path(&path);
/* ins_len = -1, cow = 1: search with the intent to delete */
13950 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13956 ret = btrfs_del_item(trans, root, &path);
13958 btrfs_commit_transaction(trans, root);
13959 btrfs_release_path(&path);
13963 static int zero_log_tree(struct btrfs_root *root)
13965 struct btrfs_trans_handle *trans;
13968 trans = btrfs_start_transaction(root, 1);
13969 if (IS_ERR(trans)) {
13970 ret = PTR_ERR(trans);
13973 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13974 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13975 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range beginning at @start.
 *
 * The range is processed in steps of fs_info->sectorsize until the running
 * offset reaches len: each step reads data at start + offset into @buf with
 * read_extent_data() and hands it to btrfs_csum_file_block(), which is also
 * given start + len as the end of the range.
 *
 * @buf is caller-provided scratch space; the callers in this file allocate
 * sectorsize bytes for it.
 */
13979 static int populate_csum(struct btrfs_trans_handle *trans,
13980 struct btrfs_root *csum_root, char *buf, u64 start,
13983 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13988 while (offset < len) {
13989 sectorsize = fs_info->sectorsize;
13990 ret = read_extent_data(fs_info, buf, start + offset,
13994 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13995 start + offset, buf, sectorsize);
13998 offset += sectorsize;
/*
 * Add checksums for the regular file extents referenced by @cur_root.
 *
 * Items of @cur_root are visited one by one; for every EXTENT_DATA item whose
 * type is BTRFS_FILE_EXTENT_REG the extent's disk bytenr and disk length are
 * passed to populate_csum(). A sectorsize scratch buffer is allocated for the
 * data reads.
 *
 * NOTE(review): -EEXIST from populate_csum() gets special handling;
 * presumably it is tolerated because a shared extent was already
 * checksummed - confirm.
 */
14003 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
14004 struct btrfs_root *csum_root,
14005 struct btrfs_root *cur_root)
14007 struct btrfs_path path;
14008 struct btrfs_key key;
14009 struct extent_buffer *node;
14010 struct btrfs_file_extent_item *fi;
14017 buf = malloc(cur_root->fs_info->sectorsize);
14021 btrfs_init_path(&path);
14025 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14028 /* Iterate all regular file extents and fill its csum */
14030 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14032 if (key.type != BTRFS_EXTENT_DATA_KEY)
14034 node = path.nodes[0];
14035 slot = path.slots[0];
14036 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14037 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksum the on-disk extent as a whole, not just the referenced part */
14039 start = btrfs_file_extent_disk_bytenr(node, fi);
14040 len = btrfs_file_extent_disk_num_bytes(node, fi);
14042 ret = populate_csum(trans, csum_root, buf, start, len);
14043 if (ret == -EEXIST)
14049 * TODO: if next leaf is corrupted, jump to nearest next valid
14052 ret = btrfs_next_item(cur_root, &path);
14062 btrfs_release_path(&path);
/*
 * Fill the checksum tree by walking every fs/subvolume tree.
 *
 * ROOT_ITEM entries of the tree root are scanned starting at
 * BTRFS_FS_TREE_OBJECTID. For each objectid accepted by is_fstree() the root
 * is read with btrfs_read_fs_root() and its file extents are checksummed via
 * fill_csum_tree_from_one_fs_root(). Objectids above
 * BTRFS_LAST_FREE_OBJECTID are tested for explicitly.
 */
14067 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14068 struct btrfs_root *csum_root)
14070 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14071 struct btrfs_path path;
14072 struct btrfs_root *tree_root = fs_info->tree_root;
14073 struct btrfs_root *cur_root;
14074 struct extent_buffer *node;
14075 struct btrfs_key key;
14079 btrfs_init_path(&path);
14080 key.objectid = BTRFS_FS_TREE_OBJECTID;
14082 key.type = BTRFS_ROOT_ITEM_KEY;
14083 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14092 node = path.nodes[0];
14093 slot = path.slots[0];
14094 btrfs_item_key_to_cpu(node, &key, slot);
14095 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14097 if (key.type != BTRFS_ROOT_ITEM_KEY)
14099 if (!is_fstree(key.objectid))
14101 key.offset = (u64)-1;
14103 cur_root = btrfs_read_fs_root(fs_info, &key);
/*
 * NOTE(review): the message below uses %lld; if its argument is the unsigned
 * root objectid, %llu would be the matching conversion - confirm.
 */
14104 if (IS_ERR(cur_root) || !cur_root) {
14105 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14109 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14114 ret = btrfs_next_item(tree_root, &path);
14124 btrfs_release_path(&path);
/*
 * Fill the checksum tree by walking the extent tree.
 *
 * Leaves of the extent root are scanned; for every EXTENT_ITEM whose flags
 * include BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the item's
 * key.objectid as the start of the data range. A sectorsize scratch buffer is
 * allocated for the data reads.
 */
14128 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14129 struct btrfs_root *csum_root)
14131 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14132 struct btrfs_path path;
14133 struct btrfs_extent_item *ei;
14134 struct extent_buffer *leaf;
14136 struct btrfs_key key;
14139 btrfs_init_path(&path);
14141 key.type = BTRFS_EXTENT_ITEM_KEY;
14143 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14145 btrfs_release_path(&path);
14149 buf = malloc(csum_root->fs_info->sectorsize);
14151 btrfs_release_path(&path);
14156 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14157 ret = btrfs_next_leaf(extent_root, &path);
14165 leaf = path.nodes[0];
14167 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14168 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14173 ei = btrfs_item_ptr(leaf, path.slots[0],
14174 struct btrfs_extent_item);
/* Only data extents carry checksums */
14175 if (!(btrfs_extent_flags(leaf, ei) &
14176 BTRFS_EXTENT_FLAG_DATA)) {
14181 ret = populate_csum(trans, csum_root, buf, key.objectid,
14188 btrfs_release_path(&path);
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * After an extent tree re-initialization the extent tree no longer describes
 * the data extents, so it cannot be used as the source. In that case the
 * caller sets @search_fs_tree and the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
/*
 * Tear down roots_info_cache: remove each cache_extent from the tree,
 * recover its enclosing root_item_info, then free the tree itself and reset
 * the global pointer to NULL. Checks for a NULL cache up front.
 */
14210 static void free_roots_info_cache(void)
14212 if (!roots_info_cache)
14215 while (!cache_tree_empty(roots_info_cache)) {
14216 struct cache_extent *entry;
14217 struct root_item_info *rii;
14219 entry = first_cache_extent(roots_info_cache);
14222 remove_cache_extent(roots_info_cache, entry);
14223 rii = container_of(entry, struct root_item_info, cache_extent);
14227 free(roots_info_cache);
14228 roots_info_cache = NULL;
/*
 * Populate roots_info_cache (allocating it on first use) from the extent
 * tree of @info.
 *
 * Tree-block extents are considered: METADATA_ITEM entries, and EXTENT_ITEM
 * entries carrying BTRFS_EXTENT_FLAG_TREE_BLOCK. The first inline ref must be
 * of type BTRFS_TREE_BLOCK_REF_KEY; its offset is taken as the root id. For
 * each root id one root_item_info is kept, keyed by (root_id, size 1), which
 * records bytenr, generation and level of the highest-level block seen so
 * far. rii->level starts at (u8)-1 to mark "no block recorded yet".
 *
 * node_count is reset to 1 whenever a higher level is found.
 * NOTE(review): the equal-level branch presumably counts further blocks at
 * that level, which maybe_repair_root_item() relies on - confirm.
 */
14231 static int build_roots_info_cache(struct btrfs_fs_info *info)
14234 struct btrfs_key key;
14235 struct extent_buffer *leaf;
14236 struct btrfs_path path;
14238 if (!roots_info_cache) {
14239 roots_info_cache = malloc(sizeof(*roots_info_cache));
14240 if (!roots_info_cache)
14242 cache_tree_init(roots_info_cache);
14245 btrfs_init_path(&path);
14247 key.type = BTRFS_EXTENT_ITEM_KEY;
14249 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14252 leaf = path.nodes[0];
14255 struct btrfs_key found_key;
14256 struct btrfs_extent_item *ei;
14257 struct btrfs_extent_inline_ref *iref;
14258 int slot = path.slots[0];
14263 struct cache_extent *entry;
14264 struct root_item_info *rii;
14266 if (slot >= btrfs_header_nritems(leaf)) {
14267 ret = btrfs_next_leaf(info->extent_root, &path);
14274 leaf = path.nodes[0];
14275 slot = path.slots[0];
14278 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14280 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14281 found_key.type != BTRFS_METADATA_ITEM_KEY)
14284 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14285 flags = btrfs_extent_flags(leaf, ei);
14287 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14288 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly. Otherwise a btrfs_tree_block_info sits in between
 * and carries the level.
 */
14291 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14292 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14293 level = found_key.offset;
14295 struct btrfs_tree_block_info *binfo;
14297 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14298 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14299 level = btrfs_tree_block_level(leaf, binfo);
14303 * For a root extent, it must be of the following type and the
14304 * first (and only one) iref in the item.
14306 type = btrfs_extent_inline_ref_type(leaf, iref);
14307 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14310 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14311 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14313 rii = malloc(sizeof(struct root_item_info));
14318 rii->cache_extent.start = root_id;
14319 rii->cache_extent.size = 1;
14320 rii->level = (u8)-1;
14321 entry = &rii->cache_extent;
14322 ret = insert_cache_extent(roots_info_cache, entry);
14325 rii = container_of(entry, struct root_item_info,
14329 ASSERT(rii->cache_extent.start == root_id);
14330 ASSERT(rii->cache_extent.size == 1);
/* A higher-level block (or the first block seen) becomes the root candidate */
14332 if (level > rii->level || rii->level == (u8)-1) {
14333 rii->level = level;
14334 rii->bytenr = found_key.objectid;
14335 rii->gen = btrfs_extent_generation(leaf, ei);
14336 rii->node_count = 1;
14337 } else if (level == rii->level) {
14345 btrfs_release_path(&path);
/*
 * Check one root item against the information collected in roots_info_cache
 * and optionally fix it.
 *
 * @path:           points at the root item (path->nodes[0], path->slots[0])
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: when non-zero the item is only compared, never written
 *
 * The item's bytenr, level and generation are compared with the cached
 * values. On a mismatch a message is printed (suppressed for the read-only
 * pass in repair mode) and, unless @read_only_mode is set, the three fields
 * are overwritten in the extent buffer. A root item whose generation is newer
 * than the cached root node is reported separately. A root with no cache
 * entry, or with node_count != 1, is reported as an error.
 */
14350 static int maybe_repair_root_item(struct btrfs_path *path,
14351 const struct btrfs_key *root_key,
14352 const int read_only_mode)
14354 const u64 root_id = root_key->objectid;
14355 struct cache_extent *entry;
14356 struct root_item_info *rii;
14357 struct btrfs_root_item ri;
14358 unsigned long offset;
14360 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14363 "Error: could not find extent items for root %llu\n",
14364 root_key->objectid);
14368 rii = container_of(entry, struct root_item_info, cache_extent);
14369 ASSERT(rii->cache_extent.start == root_id);
14370 ASSERT(rii->cache_extent.size == 1);
14372 if (rii->node_count != 1) {
14374 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
14379 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14380 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14382 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14383 btrfs_root_level(&ri) != rii->level ||
14384 btrfs_root_generation(&ri) != rii->gen) {
14387 * If we're in repair mode but our caller told us to not update
14388 * the root item, i.e. just check if it needs to be updated, don't
14389 * print this message, since the caller will call us again shortly
14390 * for the same root item without read only mode (the caller will
14391 * open a transaction first).
14393 if (!(read_only_mode && repair))
14395 "%sroot item for root %llu,"
14396 " current bytenr %llu, current gen %llu, current level %u,"
14397 " new bytenr %llu, new gen %llu, new level %u\n",
14398 (read_only_mode ? "" : "fixing "),
14400 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14401 btrfs_root_level(&ri),
14402 rii->bytenr, rii->gen, rii->level);
14404 if (btrfs_root_generation(&ri) > rii->gen) {
14406 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14407 root_id, btrfs_root_generation(&ri), rii->gen);
14411 if (!read_only_mode) {
14412 btrfs_set_root_bytenr(&ri, rii->bytenr);
14413 btrfs_set_root_level(&ri, rii->level);
14414 btrfs_set_root_generation(&ri, rii->gen);
14415 write_extent_buffer(path->nodes[0], &ri,
14416 offset, sizeof(ri));
14426 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14427 * caused read-only snapshots to be corrupted if they were created at a moment
14428 * when the source subvolume/snapshot had orphan items. The issue was that the
14429 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14430 * node instead of the post orphan cleanup root node.
14431 * So this function, and its callees, just detects and fixes those cases. Even
14432 * though the regression was for read-only snapshots, this function applies to
14433 * any snapshot/subvolume root.
14434 * This must be run before any other repair code - not doing so makes other
14435 * repair code delete or modify backrefs in the extent tree for example, which
14436 * will result in an inconsistent fs after repairing the root items.
14438 static int repair_root_items(struct btrfs_fs_info *info)
14440 struct btrfs_path path;
14441 struct btrfs_key key;
14442 struct extent_buffer *leaf;
14443 struct btrfs_trans_handle *trans = NULL;
14446 int need_trans = 0;
14448 btrfs_init_path(&path);
/* Gather, per root id, the root node location recorded in the extent tree */
14450 ret = build_roots_info_cache(info);
/* Root items are scanned starting at the first free objectid */
14454 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14455 key.type = BTRFS_ROOT_ITEM_KEY;
14460 * Avoid opening and committing transactions if a leaf doesn't have
14461 * any root items that need to be fixed, so that we avoid rotating
14462 * backup roots unnecessarily.
14465 trans = btrfs_start_transaction(info->tree_root, 1);
14466 if (IS_ERR(trans)) {
14467 ret = PTR_ERR(trans);
14472 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14476 leaf = path.nodes[0];
14479 struct btrfs_key found_key;
14481 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14482 int no_more_keys = find_next_key(&path, &key);
14484 btrfs_release_path(&path);
14486 ret = btrfs_commit_transaction(trans,
14498 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14500 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14502 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* With no transaction open this is a check-only pass (read_only_mode = 1) */
14505 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14509 if (!trans && repair) {
14512 btrfs_release_path(&path);
14522 free_roots_info_cache();
14523 btrfs_release_path(&path);
14525 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group, then set the super
 * block's cache generation to (u64)-1 inside a transaction.
 *
 * Block groups are visited in address order: each lookup starts at the end
 * (objectid + offset) of the previously handled block group.
 *
 * NOTE(review): the result of the final btrfs_commit_transaction() is not
 * captured, so a failed commit would go unreported - confirm whether it
 * should be.
 */
14532 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14534 struct btrfs_trans_handle *trans;
14535 struct btrfs_block_group_cache *bg_cache;
14539 /* Clear all free space cache inodes and its extent data */
14541 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14544 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14547 current = bg_cache->key.objectid + bg_cache->key.offset;
14550 /* Don't forget to set cache_generation to -1 */
14551 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14552 if (IS_ERR(trans)) {
14553 error("failed to update super block cache generation");
14554 return PTR_ERR(trans);
14556 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14557 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle --clear-space-cache for the requested cache version.
 *
 * clear_version == 1: clears the v1 free space cache via
 *   clear_free_space_cache(); if the filesystem has the FREE_SPACE_TREE
 *   compat_ro flag, a message pointing the user to "v2" is emitted instead.
 * clear_version == 2: clears the free space tree via
 *   btrfs_clear_free_space_tree(); if the FREE_SPACE_TREE flag is not set,
 *   a message reports that there is nothing to clear.
 *
 * Progress and failures are reported with printf()/error().
 */
14562 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14567 if (clear_version == 1) {
14568 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14570 "free space cache v2 detected, use --clear-space-cache v2");
14574 printf("Clearing free space cache\n");
14575 ret = clear_free_space_cache(fs_info);
14577 error("failed to clear free space cache");
14580 printf("Free space cache cleared\n");
14582 } else if (clear_version == 2) {
14583 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14584 printf("no free space cache v2 to clear\n");
14588 printf("Clear free space cache v2\n");
14589 ret = btrfs_clear_free_space_tree(fs_info);
14591 error("failed to clear free space cache v2: %d", ret);
14594 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": synopsis first, then a short and a long
 * description, then one entry per option. cmd_check() passes this table to
 * usage() when the arguments are invalid.
 */
14601 const char * const cmd_check_usage[] = {
14602 "btrfs check [options] <device>",
14603 "Check structural integrity of a filesystem (unmounted).",
14604 "Check structural integrity of an unmounted filesystem. Verify internal",
14605 "trees' consistency and item connectivity. In the repair mode try to",
14606 "fix the problems found. ",
14607 "WARNING: the repair mode is considered dangerous",
14609 "-s|--super <superblock> use this superblock copy",
14610 "-b|--backup use the first valid backup root copy",
14611 "--force skip mount checks, repair is not possible",
14612 "--repair try to repair the filesystem",
14613 "--readonly run in read-only mode (default)",
14614 "--init-csum-tree create a new CRC tree",
14615 "--init-extent-tree create a new extent tree",
14616 "--mode <MODE> allows choice of memory/IO trade-offs",
14617 " where MODE is one of:",
14618 " original - read inodes and extents to memory (requires",
14619 " more memory, does less IO)",
14620 " lowmem - try to use less memory but read blocks again",
14622 "--check-data-csum verify checksums of data blocks",
14623 "-Q|--qgroup-report print a report on qgroup consistency",
14624 "-E|--subvol-extents <subvolid>",
14625 " print subvolume extents and sharing state",
14626 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14627 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14628 "-p|--progress indicate progress",
14629 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
14633 int cmd_check(int argc, char **argv)
14635 struct cache_tree root_cache;
14636 struct btrfs_root *root;
14637 struct btrfs_fs_info *info;
14640 u64 tree_root_bytenr = 0;
14641 u64 chunk_root_bytenr = 0;
14642 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14646 int init_csum_tree = 0;
14648 int clear_space_cache = 0;
14649 int qgroup_report = 0;
14650 int qgroups_repaired = 0;
14651 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14656 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14657 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14658 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14659 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14660 GETOPT_VAL_FORCE };
14661 static const struct option long_options[] = {
14662 { "super", required_argument, NULL, 's' },
14663 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14664 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14665 { "init-csum-tree", no_argument, NULL,
14666 GETOPT_VAL_INIT_CSUM },
14667 { "init-extent-tree", no_argument, NULL,
14668 GETOPT_VAL_INIT_EXTENT },
14669 { "check-data-csum", no_argument, NULL,
14670 GETOPT_VAL_CHECK_CSUM },
14671 { "backup", no_argument, NULL, 'b' },
14672 { "subvol-extents", required_argument, NULL, 'E' },
14673 { "qgroup-report", no_argument, NULL, 'Q' },
14674 { "tree-root", required_argument, NULL, 'r' },
14675 { "chunk-root", required_argument, NULL,
14676 GETOPT_VAL_CHUNK_TREE },
14677 { "progress", no_argument, NULL, 'p' },
14678 { "mode", required_argument, NULL,
14680 { "clear-space-cache", required_argument, NULL,
14681 GETOPT_VAL_CLEAR_SPACE_CACHE},
14682 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14683 { NULL, 0, NULL, 0}
14686 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14690 case 'a': /* ignored */ break;
14692 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14695 num = arg_strtou64(optarg);
14696 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14698 "super mirror should be less than %d",
14699 BTRFS_SUPER_MIRROR_MAX);
14702 bytenr = btrfs_sb_offset(((int)num));
14703 printf("using SB copy %llu, bytenr %llu\n", num,
14704 (unsigned long long)bytenr);
14710 subvolid = arg_strtou64(optarg);
14713 tree_root_bytenr = arg_strtou64(optarg);
14715 case GETOPT_VAL_CHUNK_TREE:
14716 chunk_root_bytenr = arg_strtou64(optarg);
14719 ctx.progress_enabled = true;
14723 usage(cmd_check_usage);
14724 case GETOPT_VAL_REPAIR:
14725 printf("enabling repair mode\n");
14727 ctree_flags |= OPEN_CTREE_WRITES;
14729 case GETOPT_VAL_READONLY:
14732 case GETOPT_VAL_INIT_CSUM:
14733 printf("Creating a new CRC tree\n");
14734 init_csum_tree = 1;
14736 ctree_flags |= OPEN_CTREE_WRITES;
14738 case GETOPT_VAL_INIT_EXTENT:
14739 init_extent_tree = 1;
14740 ctree_flags |= (OPEN_CTREE_WRITES |
14741 OPEN_CTREE_NO_BLOCK_GROUPS);
14744 case GETOPT_VAL_CHECK_CSUM:
14745 check_data_csum = 1;
14747 case GETOPT_VAL_MODE:
14748 check_mode = parse_check_mode(optarg);
14749 if (check_mode == CHECK_MODE_UNKNOWN) {
14750 error("unknown mode: %s", optarg);
14754 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14755 if (strcmp(optarg, "v1") == 0) {
14756 clear_space_cache = 1;
14757 } else if (strcmp(optarg, "v2") == 0) {
14758 clear_space_cache = 2;
14759 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14762 "invalid argument to --clear-space-cache, must be v1 or v2");
14765 ctree_flags |= OPEN_CTREE_WRITES;
14767 case GETOPT_VAL_FORCE:
14773 if (check_argc_exact(argc - optind, 1))
14774 usage(cmd_check_usage);
14776 if (ctx.progress_enabled) {
14777 ctx.tp = TASK_NOTHING;
14778 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14781 /* This check is the only reason for --readonly to exist */
14782 if (readonly && repair) {
14783 error("repair options are not compatible with --readonly");
14788 * experimental and dangerous
14790 if (repair && check_mode == CHECK_MODE_LOWMEM)
14791 warning("low-memory mode repair support is only partial");
14794 cache_tree_init(&root_cache);
14796 ret = check_mounted(argv[optind]);
14799 error("could not check mount status: %s",
14805 "%s is currently mounted, use --force if you really intend to check the filesystem",
14813 error("repair and --force is not yet supported");
14820 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14824 "filesystem mounted, continuing because of --force");
14826 /* A block device is mounted in exclusive mode by kernel */
14827 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14830 /* only allow partial opening under repair mode */
14832 ctree_flags |= OPEN_CTREE_PARTIAL;
14834 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14835 chunk_root_bytenr, ctree_flags);
14837 error("cannot open file system");
14843 global_info = info;
14844 root = info->fs_root;
14845 uuid_unparse(info->super_copy->fsid, uuidbuf);
14847 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14850 * Check the bare minimum before starting anything else that could rely
14851 * on it, namely the tree roots, any local consistency checks
14853 if (!extent_buffer_uptodate(info->tree_root->node) ||
14854 !extent_buffer_uptodate(info->dev_root->node) ||
14855 !extent_buffer_uptodate(info->chunk_root->node)) {
14856 error("critical roots corrupted, unable to check the filesystem");
14862 if (clear_space_cache) {
14863 ret = do_clear_free_space_cache(info, clear_space_cache);
14869 * repair mode will force us to commit transaction which
14870 * will make us fail to load log tree when mounting.
14872 if (repair && btrfs_super_log_root(info->super_copy)) {
14873 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14879 ret = zero_log_tree(root);
14882 error("failed to zero log tree: %d", ret);
14887 if (qgroup_report) {
14888 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14890 ret = qgroup_verify_all(info);
14897 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14898 subvolid, argv[optind], uuidbuf);
14899 ret = print_extent_state(info, subvolid);
14904 if (init_extent_tree || init_csum_tree) {
14905 struct btrfs_trans_handle *trans;
14907 trans = btrfs_start_transaction(info->extent_root, 0);
14908 if (IS_ERR(trans)) {
14909 error("error starting transaction");
14910 ret = PTR_ERR(trans);
14915 if (init_extent_tree) {
14916 printf("Creating a new extent tree\n");
14917 ret = reinit_extent_tree(trans, info);
14923 if (init_csum_tree) {
14924 printf("Reinitialize checksum tree\n");
14925 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14927 error("checksum tree initialization failed: %d",
14934 ret = fill_csum_tree(trans, info->csum_root,
14938 error("checksum tree refilling failed: %d", ret);
14943 * Ok now we commit and run the normal fsck, which will add
14944 * extent entries for all of the items it finds.
14946 ret = btrfs_commit_transaction(trans, info->extent_root);
14951 if (!extent_buffer_uptodate(info->extent_root->node)) {
14952 error("critical: extent_root, unable to check the filesystem");
14957 if (!extent_buffer_uptodate(info->csum_root->node)) {
14958 error("critical: csum_root, unable to check the filesystem");
14964 if (!init_extent_tree) {
14965 ret = repair_root_items(info);
14968 error("failed to repair root items: %s", strerror(-ret));
14972 fprintf(stderr, "Fixed %d roots.\n", ret);
14974 } else if (ret > 0) {
14976 "Found %d roots with an outdated root item.\n",
14979 "Please run a filesystem check with the option --repair to fix them.\n");
14986 ret = do_check_chunks_and_extents(info);
14990 "errors found in extent allocation tree or chunk allocation");
14992 /* Only re-check super size after we checked and repaired the fs */
14993 err |= !is_super_size_valid(info);
14995 if (!ctx.progress_enabled) {
14996 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14997 fprintf(stderr, "checking free space tree\n");
14999 fprintf(stderr, "checking free space cache\n");
15001 ret = check_space_cache(root);
15004 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
15005 error("errors found in free space tree");
15007 error("errors found in free space cache");
15012 * We used to have to have these hole extents in between our real
15013 * extents so if we don't have this flag set we need to make sure there
15014 * are no gaps in the file extents for inodes, otherwise we can just
15015 * ignore it when this happens.
15017 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15018 ret = do_check_fs_roots(info, &root_cache);
15021 error("errors found in fs roots");
15025 fprintf(stderr, "checking csums\n");
15026 ret = check_csums(root);
15029 error("errors found in csum tree");
15033 fprintf(stderr, "checking root refs\n");
15034 /* For low memory mode, check_fs_roots_v2 handles root refs */
15035 if (check_mode != CHECK_MODE_LOWMEM) {
15036 ret = check_root_refs(root, &root_cache);
15039 error("errors found in root refs");
15044 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15045 struct extent_buffer *eb;
15047 eb = list_first_entry(&root->fs_info->recow_ebs,
15048 struct extent_buffer, recow);
15049 list_del_init(&eb->recow);
15050 ret = recow_extent_buffer(root, eb);
15053 error("fails to fix transid errors");
15058 while (!list_empty(&delete_items)) {
15059 struct bad_item *bad;
15061 bad = list_first_entry(&delete_items, struct bad_item, list);
15062 list_del_init(&bad->list);
15064 ret = delete_bad_item(root, bad);
15070 if (info->quota_enabled) {
15071 fprintf(stderr, "checking quota groups\n");
15072 ret = qgroup_verify_all(info);
15075 error("failed to check quota groups");
15079 ret = repair_qgroups(info, &qgroups_repaired);
15082 error("failed to repair quota groups");
15088 if (!list_empty(&root->fs_info->recow_ebs)) {
15089 error("transid errors in file system");
15094 printf("found %llu bytes used, ",
15095 (unsigned long long)bytes_used);
15097 printf("error(s) found\n");
15099 printf("no error found\n");
15100 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15101 printf("total tree bytes: %llu\n",
15102 (unsigned long long)total_btree_bytes);
15103 printf("total fs tree bytes: %llu\n",
15104 (unsigned long long)total_fs_tree_bytes);
15105 printf("total extent tree bytes: %llu\n",
15106 (unsigned long long)total_extent_tree_bytes);
15107 printf("btree space waste bytes: %llu\n",
15108 (unsigned long long)btree_space_waste);
15109 printf("file data blocks allocated: %llu\n referenced %llu\n",
15110 (unsigned long long)data_bytes_allocated,
15111 (unsigned long long)data_bytes_referenced);
15113 free_qgroup_counts();
15114 free_root_recs_tree(&root_cache);
15118 if (ctx.progress_enabled)
15119 task_deinit(ctx.info);