2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* INODE_INDEX found but not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
136 #define BG_ACCOUNTING_ERROR (1<<21) /* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and changed to use a list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
258 enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
322 #define REF_ERR_NO_DIR_ITEM (1 << 0)
323 #define REF_ERR_NO_DIR_INDEX (1 << 1)
324 #define REF_ERR_NO_INODE_REF (1 << 2)
325 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
326 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
327 #define REF_ERR_DUP_INODE_REF (1 << 5)
328 #define REF_ERR_INDEX_UNMATCH (1 << 6)
329 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
330 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
331 #define REF_ERR_NO_ROOT_REF (1 << 9)
332 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
333 #define REF_ERR_DUP_ROOT_REF (1 << 11)
334 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
370 #define I_ERR_NO_INODE_ITEM (1 << 0)
371 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
372 #define I_ERR_DUP_INODE_ITEM (1 << 2)
373 #define I_ERR_DUP_DIR_INDEX (1 << 3)
374 #define I_ERR_ODD_DIR_ITEM (1 << 4)
375 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
376 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
377 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
378 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
379 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
380 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
381 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
382 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
383 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
384 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two errors indistinguishable once OR-ed into a result.
 * It now uses the first free bit so every flag below is unique; all other
 * values are unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatibility helper that allows old code to be reused */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will merge holes for copy_file_extent_holes(),
565 * which will ensure there won't be contiguous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will split the hole and is much stricter than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exist.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* For reloc root errors, print the corresponding fs root objectid */
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records cached under @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first. The source's root_cache is
 * walked, then its inode_cache; each entry is either unlinked from the source
 * or mirrored into a freshly allocated ptr_node (the condition choosing
 * between the two is not visible in this excerpt) and inserted into the
 * matching destination cache. When the destination already holds an entry
 * for the same key (-EEXIST), the two records are merged with
 * merge_inode_recs() and the incoming record is released.
 *
 * Finally dst_node->current is moved to the inode the source was positioned
 * on, if the destination has no current inode or is behind it.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
/* Drop one reference from the source node. */
1323 if (--src_node->refs == 0)
/* Remember which inode the source was positioned on. */
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
/* Start with the root caches; the inode caches are selected further down. */
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry is possibly unlinked. */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
/* New ptr_node covering the same range as the source entry. */
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* Key already present in the destination: fold @rec into that record. */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* Root caches done: switch both cursors to the inode caches. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
/* Adopt the source's current inode when the destination has none or is behind. */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry release callback: map @cache back to its enclosing ptr_node and
 * free the inode record associated with it.
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Instantiate a tree-freeing helper built on free_inode_ptr(); presumably
 * this is what provides free_inode_recs_tree() used elsewhere in this file.
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the entry of the @shared cache tree that covers the one-byte range
 * starting at bytenr and convert it to its enclosing shared_node.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-initialised shared_node keyed by @bytenr (range size 1),
 * initialise its root and inode cache trees and insert it into @shared.
 *
 * @refs: reference count of the block; how it is stored is not visible in
 *        this excerpt.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping for arriving at the tree block @bytenr on @level during the
 * tree walk.
 *
 * The function starts with a check for @level already being the active level
 * (its outcome is not visible in this excerpt). The visible code then either
 * registers a new shared_node for @bytenr and makes @level the active level,
 * or, for a node that is already known, drops one reference (freeing its
 * cached records when that was the last one) or splices its records into the
 * currently active node.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* A block can only be entered below the currently active level. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* Register the block and make @level the active level. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/*
 * Active level is the root level and the root item has zero refs: only drop
 * a reference, releasing the cached records with the last one.
 */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Fold the records kept under @node into the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): stop using the node at the active
 * level and make a higher level (index i, found by the scan starting at
 * @level + 1) the active one.
 *
 * When the new active level is below the root level, or the root item still
 * has references, the records of the node being left are spliced into the
 * new active node.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan must have stopped at a valid level. */
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
/*
 * Implementation outline: two lookups in the tree root. First an exact
 * search for the key (parent_root_id, ROOT_REF, child_root_id). Then the
 * ROOT_BACKREF items of child_root_id are scanned for one whose key offset
 * equals parent_root_id.
 */
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Direct parent -> child reference. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Fall back to walking the child's back references. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: continue in the next one. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the run of back references belonging to child_root_id. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* Back reference naming parent_root_id found. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX item of leaf @eb at @slot for the inode
 * currently tracked by @active_node.
 *
 * An item can pack several btrfs_dir_item entries; each one is decoded, its
 * name is copied into a local buffer (clamped when the name overruns the item
 * or exceeds BTRFS_NAME_LEN) and a backref is recorded in the inode cache or
 * the root cache depending on the type of the entry's location key.
 *
 * namebuf holds raw name bytes without a NUL terminator (it is exactly
 * BTRFS_NAME_LEN bytes, and len may be that large), so it must always be
 * printed with an explicit length.
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
/* Walk every entry packed into this item. */
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
/* Entry header plus name overruns the item, or the name is too long. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must be the hash of the name. */
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
/* %.*s: namebuf is not NUL terminated, bound the print by len. */
1623 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
/* Record the backref in the cache that matches the location key type. */
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with nritems > 1 is flagged as a duplicate index. */
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item of leaf @eb at @slot.
 *
 * The item may hold several btrfs_inode_ref entries. For each one the name
 * is copied out (clamped and flagged with REF_ERR_NAME_TOO_LONG when it
 * would cross the item boundary or exceed BTRFS_NAME_LEN) and a backref is
 * added to the inode cache of @active_node, with key->objectid passed as the
 * inode and key->offset as the directory argument.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
/* namebuf receives raw name bytes only; it is not NUL terminated here. */
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Advance past this entry's header and name. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item of leaf @eb at @slot.
 *
 * Same idea as process_inode_ref(), but the parent directory is read from
 * each btrfs_inode_extref entry instead of from the key offset. Names longer
 * than BTRFS_NAME_LEN are clamped and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the entry header
 * plus name stays inside the item (@total) is visible here - confirm whether
 * one is needed.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only what fits into namebuf. */
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Advance past this entry's header and name. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len). The amount covered is reported through @found
 * (the statements updating it are not visible in this excerpt).
 *
 * Each csum item covers (item size / csum size) sectors starting at its key
 * offset.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which may be a csum item
 * whose coverage reaches into the range.
 */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: continue in the next one. */
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
/* End of the byte range this csum item covers. */
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item for the inode currently tracked by
 * @active_node.
 *
 * The visible code extends the inode's contiguous extent range (flagging
 * overlaps and recording holes), sanity-checks the file extent fields by
 * extent type, adds to rec->found_size, and, for extents with a non-zero
 * disk_bytenr outside the data reloc tree, looks up checksum coverage with
 * count_csum_range().
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Sector size is assumed to be a power of two for this mask to work. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Compare against where the previous extent ended: overlap or hole. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a full sector. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the uncompressed extent. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 adds nothing to found_size. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are looked up over their whole on-disk length. */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching per-type processor, keeping
 * active_node->current pointed at the inode record of the item's objectid.
 *
 * The function starts with a check for the active level being the root level
 * of a root whose root item has zero refs (the action taken is not visible
 * in this excerpt). Items with the free space objectid and orphan items are
 * special-cased (handling likewise not visible).
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a higher inode number: mark the previous record as checked,
 * let it be freed if possible, and switch to the record for key.objectid.
 */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping for the tree walkers; every array has one slot per
 * tree level and is indexed by level.
 */
/* start address of the block last recorded for each level */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count looked up for that block */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the block at this level has to be checked by the current root */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
/*
 * Forward declarations: both functions are used by process_one_leaf_v2()
 * before their definitions appear.
 */
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
/*
 * Check the inodes stored in the leaf at path->nodes[0] by calling
 * check_inode_item() for them. check_inode_item() may move @path to another
 * leaf; when that happens the node references along the path are refreshed
 * and the shared-block skip logic is applied again.
 */
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on, to detect a leaf switch later. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the path entries below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, from pointer
 * @slot up to the last pointer in the node. The check applied to the node
 * level before doing so is not visible in this excerpt.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
/*
 * @parent: node holding the pointer being verified
 * @slot:   index of that pointer inside @parent
 * @child:  block that was read for the pointer
 *
 * Every diagnostic prints what the parent pointer asks for as "wanted" and
 * what the child block actually carries as "have".
 */
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys, internal nodes store node keys. */
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
/* wanted = generation in the parent pointer, have = child's header. */
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_node_ptr_generation(parent, slot),
2137 btrfs_header_generation(child));
2143 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144 * in every fs or file tree check. Here we find all of its root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
/*
 * @roots: set of root ids referencing the block. Its first rb-tree entry is
 *         compared with root->objectid below; per the comment further down
 *         that entry is the smallest root id.
 */
2149 struct rb_node *node;
2150 struct ulist_node *u;
2153 * @roots can be empty if it belongs to tree reloc tree
2154 * In that case, we should always check the leaf, as we can't use
2155 * the tree owner to ensure some other root will check it.
2157 if (roots->nnodes == 1 || roots->nnodes == 0)
2160 node = rb_first(&roots->root);
2161 u = rb_entry(node, struct ulist_node, rb_node);
2163 * current root id is not smallest, we skip it and let it be checked
2164 * in the fs or file tree who hash the smallest root id.
2166 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb, as referenced from @root, is in full
 * backref mode, and set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in *flags_ret
 * accordingly.
 *
 * Several cheap checks come first (root id range, @eb being the root node,
 * the RELOC header flag, header owner equal to the root). Otherwise the
 * extent item of the block is looked up in the extent tree and its flags and
 * inline references are inspected.
 */
2172 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2175 struct btrfs_root *extent_root = root->fs_info->extent_root;
2176 struct btrfs_root_item *ri = &root->root_item;
2177 struct btrfs_extent_inline_ref *iref;
2178 struct btrfs_extent_item *ei;
2179 struct btrfs_key key;
2180 struct btrfs_path *path = NULL;
2191 * Except file/reloc tree, we can not have FULL BACKREF MODE
2193 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2197 if (eb->start == btrfs_root_bytenr(ri))
2200 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2203 owner = btrfs_header_owner(eb);
2204 if (owner == root->objectid)
2207 path = btrfs_alloc_path();
/* Search past the largest possible key for this bytenr, then step back. */
2211 key.objectid = btrfs_header_bytenr(eb);
2213 key.offset = (u64)-1;
2215 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2222 ret = btrfs_previous_extent_item(extent_root, path,
2228 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on @eb refers to the extent tree leaf, not the checked block. */
2230 eb = path->nodes[0];
2231 slot = path->slots[0];
2232 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2234 flags = btrfs_extent_flags(eb, ei);
2235 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item up to the end of the item. */
2238 ptr = (unsigned long)(ei + 1);
2239 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a btrfs_tree_block_info before the inline refs. */
2241 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2242 ptr += sizeof(struct btrfs_tree_block_info);
2245 /* Reached extent item ends normally */
2249 /* Beyond extent item end, wrong item size */
2251 error("extent item at bytenr %llu slot %d has wrong size",
2256 iref = (struct btrfs_extent_inline_ref *)ptr;
2257 offset = btrfs_extent_inline_ref_offset(eb, iref);
2258 type = btrfs_extent_inline_ref_type(eb, iref);
/* Looking for a normal tree block ref that names the header owner. */
2260 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2262 ptr += btrfs_extent_inline_ref_size(type);
2266 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2270 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2272 btrfs_free_path(path);
2277 * for a tree node or leaf, we record its reference count, so later if we still
2278 * process this node or leaf, don't need to compute its reference count again.
2280 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2282 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2283 struct extent_buffer *eb, struct node_refs *nrefs,
2284 u64 level, int check_all)
/*
 * @eb:        tree block at @bytenr; only used (together with @check_all)
 *             to compute the full backref state, may be NULL
 * @nrefs:     per-level cache that is refreshed for @level
 * @check_all: non-zero when all trees are being checked; a lookup failure
 *             is then tolerated and need_check[] is not derived from the
 *             visible btrfs_find_all_roots() result (exact flow not visible
 *             in this excerpt)
 */
2286 struct ulist *roots;
2289 int root_level = btrfs_header_level(root->node);
/* Slot for this level already describes @bytenr. */
2293 if (nrefs->bytenr[level] == bytenr)
2296 if (bytenr != (u64)-1) {
2297 /* the return value of this function seems a mistake */
2298 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2299 level, 1, &refs, &flags);
2301 if (ret < 0 && !check_all)
/* Reset the cached state of this level for the new block. */
2304 nrefs->bytenr[level] = bytenr;
2305 nrefs->refs[level] = refs;
2306 nrefs->full_backref[level] = 0;
2307 nrefs->checked[level] = 0;
2310 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2315 check = need_check(root, roots);
2317 nrefs->need_check[level] = check;
2320 nrefs->need_check[level] = 1;
/* The root level always needs checking. */
2322 if (level == root_level) {
2323 nrefs->need_check[level] = 1;
2326 * The node refs may have not been
2327 * updated if upper needs checking (the
2328 * lowest root_objectid) the node can
2331 nrefs->need_check[level] =
2332 nrefs->need_check[level + 1];
/* Full backref state is only computed when checking all trees. */
2338 if (check_all && eb) {
2339 calc_extent_flag_v2(root, eb, &flags);
2340 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2341 nrefs->full_backref[level] = 1;
2348 * @level if @level == -1 means extent data item
2349 * else normal tree block.
2351 static int should_check_extent_strictly(struct btrfs_root *root,
2352 struct node_refs *nrefs, int level)
/*
 * Uses the reference counts cached in @nrefs for the levels above @level:
 * the loop below looks for any such level, up to and including the root
 * level, whose block has more than one reference.
 */
2354 int root_level = btrfs_header_level(root->node);
2356 if (level > root_level || level < -1)
2358 if (level == root_level)
2361 * if the upper node is marked full backref, it should contain shared
2362 * backref of the parent (except owner == root->objectid).
2364 while (++level <= root_level)
2365 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * Reference counts are looked up (or taken from @nrefs when the block is the
 * one cached for that level) and handed to enter_shared_node(). Leaves are
 * processed with process_one_leaf(). Child blocks are read, validated
 * against their parent pointer with check_child_node() and with
 * btrfs_check_leaf()/btrfs_check_node() before the path steps down onto
 * them. Unreadable children are recorded as corrupt extents.
 */
2371 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2372 struct walk_control *wc, int *level,
2373 struct node_refs *nrefs)
2375 enum btrfs_tree_block_status status;
2378 struct btrfs_fs_info *fs_info = root->fs_info;
2379 struct extent_buffer *next;
2380 struct extent_buffer *cur;
2384 WARN_ON(*level < 0);
2385 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block, cached per level in @nrefs. */
2387 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2388 refs = nrefs->refs[*level];
2391 ret = btrfs_lookup_extent_info(NULL, root,
2392 path->nodes[*level]->start,
2393 *level, 1, &refs, NULL);
2398 nrefs->bytenr[*level] = path->nodes[*level]->start;
2399 nrefs->refs[*level] = refs;
2403 ret = enter_shared_node(root, path->nodes[*level]->start,
2411 while (*level >= 0) {
2412 WARN_ON(*level < 0);
2413 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2414 cur = path->nodes[*level];
2416 if (btrfs_header_level(cur) != *level)
2419 if (path->slots[*level] >= btrfs_header_nritems(cur))
2422 ret = process_one_leaf(root, cur, wc);
/* Child pointer at the current slot. */
2427 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2428 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2430 if (bytenr == nrefs->bytenr[*level - 1]) {
2431 refs = nrefs->refs[*level - 1];
2433 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2434 *level - 1, 1, &refs, NULL);
2438 nrefs->bytenr[*level - 1] = bytenr;
2439 nrefs->refs[*level - 1] = refs;
2444 ret = enter_shared_node(root, bytenr, refs,
2447 path->slots[*level]++;
/* Use the cached block if it is up to date, otherwise read it from disk. */
2452 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2453 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2454 free_extent_buffer(next);
2455 reada_walk_down(root, cur, path->slots[*level]);
2456 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record the child as a corrupt extent. */
2457 if (!extent_buffer_uptodate(next)) {
2458 struct btrfs_key node_key;
2460 btrfs_node_key_to_cpu(path->nodes[*level],
2462 path->slots[*level]);
2463 btrfs_add_corrupt_extent_record(root->fs_info,
2465 path->nodes[*level]->start,
2466 root->fs_info->nodesize,
2473 ret = check_child_node(cur, path->slots[*level], next);
2475 free_extent_buffer(next);
2480 if (btrfs_is_leaf(next))
2481 status = btrfs_check_leaf(root, NULL, next);
2483 status = btrfs_check_node(root, NULL, next);
2484 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2485 free_extent_buffer(next);
/* Step down: the child becomes the current block of the lower level. */
2490 *level = *level - 1;
2491 free_extent_buffer(path->nodes[*level]);
2492 path->nodes[*level] = next;
2493 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2496 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; used by account_bytes() below. */
2500 static int fs_root_objectid(u64 objectid);
2503 * Update global fs information.
2505 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
/*
 * Adds the size of the block at path->nodes[level] to the file-scope
 * counters: always to total_btree_bytes, to total_fs_tree_bytes when the
 * root is an fs root, and to total_extent_tree_bytes when the block is owned
 * by the extent tree. Unused space in the block is added to
 * btree_space_waste.
 */
2509 struct extent_buffer *eb = path->nodes[level];
2511 total_btree_bytes += eb->len;
2512 if (fs_root_objectid(root->objectid))
2513 total_fs_tree_bytes += eb->len;
2514 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2515 total_extent_tree_bytes += eb->len;
2518 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Unused key pointer slots, converted to bytes on the following line. */
2520 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2521 btrfs_header_nritems(eb));
2522 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2527 * This function only handles BACKREF_MISSING,
2528 * If corresponding extent item exists, increase the ref, else insert an extent
2531 * Returns error bits after repair.
2533 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2534 struct btrfs_root *root,
2535 struct extent_buffer *node,
2536 struct node_refs *nrefs, int level, int err)
/*
 * @trans: transaction the repair runs in
 * @root:  tree that references @node
 * @node:  tree block whose backref is missing
 * @nrefs: per-level reference cache, updated to reflect the repair
 * @level: level of @node; indexes the @nrefs arrays, so it must be below
 *         BTRFS_MAX_LEVEL
 * @err:   error bits from the preceding check; BACKREF_MISSING is cleared
 *         from them once a reference has been added
 */
2538 struct btrfs_fs_info *fs_info = root->fs_info;
2539 struct btrfs_root *extent_root = fs_info->extent_root;
2540 struct btrfs_path path;
2541 struct btrfs_extent_item *ei;
2542 struct btrfs_tree_block_info *bi;
2543 struct btrfs_key key;
2544 struct extent_buffer *eb;
2545 u32 size = sizeof(*ei);
2546 u32 node_size = root->fs_info->nodesize;
2547 int insert_extent = 0;
2548 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2549 int root_level = btrfs_header_level(root->node);
2554 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2557 if ((err & BACKREF_MISSING) == 0)
/* level == BTRFS_MAX_LEVEL would already index past the nrefs arrays. */
2560 WARN_ON(level >= BTRFS_MAX_LEVEL);
2563 btrfs_init_path(&path);
2564 bytenr = btrfs_header_bytenr(node);
2565 owner = btrfs_header_owner(node);
2566 generation = btrfs_header_generation(node);
2568 key.objectid = bytenr;
2570 key.offset = (u64)-1;
2572 /* Search for the extent item */
2573 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2579 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2583 /* calculate if the extent item flag is full backref or not */
2584 if (nrefs->full_backref[level] != 0)
2585 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2587 /* insert an extent item */
2588 if (insert_extent) {
2589 struct btrfs_disk_key copy_key;
2591 generation = btrfs_header_generation(node);
2593 if (level < root_level && nrefs->full_backref[level + 1] &&
2594 owner != root->objectid) {
2595 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2598 key.objectid = bytenr;
/* Non-skinny layout: EXTENT_ITEM key plus a trailing tree_block_info. */
2599 if (!skinny_metadata) {
2600 key.type = BTRFS_EXTENT_ITEM_KEY;
2601 key.offset = node_size;
2602 size += sizeof(*bi);
2604 key.type = BTRFS_METADATA_ITEM_KEY;
2608 btrfs_release_path(&path);
2609 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2615 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start at zero refs; the reference itself is added further down. */
2617 btrfs_set_extent_refs(eb, ei, 0);
2618 btrfs_set_extent_generation(eb, ei, generation);
2619 btrfs_set_extent_flags(eb, ei, flags);
2621 if (!skinny_metadata) {
2622 bi = (struct btrfs_tree_block_info *)(ei + 1);
2623 memset_extent_buffer(eb, 0, (unsigned long)bi,
2625 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2626 btrfs_set_disk_key_type(&copy_key, 0);
2627 btrfs_set_disk_key_offset(&copy_key, 0);
2629 btrfs_set_tree_block_level(eb, bi, level);
2630 btrfs_set_tree_block_key(eb, bi, &copy_key);
2632 btrfs_mark_buffer_dirty(eb);
2633 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2634 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2637 nrefs->refs[level] = 0;
2638 nrefs->full_backref[level] =
2639 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640 btrfs_release_path(&path);
/* Parent in full backref mode and foreign owner: use a shared (parent) ref. */
2643 if (level < root_level && nrefs->full_backref[level + 1] &&
2644 owner != root->objectid)
2645 parent = nrefs->bytenr[level + 1];
2647 /* increase the ref */
2648 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649 parent, root->objectid, level, 0);
2651 nrefs->refs[level]++;
2653 btrfs_release_path(&path);
2656 "failed to repair tree block ref start %llu root %llu due to %s",
2657 bytenr, root->objectid, strerror(-ret));
2659 printf("Added one tree block ref start %llu %s %llu\n",
2660 bytenr, parent ? "parent" : "root",
2661 parent ? parent : root->objectid);
2662 err &= ~BACKREF_MISSING;
/*
 * Forward declarations used by walk_down_tree_v2().
 * NOTE(review): check_inode_item() is already declared earlier in this file
 * with the same prototype, so this repeat looks redundant.
 */
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669 unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671 struct extent_buffer *eb, u64 bytenr,
2672 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674 struct btrfs_root *root, struct btrfs_path *path,
2675 struct node_refs *nrefs, int account_bytes);
2678 * @trans just for lowmem repair mode
2679 * @check all if not 0 then check all tree block backrefs and items
2680 * 0 then just check relationship of items in fs tree(s)
2682 * Returns >0 Found error, should continue
2683 * Returns <0 Fatal error, must exit the whole check
2684 * Returns 0 No errors found
/*
 * Walk @path downwards from path->nodes[*level], checking every tree block
 * that is visited.
 *
 * For each block the tree block backref is checked with
 * check_tree_block_ref() and passed on to repair_tree_block_ref(); in
 * check_all mode its bytes are accounted with account_bytes().  Leaves are
 * validated with btrfs_check_leaf() and then handed to process_one_leaf_v2()
 * and check_leaf_items().  Nodes are validated with btrfs_check_node(); the
 * child referenced by path->slots[*level] is then looked up or read,
 * validated, and installed in @path one level further down.
 */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687 struct btrfs_root *root, struct btrfs_path *path,
2688 int *level, struct node_refs *nrefs, int ext_ref,
2692 enum btrfs_tree_block_status status;
2695 struct btrfs_fs_info *fs_info = root->fs_info;
2696 struct extent_buffer *next;
2697 struct extent_buffer *cur;
2701 int account_file_data = 0;
2703 WARN_ON(*level < 0);
2704 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Record ref information for the block the walk starts from */
2706 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707 path->nodes[*level], nrefs, *level, check_all);
2711 while (*level >= 0) {
2712 WARN_ON(*level < 0);
2713 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714 cur = path->nodes[*level];
2715 bytenr = btrfs_header_bytenr(cur);
2716 check = nrefs->need_check[*level];
/* The level stored in the block header must match our position in the path */
2718 if (btrfs_header_level(cur) != *level)
2721 * Update bytes accounting and check tree block ref
2722 * NOTE: Doing accounting and check before checking nritems
2723 * is necessary because of empty node/leaf.
2725 if ((check_all && !nrefs->checked[*level]) ||
2726 (!check_all && nrefs->need_check[*level])) {
2727 ret = check_tree_block_ref(root, cur,
2728 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729 btrfs_header_owner(cur), nrefs);
2732 ret = repair_tree_block_ref(trans, root,
2733 path->nodes[*level], nrefs, *level, ret);
/*
 * In check_all mode, account this block's bytes when it needs checking and
 * its recorded ref count is non-zero; account_file_data is later passed to
 * check_leaf_items() as its account_bytes argument.
 */
2736 if (check_all && nrefs->need_check[*level] &&
2737 nrefs->refs[*level]) {
2738 account_bytes(root, path, *level);
2739 account_file_data = 1;
2741 nrefs->checked[*level] = 1;
2744 if (path->slots[*level] >= btrfs_header_nritems(cur))
2747 /* Don't forget to check leaf/node validation */
2749 /* skip duplicate check */
2750 if (check || !check_all) {
2751 ret = btrfs_check_leaf(root, NULL, cur);
2752 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2760 ret = process_one_leaf_v2(root, path, nrefs,
2763 ret = check_leaf_items(trans, root, path,
2764 nrefs, account_file_data);
2768 if (check || !check_all) {
2769 ret = btrfs_check_node(root, NULL, cur);
2770 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Locate the child block referenced by the current slot of this node */
2777 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2780 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2785 * check all trees in check_chunks_and_extent_v2
2786 * check shared node once in check_fs_roots
2788 if (!check_all && !nrefs->need_check[*level - 1]) {
2789 path->slots[*level]++;
/*
 * Prefer an already cached, up-to-date copy of the child; otherwise drop it
 * and read the block again with read_tree_block().
 */
2793 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795 free_extent_buffer(next);
2796 reada_walk_down(root, cur, path->slots[*level]);
2797 next = read_tree_block(fs_info, bytenr, ptr_gen);
2798 if (!extent_buffer_uptodate(next)) {
/* Unreadable child: record a corrupt extent keyed by this slot's node key */
2799 struct btrfs_key node_key;
2801 btrfs_node_key_to_cpu(path->nodes[*level],
2803 path->slots[*level]);
2804 btrfs_add_corrupt_extent_record(fs_info,
2805 &node_key, path->nodes[*level]->start,
2806 fs_info->nodesize, *level);
2812 ret = check_child_node(cur, path->slots[*level], next);
/* Validate the child itself before descending into it */
2817 if (btrfs_is_leaf(next))
2818 status = btrfs_check_leaf(root, NULL, next);
2820 status = btrfs_check_node(root, NULL, next);
2821 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at the lower level */
2827 *level = *level - 1;
2828 free_extent_buffer(path->nodes[*level]);
2829 path->nodes[*level] = next;
2830 path->slots[*level] = 0;
2831 account_file_data = 0;
2833 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Move @path upwards after the subtree below *level has been walked.
 *
 * Starting at *level, each populated level is tested for a slot following
 * the current one.  A level without one has its extent buffer released and
 * its path->nodes[] entry cleared; leave_shared_node() is called when that
 * level equals wc->active_node.  *level must never exceed wc->active_node
 * at that point (BUG_ON).
 */
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839 struct walk_control *wc, int *level)
2842 struct extent_buffer *leaf;
2844 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2845 leaf = path->nodes[i];
/* Is there still a slot after the current one at this level? */
2846 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2851 free_extent_buffer(path->nodes[*level]);
2852 path->nodes[*level] = NULL;
2853 BUG_ON(*level > wc->active_node);
2854 if (*level == wc->active_node)
2855 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without a walk_control: starting at *level,
 * each populated level is tested for a slot following the current one, and
 * a level without one has its extent buffer released and its path->nodes[]
 * entry cleared.
 */
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2866 struct extent_buffer *leaf;
2868 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2869 leaf = path->nodes[i];
/* Is there still a slot after the current one at this level? */
2870 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2875 free_extent_buffer(path->nodes[*level]);
2876 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The tests look at: presence of the inode item and absence of recorded
 * errors, nlink == 1 with found_link == 0, a non-empty backref list whose
 * first entry was found as an inode ref, has index 0 and the two-byte name
 * "..", and was not found as a dir index or dir item.
 */
2883 static int check_root_dir(struct inode_record *rec)
2885 struct inode_backref *backref;
2888 if (!rec->found_inode_item || rec->errors)
2890 if (rec->nlink != 1 || rec->found_link != 0)
2892 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2894 backref = to_inode_backref(rec->backrefs.next);
2895 if (!backref->found_inode_ref)
2897 if (backref->index != 0 || backref->namelen != 2 ||
2898 memcmp(backref->name, "..", 2))
2900 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of directory rec->ino with
 * rec->found_size, clear I_ERR_DIR_ISIZE_WRONG in the record and report the
 * change on stdout.  @path is released before returning.
 */
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root, struct btrfs_path *path,
2909 struct inode_record *rec)
2911 struct btrfs_inode_item *ei;
2912 struct btrfs_key key;
/* Search key uses the largest possible offset for this inode's item type */
2915 key.objectid = rec->ino;
2916 key.type = BTRFS_INODE_ITEM_KEY;
2917 key.offset = (u64)-1;
2919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2923 if (!path->slots[0]) {
/* Make sure the item at the current slot really belongs to this inode */
2930 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931 if (key.objectid != rec->ino) {
2936 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937 struct btrfs_inode_item);
2938 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939 btrfs_mark_buffer_dirty(path->nodes[0]);
2940 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942 root->root_key.objectid);
2944 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * @path, and clear I_ERR_NO_ORPHAN_ITEM in the record.
 */
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949 struct btrfs_root *root,
2950 struct btrfs_path *path,
2951 struct inode_record *rec)
2955 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956 btrfs_release_path(path);
2958 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size, clear I_ERR_FILE_NBYTES_WRONG in the record and report
 * the change on stdout.  @path is released before returning.
 */
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963 struct btrfs_root *root,
2964 struct btrfs_path *path,
2965 struct inode_record *rec)
2967 struct btrfs_inode_item *ei;
2968 struct btrfs_key key;
2971 key.objectid = rec->ino;
2972 key.type = BTRFS_INODE_ITEM_KEY;
2975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2982 /* Since ret == 0, no need to check anything */
2983 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984 struct btrfs_inode_item);
/* The value written as nbytes is the record's found_size */
2985 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986 btrfs_mark_buffer_dirty(path->nodes[0]);
2987 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988 printf("reset nbytes for ino %llu root %llu\n",
2989 rec->ino, root->root_key.objectid);
2991 btrfs_release_path(path);
/*
 * Recreate the dir index item described by @backref for inode @rec.
 *
 * Runs in its own transaction: an empty item keyed
 * (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index) is inserted and filled
 * with a dir item that points at (rec->ino, BTRFS_INODE_ITEM_KEY, 0), carries
 * the file type derived from rec->imode, no data, and the backref's name.
 * Afterwards the backref is marked as having its dir index, and the parent
 * directory's record has found_size and I_ERR_DIR_ISIZE_WRONG updated.
 *
 * Returns PTR_ERR() of the transaction handle when the transaction cannot
 * be started.
 */
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996 struct cache_tree *inode_cache,
2997 struct inode_record *rec,
2998 struct inode_backref *backref)
3000 struct btrfs_path path;
3001 struct btrfs_trans_handle *trans;
3002 struct btrfs_dir_item *dir_item;
3003 struct extent_buffer *leaf;
3004 struct btrfs_key key;
3005 struct btrfs_disk_key disk_key;
3006 struct inode_record *dir_rec;
3007 unsigned long name_ptr;
/* Item payload: the dir item header immediately followed by the name */
3008 u32 data_size = sizeof(*dir_item) + backref->namelen;
3011 trans = btrfs_start_transaction(root, 1);
3013 return PTR_ERR(trans);
3015 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016 (unsigned long long)rec->ino);
3018 btrfs_init_path(&path);
3019 key.objectid = backref->dir;
3020 key.type = BTRFS_DIR_INDEX_KEY;
3021 key.offset = backref->index;
3022 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3025 leaf = path.nodes[0];
3026 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored inside the dir item: the inode item of rec->ino */
3028 disk_key.objectid = cpu_to_le64(rec->ino);
3029 disk_key.type = BTRFS_INODE_ITEM_KEY;
3030 disk_key.offset = 0;
3032 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034 btrfs_set_dir_data_len(leaf, dir_item, 0);
3035 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly behind the dir item header */
3036 name_ptr = (unsigned long)(dir_item + 1);
3037 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038 btrfs_mark_buffer_dirty(leaf);
3039 btrfs_release_path(&path);
/* NOTE(review): the result of btrfs_commit_transaction() is not checked */
3040 btrfs_commit_transaction(trans, root);
3042 backref->found_dir_index = 1;
3043 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044 BUG_ON(IS_ERR(dir_rec));
/*
 * The new index entry adds namelen to the directory's found size; re-derive
 * the isize error flag from the updated value.
 */
3047 dir_rec->found_size += backref->namelen;
3048 if (dir_rec->found_size == dir_rec->isize &&
3049 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051 if (dir_rec->found_size != dir_rec->isize)
3052 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the dir index entry described by @backref from @root.
 *
 * Runs in its own transaction: the entry is looked up with
 * btrfs_lookup_dir_index() and removed with btrfs_del_item() or
 * btrfs_delete_one_dir_name(); the path is released and the transaction
 * committed on both exit paths shown below.
 *
 * Returns PTR_ERR() of the transaction handle when the transaction cannot
 * be started.
 */
3057 static int delete_dir_index(struct btrfs_root *root,
3058 struct inode_backref *backref)
3060 struct btrfs_trans_handle *trans;
3061 struct btrfs_dir_item *di;
3062 struct btrfs_path path;
3065 trans = btrfs_start_transaction(root, 1);
3067 return PTR_ERR(trans);
3069 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070 (unsigned long long)backref->dir,
3071 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072 (unsigned long long)root->objectid);
3074 btrfs_init_path(&path);
3075 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076 backref->name, backref->namelen,
3077 backref->index, -1);
/* Exit path taken right after the lookup */
3080 btrfs_release_path(&path);
3081 btrfs_commit_transaction(trans, root);
3088 ret = btrfs_del_item(trans, root, &path);
3090 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3092 btrfs_release_path(&path);
3093 btrfs_commit_transaction(trans, root);
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root, u64 ino, u64 size,
3099 u64 nbytes, u64 nlink, u32 mode)
3101 struct btrfs_inode_item ii;
3102 time_t now = time(NULL);
3105 btrfs_set_stack_inode_size(&ii, size);
3106 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107 btrfs_set_stack_inode_nlink(&ii, nlink);
3108 btrfs_set_stack_inode_mode(&ii, mode);
3109 btrfs_set_stack_inode_generation(&ii, trans->transid);
3110 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3118 ret = btrfs_insert_inode(trans, root, ino, &ii);
3121 warning("root %llu inode %llu recreating inode item, this may "
3122 "be incomplete, please check permissions and content after "
3123 "the fsck completes.\n", (unsigned long long)root->objectid,
3124 (unsigned long long)ino);
/*
 * Lowmem-mode wrapper around __create_inode_item(): creates an inode item
 * for @ino with size, nbytes and nlink all 0.  The mode is S_IFDIR | 0755
 * when filetype is BTRFS_FT_DIR and S_IFREG | 0755 for anything else.
 */
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130 struct btrfs_root *root, u64 ino,
3133 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3135 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec in its own transaction.
 *
 * @root_dir: non-zero when @rec is the root directory; the link count is
 *            then forced to 1 instead of rec->found_link.
 *
 * A record with a dir item becomes a directory (S_IFDIR | 0755, size
 * rec->found_size), with a message on stderr when it also has file extents.
 * Otherwise it becomes a regular file (S_IFREG | 0755, size
 * rec->extent_end).
 */
3138 static int create_inode_item(struct btrfs_root *root,
3139 struct inode_record *rec, int root_dir)
3141 struct btrfs_trans_handle *trans;
3147 trans = btrfs_start_transaction(root, 1);
3148 if (IS_ERR(trans)) {
3149 ret = PTR_ERR(trans);
3153 nlink = root_dir ? 1 : rec->found_link;
3154 if (rec->found_dir_item) {
3155 if (rec->found_file_extent)
3156 fprintf(stderr, "root %llu inode %llu has both a dir "
3157 "item and extents, unsure if it is a dir or a "
3158 "regular file so setting it as a directory\n",
3159 (unsigned long long)root->objectid,
3160 (unsigned long long)rec->ino);
3161 mode = S_IFDIR | 0755;
3162 size = rec->found_size;
/* This condition is the exact negation of the one above, i.e. a plain else */
3163 } else if (!rec->found_dir_item) {
3164 size = rec->extent_end;
3165 mode = S_IFREG | 0755;
3168 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3170 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode record @rec.
 *
 * Each backref on rec->backrefs is run through these cases:
 *  - root directory without an inode item (non-delete pass): recreate the
 *    inode item with create_inode_item(root, rec, 1);
 *  - index 0 of the root directory is special and left alone;
 *  - a dir index without an inode ref, or one whose index does not match
 *    (REF_ERR_INDEX_UNMATCH): delete_dir_index(), then drop the backref
 *    from the list;
 *  - dir item and inode ref present but dir index missing (non-delete
 *    pass): add_missing_dir_index(); a backref that then has dir item, dir
 *    index and inode ref with no errors is dropped from the list;
 *  - only the inode ref present (non-delete pass): after
 *    check_dir_conflict(), insert the dir index/item pair in a new
 *    transaction;
 *  - all three present without index mismatch but no inode item
 *    (non-delete pass): create_inode_item(root, rec, 0).
 *
 * Returns ret if it is non-zero, otherwise the value of repaired.
 */
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175 struct inode_record *rec,
3176 struct cache_tree *inode_cache,
3179 struct inode_backref *tmp, *backref;
3180 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: backrefs may be unlinked from the list inside the loop */
3184 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185 if (!delete && rec->ino == root_dirid) {
3186 if (!rec->found_inode_item) {
3187 ret = create_inode_item(root, rec, 1);
3194 /* Index 0 for root dir's are special, don't mess with it */
3195 if (rec->ino == root_dirid && backref->index == 0)
3199 ((backref->found_dir_index && !backref->found_inode_ref) ||
3200 (backref->found_dir_index && backref->found_inode_ref &&
3201 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202 ret = delete_dir_index(root, backref);
3206 list_del(&backref->list);
3211 if (!delete && !backref->found_dir_index &&
3212 backref->found_dir_item && backref->found_inode_ref) {
3213 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref that is complete and error free needs no further attention */
3218 if (backref->found_dir_item &&
3219 backref->found_dir_index) {
3220 if (!backref->errors &&
3221 backref->found_inode_ref) {
3222 list_del(&backref->list);
3229 if (!delete && (!backref->found_dir_index &&
3230 !backref->found_dir_item &&
3231 backref->found_inode_ref)) {
3232 struct btrfs_trans_handle *trans;
3233 struct btrfs_key location;
3235 ret = check_dir_conflict(root, backref->name,
3241 * let nlink fixing routine to handle it,
3242 * which can do it better.
/* The new dir item/index will point at this inode's inode item */
3247 location.objectid = rec->ino;
3248 location.type = BTRFS_INODE_ITEM_KEY;
3249 location.offset = 0;
3251 trans = btrfs_start_transaction(root, 1);
3252 if (IS_ERR(trans)) {
3253 ret = PTR_ERR(trans);
3256 fprintf(stderr, "adding missing dir index/item pair "
3258 (unsigned long long)rec->ino);
3259 ret = btrfs_insert_dir_item(trans, root, backref->name,
3261 backref->dir, &location,
3262 imode_to_type(rec->imode),
3265 btrfs_commit_transaction(trans, root);
3269 if (!delete && (backref->found_inode_ref &&
3270 backref->found_dir_index &&
3271 backref->found_dir_item &&
3272 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273 !rec->found_inode_item)) {
3274 ret = create_inode_item(root, rec, 0);
3281 return ret ? ret : repaired;
3285 * To determine the file type for nlink/inode_item repair
3287 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* type of the inode described by @rec and store it
 * in *type.
 *
 * When the inode item was found, the type is derived from rec->imode.
 * Otherwise the filetype of the first backref that was seen as a dir index
 * or dir item is used.
 */
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3292 struct inode_backref *backref;
3294 /* For inode item recovered case */
3295 if (rec->found_inode_item) {
3296 *type = imode_to_type(rec->imode);
3300 list_for_each_entry(backref, &rec->backrefs, list) {
3301 if (backref->found_dir_index || backref->found_dir_item) {
3302 *type = backref->filetype;
3310 * To determine the file name for nlink repair
3312 * Return 0 if file name is found, set name and namelen.
3313 * Return -ENOENT if file name is not found.
/*
 * Find a name for the inode described by @rec: the name of the first
 * backref that was seen as a dir index, dir item or inode ref is copied
 * into @name and its length stored in *namelen.
 *
 * The copy is not NUL-terminated and is not bounded by the size of @name;
 * the caller must supply a buffer large enough for backref->namelen bytes.
 */
3315 static int find_file_name(struct inode_record *rec,
3316 char *name, int *namelen)
3318 struct inode_backref *backref;
3320 list_for_each_entry(backref, &rec->backrefs, list) {
3321 if (backref->found_dir_index || backref->found_dir_item ||
3322 backref->found_inode_ref) {
3323 memcpy(name, backref->name, backref->namelen);
3324 *namelen = backref->namelen;
3331 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from its backrefs.
 *
 * Steps: reset rec->found_link, unlink every backref with btrfs_unlink()
 * while dropping the incomplete ones from the list, set nlink in the inode
 * item to 0, then re-add each remaining backref with btrfs_add_link().
 * @path is released before returning.
 */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root,
3334 struct btrfs_path *path,
3335 struct inode_record *rec)
3337 struct inode_backref *backref;
3338 struct inode_backref *tmp;
3339 struct btrfs_key key;
3340 struct btrfs_inode_item *inode_item;
3343 /* We don't believe this either, reset it and iterate backref */
3344 rec->found_link = 0;
3346 /* Remove all backref including the valid ones */
3347 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349 backref->index, backref->name,
3350 backref->namelen, 0);
3354 /* remove invalid backref, so it won't be added back */
/* "invalid" means: not seen as dir index, dir item and inode ref all at once */
3355 if (!(backref->found_dir_index &&
3356 backref->found_dir_item &&
3357 backref->found_inode_ref)) {
3358 list_del(&backref->list);
3365 /* Set nlink to 0 */
3366 key.objectid = rec->ino;
3367 key.type = BTRFS_INODE_ITEM_KEY;
3369 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3376 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377 struct btrfs_inode_item);
3378 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379 btrfs_mark_buffer_dirty(path->nodes[0]);
3380 btrfs_release_path(path);
3383 * Add back valid inode_ref/dir_item/dir_index,
3384 * add_link() will handle the nlink inc, so new nlink must be correct
3386 list_for_each_entry(backref, &rec->backrefs, list) {
3387 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388 backref->name, backref->namelen,
3389 backref->filetype, &backref->index, 1, 0);
3394 btrfs_release_path(path);
/*
 * Look up the highest inode number in use in @root and store it in
 * *highest_ino.
 *
 * The search key is an inode item key for BTRFS_LAST_FREE_OBJECTID; the
 * object id is taken from the item in the slot before the one the search
 * returned.  A result at or above BTRFS_LAST_FREE_OBJECTID is tested for
 * separately.  @path is initialised here and released before returning.
 */
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399 struct btrfs_root *root,
3400 struct btrfs_path *path,
3403 struct btrfs_key key, found_key;
3406 btrfs_init_path(path);
3407 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3409 key.type = BTRFS_INODE_ITEM_KEY;
3410 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* NOTE(review): reads slot - 1; confirm slots[0] cannot be 0 on this path */
3412 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413 path->slots[0] - 1);
3414 *highest_ino = found_key.objectid;
3417 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3419 btrfs_release_path(path);
3424 * Link inode to dir 'lost+found'. Increase @ref_count.
3426 * Returns 0 means success.
3427 * Returns <0 means failure.
/*
 * Link inode @ino into the "lost+found" directory of @root under the name
 * held in @namebuf.
 *
 * The directory is created with btrfs_mkdir() below
 * BTRFS_FIRST_FREE_OBJECTID, using an inode number obtained through
 * get_highest_inode().  If the name is already taken (-EEXIST), ".<ino>" is
 * appended to @namebuf and the link is retried, for as long as the result
 * still fits into BTRFS_NAME_LEN.  @namebuf is therefore modified and must
 * provide BTRFS_NAME_LEN bytes.
 */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430 struct btrfs_root *root,
3431 struct btrfs_path *path,
3432 u64 ino, char *namebuf, u32 name_len,
3433 u8 filetype, u64 *ref_count)
3435 char *dir_name = "lost+found";
3440 btrfs_release_path(path);
3441 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3446 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3450 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3453 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454 namebuf, name_len, filetype, NULL, 1, 0);
3456 * Add ".INO" suffix several times to handle case where
3457 * "FILENAME.INO" is already taken by another file.
3459 while (ret == -EEXIST) {
3461 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3463 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
/* Append the suffix in place, bounded by what is left of BTRFS_NAME_LEN */
3467 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3469 name_len += count_digits(ino) + 1;
3470 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471 name_len, filetype, NULL, 1, 0);
3474 error("failed to link the inode %llu to %s dir: %s",
3475 ino, dir_name, strerror(-ret));
3480 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481 name_len, namebuf, dir_name);
3483 btrfs_release_path(path);
3485 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair the link count of the inode described by @rec.
 *
 * A name and file type are recovered from the record first, falling back
 * to the decimal inode number and BTRFS_FT_REG_FILE.  reset_nlink() then
 * rebuilds the links from the backrefs; an inode left with found_link == 0
 * is linked into lost+found.  I_ERR_LINK_COUNT_WRONG is cleared and @path
 * released at the end.
 */
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491 struct btrfs_root *root,
3492 struct btrfs_path *path,
3493 struct inode_record *rec)
3495 char namebuf[BTRFS_NAME_LEN] = {0};
3498 int name_recovered = 0;
3499 int type_recovered = 0;
3503 * Get file name and type first before these invalid inode ref
3504 * are deleted by remove_all_invalid_backref()
/* Both helpers are negated here, so a zero result counts as "recovered" */
3506 name_recovered = !find_file_name(rec, namebuf, &namelen);
3507 type_recovered = !find_file_type(rec, &type);
3509 if (!name_recovered) {
3510 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511 rec->ino, rec->ino);
3512 namelen = count_digits(rec->ino);
3513 sprintf(namebuf, "%llu", rec->ino);
3516 if (!type_recovered) {
3517 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3519 type = BTRFS_FT_REG_FILE;
3523 ret = reset_nlink(trans, root, path, rec);
3526 "Failed to reset nlink for inode %llu: %s\n",
3527 rec->ino, strerror(-ret));
3531 if (rec->found_link == 0) {
/*
 * NOTE(review): the cast assumes rec->found_link is a 64-bit field; confirm
 * against the definition of struct inode_record.
 */
3532 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533 namebuf, namelen, type,
3534 (u64 *)&rec->found_link);
3538 printf("Fixed the nlink of inode %llu\n", rec->ino);
3541 * Clear the flag anyway, or we will loop forever for the same inode
3542 * as it will not be removed from the bad inode list and the dead loop
3545 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546 btrfs_release_path(path);
3551 * Check if there is any normal(reg or prealloc) file extent for given
3553 * This is used to determine the file type when neither its dir_index/item or
3554 * inode_item exists.
3556 * This will *NOT* report error, if any error happens, just consider it does
3557 * not have any normal file extent.
/*
 * Search @root for a file extent item of inode @ino that is not an inline
 * extent.
 *
 * The search is read-only (no transaction, no COW).  Items are examined
 * while their key still has object id @ino and type BTRFS_EXTENT_DATA_KEY.
 * The local path is released before returning.
 */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3561 struct btrfs_path path;
3562 struct btrfs_key key;
3563 struct btrfs_key found_key;
3564 struct btrfs_file_extent_item *fi;
3568 btrfs_init_path(&path);
3570 key.type = BTRFS_EXTENT_DATA_KEY;
3573 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Inexact match that landed past the last item of the leaf: move on to the next leaf */
3578 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579 ret = btrfs_next_leaf(root, &path);
3586 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3588 if (found_key.objectid != ino ||
3589 found_key.type != BTRFS_EXTENT_DATA_KEY)
3591 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592 struct btrfs_file_extent_item);
3593 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Anything that is not an inline extent counts as a normal file extent */
3594 if (type != BTRFS_FILE_EXTENT_INLINE) {
3600 btrfs_release_path(&path);
3604 static u32 btrfs_type_to_imode(u8 type)
3606 static u32 imode_by_btrfs_type[] = {
3607 [BTRFS_FT_REG_FILE] = S_IFREG,
3608 [BTRFS_FT_DIR] = S_IFDIR,
3609 [BTRFS_FT_CHRDEV] = S_IFCHR,
3610 [BTRFS_FT_BLKDEV] = S_IFBLK,
3611 [BTRFS_FT_FIFO] = S_IFIFO,
3612 [BTRFS_FT_SOCK] = S_IFSOCK,
3613 [BTRFS_FT_SYMLINK] = S_IFLNK,
3616 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of the inode described by @rec.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed: a normal file extent or orphan extents mean a regular file, a
 * dir item means a directory, and a regular file is assumed when nothing
 * else is known.  The inode is created with btrfs_new_inode().  On the
 * record, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set
 * so that the nlink repair runs afterwards.
 */
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620 struct btrfs_root *root,
3621 struct btrfs_path *path,
3622 struct inode_record *rec)
3626 int type_recovered = 0;
3629 printf("Trying to rebuild inode:%llu\n", rec->ino);
3631 type_recovered = !find_file_type(rec, &filetype);
3634 * Try to determine inode type if type not found.
3636 * For found regular file extent, it must be FILE.
3637 * For found dir_item/index, it must be DIR.
3639 * For undetermined one, use FILE as fallback.
3642 * 1. If found backref(inode_index/item is already handled) to it,
3644 * Need new inode-inode ref structure to allow search for that.
3646 if (!type_recovered) {
3647 if (rec->found_file_extent &&
3648 find_normal_file_extent(root, rec->ino)) {
3650 filetype = BTRFS_FT_REG_FILE;
3651 } else if (rec->found_dir_item) {
3653 filetype = BTRFS_FT_DIR;
3654 } else if (!list_empty(&rec->orphan_extents)) {
3656 filetype = BTRFS_FT_REG_FILE;
3658 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3661 filetype = BTRFS_FT_REG_FILE;
3665 ret = btrfs_new_inode(trans, root, rec->ino,
3666 mode | btrfs_type_to_imode(filetype));
3671 * Here inode rebuild is done, we only rebuild the inode item,
3672 * don't repair the nlink(like move to lost+found).
3673 * That is the job of nlink repair.
3675 * We just fill the record and return
/* Keep the in-memory record in step with the inode item just created */
3677 rec->found_dir_item = 1;
3678 rec->imode = mode | btrfs_type_to_imode(filetype);
3680 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681 /* Ensure the inode_nlinks repair function will be called */
3682 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Resolve the orphan data extents recorded on @rec.
 *
 * Each orphan is first probed with btrfs_get_extent() for a conflict with
 * existing file extents.  A conflicting orphan is reported and released
 * with btrfs_free_extent().  Otherwise a file extent item is inserted for
 * it, rec->found_size grows by its disk_len, and the matching hole is
 * removed from rec->holes.  Every handled orphan is unlinked from the list;
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688 struct btrfs_root *root,
3689 struct btrfs_path *path,
3690 struct inode_record *rec)
3692 struct orphan_data_extent *orphan;
3693 struct orphan_data_extent *tmp;
3696 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3698 * Check for conflicting file extents
3700 * Here we don't know whether the extents is compressed or not,
3701 * so we can only assume it not compressed nor data offset,
3702 * and use its disk_len as extent length.
3704 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705 orphan->offset, orphan->disk_len, 0);
3706 btrfs_release_path(path);
3711 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712 orphan->disk_bytenr, orphan->disk_len);
3713 ret = btrfs_free_extent(trans,
3714 root->fs_info->extent_root,
3715 orphan->disk_bytenr, orphan->disk_len,
3716 0, root->objectid, orphan->objectid,
/* disk_len is passed for both trailing length arguments */
3721 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722 orphan->offset, orphan->disk_bytenr,
3723 orphan->disk_len, orphan->disk_len);
3727 /* Update file size info */
3728 rec->found_size += orphan->disk_len;
3729 if (rec->found_size == rec->nbytes)
3730 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3732 /* Update the file extent hole info too */
3733 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3737 if (RB_EMPTY_ROOT(&rec->holes))
3738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3740 list_del(&orphan->list);
3743 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of rec->ino.
 *
 * Every entry of rec->holes is punched with btrfs_punch_hole() and then
 * removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A separate branch punches one hole covering the file from
 * offset 0 up to isize rounded up to the sector size.  The repair is
 * reported on stdout.
 */
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749 struct btrfs_root *root,
3750 struct btrfs_path *path,
3751 struct inode_record *rec)
3753 struct rb_node *node;
3754 struct file_extent_hole *hole;
/* Always work on the first remaining hole; it is deleted once punched */
3758 node = rb_first(&rec->holes);
3762 hole = rb_entry(node, struct file_extent_hole, node);
3763 ret = btrfs_punch_hole(trans, root, rec->ino,
3764 hole->start, hole->len);
3767 ret = del_file_extent_hole(&rec->holes, hole->start,
3771 if (RB_EMPTY_ROOT(&rec->holes))
3772 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3773 node = rb_first(&rec->holes);
3775 /* special case for a file losing all its file extent */
3777 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778 round_up(rec->isize,
3779 root->fs_info->sectorsize));
3783 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * Nothing is attempted unless at least one of the repairable error bits
 * listed below is set.  The helpers run in a fixed order; each one after
 * the first runs only while ret is still 0 and its error bit is set, so
 * the first failure stops the chain.  The transaction is committed in all
 * cases.
 *
 * Returns PTR_ERR() of the transaction handle when the transaction cannot
 * be started.
 */
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3791 struct btrfs_trans_handle *trans;
3792 struct btrfs_path path;
3795 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796 I_ERR_NO_ORPHAN_ITEM |
3797 I_ERR_LINK_COUNT_WRONG |
3798 I_ERR_NO_INODE_ITEM |
3799 I_ERR_FILE_EXTENT_ORPHAN |
3800 I_ERR_FILE_EXTENT_DISCOUNT|
3801 I_ERR_FILE_NBYTES_WRONG)))
3805 * For nlink repair, it may create a dir and add link, so
3806 * 2 for parent(256)'s dir_index and dir_item
3807 * 2 for lost+found dir's inode_item and inode_ref
3808 * 1 for the new inode_ref of the file
3809 * 2 for lost+found dir's dir_index and dir_item for the file
3811 trans = btrfs_start_transaction(root, 7);
3813 return PTR_ERR(trans);
3815 btrfs_init_path(&path);
/* The inode item is rebuilt first; that helper also sets I_ERR_LINK_COUNT_WRONG */
3816 if (rec->errors & I_ERR_NO_INODE_ITEM)
3817 ret = repair_inode_no_item(trans, root, &path, rec);
3818 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823 ret = repair_inode_isize(trans, root, &path, rec);
3824 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827 ret = repair_inode_nlinks(trans, root, &path, rec);
3828 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829 ret = repair_inode_nbytes(trans, root, &path, rec);
3830 btrfs_commit_transaction(trans, root);
3831 btrfs_release_path(&path);
/*
 * Verify, and in repair mode try to fix, all inode records collected for
 * @root in @inode_cache.  The cache is emptied in the process.
 *
 * Phases:
 *  1. A root with zero refs gets only a warning if records exist for it.
 *  2. In repair mode the backrefs of every record are repaired with
 *     repair_inode_backrefs(), in stages.
 *  3. The root directory record is checked with check_root_dir(); in the
 *     repair branch the root directory is recreated with
 *     btrfs_make_root_dir().
 *  4. Each remaining record is removed from the cache.  The root directory
 *     and BTRFS_ORPHAN_OBJECTID records are simply freed.  For the others,
 *     I_ERR_NO_ORPHAN_ITEM is re-evaluated with check_orphan_item(),
 *     missing inode items and wrong link counts are flagged,
 *     try_repair_inode() is called, and unresolved errors and backrefs are
 *     printed to stderr.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3835 static int check_inode_recs(struct btrfs_root *root,
3836 struct cache_tree *inode_cache)
3838 struct cache_extent *cache;
3839 struct ptr_node *node;
3840 struct inode_record *rec;
3841 struct inode_backref *backref;
3846 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3848 if (btrfs_root_refs(&root->root_item) == 0) {
3849 if (!cache_tree_empty(inode_cache))
3850 fprintf(stderr, "warning line %d\n", __LINE__);
3855 * We need to repair backrefs first because we could change some of the
3856 * errors in the inode recs.
3858 * We also need to go through and delete invalid backrefs first and then
3859 * add the correct ones second. We do this because we may get EEXIST
3860 * when adding back the correct index because we hadn't yet deleted the
3863 * For example, if we were missing a dir index then the directories
3864 * isize would be wrong, so if we fixed the isize to what we thought it
3865 * would be and then fixed the backref we'd still have a invalid fs, so
3866 * we need to add back the dir index and then check to see if the isize
3871 if (stage == 3 && !err)
3874 cache = search_cache_extent(inode_cache, 0);
3875 while (repair && cache) {
3876 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor now; the current entry may be removed below */
3878 cache = next_cache_extent(cache);
3880 /* Need to free everything up and rescan */
3882 remove_cache_extent(inode_cache, &node->cache);
3884 free_inode_rec(rec);
3888 if (list_empty(&rec->backrefs))
3891 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory checks */
3905 rec = get_inode_rec(inode_cache, root_dirid, 0);
3906 BUG_ON(IS_ERR(rec));
3908 ret = check_root_dir(rec);
3910 fprintf(stderr, "root %llu root dir %llu error\n",
3911 (unsigned long long)root->root_key.objectid,
3912 (unsigned long long)root_dirid);
3913 print_inode_error(root, rec);
3918 struct btrfs_trans_handle *trans;
3920 trans = btrfs_start_transaction(root, 1);
3921 if (IS_ERR(trans)) {
3922 err = PTR_ERR(trans);
3927 "root %llu missing its root dir, recreating\n",
3928 (unsigned long long)root->objectid);
3930 ret = btrfs_make_root_dir(trans, root, root_dirid);
3933 btrfs_commit_transaction(trans, root);
3937 fprintf(stderr, "root %llu root dir %llu not found\n",
3938 (unsigned long long)root->root_key.objectid,
3939 (unsigned long long)root_dirid);
/* Drain the cache: every record is removed from it and eventually freed */
3943 cache = search_cache_extent(inode_cache, 0);
3946 node = container_of(cache, struct ptr_node, cache);
3948 remove_cache_extent(inode_cache, &node->cache);
3950 if (rec->ino == root_dirid ||
3951 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952 free_inode_rec(rec);
3956 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957 ret = check_orphan_item(root, rec->ino);
3959 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960 if (can_free_inode_rec(rec)) {
3961 free_inode_rec(rec);
3966 if (!rec->found_inode_item)
3967 rec->errors |= I_ERR_NO_INODE_ITEM;
3968 if (rec->found_link != rec->nlink)
3969 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3971 ret = try_repair_inode(root, rec);
3972 if (ret == 0 && can_free_inode_rec(rec)) {
3973 free_inode_rec(rec);
3979 if (!(repair && ret == 0))
3981 print_inode_error(root, rec);
/* Tag each remaining backref with what is missing before printing it */
3982 list_for_each_entry(backref, &rec->backrefs, list) {
3983 if (!backref->found_dir_item)
3984 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985 if (!backref->found_dir_index)
3986 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987 if (!backref->found_inode_ref)
3988 backref->errors |= REF_ERR_NO_INODE_REF;
3989 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990 " namelen %u name %s filetype %d errors %x",
3991 (unsigned long long)backref->dir,
3992 (unsigned long long)backref->index,
3993 backref->namelen, backref->name,
3994 backref->filetype, backref->errors);
3995 print_ref_error(backref->errors);
3997 free_inode_rec(rec);
3999 return (error > 0) ? -1 : 0;
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4005 struct cache_extent *cache;
4006 struct root_record *rec = NULL;
4009 cache = lookup_cache_extent(root_cache, objectid, 1);
4011 rec = container_of(cache, struct root_record, cache);
4013 rec = calloc(1, sizeof(*rec));
4015 return ERR_PTR(-ENOMEM);
4016 rec->objectid = objectid;
4017 INIT_LIST_HEAD(&rec->backrefs);
4018 rec->cache.start = objectid;
4019 rec->cache.size = 1;
4021 ret = insert_cache_extent(root_cache, &rec->cache);
4023 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and the name
 * (@name, @namelen); if there is none, allocate a new one, fill it in and
 * append it to rec->backrefs.
 *
 * A new backref is allocated together with room for the name plus a
 * terminating NUL.  @index is stored in a new backref but is not part of
 * the match.
 */
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029 u64 ref_root, u64 dir, u64 index,
4030 const char *name, int namelen)
4032 struct root_backref *backref;
4034 list_for_each_entry(backref, &rec->backrefs, list) {
4035 if (backref->ref_root != ref_root || backref->dir != dir ||
4036 backref->namelen != namelen)
4038 if (memcmp(name, backref->name, namelen))
/* Not on the list yet: allocate header, name and NUL in one zeroed chunk */
4043 backref = calloc(1, sizeof(*backref) + namelen + 1);
4046 backref->ref_root = ref_root;
4048 backref->index = index;
4049 backref->namelen = namelen;
4050 memcpy(backref->name, name, namelen);
4051 backref->name[namelen] = '\0';
4052 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root record, given the cache_extent embedded in it:
 * the record's backrefs are taken off rec->backrefs one by one until the
 * list is empty.
 */
4056 static void free_root_record(struct cache_extent *cache)
4058 struct root_record *rec;
4059 struct root_backref *backref;
4061 rec = container_of(cache, struct root_record, cache);
4062 while (!list_empty(&rec->backrefs)) {
4063 backref = to_root_backref(rec->backrefs.next);
4064 list_del(&backref->list);
/*
 * Instantiate the cache-tree teardown helper for root records, with
 * free_root_record() as the per-entry destructor.
 * NOTE(review): the generated function is presumably named
 * free_root_recs_tree(); confirm against the macro definition.
 */
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id in @root_cache.
 *
 * @item_type says which kind of item the reference was seen in
 * (BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY or
 * BTRFS_ROOT_BACKREF_KEY) and selects the found_* flag that is set on the
 * matching backref.  @errors is OR-ed into the backref's errors.  For all
 * item types except dir items the index is compared with the one stored
 * earlier (REF_ERR_INDEX_UNMATCH on mismatch) or stored.  Duplicate root
 * refs and root backrefs are flagged.  A backref that has both a forward
 * ref and a dir item is marked reachable.
 */
4073 static int add_root_backref(struct cache_tree *root_cache,
4074 u64 root_id, u64 ref_root, u64 dir, u64 index,
4075 const char *name, int namelen,
4076 int item_type, int errors)
4078 struct root_record *rec;
4079 struct root_backref *backref;
4081 rec = get_root_rec(root_cache, root_id);
4082 BUG_ON(IS_ERR(rec));
4083 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4086 backref->errors |= errors;
/* Index handling applies to every item type except dir items */
4088 if (item_type != BTRFS_DIR_ITEM_KEY) {
4089 if (backref->found_dir_index || backref->found_back_ref ||
4090 backref->found_forward_ref) {
4091 if (backref->index != index)
4092 backref->errors |= REF_ERR_INDEX_UNMATCH;
4094 backref->index = index;
4098 if (item_type == BTRFS_DIR_ITEM_KEY) {
4099 if (backref->found_forward_ref)
4101 backref->found_dir_item = 1;
4102 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103 backref->found_dir_index = 1;
4104 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105 if (backref->found_forward_ref)
4106 backref->errors |= REF_ERR_DUP_ROOT_REF;
4107 else if (backref->found_dir_item)
4109 backref->found_forward_ref = 1;
4110 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111 if (backref->found_back_ref)
4112 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113 backref->found_back_ref = 1;
/* Reachable: referenced by a root ref and named by a dir item */
4118 if (backref->found_forward_ref && backref->found_dir_item)
4119 backref->reachable = 1;
/*
 * Drain the per-root cache @src_cache and feed the DIR_ITEM/DIR_INDEX
 * backrefs of its inode records into the global root cache @dst_cache via
 * add_root_backref(). Each record is removed from @src_cache and released
 * with free_inode_rec().
 *
 * For the tree reloc root the source cache is only freed.
 *
 * NOTE(review): the loop construct, the handling of the is_child_root()
 * result and the return statements are on lines not visible in this listing.
 */
4123 static int merge_root_recs(struct btrfs_root *root,
4124 struct cache_tree *src_cache,
4125 struct cache_tree *dst_cache)
4127 struct cache_extent *cache;
4128 struct ptr_node *node;
4129 struct inode_record *rec;
4130 struct inode_backref *backref;
4133 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134 free_inode_recs_tree(src_cache);
4139 cache = search_cache_extent(src_cache, 0);
4142 node = container_of(cache, struct ptr_node, cache);
4144 remove_cache_extent(src_cache, &node->cache);
4147 ret = is_child_root(root, root->objectid, rec->ino);
4153 list_for_each_entry(backref, &rec->backrefs, list) {
4154 BUG_ON(backref->found_inode_ref);
4155 if (backref->found_dir_item)
4156 add_root_backref(dst_cache, rec->ino,
4157 root->root_key.objectid, backref->dir,
4158 backref->index, backref->name,
4159 backref->namelen, BTRFS_DIR_ITEM_KEY,
4161 if (backref->found_dir_index)
4162 add_root_backref(dst_cache, rec->ino,
4163 root->root_key.objectid, backref->dir,
4164 backref->index, backref->name,
4165 backref->namelen, BTRFS_DIR_INDEX_KEY,
4169 free_inode_rec(rec);
4176 static int check_root_refs(struct btrfs_root *root,
4177 struct cache_tree *root_cache)
4179 struct root_record *rec;
4180 struct root_record *ref_root;
4181 struct root_backref *backref;
4182 struct cache_extent *cache;
4188 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189 BUG_ON(IS_ERR(rec));
4192 /* fixme: this can not detect circular references */
4195 cache = search_cache_extent(root_cache, 0);
4199 rec = container_of(cache, struct root_record, cache);
4200 cache = next_cache_extent(cache);
4202 if (rec->found_ref == 0)
4205 list_for_each_entry(backref, &rec->backrefs, list) {
4206 if (!backref->reachable)
4209 ref_root = get_root_rec(root_cache,
4211 BUG_ON(IS_ERR(ref_root));
4212 if (ref_root->found_ref > 0)
4215 backref->reachable = 0;
4217 if (rec->found_ref == 0)
4223 cache = search_cache_extent(root_cache, 0);
4227 rec = container_of(cache, struct root_record, cache);
4228 cache = next_cache_extent(cache);
4230 if (rec->found_ref == 0 &&
4231 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233 ret = check_orphan_item(root->fs_info->tree_root,
4239 * If we don't have a root item then we likely just have
4240 * a dir item in a snapshot for this root but no actual
4241 * ref key or anything so it's meaningless.
4243 if (!rec->found_root_item)
4246 fprintf(stderr, "fs tree %llu not referenced\n",
4247 (unsigned long long)rec->objectid);
4251 if (rec->found_ref > 0 && !rec->found_root_item)
4253 list_for_each_entry(backref, &rec->backrefs, list) {
4254 if (!backref->found_dir_item)
4255 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256 if (!backref->found_dir_index)
4257 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258 if (!backref->found_back_ref)
4259 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260 if (!backref->found_forward_ref)
4261 backref->errors |= REF_ERR_NO_ROOT_REF;
4262 if (backref->reachable && backref->errors)
4269 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270 (unsigned long long)rec->objectid, rec->found_ref,
4271 rec->found_root_item ? "" : "not found");
4273 list_for_each_entry(backref, &rec->backrefs, list) {
4274 if (!backref->reachable)
4276 if (!backref->errors && rec->found_root_item)
4278 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279 " index %llu namelen %u name %s errors %x\n",
4280 (unsigned long long)backref->ref_root,
4281 (unsigned long long)backref->dir,
4282 (unsigned long long)backref->index,
4283 backref->namelen, backref->name,
4285 print_ref_error(backref->errors);
4288 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item stored at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * For a BTRFS_ROOT_REF_KEY the backref is filed under root key->offset with
 * key->objectid as the referencing root; for the other key type handled
 * here the two ids are swapped.
 *
 * A name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN and the
 * backref is flagged with REF_ERR_NAME_TOO_LONG.
 */
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292 struct btrfs_key *key,
4293 struct cache_tree *root_cache)
4299 struct btrfs_root_ref *ref;
4300 char namebuf[BTRFS_NAME_LEN];
4303 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4305 dirid = btrfs_root_ref_dirid(eb, ref);
4306 index = btrfs_root_ref_sequence(eb, ref);
4307 name_len = btrfs_root_ref_name_len(eb, ref);
4309 if (name_len <= BTRFS_NAME_LEN) {
4313 len = BTRFS_NAME_LEN;
4314 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are read from right after the btrfs_root_ref header. */
4316 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4318 if (key->type == BTRFS_ROOT_REF_KEY) {
4319 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320 index, namebuf, len, key->type, error);
4322 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for a corrupt-block cache tree: recover the
 * btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): the statement releasing it is on a line not visible in this
 * listing.
 */
4328 static void free_corrupt_block(struct cache_extent *cache)
4330 struct btrfs_corrupt_block *corrupt;
4332 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably generates free_corrupt_blocks_tree(), which check_fs_root()
 * calls; confirm against the macro definition.
 */
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4339 * Repair the btree of the given root.
4341 * The fix is to remove the node key in corrupt_blocks cache_tree
4342 * and rebalance the tree.
4343 * After the fix, the btree should be writeable.
4345 static int repair_btree(struct btrfs_root *root,
4346 struct cache_tree *corrupt_blocks)
4348 struct btrfs_trans_handle *trans;
4349 struct btrfs_path path;
4350 struct btrfs_corrupt_block *corrupt;
4351 struct cache_extent *cache;
4352 struct btrfs_key key;
4357 if (cache_tree_empty(corrupt_blocks))
4360 trans = btrfs_start_transaction(root, 1);
4361 if (IS_ERR(trans)) {
4362 ret = PTR_ERR(trans);
4363 fprintf(stderr, "Error starting transaction: %s\n",
4367 btrfs_init_path(&path);
/*
 * Pass 1: for each recorded corrupt block, search for its key with the
 * path stopping at the block's level, drop the pointer found there and
 * free the extent it pointed to.
 */
4368 cache = first_cache_extent(corrupt_blocks);
4370 corrupt = container_of(cache, struct btrfs_corrupt_block,
4372 level = corrupt->level;
4373 path.lowest_level = level;
4374 key.objectid = corrupt->key.objectid;
4375 key.type = corrupt->key.type;
4376 key.offset = corrupt->key.offset;
4379 * Here we don't want to do any tree balance, since it may
4380 * cause a balance with corrupted brother leaf/node,
4381 * so ins_len set to 0 here.
4382 * Balance will be done after all corrupt node/leaf is deleted.
4384 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4387 offset = btrfs_node_blockptr(path.nodes[level],
4390 /* Remove the ptr */
4391 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4395 * Remove the corresponding extent
4396 * return value is not concerned.
4398 btrfs_release_path(&path);
4399 ret = btrfs_free_extent(trans, root, offset,
4400 root->fs_info->nodesize, 0,
4401 root->root_key.objectid, level - 1, 0);
4402 cache = next_cache_extent(cache);
4405 /* Balance the btree using btrfs_search_slot() */
4406 cache = first_cache_extent(corrupt_blocks);
4408 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Pass 2: search again with ins_len -1 (the balancing search noted above). */
4410 memcpy(&key, &corrupt->key, sizeof(key));
4411 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4414 /* the return value will always be >0 since the item won't be found */
4416 btrfs_release_path(&path);
4417 cache = next_cache_extent(cache);
/* NOTE(review): the result of the commit is not checked on this line. */
4420 btrfs_commit_transaction(trans, root);
4421 btrfs_release_path(&path);
4425 static int check_fs_root(struct btrfs_root *root,
4426 struct cache_tree *root_cache,
4427 struct walk_control *wc)
4433 struct btrfs_path path;
4434 struct shared_node root_node;
4435 struct root_record *rec;
4436 struct btrfs_root_item *root_item = &root->root_item;
4437 struct cache_tree corrupt_blocks;
4438 struct orphan_data_extent *orphan;
4439 struct orphan_data_extent *tmp;
4440 enum btrfs_tree_block_status status;
4441 struct node_refs nrefs;
4444 * Reuse the corrupt_block cache tree to record corrupted tree block
4446 * Unlike the usage in extent tree check, here we do it in a per
4447 * fs/subvol tree base.
4449 cache_tree_init(&corrupt_blocks);
4450 root->fs_info->corrupt_blocks = &corrupt_blocks;
4452 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453 rec = get_root_rec(root_cache, root->root_key.objectid);
4454 BUG_ON(IS_ERR(rec));
4455 if (btrfs_root_refs(root_item) > 0)
4456 rec->found_root_item = 1;
4459 btrfs_init_path(&path);
4460 memset(&root_node, 0, sizeof(root_node));
4461 cache_tree_init(&root_node.root_cache);
4462 cache_tree_init(&root_node.inode_cache);
4463 memset(&nrefs, 0, sizeof(nrefs));
4465 /* Move the orphan extent record to corresponding inode_record */
4466 list_for_each_entry_safe(orphan, tmp,
4467 &root->orphan_data_extents, list) {
4468 struct inode_record *inode;
4470 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4472 BUG_ON(IS_ERR(inode));
4473 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474 list_move(&orphan->list, &inode->orphan_extents);
4477 level = btrfs_header_level(root->node);
4478 memset(wc->nodes, 0, sizeof(wc->nodes));
4479 wc->nodes[level] = &root_node;
4480 wc->active_node = level;
4481 wc->root_level = level;
4483 /* We may not have checked the root block, let's do that now */
4484 if (btrfs_is_leaf(root->node))
4485 status = btrfs_check_leaf(root, NULL, root->node);
4487 status = btrfs_check_node(root, NULL, root->node);
4488 if (status != BTRFS_TREE_BLOCK_CLEAN)
4491 if (btrfs_root_refs(root_item) > 0 ||
4492 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493 path.nodes[level] = root->node;
4494 extent_buffer_get(root->node);
4495 path.slots[level] = 0;
4497 struct btrfs_key key;
4498 struct btrfs_disk_key found_key;
4500 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501 level = root_item->drop_level;
4502 path.lowest_level = level;
4503 if (level > btrfs_header_level(root->node) ||
4504 level >= BTRFS_MAX_LEVEL) {
4505 error("ignoring invalid drop level: %u", level);
4508 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4511 btrfs_node_key(path.nodes[level], &found_key,
4513 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514 sizeof(found_key)));
4518 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4524 wret = walk_up_tree(root, &path, wc, &level);
4531 btrfs_release_path(&path);
4533 if (!cache_tree_empty(&corrupt_blocks)) {
4534 struct cache_extent *cache;
4535 struct btrfs_corrupt_block *corrupt;
4537 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538 root->root_key.objectid);
4539 cache = first_cache_extent(&corrupt_blocks);
4541 corrupt = container_of(cache,
4542 struct btrfs_corrupt_block,
4544 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545 cache->start, corrupt->level,
4546 corrupt->key.objectid, corrupt->key.type,
4547 corrupt->key.offset);
4548 cache = next_cache_extent(cache);
4551 printf("Try to repair the btree for root %llu\n",
4552 root->root_key.objectid);
4553 ret = repair_btree(root, &corrupt_blocks);
4555 fprintf(stderr, "Failed to repair btree: %s\n",
4558 printf("Btree for root %llu is fixed\n",
4559 root->root_key.objectid);
4563 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4567 if (root_node.current) {
4568 root_node.current->checked = 1;
4569 maybe_free_inode_rec(&root_node.inode_cache,
4573 err = check_inode_recs(root, &root_node.inode_cache);
4577 free_corrupt_blocks_tree(&corrupt_blocks);
4578 root->fs_info->corrupt_blocks = NULL;
4579 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that check_fs_roots() should check.
 * The tree reloc and data reloc tree ids are tested explicitly; every other
 * id is classified by is_fstree().
 *
 * NOTE(review): the value returned for the two reloc ids is on a line not
 * visible in this listing.
 */
4583 static int fs_root_objectid(u64 objectid)
4585 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4588 return is_fstree(objectid);
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592 struct cache_tree *root_cache)
4594 struct btrfs_path path;
4595 struct btrfs_key key;
4596 struct walk_control wc;
4597 struct extent_buffer *leaf, *tree_node;
4598 struct btrfs_root *tmp_root;
4599 struct btrfs_root *tree_root = fs_info->tree_root;
4603 if (ctx.progress_enabled) {
4604 ctx.tp = TASK_FS_ROOTS;
4605 task_start(ctx.info);
4609 * Just in case we made any changes to the extent tree that weren't
4610 * reflected into the free space cache yet.
4613 reset_cached_block_groups(fs_info);
4614 memset(&wc, 0, sizeof(wc));
4615 cache_tree_init(&wc.shared);
4616 btrfs_init_path(&path);
4621 key.type = BTRFS_ROOT_ITEM_KEY;
4622 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4627 tree_node = tree_root->node;
4629 if (tree_node != tree_root->node) {
4630 free_root_recs_tree(root_cache);
4631 btrfs_release_path(&path);
4634 leaf = path.nodes[0];
4635 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636 ret = btrfs_next_leaf(tree_root, &path);
4642 leaf = path.nodes[0];
4644 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646 fs_root_objectid(key.objectid)) {
4647 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648 tmp_root = btrfs_read_fs_root_no_cache(
4651 key.offset = (u64)-1;
4652 tmp_root = btrfs_read_fs_root(
4655 if (IS_ERR(tmp_root)) {
4659 ret = check_fs_root(tmp_root, root_cache, &wc);
4660 if (ret == -EAGAIN) {
4661 free_root_recs_tree(root_cache);
4662 btrfs_release_path(&path);
4667 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668 btrfs_free_fs_root(tmp_root);
4669 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670 key.type == BTRFS_ROOT_BACKREF_KEY) {
4671 process_root_ref(leaf, path.slots[0], &key,
4678 btrfs_release_path(&path);
4680 free_extent_cache_tree(&wc.shared);
4681 if (!cache_tree_empty(&wc.shared))
4682 fprintf(stderr, "warning line %d\n", __LINE__);
4684 task_stop(ctx.info);
4690 * Find the @index according to @ino and name.
4691 * Notice: time efficiency is O(N)
4693 * @root: the root of the fs/file tree
4694 * @index_ret: the index as return value
4695 * @namebuf: the name to match
4696 * @name_len: the length of name to match
4697 * @file_type: the file_type of INODE_ITEM to match
4699 * Returns 0 if found and *@index_ret will be modified with right value
4700 * Returns <0 if not found and *@index_ret will be (u64)-1
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703 u64 *index_ret, char *namebuf, u32 name_len,
4706 struct btrfs_path path;
4707 struct extent_buffer *node;
4708 struct btrfs_dir_item *di;
4709 struct btrfs_key key;
4710 struct btrfs_key location;
4711 char name[BTRFS_NAME_LEN] = {0};
4723 /* search from the last index */
4724 key.objectid = dirid;
4725 key.offset = (u64)-1;
4726 key.type = BTRFS_DIR_INDEX_KEY;
4728 btrfs_init_path(&path);
4729 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4734 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/*
 * Report "not found" with the (u64)-1 sentinel that callers compare
 * against. The previous "(64)-1" was plain integer arithmetic and
 * stored 63, which is a valid-looking index.
 */
4737 *index_ret = (u64)-1;
4740 /* Check whether inode_id/filetype/name match */
4741 node = path.nodes[0];
4742 slot = path.slots[0];
4743 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744 total = btrfs_item_size_nr(node, slot);
4745 while (cur < total) {
4747 len = btrfs_dir_name_len(node, di);
4748 data_len = btrfs_dir_data_len(node, di);
4750 btrfs_dir_item_key_to_cpu(node, di, &location);
/* The entry must point at the INODE_ITEM of @location_id. */
4751 if (location.objectid != location_id ||
4752 location.type != BTRFS_INODE_ITEM_KEY ||
4753 location.offset != 0)
4756 filetype = btrfs_dir_type(node, di);
4757 if (file_type != filetype)
/* Never read more than the local name buffer can hold. */
4760 if (len > BTRFS_NAME_LEN)
4761 len = BTRFS_NAME_LEN;
4763 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764 if (len != name_len || strncmp(namebuf, name, len))
/* Match: the index is the offset of the DIR_INDEX key itself. */
4767 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768 *index_ret = key.offset;
/* Step over the header, the name and any trailing data to the next entry. */
4772 len += sizeof(*di) + data_len;
4773 di = (struct btrfs_dir_item *)((char *)di + len);
4779 btrfs_release_path(&path);
4784 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785 * INODE_REF/INODE_EXTREF match.
4787 * @root: the root of the fs/file tree
4788 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789 * value while find index
4790 * @location_key: location key of the struct btrfs_dir_item to match
4791 * @name: the name to match
4792 * @namelen: the length of name
4793 * @file_type: the type of file to match
4795 * Return 0 if no error occurred.
4796 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797 * DIR_ITEM/DIR_INDEX
4798 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799 * and DIR_ITEM/DIR_INDEX mismatch
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802 struct btrfs_key *location_key, char *name,
4803 u32 namelen, u8 file_type)
4805 struct btrfs_path path;
4806 struct extent_buffer *node;
4807 struct btrfs_dir_item *di;
4808 struct btrfs_key location;
4809 char namebuf[BTRFS_NAME_LEN] = {0};
4818 /* get the index by traversing all index */
4819 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820 ret = find_dir_index(root, key->objectid,
4821 location_key->objectid, &key->offset,
4822 name, namelen, file_type);
4824 ret = DIR_INDEX_MISSING;
4828 btrfs_init_path(&path);
4829 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4831 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4836 /* Check whether inode_id/filetype/name match */
4837 node = path.nodes[0];
4838 slot = path.slots[0];
4839 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840 total = btrfs_item_size_nr(node, slot);
4841 while (cur < total) {
4842 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4845 len = btrfs_dir_name_len(node, di);
4846 data_len = btrfs_dir_data_len(node, di);
4848 btrfs_dir_item_key_to_cpu(node, di, &location);
4849 if (location.objectid != location_key->objectid ||
4850 location.type != location_key->type ||
4851 location.offset != location_key->offset)
4854 filetype = btrfs_dir_type(node, di);
4855 if (file_type != filetype)
4858 if (len > BTRFS_NAME_LEN) {
4859 len = BTRFS_NAME_LEN;
4860 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4862 key->type == BTRFS_DIR_ITEM_KEY ?
4863 "DIR_ITEM" : "DIR_INDEX",
4864 key->objectid, key->offset, len);
4866 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4868 if (len != namelen || strncmp(namebuf, name, len))
4874 len += sizeof(*di) + data_len;
4875 di = (struct btrfs_dir_item *)((char *)di + len);
4880 btrfs_release_path(&path);
4885 * Prints inode ref error message
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888 u64 index, const char *namebuf, int name_len,
4889 u8 filetype, int err)
4894 /* root dir error */
4895 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4897 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898 root->objectid, key->objectid, key->offset, namebuf);
/* The DIR_ITEM key offset is the hash of the name, so print that hash. */
4903 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905 root->objectid, key->offset,
4906 btrfs_name_hash(namebuf, name_len),
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4909 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
/*
 * Choose the wording from the DIR_INDEX bits. Testing DIR_ITEM_MISMATCH
 * here reported a mismatching index as "missing", and the reverse when
 * only the item bit was set.
 */
4910 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911 root->objectid, key->offset, index,
4912 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4917 * Insert the missing inode item.
4919 * Returns 0 means success.
4920 * Returns <0 means error.
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4925 struct btrfs_key key;
4926 struct btrfs_trans_handle *trans;
4927 struct btrfs_path path;
4931 key.type = BTRFS_INODE_ITEM_KEY;
4934 btrfs_init_path(&path);
/* All changes below are made inside one transaction on @root. */
4935 trans = btrfs_start_transaction(root, 1);
4936 if (IS_ERR(trans)) {
4941 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/*
 * ret < 0 is a search failure; ret == 0 means the key was found.
 * NOTE(review): the statement taken in either case is on a line not
 * visible in this listing.
 */
4942 if (ret < 0 || !ret)
4945 /* insert inode item */
4946 create_inode_item_lowmem(trans, root, ino, filetype);
/* NOTE(review): the results of the insert and the commit are not checked on these lines. */
4949 btrfs_commit_transaction(trans, root);
4952 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953 root->objectid, ino);
4954 btrfs_release_path(&path);
4959 * The ternary means dir item, dir index and relative inode ref.
4960 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4963 * If two of three is missing or mismatched, delete the existing one.
4964 * If one of three is missing or mismatched, add the missing one.
4966 * returns 0 means success.
4967 * returns non-zero on error.
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970 u64 index, char *name, int name_len, u8 filetype,
4973 struct btrfs_trans_handle *trans;
4978 * stage shall be one of the following valid values:
4979 * 0: Fine, nothing to do.
4980 * 1: One of three is wrong, so add the missing one.
4981 * 2: Two of three are wrong, so delete the existing one.
4983 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4985 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4987 if (err & (INODE_REF_MISSING))
4990 /* stage must be smaller than 3 */
4993 trans = btrfs_start_transaction(root, 1);
4995 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
5000 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001 filetype, &index, 1, 1);
5005 btrfs_commit_transaction(trans, root);
5008 error("fail to repair inode %llu name %s filetype %u",
5009 ino, name, filetype);
5011 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012 stage == 2 ? "Delete" : "Add",
5013 ino, name, filetype);
5019 * Traverse the given INODE_REF and call find_dir_item() to find related
5020 * DIR_ITEM/DIR_INDEX.
5022 * @root: the root of the fs/file tree
5023 * @ref_key: the key of the INODE_REF
5024 * @path: the path that provides node and slot
5025 * @refs_ret: the count of INODE_REF
5026 * @mode: the st_mode of INODE_ITEM
5027 * @name_ret: returns with the first ref's name
5028 * @namelen_ret: len of the name_ret
5030 * Return 0 if no error occurred.
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033 struct btrfs_path *path, char *name_ret,
5034 u32 *namelen_ret, u64 *refs_ret, int mode)
5036 struct btrfs_key key;
5037 struct btrfs_key location;
5038 struct btrfs_inode_ref *ref;
5039 struct extent_buffer *node;
5040 char namebuf[BTRFS_NAME_LEN] = {0};
5050 int need_research = 0;
5058 /* since after repair, path and the dir item may be changed */
5059 if (need_research) {
5061 btrfs_release_path(path);
5062 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063 /* the item was deleted, let path point to the last checked item */
5065 if (path->slots[0] == 0)
5066 btrfs_prev_leaf(root, path);
5074 location.objectid = ref_key->objectid;
5075 location.type = BTRFS_INODE_ITEM_KEY;
5076 location.offset = 0;
5077 node = path->nodes[0];
5078 slot = path->slots[0];
5080 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082 total = btrfs_item_size_nr(node, slot);
5085 /* Update inode ref count */
5088 index = btrfs_inode_ref_index(node, ref);
5089 name_len = btrfs_inode_ref_name_len(node, ref);
5091 if (name_len <= BTRFS_NAME_LEN) {
5094 len = BTRFS_NAME_LEN;
5095 warning("root %llu INODE_REF[%llu %llu] name too long",
5096 root->objectid, ref_key->objectid, ref_key->offset);
5099 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5101 /* copy the first name found to name_ret */
5102 if (refs == 1 && name_ret) {
5103 memcpy(name_ret, namebuf, len);
5107 /* Check root dir ref */
5108 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109 if (index != 0 || len != strlen("..") ||
5110 strncmp("..", namebuf, len) ||
5111 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112 /* set err bits then repair will delete the ref */
5113 err |= DIR_INDEX_MISSING;
5114 err |= DIR_ITEM_MISSING;
5119 /* Find related DIR_INDEX */
5120 key.objectid = ref_key->offset;
5121 key.type = BTRFS_DIR_INDEX_KEY;
5123 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124 imode_to_type(mode));
5126 /* Find related dir_item */
5127 key.objectid = ref_key->offset;
5128 key.type = BTRFS_DIR_ITEM_KEY;
5129 key.offset = btrfs_name_hash(namebuf, len);
5130 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131 imode_to_type(mode));
5133 if (tmp_err && repair) {
5134 ret = repair_ternary_lowmem(root, ref_key->offset,
5135 ref_key->objectid, index, namebuf,
5136 name_len, imode_to_type(mode),
5143 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144 imode_to_type(mode), tmp_err);
5146 len = sizeof(*ref) + name_len;
5147 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5158 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159 * DIR_ITEM/DIR_INDEX.
5161 * @root: the root of the fs/file tree
5162 * @ref_key: the key of the INODE_EXTREF
5163 * @refs: the count of INODE_EXTREF
5164 * @mode: the st_mode of INODE_ITEM
5166 * Return 0 if no error occurred.
5168 static int check_inode_extref(struct btrfs_root *root,
5169 struct btrfs_key *ref_key,
5170 struct extent_buffer *node, int slot, u64 *refs,
5173 struct btrfs_key key;
5174 struct btrfs_key location;
5175 struct btrfs_inode_extref *extref;
5176 char namebuf[BTRFS_NAME_LEN] = {0};
/* Every referenced dir entry must point back at this inode's INODE_ITEM. */
5186 location.objectid = ref_key->objectid;
5187 location.type = BTRFS_INODE_ITEM_KEY;
5188 location.offset = 0;
5190 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191 total = btrfs_item_size_nr(node, slot);
5194 /* update inode ref count */
5196 name_len = btrfs_inode_extref_name_len(node, extref);
5197 index = btrfs_inode_extref_index(node, extref);
5198 parent = btrfs_inode_extref_parent(node, extref);
5199 if (name_len <= BTRFS_NAME_LEN) {
5202 len = BTRFS_NAME_LEN;
5203 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204 root->objectid, ref_key->objectid, ref_key->offset);
5206 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5208 /* Check root dir ref name */
5209 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211 root->objectid, ref_key->objectid, ref_key->offset,
5213 err |= ROOT_DIR_ERROR;
5216 /* find related dir_index */
5217 key.objectid = parent;
5218 key.type = BTRFS_DIR_INDEX_KEY;
/*
 * find_dir_item() compares its last argument with the dir entry's file
 * type, so convert the st_mode first, as check_inode_ref() does.
 */
5220 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
5223 /* find related dir_item */
5224 key.objectid = parent;
5225 key.type = BTRFS_DIR_ITEM_KEY;
5226 key.offset = btrfs_name_hash(namebuf, len);
5227 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance by the full on-disk name length, not the clamped copy length. */
5230 len = sizeof(*extref) + name_len;
5231 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5241 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242 * DIR_ITEM/DIR_INDEX match.
5243 * Return with @index_ret.
5245 * @root: the root of the fs/file tree
5246 * @key: the key of the INODE_REF/INODE_EXTREF
5247 * @name: the name in the INODE_REF/INODE_EXTREF
5248 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5249 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5250 * value (u64)-1 means do not check index
5251 * @ext_ref: the EXTENDED_IREF feature
5253 * Return 0 if no error occurred.
5254 * Return >0 for error bitmap
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257 char *name, int namelen, u64 *index_ret,
5258 unsigned int ext_ref)
5260 struct btrfs_path path;
5261 struct btrfs_inode_ref *ref;
5262 struct btrfs_inode_extref *extref;
5263 struct extent_buffer *node;
5264 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5277 btrfs_init_path(&path);
5278 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5280 ret = INODE_REF_MISSING;
5284 node = path.nodes[0];
5285 slot = path.slots[0];
5287 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288 total = btrfs_item_size_nr(node, slot);
5290 /* Iterate all entry of INODE_REF */
5291 while (cur < total) {
5292 ret = INODE_REF_MISSING;
5294 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295 ref_index = btrfs_inode_ref_index(node, ref);
5296 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5299 if (cur + sizeof(*ref) + ref_namelen > total ||
5300 ref_namelen > BTRFS_NAME_LEN) {
5301 warning("root %llu INODE %s[%llu %llu] name too long",
5303 key->type == BTRFS_INODE_REF_KEY ?
5305 key->objectid, key->offset);
5307 if (cur + sizeof(*ref) > total)
5309 len = min_t(u32, total - cur - sizeof(*ref),
5315 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5318 if (len != namelen || strncmp(ref_namebuf, name, len))
5321 *index_ret = ref_index;
5325 len = sizeof(*ref) + ref_namelen;
5326 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5331 /* Skip if not support EXTENDED_IREF feature */
5335 btrfs_release_path(&path);
5336 btrfs_init_path(&path);
5338 dir_id = key->offset;
5339 key->type = BTRFS_INODE_EXTREF_KEY;
5340 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5342 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5344 ret = INODE_REF_MISSING;
5348 node = path.nodes[0];
5349 slot = path.slots[0];
5351 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5353 total = btrfs_item_size_nr(node, slot);
5355 /* Iterate all entry of INODE_EXTREF */
5356 while (cur < total) {
5357 ret = INODE_REF_MISSING;
5359 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360 ref_index = btrfs_inode_extref_index(node, extref);
5361 parent = btrfs_inode_extref_parent(node, extref);
5362 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5365 if (parent != dir_id)
5368 if (ref_namelen <= BTRFS_NAME_LEN) {
5371 len = BTRFS_NAME_LEN;
5372 warning("root %llu INODE %s[%llu %llu] name too long",
5374 key->type == BTRFS_INODE_REF_KEY ?
5376 key->objectid, key->offset);
5378 read_extent_buffer(node, ref_namebuf,
5379 (unsigned long)(extref + 1), len);
5381 if (len != namelen || strncmp(ref_namebuf, name, len))
5384 *index_ret = ref_index;
5389 len = sizeof(*extref) + ref_namelen;
5390 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5395 btrfs_release_path(&path);
/*
 * Print one error line for each problem class set in @err for the directory
 * entry (@key, @index, @namebuf) that refers to inode @ino: DIR_ITEM,
 * DIR_INDEX, INODE_ITEM and INODE_REF.
 */
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400 u64 ino, u64 index, const char *namebuf,
5401 int name_len, u8 filetype, int err)
5403 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405 root->objectid, key->objectid, key->offset, namebuf,
5407 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
5410 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
/*
 * Choose the wording from the DIR_INDEX bits; testing DIR_ITEM_MISMATCH
 * here mislabelled index errors.
 */
5411 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412 root->objectid, key->objectid, index, namebuf, filetype,
5413 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5416 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5418 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419 root->objectid, ino, index, namebuf, filetype,
5420 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5423 if (err & INODE_REF_MISSING)
5425 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426 root->objectid, ino, key->objectid, namebuf, filetype);
5431 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5433 * Returns error after repair
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436 u64 index, u8 filetype, char *namebuf, u32 name_len,
5441 if (err & INODE_ITEM_MISSING) {
/* First recreate the missing INODE_ITEM. */
5442 ret = repair_inode_item_missing(root, ino, filetype);
/* NOTE(review): the condition guarding this clear is on a line not visible here. */
5444 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5447 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
/* Any remaining bits go to the DIR_ITEM/DIR_INDEX/INODE_REF repair. */
5448 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449 name_len, filetype, err);
5451 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type that belong to directory @ino, starting
 * from the highest offset and stepping backwards with
 * btrfs_previous_item(), and visit every btrfs_dir_item packed in each one.
 * Name lengths are clamped to BTRFS_NAME_LEN.
 *
 * NOTE(review): the accumulation into the output parameter, the loop
 * bookkeeping and the return statements are on lines not visible in this
 * listing.
 */
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5462 struct btrfs_key key;
5463 struct btrfs_path path;
5465 struct btrfs_dir_item *di;
5475 key.offset = (u64)-1;
5477 btrfs_init_path(&path);
5478 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5483 /* if found, go to special case */
5488 ret = btrfs_previous_item(root, &path, ino, type);
5496 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5498 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5500 while (cur < total) {
5501 len = btrfs_dir_name_len(path.nodes[0], di);
5502 if (len > BTRFS_NAME_LEN)
5503 len = BTRFS_NAME_LEN;
5506 len += btrfs_dir_data_len(path.nodes[0], di);
5508 di = (struct btrfs_dir_item *)((char *)di + len);
5514 btrfs_release_path(&path);
/*
 * count_dir_isize - compute the size of directory @ino in @root as the sum
 * of what __count_dir_isize() reports for its DIR_ITEM items and for its
 * DIR_INDEX items, and store it in *@size.  A failure is reported with
 * error().
 */
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5525 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5529 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5533 *size = item_size + index_size;
5537 error("failed to count root %llu INODE[%llu] root size",
5538 root->objectid, ino);
/*
 * check_dir_item - verify every entry packed in one DIR_ITEM/DIR_INDEX item.
 *
 * For each entry the visible code:
 *  - reads data_len, name_len, the name (clamped to BTRFS_NAME_LEN) and the
 *    file type, adding name_len to *@size;
 *  - for DIR_ITEM keys, checks that the key offset equals the name hash;
 *  - looks up the INODE_ITEM the entry points to and compares its mode
 *    with the entry's file type (ROOT_ITEM locations are skipped);
 *  - checks the matching INODE_REF/INODE_EXTREF through find_inode_ref();
 *  - checks the counterpart entry (DIR_INDEX for a DIR_ITEM and vice
 *    versa) through find_dir_item();
 *  - in repair mode hands the collected bits to repair_dir_item(),
 *    otherwise reports them with print_dir_item_err().
 * On exit the path is re-searched for @di_key.
 */
5543 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5546 * @root: the root of the fs/file tree
5547 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5549 * @size: the st_size of the INODE_ITEM
5550 * @ext_ref: the EXTENDED_IREF feature
5552 * Return 0 if no error occurred.
5553 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556 struct btrfs_path *path, u64 *size,
5557 unsigned int ext_ref)
5559 struct btrfs_dir_item *di;
5560 struct btrfs_inode_item *ii;
5561 struct btrfs_key key;
5562 struct btrfs_key location;
5563 struct extent_buffer *node;
5565 char namebuf[BTRFS_NAME_LEN] = {0};
5577 int need_research = 0;
5580 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581 * ignore index check.
5583 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584 index = di_key->offset;
5591 /* since after repair, path and the dir item may be changed */
5592 if (need_research) {
5594 err |= DIR_COUNT_AGAIN;
5595 btrfs_release_path(path);
5596 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597 /* the item was deleted, let path point the last checked item */
5599 if (path->slots[0] == 0)
5600 btrfs_prev_leaf(root, path);
5608 node = path->nodes[0];
5609 slot = path->slots[0];
5611 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612 total = btrfs_item_size_nr(node, slot);
5613 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
/* One item may hold several entries back to back. */
5615 while (cur < total) {
5616 data_len = btrfs_dir_data_len(node, di);
5619 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5621 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622 di_key->objectid, di_key->offset, data_len);
5624 name_len = btrfs_dir_name_len(node, di);
5625 if (name_len <= BTRFS_NAME_LEN) {
5628 len = BTRFS_NAME_LEN;
5629 warning("root %llu %s[%llu %llu] name too long",
5631 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632 di_key->objectid, di_key->offset);
5634 (*size) += name_len;
5635 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5637 filetype = btrfs_dir_type(node, di);
5639 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640 di_key->offset != btrfs_name_hash(namebuf, len)) {
5642 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643 root->objectid, di_key->objectid, di_key->offset,
5644 namebuf, len, filetype, di_key->offset,
5645 btrfs_name_hash(namebuf, len));
5648 btrfs_dir_item_key_to_cpu(node, di, &location);
5649 /* Ignore related ROOT_ITEM check */
5650 if (location.type == BTRFS_ROOT_ITEM_KEY)
5653 btrfs_release_path(path);
5654 /* Check relative INODE_ITEM(existence/filetype) */
5655 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5657 tmp_err |= INODE_ITEM_MISSING;
5661 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662 struct btrfs_inode_item);
5663 mode = btrfs_inode_mode(path->nodes[0], ii);
5664 if (imode_to_type(mode) != filetype) {
5665 tmp_err |= INODE_ITEM_MISMATCH;
5669 /* Check relative INODE_REF/INODE_EXTREF */
5670 key.objectid = location.objectid;
5671 key.type = BTRFS_INODE_REF_KEY;
5672 key.offset = di_key->objectid;
5673 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5676 /* check relative INDEX/ITEM */
5677 key.objectid = di_key->objectid;
/*
 * Decide on the type of the item being checked.  key.type was set to
 * BTRFS_INODE_REF_KEY just above, so testing it here never matched and a
 * DIR_ITEM was always looked up, even when checking a DIR_ITEM.
 */
5678 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5679 key.type = BTRFS_DIR_INDEX_KEY;
5682 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): namebuf holds at most BTRFS_NAME_LEN bytes; when name_len
 * was clamped, hashing name_len bytes reads past the buffer -- consider
 * using len here (and in find_dir_item below).
 */
5683 key.offset = btrfs_name_hash(namebuf, name_len);
5686 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687 name_len, filetype);
5688 /* find_dir_item may find index */
5689 if (key.type == BTRFS_DIR_INDEX_KEY)
5693 if (tmp_err && repair) {
5694 ret = repair_dir_item(root, di_key->objectid,
5695 location.objectid, index,
5696 imode_to_type(mode), namebuf,
5698 if (ret != tmp_err) {
5703 btrfs_release_path(path);
5704 print_dir_item_err(root, di_key, location.objectid, index,
5705 namebuf, name_len, filetype, tmp_err);
/* Step over this entry: header + name + data. */
5707 len = sizeof(*di) + name_len + data_len;
5708 di = (struct btrfs_dir_item *)((char *)di + len);
5711 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713 root->objectid, di_key->objectid,
/* Restore the caller's position on @di_key. */
5720 btrfs_release_path(path);
5721 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5723 err |= ret > 0 ? -ENOENT : ret;
/*
 * punch_extent_hole - start a transaction on @root, call
 * btrfs_punch_hole() for the range described by @start and the length
 * argument on inode @ino, print the outcome and commit the transaction.
 * A failure to start the transaction is returned as PTR_ERR(trans).
 */
5728 * Wrapper function of btrfs_punch_hole.
5730 * Returns 0 means success.
5731 * Returns not 0 means error.
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5736 struct btrfs_trans_handle *trans;
5739 trans = btrfs_start_transaction(root, 1);
5741 return PTR_ERR(trans);
5743 ret = btrfs_punch_hole(trans, root, ino, start, len);
5745 error("failed to add hole [%llu, %llu] in inode [%llu]",
5748 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5751 btrfs_commit_transaction(trans, root);
/*
 * check_file_extent - validate one EXTENT_DATA item.
 *
 * Visible checks, in order:
 *  - inline extents: non-zero length and, when not compressed, a length
 *    equal to the inline item length; *end and *size are advanced;
 *  - the extent type must be REG or PREALLOC;
 *  - checksum coverage, counted with count_csum_range(), against the
 *    NODATASUM flag, regular extents and preallocated extents;
 *  - unless no_holes is set, a gap between *end and the key offset, which
 *    is handed to punch_extent_hole();
 *  - finally *end and *size are advanced by extent_num_bytes.
 * Problems are OR-ed into the local error bitmap (FILE_EXTENT_ERROR,
 * ODD_CSUM_ITEM, CSUM_ITEM_MISSING).
 */
5756 * Check file extent datasum/hole, update the size of the file extents,
5757 * check and update the last offset of the file extent.
5759 * @root: the root of fs/file tree.
5760 * @fkey: the key of the file extent.
5761 * @nodatasum: INODE_NODATASUM feature.
5762 * @size: the sum of all EXTENT_DATA items size for this inode.
5763 * @end: the offset of the last extent.
5765 * Return 0 if no error occurred.
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768 struct extent_buffer *node, int slot,
5769 unsigned int nodatasum, u64 *size, u64 *end)
5771 struct btrfs_file_extent_item *fi;
5774 u64 extent_num_bytes;
5776 u64 csum_found; /* In byte size, sectorsize aligned */
5777 u64 search_start; /* Logical range start we search for csum */
5778 u64 search_len; /* Logical range len we search for csum */
5779 unsigned int extent_type;
5780 unsigned int is_hole;
5785 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5787 /* Check inline extent */
5788 extent_type = btrfs_file_extent_type(node, fi);
5789 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790 struct btrfs_item *e = btrfs_item_nr(slot);
5791 u32 item_inline_len;
5793 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795 compressed = btrfs_file_extent_compression(node, fi);
5796 if (extent_num_bytes == 0) {
5798 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799 root->objectid, fkey->objectid, fkey->offset);
5800 err |= FILE_EXTENT_ERROR;
/* The size comparison only applies to uncompressed inline data. */
5802 if (!compressed && extent_num_bytes != item_inline_len) {
5804 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805 root->objectid, fkey->objectid, fkey->offset,
5806 extent_num_bytes, item_inline_len);
5807 err |= FILE_EXTENT_ERROR;
5809 *end += extent_num_bytes;
5810 *size += extent_num_bytes;
5814 /* Check extent type */
5815 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817 err |= FILE_EXTENT_ERROR;
5818 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819 root->objectid, fkey->objectid, fkey->offset);
5823 /* Check REG_EXTENT/PREALLOC_EXTENT */
5824 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827 extent_offset = btrfs_file_extent_offset(node, fi);
5828 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
5829 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5832 * Check EXTENT_DATA csum
5834 * For plain (uncompressed) extent, we should only check the range
5835 * we're referring to, as it's possible that part of prealloc extent
5836 * has been written, and has csum:
5838 * |<--- Original large preallocated extent A ---->|
5839 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5842 * For compressed extent, we should check the whole range.
5845 search_start = disk_bytenr + extent_offset;
5846 search_len = extent_num_bytes;
5848 search_start = disk_bytenr;
5849 search_len = disk_num_bytes;
5851 ret = count_csum_range(root, search_start, search_len, &csum_found);
5852 if (csum_found > 0 && nodatasum) {
5853 err |= ODD_CSUM_ITEM;
5854 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855 root->objectid, fkey->objectid, fkey->offset);
5856 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857 !is_hole && (ret < 0 || csum_found < search_len)) {
5858 err |= CSUM_ITEM_MISSING;
5859 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860 root->objectid, fkey->objectid, fkey->offset,
5861 csum_found, search_len);
5862 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863 err |= ODD_CSUM_ITEM;
5864 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865 root->objectid, fkey->objectid, fkey->offset, csum_found);
5868 /* Check EXTENT_DATA hole */
5869 if (!no_holes && *end != fkey->offset) {
/* The gap runs from the previous extent end up to this key's offset. */
5871 ret = punch_extent_hole(root, fkey->objectid,
5872 *end, fkey->offset - *end);
5873 if (!repair || ret) {
5874 err |= FILE_EXTENT_ERROR;
5876 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5877 root->objectid, fkey->objectid, fkey->offset,
5878 fkey->objectid, *end);
5882 *end += extent_num_bytes;
5884 *size += extent_num_bytes;
/*
 * repair_inode_nbytes_lowmem - rewrite the nbytes field of an INODE_ITEM.
 *
 * The key @path currently points at is remembered, the path is released
 * and re-searched (with COW) for the inode item inside a new transaction,
 * nbytes is set to @nbytes, the leaf is marked dirty and the transaction
 * is committed.  The outcome is printed, then @path is searched back to
 * the remembered key so the caller can continue its walk.
 */
5890 * Set inode item nbytes to @nbytes
5892 * Returns 0 on success
5893 * Returns != 0 on error
5895 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5896 struct btrfs_path *path,
5897 u64 ino, u64 nbytes)
5899 struct btrfs_trans_handle *trans;
5900 struct btrfs_inode_item *ii;
5901 struct btrfs_key key;
5902 struct btrfs_key research_key;
/* Remember where the caller was so the path can be restored afterwards. */
5906 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5909 key.type = BTRFS_INODE_ITEM_KEY;
5912 trans = btrfs_start_transaction(root, 1);
5913 if (IS_ERR(trans)) {
5914 ret = PTR_ERR(trans);
5919 btrfs_release_path(path);
5920 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5928 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5929 struct btrfs_inode_item);
5930 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5931 btrfs_mark_buffer_dirty(path->nodes[0]);
5933 btrfs_commit_transaction(trans, root);
5936 error("failed to set nbytes in inode %llu root %llu",
5937 ino, root->root_key.objectid);
/* The newline belongs at the end of the message, not in the middle. */
5939 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5940 root->root_key.objectid, nbytes);
5943 btrfs_release_path(path);
5944 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
/*
 * repair_dir_isize_lowmem - rewrite the size field of a directory's
 * INODE_ITEM.
 *
 * The key @path currently points at is remembered, the path is released
 * and re-searched (with COW) for the inode item inside a new transaction,
 * the size is set to isize, the leaf is marked dirty and the transaction
 * is committed.  The outcome is printed, then @path is searched back to
 * the remembered key.
 */
5951 * Set directory inode isize to @isize.
5953 * Returns 0 on success.
5954 * Returns != 0 on error.
5956 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5957 struct btrfs_path *path,
5960 struct btrfs_trans_handle *trans;
5961 struct btrfs_inode_item *ii;
5962 struct btrfs_key key;
5963 struct btrfs_key research_key;
/* Remember where the caller was so the path can be restored afterwards. */
5967 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5970 key.type = BTRFS_INODE_ITEM_KEY;
5973 trans = btrfs_start_transaction(root, 1);
5974 if (IS_ERR(trans)) {
5975 ret = PTR_ERR(trans);
5980 btrfs_release_path(path);
5981 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5989 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5990 struct btrfs_inode_item);
5991 btrfs_set_inode_size(path->nodes[0], ii, isize);
5992 btrfs_mark_buffer_dirty(path->nodes[0]);
5994 btrfs_commit_transaction(trans, root);
5997 error("failed to set isize in inode %llu root %llu",
5998 ino, root->root_key.objectid);
6000 printf("Set isize in inode %llu root %llu to %llu\n",
6001 ino, root->root_key.objectid, isize);
6003 btrfs_release_path(path);
6004 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
/*
 * repair_inode_orphan_item_lowmem - add an orphan item for inode @ino.
 *
 * The key @path currently points at is remembered, a transaction is
 * started, the path is released and btrfs_add_orphan_item() is called,
 * then the transaction is committed.  The outcome is printed and @path is
 * searched back to the remembered key.
 */
6011 * Wrapper function for btrfs_add_orphan_item().
6013 * Returns 0 on success.
6014 * Returns != 0 on error.
6016 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6017 struct btrfs_path *path, u64 ino)
6019 struct btrfs_trans_handle *trans;
6020 struct btrfs_key research_key;
/* Remember where the caller was so the path can be restored afterwards. */
6024 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6026 trans = btrfs_start_transaction(root, 1);
6027 if (IS_ERR(trans)) {
6028 ret = PTR_ERR(trans);
6033 btrfs_release_path(path);
6034 ret = btrfs_add_orphan_item(trans, root, path, ino);
6036 btrfs_commit_transaction(trans, root);
6039 error("failed to add inode %llu as orphan item root %llu",
6040 ino, root->root_key.objectid);
6042 printf("Added inode %llu as orphan item root %llu\n",
6043 ino, root->root_key.objectid);
6045 btrfs_release_path(path);
6046 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
/*
 * repair_inode_nlinks_lowmem - make the nlink field of inode @ino agree
 * with @ref_count.
 *
 * The key @path points at is remembered first.  The name used for the
 * repair is @name when one is given, otherwise the decimal inode number.
 * Inside a transaction: when @ref_count is 0 the inode is linked into
 * lost+found via link_inode_to_lostfound() (which receives &ref_count);
 * then the INODE_ITEM is searched with COW and its nlink is set to
 * ref_count.  After the commit the outcome is printed and @path is
 * searched back to the remembered key.
 */
6052 /* Set inode_item nlink to @ref_count.
6053 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6055 * Returns 0 on success
6057 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6058 struct btrfs_path *path, u64 ino,
6059 const char *name, u32 namelen,
6060 u64 ref_count, u8 filetype, u64 *nlink)
6062 struct btrfs_trans_handle *trans;
6063 struct btrfs_inode_item *ii;
6064 struct btrfs_key key;
6065 struct btrfs_key old_key;
6066 char namebuf[BTRFS_NAME_LEN] = {0};
6072 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6074 if (name && namelen) {
6075 ASSERT(namelen <= BTRFS_NAME_LEN);
6076 memcpy(namebuf, name, namelen);
6079 sprintf(namebuf, "%llu", ino);
6080 name_len = count_digits(ino);
6081 printf("Can't find file name for inode %llu, use %s instead\n",
6085 trans = btrfs_start_transaction(root, 1);
6086 if (IS_ERR(trans)) {
6087 ret = PTR_ERR(trans);
6091 btrfs_release_path(path);
6092 /* if refs is 0, put it into lostfound */
6093 if (ref_count == 0) {
6094 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6095 name_len, filetype, &ref_count);
6100 /* reset inode_item's nlink to ref_count */
6102 key.type = BTRFS_INODE_ITEM_KEY;
6105 btrfs_release_path(path);
6106 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6112 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6113 struct btrfs_inode_item);
6114 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6115 btrfs_mark_buffer_dirty(path->nodes[0]);
6120 btrfs_commit_transaction(trans, root);
/* Argument order must follow the message: inode first, then root. */
6124 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6125 ino, root->objectid, namebuf, filetype);
6127 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6128 ino, root->objectid, namebuf, filetype);
6131 btrfs_release_path(path);
6132 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
/*
 * check_inode_item - check one INODE_ITEM together with the following
 * items that carry the same objectid.
 *
 * The inode's size, nbytes, mode, nlink and NODATASUM flag are read first.
 * The path is then advanced with btrfs_next_item() and each item whose
 * objectid still equals the inode number is dispatched on its key type:
 * INODE_REF -> check_inode_ref(), INODE_EXTREF -> check_inode_extref(),
 * DIR_ITEM/DIR_INDEX -> check_dir_item(), EXTENT_DATA ->
 * check_file_extent(); XATTR items are accepted and anything else is
 * reported as an unknown type.
 * Afterwards nlink, isize and nbytes are compared with the values gathered
 * during the walk; in repair mode the matching repair_*_lowmem() helper or
 * punch_extent_hole() is called.
 */
6139 * Check INODE_ITEM and related ITEMs (the same inode number)
6140 * 1. check link count
6141 * 2. check inode ref/extref
6142 * 3. check dir item/index
6144 * @ext_ref: the EXTENDED_IREF feature
6146 * Return 0 if no error occurred.
6147 * Return >0 for error or hit the traversal is done(by error bitmap)
6149 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6150 unsigned int ext_ref)
6152 struct extent_buffer *node;
6153 struct btrfs_inode_item *ii;
6154 struct btrfs_key key;
6155 struct btrfs_key last_key;
6164 u64 extent_size = 0;
6166 unsigned int nodatasum;
6170 char namebuf[BTRFS_NAME_LEN] = {0};
6173 node = path->nodes[0];
6174 slot = path->slots[0];
6176 btrfs_item_key_to_cpu(node, &key, slot);
6177 inode_id = key.objectid;
6179 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6180 ret = btrfs_next_item(root, path);
6186 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6187 isize = btrfs_inode_size(node, ii);
6188 nbytes = btrfs_inode_nbytes(node, ii);
6189 mode = btrfs_inode_mode(node, ii);
6190 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6191 nlink = btrfs_inode_nlink(node, ii);
6192 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the last key visited so the path can be restored later. */
6195 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6196 ret = btrfs_next_item(root, path);
6198 /* out will fill 'err' using current statistics */
6200 } else if (ret > 0) {
6205 node = path->nodes[0];
6206 slot = path->slots[0];
6207 btrfs_item_key_to_cpu(node, &key, slot);
6208 if (key.objectid != inode_id)
6212 case BTRFS_INODE_REF_KEY:
6213 ret = check_inode_ref(root, &key, path, namebuf,
6214 &name_len, &refs, mode);
6217 case BTRFS_INODE_EXTREF_KEY:
6218 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6219 warning("root %llu EXTREF[%llu %llu] isn't supported",
6220 root->objectid, key.objectid,
6222 ret = check_inode_extref(root, &key, node, slot, &refs,
6226 case BTRFS_DIR_ITEM_KEY:
6227 case BTRFS_DIR_INDEX_KEY:
6229 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6230 root->objectid, inode_id,
6231 imode_to_type(mode), key.objectid,
6234 ret = check_dir_item(root, &key, path, &size, ext_ref);
6237 case BTRFS_EXTENT_DATA_KEY:
6239 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6240 root->objectid, inode_id, key.objectid,
6243 ret = check_file_extent(root, &key, node, slot,
6244 nodatasum, &extent_size,
6248 case BTRFS_XATTR_ITEM_KEY:
6251 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6252 key.objectid, key.type, key.offset);
/* When the walk ran off the end, go back to the last key that was seen. */
6257 if (err & LAST_ITEM) {
6258 btrfs_release_path(path);
6259 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6264 /* verify INODE_ITEM nlink/isize/nbytes */
6266 if (repair && (err & DIR_COUNT_AGAIN)) {
6267 err &= ~DIR_COUNT_AGAIN;
6268 count_dir_isize(root, inode_id, &size);
6271 if ((nlink != 1 || refs != 1) && repair) {
6272 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6273 namebuf, name_len, refs, imode_to_type(mode),
6278 err |= LINK_COUNT_ERROR;
6279 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6280 root->objectid, inode_id, nlink);
6284 * Just a warning, as dir inode nbytes is just an
6285 * instructive value.
6287 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6288 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6289 root->objectid, inode_id,
6290 root->fs_info->nodesize);
6293 if (isize != size) {
6295 ret = repair_dir_isize_lowmem(root, path,
6297 if (!repair || ret) {
6300 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6301 root->objectid, inode_id, isize, size);
6305 if (nlink != refs) {
6307 ret = repair_inode_nlinks_lowmem(root, path,
6308 inode_id, namebuf, name_len, refs,
6309 imode_to_type(mode), &nlink);
6310 if (!repair || ret) {
6311 err |= LINK_COUNT_ERROR;
6313 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6314 root->objectid, inode_id, nlink, refs);
6316 } else if (!nlink) {
6318 ret = repair_inode_orphan_item_lowmem(root,
6320 if (!repair || ret) {
6322 error("root %llu INODE[%llu] is orphan item",
6323 root->objectid, inode_id);
/* No bytes allocated, but the extents end before isize: a hole is missing. */
6327 if (!nbytes && !no_holes && extent_end < isize) {
6329 ret = punch_extent_hole(root, inode_id,
6330 extent_end, isize - extent_end);
6331 if (!repair || ret) {
6332 err |= NBYTES_ERROR;
6334 "root %llu INODE[%llu] size %llu should have a file extent hole",
6335 root->objectid, inode_id, isize);
6339 if (nbytes != extent_size) {
6341 ret = repair_inode_nbytes_lowmem(root, path,
6342 inode_id, extent_size);
6343 if (!repair || ret) {
6344 err |= NBYTES_ERROR;
6346 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6347 root->objectid, inode_id, nbytes,
6353 if (err & LAST_ITEM)
6354 btrfs_next_item(root, path);
/*
 * repair_fs_first_inode - repair the root directory inode
 * (BTRFS_FIRST_FREE_OBJECTID) of @root according to the bits in @err.
 *
 * INODE_REF_MISSING: inside a transaction, search for the
 * INODE_REF[FIRST_FREE FIRST_FREE] key and insert a ".." inode ref with
 * btrfs_insert_inode_ref(); the bit is cleared from @err and the
 * transaction is committed.
 * INODE_ITEM_MISSING: call repair_inode_item_missing() with file type
 * BTRFS_FT_DIR and clear the bit.
 * NOTE(review): the result checks that presumably guard clearing the bits
 * are not visible in this excerpt -- confirm.
 */
6359 * Insert the missing inode item and inode ref.
6361 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6362 * Root dir should be handled specially because root dir is the root of fs.
6364 * returns err (>0 or 0) after repair
6366 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6368 struct btrfs_trans_handle *trans;
6369 struct btrfs_key key;
6370 struct btrfs_path path;
6371 int filetype = BTRFS_FT_DIR;
6374 btrfs_init_path(&path);
6376 if (err & INODE_REF_MISSING) {
6377 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6378 key.type = BTRFS_INODE_REF_KEY;
6379 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6381 trans = btrfs_start_transaction(root, 1);
6382 if (IS_ERR(trans)) {
6383 ret = PTR_ERR(trans);
6387 btrfs_release_path(&path);
6388 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* The root directory refers to itself: parent and inode are both 256. */
6392 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6393 BTRFS_FIRST_FREE_OBJECTID,
6394 BTRFS_FIRST_FREE_OBJECTID, 0);
6398 printf("Add INODE_REF[%llu %llu] name %s\n",
6399 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6401 err &= ~INODE_REF_MISSING;
6404 error("fail to insert first inode's ref");
6405 btrfs_commit_transaction(trans, root);
6408 if (err & INODE_ITEM_MISSING) {
6409 ret = repair_inode_item_missing(root,
6410 BTRFS_FIRST_FREE_OBJECTID, filetype);
6413 err &= ~INODE_ITEM_MISSING;
6417 error("fail to repair first inode");
6418 btrfs_release_path(&path);
/*
 * check_fs_first_inode - check the INODE_ITEM and INODE_REF of the root
 * directory (BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * The inode item is looked up and its mode must map to BTRFS_FT_DIR; the
 * ".." inode ref is then looked up through find_inode_ref().  Collected
 * bits are passed to repair_fs_first_inode(), and whatever remains is
 * reported with error().  A negative search result takes precedence over
 * the error bitmap in the return value.
 */
6423 * check first root dir's inode_item and inode_ref
6425 * returns 0 means no error
6426 * returns >0 means error
6427 * returns <0 means fatal error
6429 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6431 struct btrfs_path path;
6432 struct btrfs_key key;
6433 struct btrfs_inode_item *ii;
6439 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6440 key.type = BTRFS_INODE_ITEM_KEY;
6443 /* For root being dropped, we don't need to check first inode */
6444 if (btrfs_root_refs(&root->root_item) == 0 &&
6445 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6446 BTRFS_FIRST_FREE_OBJECTID)
6449 btrfs_init_path(&path);
6450 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6455 err |= INODE_ITEM_MISSING;
6457 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6458 struct btrfs_inode_item);
6459 mode = btrfs_inode_mode(path.nodes[0], ii);
6460 if (imode_to_type(mode) != BTRFS_FT_DIR)
6461 err |= INODE_ITEM_MISMATCH;
6464 /* lookup first inode ref */
6465 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6466 key.type = BTRFS_INODE_REF_KEY;
6467 /* special index value */
6470 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6476 btrfs_release_path(&path);
6479 err = repair_fs_first_inode(root, err);
6481 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6482 error("root dir INODE_ITEM is %s",
6483 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6484 if (err & INODE_REF_MISSING)
6485 error("root dir INODE_REF is missing");
/* A fatal (negative) search result wins over the error bitmap. */
6487 return ret < 0 ? ret : err;
/*
 * find_tree_backref - look up a tree backref in @rec's backref rb-tree.
 *
 * A stack "match" key is filled in and searched with rb_search() using
 * compare_extent_backref(); the visible code sets match.parent and the
 * full_backref flag for the parent-based case.  The result stays NULL
 * unless the found node is converted with to_tree_backref().
 */
6490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6491 u64 parent, u64 root)
6493 struct rb_node *node;
6494 struct tree_backref *back = NULL;
6495 struct tree_backref match = {
6502 match.parent = parent;
6503 match.node.full_backref = 1;
6508 node = rb_search(&rec->backref_tree, &match.node.node,
6509 (rb_compare_keys)compare_extent_backref, NULL);
6511 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * find_data_backref - look up a data backref in @rec's backref rb-tree.
 *
 * A stack "match" key is initialised (found_ref and disk_bytenr are among
 * the visible fields) and searched with rb_search() using
 * compare_extent_backref(); the visible code sets match.parent and the
 * full_backref flag for the parent-based case.  The result stays NULL
 * unless the found node is converted with to_data_backref().
 */
6516 static struct data_backref *find_data_backref(struct extent_record *rec,
6517 u64 parent, u64 root,
6518 u64 owner, u64 offset,
6520 u64 disk_bytenr, u64 bytes)
6522 struct rb_node *node;
6523 struct data_backref *back = NULL;
6524 struct data_backref match = {
6531 .found_ref = found_ref,
6532 .disk_bytenr = disk_bytenr,
6536 match.parent = parent;
6537 match.node.full_backref = 1;
6542 node = rb_search(&rec->backref_tree, &match.node.node,
6543 (rb_compare_keys)compare_extent_backref, NULL);
6545 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * check_btrfs_root - walk one tree with walk_down_tree_v2() and
 * walk_up_tree_v2().
 *
 * check_fs_first_inode() is called before the walk (see the comment on
 * inode 256 below).  The walk starts at the root node when the root still
 * has references or its drop_progress objectid is 0; otherwise it resumes
 * from the drop_progress key at drop_level.
 */
6550 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6551 * blocks and integrity of fs tree items.
6553 * @root: the root of the tree to be checked.
6554 * @ext_ref feature EXTENDED_IREF is enable or not.
6555 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6556 * otherwise means check fs tree(s) items relationship and
6557 * @root MUST be a fs tree root.
6558 * Returns 0 represents OK.
6559 * Returns not 0 represents error.
6561 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6562 struct btrfs_root *root, unsigned int ext_ref,
6566 struct btrfs_path path;
6567 struct node_refs nrefs;
6568 struct btrfs_root_item *root_item = &root->root_item;
6573 memset(&nrefs, 0, sizeof(nrefs));
6576 * We need to manually check the first inode item (256)
6577 * As the following traversal function will only start from
6578 * the first inode item in the leaf, if inode item (256) is
6579 * missing we will skip it forever.
6581 ret = check_fs_first_inode(root, ext_ref);
6587 level = btrfs_header_level(root->node);
6588 btrfs_init_path(&path);
6590 if (btrfs_root_refs(root_item) > 0 ||
6591 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start at the root node; take a reference since the path now holds it. */
6592 path.nodes[level] = root->node;
6593 path.slots[level] = 0;
6594 extent_buffer_get(root->node);
6596 struct btrfs_key key;
/* Partially dropped tree: resume from where the drop stopped. */
6598 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6599 level = root_item->drop_level;
6600 path.lowest_level = level;
6601 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6608 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6609 ext_ref, check_all);
6613 /* if ret is negative, walk shall stop */
6619 ret = walk_up_tree_v2(root, &path, &level);
6621 /* Normal exit, reset ret to err */
6628 btrfs_release_path(&path);
/*
 * check_fs_root_v2 - reset the cached block groups of the filesystem and
 * run check_btrfs_root() on @root without a transaction (NULL) and with
 * the last argument set to 0.
 */
6633 * Iterate all items in the tree and call check_inode_item() to check.
6635 * @root: the root of the tree to be checked.
6636 * @ext_ref: the EXTENDED_IREF feature
6638 * Return 0 if no error found.
6639 * Return <0 for error.
6641 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6643 reset_cached_block_groups(root->fs_info);
6644 return check_btrfs_root(NULL, root, ext_ref, 0);
/*
 * check_root_ref - check that a ROOT_REF/ROOT_BACKREF item has a matching
 * counterpart item.
 *
 * The dirid, sequence, name length and name are read from the item at
 * @node/@slot.  The counterpart key swaps objectid and offset and flips
 * the key type between ROOT_REF and ROOT_BACKREF.  If the counterpart is
 * not found ROOT_REF_MISSING is recorded; if any of dirid, sequence, name
 * length or name differ, ROOT_REF_MISMATCH is recorded.  Names longer than
 * BTRFS_NAME_LEN are truncated with a warning.
 */
6648 * Find the relative ref for root_ref and root_backref.
6650 * @root: the root of the root tree.
6651 * @ref_key: the key of the root ref.
6653 * Return 0 if no error occurred.
6655 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6656 struct extent_buffer *node, int slot)
6658 struct btrfs_path path;
6659 struct btrfs_key key;
6660 struct btrfs_root_ref *ref;
6661 struct btrfs_root_ref *backref;
6662 char ref_name[BTRFS_NAME_LEN] = {0};
6663 char backref_name[BTRFS_NAME_LEN] = {0};
6669 u32 backref_namelen;
6674 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6675 ref_dirid = btrfs_root_ref_dirid(node, ref);
6676 ref_seq = btrfs_root_ref_sequence(node, ref);
6677 ref_namelen = btrfs_root_ref_name_len(node, ref);
6679 if (ref_namelen <= BTRFS_NAME_LEN) {
6682 len = BTRFS_NAME_LEN;
6683 warning("%s[%llu %llu] ref_name too long",
6684 ref_key->type == BTRFS_ROOT_REF_KEY ?
6685 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6688 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6690 /* Find relative root_ref */
6691 key.objectid = ref_key->offset;
/* REF + BACKREF - type yields the other one of the two key types. */
6692 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6693 key.offset = ref_key->objectid;
6695 btrfs_init_path(&path);
6696 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6698 err |= ROOT_REF_MISSING;
6699 error("%s[%llu %llu] couldn't find relative ref",
6700 ref_key->type == BTRFS_ROOT_REF_KEY ?
6701 "ROOT_REF" : "ROOT_BACKREF",
6702 ref_key->objectid, ref_key->offset);
6706 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6707 struct btrfs_root_ref);
6708 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6709 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6710 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6712 if (backref_namelen <= BTRFS_NAME_LEN) {
6713 len = backref_namelen;
6715 len = BTRFS_NAME_LEN;
6716 warning("%s[%llu %llu] ref_name too long",
6717 key.type == BTRFS_ROOT_REF_KEY ?
6718 "ROOT_REF" : "ROOT_BACKREF",
6719 key.objectid, key.offset);
6721 read_extent_buffer(path.nodes[0], backref_name,
6722 (unsigned long)(backref + 1), len);
6724 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6725 ref_namelen != backref_namelen ||
6726 strncmp(ref_name, backref_name, len)) {
6727 err |= ROOT_REF_MISMATCH;
6728 error("%s[%llu %llu] mismatch relative ref",
6729 ref_key->type == BTRFS_ROOT_REF_KEY ?
6730 "ROOT_REF" : "ROOT_BACKREF",
6731 ref_key->objectid, ref_key->offset);
6734 btrfs_release_path(&path);
/*
 * check_fs_roots_v2 - iterate the root tree starting at
 * BTRFS_FS_TREE_OBJECTID and check every fs/subvolume tree found.
 *
 * ROOT_ITEM keys that fs_root_objectid() accepts are read (reloc trees
 * through btrfs_read_fs_root_no_cache(), and freed again after the check;
 * others through btrfs_read_fs_root()) and checked with
 * check_fs_root_v2().  ROOT_REF and ROOT_BACKREF keys are checked with
 * check_root_ref().  The loop stops once the objectid exceeds
 * BTRFS_LAST_FREE_OBJECTID.
 */
6739 * Check all fs/file tree in low_memory mode.
6741 * 1. for fs tree root item, call check_fs_root_v2()
6742 * 2. for fs tree root ref/backref, call check_root_ref()
6744 * Return 0 if no error occurred.
6746 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6748 struct btrfs_root *tree_root = fs_info->tree_root;
6749 struct btrfs_root *cur_root = NULL;
6750 struct btrfs_path path;
6751 struct btrfs_key key;
6752 struct extent_buffer *node;
6753 unsigned int ext_ref;
6758 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6760 btrfs_init_path(&path);
6761 key.objectid = BTRFS_FS_TREE_OBJECTID;
6763 key.type = BTRFS_ROOT_ITEM_KEY;
6765 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6769 } else if (ret > 0) {
6775 node = path.nodes[0];
6776 slot = path.slots[0];
6777 btrfs_item_key_to_cpu(node, &key, slot);
6778 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6780 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6781 fs_root_objectid(key.objectid)) {
6782 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6783 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6786 key.offset = (u64)-1;
6787 cur_root = btrfs_read_fs_root(fs_info, &key);
6790 if (IS_ERR(cur_root)) {
6791 error("Fail to read fs/subvol tree: %lld",
6797 ret = check_fs_root_v2(cur_root, ext_ref);
/* Reloc roots were read without the cache, so they are freed here. */
6800 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6801 btrfs_free_fs_root(cur_root);
6802 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6803 key.type == BTRFS_ROOT_BACKREF_KEY) {
6804 ret = check_root_ref(tree_root, &key, node, slot);
6808 ret = btrfs_next_item(tree_root, &path);
6818 btrfs_release_path(&path);
/*
 * do_check_fs_roots - entry point for the fs-roots check.  Prints a
 * progress line on stderr when the progress indicator is disabled, then
 * dispatches on check_mode: check_fs_roots_v2() for CHECK_MODE_LOWMEM,
 * check_fs_roots() otherwise.
 */
6822 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6823 struct cache_tree *root_cache)
6827 if (!ctx.progress_enabled)
6828 fprintf(stderr, "checking fs roots\n");
6829 if (check_mode == CHECK_MODE_LOWMEM)
6830 ret = check_fs_roots_v2(fs_info);
6832 ret = check_fs_roots(fs_info, root_cache);
/*
 * all_backpointers_checked - verify every backref recorded for the extent
 * @rec.
 *
 * For each node of rec->backref_tree the visible checks are:
 *  - the backref was found in the extent tree;
 *  - a tree backref was actually referenced (found_ref);
 *  - a data backref's found_ref equals its num_refs, its disk_bytenr
 *    equals rec->start and its bytes equal rec->nr.
 * Data backrefs add their found_ref to a running total which is finally
 * compared with rec->refs.  Each mismatch is printed to stderr.
 * NOTE(review): the lines that test @print_errs and set the return value
 * are not visible in this excerpt -- presumably messages are printed only
 * when @print_errs is set; confirm.
 */
6837 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6839 struct extent_backref *back, *tmp;
6840 struct tree_backref *tback;
6841 struct data_backref *dback;
6845 rbtree_postorder_for_each_entry_safe(back, tmp,
6846 &rec->backref_tree, node) {
6847 if (!back->found_extent_tree) {
6851 if (back->is_data) {
6852 dback = to_data_backref(back);
6853 fprintf(stderr, "Data backref %llu %s %llu"
6854 " owner %llu offset %llu num_refs %lu"
6855 " not found in extent tree\n",
6856 (unsigned long long)rec->start,
6857 back->full_backref ?
6859 back->full_backref ?
6860 (unsigned long long)dback->parent:
6861 (unsigned long long)dback->root,
6862 (unsigned long long)dback->owner,
6863 (unsigned long long)dback->offset,
6864 (unsigned long)dback->num_refs);
6866 tback = to_tree_backref(back);
6867 fprintf(stderr, "Tree backref %llu parent %llu"
6868 " root %llu not found in extent tree\n",
6869 (unsigned long long)rec->start,
6870 (unsigned long long)tback->parent,
6871 (unsigned long long)tback->root);
6874 if (!back->is_data && !back->found_ref) {
6878 tback = to_tree_backref(back);
6879 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6880 (unsigned long long)rec->start,
6881 back->full_backref ? "parent" : "root",
6882 back->full_backref ?
6883 (unsigned long long)tback->parent :
6884 (unsigned long long)tback->root, back);
6886 if (back->is_data) {
6887 dback = to_data_backref(back);
6888 if (dback->found_ref != dback->num_refs) {
6892 fprintf(stderr, "Incorrect local backref count"
6893 " on %llu %s %llu owner %llu"
6894 " offset %llu found %u wanted %u back %p\n",
6895 (unsigned long long)rec->start,
6896 back->full_backref ?
6898 back->full_backref ?
6899 (unsigned long long)dback->parent:
6900 (unsigned long long)dback->root,
6901 (unsigned long long)dback->owner,
6902 (unsigned long long)dback->offset,
6903 dback->found_ref, dback->num_refs, back);
6905 if (dback->disk_bytenr != rec->start) {
6909 fprintf(stderr, "Backref disk bytenr does not"
6910 " match extent record, bytenr=%llu, "
6911 "ref bytenr=%llu\n",
6912 (unsigned long long)rec->start,
6913 (unsigned long long)dback->disk_bytenr);
6916 if (dback->bytes != rec->nr) {
6920 fprintf(stderr, "Backref bytes do not match "
6921 "extent backref, bytenr=%llu, ref "
6922 "bytes=%llu, backref bytes=%llu\n",
6923 (unsigned long long)rec->start,
6924 (unsigned long long)rec->nr,
6925 (unsigned long long)dback->bytes);
6928 if (!back->is_data) {
6931 dback = to_data_backref(back);
6932 found += dback->found_ref;
/* The summed per-backref counts must match the extent's own ref count. */
6935 if (found != rec->refs) {
6939 fprintf(stderr, "Incorrect global backref count "
6940 "on %llu found %llu wanted %llu\n",
6941 (unsigned long long)rec->start,
6942 (unsigned long long)found,
6943 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes(): maps @node back to its
 * enclosing extent_backref.
 * NOTE(review): the statement that releases the backref is not visible in
 * this excerpt.
 */
6949 static void __free_one_backref(struct rb_node *node)
6951 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down rec->backref_tree by passing it to rb_free_nodes() with
 * __free_one_backref as the per-node callback.
 */
6956 static void free_all_extent_backrefs(struct extent_record *rec)
6958 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache: fetch an entry with first_cache_extent(), convert it
 * to its extent_record, unlink it with remove_cache_extent() and drop its
 * backrefs via free_all_extent_backrefs().
 * NOTE(review): the loop construct and the release of the record itself
 * are not visible in this excerpt.
 */
6961 static void free_extent_record_cache(struct cache_tree *extent_cache)
6963 struct cache_extent *cache;
6964 struct extent_record *rec;
6967 cache = first_cache_extent(extent_cache);
6970 rec = container_of(cache, struct extent_record, cache);
6971 remove_cache_extent(extent_cache, cache);
6972 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify.
 *
 * All of the following must hold: content and owner ref already checked,
 * extent_item_refs equals refs with refs > 0, no duplicates recorded,
 * all_backpointers_checked(rec, 0) returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 * When they do, the record is removed from the cache, its backrefs are
 * freed and it is unlinked from rec->list.
 */
6977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6978 struct extent_record *rec)
6980 if (rec->content_checked && rec->owner_ref_checked &&
6981 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6982 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6983 !rec->bad_full_backref && !rec->crossing_stripes &&
6984 !rec->wrong_chunk_type) {
6985 remove_cache_extent(extent_cache, &rec->cache);
6986 free_all_extent_backrefs(rec);
6987 list_del_init(&rec->list);
/*
 * Verify that tree block @buf really belongs to the root named in its
 * header.
 *
 * Step 1: scan the backrefs of @rec for a found, non-full tree backref
 * whose root equals btrfs_header_owner(buf).
 * Step 2: read the owner root with btrfs_read_fs_root(), search it for the
 * first key of @buf with path.lowest_level set to level + 1, and compare
 * the block pointer in the parent node with buf->start.
 *
 * The final statement returns 0 when the owner was confirmed and 1
 * otherwise. NOTE(review): the early exits taken in step 1 and on lookup
 * failure are not visible in this excerpt.
 */
6993 static int check_owner_ref(struct btrfs_root *root,
6994 struct extent_record *rec,
6995 struct extent_buffer *buf)
6997 struct extent_backref *node, *tmp;
6998 struct tree_backref *back;
6999 struct btrfs_root *ref_root;
7000 struct btrfs_key key;
7001 struct btrfs_path path;
7002 struct extent_buffer *parent;
7007 rbtree_postorder_for_each_entry_safe(node, tmp,
7008 &rec->backref_tree, node) {
7011 if (!node->found_ref)
7013 if (node->full_backref)
7015 back = to_tree_backref(node);
7016 if (btrfs_header_owner(buf) == back->root)
7019 BUG_ON(rec->is_root);
7021 /* try to find the block by search corresponding fs tree */
7022 key.objectid = btrfs_header_owner(buf);
7023 key.type = BTRFS_ROOT_ITEM_KEY;
7024 key.offset = (u64)-1;
7026 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7027 if (IS_ERR(ref_root))
7030 level = btrfs_header_level(buf);
/* Use the first key stored in @buf as the search target. */
7032 btrfs_item_key_to_cpu(buf, &key, 0);
7034 btrfs_node_key_to_cpu(buf, &key, 0);
7036 btrfs_init_path(&path);
/* Stop the descent one level above @buf so the parent slot can be read. */
7037 path.lowest_level = level + 1;
7038 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7042 parent = path.nodes[level + 1];
7043 if (parent && buf->start == btrfs_node_blockptr(parent,
7044 path.slots[level + 1]))
7047 btrfs_release_path(&path);
7048 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec looking for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full backrefs get separate handling.
 * NOTE(review): the values returned for a match, for a full backref and
 * for the no-match case are not visible in this excerpt.
 */
7051 static int is_extent_tree_record(struct extent_record *rec)
7053 struct extent_backref *node, *tmp;
7054 struct tree_backref *back;
7057 rbtree_postorder_for_each_entry_safe(node, tmp,
7058 &rec->backref_tree, node) {
7061 back = to_tree_backref(node);
7062 if (node->full_backref)
7064 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * Looks up the extent record covering (@start, @len) in @extent_cache,
 * tests it with is_extent_tree_record(), converts the record's parent_key
 * to CPU byte order and passes it with the range to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): the early returns for a missing cache entry and for a
 * record rejected by is_extent_tree_record() are not visible here.
 */
7071 static int record_bad_block_io(struct btrfs_fs_info *info,
7072 struct cache_tree *extent_cache,
7075 struct extent_record *rec;
7076 struct cache_extent *cache;
7077 struct btrfs_key key;
7079 cache = lookup_cache_extent(extent_cache, start, len);
7083 rec = container_of(cache, struct extent_record, cache);
7084 if (!is_extent_tree_record(rec))
7087 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7088 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back in swapped positions; btrfs_fixup_low_keys() is then called
 * with the key at index 0 (the condition guarding that call is not visible
 * in this excerpt).
 *
 * Leaf: both item payloads are copied to temporary heap buffers, written
 * back at each other's offsets, then the item offsets, sizes and keys are
 * exchanged.
 *
 * NOTE(review): the two write_extent_buffer() calls in the leaf branch copy
 * item2_size bytes out of item1_data (allocated with item1_size) and
 * item1_size bytes out of item2_data (allocated with item2_size). When the
 * two sizes differ this reads past the smaller buffer; verify the intent.
 */
7091 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7092 struct extent_buffer *buf, int slot)
7094 if (btrfs_header_level(buf)) {
7095 struct btrfs_key_ptr ptr1, ptr2;
7097 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7098 sizeof(struct btrfs_key_ptr));
7099 read_extent_buffer(buf, &ptr2,
7100 btrfs_node_key_ptr_offset(slot + 1),
7101 sizeof(struct btrfs_key_ptr));
7102 write_extent_buffer(buf, &ptr1,
7103 btrfs_node_key_ptr_offset(slot + 1),
7104 sizeof(struct btrfs_key_ptr));
7105 write_extent_buffer(buf, &ptr2,
7106 btrfs_node_key_ptr_offset(slot),
7107 sizeof(struct btrfs_key_ptr));
7109 struct btrfs_disk_key key;
7110 btrfs_node_key(buf, &key, 0);
7111 btrfs_fixup_low_keys(root, path, &key,
7112 btrfs_header_level(buf) + 1);
7115 struct btrfs_item *item1, *item2;
7116 struct btrfs_key k1, k2;
7117 char *item1_data, *item2_data;
7118 u32 item1_offset, item2_offset, item1_size, item2_size;
7120 item1 = btrfs_item_nr(slot);
7121 item2 = btrfs_item_nr(slot + 1);
7122 btrfs_item_key_to_cpu(buf, &k1, slot);
7123 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7124 item1_offset = btrfs_item_offset(buf, item1);
7125 item2_offset = btrfs_item_offset(buf, item2);
7126 item1_size = btrfs_item_size(buf, item1);
7127 item2_size = btrfs_item_size(buf, item2);
7129 item1_data = malloc(item1_size);
7132 item2_data = malloc(item2_size);
7138 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7139 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7141 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7142 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7146 btrfs_set_item_offset(buf, item1, item2_offset);
7147 btrfs_set_item_offset(buf, item2, item1_offset);
7148 btrfs_set_item_size(buf, item1, item2_size);
7149 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so point it at each slot in turn. */
7151 path->slots[0] = slot;
7152 btrfs_set_item_key_unsafe(root, path, &k2);
7153 path->slots[0] = slot + 1;
7154 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are read pairwise (node keys or item keys, depending on the
 * block type), compared with btrfs_comp_cpu_keys(), and handed to
 * swap_values() for the pair at index i; the buffer is marked dirty
 * afterwards.
 * NOTE(review): the statement executed when the comparison is < 0 (pair
 * already ordered) is not visible here. Also, the loop bound is
 * btrfs_header_nritems(buf) - 1; if that accessor returns an unsigned type
 * and the block has no items the bound wraps around. Confirm callers never
 * pass an empty block.
 */
7159 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7161 struct extent_buffer *buf;
7162 struct btrfs_key k1, k2;
7164 int level = path->lowest_level;
7167 buf = path->nodes[level];
7168 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7170 btrfs_node_key_to_cpu(buf, &k1, i);
7171 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7173 btrfs_item_key_to_cpu(buf, &k1, i);
7174 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7176 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7178 ret = swap_values(root, path, buf, i);
7181 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted; the statement run for any other type
 * is not visible in this excerpt. For an accepted key the function reports
 * the deletion on stdout, shifts the following item headers down by one
 * with memmove_extent_buffer(), decrements nritems and marks the buffer
 * dirty. btrfs_fixup_low_keys() is called with the key now at index 0
 * (presumably only when @slot was the first item; the guard is not
 * visible).
 */
7187 static int delete_bogus_item(struct btrfs_root *root,
7188 struct btrfs_path *path,
7189 struct extent_buffer *buf, int slot)
7191 struct btrfs_key key;
7192 int nritems = btrfs_header_nritems(buf);
7194 btrfs_item_key_to_cpu(buf, &key, slot);
7196 /* These are all the keys we can deal with missing. */
7197 if (key.type != BTRFS_DIR_INDEX_KEY &&
7198 key.type != BTRFS_EXTENT_ITEM_KEY &&
7199 key.type != BTRFS_METADATA_ITEM_KEY &&
7200 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7201 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7204 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7205 (unsigned long long)key.objectid, key.type,
7206 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap: move headers slot+1..nritems-1 down by one position. */
7207 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7208 btrfs_item_nr_offset(slot + 1),
7209 sizeof(struct btrfs_item) *
7210 (nritems - slot - 1));
7211 btrfs_set_header_nritems(buf, nritems - 1);
7213 struct btrfs_disk_key disk_key;
7215 btrfs_item_key(buf, &disk_key, 0);
7216 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7218 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets inside the leaf at path->nodes[0].
 *
 * Expected layout enforced by the visible checks: item 0 must end at
 * BTRFS_LEAF_DATA_SIZE(root), and every later item must end exactly where
 * the previous item starts.
 *  - An item ending beyond the expected position is passed to
 *    delete_bogus_item(); the "can not fix" diagnostics belong to this
 *    path.
 *  - An item ending short of the expected position leaves a gap; the gap
 *    size becomes the shift, the payload is moved up by that amount and the
 *    item offset is rewritten, then the buffer is marked dirty.
 * NOTE(review): the control flow after a deletion and the new offset value
 * passed to btrfs_set_item_offset() are not visible in this excerpt.
 */
7222 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7224 struct extent_buffer *buf;
7228 /* We should only get this for leaves */
7229 BUG_ON(path->lowest_level);
7230 buf = path->nodes[0];
7232 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7233 unsigned int shift = 0, offset;
7235 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7236 BTRFS_LEAF_DATA_SIZE(root)) {
7237 if (btrfs_item_end_nr(buf, i) >
7238 BTRFS_LEAF_DATA_SIZE(root)) {
7239 ret = delete_bogus_item(root, path, buf, i);
7242 fprintf(stderr, "item is off the end of the "
7243 "leaf, can't fix\n");
7247 shift = BTRFS_LEAF_DATA_SIZE(root) -
7248 btrfs_item_end_nr(buf, i);
7249 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7250 btrfs_item_offset_nr(buf, i - 1)) {
7251 if (btrfs_item_end_nr(buf, i) >
7252 btrfs_item_offset_nr(buf, i - 1)) {
7253 ret = delete_bogus_item(root, path, buf, i);
7256 fprintf(stderr, "items overlap, can't fix\n");
7260 shift = btrfs_item_offset_nr(buf, i - 1) -
7261 btrfs_item_end_nr(buf, i);
7266 printf("Shifting item nr %d by %u bytes in block %llu\n",
7267 i, shift, (unsigned long long)buf->start);
7268 offset = btrfs_item_offset_nr(buf, i);
7269 memmove_extent_buffer(buf,
7270 btrfs_leaf_data(buf) + offset + shift,
7271 btrfs_leaf_data(buf) + offset,
7272 btrfs_item_size_nr(buf, i));
7273 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7275 btrfs_mark_buffer_dirty(buf);
7279 * We may have moved things, in which case we want to exit so we don't
7280 * write those changes out. Once we have proper abort functionality in
7281 * progs this can be changed to something nicer.
7288 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7289 * then just return -EIO.
/*
 * Repair driver for a tree block that failed validation.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. btrfs_find_all_roots() collects every root referencing
 * buf->start; for each of them the root is read, a transaction is started,
 * the block is located with btrfs_search_slot() (block checking disabled
 * through path.skip_check_block, lowest_level set to the level of @buf)
 * and then fix_key_order() or fix_item_offset() is applied before the
 * transaction is committed.
 * NOTE(review): the error branches, the release of the roots ulist and the
 * final return value are not visible in this excerpt.
 */
7291 static int try_to_fix_bad_block(struct btrfs_root *root,
7292 struct extent_buffer *buf,
7293 enum btrfs_tree_block_status status)
7295 struct btrfs_trans_handle *trans;
7296 struct ulist *roots;
7297 struct ulist_node *node;
7298 struct btrfs_root *search_root;
7299 struct btrfs_path path;
7300 struct ulist_iterator iter;
7301 struct btrfs_key root_key, key;
7304 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7305 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7308 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7312 btrfs_init_path(&path);
7313 ULIST_ITER_INIT(&iter);
7314 while ((node = ulist_next(roots, &iter))) {
/* Each ulist value is a root objectid; build the key used to read that root. */
7315 root_key.objectid = node->val;
7316 root_key.type = BTRFS_ROOT_ITEM_KEY;
7317 root_key.offset = (u64)-1;
7319 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7326 trans = btrfs_start_transaction(search_root, 0);
7327 if (IS_ERR(trans)) {
7328 ret = PTR_ERR(trans);
7332 path.lowest_level = btrfs_header_level(buf);
7333 path.skip_check_block = 1;
/* Search by the first key of the bad block (node key or item key). */
7334 if (path.lowest_level)
7335 btrfs_node_key_to_cpu(buf, &key, 0);
7337 btrfs_item_key_to_cpu(buf, &key, 0);
7338 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7341 btrfs_commit_transaction(trans, search_root);
7344 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7345 ret = fix_key_order(search_root, &path);
7346 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7347 ret = fix_item_offset(search_root, &path);
7349 btrfs_commit_transaction(trans, search_root);
7352 btrfs_release_path(&path);
7353 btrfs_commit_transaction(trans, search_root);
7356 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record covering (buf->start, buf->len) is looked up in @extent_cache
 * and stamped with the generation and level of the block and, when the
 * block has items, the objectid of its first key. The block is then checked
 * with btrfs_check_leaf() or btrfs_check_node() against rec->parent_key.
 * A block that is not clean is handed to try_to_fix_bad_block(); if that
 * does not yield BTRFS_TREE_BLOCK_CLEAN, "bad block" is reported on stderr.
 * For a good block content_checked is set, and owner_ref_checked is set
 * either directly (BTRFS_BLOCK_FLAG_FULL_BACKREF in @flags) or after
 * check_owner_ref(). maybe_free_extent_rec() is called at the end.
 * NOTE(review): the conditions guarding the repair attempt and the values
 * returned on each path are not visible in this excerpt.
 */
7360 static int check_block(struct btrfs_root *root,
7361 struct cache_tree *extent_cache,
7362 struct extent_buffer *buf, u64 flags)
7364 struct extent_record *rec;
7365 struct cache_extent *cache;
7366 struct btrfs_key key;
7367 enum btrfs_tree_block_status status;
7371 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7374 rec = container_of(cache, struct extent_record, cache);
7375 rec->generation = btrfs_header_generation(buf);
7377 level = btrfs_header_level(buf);
7378 if (btrfs_header_nritems(buf) > 0) {
7381 btrfs_item_key_to_cpu(buf, &key, 0);
7383 btrfs_node_key_to_cpu(buf, &key, 0);
7385 rec->info_objectid = key.objectid;
7387 rec->info_level = level;
7389 if (btrfs_is_leaf(buf))
7390 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7392 status = btrfs_check_node(root, &rec->parent_key, buf);
7394 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7396 status = try_to_fix_bad_block(root, buf, status);
7397 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7399 fprintf(stderr, "bad block %llu\n",
7400 (unsigned long long)buf->start);
7403 * Signal to callers we need to start the scan over
7404 * again since we'll have cowed blocks.
7409 rec->content_checked = 1;
7410 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7411 rec->owner_ref_checked = 1;
7413 ret = check_owner_ref(root, rec, buf);
7415 rec->owner_ref_checked = 1;
7419 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look for an existing tree backref on @rec.
 *
 * Walks the rec->backrefs list; the visible comparisons match on
 * back->parent against @parent for full backrefs and on back->root against
 * @root otherwise.
 * NOTE(review): this lookup iterates the rec->backrefs list, while
 * add_tree_backref() inserts new backrefs into rec->backref_tree with
 * rb_insert(). Confirm that both containers are kept in sync, otherwise
 * entries added through the rb-tree are never found here. The skip of data
 * backrefs, the list advance and the return statements are not visible in
 * this excerpt.
 */
7424 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7425 u64 parent, u64 root)
7427 struct list_head *cur = rec->backrefs.next;
7428 struct extent_backref *node;
7429 struct tree_backref *back;
7431 while(cur != &rec->backrefs) {
7432 node = to_extent_backref(cur);
7436 back = to_tree_backref(node);
7438 if (!node->full_backref)
7440 if (parent == back->parent)
7443 if (node->full_backref)
7445 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 *
 * Two initialisation paths are visible: one stores @parent and marks the
 * node as a full backref, the other clears full_backref (presumably storing
 * @root instead; that assignment and the selecting condition are not
 * visible in this excerpt). Only ref->node is cleared by memset(), the rest
 * of the structure keeps whatever malloc() returned until assigned.
 */
7453 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7454 u64 parent, u64 root)
7456 struct tree_backref *ref = malloc(sizeof(*ref));
7460 memset(&ref->node, 0, sizeof(ref->node));
7462 ref->parent = parent;
7463 ref->node.full_backref = 1;
7466 ref->node.full_backref = 0;
/*
 * Look for an existing data backref on @rec.
 *
 * Walks the rec->backrefs list. Full backrefs are matched on @parent;
 * other backrefs are matched on the (@root, @owner, @offset) triple. For
 * the triple match, an entry is additionally tested when the caller passes
 * a found reference and the entry was already found: a differing bytes or
 * disk_bytenr triggers the statement following that test (presumably the
 * entry is skipped so a separate backref gets created; not visible here).
 * NOTE(review): as with find_tree_backref(), this iterates the list while
 * add_data_backref() inserts into rec->backref_tree; confirm both are kept
 * in sync.
 */
7473 static struct data_backref *find_data_backref(struct extent_record *rec,
7474 u64 parent, u64 root,
7475 u64 owner, u64 offset,
7477 u64 disk_bytenr, u64 bytes)
7479 struct list_head *cur = rec->backrefs.next;
7480 struct extent_backref *node;
7481 struct data_backref *back;
7483 while(cur != &rec->backrefs) {
7484 node = to_extent_backref(cur);
7488 back = to_data_backref(node);
7490 if (!node->full_backref)
7492 if (parent == back->parent)
7495 if (node->full_backref)
7497 if (back->root == root && back->owner == owner &&
7498 back->offset == offset) {
7499 if (found_ref && node->found_ref &&
7500 (back->bytes != bytes ||
7501 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data ref.
 *
 * Two initialisation paths are visible: one stores @parent and sets
 * full_backref, the other stores @offset and clears full_backref (the
 * remaining field assignments and the selecting condition are not visible
 * in this excerpt). ref->bytes is set to max_size, and rec->max_size is
 * raised when the new value is larger.
 */
7511 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7512 u64 parent, u64 root,
7513 u64 owner, u64 offset,
7516 struct data_backref *ref = malloc(sizeof(*ref));
7520 memset(&ref->node, 0, sizeof(ref->node));
7521 ref->node.is_data = 1;
7524 ref->parent = parent;
7527 ref->node.full_backref = 1;
7531 ref->offset = offset;
7532 ref->node.full_backref = 0;
7534 ref->bytes = max_size;
7537 if (max_size > rec->max_size)
7538 rec->max_size = max_size;
7542 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent lives in a block group whose
 * flags do not fit the kind of extent.
 *
 * The block group is found with btrfs_lookup_first_block_group() using
 * rec->start. Data extents require BTRFS_BLOCK_GROUP_DATA. Metadata
 * extents require SYSTEM or METADATA; when the record already has a
 * backref, the first one in the rb-tree decides which of the two is
 * expected: root BTRFS_CHUNK_TREE_OBJECTID means SYSTEM, anything else
 * means METADATA. A data backref on a metadata extent is flagged as well.
 * NOTE(review): handling of a failed block group lookup is not visible in
 * this excerpt.
 */
7543 static void check_extent_type(struct extent_record *rec)
7545 struct btrfs_block_group_cache *bg_cache;
7547 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7551 /* data extent, check chunk directly*/
7552 if (!rec->metadata) {
7553 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7554 rec->wrong_chunk_type = 1;
7558 /* metadata extent, check the obvious case first */
7559 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7560 BTRFS_BLOCK_GROUP_METADATA))) {
7561 rec->wrong_chunk_type = 1;
7566 * Check SYSTEM extent, as it's also marked as metadata, we can only
7567 * make sure it's a SYSTEM extent by its backref
7569 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7570 struct extent_backref *node;
7571 struct tree_backref *tback;
7574 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7575 if (node->is_data) {
7576 /* tree block shouldn't have data backref */
7577 rec->wrong_chunk_type = 1;
7580 tback = container_of(node, struct tree_backref, node);
7582 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7583 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7585 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7586 if (!(bg_cache->flags & bg_type))
7587 rec->wrong_chunk_type = 1;
7592 * Allocate a new extent record, fill default values from @tmpl and insert into
7593 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7594 * the cache, otherwise it fails.
/*
 * Create an extent_record from @tmpl and insert it into @extent_cache
 * without looking for an existing entry first.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Fields are copied from the
 * template except: nr becomes max(tmpl->nr, tmpl->max_size), the
 * duplicate / bad_full_backref / crossing_stripes / wrong_chunk_type
 * markers start at 0, and flag_block_full_backref starts as FLAG_UNSET.
 * The cache entry itself is sized with tmpl->nr. After insertion
 * bytes_used grows by rec->nr, crossing_stripes is computed with
 * check_crossing_stripes() using global_info->nodesize (presumably for
 * metadata only; the guard is not visible in this excerpt) and
 * check_extent_type() is run.
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * below stays uninitialised; allocation and insertion failure handling is
 * not visible here.
 */
7596 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7597 struct extent_record *tmpl)
7599 struct extent_record *rec;
7602 BUG_ON(tmpl->max_size == 0);
7603 rec = malloc(sizeof(*rec));
7606 rec->start = tmpl->start;
7607 rec->max_size = tmpl->max_size;
7608 rec->nr = max(tmpl->nr, tmpl->max_size);
7609 rec->found_rec = tmpl->found_rec;
7610 rec->content_checked = tmpl->content_checked;
7611 rec->owner_ref_checked = tmpl->owner_ref_checked;
7612 rec->num_duplicates = 0;
7613 rec->metadata = tmpl->metadata;
7614 rec->flag_block_full_backref = FLAG_UNSET;
7615 rec->bad_full_backref = 0;
7616 rec->crossing_stripes = 0;
7617 rec->wrong_chunk_type = 0;
7618 rec->is_root = tmpl->is_root;
7619 rec->refs = tmpl->refs;
7620 rec->extent_item_refs = tmpl->extent_item_refs;
7621 rec->parent_generation = tmpl->parent_generation;
7622 INIT_LIST_HEAD(&rec->backrefs);
7623 INIT_LIST_HEAD(&rec->dups);
7624 INIT_LIST_HEAD(&rec->list);
7625 rec->backref_tree = RB_ROOT;
7626 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7627 rec->cache.start = tmpl->start;
7628 rec->cache.size = tmpl->nr;
7629 ret = insert_cache_extent(extent_cache, &rec->cache);
7634 bytes_used += rec->nr;
7637 rec->crossing_stripes = check_crossing_stripes(global_info,
7638 rec->start, global_info->nodesize);
7639 check_extent_type(rec);
7644 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7646 * - refs - if found, increase refs
7647 * - is_root - if found, set
7648 * - content_checked - if found, set
7649 * - owner_ref_checked - if found, set
7651 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup().
 *
 * When a record is found:
 *  - nr is reset to max(tmpl->nr, tmpl->max_size) (guard not visible);
 *  - if the template comes from a real extent item (found_rec) and either
 *    starts elsewhere or the record already had one, the record is queued
 *    on duplicate_extents and a stub record describing the duplicate is
 *    appended to rec->dups, bumping num_duplicates;
 *  - extent_item_refs is taken from the template unless this was a
 *    duplicate, with a stderr message when a value was already present;
 *  - content_checked / owner_ref_checked are sticky, parent_key is
 *    overwritten, parent_generation and max_size only grow or get set;
 *  - crossing_stripes and the chunk type are re-evaluated and
 *    maybe_free_extent_rec() is given a chance to drop the record.
 * NOTE(review): the stub is allocated with malloc() and only the fields
 * assigned below are initialised; several guards and the return paths are
 * not visible in this excerpt.
 */
7653 static int add_extent_rec(struct cache_tree *extent_cache,
7654 struct extent_record *tmpl)
7656 struct extent_record *rec;
7657 struct cache_extent *cache;
7661 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7663 rec = container_of(cache, struct extent_record, cache);
7667 rec->nr = max(tmpl->nr, tmpl->max_size);
7670 * We need to make sure to reset nr to whatever the extent
7671 * record says was the real size, this way we can compare it to
7674 if (tmpl->found_rec) {
7675 if (tmpl->start != rec->start || rec->found_rec) {
7676 struct extent_record *tmp;
7679 if (list_empty(&rec->list))
7680 list_add_tail(&rec->list,
7681 &duplicate_extents);
7684 * We have to do this song and dance in case we
7685 * find an extent record that falls inside of
7686 * our current extent record but does not have
7687 * the same objectid.
7689 tmp = malloc(sizeof(*tmp));
7692 tmp->start = tmpl->start;
7693 tmp->max_size = tmpl->max_size;
7696 tmp->metadata = tmpl->metadata;
7697 tmp->extent_item_refs = tmpl->extent_item_refs;
7698 INIT_LIST_HEAD(&tmp->list);
7699 list_add_tail(&tmp->list, &rec->dups);
7700 rec->num_duplicates++;
7707 if (tmpl->extent_item_refs && !dup) {
7708 if (rec->extent_item_refs) {
7709 fprintf(stderr, "block %llu rec "
7710 "extent_item_refs %llu, passed %llu\n",
7711 (unsigned long long)tmpl->start,
7712 (unsigned long long)
7713 rec->extent_item_refs,
7714 (unsigned long long)tmpl->extent_item_refs);
7716 rec->extent_item_refs = tmpl->extent_item_refs;
7720 if (tmpl->content_checked)
7721 rec->content_checked = 1;
7722 if (tmpl->owner_ref_checked)
7723 rec->owner_ref_checked = 1;
7724 memcpy(&rec->parent_key, &tmpl->parent_key,
7725 sizeof(tmpl->parent_key));
7726 if (tmpl->parent_generation)
7727 rec->parent_generation = tmpl->parent_generation;
7728 if (rec->max_size < tmpl->max_size)
7729 rec->max_size = tmpl->max_size;
7732 * A metadata extent can't cross stripe_len boundary, otherwise
7733 * kernel scrub won't be able to handle it.
7734 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7738 rec->crossing_stripes = check_crossing_stripes(
7739 global_info, rec->start,
7740 global_info->nodesize);
7741 check_extent_type(rec);
7742 maybe_free_extent_rec(extent_cache, rec);
7746 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for the extent at @bytenr.
 *
 * The extent record is looked up (a one-byte probe at @bytenr) and created
 * from a zeroed template through add_extent_rec_nolookup() when missing.
 * A record whose start differs from @bytenr is treated as an error case.
 * The backref is located with find_tree_backref() or allocated with
 * alloc_tree_backref(). Depending on @found_ref, either node.found_ref or
 * node.found_extent_tree is set; if the flag was already set, "Extent back
 * ref already exists" is printed to stderr. A newly allocated backref is
 * inserted into rec->backref_tree (WARN_ON on insertion failure), then the
 * chunk type is rechecked and maybe_free_extent_rec() is invoked.
 * NOTE(review): the remaining template fields, the error returns and the
 * guard around rb_insert() are not visible in this excerpt.
 */
7751 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7752 u64 parent, u64 root, int found_ref)
7754 struct extent_record *rec;
7755 struct tree_backref *back;
7756 struct cache_extent *cache;
7758 bool insert = false;
7760 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7762 struct extent_record tmpl;
7764 memset(&tmpl, 0, sizeof(tmpl));
7765 tmpl.start = bytenr;
7770 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7774 /* really a bug in cache_extent implement now */
7775 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7780 rec = container_of(cache, struct extent_record, cache);
7781 if (rec->start != bytenr) {
7783 * Several cause, from unaligned bytenr to over lapping extents
7788 back = find_tree_backref(rec, parent, root);
7790 back = alloc_tree_backref(rec, parent, root);
7797 if (back->node.found_ref) {
7798 fprintf(stderr, "Extent back ref already exists "
7799 "for %llu parent %llu root %llu \n",
7800 (unsigned long long)bytenr,
7801 (unsigned long long)parent,
7802 (unsigned long long)root);
7804 back->node.found_ref = 1;
7806 if (back->node.found_extent_tree) {
7807 fprintf(stderr, "Extent back ref already exists "
7808 "for %llu parent %llu root %llu \n",
7809 (unsigned long long)bytenr,
7810 (unsigned long long)parent,
7811 (unsigned long long)root);
7813 back->node.found_extent_tree = 1;
7816 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7817 compare_extent_backref));
7818 check_extent_type(rec);
7819 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up and created from a template (start and
 * max_size visible) when missing; rec->max_size is raised to @max_size if
 * larger. The backref is located with find_data_backref() or allocated
 * with alloc_data_backref().
 *
 * For a reference found while scanning file extents: @num_refs must be 1
 * (BUG_ON), an already-found backref must carry the same byte count
 * (BUG_ON), found_ref is counted, bytes / disk_bytenr are refreshed when
 * they differ (an existing node is erased from the rb-tree so it can be
 * re-inserted under its new key), and the record is marked content and
 * owner checked.
 * For a reference coming from the extent tree: num_refs is stored and
 * found_extent_tree is set, with a stderr message if it was set before.
 *
 * Finally the backref is inserted into rec->backref_tree (WARN_ON on
 * failure) and maybe_free_extent_rec() is invoked.
 * NOTE(review): the branch selectors, the insert guard and the return
 * paths are not visible in this excerpt.
 */
7823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7824 u64 parent, u64 root, u64 owner, u64 offset,
7825 u32 num_refs, int found_ref, u64 max_size)
7827 struct extent_record *rec;
7828 struct data_backref *back;
7829 struct cache_extent *cache;
7831 bool insert = false;
7833 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7835 struct extent_record tmpl;
7837 memset(&tmpl, 0, sizeof(tmpl));
7838 tmpl.start = bytenr;
7840 tmpl.max_size = max_size;
7842 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7846 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7851 rec = container_of(cache, struct extent_record, cache);
7852 if (rec->max_size < max_size)
7853 rec->max_size = max_size;
7856 * If found_ref is set then max_size is the real size and must match the
7857 * existing refs. So if we have already found a ref then we need to
7858 * make sure that this ref matches the existing one, otherwise we need
7859 * to add a new backref so we can notice that the backrefs don't match
7860 * and we need to figure out who is telling the truth. This is to
7861 * account for that awful fsync bug I introduced where we'd end up with
7862 * a btrfs_file_extent_item that would have its length include multiple
7863 * prealloc extents or point inside of a prealloc extent.
7865 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7868 back = alloc_data_backref(rec, parent, root, owner, offset,
7875 BUG_ON(num_refs != 1);
7876 if (back->node.found_ref)
7877 BUG_ON(back->bytes != max_size);
7878 back->node.found_ref = 1;
7879 back->found_ref += 1;
7880 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7881 back->bytes = max_size;
7882 back->disk_bytenr = bytenr;
7884 /* Need to reinsert if not already in the tree */
7886 rb_erase(&back->node.node, &rec->backref_tree);
7891 rec->content_checked = 1;
7892 rec->owner_ref_checked = 1;
7894 if (back->node.found_extent_tree) {
7895 fprintf(stderr, "Extent back ref already exists "
7896 "for %llu parent %llu root %llu "
7897 "owner %llu offset %llu num_refs %lu\n",
7898 (unsigned long long)bytenr,
7899 (unsigned long long)parent,
7900 (unsigned long long)root,
7901 (unsigned long long)owner,
7902 (unsigned long long)offset,
7903 (unsigned long)num_refs);
7905 back->num_refs = num_refs;
7906 back->node.found_extent_tree = 1;
7909 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7910 compare_extent_backref));
7912 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for processing.
 *
 * The range is first added to @seen; it is then added to @pending.
 * NOTE(review): presumably a failure to add to @seen (range already known)
 * returns early so nothing is queued twice, but that check is not visible
 * in this excerpt. The result of the second add_cache_extent() call is
 * ignored.
 */
7916 static int add_pending(struct cache_tree *pending,
7917 struct cache_tree *seen, u64 bytenr, u32 size)
7920 ret = add_cache_extent(seen, bytenr, size);
7923 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store it in @bits
 * (at most @bits_nr entries).
 *
 * Priority visible in the code:
 *  1. the first entry of @reada, returned alone in bits[0];
 *  2. entries of @nodes starting from a position 32768 bytes before @last
 *     (falling back to the start of @nodes);
 *  3. entries of @pending from its start.
 * After a batch was taken, when more than 8 slots remain, entries of
 * @pending located after bits[0] are appended as long as each one starts
 * within 32768 bytes of the end of the previous one.
 * NOTE(review): node_start is declared unsigned long but initialised from
 * the u64 @last; where unsigned long is 32 bits wide the value is
 * truncated. The conditions selecting between the sources and the return
 * statements are not visible in this excerpt.
 */
7927 static int pick_next_pending(struct cache_tree *pending,
7928 struct cache_tree *reada,
7929 struct cache_tree *nodes,
7930 u64 last, struct block_info *bits, int bits_nr,
7933 unsigned long node_start = last;
7934 struct cache_extent *cache;
7937 cache = search_cache_extent(reada, 0);
7939 bits[0].start = cache->start;
7940 bits[0].size = cache->size;
7945 if (node_start > 32768)
7946 node_start -= 32768;
7948 cache = search_cache_extent(nodes, node_start);
7950 cache = search_cache_extent(nodes, 0);
7953 cache = search_cache_extent(pending, 0);
7958 bits[ret].start = cache->start;
7959 bits[ret].size = cache->size;
7960 cache = next_cache_extent(cache);
7962 } while (cache && ret < bits_nr);
7968 bits[ret].start = cache->start;
7969 bits[ret].size = cache->size;
7970 cache = next_cache_extent(cache);
7972 } while (cache && ret < bits_nr);
7974 if (bits_nr - ret > 8) {
7975 u64 lookup = bits[0].start + bits[0].size;
7976 struct cache_extent *next;
7977 next = search_cache_extent(pending, lookup);
7979 if (next->start - lookup > 32768)
7981 bits[ret].start = next->start;
7982 bits[ret].size = next->size;
7983 lookup = next->start + next->size;
7987 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps @cache to its
 * chunk_record and unlinks the record from both of its lists (list and
 * dextents).
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
7995 static void free_chunk_record(struct cache_extent *cache)
7997 struct chunk_record *rec;
7999 rec = container_of(cache, struct chunk_record, cache);
8000 list_del_init(&rec->list);
8001 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by running free_chunk_record() on
 * each one through cache_tree_free_extents().
 */
8005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8007 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device cache: maps @node to its enclosing
 * device_record.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
8010 static void free_device_record(struct rb_node *node)
8012 struct device_record *rec;
8014 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with free_device_record.
 * NOTE(review): presumably expands to the function that frees a whole
 * device rb-tree; the macro definition is outside this excerpt, confirm
 * there.
 */
8018 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: the record is inserted into the cache tree keyed
 * by its cache extent and appended to the tree->block_groups list.
 * NOTE(review): the handling of an insert_cache_extent() failure and the
 * return value are not visible in this excerpt.
 */
8020 int insert_block_group_record(struct block_group_tree *tree,
8021 struct block_group_record *bg_rec)
8025 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8029 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps @cache
 * to its block_group_record and unlinks it from its list.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
8033 static void free_block_group_record(struct cache_extent *cache)
8035 struct block_group_record *rec;
8037 rec = container_of(cache, struct block_group_record, cache);
8038 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree by running free_block_group_record()
 * on each one through cache_tree_free_extents().
 */
8042 void free_block_group_tree(struct block_group_tree *tree)
8044 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree using insert_cache_extent2(), then queue it on both
 * orphan lists (no_chunk_orphans and no_device_orphans).
 * NOTE(review): the handling of an insertion failure and the return value
 * are not visible in this excerpt.
 */
8047 int insert_device_extent_record(struct device_extent_tree *tree,
8048 struct device_extent_record *de_rec)
8053 * Device extent is a bit different from the other extents, because
8054 * the extents which belong to the different devices may have the
8055 * same start and size, so we need use the special extent cache
8056 * search/insert functions.
8058 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8062 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8063 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: maps
 * @cache to its device_extent_record and unlinks it from chunk_list and
 * device_list when it is still linked.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt.
 */
8067 static void free_device_extent_record(struct cache_extent *cache)
8069 struct device_extent_record *rec;
8071 rec = container_of(cache, struct device_extent_record, cache);
8072 if (!list_empty(&rec->chunk_list))
8073 list_del_init(&rec->chunk_list);
8074 if (!list_empty(&rec->device_list))
8075 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree by running free_device_extent_record()
 * on each one through cache_tree_free_extents().
 */
8079 void free_device_extent_tree(struct device_extent_tree *tree)
8081 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8084 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item at @slot of @leaf into a backref.
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); anything else is recorded with add_data_backref()
 * using key.objectid as bytenr, key.offset as parent, zero for root, owner
 * and offset, the v0 ref count as num_refs, and zero for found_ref and
 * max_size.
 */
8085 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8086 struct extent_buffer *leaf, int slot)
8088 struct btrfs_extent_ref_v0 *ref0;
8089 struct btrfs_key key;
8092 btrfs_item_key_to_cpu(leaf, &key, slot);
8093 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8094 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8095 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8098 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8099 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes
 * (btrfs_chunk_record_size()); "memory allocation failed" is printed when
 * calloc() fails (what happens next is not visible in this excerpt).
 * The cache extent spans (key->offset, chunk length); generation comes
 * from the leaf header, objectid / type / offset from @key, and the
 * remaining fields plus every stripe (devid, offset, dev_uuid) are read
 * from the chunk item.
 */
8105 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8106 struct btrfs_key *key,
8109 struct btrfs_chunk *ptr;
8110 struct chunk_record *rec;
8113 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8114 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8116 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8118 fprintf(stderr, "memory allocation failed\n");
8122 INIT_LIST_HEAD(&rec->list);
8123 INIT_LIST_HEAD(&rec->dextents);
8126 rec->cache.start = key->offset;
8127 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8129 rec->generation = btrfs_header_generation(leaf);
8131 rec->objectid = key->objectid;
8132 rec->type = key->type;
8133 rec->offset = key->offset;
8135 rec->length = rec->cache.size;
8136 rec->owner = btrfs_chunk_owner(leaf, ptr);
8137 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8138 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8139 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8140 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8141 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8142 rec->num_stripes = num_stripes;
8143 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8145 for (i = 0; i < rec->num_stripes; ++i) {
8146 rec->stripes[i].devid =
8147 btrfs_stripe_devid_nr(leaf, ptr, i);
8148 rec->stripes[i].offset =
8149 btrfs_stripe_offset_nr(leaf, ptr, i);
8150 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8151 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error() with its offset and length. A valid
 * chunk is turned into a record by btrfs_new_chunk_record() and inserted
 * into the cache; "Chunk[...] existed." is printed when the insertion
 * reports a conflict.
 * NOTE(review): the return values and the disposal of a record that could
 * not be inserted are not visible in this excerpt.
 */
8158 static int process_chunk_item(struct cache_tree *chunk_cache,
8159 struct btrfs_key *key, struct extent_buffer *eb,
8162 struct chunk_record *rec;
8163 struct btrfs_chunk *chunk;
8166 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8168 * Do extra check for this chunk item,
8170 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8171 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8172 * and owner<->key_type check.
8174 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8177 error("chunk(%llu, %llu) is not valid, ignore it",
8178 key->offset, btrfs_chunk_length(eb, chunk));
8181 rec = btrfs_new_chunk_record(eb, key, slot);
8182 ret = insert_cache_extent(chunk_cache, &rec->cache);
8184 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8185 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * The record is allocated with malloc(); "memory allocation failed" is
 * printed when that fails. Generation comes from the leaf header,
 * objectid / type / offset from @key, devid / total_byte / byte_used from
 * the dev item. "Device[...] existed." is printed when rb_insert() reports
 * a conflict.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); the first store is overwritten. Return
 * values and cleanup of a rejected record are not visible in this excerpt.
 */
8192 static int process_device_item(struct rb_root *dev_cache,
8193 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8195 struct btrfs_dev_item *ptr;
8196 struct device_record *rec;
8199 ptr = btrfs_item_ptr(eb,
8200 slot, struct btrfs_dev_item);
8202 rec = malloc(sizeof(*rec));
8204 fprintf(stderr, "memory allocation failed\n");
8208 rec->devid = key->offset;
8209 rec->generation = btrfs_header_generation(eb);
8211 rec->objectid = key->objectid;
8212 rec->type = key->type;
8213 rec->offset = key->offset;
8215 rec->devid = btrfs_device_id(eb, ptr);
8216 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8217 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8219 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8221 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at the given slot
 * of @leaf.
 *
 * The record is zero-allocated; "memory allocation failed" is printed when
 * calloc() fails. The cache extent spans (key->objectid, key->offset),
 * generation comes from the leaf header, objectid / type / offset from
 * @key and flags from the on-disk item. The list head is initialised so
 * the record can later be linked into a block group list.
 */
8228 struct block_group_record *
8229 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8232 struct btrfs_block_group_item *ptr;
8233 struct block_group_record *rec;
8235 rec = calloc(1, sizeof(*rec));
8237 fprintf(stderr, "memory allocation failed\n");
8241 rec->cache.start = key->objectid;
8242 rec->cache.size = key->offset;
8244 rec->generation = btrfs_header_generation(leaf);
8246 rec->objectid = key->objectid;
8247 rec->type = key->type;
8248 rec->offset = key->offset;
8250 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8251 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8253 INIT_LIST_HEAD(&rec->list);
/*
 * Record one BLOCK_GROUP_ITEM from leaf @eb at @slot.
 *
 * The record is built by btrfs_new_block_group_record() and added to
 * @block_group_cache through insert_block_group_record().  The
 * "Block Group[...] existed." diagnostic prints the record's objectid and
 * offset, i.e. the start and length copied from the key.
 */
8258 static int process_block_group_item(struct block_group_tree *block_group_cache,
8259 struct btrfs_key *key,
8260 struct extent_buffer *eb, int slot)
8262 struct block_group_record *rec;
8265 rec = btrfs_new_block_group_record(eb, key, slot);
8266 ret = insert_block_group_record(block_group_cache, rec);
8268 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8269 rec->objectid, rec->offset);
/*
 * Build a device_extent_record for the DEV_EXTENT item at @slot of @leaf.
 *
 * The record is zero-allocated with calloc().  Its cache entry is keyed by
 * (key->objectid, key->offset) and sized by the extent length read from the
 * item.  The key and the leaf's header generation are copied in, the owning
 * chunk's objectid is read with btrfs_dev_extent_chunk_objectid() (stored in
 * the field spelled "chunk_objecteid" here), and both list heads start empty.
 * An allocation failure is reported on stderr.
 */
8276 struct device_extent_record *
8277 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8278 struct btrfs_key *key, int slot)
8280 struct device_extent_record *rec;
8281 struct btrfs_dev_extent *ptr;
8283 rec = calloc(1, sizeof(*rec));
8285 fprintf(stderr, "memory allocation failed\n");
8289 rec->cache.objectid = key->objectid;
8290 rec->cache.start = key->offset;
8292 rec->generation = btrfs_header_generation(leaf);
8294 rec->objectid = key->objectid;
8295 rec->type = key->type;
8296 rec->offset = key->offset;
8298 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8299 rec->chunk_objecteid =
8300 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8302 btrfs_dev_extent_chunk_offset(leaf, ptr);
8303 rec->length = btrfs_dev_extent_length(leaf, ptr);
/* The cache entry spans the whole device extent. */
8304 rec->cache.size = rec->length;
8306 INIT_LIST_HEAD(&rec->chunk_list);
8307 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record one DEV_EXTENT item from leaf @eb.
 *
 * The record is built by btrfs_new_device_extent_record() and added to
 * @dev_extent_cache through insert_device_extent_record().  The
 * "Device extent[...] existed." diagnostic prints the record's objectid,
 * offset and length.
 */
8313 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8314 struct btrfs_key *key, struct extent_buffer *eb,
8317 struct device_extent_record *rec;
8320 rec = btrfs_new_device_extent_record(eb, key, slot);
8321 ret = insert_device_extent_record(dev_extent_cache, rec);
8324 "Device extent[%llu, %llu, %llu] existed.\n",
8325 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM / METADATA_ITEM at @slot of @eb into @extent_cache.
 *
 * Steps visible here:
 *  - derive the extent length: nodesize for METADATA_ITEM keys, key.offset
 *    otherwise;
 *  - reject a start that is not sectorsize aligned, a metadata length that
 *    differs from nodesize, and a data length that is not sectorsize aligned
 *    (each reported through error());
 *  - items smaller than struct btrfs_extent_item are handled as the v0
 *    format when BTRFS_COMPAT_EXTENT_TREE_V0 is defined;
 *  - register the extent with add_extent_rec(), then walk the inline
 *    references stored behind the extent item and register each one with
 *    add_tree_backref() or add_data_backref() according to its type.
 *    An unknown inline ref type prints "corrupt extent record".
 */
8332 static int process_extent_item(struct btrfs_root *root,
8333 struct cache_tree *extent_cache,
8334 struct extent_buffer *eb, int slot)
8336 struct btrfs_extent_item *ei;
8337 struct btrfs_extent_inline_ref *iref;
8338 struct btrfs_extent_data_ref *dref;
8339 struct btrfs_shared_data_ref *sref;
8340 struct btrfs_key key;
8341 struct extent_record tmpl;
8346 u32 item_size = btrfs_item_size_nr(eb, slot);
8352 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM uses nodesize as the length; other keys use key.offset. */
8354 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8356 num_bytes = root->fs_info->nodesize;
8358 num_bytes = key.offset;
8361 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8362 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8363 key.objectid, root->fs_info->sectorsize);
/* Item too small for the current layout: only the v0 format can match. */
8366 if (item_size < sizeof(*ei)) {
8367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8368 struct btrfs_extent_item_v0 *ei0;
8369 BUG_ON(item_size != sizeof(*ei0));
8370 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8371 refs = btrfs_extent_refs_v0(eb, ei0);
8375 memset(&tmpl, 0, sizeof(tmpl));
8376 tmpl.start = key.objectid;
8377 tmpl.nr = num_bytes;
8378 tmpl.extent_item_refs = refs;
8379 tmpl.metadata = metadata;
8381 tmpl.max_size = num_bytes;
8383 return add_extent_rec(extent_cache, &tmpl);
8386 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8387 refs = btrfs_extent_refs(eb, ei);
8388 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8392 if (metadata && num_bytes != root->fs_info->nodesize) {
8393 error("ignore invalid metadata extent, length %llu does not equal to %u",
8394 num_bytes, root->fs_info->nodesize);
8397 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8398 error("ignore invalid data extent, length %llu is not aligned to %u",
8399 num_bytes, root->fs_info->sectorsize);
8403 memset(&tmpl, 0, sizeof(tmpl));
8404 tmpl.start = key.objectid;
8405 tmpl.nr = num_bytes;
8406 tmpl.extent_item_refs = refs;
8407 tmpl.metadata = metadata;
8409 tmpl.max_size = num_bytes;
8410 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for a tree block keyed as
 * EXTENT_ITEM a struct btrfs_tree_block_info sits in between and is skipped.
 */
8412 ptr = (unsigned long)(ei + 1);
8413 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8414 key.type == BTRFS_EXTENT_ITEM_KEY)
8415 ptr += sizeof(struct btrfs_tree_block_info);
8417 end = (unsigned long)ei + item_size;
8419 iref = (struct btrfs_extent_inline_ref *)ptr;
8420 type = btrfs_extent_inline_ref_type(eb, iref);
8421 offset = btrfs_extent_inline_ref_offset(eb, iref);
8423 case BTRFS_TREE_BLOCK_REF_KEY:
8424 ret = add_tree_backref(extent_cache, key.objectid,
8428 "add_tree_backref failed (extent items tree block): %s",
8431 case BTRFS_SHARED_BLOCK_REF_KEY:
8432 ret = add_tree_backref(extent_cache, key.objectid,
8436 "add_tree_backref failed (extent items shared block): %s",
8439 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
8440 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8441 add_data_backref(extent_cache, key.objectid, 0,
8442 btrfs_extent_data_ref_root(eb, dref),
8443 btrfs_extent_data_ref_objectid(eb,
8445 btrfs_extent_data_ref_offset(eb, dref),
8446 btrfs_extent_data_ref_count(eb, dref),
8449 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
8450 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8451 add_data_backref(extent_cache, key.objectid, offset,
8453 btrfs_shared_data_ref_count(eb, sref),
8457 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8458 key.objectid, key.type, num_bytes);
/* Step over the inline ref just handled; its size depends on the type. */
8461 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that [@offset, @offset + @bytes) of block group @cache is described
 * by exactly one entry of the loaded free space cache, then consume it.
 *
 * For every superblock mirror, btrfs_rmap_block() maps the superblock offset
 * to logical addresses inside the block group.  Each such stripe is compared
 * with the range: stripes entirely outside it are ignored, a stripe at the
 * start or end trims the range, and a stripe in the middle makes the function
 * check the left part recursively and continue with the right part.
 *
 * The remaining range must match a free space entry found by
 * btrfs_find_free_space() in both offset and length; mismatches are reported
 * on stderr.  A matching entry is removed with unlink_free_space().
 */
8468 static int check_cache_range(struct btrfs_root *root,
8469 struct btrfs_block_group_cache *cache,
8470 u64 offset, u64 bytes)
8472 struct btrfs_free_space *entry;
8478 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8479 bytenr = btrfs_sb_offset(i);
8480 ret = btrfs_rmap_block(root->fs_info,
8481 cache->key.objectid, bytenr, 0,
8482 &logical, &nr, &stripe_len);
/* Stripe ends before the range, or starts after it: no overlap. */
8487 if (logical[nr] + stripe_len <= offset)
8489 if (offset + bytes <= logical[nr])
8491 if (logical[nr] == offset) {
8492 if (stripe_len >= bytes) {
8496 bytes -= stripe_len;
8497 offset += stripe_len;
8498 } else if (logical[nr] < offset) {
8499 if (logical[nr] + stripe_len >=
8504 bytes = (offset + bytes) -
8505 (logical[nr] + stripe_len);
8506 offset = logical[nr] + stripe_len;
8509 * Could be tricky, the super may land in the
8510 * middle of the area we're checking. First
8511 * check the easiest case, it's at the end.
8513 if (logical[nr] + stripe_len >=
8515 bytes = logical[nr] - offset;
8519 /* Check the left side */
8520 ret = check_cache_range(root, cache,
8522 logical[nr] - offset);
8528 /* Now we continue with the right side */
8529 bytes = (offset + bytes) -
8530 (logical[nr] + stripe_len);
8531 offset = logical[nr] + stripe_len;
8538 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8540 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8541 offset, offset+bytes);
8545 if (entry->offset != offset) {
8546 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8551 if (entry->bytes != bytes) {
8552 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8553 bytes, entry->bytes, offset);
/* Consume the entry so leftovers can be detected by the caller. */
8557 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space cache of block group @cache against the extent
 * tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group.  "last" tracks
 * the end of the previous extent: every gap between "last" and the next
 * EXTENT_ITEM / METADATA_ITEM, and the tail gap up to the block group end,
 * is passed to check_cache_range().  Metadata items advance "last" by
 * nodesize, extent items by key.offset.
 *
 * After the walk, entries still present in the free_space_offset tree are
 * reported on stderr.
 */
8562 static int verify_space_cache(struct btrfs_root *root,
8563 struct btrfs_block_group_cache *cache)
8565 struct btrfs_path path;
8566 struct extent_buffer *leaf;
8567 struct btrfs_key key;
/* All lookups below go through the extent tree, whatever root was passed. */
8571 root = root->fs_info->extent_root;
8573 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8575 btrfs_init_path(&path);
8576 key.objectid = last;
8578 key.type = BTRFS_EXTENT_ITEM_KEY;
8579 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8584 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8585 ret = btrfs_next_leaf(root, &path);
8593 leaf = path.nodes[0];
8594 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8595 if (key.objectid >= cache->key.offset + cache->key.objectid)
8597 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8598 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
8603 if (last == key.objectid) {
8604 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8605 last = key.objectid + key.offset;
8607 last = key.objectid + root->fs_info->nodesize;
8612 ret = check_cache_range(root, cache, last,
8613 key.objectid - last);
8616 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8617 last = key.objectid + key.offset;
8619 last = key.objectid + root->fs_info->nodesize;
/* Free space between the last extent and the end of the block group. */
8623 if (last < cache->key.objectid + cache->key.offset)
8624 ret = check_cache_range(root, cache, last,
8625 cache->key.objectid +
8626 cache->key.offset - last);
8629 btrfs_release_path(&path);
8632 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8633 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * If the superblock's cache generation is set (not -1ULL) and differs from
 * the superblock generation, a message is printed saying the space cache
 * will be invalidated.  Otherwise block groups are looked up one after the
 * other starting just past the primary superblock.  For each one the free
 * space ctl is initialised if needed and any previously loaded cache is
 * dropped; then either the free space tree (when the FREE_SPACE_TREE
 * compat_ro bit is set, with super stripes excluded around the load) or the
 * free space cache is loaded and checked with verify_space_cache().
 *
 * Returns -EINVAL when the local error indicator is non-zero, 0 otherwise.
 */
8641 static int check_space_cache(struct btrfs_root *root)
8643 struct btrfs_block_group_cache *cache;
8644 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8648 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8649 btrfs_super_generation(root->fs_info->super_copy) !=
8650 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8651 printf("cache and super generation don't match, space cache "
8652 "will be invalidated\n");
8656 if (ctx.progress_enabled) {
8657 ctx.tp = TASK_FREE_SPACE;
8658 task_start(ctx.info);
8662 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after this block group. */
8666 start = cache->key.objectid + cache->key.offset;
8667 if (!cache->free_space_ctl) {
8668 if (btrfs_init_free_space_ctl(cache,
8669 root->fs_info->sectorsize)) {
8674 btrfs_remove_free_space_cache(cache);
8677 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8678 ret = exclude_super_stripes(root, cache);
8680 fprintf(stderr, "could not exclude super stripes: %s\n",
8685 ret = load_free_space_tree(root->fs_info, cache);
8686 free_excluded_extents(root, cache);
8688 fprintf(stderr, "could not load free space tree: %s\n",
8695 ret = load_free_space_cache(root->fs_info, cache);
8700 ret = verify_space_cache(root, cache);
8702 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8703 cache->key.objectid);
8708 task_stop(ctx.info);
8710 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of [@bytenr, @bytenr + @num_bytes).
 *
 * @eb / @leaf_offset locate the csum item payload: the expected checksum of
 * the sector at byte offset "tmp" within the range is read from
 * @leaf_offset + tmp / sectorsize * csum_size.
 *
 * The data is read with read_extent_data() into a malloc()ed buffer of
 * @num_bytes and checksummed one sector at a time with btrfs_csum_data() /
 * btrfs_csum_final().  A mismatch is printed together with the mirror
 * number, and btrfs_num_copies() is consulted to decide whether another
 * mirror can be tried.  @num_bytes is tested against the sector size up
 * front.
 */
8713 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8714 u64 num_bytes, unsigned long leaf_offset,
8715 struct extent_buffer *eb) {
8717 struct btrfs_fs_info *fs_info = root->fs_info;
8719 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8721 unsigned long csum_offset;
8725 u64 data_checked = 0;
8731 if (num_bytes % fs_info->sectorsize)
8734 data = malloc(num_bytes);
8738 while (offset < num_bytes) {
8741 read_len = num_bytes - offset;
8742 /* read as much as possible in one go; read_len is passed by pointer */
8743 ret = read_extent_data(fs_info, data + offset,
8744 bytenr + offset, &read_len, mirror);
8748 /* verify the checksum of each sector (the step is fs_info->sectorsize) */
8749 while (data_checked < read_len) {
8751 tmp = offset + data_checked;
8753 csum = btrfs_csum_data((char *)data + tmp,
8754 csum, fs_info->sectorsize);
8755 btrfs_csum_final(csum, (u8 *)&csum);
8757 csum_offset = leaf_offset +
8758 tmp / fs_info->sectorsize * csum_size;
8759 read_extent_buffer(eb, (char *)&csum_expected,
8760 csum_offset, csum_size);
8761 /* on mismatch, report it and try another mirror if one is left */
8762 if (csum != csum_expected) {
8763 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8764 mirror, bytenr + tmp,
8765 csum, csum_expected);
8766 num_copies = btrfs_num_copies(root->fs_info,
8768 if (mirror < num_copies - 1) {
8773 data_checked += fs_info->sectorsize;
/*
 * Check that [@bytenr, @bytenr + @num_bytes) is covered by EXTENT_ITEMs in
 * the extent tree.
 *
 * The search starts at (@bytenr, EXTENT_ITEM, -1) and steps back so that an
 * extent beginning before @bytenr is seen as well.  While walking forward,
 * every extent item overlapping the range trims it:
 *  - extent starting at @bytenr: drop the covered head of the range;
 *  - extent starting before @bytenr: drop whatever it covers at the head;
 *  - extent starting inside the range: the part behind the extent is checked
 *    by a recursive call and the walk continues with the part in front of it.
 *
 * If bytes remain uncovered and no other error occurred, "There are no
 * extents for csum range" is printed.
 */
8782 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8785 struct btrfs_path path;
8786 struct extent_buffer *leaf;
8787 struct btrfs_key key;
8790 btrfs_init_path(&path);
8791 key.objectid = bytenr;
8792 key.type = BTRFS_EXTENT_ITEM_KEY;
8793 key.offset = (u64)-1;
8796 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8799 fprintf(stderr, "Error looking up extent record %d\n", ret);
8800 btrfs_release_path(&path);
8803 if (path.slots[0] > 0) {
8806 ret = btrfs_prev_leaf(root, &path);
8809 } else if (ret > 0) {
8816 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8819 * Block group items come before extent items if they have the same
8820 * bytenr, so walk back one more just in case. Dear future traveller,
8821 * first congrats on mastering time travel. Now if it's not too much
8822 * trouble could you go back to 2006 and tell Chris to make the
8823 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8824 * EXTENT_ITEM_KEY please?
8826 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8827 if (path.slots[0] > 0) {
8830 ret = btrfs_prev_leaf(root, &path);
8833 } else if (ret > 0) {
8838 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8842 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8843 ret = btrfs_next_leaf(root, &path);
8845 fprintf(stderr, "Error going to next leaf "
8847 btrfs_release_path(&path);
8853 leaf = path.nodes[0];
8854 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8855 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8859 if (key.objectid + key.offset < bytenr) {
8863 if (key.objectid > bytenr + num_bytes)
8866 if (key.objectid == bytenr) {
8867 if (key.offset >= num_bytes) {
8871 num_bytes -= key.offset;
8872 bytenr += key.offset;
8873 } else if (key.objectid < bytenr) {
8874 if (key.objectid + key.offset >= bytenr + num_bytes) {
8878 num_bytes = (bytenr + num_bytes) -
8879 (key.objectid + key.offset);
8880 bytenr = key.objectid + key.offset;
8882 if (key.objectid + key.offset < bytenr + num_bytes) {
8883 u64 new_start = key.objectid + key.offset;
8884 u64 new_bytes = bytenr + num_bytes - new_start;
8887 * Weird case, the extent is in the middle of
8888 * our range, we'll have to search one side
8889 * and then the other. Not sure if this happens
8890 * in real life, but no harm in coding it up
8891 * anyway just in case.
8893 btrfs_release_path(&path);
8894 ret = check_extent_exists(root, new_start,
8897 fprintf(stderr, "Right section didn't "
8901 num_bytes = key.objectid - bytenr;
8904 num_bytes = key.objectid - bytenr;
8911 if (num_bytes && !ret) {
8912 fprintf(stderr, "There are no extents for csum range "
8913 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8917 btrfs_release_path(&path);
/*
 * Walk the checksum tree and check that checksummed ranges have extents.
 *
 * Every EXTENT_CSUM item covers
 *   item_size / csum_size * sectorsize
 * bytes of data starting at key.offset.  Adjacent items are merged into one
 * run [offset, offset + num_bytes); when an item does not continue the
 * current run, the finished run is handed to check_extent_exists() and a
 * failure is reported as "Csum exists for ... but there is no extent record".
 *
 * When the global check_data_csum is set, the data of every item is also
 * verified with check_extent_csums(); otherwise that step is skipped.
 */
8921 static int check_csums(struct btrfs_root *root)
8923 struct btrfs_path path;
8924 struct extent_buffer *leaf;
8925 struct btrfs_key key;
8926 u64 offset = 0, num_bytes = 0;
8927 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8931 unsigned long leaf_offset;
/* From here on "root" is the checksum tree. */
8933 root = root->fs_info->csum_root;
8934 if (!extent_buffer_uptodate(root->node)) {
8935 fprintf(stderr, "No valid csum tree found\n");
8939 btrfs_init_path(&path);
8940 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8941 key.type = BTRFS_EXTENT_CSUM_KEY;
8943 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8945 fprintf(stderr, "Error searching csum tree %d\n", ret);
8946 btrfs_release_path(&path);
8950 if (ret > 0 && path.slots[0])
8955 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8956 ret = btrfs_next_leaf(root, &path);
8958 fprintf(stderr, "Error going to next leaf "
8965 leaf = path.nodes[0];
8967 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8968 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One checksum of csum_size bytes per sector of data. */
8973 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8974 csum_size) * root->fs_info->sectorsize;
8975 if (!check_data_csum)
8976 goto skip_csum_check;
8977 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8978 ret = check_extent_csums(root, key.offset, data_len,
8984 offset = key.offset;
/* Item does not continue the current run: check the run, start a new one. */
8985 } else if (key.offset != offset + num_bytes) {
8986 ret = check_extent_exists(root, offset, num_bytes);
8988 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8989 "there is no extent record\n",
8990 offset, offset+num_bytes);
8993 offset = key.offset;
8996 num_bytes += data_len;
9000 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset; a later field is only looked at when all earlier ones are equal.
 *
 * NOTE(review): presumably reports whether @key sorts strictly before
 * @drop_key, i.e. lies in the part of the tree that was already dropped --
 * confirm against the return statements.
 */
9004 static int is_dropped_key(struct btrfs_key *key,
9005 struct btrfs_key *drop_key) {
9006 if (key->objectid < drop_key->objectid)
9008 else if (key->objectid == drop_key->objectid) {
9009 if (key->type < drop_key->type)
9011 else if (key->type == drop_key->type) {
9012 if (key->offset < drop_key->offset)
9020 * Here are the rules for FULL_BACKREF.
9022 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9023 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9025 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9026 * if it happened after the relocation occurred since we'll have dropped the
9027 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9028 * have no real way to know for sure.
9030 * We process the blocks one root at a time, and we start from the lowest root
9031 * objectid and go to the highest. So we can just lookup the owner backref for
9032 * the record and if we don't find it then we know it doesn't exist and we have
9035 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9036 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9037 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, without consulting the extent tree.
 *
 * The extent record for @buf is looked up in @extent_cache.  The checks
 * visible here look at, in order: whether the root being processed (@ri) is
 * below BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node of @ri,
 * whether the header has BTRFS_HEADER_FLAG_RELOC, whether the header owner
 * equals @ri->objectid, and finally whether a tree backref for the header
 * owner exists on the record.
 *
 * Whichever way the decision goes, it is compared with the value already
 * stored in rec->flag_block_full_backref; a disagreement with a previously
 * set value marks the record with bad_full_backref.  The FULL_BACKREF bit is
 * OR-ed into *@flags on the full-backref outcome.
 */
9039 static int calc_extent_flag(struct cache_tree *extent_cache,
9040 struct extent_buffer *buf,
9041 struct root_item_record *ri,
9044 struct extent_record *rec;
9045 struct cache_extent *cache;
9046 struct tree_backref *tback;
9049 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9050 /* we have added this extent before */
9054 rec = container_of(cache, struct extent_record, cache);
9057 * Except file/reloc tree, we can not have
9060 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9065 if (buf->start == ri->bytenr)
9068 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9071 owner = btrfs_header_owner(buf);
9072 if (owner == ri->objectid)
9075 tback = find_tree_backref(rec, 0, owner);
/* Previously recorded decision disagrees with "not full backref". */
9080 if (rec->flag_block_full_backref != FLAG_UNSET &&
9081 rec->flag_block_full_backref != 0)
9082 rec->bad_full_backref = 1;
9085 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Previously recorded decision disagrees with "full backref". */
9086 if (rec->flag_block_full_backref != FLAG_UNSET &&
9087 rec->flag_block_full_backref != 1)
9088 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() and print_objectid() to render @key_type and @rootid.
 */
9092 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9094 fprintf(stderr, "Invalid key type(");
9095 print_key_type(stderr, 0, key_type);
9096 fprintf(stderr, ") found in root(");
9097 print_objectid(stderr, rootid, 0);
9098 fprintf(stderr, ")\n");
9102 * Check if the key is valid with its extent buffer.
9104 * This is an early check in case an invalid key exists in an extent buffer
9105 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of @key_type may appear in the tree @rootid.
 *
 * Rules visible here:
 *  - DEV_ITEM, CHUNK_ITEM:                     chunk tree only
 *  - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP:  extent tree only
 *  - ROOT_ITEM:                                root tree only
 *  - DEV_EXTENT:                               device tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the "valid in csum and log tree" case is labelled with
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid rather than a key type,
 * while every other label is a *_KEY constant and the value tested is
 * @key_type.  BTRFS_EXTENT_CSUM_KEY looks like what was intended -- confirm
 * before changing.
 */
9108 static int check_type_with_root(u64 rootid, u8 key_type)
9111 /* Only valid in chunk tree */
9112 case BTRFS_DEV_ITEM_KEY:
9113 case BTRFS_CHUNK_ITEM_KEY:
9114 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9117 /* valid in csum and log tree */
9118 case BTRFS_CSUM_TREE_OBJECTID:
9119 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9123 case BTRFS_EXTENT_ITEM_KEY:
9124 case BTRFS_METADATA_ITEM_KEY:
9125 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9126 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9129 case BTRFS_ROOT_ITEM_KEY:
9130 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9133 case BTRFS_DEV_EXTENT_KEY:
9134 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9140 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the tree described by @ri.
 *
 * Outline of what is visible here:
 *  1. pick_next_pending() fills @bits with candidate blocks; they are queued
 *     in @reada and read ahead, then bits[0] becomes the block to process
 *     and is removed from @pending, @reada and @nodes.
 *  2. The block is read with read_tree_block(), using the parent generation
 *     stored on its extent record when one exists.  An unreadable block is
 *     recorded through record_bad_block_io().
 *  3. The FULL_BACKREF flag is obtained from btrfs_lookup_extent_info()
 *     unless init_extent_tree is set, with calc_extent_flag() as the other
 *     source; it is then cross-checked against the owner, the RELOC header
 *     flag and @ri->last_snapshot, marking the record bad_full_backref on
 *     inconsistencies.
 *  4. check_block() validates the block.
 *  5. Leaf: every item is checked with check_type_with_root() and dispatched
 *     by key type to the process_*_item() helpers or to add_tree_backref() /
 *     add_data_backref(); orphan items and file extents get extra handling.
 *     Node: every child pointer gets an extent record and a tree backref and
 *     is queued through add_pending() into @nodes or @pending.
 *  6. Global byte counters are updated and the buffer reference is dropped.
 */
9144 static int run_next_block(struct btrfs_root *root,
9145 struct block_info *bits,
9148 struct cache_tree *pending,
9149 struct cache_tree *seen,
9150 struct cache_tree *reada,
9151 struct cache_tree *nodes,
9152 struct cache_tree *extent_cache,
9153 struct cache_tree *chunk_cache,
9154 struct rb_root *dev_cache,
9155 struct block_group_tree *block_group_cache,
9156 struct device_extent_tree *dev_extent_cache,
9157 struct root_item_record *ri)
9159 struct btrfs_fs_info *fs_info = root->fs_info;
9160 struct extent_buffer *buf;
9161 struct extent_record *rec = NULL;
9172 struct btrfs_key key;
9173 struct cache_extent *cache;
9176 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9177 bits_nr, &reada_bits);
9182 for(i = 0; i < nritems; i++) {
9183 ret = add_cache_extent(reada, bits[i].start,
9188 /* fixme, get the parent transid */
9189 readahead_tree_block(fs_info, bits[i].start, 0);
/* bits[0] is the block handled by this call. */
9192 *last = bits[0].start;
9193 bytenr = bits[0].start;
9194 size = bits[0].size;
/* Drop the block from every work queue it may still be on. */
9196 cache = lookup_cache_extent(pending, bytenr, size);
9198 remove_cache_extent(pending, cache);
9201 cache = lookup_cache_extent(reada, bytenr, size);
9203 remove_cache_extent(reada, cache);
9206 cache = lookup_cache_extent(nodes, bytenr, size);
9208 remove_cache_extent(nodes, cache);
9211 cache = lookup_cache_extent(extent_cache, bytenr, size);
9213 rec = container_of(cache, struct extent_record, cache);
9214 gen = rec->parent_generation;
9217 /* fixme, get the real parent transid */
9218 buf = read_tree_block(root->fs_info, bytenr, gen);
9219 if (!extent_buffer_uptodate(buf)) {
9220 record_bad_block_io(root->fs_info,
9221 extent_cache, bytenr, size);
9225 nritems = btrfs_header_nritems(buf);
9228 if (!init_extent_tree) {
9229 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9230 btrfs_header_level(buf), 1, NULL,
9233 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9235 fprintf(stderr, "Couldn't calc extent flags\n");
9236 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9241 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9243 fprintf(stderr, "Couldn't calc extent flags\n");
9244 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9248 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9250 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9251 ri->objectid == btrfs_header_owner(buf)) {
9253 * Ok we got to this block from it's original owner and
9254 * we have FULL_BACKREF set. Relocation can leave
9255 * converted blocks over so this is altogether possible,
9256 * however it's not possible if the generation > the
9257 * last snapshot, so check for this case.
9259 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9260 btrfs_header_generation(buf) > ri->last_snapshot) {
9261 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9262 rec->bad_full_backref = 1;
9267 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9268 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9269 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9270 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
9274 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9275 rec->flag_block_full_backref = 1;
9279 rec->flag_block_full_backref = 0;
9281 owner = btrfs_header_owner(buf);
9284 ret = check_block(root, extent_cache, buf, flags);
9288 if (btrfs_is_leaf(buf)) {
9289 btree_space_waste += btrfs_leaf_free_space(root, buf);
9290 for (i = 0; i < nritems; i++) {
9291 struct btrfs_file_extent_item *fi;
9292 btrfs_item_key_to_cpu(buf, &key, i);
9294 * Check key type against the leaf owner.
9295 * Could filter quite a lot of early error if
9298 if (check_type_with_root(btrfs_header_owner(buf),
9300 fprintf(stderr, "ignoring invalid key\n");
9303 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9304 process_extent_item(root, extent_cache, buf,
9308 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9309 process_extent_item(root, extent_cache, buf,
9313 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9315 btrfs_item_size_nr(buf, i);
9318 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9319 process_chunk_item(chunk_cache, &key, buf, i);
9322 if (key.type == BTRFS_DEV_ITEM_KEY) {
9323 process_device_item(dev_cache, &key, buf, i);
9326 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9327 process_block_group_item(block_group_cache,
9331 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9332 process_device_extent_item(dev_extent_cache,
9337 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9338 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9339 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: key.objectid is the extent bytenr. */
9346 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9347 ret = add_tree_backref(extent_cache,
9348 key.objectid, 0, key.offset, 0);
9351 "add_tree_backref failed (leaf tree block): %s",
9355 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9356 ret = add_tree_backref(extent_cache,
9357 key.objectid, key.offset, 0, 0);
9360 "add_tree_backref failed (leaf shared block): %s",
9364 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9365 struct btrfs_extent_data_ref *ref;
9366 ref = btrfs_item_ptr(buf, i,
9367 struct btrfs_extent_data_ref);
9368 add_data_backref(extent_cache,
9370 btrfs_extent_data_ref_root(buf, ref),
9371 btrfs_extent_data_ref_objectid(buf,
9373 btrfs_extent_data_ref_offset(buf, ref),
9374 btrfs_extent_data_ref_count(buf, ref),
9375 0, root->fs_info->sectorsize);
9378 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9379 struct btrfs_shared_data_ref *ref;
9380 ref = btrfs_item_ptr(buf, i,
9381 struct btrfs_shared_data_ref);
9382 add_data_backref(extent_cache,
9383 key.objectid, key.offset, 0, 0, 0,
9384 btrfs_shared_data_ref_count(buf, ref),
9385 0, root->fs_info->sectorsize);
/* Some orphan items are queued on delete_items as bad items. */
9388 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9389 struct bad_item *bad;
9391 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9395 bad = malloc(sizeof(struct bad_item));
9398 INIT_LIST_HEAD(&bad->list);
9399 memcpy(&bad->key, &key,
9400 sizeof(struct btrfs_key));
9401 bad->root_id = owner;
9402 list_add_tail(&bad->list, &delete_items);
/* From here on only regular/prealloc file extents with a disk bytenr. */
9405 if (key.type != BTRFS_EXTENT_DATA_KEY)
9407 fi = btrfs_item_ptr(buf, i,
9408 struct btrfs_file_extent_item);
9409 if (btrfs_file_extent_type(buf, fi) ==
9410 BTRFS_FILE_EXTENT_INLINE)
9412 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9415 data_bytes_allocated +=
9416 btrfs_file_extent_disk_num_bytes(buf, fi);
9417 if (data_bytes_allocated < root->fs_info->sectorsize) {
9420 data_bytes_referenced +=
9421 btrfs_file_extent_num_bytes(buf, fi);
9422 add_data_backref(extent_cache,
9423 btrfs_file_extent_disk_bytenr(buf, fi),
9424 parent, owner, key.objectid, key.offset -
9425 btrfs_file_extent_offset(buf, fi), 1, 1,
9426 btrfs_file_extent_disk_num_bytes(buf, fi));
9430 struct btrfs_key first_key;
9432 first_key.objectid = 0;
9435 btrfs_item_key_to_cpu(buf, &first_key, 0);
9436 level = btrfs_header_level(buf);
9437 for (i = 0; i < nritems; i++) {
9438 struct extent_record tmpl;
9440 ptr = btrfs_node_blockptr(buf, i);
9441 size = root->fs_info->nodesize;
9442 btrfs_node_key_to_cpu(buf, &key, i);
/* Children below the drop progress of a partially dropped tree. */
9444 if ((level == ri->drop_level)
9445 && is_dropped_key(&key, &ri->drop_key)) {
9450 memset(&tmpl, 0, sizeof(tmpl));
9451 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9452 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9457 tmpl.max_size = size;
9458 ret = add_extent_rec(extent_cache, &tmpl);
9462 ret = add_tree_backref(extent_cache, ptr, parent,
9466 "add_tree_backref failed (non-leaf block): %s",
9472 add_pending(nodes, seen, ptr, size);
9474 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of this node count as wasted space. */
9477 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9478 nritems) * sizeof(struct btrfs_key_ptr);
9480 total_btree_bytes += buf->len;
9481 if (fs_root_objectid(btrfs_header_owner(buf)))
9482 total_fs_tree_bytes += buf->len;
9483 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9484 total_extent_tree_bytes += buf->len;
9486 free_extent_buffer(buf);
/*
 * Seed the scan with the root block @buf of a tree.
 *
 * The block is queued through add_pending(): into @nodes when its header
 * level is above 0, into @pending otherwise.  An extent record covering
 * [buf->start, buf->start + buf->len) is added to @extent_cache, followed by
 * a tree backref: a reloc tree root, or a block whose backref revision is
 * older than BTRFS_MIXED_BACKREF_REV, gets a backref whose parent is the
 * block itself; every other root gets a backref keyed by objectid with
 * parent 0.
 */
9490 static int add_root_to_pending(struct extent_buffer *buf,
9491 struct cache_tree *extent_cache,
9492 struct cache_tree *pending,
9493 struct cache_tree *seen,
9494 struct cache_tree *nodes,
9497 struct extent_record tmpl;
9500 if (btrfs_header_level(buf) > 0)
9501 add_pending(nodes, seen, buf->start, buf->len);
9503 add_pending(pending, seen, buf->start, buf->len);
9505 memset(&tmpl, 0, sizeof(tmpl));
9506 tmpl.start = buf->start;
9511 tmpl.max_size = buf->len;
9512 add_extent_rec(extent_cache, &tmpl);
9514 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9515 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9516 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9519 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9524 /* as we fix the tree, we might be deleting blocks that
9525 * we're tracking for repair. This hook makes sure we
9526 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in sync while repair frees extents.
 *
 * The extent record for [@bytenr, @bytenr + @num_bytes) is looked up in
 * fs_info->fsck_extent_cache.  @owner >= BTRFS_FIRST_FREE_OBJECTID selects
 * the data backref path, anything else the tree backref path:
 *  - data: the matching data_backref has its found_ref / num_refs counters
 *    and the record's refs / extent_item_refs reduced by refs_to_drop, and
 *    the found_ref / found_extent_tree bits are cleared once the matching
 *    counter reaches zero;
 *  - tree: the matching tree_backref has its found_ref and
 *    found_extent_tree bits cleared, and extent_item_refs is decremented.
 * Finally maybe_free_extent_rec() gets a chance to release the record.
 *
 * NOTE(review): in both paths the backref is erased from the rb-tree when
 * found_extent_tree is clear and found_ref is still set; one might expect
 * "both clear" here -- confirm the intended condition.
 */
9528 static int free_extent_hook(struct btrfs_trans_handle *trans,
9529 struct btrfs_root *root,
9530 u64 bytenr, u64 num_bytes, u64 parent,
9531 u64 root_objectid, u64 owner, u64 offset,
9534 struct extent_record *rec;
9535 struct cache_extent *cache;
9537 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9539 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9540 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9544 rec = container_of(cache, struct extent_record, cache);
9546 struct data_backref *back;
9547 back = find_data_backref(rec, parent, root_objectid, owner,
9548 offset, 1, bytenr, num_bytes);
9551 if (back->node.found_ref) {
9552 back->found_ref -= refs_to_drop;
9554 rec->refs -= refs_to_drop;
9556 if (back->node.found_extent_tree) {
9557 back->num_refs -= refs_to_drop;
9558 if (rec->extent_item_refs)
9559 rec->extent_item_refs -= refs_to_drop;
/* Clear the presence bits once the matching counters hit zero. */
9561 if (back->found_ref == 0)
9562 back->node.found_ref = 0;
9563 if (back->num_refs == 0)
9564 back->node.found_extent_tree = 0;
9566 if (!back->node.found_extent_tree && back->node.found_ref) {
9567 rb_erase(&back->node.node, &rec->backref_tree);
9571 struct tree_backref *back;
9572 back = find_tree_backref(rec, parent, root_objectid);
9575 if (back->node.found_ref) {
9578 back->node.found_ref = 0;
9580 if (back->node.found_extent_tree) {
9581 if (rec->extent_item_refs)
9582 rec->extent_item_refs--;
9583 back->node.found_extent_tree = 0;
9585 if (!back->node.found_extent_tree && back->node.found_ref) {
9586 rb_erase(&back->node.node, &rec->backref_tree);
9590 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The extent tree is searched from the highest possible offset for that
 * objectid and walked backwards.  Items whose objectid differs end the walk.
 * Items of the extent / backref key types listed below are deleted with
 * btrfs_del_item() after a "repair deleting extent record" message; items of
 * any other type are skipped by moving the search key just below them
 * (previous offset when the type is 0, previous type with offset -1
 * otherwise).
 *
 * After deleting an EXTENT_ITEM or METADATA_ITEM, btrfs_update_block_group()
 * is called with the extent's size: key.offset for extent items, nodesize
 * for metadata items.
 */
9595 static int delete_extent_records(struct btrfs_trans_handle *trans,
9596 struct btrfs_root *root,
9597 struct btrfs_path *path,
9600 struct btrfs_key key;
9601 struct btrfs_key found_key;
9602 struct extent_buffer *leaf;
9607 key.objectid = bytenr;
9609 key.offset = (u64)-1;
9612 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9619 if (path->slots[0] == 0)
9625 leaf = path->nodes[0];
9626 slot = path->slots[0];
9628 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9629 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it. */
9632 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9633 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9634 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9635 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9636 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9637 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9638 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9639 btrfs_release_path(path);
9640 if (found_key.type == 0) {
9641 if (found_key.offset == 0)
9643 key.offset = found_key.offset - 1;
9644 key.type = found_key.type;
9646 key.type = found_key.type - 1;
9647 key.offset = (u64)-1;
9651 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9652 found_key.objectid, found_key.type, found_key.offset);
9654 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9657 btrfs_release_path(path);
9659 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9660 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* Metadata items carry no length in the key; use nodesize. */
9661 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9662 found_key.offset : root->fs_info->nodesize;
9664 ret = btrfs_update_block_group(trans, root, bytenr,
9671 btrfs_release_path(path);
9676 * for a single backref, this will allocate a new extent
9677 * and add the backref to it.
9679 static int record_extent(struct btrfs_trans_handle *trans,
9680 struct btrfs_fs_info *info,
9681 struct btrfs_path *path,
9682 struct extent_record *rec,
9683 struct extent_backref *back,
9684 int allocated, u64 flags)
9687 struct btrfs_root *extent_root = info->extent_root;
9688 struct extent_buffer *leaf;
9689 struct btrfs_key ins_key;
9690 struct btrfs_extent_item *ei;
9691 struct data_backref *dback;
9692 struct btrfs_tree_block_info *bi;
9695 rec->max_size = max_t(u64, rec->max_size,
9699 u32 item_size = sizeof(*ei);
9702 item_size += sizeof(*bi);
9704 ins_key.objectid = rec->start;
9705 ins_key.offset = rec->max_size;
9706 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9708 ret = btrfs_insert_empty_item(trans, extent_root, path,
9709 &ins_key, item_size);
9713 leaf = path->nodes[0];
9714 ei = btrfs_item_ptr(leaf, path->slots[0],
9715 struct btrfs_extent_item);
9717 btrfs_set_extent_refs(leaf, ei, 0);
9718 btrfs_set_extent_generation(leaf, ei, rec->generation);
9720 if (back->is_data) {
9721 btrfs_set_extent_flags(leaf, ei,
9722 BTRFS_EXTENT_FLAG_DATA);
9724 struct btrfs_disk_key copy_key;;
9726 bi = (struct btrfs_tree_block_info *)(ei + 1);
9727 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9730 btrfs_set_disk_key_objectid(©_key,
9731 rec->info_objectid);
9732 btrfs_set_disk_key_type(©_key, 0);
9733 btrfs_set_disk_key_offset(©_key, 0);
9735 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9736 btrfs_set_tree_block_key(leaf, bi, ©_key);
9738 btrfs_set_extent_flags(leaf, ei,
9739 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9742 btrfs_mark_buffer_dirty(leaf);
9743 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9744 rec->max_size, 1, 0);
9747 btrfs_release_path(path);
9750 if (back->is_data) {
9754 dback = to_data_backref(back);
9755 if (back->full_backref)
9756 parent = dback->parent;
9760 for (i = 0; i < dback->found_ref; i++) {
9761 /* if parent != 0, we're doing a full backref
9762 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9763 * just makes the backref allocator create a data
9766 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9767 rec->start, rec->max_size,
9771 BTRFS_FIRST_FREE_OBJECTID :
9777 fprintf(stderr, "adding new data backref"
9778 " on %llu %s %llu owner %llu"
9779 " offset %llu found %d\n",
9780 (unsigned long long)rec->start,
9781 back->full_backref ?
9783 back->full_backref ?
9784 (unsigned long long)parent :
9785 (unsigned long long)dback->root,
9786 (unsigned long long)dback->owner,
9787 (unsigned long long)dback->offset,
9791 struct tree_backref *tback;
9793 tback = to_tree_backref(back);
9794 if (back->full_backref)
9795 parent = tback->parent;
9799 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9800 rec->start, rec->max_size,
9801 parent, tback->root, 0, 0);
9802 fprintf(stderr, "adding new tree backref on "
9803 "start %llu len %llu parent %llu root %llu\n",
9804 rec->start, rec->max_size, parent, tback->root);
9807 btrfs_release_path(path);
9811 static struct extent_entry *find_entry(struct list_head *entries,
9812 u64 bytenr, u64 bytes)
9814 struct extent_entry *entry = NULL;
9816 list_for_each_entry(entry, entries, list) {
9817 if (entry->bytenr == bytenr && entry->bytes == bytes)
9824 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9826 struct extent_entry *entry, *best = NULL, *prev = NULL;
9828 list_for_each_entry(entry, entries, list) {
9830 * If there are as many broken entries as entries then we know
9831 * not to trust this particular entry.
9833 if (entry->broken == entry->count)
9837 * Special case, when there are only two entries and 'best' is
9847 * If our current entry == best then we can't be sure our best
9848 * is really the best, so we need to keep searching.
9850 if (best && best->count == entry->count) {
9856 /* Prev == entry, not good enough, have to keep searching */
9857 if (!prev->broken && prev->count == entry->count)
9861 best = (prev->count > entry->count) ? prev : entry;
9862 else if (best->count < entry->count)
9870 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9871 struct data_backref *dback, struct extent_entry *entry)
9873 struct btrfs_trans_handle *trans;
9874 struct btrfs_root *root;
9875 struct btrfs_file_extent_item *fi;
9876 struct extent_buffer *leaf;
9877 struct btrfs_key key;
9881 key.objectid = dback->root;
9882 key.type = BTRFS_ROOT_ITEM_KEY;
9883 key.offset = (u64)-1;
9884 root = btrfs_read_fs_root(info, &key);
9886 fprintf(stderr, "Couldn't find root for our ref\n");
9891 * The backref points to the original offset of the extent if it was
9892 * split, so we need to search down to the offset we have and then walk
9893 * forward until we find the backref we're looking for.
9895 key.objectid = dback->owner;
9896 key.type = BTRFS_EXTENT_DATA_KEY;
9897 key.offset = dback->offset;
9898 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9900 fprintf(stderr, "Error looking up ref %d\n", ret);
9905 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9906 ret = btrfs_next_leaf(root, path);
9908 fprintf(stderr, "Couldn't find our ref, next\n");
9912 leaf = path->nodes[0];
9913 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9914 if (key.objectid != dback->owner ||
9915 key.type != BTRFS_EXTENT_DATA_KEY) {
9916 fprintf(stderr, "Couldn't find our ref, search\n");
9919 fi = btrfs_item_ptr(leaf, path->slots[0],
9920 struct btrfs_file_extent_item);
9921 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9922 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9924 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9929 btrfs_release_path(path);
9931 trans = btrfs_start_transaction(root, 1);
9933 return PTR_ERR(trans);
9936 * Ok we have the key of the file extent we want to fix, now we can cow
9937 * down to the thing and fix it.
9939 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9941 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9942 key.objectid, key.type, key.offset, ret);
9946 fprintf(stderr, "Well that's odd, we just found this key "
9947 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9952 leaf = path->nodes[0];
9953 fi = btrfs_item_ptr(leaf, path->slots[0],
9954 struct btrfs_file_extent_item);
9956 if (btrfs_file_extent_compression(leaf, fi) &&
9957 dback->disk_bytenr != entry->bytenr) {
9958 fprintf(stderr, "Ref doesn't match the record start and is "
9959 "compressed, please take a btrfs-image of this file "
9960 "system and send it to a btrfs developer so they can "
9961 "complete this functionality for bytenr %Lu\n",
9962 dback->disk_bytenr);
9967 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9968 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9969 } else if (dback->disk_bytenr > entry->bytenr) {
9970 u64 off_diff, offset;
9972 off_diff = dback->disk_bytenr - entry->bytenr;
9973 offset = btrfs_file_extent_offset(leaf, fi);
9974 if (dback->disk_bytenr + offset +
9975 btrfs_file_extent_num_bytes(leaf, fi) >
9976 entry->bytenr + entry->bytes) {
9977 fprintf(stderr, "Ref is past the entry end, please "
9978 "take a btrfs-image of this file system and "
9979 "send it to a btrfs developer, ref %Lu\n",
9980 dback->disk_bytenr);
9985 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9986 btrfs_set_file_extent_offset(leaf, fi, offset);
9987 } else if (dback->disk_bytenr < entry->bytenr) {
9990 offset = btrfs_file_extent_offset(leaf, fi);
9991 if (dback->disk_bytenr + offset < entry->bytenr) {
9992 fprintf(stderr, "Ref is before the entry start, please"
9993 " take a btrfs-image of this file system and "
9994 "send it to a btrfs developer, ref %Lu\n",
9995 dback->disk_bytenr);
10000 offset += dback->disk_bytenr;
10001 offset -= entry->bytenr;
10002 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10003 btrfs_set_file_extent_offset(leaf, fi, offset);
10006 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10009 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10010 * only do this if we aren't using compression, otherwise it's a
10013 if (!btrfs_file_extent_compression(leaf, fi))
10014 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10016 printf("ram bytes may be wrong?\n");
10017 btrfs_mark_buffer_dirty(leaf);
10019 err = btrfs_commit_transaction(trans, root);
10020 btrfs_release_path(path);
10021 return ret ? ret : err;
10024 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10025 struct extent_record *rec)
10027 struct extent_backref *back, *tmp;
10028 struct data_backref *dback;
10029 struct extent_entry *entry, *best = NULL;
10030 LIST_HEAD(entries);
10031 int nr_entries = 0;
10032 int broken_entries = 0;
10034 short mismatch = 0;
10037 * Metadata is easy and the backrefs should always agree on bytenr and
10038 * size, if not we've got bigger issues.
10043 rbtree_postorder_for_each_entry_safe(back, tmp,
10044 &rec->backref_tree, node) {
10045 if (back->full_backref || !back->is_data)
10048 dback = to_data_backref(back);
10051 * We only pay attention to backrefs that we found a real
10054 if (dback->found_ref == 0)
10058 * For now we only catch when the bytes don't match, not the
10059 * bytenr. We can easily do this at the same time, but I want
10060 * to have a fs image to test on before we just add repair
10061 * functionality willy-nilly so we know we won't screw up the
10065 entry = find_entry(&entries, dback->disk_bytenr,
10068 entry = malloc(sizeof(struct extent_entry));
10073 memset(entry, 0, sizeof(*entry));
10074 entry->bytenr = dback->disk_bytenr;
10075 entry->bytes = dback->bytes;
10076 list_add_tail(&entry->list, &entries);
10081 * If we only have on entry we may think the entries agree when
10082 * in reality they don't so we have to do some extra checking.
10084 if (dback->disk_bytenr != rec->start ||
10085 dback->bytes != rec->nr || back->broken)
10088 if (back->broken) {
10096 /* Yay all the backrefs agree, carry on good sir */
10097 if (nr_entries <= 1 && !mismatch)
10100 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10101 "%Lu\n", rec->start);
10104 * First we want to see if the backrefs can agree amongst themselves who
10105 * is right, so figure out which one of the entries has the highest
10108 best = find_most_right_entry(&entries);
10111 * Ok so we may have an even split between what the backrefs think, so
10112 * this is where we use the extent ref to see what it thinks.
10115 entry = find_entry(&entries, rec->start, rec->nr);
10116 if (!entry && (!broken_entries || !rec->found_rec)) {
10117 fprintf(stderr, "Backrefs don't agree with each other "
10118 "and extent record doesn't agree with anybody,"
10119 " so we can't fix bytenr %Lu bytes %Lu\n",
10120 rec->start, rec->nr);
10123 } else if (!entry) {
10125 * Ok our backrefs were broken, we'll assume this is the
10126 * correct value and add an entry for this range.
10128 entry = malloc(sizeof(struct extent_entry));
10133 memset(entry, 0, sizeof(*entry));
10134 entry->bytenr = rec->start;
10135 entry->bytes = rec->nr;
10136 list_add_tail(&entry->list, &entries);
10140 best = find_most_right_entry(&entries);
10142 fprintf(stderr, "Backrefs and extent record evenly "
10143 "split on who is right, this is going to "
10144 "require user input to fix bytenr %Lu bytes "
10145 "%Lu\n", rec->start, rec->nr);
10152 * I don't think this can happen currently as we'll abort() if we catch
10153 * this case higher up, but in case somebody removes that we still can't
10154 * deal with it properly here yet, so just bail out of that's the case.
10156 if (best->bytenr != rec->start) {
10157 fprintf(stderr, "Extent start and backref starts don't match, "
10158 "please use btrfs-image on this file system and send "
10159 "it to a btrfs developer so they can make fsck fix "
10160 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10161 rec->start, rec->nr);
10167 * Ok great we all agreed on an extent record, let's go find the real
10168 * references and fix up the ones that don't match.
10170 rbtree_postorder_for_each_entry_safe(back, tmp,
10171 &rec->backref_tree, node) {
10172 if (back->full_backref || !back->is_data)
10175 dback = to_data_backref(back);
10178 * Still ignoring backrefs that don't have a real ref attached
10181 if (dback->found_ref == 0)
10184 if (dback->bytes == best->bytes &&
10185 dback->disk_bytenr == best->bytenr)
10188 ret = repair_ref(info, path, dback, best);
10194 * Ok we messed with the actual refs, which means we need to drop our
10195 * entire cache and go back and rescan. I know this is a huge pain and
10196 * adds a lot of extra work, but it's the only way to be safe. Once all
10197 * the backrefs agree we may not need to do anything to the extent
10202 while (!list_empty(&entries)) {
10203 entry = list_entry(entries.next, struct extent_entry, list);
10204 list_del_init(&entry->list);
10210 static int process_duplicates(struct cache_tree *extent_cache,
10211 struct extent_record *rec)
10213 struct extent_record *good, *tmp;
10214 struct cache_extent *cache;
10218 * If we found a extent record for this extent then return, or if we
10219 * have more than one duplicate we are likely going to need to delete
10222 if (rec->found_rec || rec->num_duplicates > 1)
10225 /* Shouldn't happen but just in case */
10226 BUG_ON(!rec->num_duplicates);
10229 * So this happens if we end up with a backref that doesn't match the
10230 * actual extent entry. So either the backref is bad or the extent
10231 * entry is bad. Either way we want to have the extent_record actually
10232 * reflect what we found in the extent_tree, so we need to take the
10233 * duplicate out and use that as the extent_record since the only way we
10234 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10236 remove_cache_extent(extent_cache, &rec->cache);
10238 good = to_extent_record(rec->dups.next);
10239 list_del_init(&good->list);
10240 INIT_LIST_HEAD(&good->backrefs);
10241 INIT_LIST_HEAD(&good->dups);
10242 good->cache.start = good->start;
10243 good->cache.size = good->nr;
10244 good->content_checked = 0;
10245 good->owner_ref_checked = 0;
10246 good->num_duplicates = 0;
10247 good->refs = rec->refs;
10248 list_splice_init(&rec->backrefs, &good->backrefs);
10250 cache = lookup_cache_extent(extent_cache, good->start,
10254 tmp = container_of(cache, struct extent_record, cache);
10257 * If we find another overlapping extent and it's found_rec is
10258 * set then it's a duplicate and we need to try and delete
10261 if (tmp->found_rec || tmp->num_duplicates > 0) {
10262 if (list_empty(&good->list))
10263 list_add_tail(&good->list,
10264 &duplicate_extents);
10265 good->num_duplicates += tmp->num_duplicates + 1;
10266 list_splice_init(&tmp->dups, &good->dups);
10267 list_del_init(&tmp->list);
10268 list_add_tail(&tmp->list, &good->dups);
10269 remove_cache_extent(extent_cache, &tmp->cache);
10274 * Ok we have another non extent item backed extent rec, so lets
10275 * just add it to this extent and carry on like we did above.
10277 good->refs += tmp->refs;
10278 list_splice_init(&tmp->backrefs, &good->backrefs);
10279 remove_cache_extent(extent_cache, &tmp->cache);
10282 ret = insert_cache_extent(extent_cache, &good->cache);
10285 return good->num_duplicates ? 0 : 1;
10288 static int delete_duplicate_records(struct btrfs_root *root,
10289 struct extent_record *rec)
10291 struct btrfs_trans_handle *trans;
10292 LIST_HEAD(delete_list);
10293 struct btrfs_path path;
10294 struct extent_record *tmp, *good, *n;
10297 struct btrfs_key key;
10299 btrfs_init_path(&path);
10302 /* Find the record that covers all of the duplicates. */
10303 list_for_each_entry(tmp, &rec->dups, list) {
10304 if (good->start < tmp->start)
10306 if (good->nr > tmp->nr)
10309 if (tmp->start + tmp->nr < good->start + good->nr) {
10310 fprintf(stderr, "Ok we have overlapping extents that "
10311 "aren't completely covered by each other, this "
10312 "is going to require more careful thought. "
10313 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10314 tmp->start, tmp->nr, good->start, good->nr);
10321 list_add_tail(&rec->list, &delete_list);
10323 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10326 list_move_tail(&tmp->list, &delete_list);
10329 root = root->fs_info->extent_root;
10330 trans = btrfs_start_transaction(root, 1);
10331 if (IS_ERR(trans)) {
10332 ret = PTR_ERR(trans);
10336 list_for_each_entry(tmp, &delete_list, list) {
10337 if (tmp->found_rec == 0)
10339 key.objectid = tmp->start;
10340 key.type = BTRFS_EXTENT_ITEM_KEY;
10341 key.offset = tmp->nr;
10343 /* Shouldn't happen but just in case */
10344 if (tmp->metadata) {
10345 fprintf(stderr, "Well this shouldn't happen, extent "
10346 "record overlaps but is metadata? "
10347 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10351 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10357 ret = btrfs_del_item(trans, root, &path);
10360 btrfs_release_path(&path);
10363 err = btrfs_commit_transaction(trans, root);
10367 while (!list_empty(&delete_list)) {
10368 tmp = to_extent_record(delete_list.next);
10369 list_del_init(&tmp->list);
10375 while (!list_empty(&rec->dups)) {
10376 tmp = to_extent_record(rec->dups.next);
10377 list_del_init(&tmp->list);
10381 btrfs_release_path(&path);
10383 if (!ret && !nr_del)
10384 rec->num_duplicates = 0;
10386 return ret ? ret : nr_del;
10389 static int find_possible_backrefs(struct btrfs_fs_info *info,
10390 struct btrfs_path *path,
10391 struct cache_tree *extent_cache,
10392 struct extent_record *rec)
10394 struct btrfs_root *root;
10395 struct extent_backref *back, *tmp;
10396 struct data_backref *dback;
10397 struct cache_extent *cache;
10398 struct btrfs_file_extent_item *fi;
10399 struct btrfs_key key;
10403 rbtree_postorder_for_each_entry_safe(back, tmp,
10404 &rec->backref_tree, node) {
10405 /* Don't care about full backrefs (poor unloved backrefs) */
10406 if (back->full_backref || !back->is_data)
10409 dback = to_data_backref(back);
10411 /* We found this one, we don't need to do a lookup */
10412 if (dback->found_ref)
10415 key.objectid = dback->root;
10416 key.type = BTRFS_ROOT_ITEM_KEY;
10417 key.offset = (u64)-1;
10419 root = btrfs_read_fs_root(info, &key);
10421 /* No root, definitely a bad ref, skip */
10422 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10424 /* Other err, exit */
10426 return PTR_ERR(root);
10428 key.objectid = dback->owner;
10429 key.type = BTRFS_EXTENT_DATA_KEY;
10430 key.offset = dback->offset;
10431 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10433 btrfs_release_path(path);
10436 /* Didn't find it, we can carry on */
10441 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10442 struct btrfs_file_extent_item);
10443 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10444 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10445 btrfs_release_path(path);
10446 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10448 struct extent_record *tmp;
10449 tmp = container_of(cache, struct extent_record, cache);
10452 * If we found an extent record for the bytenr for this
10453 * particular backref then we can't add it to our
10454 * current extent record. We only want to add backrefs
10455 * that don't have a corresponding extent item in the
10456 * extent tree since they likely belong to this record
10457 * and we need to fix it if it doesn't match bytenrs.
10459 if (tmp->found_rec)
10463 dback->found_ref += 1;
10464 dback->disk_bytenr = bytenr;
10465 dback->bytes = bytes;
10468 * Set this so the verify backref code knows not to trust the
10469 * values in this backref.
10478 * Record orphan data ref into corresponding root.
10480 * Return 0 if the extent item contains data ref and recorded.
10481 * Return 1 if the extent item contains no useful data ref
10482 * On that case, it may contains only shared_dataref or metadata backref
10483 * or the file extent exists(this should be handled by the extent bytenr
10484 * recovery routine)
10485 * Return <0 if something goes wrong.
10487 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10488 struct extent_record *rec)
10490 struct btrfs_key key;
10491 struct btrfs_root *dest_root;
10492 struct extent_backref *back, *tmp;
10493 struct data_backref *dback;
10494 struct orphan_data_extent *orphan;
10495 struct btrfs_path path;
10496 int recorded_data_ref = 0;
10501 btrfs_init_path(&path);
10502 rbtree_postorder_for_each_entry_safe(back, tmp,
10503 &rec->backref_tree, node) {
10504 if (back->full_backref || !back->is_data ||
10505 !back->found_extent_tree)
10507 dback = to_data_backref(back);
10508 if (dback->found_ref)
10510 key.objectid = dback->root;
10511 key.type = BTRFS_ROOT_ITEM_KEY;
10512 key.offset = (u64)-1;
10514 dest_root = btrfs_read_fs_root(fs_info, &key);
10516 /* For non-exist root we just skip it */
10517 if (IS_ERR(dest_root) || !dest_root)
10520 key.objectid = dback->owner;
10521 key.type = BTRFS_EXTENT_DATA_KEY;
10522 key.offset = dback->offset;
10524 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10525 btrfs_release_path(&path);
10527 * For ret < 0, it's OK since the fs-tree may be corrupted,
10528 * we need to record it for inode/file extent rebuild.
10529 * For ret > 0, we record it only for file extent rebuild.
10530 * For ret == 0, the file extent exists but only bytenr
10531 * mismatch, let the original bytenr fix routine to handle,
10537 orphan = malloc(sizeof(*orphan));
10542 INIT_LIST_HEAD(&orphan->list);
10543 orphan->root = dback->root;
10544 orphan->objectid = dback->owner;
10545 orphan->offset = dback->offset;
10546 orphan->disk_bytenr = rec->cache.start;
10547 orphan->disk_len = rec->cache.size;
10548 list_add(&dest_root->orphan_data_extents, &orphan->list);
10549 recorded_data_ref = 1;
10552 btrfs_release_path(&path);
10554 return !recorded_data_ref;
10560 * when an incorrect extent item is found, this will delete
10561 * all of the existing entries for it and recreate them
10562 * based on what the tree scan found.
10564 static int fixup_extent_refs(struct btrfs_fs_info *info,
10565 struct cache_tree *extent_cache,
10566 struct extent_record *rec)
10568 struct btrfs_trans_handle *trans = NULL;
10570 struct btrfs_path path;
10571 struct cache_extent *cache;
10572 struct extent_backref *back, *tmp;
10576 if (rec->flag_block_full_backref)
10577 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10579 btrfs_init_path(&path);
10580 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10582 * Sometimes the backrefs themselves are so broken they don't
10583 * get attached to any meaningful rec, so first go back and
10584 * check any of our backrefs that we couldn't find and throw
10585 * them into the list if we find the backref so that
10586 * verify_backrefs can figure out what to do.
10588 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10593 /* step one, make sure all of the backrefs agree */
10594 ret = verify_backrefs(info, &path, rec);
10598 trans = btrfs_start_transaction(info->extent_root, 1);
10599 if (IS_ERR(trans)) {
10600 ret = PTR_ERR(trans);
10604 /* step two, delete all the existing records */
10605 ret = delete_extent_records(trans, info->extent_root, &path,
10611 /* was this block corrupt? If so, don't add references to it */
10612 cache = lookup_cache_extent(info->corrupt_blocks,
10613 rec->start, rec->max_size);
10619 /* step three, recreate all the refs we did find */
10620 rbtree_postorder_for_each_entry_safe(back, tmp,
10621 &rec->backref_tree, node) {
10623 * if we didn't find any references, don't create a
10624 * new extent record
10626 if (!back->found_ref)
10629 rec->bad_full_backref = 0;
10630 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10638 int err = btrfs_commit_transaction(trans, info->extent_root);
10644 fprintf(stderr, "Repaired extent references for %llu\n",
10645 (unsigned long long)rec->start);
10647 btrfs_release_path(&path);
10651 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10652 struct extent_record *rec)
10654 struct btrfs_trans_handle *trans;
10655 struct btrfs_root *root = fs_info->extent_root;
10656 struct btrfs_path path;
10657 struct btrfs_extent_item *ei;
10658 struct btrfs_key key;
10662 key.objectid = rec->start;
10663 if (rec->metadata) {
10664 key.type = BTRFS_METADATA_ITEM_KEY;
10665 key.offset = rec->info_level;
10667 key.type = BTRFS_EXTENT_ITEM_KEY;
10668 key.offset = rec->max_size;
10671 trans = btrfs_start_transaction(root, 0);
10673 return PTR_ERR(trans);
10675 btrfs_init_path(&path);
10676 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10678 btrfs_release_path(&path);
10679 btrfs_commit_transaction(trans, root);
10682 fprintf(stderr, "Didn't find extent for %llu\n",
10683 (unsigned long long)rec->start);
10684 btrfs_release_path(&path);
10685 btrfs_commit_transaction(trans, root);
10689 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10690 struct btrfs_extent_item);
10691 flags = btrfs_extent_flags(path.nodes[0], ei);
10692 if (rec->flag_block_full_backref) {
10693 fprintf(stderr, "setting full backref on %llu\n",
10694 (unsigned long long)key.objectid);
10695 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10697 fprintf(stderr, "clearing full backref on %llu\n",
10698 (unsigned long long)key.objectid);
10699 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10701 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10702 btrfs_mark_buffer_dirty(path.nodes[0]);
10703 btrfs_release_path(&path);
10704 ret = btrfs_commit_transaction(trans, root);
10706 fprintf(stderr, "Repaired extent flags for %llu\n",
10707 (unsigned long long)rec->start);
10712 /* right now we only prune from the extent allocation tree */
10713 static int prune_one_block(struct btrfs_trans_handle *trans,
10714 struct btrfs_fs_info *info,
10715 struct btrfs_corrupt_block *corrupt)
10718 struct btrfs_path path;
10719 struct extent_buffer *eb;
10723 int level = corrupt->level + 1;
10725 btrfs_init_path(&path);
10727 /* we want to stop at the parent to our busted block */
10728 path.lowest_level = level;
10730 ret = btrfs_search_slot(trans, info->extent_root,
10731 &corrupt->key, &path, -1, 1);
10736 eb = path.nodes[level];
10743 * hopefully the search gave us the block we want to prune,
10744 * lets try that first
10746 slot = path.slots[level];
10747 found = btrfs_node_blockptr(eb, slot);
10748 if (found == corrupt->cache.start)
10751 nritems = btrfs_header_nritems(eb);
10753 /* the search failed, lets scan this node and hope we find it */
10754 for (slot = 0; slot < nritems; slot++) {
10755 found = btrfs_node_blockptr(eb, slot);
10756 if (found == corrupt->cache.start)
10760 * we couldn't find the bad block. TODO, search all the nodes for pointers
10763 if (eb == info->extent_root->node) {
10768 btrfs_release_path(&path);
10773 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10774 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10777 btrfs_release_path(&path);
10781 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10783 struct btrfs_trans_handle *trans = NULL;
10784 struct cache_extent *cache;
10785 struct btrfs_corrupt_block *corrupt;
10788 cache = search_cache_extent(info->corrupt_blocks, 0);
10792 trans = btrfs_start_transaction(info->extent_root, 1);
10794 return PTR_ERR(trans);
10796 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10797 prune_one_block(trans, info, corrupt);
10798 remove_cache_extent(info->corrupt_blocks, cache);
10801 return btrfs_commit_transaction(trans, info->extent_root);
10805 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10807 struct btrfs_block_group_cache *cache;
10812 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10813 &start, &end, EXTENT_DIRTY);
10816 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10821 cache = btrfs_lookup_first_block_group(fs_info, start);
10826 start = cache->key.objectid + cache->key.offset;
10830 static int check_extent_refs(struct btrfs_root *root,
10831 struct cache_tree *extent_cache)
10833 struct extent_record *rec;
10834 struct cache_extent *cache;
10841 * if we're doing a repair, we have to make sure
10842 * we don't allocate from the problem extents.
10843 * In the worst case, this will be all the
10844 * extents in the FS
10846 cache = search_cache_extent(extent_cache, 0);
10848 rec = container_of(cache, struct extent_record, cache);
10849 set_extent_dirty(root->fs_info->excluded_extents,
10851 rec->start + rec->max_size - 1);
10852 cache = next_cache_extent(cache);
10855 /* pin down all the corrupted blocks too */
10856 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10858 set_extent_dirty(root->fs_info->excluded_extents,
10860 cache->start + cache->size - 1);
10861 cache = next_cache_extent(cache);
10863 prune_corrupt_blocks(root->fs_info);
10864 reset_cached_block_groups(root->fs_info);
10867 reset_cached_block_groups(root->fs_info);
10870 * We need to delete any duplicate entries we find first otherwise we
10871 * could mess up the extent tree when we have backrefs that actually
10872 * belong to a different extent item and not the weird duplicate one.
10874 while (repair && !list_empty(&duplicate_extents)) {
10875 rec = to_extent_record(duplicate_extents.next);
10876 list_del_init(&rec->list);
10878 /* Sometimes we can find a backref before we find an actual
10879 * extent, so we need to process it a little bit to see if there
10880 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10881 * if this is a backref screwup. If we need to delete stuff
10882 * process_duplicates() will return 0, otherwise it will return
10885 if (process_duplicates(extent_cache, rec))
10887 ret = delete_duplicate_records(root, rec);
10891 * delete_duplicate_records will return the number of entries
10892 * deleted, so if it's greater than 0 then we know we actually
10893 * did something and we need to remove.
10906 cache = search_cache_extent(extent_cache, 0);
10909 rec = container_of(cache, struct extent_record, cache);
10910 if (rec->num_duplicates) {
10911 fprintf(stderr, "extent item %llu has multiple extent "
10912 "items\n", (unsigned long long)rec->start);
10916 if (rec->refs != rec->extent_item_refs) {
10917 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10918 (unsigned long long)rec->start,
10919 (unsigned long long)rec->nr);
10920 fprintf(stderr, "extent item %llu, found %llu\n",
10921 (unsigned long long)rec->extent_item_refs,
10922 (unsigned long long)rec->refs);
10923 ret = record_orphan_data_extents(root->fs_info, rec);
10929 if (all_backpointers_checked(rec, 1)) {
10930 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10931 (unsigned long long)rec->start,
10932 (unsigned long long)rec->nr);
10936 if (!rec->owner_ref_checked) {
10937 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10938 (unsigned long long)rec->start,
10939 (unsigned long long)rec->nr);
10944 if (repair && fix) {
10945 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10951 if (rec->bad_full_backref) {
10952 fprintf(stderr, "bad full backref, on [%llu]\n",
10953 (unsigned long long)rec->start);
10955 ret = fixup_extent_flags(root->fs_info, rec);
10963 * Although it's not a extent ref's problem, we reuse this
10964 * routine for error reporting.
10965 * No repair function yet.
10967 if (rec->crossing_stripes) {
10969 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10970 rec->start, rec->start + rec->max_size);
10974 if (rec->wrong_chunk_type) {
10976 "bad extent [%llu, %llu), type mismatch with chunk\n",
10977 rec->start, rec->start + rec->max_size);
10982 remove_cache_extent(extent_cache, cache);
10983 free_all_extent_backrefs(rec);
10984 if (!init_extent_tree && repair && (!cur_err || fix))
10985 clear_extent_dirty(root->fs_info->excluded_extents,
10987 rec->start + rec->max_size - 1);
10992 if (ret && ret != -EAGAIN) {
10993 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10996 struct btrfs_trans_handle *trans;
10998 root = root->fs_info->extent_root;
10999 trans = btrfs_start_transaction(root, 1);
11000 if (IS_ERR(trans)) {
11001 ret = PTR_ERR(trans);
11005 ret = btrfs_fix_block_accounting(trans, root);
11008 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the length of one device stripe of a chunk.
 *
 * @type:        chunk type flags; only the RAID0/RAID10/RAID5/RAID6 bits
 *               are examined, checked in that order
 * @length:      logical length of the chunk
 * @num_stripes: number of stripes recorded for the chunk
 *
 * RAID0 divides @length by @num_stripes, RAID10 divides twice @length by
 * @num_stripes, RAID5 and RAID6 divide @length by the stripe count minus one
 * and minus two respectively.  Any other profile returns @length unchanged.
 *
 * Returns 0 when the stripe count leaves no data stripe to divide by (for
 * example a RAID5 chunk claiming a single stripe).  Such counts only come
 * from damaged metadata and used to trigger a division by zero.
 */
u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
{
	u64 stripe_size = length;
	int data_stripes;

	if (type & BTRFS_BLOCK_GROUP_RAID0) {
		data_stripes = num_stripes;
	} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
		stripe_size = length * 2;
		data_stripes = num_stripes;
	} else if (type & BTRFS_BLOCK_GROUP_RAID5) {
		data_stripes = num_stripes - 1;
	} else if (type & BTRFS_BLOCK_GROUP_RAID6) {
		data_stripes = num_stripes - 2;
	} else {
		/* Not striped: every stripe covers the whole chunk */
		return length;
	}

	if (data_stripes <= 0)
		return 0;

	stripe_size /= data_stripes;
	return stripe_size;
}
11043 * Check the chunk with its block group/dev list ref:
11044 * Return 0 if all refs seem valid.
11045 * Return 1 if only part of the refs seem valid and a later check is needed to
11046 * rebuild the rest, e.g. a missing block group that requires searching the extent tree.
11047 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check a single chunk record against the cached block group records
 * and the cached device extent records.
 */
11049 static int check_chunk_refs(struct chunk_record *chunk_rec,
11050 struct block_group_tree *block_group_cache,
11051 struct device_extent_tree *dev_extent_cache,
11054 struct cache_extent *block_group_item;
11055 struct block_group_record *block_group_rec;
11056 struct cache_extent *dev_extent_item;
11057 struct device_extent_record *dev_extent_rec;
11061 int metadump_v2 = 0;
/*
 * Part 1: look the chunk up in the block group cache.  When an entry exists,
 * the chunk's length, offset and type_flags are compared with the block
 * group's offset, objectid and flags.
 */
11065 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11067 chunk_rec->length);
11068 if (block_group_item) {
11069 block_group_rec = container_of(block_group_item,
11070 struct block_group_record,
11072 if (chunk_rec->length != block_group_rec->offset ||
11073 chunk_rec->offset != block_group_rec->objectid ||
11075 chunk_rec->type_flags != block_group_rec->flags)) {
11078 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11079 chunk_rec->objectid,
11084 chunk_rec->type_flags,
11085 block_group_rec->objectid,
11086 block_group_rec->type,
11087 block_group_rec->offset,
11088 block_group_rec->offset,
11089 block_group_rec->objectid,
11090 block_group_rec->flags);
/* Take the block group record off its list and remember it in the chunk. */
11093 list_del_init(&block_group_rec->list);
11094 chunk_rec->bg_rec = block_group_rec;
11099 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11100 chunk_rec->objectid,
11105 chunk_rec->type_flags);
/*
 * Part 2: every stripe of the chunk is looked up in the device extent cache
 * by (devid, offset, length), where length is the per-stripe length derived
 * from the chunk's type flags, length and stripe count.
 */
11112 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11113 chunk_rec->num_stripes);
11114 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11115 devid = chunk_rec->stripes[i].devid;
11116 offset = chunk_rec->stripes[i].offset;
11117 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11118 devid, offset, length);
11119 if (dev_extent_item) {
11120 dev_extent_rec = container_of(dev_extent_item,
11121 struct device_extent_record,
/* The dev extent must agree on device, offset, owning chunk and length. */
11123 if (dev_extent_rec->objectid != devid ||
11124 dev_extent_rec->offset != offset ||
11125 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11126 dev_extent_rec->length != length) {
11129 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11130 chunk_rec->objectid,
11133 chunk_rec->stripes[i].devid,
11134 chunk_rec->stripes[i].offset,
11135 dev_extent_rec->objectid,
11136 dev_extent_rec->offset,
11137 dev_extent_rec->length);
/* Move the dev extent record onto this chunk's dextents list. */
11140 list_move(&dev_extent_rec->chunk_list,
11141 &chunk_rec->dextents);
11146 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11147 chunk_rec->objectid,
11150 chunk_rec->stripes[i].devid,
11151 chunk_rec->stripes[i].offset);
11158 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record in @chunk_cache and sort the records
 * onto the caller's lists according to the result.
 *
 * @good, @bad and @rebuild may each be NULL; a record is only queued when the
 * list matching its result was supplied.
 */
11159 int check_chunks(struct cache_tree *chunk_cache,
11160 struct block_group_tree *block_group_cache,
11161 struct device_extent_tree *dev_extent_cache,
11162 struct list_head *good, struct list_head *bad,
11163 struct list_head *rebuild, int silent)
11165 struct cache_extent *chunk_item;
11166 struct chunk_record *chunk_rec;
11167 struct block_group_record *bg_rec;
11168 struct device_extent_record *dext_rec;
11172 chunk_item = first_cache_extent(chunk_cache);
11173 while (chunk_item) {
11174 chunk_rec = container_of(chunk_item, struct chunk_record,
11176 err = check_chunk_refs(chunk_rec, block_group_cache,
11177 dev_extent_cache, silent);
/* err == 0 -> good, err > 0 -> rebuild, err < 0 -> bad. */
11180 if (err == 0 && good)
11181 list_add_tail(&chunk_rec->list, good);
11182 if (err > 0 && rebuild)
11183 list_add_tail(&chunk_rec->list, rebuild);
11184 if (err < 0 && bad)
11185 list_add_tail(&chunk_rec->list, bad);
11186 chunk_item = next_cache_extent(chunk_item);
/*
 * Walk the block group records still on block_group_cache->block_groups;
 * check_chunk_refs() unlinks the ones it hands to a chunk.
 */
11189 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11192 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Walk the device extents on the no_chunk_orphans list. */
11200 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11204 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11205 dext_rec->objectid,
/*
 * Add up the lengths of the cached device extents that belong to
 * @dev_rec->devid and compare the sum with the device's byte_used field.
 */
11215 static int check_device_used(struct device_record *dev_rec,
11216 struct device_extent_tree *dext_cache)
11218 struct cache_extent *cache;
11219 struct device_extent_record *dev_extent_rec;
11220 u64 total_byte = 0;
/* Start from the first cached extent at or after (devid, 0). */
11222 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11224 dev_extent_rec = container_of(cache,
11225 struct device_extent_record,
/* An extent with a different objectid belongs to another device. */
11227 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and account its length. */
11230 list_del_init(&dev_extent_rec->device_list);
11231 total_byte += dev_extent_rec->length;
11232 cache = next_cache_extent(cache);
11235 if (total_byte != dev_rec->byte_used) {
11237 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11238 total_byte, dev_rec->byte_used, dev_rec->objectid,
11239 dev_rec->type, dev_rec->offset);
11247 * Extra (optional) check for dev_item size to report possible problem on a new
/*
 * Warn when a device's @total_bytes is not a multiple of @sectorsize.
 * @devid only identifies the device in the message.
 */
11250 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11252 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11254 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11255 devid, total_bytes, sectorsize);
11257 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11258 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11263 * Unlike device size alignment check above, some super total_bytes check
11264 * failure can lead to mount failure for newer kernel.
11266 * So this function will return the error for a fatal super total_bytes problem.
/*
 * Compare the superblock's total_bytes with the sum of total_bytes over all
 * devices on fs_info->fs_devices->devices.
 */
11268 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11270 struct btrfs_device *dev;
11271 struct list_head *dev_list = &fs_info->fs_devices->devices;
11272 u64 total_bytes = 0;
11273 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
/* Sum the size of every device in the list. */
11275 list_for_each_entry(dev, dev_list, dev_list)
11276 total_bytes += dev->total_bytes;
11278 /* Important check, which can cause unmountable fs */
11279 if (super_bytes < total_bytes) {
11280 error("super total bytes %llu smaller than real device(s) size %llu",
11281 super_bytes, total_bytes);
11282 error("mounting this fs may fail for newer kernels");
11283 error("this can be fixed by 'btrfs rescue fix-device-size'");
11288 * Optional check, just to make everything aligned and match with each
11291 * For a btrfs-image restored fs, we don't need to check it anyway.
/* Test for either METADUMP flag in the superblock flags. */
11293 if (btrfs_super_flags(fs_info->super_copy) &
11294 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
/* Both sizes should be sector aligned and equal to each other. */
11296 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11297 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11298 super_bytes != total_bytes) {
11299 warning("minor unaligned/mismatch device size detected");
11301 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11306 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk every device record in @dev_cache, check its used bytes against the
 * cached device extents and check its size alignment.
 */
11307 static int check_devices(struct rb_root *dev_cache,
11308 struct device_extent_tree *dev_extent_cache)
11310 struct rb_node *dev_node;
11311 struct device_record *dev_rec;
11312 struct device_extent_record *dext_rec;
11316 dev_node = rb_first(dev_cache);
11318 dev_rec = container_of(dev_node, struct device_record, node);
11319 err = check_device_used(dev_rec, dev_extent_cache);
/* The alignment check uses the sector size of the file-scope global_info. */
11323 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11324 global_info->sectorsize);
11325 dev_node = rb_next(dev_node);
/*
 * Walk the device extents on the no_device_orphans list;
 * check_device_used() unlinks extents from device_list as it counts them.
 */
11327 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11330 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11331 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @drop_key is copied into the record with memcpy(); callers in this file
 * pass NULL for roots that have no drop progress.
 */
11338 static int add_root_item_to_list(struct list_head *head,
11339 u64 objectid, u64 bytenr, u64 last_snapshot,
11340 u8 level, u8 drop_level,
11341 struct btrfs_key *drop_key)
11344 struct root_item_record *ri_rec;
/* The record comes from malloc(), so fields not assigned below are not zeroed. */
11345 ri_rec = malloc(sizeof(*ri_rec));
11348 ri_rec->bytenr = bytenr;
11349 ri_rec->objectid = objectid;
11350 ri_rec->level = level;
11351 ri_rec->drop_level = drop_level;
11352 ri_rec->last_snapshot = last_snapshot;
11354 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11355 list_add_tail(&ri_rec->list, head);
/* Drain @list, unlinking the first root_item_record until the list is empty. */
11360 static void free_root_item_list(struct list_head *list)
11362 struct root_item_record *ri_rec;
11364 while (!list_empty(list)) {
11365 ri_rec = list_first_entry(list, struct root_item_record,
11367 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list: read each root's tree
 * block, register it with add_root_to_pending() and drive run_next_block()
 * over the pending blocks.
 *
 * The cache tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
11372 static int deal_root_from_list(struct list_head *list,
11373 struct btrfs_root *root,
11374 struct block_info *bits,
11376 struct cache_tree *pending,
11377 struct cache_tree *seen,
11378 struct cache_tree *reada,
11379 struct cache_tree *nodes,
11380 struct cache_tree *extent_cache,
11381 struct cache_tree *chunk_cache,
11382 struct rb_root *dev_cache,
11383 struct block_group_tree *block_group_cache,
11384 struct device_extent_tree *dev_extent_cache)
11389 while (!list_empty(list)) {
11390 struct root_item_record *rec;
11391 struct extent_buffer *buf;
/* Always take the record at the head of the list. */
11392 rec = list_entry(list->next,
11393 struct root_item_record, list);
/* Read the root's tree block; the last argument to read_tree_block() is 0. */
11395 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11396 if (!extent_buffer_uptodate(buf)) {
11397 free_extent_buffer(buf);
11401 ret = add_root_to_pending(buf, extent_cache, pending,
11402 seen, nodes, rec->objectid);
11406 * To rebuild extent tree, we need deal with snapshot
11407 * one by one, otherwise we deal with node firstly which
11408 * can maximize readahead.
/* This call passes the current record as the last argument. */
11411 ret = run_next_block(root, bits, bits_nr, &last,
11412 pending, seen, reada, nodes,
11413 extent_cache, chunk_cache,
11414 dev_cache, block_group_cache,
11415 dev_extent_cache, rec);
11419 free_extent_buffer(buf);
11420 list_del(&rec->list);
/* This call passes NULL instead of a record. */
11426 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11427 reada, nodes, extent_cache, chunk_cache,
11428 dev_cache, block_group_cache,
11429 dev_extent_cache, NULL);
/*
 * Driver for the chunk / extent / device checks.
 *
 * Sets up the local caches, queues the tree root, the chunk root and every
 * ROOT_ITEM found in the tree root, processes the queued roots, then runs
 * check_chunks(), check_extent_refs() and check_devices() and tears the
 * caches down again.
 */
11439 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11441 struct rb_root dev_cache;
11442 struct cache_tree chunk_cache;
11443 struct block_group_tree block_group_cache;
11444 struct device_extent_tree dev_extent_cache;
11445 struct cache_tree extent_cache;
11446 struct cache_tree seen;
11447 struct cache_tree pending;
11448 struct cache_tree reada;
11449 struct cache_tree nodes;
11450 struct extent_io_tree excluded_extents;
11451 struct cache_tree corrupt_blocks;
11452 struct btrfs_path path;
11453 struct btrfs_key key;
11454 struct btrfs_key found_key;
11456 struct block_info *bits;
11458 struct extent_buffer *leaf;
11460 struct btrfs_root_item ri;
11461 struct list_head dropping_trees;
11462 struct list_head normal_trees;
11463 struct btrfs_root *root1;
11464 struct btrfs_root *root;
/* Initialise every local cache and list before use. */
11468 root = fs_info->fs_root;
11469 dev_cache = RB_ROOT;
11470 cache_tree_init(&chunk_cache);
11471 block_group_tree_init(&block_group_cache);
11472 device_extent_tree_init(&dev_extent_cache);
11474 cache_tree_init(&extent_cache);
11475 cache_tree_init(&seen);
11476 cache_tree_init(&pending);
11477 cache_tree_init(&nodes);
11478 cache_tree_init(&reada);
11479 cache_tree_init(&corrupt_blocks);
11480 extent_io_tree_init(&excluded_extents);
11481 INIT_LIST_HEAD(&dropping_trees);
11482 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish pointers to the stack-local caches and the free-extent hook in
 * fs_info; they are reset to NULL further down.
 */
11485 fs_info->excluded_extents = &excluded_extents;
11486 fs_info->fsck_extent_cache = &extent_cache;
11487 fs_info->free_extent_hook = free_extent_hook;
11488 fs_info->corrupt_blocks = &corrupt_blocks;
11492 bits = malloc(bits_nr * sizeof(struct block_info));
/* Start the progress task when progress reporting is enabled. */
11498 if (ctx.progress_enabled) {
11499 ctx.tp = TASK_EXTENTS;
11500 task_start(ctx.info);
/* Queue the tree root and the chunk root on the normal_trees list. */
11504 root1 = fs_info->tree_root;
11505 level = btrfs_header_level(root1->node);
11506 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11507 root1->node->start, 0, level, 0, NULL);
11510 root1 = fs_info->chunk_root;
11511 level = btrfs_header_level(root1->node);
11512 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11513 root1->node->start, 0, level, 0, NULL);
/* Search the tree root for ROOT_ITEM keys and queue each root found. */
11516 btrfs_init_path(&path);
11519 key.type = BTRFS_ROOT_ITEM_KEY;
11520 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11524 leaf = path.nodes[0];
11525 slot = path.slots[0];
/* Past the last item of this leaf: advance to the next leaf. */
11526 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11527 ret = btrfs_next_leaf(root, &path);
11530 leaf = path.nodes[0];
11531 slot = path.slots[0];
11533 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11534 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11535 unsigned long offset;
/* Copy the root item out of the leaf into the local ri. */
11538 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11539 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11540 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A drop_progress objectid of 0 queues the root on normal_trees; the other
 * visible call queues it on dropping_trees with its drop key and drop level.
 */
11541 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11542 level = btrfs_root_level(&ri);
11543 ret = add_root_item_to_list(&normal_trees,
11544 found_key.objectid,
11545 btrfs_root_bytenr(&ri),
11546 last_snapshot, level,
11551 level = btrfs_root_level(&ri);
11552 objectid = found_key.objectid;
/* found_key is overwritten with the drop progress key here. */
11553 btrfs_disk_key_to_cpu(&found_key,
11554 &ri.drop_progress);
11555 ret = add_root_item_to_list(&dropping_trees,
11557 btrfs_root_bytenr(&ri),
11558 last_snapshot, level,
11559 ri.drop_level, &found_key);
11566 btrfs_release_path(&path);
11569 * check_block can return -EAGAIN if it fixes something, please keep
11570 * this in mind when dealing with return values from these functions, if
11571 * we get -EAGAIN we want to fall through and restart the loop.
/* Process the normal trees first, then the trees being dropped. */
11573 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11574 &seen, &reada, &nodes, &extent_cache,
11575 &chunk_cache, &dev_cache, &block_group_cache,
11576 &dev_extent_cache);
11578 if (ret == -EAGAIN)
11582 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11583 &pending, &seen, &reada, &nodes,
11584 &extent_cache, &chunk_cache, &dev_cache,
11585 &block_group_cache, &dev_extent_cache);
11587 if (ret == -EAGAIN)
/* Chunk check: no result lists are supplied and silent is 0. */
11592 ret = check_chunks(&chunk_cache, &block_group_cache,
11593 &dev_extent_cache, NULL, NULL, NULL, 0);
11595 if (ret == -EAGAIN)
11600 ret = check_extent_refs(root, &extent_cache);
11602 if (ret == -EAGAIN)
11607 ret = check_devices(&dev_cache, &dev_extent_cache);
11612 task_stop(ctx.info);
/* Release the fsck-only state and reset the fs_info pointers set above. */
11614 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11615 extent_io_tree_cleanup(&excluded_extents);
11616 fs_info->fsck_extent_cache = NULL;
11617 fs_info->free_extent_hook = NULL;
11618 fs_info->corrupt_blocks = NULL;
11619 fs_info->excluded_extents = NULL;
11622 free_chunk_cache_tree(&chunk_cache);
11623 free_device_cache_tree(&dev_cache);
11624 free_block_group_tree(&block_group_cache);
11625 free_device_extent_tree(&dev_extent_cache);
11626 free_extent_cache_tree(&seen);
11627 free_extent_cache_tree(&pending);
11628 free_extent_cache_tree(&reada);
11629 free_extent_cache_tree(&nodes);
11630 free_root_item_list(&normal_trees);
11631 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): a second, similar cleanup sequence follows.  It also frees
 * the extent record cache but does not reset the fs_info pointers; confirm
 * which exit path reaches each sequence.
 */
11634 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11635 free_extent_cache_tree(&seen);
11636 free_extent_cache_tree(&pending);
11637 free_extent_cache_tree(&reada);
11638 free_extent_cache_tree(&nodes);
11639 free_chunk_cache_tree(&chunk_cache);
11640 free_block_group_tree(&block_group_cache);
11641 free_device_cache_tree(&dev_cache);
11642 free_device_extent_tree(&dev_extent_cache);
11643 free_extent_record_cache(&extent_cache);
11644 free_root_item_list(&normal_trees);
11645 free_root_item_list(&dropping_trees);
11646 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of an inline extent ref read from @eb.
 *
 * The four known ref types are listed as case labels.  Any other type is
 * reported with the extent's key and yields UNKNOWN_TYPE.
 */
11650 static int check_extent_inline_ref(struct extent_buffer *eb,
11651 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11654 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11657 case BTRFS_TREE_BLOCK_REF_KEY:
11658 case BTRFS_EXTENT_DATA_REF_KEY:
11659 case BTRFS_SHARED_BLOCK_REF_KEY:
11660 case BTRFS_SHARED_DATA_REF_KEY:
11664 error("extent[%llu %u %llu] has unknown ref type: %d",
11665 key->objectid, key->type, key->offset, type);
11666 ret = UNKNOWN_TYPE;
11674 * Check backrefs of a tree block given by @bytenr or @eb.
11676 * @root: the root containing the @bytenr or @eb
11677 * @eb: tree block extent buffer, can be NULL
11678 * @bytenr: bytenr of the tree block to search
11679 * @level: tree level of the tree block
11680 * @owner: owner of the tree block
11682 * Return >0 for any error found and output error message
11683 * Return 0 for no error found
/*
 * Look up the extent tree backref for the tree block at @bytenr and compare
 * the extent item with the block itself.
 *
 * The search goes through three sources in turn: inline refs inside the
 * extent/metadata item, a keyed TREE_BLOCK_REF item, and finally a keyed
 * SHARED_BLOCK_REF item.
 */
11685 static int check_tree_block_ref(struct btrfs_root *root,
11686 struct extent_buffer *eb, u64 bytenr,
11687 int level, u64 owner, struct node_refs *nrefs)
11689 struct btrfs_key key;
11690 struct btrfs_root *extent_root = root->fs_info->extent_root;
11691 struct btrfs_path path;
11692 struct btrfs_extent_item *ei;
11693 struct btrfs_extent_inline_ref *iref;
11694 struct extent_buffer *leaf;
11699 int root_level = btrfs_header_level(root->node);
11701 u32 nodesize = root->fs_info->nodesize;
/*
 * Build the search key.  METADATA_ITEM is used when the filesystem has the
 * SKINNY_METADATA incompat flag; EXTENT_ITEM is presumably the else branch.
 * An offset of (u64)-1 places the search after any item for @bytenr.
 */
11710 btrfs_init_path(&path);
11711 key.objectid = bytenr;
11712 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11713 key.type = BTRFS_METADATA_ITEM_KEY;
11715 key.type = BTRFS_EXTENT_ITEM_KEY;
11716 key.offset = (u64)-1;
11718 /* Search for the backref in extent tree */
11719 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11721 err |= BACKREF_MISSING;
/* Step back to the extent item that belongs to @bytenr. */
11724 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11726 err |= BACKREF_MISSING;
11730 leaf = path.nodes[0];
11731 slot = path.slots[0];
11732 btrfs_item_key_to_cpu(leaf, &key, slot);
11734 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the level is the key offset and the inline refs start
 * right after the extent item.  Otherwise a btrfs_tree_block_info follows
 * the extent item, holds the level, and the inline refs start after it.
 */
11736 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11737 skinny_level = (int)key.offset;
11738 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11740 struct btrfs_tree_block_info *info;
11742 info = (struct btrfs_tree_block_info *)(ei + 1);
11743 skinny_level = btrfs_tree_block_level(leaf, info);
11744 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11753 * Due to the feature of shared tree blocks, if the upper node
11754 * is a fs root or shared node, the extent of checked node may
11755 * not be updated until the next CoW.
11758 strict = should_check_extent_strictly(root, nrefs,
/* The extent item must carry the TREE_BLOCK flag. */
11760 if (!(btrfs_extent_flags(leaf, ei) &
11761 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11763 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11764 key.objectid, nodesize,
11765 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11766 err = BACKREF_MISMATCH;
/* The generation in the block header must match the extent item. */
11768 header_gen = btrfs_header_generation(eb);
11769 extent_gen = btrfs_extent_generation(leaf, ei);
11770 if (header_gen != extent_gen) {
11772 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11773 key.objectid, nodesize, header_gen,
11775 err = BACKREF_MISMATCH;
/* The caller's level must match the level recorded in the extent tree. */
11777 if (level != skinny_level) {
11779 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11780 key.objectid, nodesize, level, skinny_level);
11781 err = BACKREF_MISMATCH;
/* A block whose owner is not an fs tree must have exactly one ref. */
11783 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11785 "extent[%llu %u] is referred by other roots than %llu",
11786 key.objectid, nodesize, root->objectid);
11787 err = BACKREF_MISMATCH;
11792 * Iterate the extent/metadata item to find the exact backref
/* ptr walks the inline refs; end is the first byte past the item. */
11794 item_size = btrfs_item_size_nr(leaf, slot);
11795 ptr = (unsigned long)iref;
11796 end = (unsigned long)ei + item_size;
11798 while (ptr < end) {
11799 iref = (struct btrfs_extent_inline_ref *)ptr;
11800 type = btrfs_extent_inline_ref_type(leaf, iref);
11801 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11803 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * For a TREE_BLOCK_REF the ref offset is compared with root->objectid, and
 * in non-strict mode with @owner.
 */
11808 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11809 if (offset == root->objectid)
11811 if (!strict && owner == offset)
11813 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11815 * Backref of tree reloc root points to itself, no need
11816 * to check backref any more.
11818 * This may be an error of loop backref, but extent tree
11819 * checker should have already handled it.
11820 * Here we only need to avoid infinite iteration.
11822 if (offset == bytenr) {
11826 * Check if the backref points to valid
/* Recurse on the parent block at @offset, one level up, with no buffer. */
11829 found_ref = !check_tree_block_ref( root, NULL,
11830 offset, level + 1, owner,
/* Advance by the size of this ref type. */
11837 ptr += btrfs_extent_inline_ref_size(type);
11841 * Inlined extent item doesn't have what we need, check
11842 * TREE_BLOCK_REF_KEY
11845 btrfs_release_path(&path);
11846 key.objectid = bytenr;
11847 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11848 key.offset = root->objectid;
11850 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11855 * Finally check SHARED BLOCK REF, any found will be good
11856 * Here we're not doing comprehensive extent backref checking,
11857 * only need to ensure there is some extent referring to this
11861 btrfs_release_path(&path);
11862 key.objectid = bytenr;
11863 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11864 key.offset = (u64)-1;
11866 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11868 err |= BACKREF_MISSING;
11871 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11873 err |= BACKREF_MISSING;
11879 err |= BACKREF_MISSING;
11881 btrfs_release_path(&path);
/*
 * For the message: use the parent bytenr from @nrefs when checking strictly,
 * below the root level, and the level above is marked full_backref.
 */
11882 if (nrefs && strict &&
11883 level < root_level && nrefs->full_backref[level + 1])
11884 parent = nrefs->bytenr[level + 1];
/* The lost-backref message applies only when a buffer was supplied. */
11885 if (eb && (err & BACKREF_MISSING))
11887 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11888 bytenr, nodesize, owner, level,
11889 parent ? "parent" : "root",
11890 parent ? parent : root->objectid);
11895 * If @err contains BACKREF_MISSING then add extent of the
11896 * file_extent_data_item.
11898 * Returns error bits after repair.
/*
 * Repair a missing data backref for the file extent item that @pathp points
 * to.
 *
 * Searches the extent tree for the extent item of the file extent, inserts
 * an empty item with a zero ref count, and increases the extent ref count
 * for this root.
 */
11900 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11901 struct btrfs_root *root,
11902 struct btrfs_path *pathp,
11903 struct node_refs *nrefs,
11906 struct btrfs_file_extent_item *fi;
11907 struct btrfs_key fi_key;
11908 struct btrfs_key key;
11909 struct btrfs_extent_item *ei;
11910 struct btrfs_path path;
11911 struct btrfs_root *extent_root = root->fs_info->extent_root;
11912 struct extent_buffer *eb;
/* Read the file extent item at the caller's path position. */
11924 eb = pathp->nodes[0];
11925 slot = pathp->slots[0];
11926 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11927 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Test for inline extents and for extents with disk_bytenr 0. */
11929 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11930 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11933 file_offset = fi_key.offset;
11934 generation = btrfs_file_extent_generation(eb, fi);
11935 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11936 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11937 extent_offset = btrfs_file_extent_offset(eb, fi);
/* File offset minus the offset into the extent. */
11938 offset = file_offset - extent_offset;
11940 /* now repair only adds backref */
11941 if ((err & BACKREF_MISSING) == 0)
11944 /* search extent item */
11945 key.objectid = disk_bytenr;
11946 key.type = BTRFS_EXTENT_ITEM_KEY;
11947 key.offset = num_bytes;
11949 btrfs_init_path(&path);
11950 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11956 /* insert an extent item */
11958 key.objectid = disk_bytenr;
11959 key.type = BTRFS_EXTENT_ITEM_KEY;
11960 key.offset = num_bytes;
11961 size = sizeof(*ei);
11963 btrfs_release_path(&path);
11964 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
/* eb now refers to the extent tree leaf, not the caller's leaf. */
11968 eb = path.nodes[0];
11969 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The item starts with zero refs, the file extent's generation and the DATA flag. */
11971 btrfs_set_extent_refs(eb, ei, 0);
11972 btrfs_set_extent_generation(eb, ei, generation);
11973 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11975 btrfs_mark_buffer_dirty(eb);
11976 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11978 btrfs_release_path(&path);
/*
 * NOTE(review): eb is reassigned to the extent tree leaf above when an item
 * is inserted; confirm which leaf's bytenr is intended as the parent here.
 */
11981 if (nrefs->full_backref[0])
11982 parent = btrfs_header_bytenr(eb);
/* Add the backref; the objectid argument depends on whether parent is set. */
11986 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11988 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11992 "failed to increase extent data backref[%llu %llu] root %llu",
11993 disk_bytenr, num_bytes, root->objectid);
11996 printf("Add one extent data backref [%llu %llu]\n",
11997 disk_bytenr, num_bytes);
/* Clear the BACKREF_MISSING bit in err. */
12000 err &= ~BACKREF_MISSING;
12003 error("can't repair root %llu extent data item[%llu %llu]",
12004 root->objectid, disk_bytenr, num_bytes);
12009 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12011 * Return >0 for any error found and output error message
12012 * Return 0 for no error found
/*
 * Check the file extent item that @pathp points to: the alignment of its
 * byte counts and the presence of a matching data backref in the extent
 * tree.
 *
 * When @account_bytes is set, aligned sizes are added to the file-scope
 * counters data_bytes_allocated and data_bytes_referenced.
 */
12014 static int check_extent_data_item(struct btrfs_root *root,
12015 struct btrfs_path *pathp,
12016 struct node_refs *nrefs, int account_bytes)
12018 struct btrfs_file_extent_item *fi;
12019 struct extent_buffer *eb = pathp->nodes[0];
12020 struct btrfs_path path;
12021 struct btrfs_root *extent_root = root->fs_info->extent_root;
12022 struct btrfs_key fi_key;
12023 struct btrfs_key dbref_key;
12024 struct extent_buffer *leaf;
12025 struct btrfs_extent_item *ei;
12026 struct btrfs_extent_inline_ref *iref;
12027 struct btrfs_extent_data_ref *dref;
12030 u64 disk_num_bytes;
12031 u64 extent_num_bytes;
12038 int found_dbackref = 0;
12039 int slot = pathp->slots[0];
12044 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12045 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12047 /* Nothing to check for hole and inline data extents */
12048 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12049 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12052 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12053 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12054 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12055 offset = btrfs_file_extent_offset(eb, fi);
12057 /* Check unaligned disk_num_bytes and num_bytes */
12058 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12060 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12061 fi_key.objectid, fi_key.offset, disk_num_bytes,
12062 root->fs_info->sectorsize);
12063 err |= BYTES_UNALIGNED;
12064 } else if (account_bytes) {
12065 data_bytes_allocated += disk_num_bytes;
12067 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12069 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12070 fi_key.objectid, fi_key.offset, extent_num_bytes,
12071 root->fs_info->sectorsize);
12072 err |= BYTES_UNALIGNED;
12073 } else if (account_bytes) {
12074 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is used for the non-strict match below. */
12076 owner = btrfs_header_owner(eb);
12078 /* Check the extent item of the file extent in extent tree */
12079 btrfs_init_path(&path);
12080 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12081 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12082 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12084 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12088 leaf = path.nodes[0];
12089 slot = path.slots[0];
12090 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12092 extent_flags = btrfs_extent_flags(leaf, ei);
/* The extent item must carry the DATA flag. */
12094 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12096 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12097 disk_bytenr, disk_num_bytes,
12098 BTRFS_EXTENT_FLAG_DATA);
12099 err |= BACKREF_MISMATCH;
12102 /* Check data backref inside that extent item */
/* Inline refs start right after the extent item; end is one past the item. */
12103 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12104 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12105 ptr = (unsigned long)iref;
12106 end = (unsigned long)ei + item_size;
12107 strict = should_check_extent_strictly(root, nrefs, -1);
12109 while (ptr < end) {
12113 bool match = false;
12115 iref = (struct btrfs_extent_inline_ref *)ptr;
12116 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref body is located at the inline ref's offset field. */
12117 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12119 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12124 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12125 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12126 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12127 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/*
 * The ref is compared on inode objectid and on the file offset minus the
 * offset into the extent.
 */
12129 if (ref_objectid == fi_key.objectid &&
12130 ref_offset == fi_key.offset - offset)
/* Accept a ref from this root, or from the leaf's owner when not strict. */
12132 if (ref_root == root->objectid && match)
12133 found_dbackref = 1;
12134 else if (!strict && owner == ref_root && match)
12135 found_dbackref = 1;
12136 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared data ref counts when the tree block it names checks out. */
12137 found_dbackref = !check_tree_block_ref(root, NULL,
12138 btrfs_extent_inline_ref_offset(leaf, iref),
12142 if (found_dbackref)
12144 ptr += btrfs_extent_inline_ref_size(type);
12147 if (!found_dbackref) {
12148 btrfs_release_path(&path);
12150 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12151 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12152 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* The key offset of a keyed data ref is a hash of (root, inode, offset). */
12153 dbref_key.offset = hash_extent_data_ref(root->objectid,
12154 fi_key.objectid, fi_key.offset - offset);
12156 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12157 &dbref_key, &path, 0, 0);
12159 found_dbackref = 1;
12163 btrfs_release_path(&path);
12166 * Neither inlined nor EXTENT_DATA_REF found, try
12167 * SHARED_DATA_REF as last chance.
12169 dbref_key.objectid = disk_bytenr;
12170 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
/* The shared ref is keyed by the bytenr of the leaf holding the file extent. */
12171 dbref_key.offset = eb->start;
12173 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12174 &dbref_key, &path, 0, 0);
12176 found_dbackref = 1;
12182 if (!found_dbackref)
12183 err |= BACKREF_MISSING;
12184 btrfs_release_path(&path);
12185 if (err & BACKREF_MISSING) {
12186 error("data extent[%llu %llu] backref lost",
12187 disk_bytenr, disk_num_bytes);
12193 * Get real tree block level for the case like shared block
12194 * Return >= 0 as tree level
12195 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources: the
 * extent tree item for the block and the header of the block itself.
 * The header level is the value returned.
 */
12197 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12199 struct extent_buffer *eb;
12200 struct btrfs_path path;
12201 struct btrfs_key key;
12202 struct btrfs_extent_item *ei;
12209 /* Search extent tree for extent generation and level */
12210 key.objectid = bytenr;
12211 key.type = BTRFS_METADATA_ITEM_KEY;
12212 key.offset = (u64)-1;
12214 btrfs_init_path(&path);
12215 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back to the extent item that belongs to @bytenr. */
12218 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12226 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12227 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12228 struct btrfs_extent_item);
12229 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent must be flagged as a tree block. */
12230 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12235 /* Get transid for later read_tree_block() check */
12236 transid = btrfs_extent_generation(path.nodes[0], ei);
12238 /* Get backref level as one source */
/*
 * A METADATA_ITEM stores the level in the key offset; otherwise it is read
 * from the btrfs_tree_block_info that follows the extent item.
 */
12239 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12240 backref_level = key.offset;
12242 struct btrfs_tree_block_info *info;
12244 info = (struct btrfs_tree_block_info *)(ei + 1);
12245 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12247 btrfs_release_path(&path);
12249 /* Get level from tree block as an alternative source */
12250 eb = read_tree_block(fs_info, bytenr, transid);
12251 if (!extent_buffer_uptodate(eb)) {
12252 free_extent_buffer(eb);
12255 header_level = btrfs_header_level(eb);
12256 free_extent_buffer(eb);
/* The two sources are compared before the header level is returned. */
12258 if (header_level != backref_level)
12260 return header_level;
12263 btrfs_release_path(&path);
12268 * Check if a tree block backref is valid (points to a valid tree block)
12269 * if level == -1, level will be resolved
12270 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id really references the tree block at @bytenr.
 *
 * Reads the block to obtain its first key, searches the root for that key
 * down to @level, and compares the node found at that level with @bytenr
 * and @level.
 */
12272 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12273 u64 bytenr, int level)
12275 struct btrfs_root *root;
12276 struct btrfs_key key;
12277 struct btrfs_path path;
12278 struct extent_buffer *eb;
12279 struct extent_buffer *node;
12280 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12284 /* Query level for level == -1 special case */
12286 level = query_tree_block_level(fs_info, bytenr);
12288 err |= REFERENCER_MISSING;
/* Look up the referencing root by its ROOT_ITEM key. */
12292 key.objectid = root_id;
12293 key.type = BTRFS_ROOT_ITEM_KEY;
12294 key.offset = (u64)-1;
12296 root = btrfs_read_fs_root(fs_info, &key);
12297 if (IS_ERR(root)) {
12298 err |= REFERENCER_MISSING;
12302 /* Read out the tree block to get item/node key */
12303 eb = read_tree_block(fs_info, bytenr, 0);
12304 if (!extent_buffer_uptodate(eb)) {
12305 err |= REFERENCER_MISSING;
12306 free_extent_buffer(eb);
12310 /* Empty tree, no need to check key */
12311 if (!btrfs_header_nritems(eb) && !level) {
12312 free_extent_buffer(eb);
/* Take the first key of the block, using the node or the item accessor. */
12317 btrfs_node_key_to_cpu(eb, &key, 0);
12319 btrfs_item_key_to_cpu(eb, &key, 0);
12321 free_extent_buffer(eb);
12323 btrfs_init_path(&path);
/* lowest_level is set to @level before the search. */
12324 path.lowest_level = level;
12325 /* Search with the first key, to ensure we can reach it */
12326 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12328 err |= REFERENCER_MISSING;
/* The node on the search path at @level must be the block being checked. */
12332 node = path.nodes[level];
12333 if (btrfs_header_bytenr(node) != bytenr) {
12335 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12336 bytenr, nodesize, bytenr,
12337 btrfs_header_bytenr(node));
12338 err |= REFERENCER_MISMATCH;
12340 if (btrfs_header_level(node) != level) {
12342 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12343 bytenr, nodesize, level,
12344 btrfs_header_level(node));
12345 err |= REFERENCER_MISMATCH;
12349 btrfs_release_path(&path);
/* Two message variants follow: one without the level and one with it. */
12351 if (err & REFERENCER_MISSING) {
12353 error("extent [%llu %d] lost referencer (owner: %llu)",
12354 bytenr, nodesize, root_id);
12357 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12358 bytenr, nodesize, root_id, level);
12365 * Check if tree block @eb is tree reloc root.
12366 * Return 0 if it's not or any problem happens
12367 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of the tree reloc root belonging to the
 * owner recorded in the header of @eb.
 *
 * The reloc root is read with btrfs_read_fs_root_no_cache() using the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner).  The bytenr of
 * its root node is compared with the bytenr of @eb, and the root is
 * released again with btrfs_free_fs_root().
 */
12369 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12370 struct extent_buffer *eb)
12372 struct btrfs_root *tree_reloc_root;
12373 struct btrfs_key key;
12374 u64 bytenr = btrfs_header_bytenr(eb);
12375 u64 owner = btrfs_header_owner(eb);
12378 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12379 key.offset = owner;
12380 key.type = BTRFS_ROOT_ITEM_KEY;
12382 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12383 if (IS_ERR(tree_reloc_root))
12386 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12388 btrfs_free_fs_root(tree_reloc_root);
12393 * Check referencer for shared block backref
12394 * If level == -1, this function will resolve the level.
/*
 * Verify a shared tree block backref: the block at @parent is expected to
 * be a node exactly one level above @level that holds a block pointer equal
 * to @bytenr.
 *
 * @parent is read with read_tree_block().  @level may be resolved through
 * query_tree_block_level().  When @parent equals @bytenr the block is
 * tested with is_tree_reloc_root().  Otherwise the header level of the
 * parent is compared with level + 1 and every block pointer of the parent
 * is compared with @bytenr.
 *
 * When no parent was found an error is printed and REFERENCER_MISSING is
 * returned.
 */
12396 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12397 u64 parent, u64 bytenr, int level)
12399 struct extent_buffer *eb;
12401 int found_parent = 0;
12404 eb = read_tree_block(fs_info, parent, 0);
12405 if (!extent_buffer_uptodate(eb))
12409 level = query_tree_block_level(fs_info, bytenr);
12413 /* It's possible it's a tree reloc root */
12414 if (parent == bytenr) {
12415 if (is_tree_reloc_root(fs_info, eb))
12420 if (level + 1 != btrfs_header_level(eb))
/* Scan all block pointers of the parent node for @bytenr */
12423 nr = btrfs_header_nritems(eb);
12424 for (i = 0; i < nr; i++) {
12425 if (bytenr == btrfs_node_blockptr(eb, i)) {
12431 free_extent_buffer(eb);
12432 if (!found_parent) {
12434 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12435 bytenr, fs_info->nodesize, parent, level);
12436 return REFERENCER_MISSING;
12442 * Check referencer for normal (inlined) data ref
12443 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref (root @root_id, inode @objectid, @offset) of
 * the data extent at @bytenr by looking for matching file extent items.
 *
 * Step 1: the extent tree is searched with (bytenr, EXTENT_ITEM, -1) and
 *         btrfs_previous_extent_item() to locate the extent item of
 *         @bytenr.
 * Step 2: the root @root_id is read and the EXTENT_DATA items of inode
 *         @objectid are walked with btrfs_next_item().  An item matches
 *         when its disk bytenr equals @bytenr, its disk num bytes equals
 *         @len, (file offset - file extent offset) equals @offset and the
 *         leaf owner equals @root_id.
 *
 * found_count is compared with @count at the end; on a difference an error
 * is printed and REFERENCER_MISSING is returned.
 */
12445 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12446 u64 root_id, u64 objectid, u64 offset,
12447 u64 bytenr, u64 len, u32 count)
12449 struct btrfs_root *root;
12450 struct btrfs_root *extent_root = fs_info->extent_root;
12451 struct btrfs_key key;
12452 struct btrfs_path path;
12453 struct extent_buffer *leaf;
12454 struct btrfs_file_extent_item *fi;
12455 u32 found_count = 0;
12460 key.objectid = bytenr;
12461 key.type = BTRFS_EXTENT_ITEM_KEY;
12462 key.offset = (u64)-1;
12464 btrfs_init_path(&path);
12465 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12468 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12471 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12472 if (key.objectid != bytenr ||
12473 key.type != BTRFS_EXTENT_ITEM_KEY)
12476 btrfs_release_path(&path);
/* Switch from the extent tree to the referencing subvolume root */
12478 key.objectid = root_id;
12479 key.type = BTRFS_ROOT_ITEM_KEY;
12480 key.offset = (u64)-1;
12481 btrfs_init_path(&path);
12483 root = btrfs_read_fs_root(fs_info, &key);
12487 key.objectid = objectid;
12488 key.type = BTRFS_EXTENT_DATA_KEY;
12490 * It can be nasty as data backref offset is
12491 * file offset - file extent offset, which is smaller or
12492 * equal to original backref offset. The only special case is
12493 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is treated as overflowed: search from 0 */
12495 key.offset = offset & (1ULL << 63) ? 0 : offset;
12497 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12502 * Search afterwards to get correct one
12503 * NOTE: As we must do a comprehensive check on the data backref to
12504 * make sure the dref count also matches, we must iterate all file
12505 * extents for that inode.
12508 leaf = path.nodes[0];
12509 slot = path.slots[0];
12511 if (slot >= btrfs_header_nritems(leaf) ||
12512 btrfs_header_owner(leaf) != root_id)
12514 btrfs_item_key_to_cpu(leaf, &key, slot);
12515 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12517 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12519 * Except normal disk bytenr and disk num bytes, we still
12520 * need to do extra check on dbackref offset as
12521 * dbackref offset = file_offset - file_extent_offset
12523 * Also, we must check the leaf owner.
12524 * In case of shared tree blocks (snapshots) we can inherit
12525 * leaves from source snapshot.
12526 * In that case, reference from source snapshot should not
12529 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12530 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12531 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12532 offset && btrfs_header_owner(leaf) == root_id)
12536 ret = btrfs_next_item(root, &path);
12541 btrfs_release_path(&path);
12542 if (found_count != count) {
12544 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12545 bytenr, len, root_id, objectid, offset, count, found_count);
12546 return REFERENCER_MISSING;
12552 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent is expected to contain
 * an EXTENT_DATA item whose disk bytenr equals @bytenr.
 *
 * The leaf is read with read_tree_block() and all of its items are
 * scanned.  The key type and the inline extent type are tested before the
 * disk bytenr comparison.  When no referencer was found an error is printed
 * and REFERENCER_MISSING is returned.
 */
12554 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12555 u64 parent, u64 bytenr)
12557 struct extent_buffer *eb;
12558 struct btrfs_key key;
12559 struct btrfs_file_extent_item *fi;
12561 int found_parent = 0;
12564 eb = read_tree_block(fs_info, parent, 0);
12565 if (!extent_buffer_uptodate(eb))
12568 nr = btrfs_header_nritems(eb);
12569 for (i = 0; i < nr; i++) {
12570 btrfs_item_key_to_cpu(eb, &key, i);
12571 if (key.type != BTRFS_EXTENT_DATA_KEY)
12574 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12575 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12578 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12585 free_extent_buffer(eb);
12586 if (!found_parent) {
12587 error("shared extent %llu referencer lost (parent: %llu)",
12589 return REFERENCER_MISSING;
12595 * Only delete backref if REFERENCER_MISSING now
12597 * Returns <0 the extent was deleted
12598 * Returns >0 the backref was deleted but extent still exists, returned value
12599 * means error after repair
12600 * Returns 0 nothing happened
/*
 * Try to repair a bad backref of the extent [@bytenr, @num_bytes) by
 * dropping that backref with btrfs_free_extent().
 *
 * The function acts only when @err contains REFERENCER_MISSING or
 * REFERENCER_MISMATCH.  One path clears REFERENCER_MISSING from @err and
 * prints a confirmation; the other reports the failure with error().
 * Since btrfs_free_extent() may delete the extent item itself, @path is
 * released and searched again with the key it pointed to on entry.
 *
 * NOTE(review): btrfs_free_extent() is called with root->fs_info->fs_root
 * rather than @root, and only REFERENCER_MISSING is cleared although
 * REFERENCER_MISMATCH also triggers the deletion - confirm both are
 * intended.
 */
12602 static int repair_extent_item(struct btrfs_trans_handle *trans,
12603 struct btrfs_root *root, struct btrfs_path *path,
12604 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12605 u64 owner, u64 offset, int err)
12607 struct btrfs_key old_key;
/* Remember the current position so that it can be looked up again later */
12611 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12613 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12614 /* delete the backref */
12615 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12616 num_bytes, parent, root_objectid, owner, offset);
12619 err &= ~REFERENCER_MISSING;
12620 printf("Delete backref in extent [%llu %llu]\n",
12621 bytenr, num_bytes);
12623 error("fail to delete backref in extent [%llu %llu]",
12624 bytenr, num_bytes);
12628 /* btrfs_free_extent may delete the extent */
12629 btrfs_release_path(path);
12630 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12640 * This function will check a given extent item, including its backref and
12641 * itself (like crossing stripe boundary and type)
12643 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM or METADATA_ITEM that @path points to, together
 * with all of its inline backrefs.
 *
 * - The extent size (key.offset for an EXTENT_ITEM key, nodesize on the
 *   other branch) is added to the global bytes_used and kept in num_bytes.
 * - Items smaller than struct btrfs_extent_item are reported as the
 *   unsupported COMPAT_EXTENT_TREE_V0 format.
 * - For metadata, check_crossing_stripes() is consulted and
 *   CROSSING_STRIPE_BOUNDARY is recorded when it flags the block.
 * - The inline refs are walked from the end of the extent item header
 *   (plus struct btrfs_tree_block_info for old style EXTENT_ITEM metadata)
 *   up to the end of the item.  Each ref is dispatched on its type to
 *   check_tree_block_backref(), check_shared_block_backref(),
 *   check_extent_data_backref() or check_shared_data_backref().  An unknown
 *   type yields UNKNOWN_TYPE; walking past the item end yields
 *   ITEM_SIZE_MISMATCH.
 * - With repair enabled, repair_extent_item() is called for the ref.
 *
 * NOTE(review): the repair call is gated on the accumulated err but passes
 * the per-ref ret to repair_extent_item() - confirm that this is intended.
 */
12645 static int check_extent_item(struct btrfs_trans_handle *trans,
12646 struct btrfs_fs_info *fs_info,
12647 struct btrfs_path *path)
12649 struct btrfs_extent_item *ei;
12650 struct btrfs_extent_inline_ref *iref;
12651 struct btrfs_extent_data_ref *dref;
12652 struct extent_buffer *eb = path->nodes[0];
12655 int slot = path->slots[0];
12657 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12658 u32 item_size = btrfs_item_size_nr(eb, slot);
12668 struct btrfs_key key;
12672 btrfs_item_key_to_cpu(eb, &key, slot);
12673 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12674 bytes_used += key.offset;
12675 num_bytes = key.offset;
12677 bytes_used += nodesize;
12678 num_bytes = nodesize;
12681 if (item_size < sizeof(*ei)) {
12683 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12684 * old thing when on disk format is still un-determined.
12685 * No need to care about it anymore
12687 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12691 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12692 flags = btrfs_extent_flags(eb, ei);
12694 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12696 if (metadata && check_crossing_stripes(global_info, key.objectid,
12698 error("bad metadata [%llu, %llu) crossing stripe boundary",
12699 key.objectid, key.objectid + nodesize);
12700 err |= CROSSING_STRIPE_BOUNDARY;
12703 ptr = (unsigned long)(ei + 1);
12705 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12706 /* Old EXTENT_ITEM metadata */
12707 struct btrfs_tree_block_info *info;
12709 info = (struct btrfs_tree_block_info *)ptr;
12710 level = btrfs_tree_block_level(eb, info);
12711 ptr += sizeof(struct btrfs_tree_block_info);
12713 /* New METADATA_ITEM */
12714 level = key.offset;
12716 end = (unsigned long)ei + item_size;
12719 /* Reached extent item end normally */
12723 /* Beyond extent item end, wrong item size */
12725 err |= ITEM_SIZE_MISMATCH;
12726 error("extent item at bytenr %llu slot %d has wrong size",
12735 /* Now check every backref in this extent item */
12736 iref = (struct btrfs_extent_inline_ref *)ptr;
12737 type = btrfs_extent_inline_ref_type(eb, iref);
12738 offset = btrfs_extent_inline_ref_offset(eb, iref);
12740 case BTRFS_TREE_BLOCK_REF_KEY:
12741 root_objectid = offset;
12743 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12747 case BTRFS_SHARED_BLOCK_REF_KEY:
12749 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12753 case BTRFS_EXTENT_DATA_REF_KEY:
12754 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12755 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12756 owner = btrfs_extent_data_ref_objectid(eb, dref);
12757 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12758 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12759 owner_offset, key.objectid, key.offset,
12760 btrfs_extent_data_ref_count(eb, dref));
12763 case BTRFS_SHARED_DATA_REF_KEY:
12765 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12769 error("extent[%llu %d %llu] has unknown ref type: %d",
12770 key.objectid, key.type, key.offset, type);
12771 ret = UNKNOWN_TYPE;
12776 if (err && repair) {
12777 ret = repair_extent_item(trans, fs_info->extent_root, path,
12778 key.objectid, num_bytes, parent, root_objectid,
12779 owner, owner_offset, ret);
12788 ptr += btrfs_extent_inline_ref_size(type);
12796 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of @eb is referenced by its chunk.
 *
 * The chunk item named by the dev extent (chunk objectid, CHUNK_ITEM, chunk
 * offset) is looked up in the chunk tree and validated with
 * btrfs_check_chunk_valid().  The stripe length of the chunk is compared
 * with the dev extent length, and the stripes of the chunk are scanned for
 * one whose devid and offset equal the objectid and offset of the dev
 * extent key.
 *
 * When no matching chunk stripe was found an error is printed and
 * REFERENCER_MISSING is returned.
 */
12798 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12799 struct extent_buffer *eb, int slot)
12801 struct btrfs_root *chunk_root = fs_info->chunk_root;
12802 struct btrfs_dev_extent *ptr;
12803 struct btrfs_path path;
12804 struct btrfs_key chunk_key;
12805 struct btrfs_key devext_key;
12806 struct btrfs_chunk *chunk;
12807 struct extent_buffer *l;
12811 int found_chunk = 0;
12814 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12815 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12816 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent itself names the chunk it belongs to */
12818 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12819 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12820 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12822 btrfs_init_path(&path);
12823 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12828 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12829 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12834 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that maps to this device and device offset */
12837 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12838 for (i = 0; i < num_stripes; i++) {
12839 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12840 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12842 if (devid == devext_key.objectid &&
12843 offset == devext_key.offset) {
12849 btrfs_release_path(&path);
12850 if (!found_chunk) {
12852 "device extent[%llu, %llu, %llu] did not find the related chunk",
12853 devext_key.objectid, devext_key.offset, length);
12854 return REFERENCER_MISSING;
12860 * Check if the used space is correct with the dev item
/*
 * Check that bytes_used of the dev item at @slot of @eb equals the sum of
 * the lengths of all DEV_EXTENT items of that device in the device tree.
 *
 * On the path taken right after the dev extent search an error is printed
 * and REFERENCER_MISSING is returned.  When the summed length differs from
 * bytes_used an error is printed and ACCOUNTING_MISMATCH is returned.
 * The total size of the device is finally passed to
 * check_dev_size_alignment() together with the sector size.
 */
12862 static int check_dev_item(struct btrfs_fs_info *fs_info,
12863 struct extent_buffer *eb, int slot)
12865 struct btrfs_root *dev_root = fs_info->dev_root;
12866 struct btrfs_dev_item *dev_item;
12867 struct btrfs_path path;
12868 struct btrfs_key key;
12869 struct btrfs_dev_extent *ptr;
12876 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12877 dev_id = btrfs_device_id(eb, dev_item);
12878 used = btrfs_device_bytes_used(eb, dev_item);
12879 total_bytes = btrfs_device_total_bytes(eb, dev_item);
12881 key.objectid = dev_id;
12882 key.type = BTRFS_DEV_EXTENT_KEY;
12885 btrfs_init_path(&path);
12886 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* The search key is replaced by the dev item key for the message */
12888 btrfs_item_key_to_cpu(eb, &key, slot);
12889 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12890 key.objectid, key.type, key.offset);
12891 btrfs_release_path(&path);
12892 return REFERENCER_MISSING;
12895 /* Iterate dev_extents to calculate the used space of a device */
12897 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12900 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12901 if (key.objectid > dev_id)
12903 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12906 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12907 struct btrfs_dev_extent);
12908 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12910 ret = btrfs_next_item(dev_root, &path);
12914 btrfs_release_path(&path);
12916 if (used != total) {
/*
 * NOTE(review): the key loaded on the next line is not what the message
 * prints; the arguments are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY
 * and dev_id - confirm which key the message is meant to show.
 */
12917 btrfs_item_key_to_cpu(eb, &key, slot);
12919 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12920 total, used, BTRFS_ROOT_TREE_OBJECTID,
12921 BTRFS_DEV_EXTENT_KEY, dev_id);
12922 return ACCOUNTING_MISMATCH;
12924 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12930 * Check a block group item with its referencer (chunk) and its used space
12931 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb against its chunk and against
 * the extents located inside the block group.
 *
 * - The chunk item (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, block
 *   group start) is looked up in the chunk tree.  REFERENCER_MISSING is
 *   recorded next to the message about a missing chunk item, and
 *   REFERENCER_MISMATCH next to the message about a chunk length mismatch.
 * - The extent tree is walked from the block group start up to
 *   bg_key.objectid + bg_key.offset.  Only EXTENT_ITEM and METADATA_ITEM
 *   keys are considered; their sizes are summed into total.
 *   CHUNK_TYPE_MISMATCH is recorded when a data extent lies in a block
 *   group without the DATA flag, or a tree block lies in a block group with
 *   neither the SYSTEM nor the METADATA flag.
 * - BG_ACCOUNTING_ERROR is recorded when total differs from the used value
 *   stored in the block group item.
 */
12933 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12934 struct extent_buffer *eb, int slot)
12936 struct btrfs_root *extent_root = fs_info->extent_root;
12937 struct btrfs_root *chunk_root = fs_info->chunk_root;
12938 struct btrfs_block_group_item *bi;
12939 struct btrfs_block_group_item bg_item;
12940 struct btrfs_path path;
12941 struct btrfs_key bg_key;
12942 struct btrfs_key chunk_key;
12943 struct btrfs_key extent_key;
12944 struct btrfs_chunk *chunk;
12945 struct extent_buffer *leaf;
12946 struct btrfs_extent_item *ei;
12947 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12955 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12956 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
12957 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12958 used = btrfs_block_group_used(&bg_item);
12959 bg_flags = btrfs_block_group_flags(&bg_item);
12961 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12962 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12963 chunk_key.offset = bg_key.objectid;
12965 btrfs_init_path(&path);
12966 /* Search for the referencer chunk */
12967 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12970 "block group[%llu %llu] did not find the related chunk item",
12971 bg_key.objectid, bg_key.offset);
12972 err |= REFERENCER_MISSING;
12974 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12975 struct btrfs_chunk);
12976 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12979 "block group[%llu %llu] related chunk item length does not match",
12980 bg_key.objectid, bg_key.offset);
12981 err |= REFERENCER_MISMATCH;
12984 btrfs_release_path(&path);
12986 /* Search from the block group bytenr */
12987 extent_key.objectid = bg_key.objectid;
12988 extent_key.type = 0;
12989 extent_key.offset = 0;
12991 btrfs_init_path(&path);
12992 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12996 /* Iterate extent tree to account used space */
12998 leaf = path.nodes[0];
13000 /* Search slot can point to the last item beyond leaf nritems */
13001 if (path.slots[0] >= btrfs_header_nritems(leaf))
13004 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
13005 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
13008 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13009 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13011 if (extent_key.objectid < bg_key.objectid)
13014 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13017 total += extent_key.offset;
13019 ei = btrfs_item_ptr(leaf, path.slots[0],
13020 struct btrfs_extent_item);
13021 flags = btrfs_extent_flags(leaf, ei);
13022 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13023 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13025 "bad extent[%llu, %llu) type mismatch with chunk",
13026 extent_key.objectid,
13027 extent_key.objectid + extent_key.offset);
13028 err |= CHUNK_TYPE_MISMATCH;
13030 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13031 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13032 BTRFS_BLOCK_GROUP_METADATA))) {
13034 "bad extent[%llu, %llu) type mismatch with chunk",
13035 extent_key.objectid,
13036 extent_key.objectid + nodesize);
13037 err |= CHUNK_TYPE_MISMATCH;
13041 ret = btrfs_next_item(extent_root, &path);
13047 btrfs_release_path(&path);
13049 if (total != used) {
13051 "block group[%llu %llu] used %llu but extent items used %llu",
13052 bg_key.objectid, bg_key.offset, used, total);
13053 err |= BG_ACCOUNTING_ERROR;
13059 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13060 * FIXME: We still need to repair error of dev_item.
13062 * Returns error after repair.
/*
 * Repair step for a chunk item: when @err contains REFERENCER_MISSING, a
 * block group is created for the chunk at @path with
 * btrfs_make_block_group(), using the type and length read from the chunk
 * item and the objectid and offset of its key.
 *
 * One path prints a failure with error(); the other clears
 * REFERENCER_MISSING from @err and prints a confirmation.  Items whose key
 * type is not BTRFS_CHUNK_ITEM_KEY are tested for before the chunk is read.
 */
13064 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13065 struct btrfs_root *chunk_root,
13066 struct btrfs_path *path, int err)
13068 struct btrfs_chunk *chunk;
13069 struct btrfs_key chunk_key;
13070 struct extent_buffer *eb = path->nodes[0];
13072 int slot = path->slots[0];
13076 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13077 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13079 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13080 type = btrfs_chunk_type(path->nodes[0], chunk);
13081 length = btrfs_chunk_length(eb, chunk);
13083 if (err & REFERENCER_MISSING) {
13084 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13085 type, chunk_key.objectid, chunk_key.offset, length);
13087 error("fail to add block group item[%llu %llu]",
13088 chunk_key.offset, length);
13091 err &= ~REFERENCER_MISSING;
13092 printf("Added block group item[%llu %llu]\n",
13093 chunk_key.offset, length);
13102 * Check a chunk item.
13103 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb against its block group item and
 * against the dev extents of all of its stripes.
 *
 * - The chunk is validated with btrfs_check_chunk_valid(); the invalid
 *   chunk message is paired with BYTES_UNALIGNED | UNKNOWN_TYPE.
 * - The block group item (chunk offset, BLOCK_GROUP_ITEM, chunk length) is
 *   looked up in the extent tree.  REFERENCER_MISSING is recorded next to
 *   the message about a missing block group item and also when the block
 *   group flags differ from the chunk type.
 * - For every stripe the dev extent (devid, DEV_EXTENT, stripe offset) is
 *   looked up in the device tree and compared with the chunk objectid, the
 *   chunk offset and the stripe length.  BACKREF_MISSING is recorded
 *   together with the message about a missing dev extent.
 *
 * NOTE(review): the dev extent message prints chunk_key.objectid as the
 * chunk start while the other messages print chunk_key.offset, and a flags
 * mismatch is recorded as REFERENCER_MISSING - confirm both are intended.
 */
13105 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13106 struct extent_buffer *eb, int slot)
13108 struct btrfs_root *extent_root = fs_info->extent_root;
13109 struct btrfs_root *dev_root = fs_info->dev_root;
13110 struct btrfs_path path;
13111 struct btrfs_key chunk_key;
13112 struct btrfs_key bg_key;
13113 struct btrfs_key devext_key;
13114 struct btrfs_chunk *chunk;
13115 struct extent_buffer *leaf;
13116 struct btrfs_block_group_item *bi;
13117 struct btrfs_block_group_item bg_item;
13118 struct btrfs_dev_extent *ptr;
13130 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13131 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13132 length = btrfs_chunk_length(eb, chunk);
13133 chunk_end = chunk_key.offset + length;
13134 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13137 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13139 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13142 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the chunk start and the chunk length */
13144 bg_key.objectid = chunk_key.offset;
13145 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13146 bg_key.offset = length;
13148 btrfs_init_path(&path);
13149 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13152 "chunk[%llu %llu) did not find the related block group item",
13153 chunk_key.offset, chunk_end);
13154 err |= REFERENCER_MISSING;
13156 leaf = path.nodes[0];
13157 bi = btrfs_item_ptr(leaf, path.slots[0],
13158 struct btrfs_block_group_item);
13159 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13161 if (btrfs_block_group_flags(&bg_item) != type) {
13163 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13164 chunk_key.offset, chunk_end, type,
13165 btrfs_block_group_flags(&bg_item));
13166 err |= REFERENCER_MISSING;
13170 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13171 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13172 for (i = 0; i < num_stripes; i++) {
/* Drop the previous search result before every new lookup */
13173 btrfs_release_path(&path);
13174 btrfs_init_path(&path);
13175 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13176 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13177 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13179 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13182 goto not_match_dev;
13184 leaf = path.nodes[0];
13185 ptr = btrfs_item_ptr(leaf, path.slots[0],
13186 struct btrfs_dev_extent);
13187 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13188 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13189 if (objectid != chunk_key.objectid ||
13190 offset != chunk_key.offset ||
13191 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13192 goto not_match_dev;
13195 err |= BACKREF_MISSING;
13197 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13198 chunk_key.objectid, chunk_end, i);
13201 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points to from @root.
 *
 * The key of the item is saved, @path is released and the key is searched
 * again inside the transaction @trans with ins_len -1 and cow 1, after
 * which btrfs_del_item() is called.  When the path ends up at slot 0,
 * btrfs_prev_leaf() is called on it.  The outcome is reported with either
 * an error or a confirmation message that names the root and the key.
 */
13206 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13207 struct btrfs_root *root,
13208 struct btrfs_path *path)
13210 struct btrfs_key key;
13213 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13214 btrfs_release_path(path);
13215 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13221 ret = btrfs_del_item(trans, root, path);
13225 if (path->slots[0] == 0)
13226 btrfs_prev_leaf(root, path);
13231 error("failed to delete root %llu item[%llu, %u, %llu]",
13232 root->objectid, key.objectid, key.type, key.offset);
13234 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13235 root->objectid, key.objectid, key.type, key.offset);
13240 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the item at @path to the checker that matches its key type.
 *
 * Key type             Checker                       Repair helper
 * EXTENT_DATA          check_extent_data_item        repair_extent_data_item
 * BLOCK_GROUP_ITEM     check_block_group_item        delete_extent_tree_item
 * DEV_ITEM             check_dev_item                -
 * CHUNK_ITEM           check_chunk_item              repair_chunk_item
 * DEV_EXTENT           check_dev_extent_item         -
 * EXTENT_ITEM and
 * METADATA_ITEM        check_extent_item             -
 * EXTENT_CSUM          adds the item size to total_csum_bytes
 * TREE_BLOCK_REF       check_tree_block_backref      delete_extent_tree_item
 * EXTENT_DATA_REF      check_extent_data_backref     delete_extent_tree_item
 * SHARED_BLOCK_REF     check_shared_block_backref    delete_extent_tree_item
 * SHARED_DATA_REF      check_shared_data_backref     delete_extent_tree_item
 *
 * For the keyed backref types the delete helper is guarded by a test for
 * REFERENCER_MISMATCH or REFERENCER_MISSING in the checker result; for
 * BLOCK_GROUP_ITEM the guard tests REFERENCER_MISSING only.
 *
 * A slot at or beyond the number of items in the leaf leads to the empty
 * leaf error message.
 */
13242 static int check_leaf_items(struct btrfs_trans_handle *trans,
13243 struct btrfs_root *root, struct btrfs_path *path,
13244 struct node_refs *nrefs, int account_bytes)
13246 struct btrfs_fs_info *fs_info = root->fs_info;
13247 struct btrfs_key key;
13248 struct extent_buffer *eb;
13251 struct btrfs_extent_data_ref *dref;
13256 eb = path->nodes[0];
13257 slot = path->slots[0];
13258 if (slot >= btrfs_header_nritems(eb)) {
13260 error("empty leaf [%llu %u] root %llu", eb->start,
13261 root->fs_info->nodesize, root->objectid);
13267 btrfs_item_key_to_cpu(eb, &key, slot);
13271 case BTRFS_EXTENT_DATA_KEY:
13272 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13274 ret = repair_extent_data_item(trans, root, path, nrefs,
13278 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13279 ret = check_block_group_item(fs_info, eb, slot);
13281 ret & REFERENCER_MISSING)
13282 ret = delete_extent_tree_item(trans, root, path);
13285 case BTRFS_DEV_ITEM_KEY:
13286 ret = check_dev_item(fs_info, eb, slot);
13289 case BTRFS_CHUNK_ITEM_KEY:
13290 ret = check_chunk_item(fs_info, eb, slot);
13292 ret = repair_chunk_item(trans, root, path, ret);
13295 case BTRFS_DEV_EXTENT_KEY:
13296 ret = check_dev_extent_item(fs_info, eb, slot);
13299 case BTRFS_EXTENT_ITEM_KEY:
13300 case BTRFS_METADATA_ITEM_KEY:
13301 ret = check_extent_item(trans, fs_info, path);
13304 case BTRFS_EXTENT_CSUM_KEY:
13305 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13308 case BTRFS_TREE_BLOCK_REF_KEY:
13309 ret = check_tree_block_backref(fs_info, key.offset,
13312 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13313 ret = delete_extent_tree_item(trans, root, path);
13316 case BTRFS_EXTENT_DATA_REF_KEY:
13317 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13318 ret = check_extent_data_backref(fs_info,
13319 btrfs_extent_data_ref_root(eb, dref),
13320 btrfs_extent_data_ref_objectid(eb, dref),
13321 btrfs_extent_data_ref_offset(eb, dref),
13323 btrfs_extent_data_ref_count(eb, dref));
13325 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13326 ret = delete_extent_tree_item(trans, root, path);
13329 case BTRFS_SHARED_BLOCK_REF_KEY:
13330 ret = check_shared_block_backref(fs_info, key.offset,
13333 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13334 ret = delete_extent_tree_item(trans, root, path);
13337 case BTRFS_SHARED_DATA_REF_KEY:
13338 ret = check_shared_data_backref(fs_info, key.offset,
13341 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13342 ret = delete_extent_tree_item(trans, root, path);
13356 * Low memory usage version check_chunks_and_extents.
/*
 * Driver of the low memory mode check of chunks and extents.
 *
 * Order of work:
 * 1. A transaction is started on the extent root; a failure is reported
 *    and its error code is returned.
 * 2. The chunk root and then the tree root are checked with
 *    check_btrfs_root().
 * 3. The tree root is searched starting at the ROOT_ITEM of
 *    BTRFS_EXTENT_TREE_OBJECTID and walked with btrfs_next_item().  Every
 *    ROOT_ITEM is read as a tree and checked with check_btrfs_root().
 *    Tree reloc roots are read with btrfs_read_fs_root_no_cache() and
 *    freed with btrfs_free_fs_root() after the check; all other roots are
 *    read with btrfs_read_fs_root().  The path is released and searched
 *    again with old_key before the walk moves on.
 * 4. btrfs_fix_block_accounting() is called and BG_ACCOUNTING_ERROR is
 *    cleared from err on the repair path.
 * 5. The transaction is committed and the path is released.
 */
13358 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13360 struct btrfs_trans_handle *trans = NULL;
13361 struct btrfs_path path;
13362 struct btrfs_key old_key;
13363 struct btrfs_key key;
13364 struct btrfs_root *root1;
13365 struct btrfs_root *root;
13366 struct btrfs_root *cur_root;
13370 root = fs_info->fs_root;
13373 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13374 if (IS_ERR(trans)) {
13375 error("failed to start transaction before check");
13376 return PTR_ERR(trans);
13380 root1 = root->fs_info->chunk_root;
13381 ret = check_btrfs_root(trans, root1, 0, 1);
13384 root1 = root->fs_info->tree_root;
13385 ret = check_btrfs_root(trans, root1, 0, 1);
13388 btrfs_init_path(&path);
13389 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13391 key.type = BTRFS_ROOT_ITEM_KEY;
13393 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13395 error("cannot find extent tree in tree_root");
13400 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13401 if (key.type != BTRFS_ROOT_ITEM_KEY)
13404 key.offset = (u64)-1;
13406 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13407 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13410 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13411 if (IS_ERR(cur_root) || !cur_root) {
13412 error("failed to read tree: %lld", key.objectid);
13416 ret = check_btrfs_root(trans, cur_root, 0, 1);
13419 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13420 btrfs_free_fs_root(cur_root);
/* Look the saved position up again before advancing to the next item */
13422 btrfs_release_path(&path);
13423 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13424 &old_key, &path, 0, 0);
13428 ret = btrfs_next_item(root1, &path);
13434 /* if repair, update block accounting */
13436 ret = btrfs_fix_block_accounting(trans, root);
13440 err &= ~BG_ACCOUNTING_ERROR;
13444 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13446 btrfs_release_path(&path);
/*
 * Entry point of the chunk and extent check.
 *
 * Prints a progress line to stderr unless ctx.progress_enabled is set, then
 * runs check_chunks_and_extents_v2() in CHECK_MODE_LOWMEM and
 * check_chunks_and_extents() on the other branch.  When repair is enabled
 * and the check returned 0, btrfs_fix_device_and_super_size() is called as
 * well.
 */
13451 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13455 if (!ctx.progress_enabled)
13456 fprintf(stderr, "checking extents\n");
13457 if (check_mode == CHECK_MODE_LOWMEM)
13458 ret = check_chunks_and_extents_v2(fs_info);
13460 ret = check_chunks_and_extents(fs_info);
13462 /* Also repair device size related problems */
13463 if (repair && !ret) {
13464 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Reinitialise the root node of @root as an empty tree block.
 *
 * The block c is either an already existing buffer (a reference is taken
 * with extent_buffer_get()) or a block newly allocated with
 * btrfs_alloc_free_block().  Its header is zeroed and refilled with level,
 * bytenr, generation, BTRFS_MIXED_BACKREF_REV, owner, fsid and chunk tree
 * uuid, and the buffer is marked dirty.
 *
 * When the old root node and c start at the same bytenr, the generation and
 * level in root->root_item are refreshed and the root item is written with
 * btrfs_update_root().  The root is finally put on the dirty list with
 * add_root_to_dirty_list().
 *
 * NOTE(review): @overwrite presumably selects reuse of the old root node
 * instead of a new allocation - confirm against the full function.
 */
13471 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13472 struct btrfs_root *root, int overwrite)
13474 struct extent_buffer *c;
13475 struct extent_buffer *old = root->node;
13478 struct btrfs_disk_key disk_key = {0,0,0};
13484 extent_buffer_get(c);
13487 c = btrfs_alloc_free_block(trans, root,
13488 root->fs_info->nodesize,
13489 root->root_key.objectid,
13490 &disk_key, level, 0, 0);
13493 extent_buffer_get(c);
/* Wipe the header, then rebuild every field that identifies the block */
13497 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13498 btrfs_set_header_level(c, level);
13499 btrfs_set_header_bytenr(c, c->start);
13500 btrfs_set_header_generation(c, trans->transid);
13501 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13502 btrfs_set_header_owner(c, root->root_key.objectid);
13504 write_extent_buffer(c, root->fs_info->fsid,
13505 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13507 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13508 btrfs_header_chunk_tree_uuid(c),
13511 btrfs_mark_buffer_dirty(c);
13513 * this case can happen in the following case:
13515 * 1.overwrite previous root.
13517 * 2.reinit reloc data root, this is because we skip pin
13518 * down reloc data tree before which means we can allocate
13519 * same block bytenr here.
13521 if (old->start == c->start) {
13522 btrfs_set_root_generation(&root->root_item,
13524 root->root_item.level = btrfs_header_level(root->node);
13525 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13526 &root->root_key, &root->root_item);
13528 free_extent_buffer(c);
13532 free_extent_buffer(old);
13534 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the tree blocks reachable from it.
 *
 * The range of @eb is first tested for EXTENT_DIRTY in
 * fs_info->pinned_extents so that a block pinned earlier is not handled
 * twice, then pinned with btrfs_pin_extent().
 *
 * Two kinds of entries are followed:
 * - ROOT_ITEM keys: the root block named by the root item is read and
 *   pinned recursively with tree_root == 0.  The extent tree, tree reloc
 *   and data reloc tree objectids are singled out by a key test first.
 * - Node block pointers: when the level is 1 and @tree_root is 0 the child
 *   is pinned by bytenr with the nodesize, without being read; otherwise
 *   the child is read and pinned recursively with the same @tree_root.
 *
 * A block that cannot be read is reported on stderr.
 */
13538 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13539 struct extent_buffer *eb, int tree_root)
13541 struct extent_buffer *tmp;
13542 struct btrfs_root_item *ri;
13543 struct btrfs_key key;
13545 int level = btrfs_header_level(eb);
13551 * If we have pinned this block before, don't pin it again.
13552 * This can not only avoid forever loop with broken filesystem
13553 * but also give us some speedups.
13555 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13556 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13559 btrfs_pin_extent(fs_info, eb->start, eb->len);
13561 nritems = btrfs_header_nritems(eb);
13562 for (i = 0; i < nritems; i++) {
13564 btrfs_item_key_to_cpu(eb, &key, i);
13565 if (key.type != BTRFS_ROOT_ITEM_KEY)
13567 /* Skip the extent root and reloc roots */
13568 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13569 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13570 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13572 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13573 bytenr = btrfs_disk_root_bytenr(eb, ri);
13576 * If at any point we start needing the real root we
13577 * will have to build a stump root for the root we are
13578 * in, but for now this doesn't actually use the root so
13579 * just pass in extent_root.
13581 tmp = read_tree_block(fs_info, bytenr, 0);
13582 if (!extent_buffer_uptodate(tmp)) {
13583 fprintf(stderr, "Error reading root block\n");
13586 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13587 free_extent_buffer(tmp);
13591 bytenr = btrfs_node_blockptr(eb, i);
13593 /* If we aren't the tree root don't read the block */
13594 if (level == 1 && !tree_root) {
13595 btrfs_pin_extent(fs_info, bytenr,
13596 fs_info->nodesize);
13600 tmp = read_tree_block(fs_info, bytenr, 0);
13601 if (!extent_buffer_uptodate(tmp)) {
13602 fprintf(stderr, "Error reading tree block\n");
13605 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13606 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks reachable from the chunk root and the
 * tree root.
 *
 * Calls pin_down_tree_blocks() on fs_info->chunk_root->node with tree_root
 * set to 0, then returns the result of the same walk on
 * fs_info->tree_root->node with tree_root set to 1.
 *
 * NOTE(review): the handling of the first call's result sits on lines that
 * are not part of this listing.
 */
13615 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13619 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13623 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - recreate the in-memory block groups from chunk items.
 *
 * Searches fs_info->chunk_root (read-only search, no transaction) for
 * BTRFS_CHUNK_ITEM_KEY items, zeroes the avail_data/metadata/system
 * alloc-bit fields of @fs_info, and for every chunk item found calls
 * btrfs_add_block_group() with the chunk type and length and marks
 * [key.offset, key.offset + chunk length] dirty in fs_info->free_space_cache.
 * Afterwards the block groups are stepped through with
 * btrfs_lookup_first_block_group(), advancing by objectid + offset.
 *
 * NOTE(review): error handling, loop headers and whatever is done with each
 * block group in the final pass are on lines missing from this listing.
 */
13626 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13628 struct btrfs_block_group_cache *cache;
13629 struct btrfs_path path;
13630 struct extent_buffer *leaf;
13631 struct btrfs_chunk *chunk;
13632 struct btrfs_key key;
13636 btrfs_init_path(&path);
13638 key.type = BTRFS_CHUNK_ITEM_KEY;
13640 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13642 btrfs_release_path(&path);
13647 * We do this in case the block groups were screwed up and had alloc
13648 * bits that aren't actually set on the chunks. This happens with
13649 * restored images every time and could happen in real life I guess.
13651 fs_info->avail_data_alloc_bits = 0;
13652 fs_info->avail_metadata_alloc_bits = 0;
13653 fs_info->avail_system_alloc_bits = 0;
13655 /* First we need to create the in-memory block groups */
13657 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13658 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13660 btrfs_release_path(&path);
13668 leaf = path.nodes[0];
13669 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13670 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13675 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13676 btrfs_add_block_group(fs_info, 0,
13677 btrfs_chunk_type(leaf, chunk),
13678 key.objectid, key.offset,
13679 btrfs_chunk_length(leaf, chunk));
13680 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13681 key.offset + btrfs_chunk_length(leaf, chunk));
13686 cache = btrfs_lookup_first_block_group(fs_info, start);
13690 start = cache->key.objectid + cache->key.offset;
13693 btrfs_release_path(&path);
/*
 * reset_balance - remove pending-balance state and reinitialise the data
 * reloc tree.
 *
 * @trans:   running transaction used for all tree modifications
 * @fs_info: filesystem; fs_info->tree_root is the tree that gets modified
 *
 * Steps visible in this listing:
 *  1. search the tree root for the (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) item and delete it with btrfs_del_item();
 *     one path jumps straight to the reinit_data_reloc label instead;
 *  2. search for BTRFS_TREE_RELOC_OBJECTID root items and delete them in
 *     batches with btrfs_del_items(), tracking del_slot / del_nr;
 *  3. read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *     transaction, reinitialise it with btrfs_fsck_reinit_root() and
 *     recreate its root directory with btrfs_make_root_dir().
 *
 * NOTE(review): labels, loop headers and the error checks after each call
 * are on lines missing from this listing; confirm the return value
 * conventions against the full file.
 */
13697 static int reset_balance(struct btrfs_trans_handle *trans,
13698 struct btrfs_fs_info *fs_info)
13700 struct btrfs_root *root = fs_info->tree_root;
13701 struct btrfs_path path;
13702 struct extent_buffer *leaf;
13703 struct btrfs_key key;
13704 int del_slot, del_nr = 0;
13708 btrfs_init_path(&path);
13709 key.objectid = BTRFS_BALANCE_OBJECTID;
13710 key.type = BTRFS_BALANCE_ITEM_KEY;
13712 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13717 goto reinit_data_reloc;
13722 ret = btrfs_del_item(trans, root, &path);
13725 btrfs_release_path(&path);
13727 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13728 key.type = BTRFS_ROOT_ITEM_KEY;
13730 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13734 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13739 ret = btrfs_del_items(trans, root, &path,
13746 btrfs_release_path(&path);
13749 ret = btrfs_search_slot(trans, root, &key, &path,
13756 leaf = path.nodes[0];
13757 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13758 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13760 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13765 del_slot = path.slots[0];
13774 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13778 btrfs_release_path(&path);
13781 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13782 key.type = BTRFS_ROOT_ITEM_KEY;
13783 key.offset = (u64)-1;
13784 root = btrfs_read_fs_root(fs_info, &key);
13785 if (IS_ERR(root)) {
13786 fprintf(stderr, "Error reading data reloc tree\n");
13787 ret = PTR_ERR(root);
13790 record_root_in_trans(trans, root);
13791 ret = btrfs_fsck_reinit_root(trans, root, 0);
13794 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13796 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild an empty extent tree inside @trans.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose extent root is reinitialised
 *
 * Order of operations visible in this listing:
 *  - refuse filesystems with the MIXED_GROUPS incompat flag (message on
 *    stderr);
 *  - pin_metadata_blocks() so existing metadata is not overwritten;
 *  - btrfs_free_block_groups() followed by reset_block_groups();
 *  - btrfs_fsck_reinit_root() on fs_info->extent_root;
 *  - for every in-memory block group, insert its item into the extent root
 *    and call btrfs_extent_post_op();
 *  - reset_balance().
 * Each failing step prints a diagnostic to stderr.
 *
 * NOTE(review): the return statements and loop header are on lines missing
 * from this listing.
 */
13800 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13801 struct btrfs_fs_info *fs_info)
13807 * The only reason we don't do this is because right now we're just
13808 * walking the trees we find and pinning down their bytes, we don't look
13809 * at any of the leaves. In order to do mixed groups we'd have to check
13810 * the leaves of any fs roots and pin down the bytes for any file
13811 * extents we find. Not hard but why do it if we don't have to?
13813 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13814 fprintf(stderr, "We don't support re-initing the extent tree "
13815 "for mixed block groups yet, please notify a btrfs "
13816 "developer you want to do this so they can add this "
13817 "functionality.\n");
13822 * first we need to walk all of the trees except the extent tree and pin
13823 * down the bytes that are in use so we don't overwrite any existing
13826 ret = pin_metadata_blocks(fs_info);
13828 fprintf(stderr, "error pinning down used bytes\n");
13833 * Need to drop all the block groups since we're going to recreate all
13836 btrfs_free_block_groups(fs_info);
13837 ret = reset_block_groups(fs_info);
13839 fprintf(stderr, "error resetting the block groups\n");
13843 /* Ok we can allocate now, reinit the extent root */
13844 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13846 fprintf(stderr, "extent root initialization failed\n");
13848 * When the transaction code is updated we should end the
13849 * transaction, but for now progs only knows about commit so
13850 * just return an error.
13856 * Now we have all the in-memory block groups setup so we can make
13857 * allocations properly, and the metadata we care about is safe since we
13858 * pinned all of it above.
13861 struct btrfs_block_group_cache *cache;
13863 cache = btrfs_lookup_first_block_group(fs_info, start);
13866 start = cache->key.objectid + cache->key.offset;
13867 ret = btrfs_insert_item(trans, fs_info->extent_root,
13868 &cache->key, &cache->item,
13869 sizeof(cache->item));
13871 fprintf(stderr, "Error adding block group\n");
13874 btrfs_extent_post_op(trans, fs_info->extent_root);
13877 ret = reset_balance(trans, fs_info);
13879 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a copy-on-write of metadata block @eb.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the owner root of @eb
 * @eb:   tree block to be COWed
 *
 * Looks up the root named by btrfs_header_owner(@eb), starts a transaction
 * on it, and runs btrfs_search_slot() with cow == 1 and path.lowest_level
 * set to the level of @eb, using the first key of @eb as the search key.
 * The transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the failing lookup when the owner root or the
 * transaction cannot be obtained.
 */
13884 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13886 struct btrfs_path path;
13887 struct btrfs_trans_handle *trans;
13888 struct btrfs_key key;
13891 printf("Recowing metadata block %llu\n", eb->start);
13892 key.objectid = btrfs_header_owner(eb);
13893 key.type = BTRFS_ROOT_ITEM_KEY;
13894 key.offset = (u64)-1;
13896 root = btrfs_read_fs_root(root->fs_info, &key);
13897 if (IS_ERR(root)) {
13898 fprintf(stderr, "Couldn't find owner root %llu\n",
13900 return PTR_ERR(root);
13903 trans = btrfs_start_transaction(root, 1);
13905 return PTR_ERR(trans);
13907 btrfs_init_path(&path);
13908 path.lowest_level = btrfs_header_level(eb);
13909 if (path.lowest_level)
13910 btrfs_node_key_to_cpu(eb, &key, 0);
13912 btrfs_item_key_to_cpu(eb, &key, 0);
13914 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of the commit below is not captured on this line. */
13915 btrfs_commit_transaction(trans, root);
13916 btrfs_release_path(&path);
/*
 * delete_bad_item - delete the item described by @bad from its owning root.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the root identified by bad->root_id
 * @bad:  record holding the owning root id and the key of the item
 *
 * Prints the key being deleted, reads the owning root, starts a transaction,
 * searches for bad->key with ins_len == -1 and cow == 1, deletes the item
 * with btrfs_del_item(), commits the transaction and releases the path.
 *
 * Returns PTR_ERR() of the failing lookup when the owner root or the
 * transaction cannot be obtained.
 *
 * NOTE(review): the handling of the search result is on lines missing from
 * this listing.
 */
13920 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13922 struct btrfs_path path;
13923 struct btrfs_trans_handle *trans;
13924 struct btrfs_key key;
13927 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13928 bad->key.type, bad->key.offset);
13929 key.objectid = bad->root_id;
13930 key.type = BTRFS_ROOT_ITEM_KEY;
13931 key.offset = (u64)-1;
13933 root = btrfs_read_fs_root(root->fs_info, &key);
13934 if (IS_ERR(root)) {
13935 fprintf(stderr, "Couldn't find owner root %llu\n",
13937 return PTR_ERR(root);
13940 trans = btrfs_start_transaction(root, 1);
13942 return PTR_ERR(trans);
13944 btrfs_init_path(&path);
13945 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13951 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of the commit below is not captured on this line. */
13953 btrfs_commit_transaction(trans, root);
13954 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * @root: root used to start and commit the transaction; its fs_info
 *        provides the super block copy that is modified
 *
 * Starts a transaction, sets both the log root and the log root level in
 * root->fs_info->super_copy to 0, and commits.  The commit result is stored
 * in ret; a failed transaction start stores PTR_ERR(trans) in ret.
 */
13958 static int zero_log_tree(struct btrfs_root *root)
13960 struct btrfs_trans_handle *trans;
13963 trans = btrfs_start_transaction(root, 1);
13964 if (IS_ERR(trans)) {
13965 ret = PTR_ERR(trans);
13968 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13969 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13970 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum one data extent sector by sector.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the new checksums
 * @buf:       scratch buffer filled by read_extent_data() for each sector
 * @start:     start of the extent, used as the base for every read
 *
 * Walks the range in steps of fs_info->sectorsize: each step reads the data
 * at start + offset into @buf and passes it to btrfs_csum_file_block(),
 * together with start + len as the third argument.
 *
 * NOTE(review): the remaining parameter (len, used as the loop bound) and
 * the error checks after both calls are on lines missing from this listing.
 */
13974 static int populate_csum(struct btrfs_trans_handle *trans,
13975 struct btrfs_root *csum_root, char *buf, u64 start,
13978 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13983 while (offset < len) {
13984 sectorsize = fs_info->sectorsize;
13985 ret = read_extent_data(fs_info, buf, start + offset,
13989 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13990 start + offset, buf, sectorsize);
13993 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum every regular file extent
 * referenced by one fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill
 * @cur_root:  fs/subvolume tree whose items are scanned
 *
 * Allocates a sectorsize scratch buffer, searches @cur_root read-only and
 * walks its items with btrfs_next_item().  For BTRFS_EXTENT_DATA_KEY items
 * whose file extent type is BTRFS_FILE_EXTENT_REG, the on-disk bytenr and
 * on-disk length are handed to populate_csum().  A result of -EEXIST from
 * populate_csum() is tested separately from other results.
 *
 * NOTE(review): the initial search key, the reaction to -EEXIST, the loop
 * exit conditions and the release of the buffer are on lines missing from
 * this listing.
 */
13998 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13999 struct btrfs_root *csum_root,
14000 struct btrfs_root *cur_root)
14002 struct btrfs_path path;
14003 struct btrfs_key key;
14004 struct extent_buffer *node;
14005 struct btrfs_file_extent_item *fi;
14012 buf = malloc(cur_root->fs_info->sectorsize);
14016 btrfs_init_path(&path);
14020 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14023 /* Iterate all regular file extents and fill its csum */
14025 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14027 if (key.type != BTRFS_EXTENT_DATA_KEY)
14029 node = path.nodes[0];
14030 slot = path.slots[0];
14031 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14032 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14034 start = btrfs_file_extent_disk_bytenr(node, fi);
14035 len = btrfs_file_extent_disk_num_bytes(node, fi);
14037 ret = populate_csum(trans, csum_root, buf, start, len);
14038 if (ret == -EEXIST)
14044 * TODO: if next leaf is corrupted, jump to nearest next valid
14047 ret = btrfs_next_item(cur_root, &path);
14057 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - fill the checksum tree by scanning every
 * fs/subvolume tree listed in the tree root.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill; its fs_info supplies the tree root
 *
 * Starts a read-only search of the tree root at BTRFS_FS_TREE_OBJECTID /
 * BTRFS_ROOT_ITEM_KEY and advances with btrfs_next_item().  Keys are
 * filtered on objectid (compared against BTRFS_LAST_FREE_OBJECTID), on type
 * (BTRFS_ROOT_ITEM_KEY) and with is_fstree().  Each accepted root is read
 * with btrfs_read_fs_root() using offset (u64)-1 and passed to
 * fill_csum_tree_from_one_fs_root(); a failed read prints a message on
 * stderr.
 *
 * NOTE(review): the statements that follow each filter, the value left in
 * ret when a root cannot be read, and the loop exit are on lines missing
 * from this listing.
 */
14062 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14063 struct btrfs_root *csum_root)
14065 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14066 struct btrfs_path path;
14067 struct btrfs_root *tree_root = fs_info->tree_root;
14068 struct btrfs_root *cur_root;
14069 struct extent_buffer *node;
14070 struct btrfs_key key;
14074 btrfs_init_path(&path);
14075 key.objectid = BTRFS_FS_TREE_OBJECTID;
14077 key.type = BTRFS_ROOT_ITEM_KEY;
14078 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14087 node = path.nodes[0];
14088 slot = path.slots[0];
14089 btrfs_item_key_to_cpu(node, &key, slot);
14090 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14092 if (key.type != BTRFS_ROOT_ITEM_KEY)
14094 if (!is_fstree(key.objectid))
14096 key.offset = (u64)-1;
14098 cur_root = btrfs_read_fs_root(fs_info, &key);
14099 if (IS_ERR(cur_root) || !cur_root) {
14100 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14104 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14109 ret = btrfs_next_item(tree_root, &path);
14119 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - fill the checksum tree by scanning the extent
 * tree for data extents.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree to fill; its fs_info supplies the extent root
 *
 * Performs a read-only search of the extent root, allocates a sectorsize
 * scratch buffer, and walks the leaves with btrfs_next_leaf().  Items keyed
 * BTRFS_EXTENT_ITEM_KEY whose extent flags include BTRFS_EXTENT_FLAG_DATA
 * are passed to populate_csum() with key.objectid as the start.
 *
 * NOTE(review): the loop header, the length argument of populate_csum(),
 * the error checks and the release of the buffer are on lines missing from
 * this listing.
 */
14123 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14124 struct btrfs_root *csum_root)
14126 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14127 struct btrfs_path path;
14128 struct btrfs_extent_item *ei;
14129 struct extent_buffer *leaf;
14131 struct btrfs_key key;
14134 btrfs_init_path(&path);
14136 key.type = BTRFS_EXTENT_ITEM_KEY;
14138 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14140 btrfs_release_path(&path);
14144 buf = malloc(csum_root->fs_info->sectorsize);
14146 btrfs_release_path(&path);
14151 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14152 ret = btrfs_next_leaf(extent_root, &path);
14160 leaf = path.nodes[0];
14162 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14163 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14168 ei = btrfs_item_ptr(leaf, path.slots[0],
14169 struct btrfs_extent_item);
14170 if (!(btrfs_extent_flags(leaf, ei) &
14171 BTRFS_EXTENT_FLAG_DATA)) {
14176 ret = populate_csum(trans, csum_root, buf, key.objectid,
14183 btrfs_release_path(&path);
14189 * Recalculate the csum and put it into the csum tree.
14191 * Extent tree init will wipe out all the extent info, so in that case, we
14192 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
14193 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree - dispatch to one of the two checksum-tree fill strategies.
 *
 * @trans:          running transaction
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
14195 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14196 struct btrfs_root *csum_root,
14197 int search_fs_tree)
14199 if (search_fs_tree)
14200 return fill_csum_tree_from_fs(trans, csum_root);
14202 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the file-scope roots_info_cache.
 *
 * Does its work only when roots_info_cache is set.  Entries are taken one at
 * a time with first_cache_extent(), removed from the tree and converted to
 * their enclosing struct root_item_info; finally the tree itself is freed
 * and the global pointer is reset to NULL.
 *
 * NOTE(review): what is done with each root_item_info after container_of()
 * (presumably a free) is on a line missing from this listing.
 */
14205 static void free_roots_info_cache(void)
14207 if (!roots_info_cache)
14210 while (!cache_tree_empty(roots_info_cache)) {
14211 struct cache_extent *entry;
14212 struct root_item_info *rii;
14214 entry = first_cache_extent(roots_info_cache);
14217 remove_cache_extent(roots_info_cache, entry);
14218 rii = container_of(entry, struct root_item_info, cache_extent);
14222 free(roots_info_cache);
14223 roots_info_cache = NULL;
/*
 * build_roots_info_cache - collect, per root id, the highest-level tree block
 * recorded in the extent tree.
 *
 * @info: filesystem whose extent root is scanned
 *
 * Allocates and initialises roots_info_cache on first use, then walks the
 * extent root read-only.  Only BTRFS_EXTENT_ITEM_KEY items flagged
 * BTRFS_EXTENT_FLAG_TREE_BLOCK and BTRFS_METADATA_ITEM_KEY items are
 * considered.  The level comes from the key offset for metadata items and
 * from the btrfs_tree_block_info for extent items; the first inline ref must
 * be of type BTRFS_TREE_BLOCK_REF_KEY and its offset is taken as the root id.
 *
 * For each root id a struct root_item_info is looked up or created (new
 * entries start with level (u8)-1).  When the block level is higher than the
 * recorded one, or nothing was recorded yet, level, bytenr, gen and
 * node_count (set to 1) are replaced.  A block at the same level as the
 * recorded one takes a separate branch.
 *
 * NOTE(review): the body of the equal-level branch, the error paths and the
 * loop header are on lines missing from this listing.
 */
14226 static int build_roots_info_cache(struct btrfs_fs_info *info)
14229 struct btrfs_key key;
14230 struct extent_buffer *leaf;
14231 struct btrfs_path path;
14233 if (!roots_info_cache) {
14234 roots_info_cache = malloc(sizeof(*roots_info_cache));
14235 if (!roots_info_cache)
14237 cache_tree_init(roots_info_cache);
14240 btrfs_init_path(&path);
14242 key.type = BTRFS_EXTENT_ITEM_KEY;
14244 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14247 leaf = path.nodes[0];
14250 struct btrfs_key found_key;
14251 struct btrfs_extent_item *ei;
14252 struct btrfs_extent_inline_ref *iref;
14253 int slot = path.slots[0];
14258 struct cache_extent *entry;
14259 struct root_item_info *rii;
14261 if (slot >= btrfs_header_nritems(leaf)) {
14262 ret = btrfs_next_leaf(info->extent_root, &path);
14269 leaf = path.nodes[0];
14270 slot = path.slots[0];
14273 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14275 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14276 found_key.type != BTRFS_METADATA_ITEM_KEY)
14279 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14280 flags = btrfs_extent_flags(leaf, ei);
14282 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14283 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14286 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14287 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14288 level = found_key.offset;
14290 struct btrfs_tree_block_info *binfo;
14292 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14293 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14294 level = btrfs_tree_block_level(leaf, binfo);
14298 * For a root extent, it must be of the following type and the
14299 * first (and only one) iref in the item.
14301 type = btrfs_extent_inline_ref_type(leaf, iref);
14302 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14305 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14306 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14308 rii = malloc(sizeof(struct root_item_info));
14313 rii->cache_extent.start = root_id;
14314 rii->cache_extent.size = 1;
14315 rii->level = (u8)-1;
14316 entry = &rii->cache_extent;
14317 ret = insert_cache_extent(roots_info_cache, entry);
14320 rii = container_of(entry, struct root_item_info,
14324 ASSERT(rii->cache_extent.start == root_id);
14325 ASSERT(rii->cache_extent.size == 1);
14327 if (level > rii->level || rii->level == (u8)-1) {
14328 rii->level = level;
14329 rii->bytenr = found_key.objectid;
14330 rii->gen = btrfs_extent_generation(leaf, ei);
14331 rii->node_count = 1;
14332 } else if (level == rii->level) {
14340 btrfs_release_path(&path);
/*
 * maybe_repair_root_item - compare one root item against roots_info_cache
 * and optionally rewrite it.
 *
 * @path:           path positioned on the root item to examine
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero means only detect a mismatch; zero means also
 *                  write the corrected fields back into the leaf
 *
 * The cache entry for the root id must exist and must have node_count == 1;
 * otherwise an error message is produced.  The root item is copied out of
 * the leaf and its bytenr, level and generation are compared with the cached
 * values.  On a mismatch a report is printed unless both @read_only_mode and
 * the file-scope repair flag are set; a root item generation newer than the
 * cached one is reported separately.  When @read_only_mode is zero the three
 * fields are updated and the item is written back with
 * write_extent_buffer().
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing; confirm the return value convention against the full file.
 */
14345 static int maybe_repair_root_item(struct btrfs_path *path,
14346 const struct btrfs_key *root_key,
14347 const int read_only_mode)
14349 const u64 root_id = root_key->objectid;
14350 struct cache_extent *entry;
14351 struct root_item_info *rii;
14352 struct btrfs_root_item ri;
14353 unsigned long offset;
14355 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14358 "Error: could not find extent items for root %llu\n",
14359 root_key->objectid);
14363 rii = container_of(entry, struct root_item_info, cache_extent);
14364 ASSERT(rii->cache_extent.start == root_id);
14365 ASSERT(rii->cache_extent.size == 1);
14367 if (rii->node_count != 1) {
14369 "Error: could not find btree root extent for root %llu\n",
14374 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14375 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14377 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14378 btrfs_root_level(&ri) != rii->level ||
14379 btrfs_root_generation(&ri) != rii->gen) {
14382 * If we're in repair mode but our caller told us to not update
14383 * the root item, i.e. just check if it needs to be updated, don't
14384 * print this message, since the caller will call us again shortly
14385 * for the same root item without read only mode (the caller will
14386 * open a transaction first).
14388 if (!(read_only_mode && repair))
14390 "%sroot item for root %llu,"
14391 " current bytenr %llu, current gen %llu, current level %u,"
14392 " new bytenr %llu, new gen %llu, new level %u\n",
14393 (read_only_mode ? "" : "fixing "),
14395 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14396 btrfs_root_level(&ri),
14397 rii->bytenr, rii->gen, rii->level);
14399 if (btrfs_root_generation(&ri) > rii->gen) {
14401 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14402 root_id, btrfs_root_generation(&ri), rii->gen);
14406 if (!read_only_mode) {
14407 btrfs_set_root_bytenr(&ri, rii->bytenr);
14408 btrfs_set_root_level(&ri, rii->level);
14409 btrfs_set_root_generation(&ri, rii->gen);
14410 write_extent_buffer(path->nodes[0], &ri,
14411 offset, sizeof(ri));
14421 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14422 * caused read-only snapshots to be corrupted if they were created at a moment
14423 * when the source subvolume/snapshot had orphan items. The issue was that the
14424 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14425 * node instead of the post orphan cleanup root node.
14426 * So this function, and its callees, just detects and fixes those cases. Even
14427 * though the regression was for read-only snapshots, this function applies to
14428 * any snapshot/subvolume root.
14429 * This must be run before any other repair code - not doing so makes other
14430 * repair code delete or modify backrefs in the extent tree, for example, which
14431 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - detect, and in repair mode fix, root items that do not
 * match the root node found in the extent tree.
 *
 * @info: filesystem to scan
 *
 * Builds roots_info_cache, then walks BTRFS_ROOT_ITEM_KEY items of the tree
 * root starting at BTRFS_FIRST_FREE_OBJECTID, skipping
 * BTRFS_TREE_RELOC_OBJECTID.  Each item is passed to
 * maybe_repair_root_item(), in read-only mode while no transaction is open.
 * When no transaction is open and the file-scope repair flag is set, the
 * path is released so the scan can be redone with a transaction.  On the
 * way out the cache is freed and the path released.
 *
 * NOTE(review): the loop and label structure, the counting of fixed roots
 * and the return statements are on lines missing from this listing.
 */
14433 static int repair_root_items(struct btrfs_fs_info *info)
14435 struct btrfs_path path;
14436 struct btrfs_key key;
14437 struct extent_buffer *leaf;
14438 struct btrfs_trans_handle *trans = NULL;
14441 int need_trans = 0;
14443 btrfs_init_path(&path);
14445 ret = build_roots_info_cache(info);
14449 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14450 key.type = BTRFS_ROOT_ITEM_KEY;
14455 * Avoid opening and committing transactions if a leaf doesn't have
14456 * any root items that need to be fixed, so that we avoid rotating
14457 * backup roots unnecessarily.
14460 trans = btrfs_start_transaction(info->tree_root, 1);
14461 if (IS_ERR(trans)) {
14462 ret = PTR_ERR(trans);
14467 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14471 leaf = path.nodes[0];
14474 struct btrfs_key found_key;
14476 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14477 int no_more_keys = find_next_key(&path, &key);
14479 btrfs_release_path(&path);
14481 ret = btrfs_commit_transaction(trans,
14493 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14495 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14497 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14500 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14504 if (!trans && repair) {
14507 btrfs_release_path(&path);
14517 free_roots_info_cache();
14518 btrfs_release_path(&path);
14520 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache - clear the v1 free space cache of every block
 * group and invalidate the cache generation in the super block.
 *
 * @fs_info: filesystem to operate on
 *
 * Steps through the block groups with btrfs_lookup_first_block_group(),
 * calling btrfs_clear_free_space_cache() on each and advancing by
 * objectid + offset.  Afterwards a transaction is started on the tree root,
 * the super block cache generation is set to (u64)-1, and the transaction is
 * committed.  A failed transaction start is reported with error() and
 * returned as PTR_ERR(trans).
 *
 * NOTE(review): the loop header, the error check after each clear and the
 * final return are on lines missing from this listing.
 */
14527 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14529 struct btrfs_trans_handle *trans;
14530 struct btrfs_block_group_cache *bg_cache;
14534 /* Clear all free space cache inodes and its extent data */
14536 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14539 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14542 current = bg_cache->key.objectid + bg_cache->key.offset;
14545 /* Don't forget to set cache_generation to -1 */
14546 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14547 if (IS_ERR(trans)) {
14548 error("failed to update super block cache generation");
14549 return PTR_ERR(trans);
14551 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14552 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * do_clear_free_space_cache - clear the free space cache of the requested
 * version and report progress on stdout.
 *
 * @fs_info: filesystem to operate on
 *
 * clear_version == 1: when the FREE_SPACE_TREE compat_ro flag is set, a
 * message pointing at the v2 option is produced; otherwise
 * clear_free_space_cache() is run.
 * clear_version == 2: when the FREE_SPACE_TREE compat_ro flag is not set,
 * a message says there is nothing to clear; otherwise
 * btrfs_clear_free_space_tree() is run.
 *
 * NOTE(review): the declaration of the clear_version parameter and all
 * return statements are on lines missing from this listing.
 */
14557 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14562 if (clear_version == 1) {
14563 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14565 "free space cache v2 detected, use --clear-space-cache v2");
14569 printf("Clearing free space cache\n");
14570 ret = clear_free_space_cache(fs_info);
14572 error("failed to clear free space cache");
14575 printf("Free space cache cleared\n");
14577 } else if (clear_version == 2) {
14578 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14579 printf("no free space cache v2 to clear\n");
14583 printf("Clear free space cache v2\n");
14584 ret = btrfs_clear_free_space_tree(fs_info);
14586 error("failed to clear free space cache v2: %d", ret);
14589 printf("free space cache v2 cleared\n");
/*
 * Usage text for the check command, passed to usage() by cmd_check() when
 * the arguments are not acceptable.  The entries are, in order: the
 * synopsis, a one-line summary, a longer description, and one line per
 * option.
 *
 * NOTE(review): some entries (separators and the terminator, presumably) are
 * on lines missing from this listing.
 */
14596 const char * const cmd_check_usage[] = {
14597 "btrfs check [options] <device>",
14598 "Check structural integrity of a filesystem (unmounted).",
14599 "Check structural integrity of an unmounted filesystem. Verify internal",
14600 "trees' consistency and item connectivity. In the repair mode try to",
14601 "fix the problems found. ",
14602 "WARNING: the repair mode is considered dangerous",
14604 "-s|--super <superblock> use this superblock copy",
14605 "-b|--backup use the first valid backup root copy",
14606 "--force skip mount checks, repair is not possible",
14607 "--repair try to repair the filesystem",
14608 "--readonly run in read-only mode (default)",
14609 "--init-csum-tree create a new CRC tree",
14610 "--init-extent-tree create a new extent tree",
14611 "--mode <MODE> allows choice of memory/IO trade-offs",
14612 " where MODE is one of:",
14613 " original - read inodes and extents to memory (requires",
14614 " more memory, does less IO)",
14615 " lowmem - try to use less memory but read blocks again",
14617 "--check-data-csum verify checksums of data blocks",
14618 "-Q|--qgroup-report print a report on qgroup consistency",
14619 "-E|--subvol-extents <subvolid>",
14620 " print subvolume extents and sharing state",
14621 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14622 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14623 "-p|--progress indicate progress",
14624 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * cmd_check - entry point of the check command.
 *
 * @argc, @argv: command line; exactly one positional argument (the device)
 *               is accepted after the options
 *
 * Phases visible in this listing, in order:
 *  1. option parsing with getopt_long(); several options add
 *     OPEN_CTREE_WRITES or other flags to ctree_flags;
 *  2. optional progress task setup, rejection of --readonly combined with
 *     repair, and a warning for repair in low-memory mode;
 *  3. mount status check of the device, with --force handling;
 *  4. opening the filesystem and verifying that the tree, dev and chunk
 *     roots are up to date;
 *  5. early-exit style modes: clearing the space cache, zeroing the log tree
 *     in repair mode (after asking the user), qgroup report, and subvolume
 *     extent state printing;
 *  6. optional re-initialisation of the extent tree and/or checksum tree
 *     inside one transaction;
 *  7. verification that the extent and csum roots are up to date, followed
 *     by repair_root_items() unless the extent tree was re-initialised;
 *  8. the checks proper: chunks and extents, super block size, free space
 *     cache or tree, fs roots, csums, root refs (skipped in low-memory
 *     mode);
 *  9. in repair mode, re-COW of blocks queued on recow_ebs and deletion of
 *     items queued on delete_items;
 * 10. quota group verification and repair when quotas are enabled;
 * 11. printing of the collected statistics and release of resources.
 *
 * NOTE(review): this listing omits many lines of the function (case labels,
 * braces, goto targets, several declarations and the final return), so the
 * control flow between phases and the exit status must be confirmed against
 * the full file.
 */
14628 int cmd_check(int argc, char **argv)
14630 struct cache_tree root_cache;
14631 struct btrfs_root *root;
14632 struct btrfs_fs_info *info;
14635 u64 tree_root_bytenr = 0;
14636 u64 chunk_root_bytenr = 0;
14637 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14641 int init_csum_tree = 0;
14643 int clear_space_cache = 0;
14644 int qgroup_report = 0;
14645 int qgroups_repaired = 0;
14646 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14651 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14652 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14653 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14654 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14655 GETOPT_VAL_FORCE };
14656 static const struct option long_options[] = {
14657 { "super", required_argument, NULL, 's' },
14658 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14659 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14660 { "init-csum-tree", no_argument, NULL,
14661 GETOPT_VAL_INIT_CSUM },
14662 { "init-extent-tree", no_argument, NULL,
14663 GETOPT_VAL_INIT_EXTENT },
14664 { "check-data-csum", no_argument, NULL,
14665 GETOPT_VAL_CHECK_CSUM },
14666 { "backup", no_argument, NULL, 'b' },
14667 { "subvol-extents", required_argument, NULL, 'E' },
14668 { "qgroup-report", no_argument, NULL, 'Q' },
14669 { "tree-root", required_argument, NULL, 'r' },
14670 { "chunk-root", required_argument, NULL,
14671 GETOPT_VAL_CHUNK_TREE },
14672 { "progress", no_argument, NULL, 'p' },
14673 { "mode", required_argument, NULL,
14675 { "clear-space-cache", required_argument, NULL,
14676 GETOPT_VAL_CLEAR_SPACE_CACHE},
14677 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14678 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string below lists E without a trailing
 * colon, while long_options declares subvol-extents (value E) with
 * required_argument and optarg is parsed into subvolid further down
 * (presumably in the E case, whose label is not in this listing).  With the
 * short form, getopt_long() would then leave optarg unset.  Confirm and
 * consider adding the colon after E.
 */
14681 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14685 case 'a': /* ignored */ break;
14687 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14690 num = arg_strtou64(optarg);
14691 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14693 "super mirror should be less than %d",
14694 BTRFS_SUPER_MIRROR_MAX);
14697 bytenr = btrfs_sb_offset(((int)num));
14698 printf("using SB copy %llu, bytenr %llu\n", num,
14699 (unsigned long long)bytenr);
14705 subvolid = arg_strtou64(optarg);
14708 tree_root_bytenr = arg_strtou64(optarg);
14710 case GETOPT_VAL_CHUNK_TREE:
14711 chunk_root_bytenr = arg_strtou64(optarg);
14714 ctx.progress_enabled = true;
14718 usage(cmd_check_usage);
14719 case GETOPT_VAL_REPAIR:
14720 printf("enabling repair mode\n");
14722 ctree_flags |= OPEN_CTREE_WRITES;
14724 case GETOPT_VAL_READONLY:
14727 case GETOPT_VAL_INIT_CSUM:
14728 printf("Creating a new CRC tree\n");
14729 init_csum_tree = 1;
14731 ctree_flags |= OPEN_CTREE_WRITES;
14733 case GETOPT_VAL_INIT_EXTENT:
14734 init_extent_tree = 1;
14735 ctree_flags |= (OPEN_CTREE_WRITES |
14736 OPEN_CTREE_NO_BLOCK_GROUPS);
14739 case GETOPT_VAL_CHECK_CSUM:
14740 check_data_csum = 1;
14742 case GETOPT_VAL_MODE:
14743 check_mode = parse_check_mode(optarg);
14744 if (check_mode == CHECK_MODE_UNKNOWN) {
14745 error("unknown mode: %s", optarg);
14749 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14750 if (strcmp(optarg, "v1") == 0) {
14751 clear_space_cache = 1;
14752 } else if (strcmp(optarg, "v2") == 0) {
14753 clear_space_cache = 2;
14754 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14757 "invalid argument to --clear-space-cache, must be v1 or v2");
14760 ctree_flags |= OPEN_CTREE_WRITES;
14762 case GETOPT_VAL_FORCE:
/* Exactly one positional argument must remain after the options. */
14768 if (check_argc_exact(argc - optind, 1))
14769 usage(cmd_check_usage);
14771 if (ctx.progress_enabled) {
14772 ctx.tp = TASK_NOTHING;
14773 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14776 /* This check is the only reason for --readonly to exist */
14777 if (readonly && repair) {
14778 error("repair options are not compatible with --readonly");
14783 * experimental and dangerous
14785 if (repair && check_mode == CHECK_MODE_LOWMEM)
14786 warning("low-memory mode repair support is only partial");
14789 cache_tree_init(&root_cache);
14791 ret = check_mounted(argv[optind]);
14794 error("could not check mount status: %s",
14800 "%s is currently mounted, use --force if you really intend to check the filesystem",
14808 error("repair and --force is not yet supported");
14815 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14819 "filesystem mounted, continuing because of --force");
14821 /* A block device is mounted in exclusive mode by kernel */
14822 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14825 /* only allow partial opening under repair mode */
14827 ctree_flags |= OPEN_CTREE_PARTIAL;
14829 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14830 chunk_root_bytenr, ctree_flags);
14832 error("cannot open file system");
14838 global_info = info;
14839 root = info->fs_root;
14840 uuid_unparse(info->super_copy->fsid, uuidbuf);
14842 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14845 * Check the bare minimum before starting anything else that could rely
14846 * on it, namely the tree roots, any local consistency checks
14848 if (!extent_buffer_uptodate(info->tree_root->node) ||
14849 !extent_buffer_uptodate(info->dev_root->node) ||
14850 !extent_buffer_uptodate(info->chunk_root->node)) {
14851 error("critical roots corrupted, unable to check the filesystem");
14857 if (clear_space_cache) {
14858 ret = do_clear_free_space_cache(info, clear_space_cache);
14864 * repair mode will force us to commit transaction which
14865 * will make us fail to load log tree when mounting.
14867 if (repair && btrfs_super_log_root(info->super_copy)) {
14868 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14874 ret = zero_log_tree(root);
14877 error("failed to zero log tree: %d", ret);
14882 if (qgroup_report) {
14883 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14885 ret = qgroup_verify_all(info);
14892 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14893 subvolid, argv[optind], uuidbuf);
14894 ret = print_extent_state(info, subvolid);
/* Tree re-initialisation requested on the command line runs in one transaction. */
14899 if (init_extent_tree || init_csum_tree) {
14900 struct btrfs_trans_handle *trans;
14902 trans = btrfs_start_transaction(info->extent_root, 0);
14903 if (IS_ERR(trans)) {
14904 error("error starting transaction");
14905 ret = PTR_ERR(trans);
14910 if (init_extent_tree) {
14911 printf("Creating a new extent tree\n");
14912 ret = reinit_extent_tree(trans, info);
14918 if (init_csum_tree) {
14919 printf("Reinitialize checksum tree\n");
14920 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14922 error("checksum tree initialization failed: %d",
14929 ret = fill_csum_tree(trans, info->csum_root,
14933 error("checksum tree refilling failed: %d", ret);
14938 * Ok now we commit and run the normal fsck, which will add
14939 * extent entries for all of the items it finds.
14941 ret = btrfs_commit_transaction(trans, info->extent_root);
14946 if (!extent_buffer_uptodate(info->extent_root->node)) {
14947 error("critical: extent_root, unable to check the filesystem");
14952 if (!extent_buffer_uptodate(info->csum_root->node)) {
14953 error("critical: csum_root, unable to check the filesystem");
14959 if (!init_extent_tree) {
14960 ret = repair_root_items(info);
14963 error("failed to repair root items: %s", strerror(-ret));
14967 fprintf(stderr, "Fixed %d roots.\n", ret);
14969 } else if (ret > 0) {
14971 "Found %d roots with an outdated root item.\n",
14974 "Please run a filesystem check with the option --repair to fix them.\n");
14981 ret = do_check_chunks_and_extents(info);
14985 "errors found in extent allocation tree or chunk allocation");
14987 /* Only re-check super size after we checked and repaired the fs */
14988 err |= !is_super_size_valid(info);
14990 if (!ctx.progress_enabled) {
14991 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14992 fprintf(stderr, "checking free space tree\n");
14994 fprintf(stderr, "checking free space cache\n");
14996 ret = check_space_cache(root);
14999 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
15000 error("errors found in free space tree");
15002 error("errors found in free space cache");
15007 * We used to have to have these hole extents in between our real
15008 * extents so if we don't have this flag set we need to make sure there
15009 * are no gaps in the file extents for inodes, otherwise we can just
15010 * ignore it when this happens.
15012 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15013 ret = do_check_fs_roots(info, &root_cache);
15016 error("errors found in fs roots");
15020 fprintf(stderr, "checking csums\n");
15021 ret = check_csums(root);
15024 error("errors found in csum tree");
15028 fprintf(stderr, "checking root refs\n");
15029 /* For low memory mode, check_fs_roots_v2 handles root refs */
15030 if (check_mode != CHECK_MODE_LOWMEM) {
15031 ret = check_root_refs(root, &root_cache);
15034 error("errors found in root refs");
15039 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15040 struct extent_buffer *eb;
15042 eb = list_first_entry(&root->fs_info->recow_ebs,
15043 struct extent_buffer, recow);
15044 list_del_init(&eb->recow);
15045 ret = recow_extent_buffer(root, eb);
15048 error("fails to fix transid errors");
15053 while (!list_empty(&delete_items)) {
15054 struct bad_item *bad;
15056 bad = list_first_entry(&delete_items, struct bad_item, list);
15057 list_del_init(&bad->list);
15059 ret = delete_bad_item(root, bad);
15065 if (info->quota_enabled) {
15066 fprintf(stderr, "checking quota groups\n");
15067 ret = qgroup_verify_all(info);
15070 error("failed to check quota groups");
15074 ret = repair_qgroups(info, &qgroups_repaired);
15077 error("failed to repair quota groups");
15083 if (!list_empty(&root->fs_info->recow_ebs)) {
15084 error("transid errors in file system");
15089 printf("found %llu bytes used, ",
15090 (unsigned long long)bytes_used);
15092 printf("error(s) found\n");
15094 printf("no error found\n");
15095 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15096 printf("total tree bytes: %llu\n",
15097 (unsigned long long)total_btree_bytes);
15098 printf("total fs tree bytes: %llu\n",
15099 (unsigned long long)total_fs_tree_bytes);
15100 printf("total extent tree bytes: %llu\n",
15101 (unsigned long long)total_extent_tree_bytes);
15102 printf("btree space waste bytes: %llu\n",
15103 (unsigned long long)btree_space_waste);
15104 printf("file data blocks allocated: %llu\n referenced %llu\n",
15105 (unsigned long long)data_bytes_allocated,
15106 (unsigned long long)data_bytes_referenced);
15108 free_qgroup_counts();
15109 free_root_recs_tree(&root_cache);
15113 if (ctx.progress_enabled)
15114 task_deinit(ctx.info);