2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state of the checker.  Every object below is explicitly or
 * implicitly zero/NULL-initialised; the two LIST_HEAD() objects are list
 * heads (presumably empty at start-up -- confirm against LIST_HEAD()).
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/*
 * When non-zero, maybe_free_inode_rec() does not flag
 * I_ERR_FILE_EXTENT_DISCOUNT for gaps between file extents.
 */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Single-bit status flags; each flag owns its own bit (bits 1..21 in the
 * lines visible here), so several can be OR-ed into one value.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
136 #define BG_ACCOUNTING_ERROR (1<<21) /* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, just removed the undetermined members
190 * and change it to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
258 enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  They are OR-ed into backref->errors (see
 * add_inode_backref()) and decoded into text by print_ref_error().
 */
322 #define REF_ERR_NO_DIR_ITEM (1 << 0)
323 #define REF_ERR_NO_DIR_INDEX (1 << 1)
324 #define REF_ERR_NO_INODE_REF (1 << 2)
325 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
326 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
327 #define REF_ERR_DUP_INODE_REF (1 << 5)
328 #define REF_ERR_INDEX_UNMATCH (1 << 6)
329 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
330 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
331 #define REF_ERR_NO_ROOT_REF (1 << 9)
332 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
333 #define REF_ERR_DUP_ROOT_REF (1 << 11)
334 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Per-inode error bits.  They are OR-ed into the errors field of an
 * inode_record and decoded into text by print_inode_error().
 */
370 #define I_ERR_NO_INODE_ITEM (1 << 0)
371 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
372 #define I_ERR_DUP_INODE_ITEM (1 << 2)
373 #define I_ERR_DUP_DIR_INDEX (1 << 3)
374 #define I_ERR_ODD_DIR_ITEM (1 << 4)
375 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
376 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
377 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
378 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
379 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
380 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
381 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
382 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
383 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
384 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every error below must own a distinct bit so that several of them can be
 * OR-ed into one value and still be told apart.  CROSSING_STRIPE_BOUNDARY
 * used to share bit 4 with REFERENCER_MISMATCH; it now uses the first free
 * bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatibility helper that allows older code to be reused */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will split the hole and is much stricter than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exist.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Fold the inode records cached on @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first. The source's root_cache
 * and then its inode_cache are walked, and each entry is inserted into the
 * matching tree of @dst_node. An entry whose key already exists there
 * (-EEXIST) is merged into the existing record instead. Finally the
 * destination's "current" inode is advanced when the source was positioned
 * on a larger inode number.
 *
 * NOTE(review): this listing omits several lines of the function (loop
 * headers, braces, the return statement), so the return value and the exact
 * move-versus-copy rule are not described here.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
/* Drop one reference on the source node. */
1323 if (--src_node->refs == 0)
/* Remember which inode the source node was positioned on. */
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
/* Start with the root caches; the inode caches are selected further down. */
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* Key already present in the destination: merge into the existing record. */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* After the root caches, switch both cursors to the inode caches. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
/* Advance the destination's current inode if the source was further along. */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for an inode record cache tree: maps @cache back to
 * the ptr_node that embeds it and releases an inode record with
 * free_inode_rec().
 *
 * NOTE(review): the line that assigns "rec" is not visible in this listing;
 * presumably it is taken from the ptr_node -- confirm.
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Invoke FREE_EXTENT_CACHE_BASED_TREE with free_inode_ptr as the per-entry
 * destructor. NOTE(review): presumably this expands to the
 * free_inode_recs_tree() helper called elsewhere in this file -- confirm
 * against the macro definition.
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the cache entry covering bytenr (a range of length 1) in the
 * @shared cache tree and map it back to the shared_node that embeds it.
 *
 * NOTE(review): the "not found" handling and the return statements are not
 * visible in this listing.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed at @bytenr (size 1), initialise its
 * root and inode cache trees and insert it into @shared.
 *
 * NOTE(review): the allocation-failure check, the use of @refs and the
 * return statement are not visible in this listing.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Account for the tree walk arriving at the block at @bytenr on @level.
 *
 * A shared_node for @bytenr is looked up in wc->shared; the visible code
 * can also create one with @refs and make it the active node for @level.
 * For an existing node, the cached records are either released (walk is at
 * the root level of a root whose root item has zero refs) or spliced into
 * the currently active node.
 *
 * NOTE(review): the branch conditions between these cases and all return
 * statements are not visible in this listing -- confirm against the full
 * source.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* The active node must sit strictly above the level being entered. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* Create the node, look it up again and make it the active one. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/* Root item with zero refs at the root level: drop a ref, free on last one. */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Hand the cached records over to the currently active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk leaves @level.
 *
 * The active shared node is detached from @wc and a higher level (found by
 * scanning upwards from @level + 1) becomes the active one. The detached
 * node's records are spliced into the new active node unless the walk is at
 * the root level of a root whose root item has zero refs.
 *
 * NOTE(review): the loop body, the else-branch body and the return
 * statements are not visible in this listing.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Scan the levels above @level for the next active node. */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the current active node and activate level i instead. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
/* Splicing drops one ref; at least one more must remain afterwards. */
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how root child_root_id relates to root @parent_root_id by
 * searching the tree root.
 *
 * First a ROOT_REF item keyed (parent_root_id, ROOT_REF, child_root_id) is
 * searched for. Then the ROOT_BACKREF items of the child are iterated and
 * each key offset is compared against @parent_root_id. The final return
 * distinguishes "has some other parent" (0) from "has no parent" (2).
 *
 * NOTE(review): the early-return and error paths are not visible in this
 * listing.
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Direct lookup of the parent -> child ROOT_REF item. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Fall back to walking the child's ROOT_BACKREF items. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: move on to the next one. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the items no longer belong to the child's backrefs. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
/*
 * Record the directory entries stored in the DIR_ITEM/DIR_INDEX item at
 * @slot of @eb as inode backrefs on @active_node.
 *
 * Every btrfs_dir_item packed into the item is visited. Its name is copied
 * into a local buffer (truncated, with REF_ERR_NAME_TOO_LONG, when it would
 * cross the item boundary or exceed BTRFS_NAME_LEN). For DIR_ITEM keys the
 * name hash is checked against key->offset. The entry is then added as a
 * backref to the inode cache (INODE_ITEM location), to the root cache
 * (ROOT_ITEM location), or under BTRFS_MULTIPLE_OBJECTIDS for any other
 * location type.
 *
 * The name buffer is exactly BTRFS_NAME_LEN bytes and is filled by
 * read_extent_buffer() without a terminating NUL, so the hash-mismatch
 * message prints it with a bounded "%.*s" rather than "%s".
 *
 * NOTE(review): some local declarations, loop bookkeeping and the return
 * statement are not visible in this listing.
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
/* Name would cross the item boundary or is longer than allowed. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* DIR_ITEM keys carry the name hash in key->offset. */
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
/* namebuf is not NUL-terminated: bound the print to len bytes. */
1623 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
/* Step over this entry: header, name and trailing data. */
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is expected to hold a single entry. */
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into the INODE_REF item at @slot of
 * @eb as a backref in the inode cache of @active_node.
 *
 * key->objectid is the inode and key->offset is passed as the directory
 * argument of add_inode_backref(). A name that would cross the item
 * boundary or exceed BTRFS_NAME_LEN is truncated and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): some local declarations, loop bookkeeping and the return
 * statement are not visible in this listing.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size ref header fits in what is left of the item. */
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Step over this ref: header plus name. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into the INODE_EXTREF item at
 * @slot of @eb as a backref in the inode cache of @active_node.
 *
 * Unlike a plain inode ref, the parent directory is read from the extref
 * itself rather than from the key. A name longer than BTRFS_NAME_LEN is
 * truncated to BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): no check that the extref header plus name stays inside the
 * item is visible here, unlike process_inode_ref(). Lines are missing from
 * this listing, so confirm against the full source.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: copy only what fits in namebuf and flag the error. */
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Step over this extref: header plus name. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len).
 *
 * Each csum item covers (item size / csum size) sectors starting at its
 * key offset; the overlap of that coverage with the requested range is
 * computed per item.
 *
 * NOTE(review): the lines that accumulate into @found and advance
 * @start/@len, as well as the return statement, are not visible in this
 * listing -- presumably @found receives the number of covered bytes.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding csum item may still cover @start. */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: move on to the next one. */
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
/* End of the byte range covered by the checksums in this item. */
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at @slot of @eb and fold it into the
 * current inode record of @active_node.
 *
 * The function tracks the contiguous range covered by the inode's extents
 * (flagging overlaps and recording holes), sanity-checks inline, regular
 * and preallocated extents, accumulates found_size, and compares the
 * extent's on-disk range against the csum tree via count_csum_range().
 *
 * NOTE(review): some declarations, conditions and the return statement are
 * not visible in this listing.
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this extent. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Extent starts before the previous one ended: overlap. After it: hole. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk_bytenr add to found_size. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* With compression set, the on-disk length is used for the csum lookup. */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * Items are visited in key order. Whenever the key's objectid moves past
 * the inode currently tracked on the active shared node, that inode is
 * marked checked and a record for the new objectid becomes current. Each
 * item is then dispatched by key type to the matching process_*() helper.
 *
 * NOTE(review): the switch header, the skip/continue statements and the
 * return statement are not visible in this listing.
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
/* Walk is at the root level of a root whose root item has zero refs. */
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Key belongs to a later inode: retire the current record, switch over. */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping arrays, each indexed by tree level.
 * NOTE(review): the enclosing struct header is not visible in this listing;
 * the functions below access these fields through "struct node_refs *".
 */
/* Start of the tree block last recorded for each level. */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count looked up for that block. */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at this level has to be checked in this tree. */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations needed by process_one_leaf_v2(), which calls both. */
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first INODE_ITEM or the first change
 * of inode number. check_inode_item() is then run and its error bits are
 * accumulated in "err". When that call moved the path to another leaf, the
 * node refs of the new path are refreshed from the root level downwards
 * with update_nodes_refs().
 *
 * NOTE(review): loop/goto structure and return statements are not visible
 * in this listing.
 */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on, to detect a leaf switch later. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
/* Block at this level is unchanged: nothing to refresh. */
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the path buffers below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks of @node, starting at pointer @slot
 * and continuing to the last pointer in the node.
 *
 * NOTE(review): a level-based early return appears to follow the level
 * lookup but is not visible in this listing.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
/* Child block address and the generation its parent pointer records. */
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
/*
 * Validate @child against the pointer stored at @slot of @parent.
 *
 * Three properties are compared and reported on stderr when they differ:
 * the first key of the child versus the key in the parent slot, the child's
 * header bytenr versus the parent's block pointer, and the child's header
 * generation versus the generation recorded in the parent pointer. In every
 * message "wanted" is the value recorded in the parent and "have" is the
 * value found in the child.
 *
 * NOTE(review): the lines that set the return value and the return
 * statement are not visible in this listing.
 */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores key pointers. */
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
/* Parent pointer generation is the expected value, child header the actual. */
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_node_ptr_generation(parent, slot),
2137 btrfs_header_generation(child));
2143 * For a tree node or leaf that is shared, we don't need to iterate it
2144 * in every fs or file tree check. Here we find all of its root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while walking @root.
 *
 * A block referenced by a single root is handled separately from one
 * referenced by several; for the latter, @root's objectid is compared
 * against the first (smallest) value stored in the ulist.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
/* Only one root references the block. */
2152 if (roots->nnodes == 1)
/* First entry of the rb-tree, i.e. the smallest root id. */
2155 node = rb_first(&roots->root);
2156 u = rb_entry(node, struct ulist_node, rb_node);
2158 * current root id is not smallest, we skip it and let it be checked
2159 * in the fs or file tree who hash the smallest root id.
2161 if (root->objectid != u->val)
/*
 * Work out whether the extent backing tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, and set or clear that bit in *flags_ret.
 *
 * Several shortcuts are tried first (root objectid below
 * BTRFS_FIRST_FREE_OBJECTID, @eb being the root's own block, the RELOC
 * header flag, @eb owned by @root). Otherwise the extent item for @eb is
 * looked up in the extent tree, its flags are inspected and its inline refs
 * are scanned for a TREE_BLOCK_REF whose offset equals the block's owner.
 *
 * NOTE(review): the jump targets of the individual checks, the error paths
 * and the return statement are not visible in this listing.
 */
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2170 struct btrfs_root *extent_root = root->fs_info->extent_root;
2171 struct btrfs_root_item *ri = &root->root_item;
2172 struct btrfs_extent_inline_ref *iref;
2173 struct btrfs_extent_item *ei;
2174 struct btrfs_key key;
2175 struct btrfs_path *path = NULL;
2186 * Except file/reloc tree, we can not have FULL BACKREF MODE
2188 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root node recorded in the root item. */
2192 if (eb->start == btrfs_root_bytenr(ri))
2195 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2198 owner = btrfs_header_owner(eb);
2199 if (owner == root->objectid)
2202 path = btrfs_alloc_path();
/* Search past any item for this bytenr, then step back to the extent item. */
2206 key.objectid = btrfs_header_bytenr(eb);
2208 key.offset = (u64)-1;
2210 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2217 ret = btrfs_previous_extent_item(extent_root, path,
2223 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on @eb refers to the extent tree leaf, not the checked block. */
2225 eb = path->nodes[0];
2226 slot = path->slots[0];
2227 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2229 flags = btrfs_extent_flags(eb, ei);
2230 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs follow the extent item inside the same leaf item. */
2233 ptr = (unsigned long)(ei + 1);
2234 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a tree_block_info before the inline refs. */
2236 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237 ptr += sizeof(struct btrfs_tree_block_info);
2240 /* Reached extent item ends normally */
2244 /* Beyond extent item end, wrong item size */
2246 error("extent item at bytenr %llu slot %d has wrong size",
2251 iref = (struct btrfs_extent_inline_ref *)ptr;
2252 offset = btrfs_extent_inline_ref_offset(eb, iref);
2253 type = btrfs_extent_inline_ref_type(eb, iref);
2255 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2257 ptr += btrfs_extent_inline_ref_size(type);
2261 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267 btrfs_free_path(path);
2272 * for a tree node or leaf, we record its reference count, so later if we still
2273 * process this node or leaf, don't need to compute its reference count again.
2275 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached per-level information in @nrefs for the tree block at
 * @bytenr on @level.
 *
 * Nothing is recomputed when @nrefs already describes @bytenr at that
 * level. Otherwise the extent ref count is looked up, the per-level slots
 * are reset, and need_check[level] is derived from the set of roots
 * referencing the block (or inherited from the level above). When
 * @check_all is set and @eb is given, full_backref[level] is recomputed
 * with calc_extent_flag_v2().
 *
 * NOTE(review): the conditions separating the need_check assignments, the
 * error handling and the return statement are not visible in this listing.
 */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278 struct extent_buffer *eb, struct node_refs *nrefs,
2279 u64 level, int check_all)
2281 struct ulist *roots;
2284 int root_level = btrfs_header_level(root->node);
/* Already cached for this block. */
2288 if (nrefs->bytenr[level] == bytenr)
2291 if (bytenr != (u64)-1) {
2292 /* the return value of this function seems a mistake */
2293 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294 level, 1, &refs, &flags);
2296 if (ret < 0 && !check_all)
/* Record the new block and reset its per-level state. */
2299 nrefs->bytenr[level] = bytenr;
2300 nrefs->refs[level] = refs;
2301 nrefs->full_backref[level] = 0;
2302 nrefs->checked[level] = 0;
2305 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2310 check = need_check(root, roots);
2312 nrefs->need_check[level] = check;
2315 nrefs->need_check[level] = 1;
2317 if (level == root_level) {
2318 nrefs->need_check[level] = 1;
2321 * The node refs may have not been
2322 * updated if upper needs checking (the
2323 * lowest root_objectid) the node can
2326 nrefs->need_check[level] =
2327 nrefs->need_check[level + 1];
2333 if (check_all && eb) {
2334 calc_extent_flag_v2(root, eb, &flags);
2335 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336 nrefs->full_backref[level] = 1;
2343 * @level if @level == -1 it means an extent data item,
2344 * otherwise a normal tree block.
/*
 * Decide whether the extent at @level must be checked strictly, based on
 * the reference counts cached in @nrefs for the levels above it.
 *
 * @level is validated against the root level (-1 is accepted as the lowest
 * value) and the root level itself is special-cased. Otherwise every level
 * above @level up to the root is inspected for a ref count greater than 1.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347 struct node_refs *nrefs, int level)
2349 int root_level = btrfs_header_level(root->node);
2351 if (level > root_level || level < -1)
2353 if (level == root_level)
2356 * if the upper node is marked full backref, it should contain shared
2357 * backref of the parent (except owner == root->objectid).
/* Look for a block above @level that is referenced more than once. */
2359 while (++level <= root_level)
2360 if (nrefs->refs[level] > 1)
/*
 * Original-mode descent: walk from path->nodes[*level] down towards the
 * leaves, processing each leaf with process_one_leaf().
 *
 * Reference counts of visited blocks are cached in @nrefs and blocks are
 * registered with enter_shared_node(). Each child is read (with readahead
 * for its siblings), validated against its parent pointer with
 * check_child_node() and with btrfs_check_leaf()/btrfs_check_node(), and
 * only then installed in @path one level down. An unreadable child is
 * recorded via btrfs_add_corrupt_extent_record().
 *
 * NOTE(review): many conditions, error paths and the return statement are
 * not visible in this listing.
 */
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367 struct walk_control *wc, int *level,
2368 struct node_refs *nrefs)
2370 enum btrfs_tree_block_status status;
2373 struct btrfs_fs_info *fs_info = root->fs_info;
2374 struct extent_buffer *next;
2375 struct extent_buffer *cur;
2379 WARN_ON(*level < 0);
2380 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when @nrefs already describes this block. */
2382 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383 refs = nrefs->refs[*level];
2386 ret = btrfs_lookup_extent_info(NULL, root,
2387 path->nodes[*level]->start,
2388 *level, 1, &refs, NULL);
2393 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394 nrefs->refs[*level] = refs;
2398 ret = enter_shared_node(root, path->nodes[*level]->start,
2406 while (*level >= 0) {
2407 WARN_ON(*level < 0);
2408 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409 cur = path->nodes[*level];
/* The header level must agree with the level the walk is on. */
2411 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2417 ret = process_one_leaf(root, cur, wc);
/* Child block address and the generation its parent pointer records. */
2422 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2425 if (bytenr == nrefs->bytenr[*level - 1]) {
2426 refs = nrefs->refs[*level - 1];
2428 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429 *level - 1, 1, &refs, NULL);
2433 nrefs->bytenr[*level - 1] = bytenr;
2434 nrefs->refs[*level - 1] = refs;
2439 ret = enter_shared_node(root, bytenr, refs,
2442 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2447 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449 free_extent_buffer(next);
2450 reada_walk_down(root, cur, path->slots[*level]);
2451 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: record the corruption against the parent slot's key. */
2452 if (!extent_buffer_uptodate(next)) {
2453 struct btrfs_key node_key;
2455 btrfs_node_key_to_cpu(path->nodes[*level],
2457 path->slots[*level]);
2458 btrfs_add_corrupt_extent_record(root->fs_info,
2460 path->nodes[*level]->start,
2461 root->fs_info->nodesize,
2468 ret = check_child_node(cur, path->slots[*level], next);
2470 free_extent_buffer(next);
2475 if (btrfs_is_leaf(next))
2476 status = btrfs_check_leaf(root, NULL, next);
2478 status = btrfs_check_node(root, NULL, next);
2479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2485 *level = *level - 1;
2486 free_extent_buffer(path->nodes[*level]);
2487 path->nodes[*level] = next;
2488 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2491 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; account_bytes() calls this with root->objectid. */
2495 static int fs_root_objectid(u64 objectid);
2498 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the file-scope statistics:
 * total btree bytes, fs-tree bytes (when fs_root_objectid() accepts the
 * root's objectid), extent-tree bytes (by header owner) and wasted btree
 * space (leaf free space, or unused key-pointer slots for a node).
 *
 * NOTE(review): the condition choosing between the leaf and node waste
 * computation is not visible in this listing.
 */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2504 struct extent_buffer *eb = path->nodes[level];
2506 total_btree_bytes += eb->len;
2507 if (fs_root_objectid(root->objectid))
2508 total_fs_tree_bytes += eb->len;
2509 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510 total_extent_tree_bytes += eb->len;
2513 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Unused pointer slots in the node count as wasted space. */
2515 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516 btrfs_header_nritems(eb));
2517 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2522 * This function only handles BACKREF_MISSING,
2523 * If corresponding extent item exists, increase the ref, else insert an extent
2526 * Returns error bits after repair.
/*
 * Repair a missing backref (BACKREF_MISSING in @err) for tree block @node.
 *
 * The extent tree is searched for the extent item of the block. When one
 * has to be inserted, an EXTENT_ITEM (followed by a tree_block_info) or a
 * skinny METADATA_ITEM is created with zero refs, and the block group
 * accounting is updated. In either case one reference from @root (or from
 * the parent block when the level above is in full-backref mode and the
 * block is owned by another root) is then added with
 * btrfs_inc_extent_ref(), and BACKREF_MISSING is cleared from @err.
 *
 * @level indexes the per-level arrays of @nrefs, which hold
 * BTRFS_MAX_LEVEL entries, so a value equal to BTRFS_MAX_LEVEL is already
 * out of range and is warned about.
 *
 * NOTE(review): several conditions, error paths and the return statement
 * are not visible in this listing.
 */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct extent_buffer *node,
2531 struct node_refs *nrefs, int level, int err)
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 struct btrfs_root *extent_root = fs_info->extent_root;
2535 struct btrfs_path path;
2536 struct btrfs_extent_item *ei;
2537 struct btrfs_tree_block_info *bi;
2538 struct btrfs_key key;
2539 struct extent_buffer *eb;
2540 u32 size = sizeof(*ei);
2541 u32 node_size = root->fs_info->nodesize;
2542 int insert_extent = 0;
2543 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544 int root_level = btrfs_header_level(root->node);
2549 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* Nothing to do unless the missing-backref bit is set. */
2552 if ((err & BACKREF_MISSING) == 0)
2555 WARN_ON(level >= BTRFS_MAX_LEVEL);
2558 btrfs_init_path(&path);
2559 bytenr = btrfs_header_bytenr(node);
2560 owner = btrfs_header_owner(node);
2561 generation = btrfs_header_generation(node);
2563 key.objectid = bytenr;
2565 key.offset = (u64)-1;
2567 /* Search for the extent item */
2568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2578 /* calculate if the extent item flag is full backref or not */
2579 if (nrefs->full_backref[level] != 0)
2580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2582 /* insert an extent item */
2583 if (insert_extent) {
2584 struct btrfs_disk_key copy_key;
2586 generation = btrfs_header_generation(node);
2588 if (level < root_level && nrefs->full_backref[level + 1] &&
2589 owner != root->objectid) {
2590 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2593 key.objectid = bytenr;
/* Non-skinny layout: EXTENT_ITEM keyed by size plus a tree_block_info. */
2594 if (!skinny_metadata) {
2595 key.type = BTRFS_EXTENT_ITEM_KEY;
2596 key.offset = node_size;
2597 size += sizeof(*bi);
2599 key.type = BTRFS_METADATA_ITEM_KEY;
2603 btrfs_release_path(&path);
2604 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start with zero refs; the ref is added by btrfs_inc_extent_ref() below. */
2612 btrfs_set_extent_refs(eb, ei, 0);
2613 btrfs_set_extent_generation(eb, ei, generation);
2614 btrfs_set_extent_flags(eb, ei, flags);
2616 if (!skinny_metadata) {
2617 bi = (struct btrfs_tree_block_info *)(ei + 1);
2618 memset_extent_buffer(eb, 0, (unsigned long)bi,
2620 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621 btrfs_set_disk_key_type(&copy_key, 0);
2622 btrfs_set_disk_key_offset(&copy_key, 0);
2624 btrfs_set_tree_block_level(eb, bi, level);
2625 btrfs_set_tree_block_key(eb, bi, &copy_key);
2627 btrfs_mark_buffer_dirty(eb);
2628 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
/* Keep the cached per-level state in sync with the new item. */
2632 nrefs->refs[level] = 0;
2633 nrefs->full_backref[level] =
2634 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635 btrfs_release_path(&path);
/* Parent in full-backref mode and block owned elsewhere: use a shared ref. */
2638 if (level < root_level && nrefs->full_backref[level + 1] &&
2639 owner != root->objectid)
2640 parent = nrefs->bytenr[level + 1];
2642 /* increase the ref */
2643 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644 parent, root->objectid, level, 0);
2646 nrefs->refs[level]++;
2648 btrfs_release_path(&path);
2651 "failed to repair tree block ref start %llu root %llu due to %s",
2652 bytenr, root->objectid, strerror(-ret));
2654 printf("Added one tree block ref start %llu %s %llu\n",
2655 bytenr, parent ? "parent" : "root",
2656 parent ? parent : root->objectid);
2657 err &= ~BACKREF_MISSING;
/*
 * Forward declarations for helpers called by walk_down_tree_v2(), either
 * directly (check_tree_block_ref, check_leaf_items) or through
 * process_one_leaf_v2() (check_inode_item).
 */
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664 unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666 struct extent_buffer *eb, u64 bytenr,
2667 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669 struct btrfs_root *root, struct btrfs_path *path,
2670 struct node_refs *nrefs, int account_bytes);
2673 * @trans just for lowmem repair mode
2674 * @check all if not 0 then check all tree block backrefs and items
2675 * 0 then just check relationship of items in fs tree(s)
2677 * Returns >0 Found error, should continue
2678 * Returns <0 Fatal error, must exit the whole check
2679 * Returns 0 No errors found
/*
 * Lowmem-mode descent: walk from path->nodes[*level] down towards the
 * leaves, checking blocks on the way.
 *
 * For each block the tree block backref is verified with
 * check_tree_block_ref() and handed to repair_tree_block_ref(). With
 * check_all set, byte accounting is updated via account_bytes(). Leaves are
 * validated with btrfs_check_leaf() and processed with
 * process_one_leaf_v2() or check_leaf_items(); nodes are validated with
 * btrfs_check_node(). Children are read (with readahead), validated against
 * the parent pointer and installed in @path one level down. Without
 * check_all, children whose need_check flag is clear are skipped.
 *
 * NOTE(review): many conditions, error paths and the return statement are
 * not visible in this listing; the last parameter (used below as
 * "check_all") is declared on a line that is missing here.
 */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *root, struct btrfs_path *path,
2683 int *level, struct node_refs *nrefs, int ext_ref,
2687 enum btrfs_tree_block_status status;
2690 struct btrfs_fs_info *fs_info = root->fs_info;
2691 struct extent_buffer *next;
2692 struct extent_buffer *cur;
2696 int account_file_data = 0;
2698 WARN_ON(*level < 0);
2699 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure @nrefs describes the block the walk starts on. */
2701 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702 path->nodes[*level], nrefs, *level, check_all);
2706 while (*level >= 0) {
2707 WARN_ON(*level < 0);
2708 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709 cur = path->nodes[*level];
2710 bytenr = btrfs_header_bytenr(cur);
2711 check = nrefs->need_check[*level];
/* The header level must agree with the level the walk is on. */
2713 if (btrfs_header_level(cur) != *level)
2716 * Update bytes accounting and check tree block ref
2717 * NOTE: Doing accounting and check before checking nritems
2718 * is necessary because of empty node/leaf.
2720 if ((check_all && !nrefs->checked[*level]) ||
2721 (!check_all && nrefs->need_check[*level])) {
2722 ret = check_tree_block_ref(root, cur,
2723 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724 btrfs_header_owner(cur), nrefs);
2727 ret = repair_tree_block_ref(trans, root,
2728 path->nodes[*level], nrefs, *level, ret);
2731 if (check_all && nrefs->need_check[*level] &&
2732 nrefs->refs[*level]) {
2733 account_bytes(root, path, *level);
2734 account_file_data = 1;
2736 nrefs->checked[*level] = 1;
/* All slots of this block have been consumed. */
2739 if (path->slots[*level] >= btrfs_header_nritems(cur))
2742 /* Don't forget to check leaf/node validation */
2744 /* skip duplicate check */
2745 if (check || !check_all) {
2746 ret = btrfs_check_leaf(root, NULL, cur);
2747 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2755 ret = process_one_leaf_v2(root, path, nrefs,
2758 ret = check_leaf_items(trans, root, path,
2759 nrefs, account_file_data);
2763 if (check || !check_all) {
2764 ret = btrfs_check_node(root, NULL, cur);
2765 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child block address and the generation its parent pointer records. */
2772 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2775 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2780 * check all trees in check_chunks_and_extent_v2
2781 * check shared node once in check_fs_roots
2783 if (!check_all && !nrefs->need_check[*level - 1]) {
2784 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2788 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790 free_extent_buffer(next);
2791 reada_walk_down(root, cur, path->slots[*level]);
2792 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Read failed: record the corruption against the parent slot's key. */
2793 if (!extent_buffer_uptodate(next)) {
2794 struct btrfs_key node_key;
2796 btrfs_node_key_to_cpu(path->nodes[*level],
2798 path->slots[*level]);
2799 btrfs_add_corrupt_extent_record(fs_info,
2800 &node_key, path->nodes[*level]->start,
2801 fs_info->nodesize, *level);
2807 ret = check_child_node(cur, path->slots[*level], next);
2812 if (btrfs_is_leaf(next))
2813 status = btrfs_check_leaf(root, NULL, next);
2815 status = btrfs_check_node(root, NULL, next);
2816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2822 *level = *level - 1;
2823 free_extent_buffer(path->nodes[*level]);
2824 path->nodes[*level] = next;
2825 path->slots[*level] = 0;
2826 account_file_data = 0;
/* bytenr == (u64)-1: only refresh full_backref for the new level. */
2828 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Climb back up @path starting at *level.
 *
 * Every level from *level upward that is present in @path is tested for
 * items after its current slot (slots[i] + 1 < nritems).  The fall-through
 * code frees and clears path->nodes[*level], and calls leave_shared_node()
 * when *level equals wc->active_node.
 */
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834 struct walk_control *wc, int *level)
2837 struct extent_buffer *leaf;
2839 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, "leaf" is the block at level i, which may be a node. */
2840 leaf = path->nodes[i];
2841 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2846 free_extent_buffer(path->nodes[*level]);
2847 path->nodes[*level] = NULL;
/* *level must never be above the active shared node. */
2848 BUG_ON(*level > wc->active_node);
2849 if (*level == wc->active_node)
2850 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without a walk_control: tests each level from
 * *level upward for items after its current slot (slots[i] + 1 < nritems);
 * the fall-through code frees and clears path->nodes[*level].
 */
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2861 struct extent_buffer *leaf;
2863 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* "leaf" is simply the block at level i, which may be a node. */
2864 leaf = path->nodes[i];
2865 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2870 free_extent_buffer(path->nodes[*level]);
2871 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * Conditions examined, in order: missing inode item or any recorded error;
 * nlink != 1 or found_link != 0; an empty backref list; a first backref
 * without an inode ref; a first backref that is not index 0 with the 2-byte
 * name ".."; a first backref that has a dir index or dir item.
 * Presumably each of them rejects the record - confirm against the branch
 * bodies.
 */
2878 static int check_root_dir(struct inode_record *rec)
2880 struct inode_backref *backref;
2883 if (!rec->found_inode_item || rec->errors)
2885 if (rec->nlink != 1 || rec->found_link != 0)
2887 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is inspected. */
2889 backref = to_inode_backref(rec->backrefs.next);
2890 if (!backref->found_inode_ref)
2892 if (backref->index != 0 || backref->namelen != 2 ||
2893 memcmp(backref->name, "..", 2))
2895 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of inode rec->ino with rec->found_size and clear
 * I_ERR_DIR_ISIZE_WRONG from the record.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, (u64)-1) and
 * then reading the key at path->slots[0]; the objectid of that key is
 * compared with rec->ino before the item is modified.  @path is released
 * before returning.
 */
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct btrfs_inode_item *ei;
2907 struct btrfs_key key;
2910 key.objectid = rec->ino;
2911 key.type = BTRFS_INODE_ITEM_KEY;
2912 key.offset = (u64)-1;
2914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2918 if (!path->slots[0]) {
2925 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926 if (key.objectid != rec->ino) {
2931 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932 struct btrfs_inode_item);
2933 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934 btrfs_mark_buffer_dirty(path->nodes[0]);
2935 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937 root->root_key.objectid);
2939 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release
 * @path, and clear I_ERR_NO_ORPHAN_ITEM from rec->errors as part of the
 * repair.
 */
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2950 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951 btrfs_release_path(path);
2953 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of inode rec->ino with rec->found_size, clear
 * I_ERR_FILE_NBYTES_WRONG from the record and report the change on stdout.
 * @path is released before returning.
 */
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958 struct btrfs_root *root,
2959 struct btrfs_path *path,
2960 struct inode_record *rec)
2962 struct btrfs_inode_item *ei;
2963 struct btrfs_key key;
2966 key.objectid = rec->ino;
2967 key.type = BTRFS_INODE_ITEM_KEY;
2970 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2977 /* Since ret == 0, no need to check anything */
2978 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979 struct btrfs_inode_item);
2980 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981 btrfs_mark_buffer_dirty(path->nodes[0]);
2982 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983 printf("reset nbytes for ino %llu root %llu\n",
2984 rec->ino, root->root_key.objectid);
2986 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref, in its own
 * transaction.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is inserted and
 * filled with a dir item pointing at inode rec->ino, the type derived from
 * rec->imode, and the backref's name.  Afterwards the backref is marked as
 * having a dir index, and the parent directory's record gets its found_size
 * increased by the name length and I_ERR_DIR_ISIZE_WRONG re-evaluated.
 */
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991 struct cache_tree *inode_cache,
2992 struct inode_record *rec,
2993 struct inode_backref *backref)
2995 struct btrfs_path path;
2996 struct btrfs_trans_handle *trans;
2997 struct btrfs_dir_item *dir_item;
2998 struct extent_buffer *leaf;
2999 struct btrfs_key key;
3000 struct btrfs_disk_key disk_key;
3001 struct inode_record *dir_rec;
3002 unsigned long name_ptr;
/* Item payload: the dir item header followed directly by the name. */
3003 u32 data_size = sizeof(*dir_item) + backref->namelen;
3006 trans = btrfs_start_transaction(root, 1);
3008 return PTR_ERR(trans);
3010 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011 (unsigned long long)rec->ino);
3013 btrfs_init_path(&path);
3014 key.objectid = backref->dir;
3015 key.type = BTRFS_DIR_INDEX_KEY;
3016 key.offset = backref->index;
3017 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3020 leaf = path.nodes[0];
3021 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3023 disk_key.objectid = cpu_to_le64(rec->ino);
3024 disk_key.type = BTRFS_INODE_ITEM_KEY;
3025 disk_key.offset = 0;
3027 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029 btrfs_set_dir_data_len(leaf, dir_item, 0);
3030 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored immediately after the dir item header. */
3031 name_ptr = (unsigned long)(dir_item + 1);
3032 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033 btrfs_mark_buffer_dirty(leaf);
3034 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not stored here - confirm that is intended. */
3035 btrfs_commit_transaction(trans, root);
3037 backref->found_dir_index = 1;
3038 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the parent's size and re-evaluate its isize error bit. */
3042 dir_rec->found_size += backref->namelen;
3043 if (dir_rec->found_size == dir_rec->isize &&
3044 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046 if (dir_rec->found_size != dir_rec->isize)
3047 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref, in its own transaction.
 *
 * The entry is looked up with btrfs_lookup_dir_index() by directory, name
 * and index.  Two exits are visible, each releasing the path and committing
 * the transaction; the removal itself uses either btrfs_del_item() or
 * btrfs_delete_one_dir_name() (the selecting condition is not shown here).
 */
3052 static int delete_dir_index(struct btrfs_root *root,
3053 struct inode_backref *backref)
3055 struct btrfs_trans_handle *trans;
3056 struct btrfs_dir_item *di;
3057 struct btrfs_path path;
3060 trans = btrfs_start_transaction(root, 1);
3062 return PTR_ERR(trans);
3064 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065 (unsigned long long)backref->dir,
3066 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067 (unsigned long long)root->objectid);
3069 btrfs_init_path(&path);
3070 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071 backref->name, backref->namelen,
3072 backref->index, -1);
/* First exit: release the path and commit. */
3075 btrfs_release_path(&path);
3076 btrfs_commit_transaction(trans, root);
3083 ret = btrfs_del_item(trans, root, &path);
3085 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3087 btrfs_release_path(&path);
3088 btrfs_commit_transaction(trans, root);
/*
 * Build an inode item on the stack and insert it for inode @ino.
 *
 * @size, @nbytes, @nlink and @mode are stored as given; the generation is
 * taken from trans->transid; ctime and mtime are set to the current time and
 * otime to 0.  A warning tells the user the recreated inode may be
 * incomplete.
 */
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root, u64 ino, u64 size,
3094 u64 nbytes, u64 nlink, u32 mode)
3096 struct btrfs_inode_item ii;
3097 time_t now = time(NULL);
3100 btrfs_set_stack_inode_size(&ii, size);
3101 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102 btrfs_set_stack_inode_nlink(&ii, nlink);
3103 btrfs_set_stack_inode_mode(&ii, mode);
3104 btrfs_set_stack_inode_generation(&ii, trans->transid);
/*
 * NOTE(review): only the nanoseconds of atime are set; the seconds are not
 * assigned among these setters, and several other fields of "ii" are not
 * set either.  Confirm that "ii" is zeroed before this point, otherwise
 * stack garbage ends up in the inserted item.
 */
3105 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3113 ret = btrfs_insert_inode(trans, root, ino, &ii);
3116 warning("root %llu inode %llu recreating inode item, this may "
3117 "be incomplete, please check permissions and content after "
3118 "the fsck completes.\n", (unsigned long long)root->objectid,
3119 (unsigned long long)ino);
/*
 * Lowmem-mode wrapper around __create_inode_item(): creates an inode item
 * for @ino with size, nbytes and nlink all 0.  The mode is a directory when
 * filetype is BTRFS_FT_DIR and a regular file otherwise, always with 0755
 * permissions.
 */
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125 struct btrfs_root *root, u64 ino,
3128 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3130 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Re-create the inode item for @rec in its own transaction.
 *
 * nlink is 1 when @root_dir is set and rec->found_link otherwise.  A record
 * with a dir item becomes a directory sized rec->found_size; any other
 * record becomes a regular file sized rec->extent_end.  Permissions are
 * always 0755.
 */
3133 static int create_inode_item(struct btrfs_root *root,
3134 struct inode_record *rec, int root_dir)
3136 struct btrfs_trans_handle *trans;
3142 trans = btrfs_start_transaction(root, 1);
3143 if (IS_ERR(trans)) {
3144 ret = PTR_ERR(trans);
3148 nlink = root_dir ? 1 : rec->found_link;
3149 if (rec->found_dir_item) {
3150 if (rec->found_file_extent)
3151 fprintf(stderr, "root %llu inode %llu has both a dir "
3152 "item and extents, unsure if it is a dir or a "
3153 "regular file so setting it as a directory\n",
3154 (unsigned long long)root->objectid,
3155 (unsigned long long)rec->ino);
3156 mode = S_IFDIR | 0755;
3157 size = rec->found_size;
/* This test is simply the negation of the one above, so it acts as a plain "else". */
3158 } else if (!rec->found_dir_item) {
3159 size = rec->extent_end;
3160 mode = S_IFREG | 0755;
3163 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3165 btrfs_commit_transaction(trans, root);
/*
 * Repair the backrefs of one inode record.
 *
 * The body distinguishes a "delete" pass from an "add" pass through the
 * "delete" flag.  Visible cases:
 *  - a bad dir index (no matching inode ref, or an index mismatch) is
 *    removed with delete_dir_index() and the backref dropped from the list;
 *  - add pass: a missing dir index is re-created with
 *    add_missing_dir_index();
 *  - add pass: a missing dir index/item pair is re-inserted with
 *    btrfs_insert_dir_item() after check_dir_conflict();
 *  - add pass: a missing inode item is re-created with create_inode_item(),
 *    both for the root dir and for fully referenced inodes.
 *
 * Returns the last error if there was one, otherwise the "repaired" value.
 */
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170 struct inode_record *rec,
3171 struct cache_tree *inode_cache,
3174 struct inode_backref *tmp, *backref;
3175 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because backrefs are unlinked inside the loop. */
3179 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3180 if (!delete && rec->ino == root_dirid) {
3181 if (!rec->found_inode_item) {
3182 ret = create_inode_item(root, rec, 1);
3189 /* Index 0 of the root dir is special, don't mess with it */
3190 if (rec->ino == root_dirid && backref->index == 0)
3194 ((backref->found_dir_index && !backref->found_inode_ref) ||
3195 (backref->found_dir_index && backref->found_inode_ref &&
3196 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197 ret = delete_dir_index(root, backref);
3201 list_del(&backref->list);
3206 if (!delete && !backref->found_dir_index &&
3207 backref->found_dir_item && backref->found_inode_ref) {
3208 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref with dir item, dir index and inode ref and no errors needs no further tracking. */
3213 if (backref->found_dir_item &&
3214 backref->found_dir_index) {
3215 if (!backref->errors &&
3216 backref->found_inode_ref) {
3217 list_del(&backref->list);
3224 if (!delete && (!backref->found_dir_index &&
3225 !backref->found_dir_item &&
3226 backref->found_inode_ref)) {
3227 struct btrfs_trans_handle *trans;
3228 struct btrfs_key location;
3230 ret = check_dir_conflict(root, backref->name,
3236 * let nlink fixing routine to handle it,
3237 * which can do it better.
3242 location.objectid = rec->ino;
3243 location.type = BTRFS_INODE_ITEM_KEY;
3244 location.offset = 0;
3246 trans = btrfs_start_transaction(root, 1);
3247 if (IS_ERR(trans)) {
3248 ret = PTR_ERR(trans);
3251 fprintf(stderr, "adding missing dir index/item pair "
3253 (unsigned long long)rec->ino);
3254 ret = btrfs_insert_dir_item(trans, root, backref->name,
3256 backref->dir, &location,
3257 imode_to_type(rec->imode),
3260 btrfs_commit_transaction(trans, root);
3264 if (!delete && (backref->found_inode_ref &&
3265 backref->found_dir_index &&
3266 backref->found_dir_item &&
3267 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268 !rec->found_inode_item)) {
3269 ret = create_inode_item(root, rec, 0);
3276 return ret ? ret : repaired;
3280 * To determine the file type for nlink/inode_item repair
3282 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283 * Return -ENOENT if file type is not found.
/*
 * Prefer the type derived from the inode item's mode; otherwise take the
 * filetype stored in the first backref that has a dir index or a dir item.
 */
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3287 struct inode_backref *backref;
3289 /* For inode item recovered case */
3290 if (rec->found_inode_item) {
3291 *type = imode_to_type(rec->imode);
/* No inode item: fall back to the filetype recorded by a directory entry. */
3295 list_for_each_entry(backref, &rec->backrefs, list) {
3296 if (backref->found_dir_index || backref->found_dir_item) {
3297 *type = backref->filetype;
3305 * To determine the file name for nlink repair
3307 * Return 0 if file name is found, set name and namelen.
3308 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref that has a dir index, a dir item or an
 * inode ref into @name and store its length in *namelen.
 *
 * The copy is not NUL-terminated and there is no size parameter: @name must
 * be large enough for backref->namelen bytes (repair_inode_nlinks() passes a
 * BTRFS_NAME_LEN buffer).
 */
3310 static int find_file_name(struct inode_record *rec,
3311 char *name, int *namelen)
3313 struct inode_backref *backref;
3315 list_for_each_entry(backref, &rec->backrefs, list) {
3316 if (backref->found_dir_index || backref->found_dir_item ||
3317 backref->found_inode_ref) {
3318 memcpy(name, backref->name, backref->namelen);
3319 *namelen = backref->namelen;
3326 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from its backrefs in three steps:
 *  1. unlink every backref with btrfs_unlink(), dropping from the list those
 *     that lack a dir index, dir item or inode ref;
 *  2. set the nlink field of the on-disk inode item to 0;
 *  3. re-add the remaining backrefs with btrfs_add_link().
 * rec->found_link is reset to 0 at the start; @path is released at the end.
 */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root,
3329 struct btrfs_path *path,
3330 struct inode_record *rec)
3332 struct inode_backref *backref;
3333 struct inode_backref *tmp;
3334 struct btrfs_key key;
3335 struct btrfs_inode_item *inode_item;
3338 /* We don't believe this either, reset it and iterate backref */
3339 rec->found_link = 0;
3341 /* Remove all backref including the valid ones */
3342 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344 backref->index, backref->name,
3345 backref->namelen, 0);
3349 /* remove invalid backref, so it won't be added back */
3350 if (!(backref->found_dir_index &&
3351 backref->found_dir_item &&
3352 backref->found_inode_ref)) {
3353 list_del(&backref->list);
3360 /* Set nlink to 0 */
3361 key.objectid = rec->ino;
3362 key.type = BTRFS_INODE_ITEM_KEY;
3364 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3371 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372 struct btrfs_inode_item);
3373 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374 btrfs_mark_buffer_dirty(path->nodes[0]);
3375 btrfs_release_path(path);
3378 * Add back valid inode_ref/dir_item/dir_index,
3379 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only backrefs that survived the pruning loop above are still on the list. */
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383 backref->name, backref->namelen,
3384 backref->filetype, &backref->index, 1, 0);
3389 btrfs_release_path(path);
/*
 * Store in *highest_ino the objectid of the item that precedes the search
 * position for BTRFS_LAST_FREE_OBJECTID / INODE_ITEM in @root.
 *
 * @path is (re)initialised at the start and released before returning.  The
 * result is compared against BTRFS_LAST_FREE_OBJECTID afterwards.
 */
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_path *path,
3398 struct btrfs_key key, found_key;
3401 btrfs_init_path(path);
3402 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3404 key.type = BTRFS_INODE_ITEM_KEY;
3405 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* NOTE(review): slots[0] - 1 is read with no visible check that slots[0] > 0 - confirm. */
3407 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408 path->slots[0] - 1);
3409 *highest_ino = found_key.objectid;
3412 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3414 btrfs_release_path(path);
3419 * Link inode to dir 'lost+found'. Increase @ref_count.
3421 * Returns 0 means success.
3422 * Returns <0 means failure.
/*
 * Create the "lost+found" directory under BTRFS_FIRST_FREE_OBJECTID with
 * btrfs_mkdir() and link inode @ino into it under the name in @namebuf.
 *
 * When the name is already taken (-EEXIST) a suffix is appended to @namebuf
 * and the link is retried, for as long as the longer name still fits in
 * BTRFS_NAME_LEN.  @namebuf is modified in place and must have room for
 * BTRFS_NAME_LEN bytes.
 */
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *root,
3426 struct btrfs_path *path,
3427 u64 ino, char *namebuf, u32 name_len,
3428 u8 filetype, u64 *ref_count)
3430 char *dir_name = "lost+found";
3435 btrfs_release_path(path);
/* The highest inode number in use is the starting point for lost_found_ino. */
3436 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3441 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3445 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3448 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449 namebuf, name_len, filetype, NULL, 1, 0);
3451 * Add ".INO" suffix several times to handle case where
3452 * "FILENAME.INO" is already taken by another file.
3454 while (ret == -EEXIST) {
3456 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3458 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3462 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3464 name_len += count_digits(ino) + 1;
3465 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466 name_len, filetype, NULL, 1, 0);
3469 error("failed to link the inode %llu to %s dir: %s",
3470 ino, dir_name, strerror(-ret));
3475 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476 name_len, namebuf, dir_name);
3478 btrfs_release_path(path);
3480 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Fix a wrong link count for rec->ino.
 *
 * The file name and type are recovered from the backrefs first (falling
 * back to the inode number as name and to a regular file as type), then
 * reset_nlink() rebuilds the links.  If no link is left afterwards the inode
 * is moved to lost+found.  I_ERR_LINK_COUNT_WRONG is cleared from the
 * record and @path is released before returning.
 */
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486 struct btrfs_root *root,
3487 struct btrfs_path *path,
3488 struct inode_record *rec)
3490 char namebuf[BTRFS_NAME_LEN] = {0};
3493 int name_recovered = 0;
3494 int type_recovered = 0;
3498 * Get file name and type first before these invalid inode ref
3499 * are deleted by remove_all_invalid_backref()
/* In this file the invalid backrefs are dropped by reset_nlink(), called below. */
3501 name_recovered = !find_file_name(rec, namebuf, &namelen);
3502 type_recovered = !find_file_type(rec, &type);
3504 if (!name_recovered) {
3505 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506 rec->ino, rec->ino);
3507 namelen = count_digits(rec->ino);
3508 sprintf(namebuf, "%llu", rec->ino);
3511 if (!type_recovered) {
3512 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3514 type = BTRFS_FT_REG_FILE;
3518 ret = reset_nlink(trans, root, path, rec);
3521 "Failed to reset nlink for inode %llu: %s\n",
3522 rec->ino, strerror(-ret));
3526 if (rec->found_link == 0) {
/* NOTE(review): the cast assumes rec->found_link is 64 bits wide - confirm against the struct. */
3527 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528 namebuf, namelen, type,
3529 (u64 *)&rec->found_link);
3533 printf("Fixed the nlink of inode %llu\n", rec->ino);
3536 * Clear the flag anyway, or we will loop forever for the same inode
3537 * as it will not be removed from the bad inode list and the dead loop
3540 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541 btrfs_release_path(path);
3546 * Check if there is any normal(reg or prealloc) file extent for given
3548 * This is used to determine the file type when neither its dir_index/item nor
3549 * inode_item exists.
3551 * This will *NOT* report error, if any error happens, just consider it does
3552 * not have any normal file extent.
/*
 * Read-only search (no transaction) of @root for EXTENT_DATA items of inode
 * @ino; an item counts as "normal" when its type is anything other than
 * BTRFS_FILE_EXTENT_INLINE.  The path is released before returning.
 */
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3556 struct btrfs_path path;
3557 struct btrfs_key key;
3558 struct btrfs_key found_key;
3559 struct btrfs_file_extent_item *fi;
3563 btrfs_init_path(&path);
3565 key.type = BTRFS_EXTENT_DATA_KEY;
3568 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may land past the last item of a leaf; move on to the next leaf then. */
3573 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574 ret = btrfs_next_leaf(root, &path);
3581 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3583 if (found_key.objectid != ino ||
3584 found_key.type != BTRFS_EXTENT_DATA_KEY)
3586 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587 struct btrfs_file_extent_item);
3588 type = btrfs_file_extent_type(path.nodes[0], fi);
3589 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595 btrfs_release_path(&path);
3599 static u32 btrfs_type_to_imode(u8 type)
3601 static u32 imode_by_btrfs_type[] = {
3602 [BTRFS_FT_REG_FILE] = S_IFREG,
3603 [BTRFS_FT_DIR] = S_IFDIR,
3604 [BTRFS_FT_CHRDEV] = S_IFCHR,
3605 [BTRFS_FT_BLKDEV] = S_IFBLK,
3606 [BTRFS_FT_FIFO] = S_IFIFO,
3607 [BTRFS_FT_SOCK] = S_IFSOCK,
3608 [BTRFS_FT_SYMLINK] = S_IFLNK,
3611 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec->ino.
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * guessed from the record: a non-inline file extent or an orphan extent
 * means regular file, a dir item means directory, and a regular file is the
 * fallback.  Only the inode item is created (btrfs_new_inode()); link
 * repair is left to the nlink repair, which is forced to run by setting
 * I_ERR_LINK_COUNT_WRONG.
 */
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path,
3617 struct inode_record *rec)
3621 int type_recovered = 0;
3624 printf("Trying to rebuild inode:%llu\n", rec->ino);
3626 type_recovered = !find_file_type(rec, &filetype);
3629 * Try to determine inode type if type not found.
3631 * For found regular file extent, it must be FILE.
3632 * For found dir_item/index, it must be DIR.
3634 * For undetermined one, use FILE as fallback.
3637 * 1. If found backref(inode_index/item is already handled) to it,
3639 * Need new inode-inode ref structure to allow search for that.
3641 if (!type_recovered) {
3642 if (rec->found_file_extent &&
3643 find_normal_file_extent(root, rec->ino)) {
3645 filetype = BTRFS_FT_REG_FILE;
3646 } else if (rec->found_dir_item) {
3648 filetype = BTRFS_FT_DIR;
3649 } else if (!list_empty(&rec->orphan_extents)) {
3651 filetype = BTRFS_FT_REG_FILE;
3653 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3656 filetype = BTRFS_FT_REG_FILE;
3660 ret = btrfs_new_inode(trans, root, rec->ino,
3661 mode | btrfs_type_to_imode(filetype));
3666 * Here inode rebuild is done, we only rebuild the inode item,
3667 * don't repair the nlink(like move to lost+found).
3668 * That is the job of nlink repair.
3670 * We just fill the record and return
/*
 * NOTE(review): this sets found_dir_item for every file type, while
 * found_inode_item is not set among the lines here - confirm which flag
 * was intended.
 */
3672 rec->found_dir_item = 1;
3673 rec->imode = mode | btrfs_type_to_imode(filetype);
3675 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676 /* Ensure the inode_nlinks repair function will be called */
3677 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec.
 *
 * Each orphan is first probed with btrfs_get_extent() for a conflicting
 * file extent.  A conflicting orphan is released with btrfs_free_extent();
 * otherwise a file extent is inserted using disk_len as both lengths, and
 * the record's found_size and hole tree are updated.  Processed orphans are
 * unlinked from the list, and I_ERR_FILE_EXTENT_ORPHAN is cleared at the
 * end.
 */
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683 struct btrfs_root *root,
3684 struct btrfs_path *path,
3685 struct inode_record *rec)
3687 struct orphan_data_extent *orphan;
3688 struct orphan_data_extent *tmp;
/* The _safe iterator is needed because entries are unlinked inside the loop. */
3691 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3693 * Check for conflicting file extents
3695 * Here we don't know whether the extents is compressed or not,
3696 * so we can only assume it not compressed nor data offset,
3697 * and use its disk_len as extent length.
3699 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700 orphan->offset, orphan->disk_len, 0);
3701 btrfs_release_path(path);
3706 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707 orphan->disk_bytenr, orphan->disk_len);
3708 ret = btrfs_free_extent(trans,
3709 root->fs_info->extent_root,
3710 orphan->disk_bytenr, orphan->disk_len,
3711 0, root->objectid, orphan->objectid,
3716 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717 orphan->offset, orphan->disk_bytenr,
3718 orphan->disk_len, orphan->disk_len);
3722 /* Update file size info */
3723 rec->found_size += orphan->disk_len;
3724 if (rec->found_size == rec->nbytes)
3725 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3727 /* Update the file extent hole info too */
3728 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3732 if (RB_EMPTY_ROOT(&rec->holes))
3733 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3735 list_del(&orphan->list);
3738 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes.
 *
 * Each hole is punched with btrfs_punch_hole() and then removed from the
 * hole tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 * A file that lost all of its extents gets a single hole from offset 0 up
 * to isize rounded up to the sector size.
 */
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744 struct btrfs_root *root,
3745 struct btrfs_path *path,
3746 struct inode_record *rec)
3748 struct rb_node *node;
3749 struct file_extent_hole *hole;
3753 node = rb_first(&rec->holes);
3757 hole = rb_entry(node, struct file_extent_hole, node);
3758 ret = btrfs_punch_hole(trans, root, rec->ino,
3759 hole->start, hole->len);
3762 ret = del_file_extent_hole(&rec->holes, hole->start,
3766 if (RB_EMPTY_ROOT(&rec->holes))
3767 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Always restart from the first remaining hole, since the tree was just modified. */
3768 node = rb_first(&rec->holes);
3770 /* special case for a file losing all its file extent */
3772 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773 round_up(rec->isize,
3774 root->fs_info->sectorsize));
3778 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779 rec->ino, root->objectid);
/*
 * Run the inode repairs that match the error bits set in rec->errors,
 * inside a single transaction.
 *
 * The repairs run in a fixed order (missing inode item, orphan extents,
 * discount extents, dir isize, orphan item, nlink, nbytes); every repair
 * after the first is skipped once an earlier one returned non-zero.
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 */
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
/* Test whether any of the error bits this function can repair is set. */
3790 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791 I_ERR_NO_ORPHAN_ITEM |
3792 I_ERR_LINK_COUNT_WRONG |
3793 I_ERR_NO_INODE_ITEM |
3794 I_ERR_FILE_EXTENT_ORPHAN |
3795 I_ERR_FILE_EXTENT_DISCOUNT|
3796 I_ERR_FILE_NBYTES_WRONG)))
3800 * For nlink repair, it may create a dir and add link, so
3801 * 2 for parent(256)'s dir_index and dir_item
3802 * 2 for lost+found dir's inode_item and inode_ref
3803 * 1 for the new inode_ref of the file
3804 * 2 for lost+found dir's dir_index and dir_item for the file
3806 trans = btrfs_start_transaction(root, 7);
3808 return PTR_ERR(trans);
3810 btrfs_init_path(&path);
3811 if (rec->errors & I_ERR_NO_INODE_ITEM)
3812 ret = repair_inode_no_item(trans, root, &path, rec);
3813 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818 ret = repair_inode_isize(trans, root, &path, rec);
3819 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822 ret = repair_inode_nlinks(trans, root, &path, rec);
3823 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the commit result is not stored, and the path is released only after the commit. */
3825 btrfs_commit_transaction(trans, root);
3826 btrfs_release_path(&path);
/*
 * Check, and in repair mode try to fix, every inode record collected for
 * @root, consuming @inode_cache in the process.
 *
 * Visible phases:
 *  1. in repair mode, walk the cache in stages and repair backrefs with
 *     repair_inode_backrefs();
 *  2. look up the root directory record and verify it with
 *     check_root_dir(); a missing root dir can be re-created with
 *     btrfs_make_root_dir();
 *  3. drain the cache: the root dir and the orphan objectid are skipped,
 *     orphan items are re-checked, the NO_INODE_ITEM and LINK_COUNT_WRONG
 *     bits are set where they apply, try_repair_inode() is attempted, and
 *     whatever remains is printed together with its unresolved backrefs.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3830 static int check_inode_recs(struct btrfs_root *root,
3831 struct cache_tree *inode_cache)
3833 struct cache_extent *cache;
3834 struct ptr_node *node;
3835 struct inode_record *rec;
3836 struct inode_backref *backref;
3841 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is handled specially; a non-empty inode cache only triggers a warning here. */
3843 if (btrfs_root_refs(&root->root_item) == 0) {
3844 if (!cache_tree_empty(inode_cache))
3845 fprintf(stderr, "warning line %d\n", __LINE__);
3850 * We need to repair backrefs first because we could change some of the
3851 * errors in the inode recs.
3853 * We also need to go through and delete invalid backrefs first and then
3854 * add the correct ones second. We do this because we may get EEXIST
3855 * when adding back the correct index because we hadn't yet deleted the
3858 * For example, if we were missing a dir index then the directories
3859 * isize would be wrong, so if we fixed the isize to what we thought it
3860 * would be and then fixed the backref we'd still have a invalid fs, so
3861 * we need to add back the dir index and then check to see if the isize
3866 if (stage == 3 && !err)
3869 cache = search_cache_extent(inode_cache, 0);
3870 while (repair && cache) {
3871 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry can be removed from the cache. */
3873 cache = next_cache_extent(cache);
3875 /* Need to free everything up and rescan */
3877 remove_cache_extent(inode_cache, &node->cache);
3879 free_inode_rec(rec);
3883 if (list_empty(&rec->backrefs))
3886 ret = repair_inode_backrefs(root, rec, inode_cache,
3900 rec = get_inode_rec(inode_cache, root_dirid, 0);
3901 BUG_ON(IS_ERR(rec));
3903 ret = check_root_dir(rec);
3905 fprintf(stderr, "root %llu root dir %llu error\n",
3906 (unsigned long long)root->root_key.objectid,
3907 (unsigned long long)root_dirid);
3908 print_inode_error(root, rec);
3913 struct btrfs_trans_handle *trans;
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 err = PTR_ERR(trans);
3922 "root %llu missing its root dir, recreating\n",
3923 (unsigned long long)root->objectid);
3925 ret = btrfs_make_root_dir(trans, root, root_dirid);
3928 btrfs_commit_transaction(trans, root);
3932 fprintf(stderr, "root %llu root dir %llu not found\n",
3933 (unsigned long long)root->root_key.objectid,
3934 (unsigned long long)root_dirid);
/* Drain the cache: every record is removed and either freed or reported. */
3938 cache = search_cache_extent(inode_cache, 0);
3941 node = container_of(cache, struct ptr_node, cache);
3943 remove_cache_extent(inode_cache, &node->cache);
3945 if (rec->ino == root_dirid ||
3946 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947 free_inode_rec(rec);
3951 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952 ret = check_orphan_item(root, rec->ino);
3954 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955 if (can_free_inode_rec(rec)) {
3956 free_inode_rec(rec);
3961 if (!rec->found_inode_item)
3962 rec->errors |= I_ERR_NO_INODE_ITEM;
3963 if (rec->found_link != rec->nlink)
3964 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3966 ret = try_repair_inode(root, rec);
3967 if (ret == 0 && can_free_inode_rec(rec)) {
3968 free_inode_rec(rec);
3974 if (!(repair && ret == 0))
3976 print_inode_error(root, rec);
/* Report every backref still attached, flagging which of its three parts is missing. */
3977 list_for_each_entry(backref, &rec->backrefs, list) {
3978 if (!backref->found_dir_item)
3979 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980 if (!backref->found_dir_index)
3981 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982 if (!backref->found_inode_ref)
3983 backref->errors |= REF_ERR_NO_INODE_REF;
3984 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985 " namelen %u name %s filetype %d errors %x",
3986 (unsigned long long)backref->dir,
3987 (unsigned long long)backref->index,
3988 backref->namelen, backref->name,
3989 backref->filetype, backref->errors);
3990 print_ref_error(backref->errors);
3992 free_inode_rec(rec);
3994 return (error > 0) ? -1 : 0;
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4000 struct cache_extent *cache;
4001 struct root_record *rec = NULL;
4004 cache = lookup_cache_extent(root_cache, objectid, 1);
4006 rec = container_of(cache, struct root_record, cache);
4008 rec = calloc(1, sizeof(*rec));
4010 return ERR_PTR(-ENOMEM);
4011 rec->objectid = objectid;
4012 INIT_LIST_HEAD(&rec->backrefs);
4013 rec->cache.start = objectid;
4014 rec->cache.size = 1;
4016 ret = insert_cache_extent(root_cache, &rec->cache);
4018 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name); if none
 * matches, allocate a zeroed one with a NUL-terminated copy of the name and
 * append it to rec->backrefs.  @index is stored in a newly created backref
 * but is not part of the match.
 */
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024 u64 ref_root, u64 dir, u64 index,
4025 const char *name, int namelen)
4027 struct root_backref *backref;
4029 list_for_each_entry(backref, &rec->backrefs, list) {
4030 if (backref->ref_root != ref_root || backref->dir != dir ||
4031 backref->namelen != namelen)
4033 if (memcmp(name, backref->name, namelen))
/* One extra byte so the stored name can be NUL-terminated. */
4038 backref = calloc(1, sizeof(*backref) + namelen + 1);
4041 backref->ref_root = ref_root;
4043 backref->index = index;
4044 backref->namelen = namelen;
4045 memcpy(backref->name, name, namelen);
4046 backref->name[namelen] = '\0';
4047 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root record that embeds @cache: each backref is unlinked
 * from rec->backrefs in turn until the list is empty.
 */
4051 static void free_root_record(struct cache_extent *cache)
4053 struct root_record *rec;
4054 struct root_backref *backref;
4056 rec = container_of(cache, struct root_record, cache);
4057 while (!list_empty(&rec->backrefs)) {
4058 backref = to_root_backref(rec->backrefs.next);
4059 list_del(&backref->list);
/*
 * Instantiates the tree teardown helper for root records, with
 * free_root_record() as the per-entry destructor.  Presumably this
 * generates free_root_recs_tree() - confirm against the macro definition.
 */
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (@item_type) for the reference from
 * @ref_root to root @root_id under the name @name in directory @dir.
 *
 * The matching backref is looked up or created, @errors is OR-ed into it,
 * and the found_* flag for @item_type is set.  For every item type except
 * DIR_ITEM the index is recorded, or compared with an index recorded
 * earlier (a mismatch sets REF_ERR_INDEX_UNMATCH).  Duplicate root refs and
 * root backrefs are flagged.  The backref becomes reachable once both the
 * forward ref and the dir item have been seen.
 */
4068 static int add_root_backref(struct cache_tree *root_cache,
4069 u64 root_id, u64 ref_root, u64 dir, u64 index,
4070 const char *name, int namelen,
4071 int item_type, int errors)
4073 struct root_record *rec;
4074 struct root_backref *backref;
4076 rec = get_root_rec(root_cache, root_id);
4077 BUG_ON(IS_ERR(rec));
4078 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4081 backref->errors |= errors;
4083 if (item_type != BTRFS_DIR_ITEM_KEY) {
4084 if (backref->found_dir_index || backref->found_back_ref ||
4085 backref->found_forward_ref) {
4086 if (backref->index != index)
4087 backref->errors |= REF_ERR_INDEX_UNMATCH;
4089 backref->index = index;
4093 if (item_type == BTRFS_DIR_ITEM_KEY) {
4094 if (backref->found_forward_ref)
4096 backref->found_dir_item = 1;
4097 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098 backref->found_dir_index = 1;
4099 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100 if (backref->found_forward_ref)
4101 backref->errors |= REF_ERR_DUP_ROOT_REF;
4102 else if (backref->found_dir_item)
4104 backref->found_forward_ref = 1;
4105 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106 if (backref->found_back_ref)
4107 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108 backref->found_back_ref = 1;
/* Reachable means both the forward root ref and the dir item exist. */
4113 if (backref->found_forward_ref && backref->found_dir_item)
4114 backref->reachable = 1;
/*
 * Fold the records collected for one tree (@src_cache) into the global root
 * cache (@dst_cache).
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise every
 * entry is removed from @src_cache and tested with is_child_root(); for each
 * of its inode backrefs that has a dir item and/or a dir index, a matching
 * root backref of type DIR_ITEM / DIR_INDEX is added to @dst_cache, keyed by
 * the record's inode number with this root as the referrer.  The inode
 * record is freed afterwards.
 */
4118 static int merge_root_recs(struct btrfs_root *root,
4119 struct cache_tree *src_cache,
4120 struct cache_tree *dst_cache)
4122 struct cache_extent *cache;
4123 struct ptr_node *node;
4124 struct inode_record *rec;
4125 struct inode_backref *backref;
4128 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129 free_inode_recs_tree(src_cache);
4134 cache = search_cache_extent(src_cache, 0);
4137 node = container_of(cache, struct ptr_node, cache);
4139 remove_cache_extent(src_cache, &node->cache);
4142 ret = is_child_root(root, root->objectid, rec->ino);
4148 list_for_each_entry(backref, &rec->backrefs, list) {
4149 BUG_ON(backref->found_inode_ref);
4150 if (backref->found_dir_item)
4151 add_root_backref(dst_cache, rec->ino,
4152 root->root_key.objectid, backref->dir,
4153 backref->index, backref->name,
4154 backref->namelen, BTRFS_DIR_ITEM_KEY,
4156 if (backref->found_dir_index)
4157 add_root_backref(dst_cache, rec->ino,
4158 root->root_key.objectid, backref->dir,
4159 backref->index, backref->name,
4160 backref->namelen, BTRFS_DIR_INDEX_KEY,
4164 free_inode_rec(rec);
/*
 * Cross-check the references between subvolume roots recorded in
 * @root_cache and report inconsistencies on stderr.
 *
 * The first scan over the cache looks at records with a non-zero found_ref
 * and clears the reachable flag of any backref whose referring root has no
 * references itself.  The second scan reports fs trees that end up
 * unreferenced (after consulting check_orphan_item and skipping records
 * without a root item), fills in the REF_ERR_NO_* bits for every backref
 * that lacks a dir item, dir index, back ref or forward ref, and prints the
 * unresolved references.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
4171 static int check_root_refs(struct btrfs_root *root,
4172 struct cache_tree *root_cache)
4174 struct root_record *rec;
4175 struct root_record *ref_root;
4176 struct root_backref *backref;
4177 struct cache_extent *cache;
4183 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184 BUG_ON(IS_ERR(rec));
4187 /* FIXME: this cannot detect circular references */
4190 cache = search_cache_extent(root_cache, 0);
4194 rec = container_of(cache, struct root_record, cache);
4195 cache = next_cache_extent(cache);
4197 if (rec->found_ref == 0)
4200 list_for_each_entry(backref, &rec->backrefs, list) {
4201 if (!backref->reachable)
4204 ref_root = get_root_rec(root_cache,
4206 BUG_ON(IS_ERR(ref_root));
4207 if (ref_root->found_ref > 0)
4210 backref->reachable = 0;
4212 if (rec->found_ref == 0)
4218 cache = search_cache_extent(root_cache, 0);
4222 rec = container_of(cache, struct root_record, cache);
4223 cache = next_cache_extent(cache);
4225 if (rec->found_ref == 0 &&
4226 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228 ret = check_orphan_item(root->fs_info->tree_root,
4234 * If we don't have a root item then we likely just have
4235 * a dir item in a snapshot for this root but no actual
4236 * ref key or anything so it's meaningless.
4238 if (!rec->found_root_item)
4241 fprintf(stderr, "fs tree %llu not referenced\n",
4242 (unsigned long long)rec->objectid);
4246 if (rec->found_ref > 0 && !rec->found_root_item)
4248 list_for_each_entry(backref, &rec->backrefs, list) {
4249 if (!backref->found_dir_item)
4250 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251 if (!backref->found_dir_index)
4252 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253 if (!backref->found_back_ref)
4254 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255 if (!backref->found_forward_ref)
4256 backref->errors |= REF_ERR_NO_ROOT_REF;
4257 if (backref->reachable && backref->errors)
4264 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265 (unsigned long long)rec->objectid, rec->found_ref,
4266 rec->found_root_item ? "" : "not found");
4268 list_for_each_entry(backref, &rec->backrefs, list) {
4269 if (!backref->reachable)
4271 if (!backref->errors && rec->found_root_item)
4273 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274 " index %llu namelen %u name %s errors %x\n",
4275 (unsigned long long)backref->ref_root,
4276 (unsigned long long)backref->dir,
4277 (unsigned long long)backref->index,
4278 backref->namelen, backref->name,
4280 print_ref_error(backref->errors);
4283 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF or ROOT_BACKREF item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * A name longer than BTRFS_NAME_LEN is truncated to that length and tagged
 * with REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced root is
 * key->offset and the referring root is key->objectid; for the other key
 * type the two roles are swapped.
 */
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287 struct btrfs_key *key,
4288 struct cache_tree *root_cache)
4294 struct btrfs_root_ref *ref;
4295 char namebuf[BTRFS_NAME_LEN];
4298 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4300 dirid = btrfs_root_ref_dirid(eb, ref);
4301 index = btrfs_root_ref_sequence(eb, ref);
4302 name_len = btrfs_root_ref_name_len(eb, ref);
4304 if (name_len <= BTRFS_NAME_LEN) {
4308 len = BTRFS_NAME_LEN;
4309 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly behind the fixed-size ref structure. */
4311 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4313 if (key->type == BTRFS_ROOT_REF_KEY) {
4314 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315 index, namebuf, len, key->type, error);
4317 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: recovers the
 * btrfs_corrupt_block that embeds @cache.
 */
4323 static void free_corrupt_block(struct cache_extent *cache)
4325 struct btrfs_corrupt_block *corrupt;
4327 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the cache-tree teardown helper for corrupt_blocks using
 * free_corrupt_block as the per-entry callback (presumably this provides
 * free_corrupt_blocks_tree(), which is called elsewhere in this file).
 */
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4334 * Repair the btree of the given root.
4336 * The fix is to remove the node key in corrupt_blocks cache_tree.
4337 * and rebalance the tree.
4338 * After the fix, the btree should be writeable.
/*
 * Works in two passes over @corrupt_blocks inside a single transaction:
 *
 *  1. For every recorded block, search down to the recorded level with
 *     ins_len 0 (no balancing), read the block pointer found there, delete
 *     that pointer from the parent and free the extent it referred to.
 *  2. Search for every recorded key again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 *
 * Nothing is done when @corrupt_blocks is empty.  The transaction is
 * committed at the end.
 */
4340 static int repair_btree(struct btrfs_root *root,
4341 struct cache_tree *corrupt_blocks)
4343 struct btrfs_trans_handle *trans;
4344 struct btrfs_path path;
4345 struct btrfs_corrupt_block *corrupt;
4346 struct cache_extent *cache;
4347 struct btrfs_key key;
4352 if (cache_tree_empty(corrupt_blocks))
4355 trans = btrfs_start_transaction(root, 1);
4356 if (IS_ERR(trans)) {
4357 ret = PTR_ERR(trans);
4358 fprintf(stderr, "Error starting transaction: %s\n",
4362 btrfs_init_path(&path);
4363 cache = first_cache_extent(corrupt_blocks);
4365 corrupt = container_of(cache, struct btrfs_corrupt_block,
4367 level = corrupt->level;
4368 path.lowest_level = level;
4369 key.objectid = corrupt->key.objectid;
4370 key.type = corrupt->key.type;
4371 key.offset = corrupt->key.offset;
4374 * Here we don't want to do any tree balance, since it may
4375 * cause a balance with corrupted brother leaf/node,
4376 * so ins_len set to 0 here.
4377 * Balance will be done after all corrupt node/leaf is deleted.
4379 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4382 offset = btrfs_node_blockptr(path.nodes[level],
4385 /* Remove the ptr */
4386 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4390 * Remove the corresponding extent
4391 * return value is not concerned.
4393 btrfs_release_path(&path);
4394 ret = btrfs_free_extent(trans, root, offset,
4395 root->fs_info->nodesize, 0,
4396 root->root_key.objectid, level - 1, 0);
4397 cache = next_cache_extent(cache);
4400 /* Balance the btree using btrfs_search_slot() */
4401 cache = first_cache_extent(corrupt_blocks);
4403 corrupt = container_of(cache, struct btrfs_corrupt_block,
4405 memcpy(&key, &corrupt->key, sizeof(key));
4406 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4409 /* return will always >0 since it won't find the item */
4411 btrfs_release_path(&path);
4412 cache = next_cache_extent(cache);
/* NOTE(review): the result of the commit is not captured here - confirm that is intended. */
4415 btrfs_commit_transaction(trans, root);
4416 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * A function-local corrupt_blocks cache is installed in root->fs_info for
 * the duration of the call and removed again at the end.  Orphan data
 * extents queued on the root are moved onto their inode records (flagged
 * I_ERR_FILE_EXTENT_ORPHAN), the root block itself is validated, and the
 * tree is walked with walk_down_tree()/walk_up_tree().  When the root item
 * has no references and a non-zero drop_progress key, the walk starts from
 * that key at drop_level instead of from slot 0 of the root node.
 *
 * After the walk any corrupted tree blocks that were recorded are listed
 * and handed to repair_btree(), the collected root records are merged into
 * @root_cache and the inode records are checked with check_inode_recs().
 */
4420 static int check_fs_root(struct btrfs_root *root,
4421 struct cache_tree *root_cache,
4422 struct walk_control *wc)
4428 struct btrfs_path path;
4429 struct shared_node root_node;
4430 struct root_record *rec;
4431 struct btrfs_root_item *root_item = &root->root_item;
4432 struct cache_tree corrupt_blocks;
4433 struct orphan_data_extent *orphan;
4434 struct orphan_data_extent *tmp;
4435 enum btrfs_tree_block_status status;
4436 struct node_refs nrefs;
4439 * Reuse the corrupt_block cache tree to record corrupted tree block
4441 * Unlike the usage in extent tree check, here we do it in a per
4442 * fs/subvol tree base.
4444 cache_tree_init(&corrupt_blocks);
4445 root->fs_info->corrupt_blocks = &corrupt_blocks;
4447 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448 rec = get_root_rec(root_cache, root->root_key.objectid);
4449 BUG_ON(IS_ERR(rec));
4450 if (btrfs_root_refs(root_item) > 0)
4451 rec->found_root_item = 1;
4454 btrfs_init_path(&path);
4455 memset(&root_node, 0, sizeof(root_node));
4456 cache_tree_init(&root_node.root_cache);
4457 cache_tree_init(&root_node.inode_cache);
4458 memset(&nrefs, 0, sizeof(nrefs));
4460 /* Move the orphan extent record to corresponding inode_record */
4461 list_for_each_entry_safe(orphan, tmp,
4462 &root->orphan_data_extents, list) {
4463 struct inode_record *inode;
4465 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4467 BUG_ON(IS_ERR(inode));
4468 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469 list_move(&orphan->list, &inode->orphan_extents);
4472 level = btrfs_header_level(root->node);
4473 memset(wc->nodes, 0, sizeof(wc->nodes));
4474 wc->nodes[level] = &root_node;
4475 wc->active_node = level;
4476 wc->root_level = level;
4478 /* We may not have checked the root block, lets do that now */
4479 if (btrfs_is_leaf(root->node))
4480 status = btrfs_check_leaf(root, NULL, root->node);
4482 status = btrfs_check_node(root, NULL, root->node);
4483 if (status != BTRFS_TREE_BLOCK_CLEAN)
4486 if (btrfs_root_refs(root_item) > 0 ||
4487 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488 path.nodes[level] = root->node;
4489 extent_buffer_get(root->node);
4490 path.slots[level] = 0;
4492 struct btrfs_key key;
4493 struct btrfs_disk_key found_key;
4495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496 level = root_item->drop_level;
4497 path.lowest_level = level;
4498 if (level > btrfs_header_level(root->node) ||
4499 level >= BTRFS_MAX_LEVEL) {
4500 error("ignoring invalid drop level: %u", level);
4503 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4506 btrfs_node_key(path.nodes[level], &found_key,
4508 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509 sizeof(found_key)));
4513 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519 wret = walk_up_tree(root, &path, wc, &level);
4526 btrfs_release_path(&path);
4528 if (!cache_tree_empty(&corrupt_blocks)) {
4529 struct cache_extent *cache;
4530 struct btrfs_corrupt_block *corrupt;
4532 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533 root->root_key.objectid);
4534 cache = first_cache_extent(&corrupt_blocks);
4536 corrupt = container_of(cache,
4537 struct btrfs_corrupt_block,
4539 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540 cache->start, corrupt->level,
4541 corrupt->key.objectid, corrupt->key.type,
4542 corrupt->key.offset);
4543 cache = next_cache_extent(cache);
4546 printf("Try to repair the btree for root %llu\n",
4547 root->root_key.objectid);
4548 ret = repair_btree(root, &corrupt_blocks);
4550 fprintf(stderr, "Failed to repair btree: %s\n",
4553 printf("Btree for root %llu is fixed\n",
4554 root->root_key.objectid);
4558 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4562 if (root_node.current) {
4563 root_node.current->checked = 1;
4564 maybe_free_inode_rec(&root_node.inode_cache,
4568 err = check_inode_recs(root, &root_node.inode_cache);
/* Drop the per-call cache before returning so fs_info never points at this stack frame. */
4572 free_corrupt_blocks_tree(&corrupt_blocks);
4573 root->fs_info->corrupt_blocks = NULL;
4574 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether a ROOT_ITEM with @objectid should be checked as an fs
 * tree.  The tree-reloc and data-reloc tree ids are handled by a dedicated
 * branch; every other id is classified by is_fstree().
 */
4578 static int fs_root_objectid(u64 objectid)
4580 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4583 return is_fstree(objectid);
/*
 * Iterate over the items of the tree root and check every fs tree.
 *
 * Each ROOT_ITEM whose objectid passes fs_root_objectid() is read (without
 * caching for the tree-reloc root, via btrfs_read_fs_root() with key.offset
 * set to (u64)-1 otherwise) and passed to check_fs_root().  ROOT_REF and
 * ROOT_BACKREF items are handed to process_root_ref().  If the tree root
 * node changed underneath the iteration, or check_fs_root() returned
 * -EAGAIN, @root_cache is emptied and the path released (presumably the
 * scan is then restarted - the jump is not visible in this listing).
 *
 * The progress task is started when progress reporting is enabled and
 * task_stop() is called at the end.
 */
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587 struct cache_tree *root_cache)
4589 struct btrfs_path path;
4590 struct btrfs_key key;
4591 struct walk_control wc;
4592 struct extent_buffer *leaf, *tree_node;
4593 struct btrfs_root *tmp_root;
4594 struct btrfs_root *tree_root = fs_info->tree_root;
4598 if (ctx.progress_enabled) {
4599 ctx.tp = TASK_FS_ROOTS;
4600 task_start(ctx.info);
4604 * Just in case we made any changes to the extent tree that weren't
4605 * reflected into the free space cache yet.
4608 reset_cached_block_groups(fs_info);
4609 memset(&wc, 0, sizeof(wc));
4610 cache_tree_init(&wc.shared);
4611 btrfs_init_path(&path);
4616 key.type = BTRFS_ROOT_ITEM_KEY;
4617 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4622 tree_node = tree_root->node;
4624 if (tree_node != tree_root->node) {
4625 free_root_recs_tree(root_cache);
4626 btrfs_release_path(&path);
4629 leaf = path.nodes[0];
4630 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631 ret = btrfs_next_leaf(tree_root, &path);
4637 leaf = path.nodes[0];
4639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641 fs_root_objectid(key.objectid)) {
4642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643 tmp_root = btrfs_read_fs_root_no_cache(
4646 key.offset = (u64)-1;
4647 tmp_root = btrfs_read_fs_root(
4650 if (IS_ERR(tmp_root)) {
4654 ret = check_fs_root(tmp_root, root_cache, &wc);
4655 if (ret == -EAGAIN) {
4656 free_root_recs_tree(root_cache);
4657 btrfs_release_path(&path);
/* Only the uncached reloc root is freed here; other roots are not released by this function. */
4662 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663 btrfs_free_fs_root(tmp_root);
4664 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665 key.type == BTRFS_ROOT_BACKREF_KEY) {
4666 process_root_ref(leaf, path.slots[0], &key,
4673 btrfs_release_path(&path);
4675 free_extent_cache_tree(&wc.shared);
4676 if (!cache_tree_empty(&wc.shared))
4677 fprintf(stderr, "warning line %d\n", __LINE__);
4679 task_stop(ctx.info);
4685 * Find the @index according by @ino and name.
4686 * Notice:time efficiency is O(N)
4688 * @root: the root of the fs/file tree
4689 * @index_ret: the index as return value
4690 * @namebuf: the name to match
4691 * @name_len: the length of name to match
4692 * @file_type: the file_type of INODE_ITEM to match
4694 * Returns 0 if found and *@index_ret will be modified with right value
4695 * Returns< 0 not found and *@index_ret will be (u64)-1
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698 u64 *index_ret, char *namebuf, u32 name_len,
4701 struct btrfs_path path;
4702 struct extent_buffer *node;
4703 struct btrfs_dir_item *di;
4704 struct btrfs_key key;
4705 struct btrfs_key location;
4706 char name[BTRFS_NAME_LEN] = {0};
4718 /* search from the last index */
4719 key.objectid = dirid;
4720 key.offset = (u64)-1;
4721 key.type = BTRFS_DIR_INDEX_KEY;
4723 btrfs_init_path(&path);
4724 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4729 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4732 *index_ret = (64)-1;
4735 /* Check whether inode_id/filetype/name match */
4736 node = path.nodes[0];
4737 slot = path.slots[0];
4738 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4742 len = btrfs_dir_name_len(node, di);
4743 data_len = btrfs_dir_data_len(node, di);
4745 btrfs_dir_item_key_to_cpu(node, di, &location);
4746 if (location.objectid != location_id ||
4747 location.type != BTRFS_INODE_ITEM_KEY ||
4748 location.offset != 0)
4751 filetype = btrfs_dir_type(node, di);
4752 if (file_type != filetype)
4755 if (len > BTRFS_NAME_LEN)
4756 len = BTRFS_NAME_LEN;
4758 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759 if (len != name_len || strncmp(namebuf, name, len))
4762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763 *index_ret = key.offset;
4767 len += sizeof(*di) + data_len;
4768 di = (struct btrfs_dir_item *)((char *)di + len);
4774 btrfs_release_path(&path);
4779 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780 * INODE_REF/INODE_EXTREF match.
4782 * @root: the root of the fs/file tree
4783 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784 * value while find index
4785 * @location_key: location key of the struct btrfs_dir_item to match
4786 * @name: the name to match
4787 * @namelen: the length of name
4788 * @file_type: the type of file to match
4790 * Return 0 if no error occurred.
4791 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792 * DIR_ITEM/DIR_INDEX
4793 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * When @key is a DIR_INDEX key whose offset is (u64)-1, the index is first
 * resolved with find_dir_index(), which writes it into key->offset.  The
 * item is then looked up with btrfs_search_slot() and each entry packed in
 * it is compared against @location_key, @file_type and @name/@namelen.
 * Names longer than BTRFS_NAME_LEN are truncated to that length before the
 * comparison and a warning is printed.
 */
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797 struct btrfs_key *location_key, char *name,
4798 u32 namelen, u8 file_type)
4800 struct btrfs_path path;
4801 struct extent_buffer *node;
4802 struct btrfs_dir_item *di;
4803 struct btrfs_key location;
4804 char namebuf[BTRFS_NAME_LEN] = {0};
4813 /* get the index by traversing all index */
4814 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815 ret = find_dir_index(root, key->objectid,
4816 location_key->objectid, &key->offset,
4817 name, namelen, file_type);
4819 ret = DIR_INDEX_MISSING;
4823 btrfs_init_path(&path);
4824 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4826 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4831 /* Check whether inode_id/filetype/name match */
4832 node = path.nodes[0];
4833 slot = path.slots[0];
4834 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835 total = btrfs_item_size_nr(node, slot);
4836 while (cur < total) {
/* Assume a mismatch until an entry passes every comparison below. */
4837 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4840 len = btrfs_dir_name_len(node, di);
4841 data_len = btrfs_dir_data_len(node, di);
4843 btrfs_dir_item_key_to_cpu(node, di, &location);
4844 if (location.objectid != location_key->objectid ||
4845 location.type != location_key->type ||
4846 location.offset != location_key->offset)
4849 filetype = btrfs_dir_type(node, di);
4850 if (file_type != filetype)
/* NOTE(review): len is clamped before the warning, so the message reports BTRFS_NAME_LEN rather than the on-disk length. */
4853 if (len > BTRFS_NAME_LEN) {
4854 len = BTRFS_NAME_LEN;
4855 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4857 key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX",
4859 key->objectid, key->offset, len);
4861 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4863 if (len != namelen || strncmp(namebuf, name, len))
/* Step over the header, the name and the data of this entry. */
4869 len += sizeof(*di) + data_len;
4870 di = (struct btrfs_dir_item *)((char *)di + len);
4875 btrfs_release_path(&path);
4880 * Prints inode ref error message
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883 u64 index, const char *namebuf, int name_len,
4884 u8 filetype, int err)
4889 /* root dir error */
4890 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4892 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893 root->objectid, key->objectid, key->offset, namebuf);
4898 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900 root->objectid, key->offset,
4901 btrfs_name_hash(namebuf, name_len),
4902 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4904 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906 root->objectid, key->offset, index,
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4912 * Insert the missing inode item.
4914 * Returns 0 means success.
4915 * Returns <0 means error.
/*
 * Starts a transaction, searches for the INODE_ITEM key of @ino with room
 * for an insertion, and calls create_inode_item_lowmem() unless the search
 * failed or the item already exists.  The transaction is committed and an
 * error message is printed on the failure path.
 */
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4920 struct btrfs_key key;
4921 struct btrfs_trans_handle *trans;
4922 struct btrfs_path path;
4926 key.type = BTRFS_INODE_ITEM_KEY;
4929 btrfs_init_path(&path);
4930 trans = btrfs_start_transaction(root, 1);
4931 if (IS_ERR(trans)) {
/* ret == 0 means the item exists already, ret < 0 is a search error; neither case inserts. */
4936 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4937 if (ret < 0 || !ret)
4940 /* insert inode item */
/* NOTE(review): the results of the insert and of the commit below are not captured - confirm that is intended. */
4941 create_inode_item_lowmem(trans, root, ino, filetype);
4944 btrfs_commit_transaction(trans, root);
4947 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948 root->objectid, ino);
4949 btrfs_release_path(&path);
4954 * The ternary means dir item, dir index and relative inode ref.
4955 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4958 * If two of three is missing or mismatched, delete the existing one.
4959 * If one of three is missing or mismatched, add the missing one.
4961 * returns 0 means success.
4962 * returns not 0 means on error;
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965 u64 index, char *name, int name_len, u8 filetype,
4968 struct btrfs_trans_handle *trans;
4973 * stage shall be one of following valild values:
4974 * 0: Fine, nothing to do.
4975 * 1: One of three is wrong, so add missing one.
4976 * 2: Two of three is wrong, so delete existed one.
4978 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4982 if (err & (INODE_REF_MISSING))
4985 /* stage must be smllarer than 3 */
4988 trans = btrfs_start_transaction(root, 1);
4990 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4995 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996 filetype, &index, 1, 1);
5000 btrfs_commit_transaction(trans, root);
5003 error("fail to repair inode %llu name %s filetype %u",
5004 ino, name, filetype);
5006 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007 stage == 2 ? "Delete" : "Add",
5008 ino, name, filetype);
5014 * Traverse the given INODE_REF and call find_dir_item() to find related
5015 * DIR_ITEM/DIR_INDEX.
5017 * @root: the root of the fs/file tree
5018 * @ref_key: the key of the INODE_REF
5019 * @path the path provides node and slot
5020 * @refs: the count of INODE_REF
5021 * @mode: the st_mode of INODE_ITEM
5022 * @name_ret: returns with the first ref's name
5023 * @name_len_ret: len of the name_ret
5025 * Return 0 if no error occurred.
/*
 * For every entry packed into the INODE_REF item at path->slots[0]: the
 * name is read (clamped to BTRFS_NAME_LEN with a warning), the first name
 * is copied to @name_ret, and either the root directory rule is applied
 * (index 0, name "..", parent BTRFS_FIRST_FREE_OBJECTID) or the matching
 * DIR_INDEX and DIR_ITEM are looked up with find_dir_item().  When errors
 * were found and repair is enabled, repair_ternary_lowmem() is called;
 * the item is searched again afterwards because the repair may have moved
 * or deleted it.  Remaining errors are reported by print_inode_ref_err().
 */
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028 struct btrfs_path *path, char *name_ret,
5029 u32 *namelen_ret, u64 *refs_ret, int mode)
5031 struct btrfs_key key;
5032 struct btrfs_key location;
5033 struct btrfs_inode_ref *ref;
5034 struct extent_buffer *node;
5035 char namebuf[BTRFS_NAME_LEN] = {0};
5045 int need_research = 0;
5053 /* since after repair, path and the dir item may be changed */
5054 if (need_research) {
5056 btrfs_release_path(path);
5057 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058 /* the item was deleted, let path point to the last checked item */
5060 if (path->slots[0] == 0)
5061 btrfs_prev_leaf(root, path);
5069 location.objectid = ref_key->objectid;
5070 location.type = BTRFS_INODE_ITEM_KEY;
5071 location.offset = 0;
5072 node = path->nodes[0];
5073 slot = path->slots[0];
5075 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077 total = btrfs_item_size_nr(node, slot);
5080 /* Update inode ref count */
5083 index = btrfs_inode_ref_index(node, ref);
5084 name_len = btrfs_inode_ref_name_len(node, ref);
5086 if (name_len <= BTRFS_NAME_LEN) {
5089 len = BTRFS_NAME_LEN;
5090 warning("root %llu INODE_REF[%llu %llu] name too long",
5091 root->objectid, ref_key->objectid, ref_key->offset);
5094 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5096 /* copy the first name found to name_ret */
5097 if (refs == 1 && name_ret) {
5098 memcpy(name_ret, namebuf, len);
5102 /* Check root dir ref */
5103 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104 if (index != 0 || len != strlen("..") ||
5105 strncmp("..", namebuf, len) ||
5106 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107 /* set err bits then repair will delete the ref */
5108 err |= DIR_INDEX_MISSING;
5109 err |= DIR_ITEM_MISSING;
5114 /* Find related DIR_INDEX */
5115 key.objectid = ref_key->offset;
5116 key.type = BTRFS_DIR_INDEX_KEY;
5118 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119 imode_to_type(mode));
5121 /* Find related dir_item */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_DIR_ITEM_KEY;
5124 key.offset = btrfs_name_hash(namebuf, len);
5125 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126 imode_to_type(mode));
/*
 * NOTE(review): the lookups above use the clamped len, while the repair and
 * print calls below receive the raw on-disk name_len; for an over-long name
 * that is larger than namebuf - confirm which length is intended.
 */
5128 if (tmp_err && repair) {
5129 ret = repair_ternary_lowmem(root, ref_key->offset,
5130 ref_key->objectid, index, namebuf,
5131 name_len, imode_to_type(mode),
5138 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139 imode_to_type(mode), tmp_err);
/* Advance by the on-disk size of this entry: fixed header plus the full name. */
5141 len = sizeof(*ref) + name_len;
5142 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5153 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154 * DIR_ITEM/DIR_INDEX.
5156 * @root: the root of the fs/file tree
5157 * @ref_key: the key of the INODE_EXTREF
5158 * @refs: the count of INODE_EXTREF
5159 * @mode: the st_mode of INODE_ITEM
5161 * Return 0 if no error occurred.
/*
 * For every entry packed into the INODE_EXTREF item at @slot of @node: the
 * name is read (clamped to BTRFS_NAME_LEN with a warning), an entry with
 * index 0 is required to be named "..", and the DIR_INDEX and DIR_ITEM of
 * the parent directory stored in the extref are looked up with
 * find_dir_item().
 */
5163 static int check_inode_extref(struct btrfs_root *root,
5164 struct btrfs_key *ref_key,
5165 struct extent_buffer *node, int slot, u64 *refs,
5168 struct btrfs_key key;
5169 struct btrfs_key location;
5170 struct btrfs_inode_extref *extref;
5171 char namebuf[BTRFS_NAME_LEN] = {0};
5181 location.objectid = ref_key->objectid;
5182 location.type = BTRFS_INODE_ITEM_KEY;
5183 location.offset = 0;
5185 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186 total = btrfs_item_size_nr(node, slot);
5189 /* update inode ref count */
5191 name_len = btrfs_inode_extref_name_len(node, extref);
5192 index = btrfs_inode_extref_index(node, extref);
5193 parent = btrfs_inode_extref_parent(node, extref);
5194 if (name_len <= BTRFS_NAME_LEN) {
5197 len = BTRFS_NAME_LEN;
5198 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199 root->objectid, ref_key->objectid, ref_key->offset);
5201 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5203 /* Check root dir ref name */
/* NOTE(review): the comparison is bounded by the raw name_len, not the clamped len - confirm. */
5204 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206 root->objectid, ref_key->objectid, ref_key->offset,
5208 err |= ROOT_DIR_ERROR;
5211 /* find related dir_index */
5212 key.objectid = parent;
5213 key.type = BTRFS_DIR_INDEX_KEY;
/* NOTE(review): mode is passed as the file type as-is here, whereas check_inode_ref() converts it with imode_to_type() - confirm. */
5215 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5218 /* find related dir_item */
5219 key.objectid = parent;
5220 key.type = BTRFS_DIR_ITEM_KEY;
5221 key.offset = btrfs_name_hash(namebuf, len);
5222 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Advance by the on-disk size of this entry: fixed header plus the full name. */
5225 len = sizeof(*extref) + name_len;
5226 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5236 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237 * DIR_ITEM/DIR_INDEX match.
5238 * Return with @index_ret.
5240 * @root: the root of the fs/file tree
5241 * @key: the key of the INODE_REF/INODE_EXTREF
5242 * @name: the name in the INODE_REF/INODE_EXTREF
5243 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5244 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5245 * value (u64)-1 means do not check index
5246 * @ext_ref: the EXTENDED_IREF feature
5248 * Return 0 if no error occurred.
5249 * Return >0 for error bitmap
/*
 * The lookup has two phases.  First the INODE_REF item for @key is searched
 * and each entry in it is compared by index (unless *@index_ret is (u64)-1)
 * and by name; on a match the entry's index is written to *@index_ret.
 * The second phase rewrites @key in place into the INODE_EXTREF key for
 * (key->offset as parent directory, @name) and repeats the comparison over
 * the INODE_EXTREF entries, additionally requiring the stored parent to
 * equal that directory.  Note that @key is modified by the second phase.
 */
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252 char *name, int namelen, u64 *index_ret,
5253 unsigned int ext_ref)
5255 struct btrfs_path path;
5256 struct btrfs_inode_ref *ref;
5257 struct btrfs_inode_extref *extref;
5258 struct extent_buffer *node;
5259 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5272 btrfs_init_path(&path);
5273 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5275 ret = INODE_REF_MISSING;
5279 node = path.nodes[0];
5280 slot = path.slots[0];
5282 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283 total = btrfs_item_size_nr(node, slot);
5285 /* Iterate all entry of INODE_REF */
5286 while (cur < total) {
5287 ret = INODE_REF_MISSING;
5289 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290 ref_index = btrfs_inode_ref_index(node, ref);
5291 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* An entry that would run past the item, or whose name exceeds BTRFS_NAME_LEN, is reported and its name length bounded. */
5294 if (cur + sizeof(*ref) + ref_namelen > total ||
5295 ref_namelen > BTRFS_NAME_LEN) {
5296 warning("root %llu INODE %s[%llu %llu] name too long",
5298 key->type == BTRFS_INODE_REF_KEY ?
5300 key->objectid, key->offset);
5302 if (cur + sizeof(*ref) > total)
5304 len = min_t(u32, total - cur - sizeof(*ref),
5310 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5313 if (len != namelen || strncmp(ref_namebuf, name, len))
5316 *index_ret = ref_index;
5320 len = sizeof(*ref) + ref_namelen;
5321 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5326 /* Skip if not support EXTENDED_IREF feature */
5330 btrfs_release_path(&path);
5331 btrfs_init_path(&path);
5333 dir_id = key->offset;
5334 key->type = BTRFS_INODE_EXTREF_KEY;
5335 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5337 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5339 ret = INODE_REF_MISSING;
5343 node = path.nodes[0];
5344 slot = path.slots[0];
5346 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5348 total = btrfs_item_size_nr(node, slot);
5350 /* Iterate all entry of INODE_EXTREF */
5351 while (cur < total) {
5352 ret = INODE_REF_MISSING;
5354 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355 ref_index = btrfs_inode_extref_index(node, extref);
5356 parent = btrfs_inode_extref_parent(node, extref);
5357 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5360 if (parent != dir_id)
5363 if (ref_namelen <= BTRFS_NAME_LEN) {
5366 len = BTRFS_NAME_LEN;
/* NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the REF_KEY alternative of this ternary cannot be taken here. */
5367 warning("root %llu INODE %s[%llu %llu] name too long",
5369 key->type == BTRFS_INODE_REF_KEY ?
5371 key->objectid, key->offset);
5373 read_extent_buffer(node, ref_namebuf,
5374 (unsigned long)(extref + 1), len);
5376 if (len != namelen || strncmp(ref_namebuf, name, len))
5379 *index_ret = ref_index;
5384 len = sizeof(*extref) + ref_namelen;
5385 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5390 btrfs_release_path(&path);
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395 u64 ino, u64 index, const char *namebuf,
5396 int name_len, u8 filetype, int err)
5398 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400 root->objectid, key->objectid, key->offset, namebuf,
5402 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5405 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407 root->objectid, key->objectid, index, namebuf, filetype,
5408 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5411 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5413 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414 root->objectid, ino, index, namebuf, filetype,
5415 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5418 if (err & INODE_REF_MISSING)
5420 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421 root->objectid, ino, key->objectid, namebuf, filetype);
5426 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5428 * Returns error after repair
/*
 * Two independent repairs are attempted.  A missing inode item is recreated
 * with repair_inode_item_missing(), which clears both INODE_ITEM bits from
 * err.  Any remaining bit other than the INODE_ITEM ones is passed to
 * repair_ternary_lowmem(), which clears the DIR_INDEX, DIR_ITEM and
 * INODE_REF bits.
 */
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431 u64 index, u8 filetype, char *namebuf, u32 name_len,
5436 if (err & INODE_ITEM_MISSING) {
5437 ret = repair_inode_item_missing(root, ino, filetype);
5439 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5442 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444 name_len, filetype, err);
5446 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type that belong to directory @ino, starting
 * from the highest offset and stepping backwards with
 * btrfs_previous_item(), and visit every btrfs_dir_item entry packed in
 * them.  Name lengths above BTRFS_NAME_LEN are clamped to that value before
 * being used.
 */
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5457 struct btrfs_key key;
5458 struct btrfs_path path;
5460 struct btrfs_dir_item *di;
5470 key.offset = (u64)-1;
5472 btrfs_init_path(&path);
5473 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5478 /* if found, go to special case */
5483 ret = btrfs_previous_item(root, &path, ino, type);
5491 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5493 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5495 while (cur < total) {
5496 len = btrfs_dir_name_len(path.nodes[0], di);
5497 if (len > BTRFS_NAME_LEN)
5498 len = BTRFS_NAME_LEN;
5501 len += btrfs_dir_data_len(path.nodes[0], di);
5503 di = (struct btrfs_dir_item *)((char *)di + len);
5509 btrfs_release_path(&path);
/*
 * Compute the size of directory inode @ino in @root into @size, as the sum
 * of what __count_dir_isize() reports for its DIR_ITEM items and for its
 * DIR_INDEX items.
 * NOTE(review): the error checks after each helper call and the return
 * statement are not visible in this listing.
 */
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5520 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5524 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5528 *size = item_size + index_size;
/* failure message; presumably reached only when a helper call failed */
5532 error("failed to count root %llu INODE[%llu] root size",
5533 root->objectid, ino);
5538 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5541 * @root: the root of the fs/file tree
5542 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5544 * @size: the st_size of the INODE_ITEM
5545 * @ext_ref: the EXTENDED_IREF feature
5547 * Return 0 if no error occurred.
5548 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry packed in the DIR_ITEM/DIR_INDEX item @di_key that
 * @path points to.
 *
 * For each entry the visible code:
 *  - validates data_len and clamps an over-long name to BTRFS_NAME_LEN,
 *  - for a DIR_ITEM, compares the key offset with the hash of the name,
 *  - looks up the INODE_ITEM named by the entry and compares its file type,
 *  - looks up the matching INODE_REF/INODE_EXTREF via find_inode_ref(),
 *  - looks up the counterpart DIR_INDEX (for a DIR_ITEM) or DIR_ITEM (for a
 *    DIR_INDEX) via find_dir_item(),
 *  - in repair mode hands the collected bits to repair_dir_item().
 *
 * @root:    fs/file tree being checked
 * @di_key:  key of the DIR_ITEM/DIR_INDEX item
 * @path:    path pointing at the item; released and re-searched here
 * @size:    accumulates the name lengths of the entries
 * @ext_ref: EXTENDED_IREF feature flag
 *
 * Fixes compared with the previous version:
 *  - the counterpart lookup selected its branch on key.type, which had just
 *    been set to BTRFS_INODE_REF_KEY, so the DIR_INDEX branch could never be
 *    taken; it now tests di_key->type.
 *  - the DIR_ITEM hash was computed over name_len bytes of namebuf, reading
 *    past the buffer when the on-disk name_len exceeds BTRFS_NAME_LEN; it
 *    now uses the clamped len, like the hash check earlier in the loop.
 *
 * NOTE(review): several short lines (declarations, braces, gotos, labels,
 * the return) are not visible in this listing and are left untouched.
 */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551 struct btrfs_path *path, u64 *size,
5552 unsigned int ext_ref)
5554 struct btrfs_dir_item *di;
5555 struct btrfs_inode_item *ii;
5556 struct btrfs_key key;
5557 struct btrfs_key location;
5558 struct extent_buffer *node;
5560 char namebuf[BTRFS_NAME_LEN] = {0};
5572 int need_research = 0;
5575 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576 * ignore index check.
5578 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579 index = di_key->offset;
5586 /* since after repair, path and the dir item may be changed */
5587 if (need_research) {
/* tell the caller that the directory size has to be counted again */
5589 err |= DIR_COUNT_AGAIN;
5590 btrfs_release_path(path);
5591 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592 /* the item was deleted, let path point the last checked item */
5594 if (path->slots[0] == 0)
5595 btrfs_prev_leaf(root, path);
5603 node = path->nodes[0];
5604 slot = path->slots[0];
5606 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the element count equals its byte size */
5608 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5610 while (cur < total) {
5611 data_len = btrfs_dir_data_len(node, di);
5614 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5616 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617 di_key->objectid, di_key->offset, data_len);
5619 name_len = btrfs_dir_name_len(node, di);
/* len is the number of name bytes that fit in namebuf */
5620 if (name_len <= BTRFS_NAME_LEN) {
5623 len = BTRFS_NAME_LEN;
5624 warning("root %llu %s[%llu %llu] name too long",
5626 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627 di_key->objectid, di_key->offset);
/* the size accounting uses the unclamped on-disk name_len */
5629 (*size) += name_len;
5630 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5632 filetype = btrfs_dir_type(node, di);
/* a DIR_ITEM is keyed by the hash of its name */
5634 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635 di_key->offset != btrfs_name_hash(namebuf, len)) {
5637 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638 root->objectid, di_key->objectid, di_key->offset,
5639 namebuf, len, filetype, di_key->offset,
5640 btrfs_name_hash(namebuf, len));
5643 btrfs_dir_item_key_to_cpu(node, di, &location);
5644 /* Ignore related ROOT_ITEM check */
5645 if (location.type == BTRFS_ROOT_ITEM_KEY)
5648 btrfs_release_path(path);
5649 /* Check relative INODE_ITEM(existence/filetype) */
5650 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5652 tmp_err |= INODE_ITEM_MISSING;
5656 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657 struct btrfs_inode_item);
5658 mode = btrfs_inode_mode(path->nodes[0], ii);
5659 if (imode_to_type(mode) != filetype) {
5660 tmp_err |= INODE_ITEM_MISMATCH;
5664 /* Check relative INODE_REF/INODE_EXTREF */
5665 key.objectid = location.objectid;
5666 key.type = BTRFS_INODE_REF_KEY;
5667 key.offset = di_key->objectid;
5668 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5671 /* check relative INDEX/ITEM */
5672 key.objectid = di_key->objectid;
/*
 * Branch on the type of the item being checked: key.type was overwritten
 * with BTRFS_INODE_REF_KEY for the lookup above.
 */
5673 if (di_key->type == BTRFS_DIR_ITEM_KEY) {
5674 key.type = BTRFS_DIR_INDEX_KEY;
5677 key.type = BTRFS_DIR_ITEM_KEY;
/* hash only the bytes actually held in namebuf */
5678 key.offset = btrfs_name_hash(namebuf, len);
/*
 * NOTE(review): name_len is passed unclamped here; whether find_dir_item()
 * reads name_len bytes from namebuf cannot be told from this listing.
 */
5681 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682 name_len, filetype);
5683 /* find_dir_item may find index */
5684 if (key.type == BTRFS_DIR_INDEX_KEY)
5688 if (tmp_err && repair) {
5689 ret = repair_dir_item(root, di_key->objectid,
5690 location.objectid, index,
5691 imode_to_type(mode), namebuf,
/* a changed bitmap means repair_dir_item() fixed at least one bit */
5693 if (ret != tmp_err) {
5698 btrfs_release_path(path);
5699 print_dir_item_err(root, di_key, location.objectid, index,
5700 namebuf, name_len, filetype, tmp_err);
/* advance to the next entry packed in the same item */
5702 len = sizeof(*di) + name_len + data_len;
5703 di = (struct btrfs_dir_item *)((char *)di + len);
5706 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708 root->objectid, di_key->objectid,
/* put @path back on the item the caller handed in */
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* NOTE(review): this ORs a negative errno into the err bitmap. */
5718 err |= ret > 0 ? -ENOENT : ret;
5723 * Wrapper function of btrfs_punch_hole.
5725 * Returns 0 means success.
5726 * Returns not 0 means error.
/*
 * Start a transaction on @root, call btrfs_punch_hole() for inode @ino with
 * the given start/len, report the outcome and commit the transaction.
 * If the transaction cannot be started, PTR_ERR(trans) is returned.
 * NOTE(review): the return value of btrfs_commit_transaction() is not used
 * on the visible line; the final return statement is not visible here.
 */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5731 struct btrfs_trans_handle *trans;
5734 trans = btrfs_start_transaction(root, 1);
5736 return PTR_ERR(trans);
5738 ret = btrfs_punch_hole(trans, root, ino, start, len);
/* failure message */
5740 error("failed to add hole [%llu, %llu] in inode [%llu]",
/* success message */
5743 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5746 btrfs_commit_transaction(trans, root);
5751 * Check file extent datasum/hole, update the size of the file extents,
5752 * check and update the last offset of the file extent.
5754 * @root: the root of fs/file tree.
5755 * @fkey: the key of the file extent.
5756 * @nodatasum: INODE_NODATASUM feature.
5757 * @size: the sum of all EXTENT_DATA items size for this inode.
5758 * @end: the offset of the last extent.
5760 * Return 0 if no error occurred.
/*
 * Check the EXTENT_DATA item @fkey stored at @node/@slot.
 *
 * Inline extents: reject an empty one, and for an uncompressed one compare
 * the recorded length with the inline item length.
 * Regular/prealloc extents: count the csum bytes covering the extent and
 * compare that with what @nodatasum and the extent type allow, then check
 * that the extent starts where the previous one ended (*@end) unless
 * no_holes is set.
 * *@end and *@size are advanced by the extent length on the visible lines.
 *
 * NOTE(review): several short lines (returns, braces, the conditions in
 * front of some statements) are not visible in this listing.
 */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763 struct extent_buffer *node, int slot,
5764 unsigned int nodatasum, u64 *size, u64 *end)
5766 struct btrfs_file_extent_item *fi;
5769 u64 extent_num_bytes;
5771 u64 csum_found; /* In byte size, sectorsize aligned */
5772 u64 search_start; /* Logical range start we search for csum */
5773 u64 search_len; /* Logical range len we search for csum */
5774 unsigned int extent_type;
5775 unsigned int is_hole;
5780 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5782 /* Check inline extent */
5783 extent_type = btrfs_file_extent_type(node, fi);
5784 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785 struct btrfs_item *e = btrfs_item_nr(slot);
5786 u32 item_inline_len;
5788 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790 compressed = btrfs_file_extent_compression(node, fi);
5791 if (extent_num_bytes == 0) {
5793 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794 root->objectid, fkey->objectid, fkey->offset);
5795 err |= FILE_EXTENT_ERROR;
/* the two lengths are only compared for uncompressed inline data */
5797 if (!compressed && extent_num_bytes != item_inline_len) {
5799 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800 root->objectid, fkey->objectid, fkey->offset,
5801 extent_num_bytes, item_inline_len);
5802 err |= FILE_EXTENT_ERROR;
5804 *end += extent_num_bytes;
5805 *size += extent_num_bytes;
5809 /* Check extent type */
5810 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812 err |= FILE_EXTENT_ERROR;
5813 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814 root->objectid, fkey->objectid, fkey->offset);
5818 /* Check REG_EXTENT/PREALLOC_EXTENT */
5819 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822 extent_offset = btrfs_file_extent_offset(node, fi);
5823 compressed = btrfs_file_extent_compression(node, fi);
/* an extent with neither disk bytenr nor disk length is treated as a hole */
5824 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5827 * Check EXTENT_DATA csum
5829 * For plain (uncompressed) extent, we should only check the range
5830 * we're referring to, as it's possible that part of prealloc extent
5831 * has been written, and has csum:
5833 * |<--- Original large preallocated extent A ---->|
5834 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5837 * For compressed extent, we should check the whole range.
5840 search_start = disk_bytenr + extent_offset;
5841 search_len = extent_num_bytes;
5843 search_start = disk_bytenr;
5844 search_len = disk_num_bytes;
5846 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* csums found although the inode is flagged NODATASUM */
5847 if (csum_found > 0 && nodatasum) {
5848 err |= ODD_CSUM_ITEM;
5849 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850 root->objectid, fkey->objectid, fkey->offset);
/* a regular, non-hole extent with datasum must be fully covered by csums */
5851 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852 !is_hole && (ret < 0 || csum_found < search_len)) {
5853 err |= CSUM_ITEM_MISSING;
5854 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855 root->objectid, fkey->objectid, fkey->offset,
5856 csum_found, search_len);
5857 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858 err |= ODD_CSUM_ITEM;
5859 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860 root->objectid, fkey->objectid, fkey->offset, csum_found);
5863 /* Check EXTENT_DATA hole */
5864 if (!no_holes && *end != fkey->offset) {
/* the gap [*end, fkey->offset) is what gets punched as a hole */
5866 ret = punch_extent_hole(root, fkey->objectid,
5867 *end, fkey->offset - *end);
/* reported when not repairing, or when the repair itself failed */
5868 if (!repair || ret) {
5869 err |= FILE_EXTENT_ERROR;
5871 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5872 root->objectid, fkey->objectid, fkey->offset, *end);
5876 *end += extent_num_bytes;
5878 *size += extent_num_bytes;
5884 * Set inode item nbytes to @nbytes
5886 * Returns 0 on success
5887 * Returns != 0 on error
/*
 * Set the nbytes field of the INODE_ITEM of inode @ino in @root to @nbytes.
 *
 * The key @path currently points at is saved first; after the update the
 * path is released and searched again for that key, so the caller gets
 * @path back on the item it was working on.
 *
 * Fix compared with the previous version: the success message had its
 * newline in the middle of the format string ("...root %llu\n to %llu"),
 * splitting the message over two lines and leaving the second one
 * unterminated. It now ends with the newline, like the message of
 * repair_dir_isize_lowmem().
 *
 * NOTE(review): several short lines (key.objectid/key.offset setup, error
 * checks, labels, the return) are not visible in this listing and are left
 * untouched.
 */
5889 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5890 struct btrfs_path *path,
5891 u64 ino, u64 nbytes)
5893 struct btrfs_trans_handle *trans;
5894 struct btrfs_inode_item *ii;
5895 struct btrfs_key key;
5896 struct btrfs_key research_key;
/* remember where the caller's path points */
5900 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5903 key.type = BTRFS_INODE_ITEM_KEY;
5906 trans = btrfs_start_transaction(root, 1);
5907 if (IS_ERR(trans)) {
5908 ret = PTR_ERR(trans);
5913 btrfs_release_path(path);
/* search with the transaction and cow=1: the leaf is about to be modified */
5914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5922 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5923 struct btrfs_inode_item);
5924 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5925 btrfs_mark_buffer_dirty(path->nodes[0]);
5927 btrfs_commit_transaction(trans, root);
5930 error("failed to set nbytes in inode %llu root %llu",
5931 ino, root->root_key.objectid);
5933 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5934 root->root_key.objectid, nbytes);
/* restore the caller's position */
5937 btrfs_release_path(path);
5938 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5945 * Set directory inode isize to @isize.
5947 * Returns 0 on success.
5948 * Returns != 0 on error.
/*
 * Set the size field of the INODE_ITEM of a directory inode to isize.
 *
 * The key @path currently points at is saved first; after the update the
 * path is released and searched again for that key.
 * NOTE(review): the remaining parameters, the key.objectid/key.offset setup,
 * the error checks, labels and the return are not visible in this listing.
 */
5950 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5951 struct btrfs_path *path,
5954 struct btrfs_trans_handle *trans;
5955 struct btrfs_inode_item *ii;
5956 struct btrfs_key key;
5957 struct btrfs_key research_key;
/* remember where the caller's path points */
5961 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5964 key.type = BTRFS_INODE_ITEM_KEY;
5967 trans = btrfs_start_transaction(root, 1);
5968 if (IS_ERR(trans)) {
5969 ret = PTR_ERR(trans);
5974 btrfs_release_path(path);
/* search with the transaction and cow=1: the leaf is about to be modified */
5975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5983 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5984 struct btrfs_inode_item);
5985 btrfs_set_inode_size(path->nodes[0], ii, isize);
5986 btrfs_mark_buffer_dirty(path->nodes[0]);
5988 btrfs_commit_transaction(trans, root);
5991 error("failed to set isize in inode %llu root %llu",
5992 ino, root->root_key.objectid);
5994 printf("Set isize in inode %llu root %llu to %llu\n",
5995 ino, root->root_key.objectid, isize);
/* restore the caller's position */
5997 btrfs_release_path(path);
5998 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6005 * Wrapper function for btrfs_add_orphan_item().
6007 * Returns 0 on success.
6008 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino in @root by calling
 * btrfs_add_orphan_item() inside its own transaction.
 *
 * The key @path currently points at is saved first; afterwards the path is
 * released and searched again for that key.
 * NOTE(review): the error checks, labels and the return are not visible in
 * this listing.
 */
6010 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6011 struct btrfs_path *path, u64 ino)
6013 struct btrfs_trans_handle *trans;
6014 struct btrfs_key research_key;
/* remember where the caller's path points */
6018 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6020 trans = btrfs_start_transaction(root, 1);
6021 if (IS_ERR(trans)) {
6022 ret = PTR_ERR(trans);
6027 btrfs_release_path(path);
6028 ret = btrfs_add_orphan_item(trans, root, path, ino);
6030 btrfs_commit_transaction(trans, root);
6033 error("failed to add inode %llu as orphan item root %llu",
6034 ino, root->root_key.objectid);
6036 printf("Added inode %llu as orphan item root %llu\n",
6037 ino, root->root_key.objectid);
/* restore the caller's position */
6039 btrfs_release_path(path);
6040 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6046 /* Set inode_item nlink to @ref_count.
6047 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6049 * Returns 0 on success
/*
 * Set the nlink field of the INODE_ITEM of inode @ino to @ref_count.
 * When @ref_count is 0 the inode is first linked into lost+found through
 * link_inode_to_lostfound(), which receives &ref_count and may update it.
 *
 * @name/@namelen: name to use for the lost+found entry; when absent, the
 *                 decimal inode number is used as the name instead.
 * @filetype:      file type passed on to link_inode_to_lostfound()
 * @nlink:         NOTE(review): its use is not visible in this listing
 *
 * The key @path currently points at is saved first; afterwards the path is
 * released and searched again for that key.
 *
 * Fix compared with the previous version: both messages are formatted as
 * "inode %llu root %llu" but were passed (root->objectid, ino), so the two
 * numbers were printed swapped. The arguments now follow the format.
 *
 * NOTE(review): several short lines (declarations, error checks, labels,
 * the return) are not visible in this listing and are left untouched.
 * NOTE(review): when namelen == BTRFS_NAME_LEN the memcpy() leaves namebuf
 * without a terminating NUL, yet it is printed with %s below - confirm.
 */
6051 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6052 struct btrfs_path *path, u64 ino,
6053 const char *name, u32 namelen,
6054 u64 ref_count, u8 filetype, u64 *nlink)
6056 struct btrfs_trans_handle *trans;
6057 struct btrfs_inode_item *ii;
6058 struct btrfs_key key;
6059 struct btrfs_key old_key;
6060 char namebuf[BTRFS_NAME_LEN] = {0};
/* remember where the caller's path points */
6066 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6068 if (name && namelen) {
6069 ASSERT(namelen <= BTRFS_NAME_LEN);
6070 memcpy(namebuf, name, namelen);
/* no usable name: fall back to the inode number as the name */
6073 sprintf(namebuf, "%llu", ino);
6074 name_len = count_digits(ino);
6075 printf("Can't find file name for inode %llu, use %s instead\n",
6079 trans = btrfs_start_transaction(root, 1);
6080 if (IS_ERR(trans)) {
6081 ret = PTR_ERR(trans);
6085 btrfs_release_path(path);
6086 /* if refs is 0, put it into lostfound */
6087 if (ref_count == 0) {
6088 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6089 name_len, filetype, &ref_count);
6094 /* reset inode_item's nlink to ref_count */
6096 key.type = BTRFS_INODE_ITEM_KEY;
6099 btrfs_release_path(path);
/* search with the transaction and cow=1: the leaf is about to be modified */
6100 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6106 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6107 struct btrfs_inode_item);
6108 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6109 btrfs_mark_buffer_dirty(path->nodes[0]);
6114 btrfs_commit_transaction(trans, root);
6118 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6119 ino, root->objectid, namebuf, filetype);
6121 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6122 ino, root->objectid, namebuf, filetype);
/* restore the caller's position */
6125 btrfs_release_path(path);
6126 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6133 * Check INODE_ITEM and related ITEMs (the same inode number)
6134 * 1. check link count
6135 * 2. check inode ref/extref
6136 * 3. check dir item/index
6138 * @ext_ref: the EXTENDED_IREF feature
6140 * Return 0 if no error occurred.
6141 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check the INODE_ITEM @path points to together with the following items
 * that carry the same inode number.
 *
 * The fields of the inode item are read first; then the items after it are
 * visited with btrfs_next_item() and dispatched by key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() or
 * check_file_extent() until the objectid changes. Finally the collected
 * counters are compared with nlink/isize/nbytes of the inode item, and in
 * repair mode the matching repair_*_lowmem() helper or punch_extent_hole()
 * is called.
 *
 * NOTE(review): many short lines (declarations, loop/switch headers, breaks,
 * "if (dir)"/"if (repair)" style guards, labels, the return) are not visible
 * in this listing; the comments below only describe the visible statements.
 */
6143 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6144 unsigned int ext_ref)
6146 struct extent_buffer *node;
6147 struct btrfs_inode_item *ii;
6148 struct btrfs_key key;
6149 struct btrfs_key last_key;
6158 u64 extent_size = 0;
6160 unsigned int nodatasum;
6164 char namebuf[BTRFS_NAME_LEN] = {0};
6167 node = path->nodes[0];
6168 slot = path->slots[0];
6170 btrfs_item_key_to_cpu(node, &key, slot);
6171 inode_id = key.objectid;
/* the orphan objectid is not checked as an inode; only step past it */
6173 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6174 ret = btrfs_next_item(root, path);
6180 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6181 isize = btrfs_inode_size(node, ii);
6182 nbytes = btrfs_inode_nbytes(node, ii);
6183 mode = btrfs_inode_mode(node, ii);
6184 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6185 nlink = btrfs_inode_nlink(node, ii);
6186 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* remember the key of the item handled last, see the LAST_ITEM re-search below */
6189 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6190 ret = btrfs_next_item(root, path);
6192 /* out will fill 'err' using current statistics */
6194 } else if (ret > 0) {
6199 node = path->nodes[0];
6200 slot = path->slots[0];
6201 btrfs_item_key_to_cpu(node, &key, slot);
/* a different objectid means the items of this inode are exhausted */
6202 if (key.objectid != inode_id)
6206 case BTRFS_INODE_REF_KEY:
6207 ret = check_inode_ref(root, &key, path, namebuf,
6208 &name_len, &refs, mode);
6211 case BTRFS_INODE_EXTREF_KEY:
6212 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6213 warning("root %llu EXTREF[%llu %llu] isn't supported",
6214 root->objectid, key.objectid,
6216 ret = check_inode_extref(root, &key, node, slot, &refs,
6220 case BTRFS_DIR_ITEM_KEY:
6221 case BTRFS_DIR_INDEX_KEY:
6223 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6224 root->objectid, inode_id,
6225 imode_to_type(mode), key.objectid,
6228 ret = check_dir_item(root, &key, path, &size, ext_ref);
6231 case BTRFS_EXTENT_DATA_KEY:
6233 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6234 root->objectid, inode_id, key.objectid,
6237 ret = check_file_extent(root, &key, node, slot,
6238 nodatasum, &extent_size,
6242 case BTRFS_XATTR_ITEM_KEY:
6245 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6246 key.objectid, key.type, key.offset);
/* the walk ran off the end: put the path back on the last item seen */
6251 if (err & LAST_ITEM) {
6252 btrfs_release_path(path);
6253 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6258 /* verify INODE_ITEM nlink/isize/nbytes */
/* check_dir_item() asked for a recount after it repaired something */
6260 if (repair && (err & DIR_COUNT_AGAIN)) {
6261 err &= ~DIR_COUNT_AGAIN;
/* NOTE(review): the return value of count_dir_isize() is not used here. */
6262 count_dir_isize(root, inode_id, &size);
6265 if ((nlink != 1 || refs != 1) && repair) {
6266 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6267 namebuf, name_len, refs, imode_to_type(mode),
6272 err |= LINK_COUNT_ERROR;
6273 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6274 root->objectid, inode_id, nlink);
6278 * Just a warning, as dir inode nbytes is just an
6279 * instructive value.
6281 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6282 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6283 root->objectid, inode_id,
6284 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() (or recounted above) */
6287 if (isize != size) {
6289 ret = repair_dir_isize_lowmem(root, path,
6291 if (!repair || ret) {
6294 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6295 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref() */
6299 if (nlink != refs) {
6301 ret = repair_inode_nlinks_lowmem(root, path,
6302 inode_id, namebuf, name_len, refs,
6303 imode_to_type(mode), &nlink);
6304 if (!repair || ret) {
6305 err |= LINK_COUNT_ERROR;
6307 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6308 root->objectid, inode_id, nlink, refs);
/* nlink equals refs and both are zero */
6310 } else if (!nlink) {
6312 ret = repair_inode_orphan_item_lowmem(root,
6314 if (!repair || ret) {
6316 error("root %llu INODE[%llu] is orphan item",
6317 root->objectid, inode_id);
/* with holes required, a file without bytes must still be covered up to isize */
6321 if (!nbytes && !no_holes && extent_end < isize) {
6323 ret = punch_extent_hole(root, inode_id,
6324 extent_end, isize - extent_end);
6325 if (!repair || ret) {
6326 err |= NBYTES_ERROR;
6328 "root %llu INODE[%llu] size %llu should have a file extent hole",
6329 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent() */
6333 if (nbytes != extent_size) {
6335 ret = repair_inode_nbytes_lowmem(root, path,
6336 inode_id, extent_size);
6337 if (!repair || ret) {
6338 err |= NBYTES_ERROR;
6340 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6341 root->objectid, inode_id, nbytes,
6347 if (err & LAST_ITEM)
6348 btrfs_next_item(root, path);
6353 * Insert the missing inode item and inode ref.
6355 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6356 * Root dir should be handled specially because root dir is the root of fs.
6358 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the bits set in @err:
 *  - INODE_REF_MISSING: insert an INODE_REF named ".." that points from the
 *    inode to itself, inside its own transaction;
 *  - INODE_ITEM_MISSING: call repair_inode_item_missing() with file type
 *    BTRFS_FT_DIR.
 * The handled bits are cleared from @err on the visible lines.
 * NOTE(review): the error checks, labels and the return are not visible in
 * this listing.
 */
6360 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6362 struct btrfs_trans_handle *trans;
6363 struct btrfs_key key;
6364 struct btrfs_path path;
6365 int filetype = BTRFS_FT_DIR;
6368 btrfs_init_path(&path);
6370 if (err & INODE_REF_MISSING) {
/* the ref key of the first inode has itself as parent (offset) */
6371 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6372 key.type = BTRFS_INODE_REF_KEY;
6373 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6375 trans = btrfs_start_transaction(root, 1);
6376 if (IS_ERR(trans)) {
6377 ret = PTR_ERR(trans);
6381 btrfs_release_path(&path);
6382 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6386 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6387 BTRFS_FIRST_FREE_OBJECTID,
6388 BTRFS_FIRST_FREE_OBJECTID, 0);
6392 printf("Add INODE_REF[%llu %llu] name %s\n",
6393 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6395 err &= ~INODE_REF_MISSING;
6398 error("fail to insert first inode's ref");
6399 btrfs_commit_transaction(trans, root);
6402 if (err & INODE_ITEM_MISSING) {
6403 ret = repair_inode_item_missing(root,
6404 BTRFS_FIRST_FREE_OBJECTID, filetype);
6407 err &= ~INODE_ITEM_MISSING;
6411 error("fail to repair first inode");
6412 btrfs_release_path(&path);
6417 * check first root dir's inode_item and inode_ref
6419 * returns 0 means no error
6420 * returns >0 means error
6421 * returns <0 means fatal error
/*
 * Check that the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root has an
 * INODE_ITEM of directory type and an INODE_REF named "..".
 *
 * Missing or mismatching items are recorded as bits in err; the bitmap is
 * passed through repair_fs_first_inode(), and whatever remains is reported.
 * Returns ret when it is negative, otherwise the err bitmap.
 * NOTE(review): the guard in front of the repair_fs_first_inode() call and
 * other short lines are not visible in this listing.
 */
6423 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6425 struct btrfs_path path;
6426 struct btrfs_key key;
6427 struct btrfs_inode_item *ii;
6433 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6434 key.type = BTRFS_INODE_ITEM_KEY;
6437 /* For root being dropped, we don't need to check first inode */
6438 if (btrfs_root_refs(&root->root_item) == 0 &&
6439 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6440 BTRFS_FIRST_FREE_OBJECTID)
6443 btrfs_init_path(&path);
6444 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6449 err |= INODE_ITEM_MISSING;
6451 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6452 struct btrfs_inode_item);
6453 mode = btrfs_inode_mode(path.nodes[0], ii);
/* the first inode of a tree has to be a directory */
6454 if (imode_to_type(mode) != BTRFS_FT_DIR)
6455 err |= INODE_ITEM_MISMATCH;
6458 /* lookup first inode ref */
6459 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6460 key.type = BTRFS_INODE_REF_KEY;
6461 /* special index value */
6464 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6470 btrfs_release_path(&path);
6473 err = repair_fs_first_inode(root, err);
/* report whatever is still wrong */
6475 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6476 error("root dir INODE_ITEM is %s",
6477 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6478 if (err & INODE_REF_MISSING)
6479 error("root dir INODE_REF is missing");
/* a negative ret takes precedence over the err bitmap */
6481 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A template backref is filled in and searched for with rb_search(), using
 * compare_extent_backref() as the comparison function.
 * NOTE(review): the condition selecting the parent/full_backref form of the
 * template (presumably "parent != 0") and the NULL check on the search
 * result are not visible in this listing.
 */
6484 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6485 u64 parent, u64 root)
6487 struct rb_node *node;
6488 struct tree_backref *back = NULL;
6489 struct tree_backref match = {
6496 match.parent = parent;
6497 match.node.full_backref = 1;
6502 node = rb_search(&rec->backref_tree, &match.node.node,
6503 (rb_compare_keys)compare_extent_backref, NULL);
6505 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A template backref is filled in (including found_ref and disk_bytenr) and
 * searched for with rb_search(), using compare_extent_backref() as the
 * comparison function.
 * NOTE(review): one parameter line, the remaining template fields, the
 * condition selecting the parent/full_backref form and the NULL check on
 * the search result are not visible in this listing.
 */
6510 static struct data_backref *find_data_backref(struct extent_record *rec,
6511 u64 parent, u64 root,
6512 u64 owner, u64 offset,
6514 u64 disk_bytenr, u64 bytes)
6516 struct rb_node *node;
6517 struct data_backref *back = NULL;
6518 struct data_backref match = {
6525 .found_ref = found_ref,
6526 .disk_bytenr = disk_bytenr,
6530 match.parent = parent;
6531 match.node.full_backref = 1;
6536 node = rb_search(&rec->backref_tree, &match.node.node,
6537 (rb_compare_keys)compare_extent_backref, NULL);
6539 back = to_data_backref(rb_node_to_extent_backref(node));
6544 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6545 * blocks and integrity of fs tree items.
6547 * @root: the root of the tree to be checked.
6548 * @ext_ref: whether the EXTENDED_IREF feature is enabled or not.
6549 * @check_all: if NOT 0, check the tree blocks of the tree (any tree);
6550 * otherwise check the relationship of fs tree(s) items, and
6551 * @root MUST be a fs tree root.
6552 * Returns 0 represents OK.
6553 * Returns not 0 represents error.
/*
 * Walk the tree of @root with walk_down_tree_v2()/walk_up_tree_v2().
 *
 * The first inode is checked separately through check_fs_first_inode().
 * The walk then starts at the root node, or, for a root whose refs are 0
 * and whose drop_progress objectid is non-zero, at the drop_progress key
 * on level drop_level.
 * NOTE(review): the last parameter line, the guard in front of the
 * check_fs_first_inode() call, the loop around the walk calls and the
 * return are not visible in this listing.
 */
6555 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6556 struct btrfs_root *root, unsigned int ext_ref,
6560 struct btrfs_path path;
6561 struct node_refs nrefs;
6562 struct btrfs_root_item *root_item = &root->root_item;
6567 memset(&nrefs, 0, sizeof(nrefs));
6570 * We need to manually check the first inode item (256)
6571 * As the following traversal function will only start from
6572 * the first inode item in the leaf, if inode item (256) is
6573 * missing we will skip it forever.
6575 ret = check_fs_first_inode(root, ext_ref);
6581 level = btrfs_header_level(root->node);
6582 btrfs_init_path(&path);
/* live root, or no drop progress recorded: start from the root node */
6584 if (btrfs_root_refs(root_item) > 0 ||
6585 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6586 path.nodes[level] = root->node;
6587 path.slots[level] = 0;
/* the path now holds its own reference on the root node */
6588 extent_buffer_get(root->node);
6590 struct btrfs_key key;
/* otherwise start from the recorded drop_progress key */
6592 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6593 level = root_item->drop_level;
6594 path.lowest_level = level;
6595 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6602 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6603 ext_ref, check_all);
6607 /* if ret is negative, walk shall stop */
6613 ret = walk_up_tree_v2(root, &path, &level);
6615 /* Normal exit, reset ret to err */
6622 btrfs_release_path(&path);
/* Forward declaration; the definition is not in this part of the file. */
6626 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6629 * Iterate all items in the tree and call check_inode_item() to check.
6631 * @root: the root of the tree to be checked.
6632 * @ext_ref: the EXTENDED_IREF feature
6634 * Return 0 if no error found.
6635 * Return <0 for error.
/*
 * Check one fs/file tree: reset the cached block groups of the filesystem,
 * then run check_btrfs_root() without a transaction and with 0 as its last
 * argument.
 */
6637 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6639 reset_cached_block_groups(root->fs_info);
6640 return check_btrfs_root(NULL, root, ext_ref, 0);
6644 * Find the relative ref for root_ref and root_backref.
6646 * @root: the root of the root tree.
6647 * @ref_key: the key of the root ref.
6649 * Return 0 if no error occurred.
/*
 * Check the ROOT_REF/ROOT_BACKREF item @ref_key stored at @node/@slot
 * against its counterpart item.
 *
 * The counterpart key swaps objectid and offset and uses the opposite key
 * type. It is looked up in @root, and dirid, sequence, name length and name
 * of the two items are compared.
 * ROOT_REF_MISSING or ROOT_REF_MISMATCH is recorded in err on failure.
 * NOTE(review): some declarations, the "len = ref_namelen" branch, gotos,
 * labels and the return are not visible in this listing.
 */
6651 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6652 struct extent_buffer *node, int slot)
6654 struct btrfs_path path;
6655 struct btrfs_key key;
6656 struct btrfs_root_ref *ref;
6657 struct btrfs_root_ref *backref;
6658 char ref_name[BTRFS_NAME_LEN] = {0};
6659 char backref_name[BTRFS_NAME_LEN] = {0};
6665 u32 backref_namelen;
6670 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6671 ref_dirid = btrfs_root_ref_dirid(node, ref);
6672 ref_seq = btrfs_root_ref_sequence(node, ref);
6673 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* never copy more than the name buffer can hold */
6675 if (ref_namelen <= BTRFS_NAME_LEN) {
6678 len = BTRFS_NAME_LEN;
6679 warning("%s[%llu %llu] ref_name too long",
6680 ref_key->type == BTRFS_ROOT_REF_KEY ?
6681 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* the name is stored right behind the btrfs_root_ref structure */
6684 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6686 /* Find relative root_ref */
6687 key.objectid = ref_key->offset;
/* sum minus own type: ROOT_REF becomes ROOT_BACKREF and vice versa */
6688 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6689 key.offset = ref_key->objectid;
6691 btrfs_init_path(&path);
6692 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6694 err |= ROOT_REF_MISSING;
6695 error("%s[%llu %llu] couldn't find relative ref",
6696 ref_key->type == BTRFS_ROOT_REF_KEY ?
6697 "ROOT_REF" : "ROOT_BACKREF",
6698 ref_key->objectid, ref_key->offset);
6702 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6703 struct btrfs_root_ref);
6704 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6705 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6706 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6708 if (backref_namelen <= BTRFS_NAME_LEN) {
6709 len = backref_namelen;
6711 len = BTRFS_NAME_LEN;
6712 warning("%s[%llu %llu] ref_name too long",
6713 key.type == BTRFS_ROOT_REF_KEY ?
6714 "ROOT_REF" : "ROOT_BACKREF",
6715 key.objectid, key.offset);
6717 read_extent_buffer(path.nodes[0], backref_name,
6718 (unsigned long)(backref + 1), len);
/* len is the counterpart's clamped name length at this point */
6720 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6721 ref_namelen != backref_namelen ||
6722 strncmp(ref_name, backref_name, len)) {
6723 err |= ROOT_REF_MISMATCH;
6724 error("%s[%llu %llu] mismatch relative ref",
6725 ref_key->type == BTRFS_ROOT_REF_KEY ?
6726 "ROOT_REF" : "ROOT_BACKREF",
6727 ref_key->objectid, ref_key->offset);
6730 btrfs_release_path(&path);
6735 * Check all fs/file tree in low_memory mode.
6737 * 1. for fs tree root item, call check_fs_root_v2()
6738 * 2. for fs tree root ref/backref, call check_root_ref()
6740 * Return 0 if no error occurred.
/*
 * Walk the root tree starting at BTRFS_FS_TREE_OBJECTID and check every
 * fs/subvolume tree found there.
 *
 * ROOT_ITEMs whose objectid passes fs_root_objectid() are read and handed
 * to check_fs_root_v2(); ROOT_REF/ROOT_BACKREF items are handed to
 * check_root_ref(). The walk stops once the objectid exceeds
 * BTRFS_LAST_FREE_OBJECTID.
 * NOTE(review): the loop header, error accumulation, gotos, labels and the
 * return are not visible in this listing.
 */
6742 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6744 struct btrfs_root *tree_root = fs_info->tree_root;
6745 struct btrfs_root *cur_root = NULL;
6746 struct btrfs_path path;
6747 struct btrfs_key key;
6748 struct extent_buffer *node;
6749 unsigned int ext_ref;
6754 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6756 btrfs_init_path(&path);
6757 key.objectid = BTRFS_FS_TREE_OBJECTID;
6759 key.type = BTRFS_ROOT_ITEM_KEY;
6761 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6765 } else if (ret > 0) {
6771 node = path.nodes[0];
6772 slot = path.slots[0];
6773 btrfs_item_key_to_cpu(node, &key, slot);
6774 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6776 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6777 fs_root_objectid(key.objectid)) {
/* reloc trees are read without the root cache and freed again below */
6778 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6779 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6782 key.offset = (u64)-1;
6783 cur_root = btrfs_read_fs_root(fs_info, &key);
6786 if (IS_ERR(cur_root)) {
6787 error("Fail to read fs/subvol tree: %lld",
6793 ret = check_fs_root_v2(cur_root, ext_ref);
6796 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6797 btrfs_free_fs_root(cur_root);
6798 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6799 key.type == BTRFS_ROOT_BACKREF_KEY) {
6800 ret = check_root_ref(tree_root, &key, node, slot);
6804 ret = btrfs_next_item(tree_root, &path);
6814 btrfs_release_path(&path);
/*
 * Entry point of the fs roots check: print a banner unless progress
 * reporting is enabled, then call check_fs_roots_v2() when check_mode is
 * CHECK_MODE_LOWMEM; check_fs_roots() is the other visible call
 * (presumably the else branch, whose line is not visible in this listing).
 */
6818 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6819 struct cache_tree *root_cache)
6823 if (!ctx.progress_enabled)
6824 fprintf(stderr, "checking fs roots\n");
6825 if (check_mode == CHECK_MODE_LOWMEM)
6826 ret = check_fs_roots_v2(fs_info);
6828 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify all backrefs recorded for extent record @rec.
 *
 * For every backref in rec->backref_tree the visible code checks that it
 * was found in the extent tree, that a tree backref was referenced back,
 * and that a data backref has matching local ref count, disk bytenr and
 * byte length. The found references are then compared with rec->refs.
 * The messages are written to stderr.
 * NOTE(review): the "if (!print_errs) goto out" style guards, the error
 * flag, the "found += 1" branch and the return are not visible in this
 * listing; @print_errs presumably gates the fprintf() calls - confirm.
 */
6833 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6835 struct extent_backref *back, *tmp;
6836 struct tree_backref *tback;
6837 struct data_backref *dback;
6841 rbtree_postorder_for_each_entry_safe(back, tmp,
6842 &rec->backref_tree, node) {
/* the backref was seen in a tree but has no item in the extent tree */
6843 if (!back->found_extent_tree) {
6847 if (back->is_data) {
6848 dback = to_data_backref(back);
6849 fprintf(stderr, "Data backref %llu %s %llu"
6850 " owner %llu offset %llu num_refs %lu"
6851 " not found in extent tree\n",
6852 (unsigned long long)rec->start,
6853 back->full_backref ?
6855 back->full_backref ?
6856 (unsigned long long)dback->parent:
6857 (unsigned long long)dback->root,
6858 (unsigned long long)dback->owner,
6859 (unsigned long long)dback->offset,
6860 (unsigned long)dback->num_refs);
6862 tback = to_tree_backref(back);
6863 fprintf(stderr, "Tree backref %llu parent %llu"
6864 " root %llu not found in extent tree\n",
6865 (unsigned long long)rec->start,
6866 (unsigned long long)tback->parent,
6867 (unsigned long long)tback->root);
/* a tree backref in the extent tree that no tree block refers back to */
6870 if (!back->is_data && !back->found_ref) {
6874 tback = to_tree_backref(back);
6875 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6876 (unsigned long long)rec->start,
6877 back->full_backref ? "parent" : "root",
6878 back->full_backref ?
6879 (unsigned long long)tback->parent :
6880 (unsigned long long)tback->root, back);
6882 if (back->is_data) {
6883 dback = to_data_backref(back);
/* references found while scanning vs. the count stored in the backref */
6884 if (dback->found_ref != dback->num_refs) {
6888 fprintf(stderr, "Incorrect local backref count"
6889 " on %llu %s %llu owner %llu"
6890 " offset %llu found %u wanted %u back %p\n",
6891 (unsigned long long)rec->start,
6892 back->full_backref ?
6894 back->full_backref ?
6895 (unsigned long long)dback->parent:
6896 (unsigned long long)dback->root,
6897 (unsigned long long)dback->owner,
6898 (unsigned long long)dback->offset,
6899 dback->found_ref, dback->num_refs, back);
6901 if (dback->disk_bytenr != rec->start) {
6905 fprintf(stderr, "Backref disk bytenr does not"
6906 " match extent record, bytenr=%llu, "
6907 "ref bytenr=%llu\n",
6908 (unsigned long long)rec->start,
6909 (unsigned long long)dback->disk_bytenr);
6912 if (dback->bytes != rec->nr) {
6916 fprintf(stderr, "Backref bytes do not match "
6917 "extent backref, bytenr=%llu, ref "
6918 "bytes=%llu, backref bytes=%llu\n",
6919 (unsigned long long)rec->start,
6920 (unsigned long long)rec->nr,
6921 (unsigned long long)dback->bytes);
/* add this backref's share to the global reference count */
6924 if (!back->is_data) {
6927 dback = to_data_backref(back);
6928 found += dback->found_ref;
/* the sum over all backrefs must equal the refs of the extent record */
6931 if (found != rec->refs) {
6935 fprintf(stderr, "Incorrect global backref count "
6936 "on %llu found %llu wanted %llu\n",
6937 (unsigned long long)rec->start,
6938 (unsigned long long)found,
6939 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes(): maps @node back to the
 * extent_backref that embeds it.
 * NOTE(review): confirm the matching release of the backref memory.
 */
6945 static void __free_one_backref(struct rb_node *node)
6947 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref of @rec by passing rec->backref_tree to
 * rb_free_nodes() with __free_one_backref as the per-node callback.
 */
6952 static void free_all_extent_backrefs(struct extent_record *rec)
6954 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down an extent record cache: take the first cached extent, map it to
 * its extent_record, unlink it from @extent_cache and release all of its
 * backrefs.
 * NOTE(review): presumably repeated until the cache is empty and followed by
 * freeing the record itself - confirm.
 */
6957 static void free_extent_record_cache(struct cache_tree *extent_cache)
6959 struct cache_extent *cache;
6960 struct extent_record *rec;
6963 cache = first_cache_extent(extent_cache);
6966 rec = container_of(cache, struct extent_record, cache);
6967 remove_cache_extent(extent_cache, cache);
6968 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * A record qualifies when its content and owner ref were checked, the extent
 * item ref count equals the counted refs and is non-zero, it has no
 * duplicates, all_backpointers_checked() returns zero, and none of the
 * bad_full_backref, crossing_stripes or wrong_chunk_type flags is set.
 */
6973 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6974 struct extent_record *rec)
6976 if (rec->content_checked && rec->owner_ref_checked &&
6977 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6978 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6979 !rec->bad_full_backref && !rec->crossing_stripes &&
6980 !rec->wrong_chunk_type) {
/* Fully verified: unlink from the cache, drop backrefs, leave any list. */
6981 remove_cache_extent(extent_cache, &rec->cache);
6982 free_all_extent_backrefs(rec);
6983 list_del_init(&rec->list);
/*
 * Verify that the owner stored in the header of @buf really references the
 * block described by @rec.
 *
 * The backrefs of @rec are scanned for a tree backref whose root equals the
 * header owner.  Further down, the tree of the header owner is read and
 * searched for the first key of @buf, stopping one level above the block,
 * to see whether the parent slot points at buf->start.
 *
 * The final return yields 0 when such a reference was found and 1 otherwise.
 */
6989 static int check_owner_ref(struct btrfs_root *root,
6990 struct extent_record *rec,
6991 struct extent_buffer *buf)
6993 struct extent_backref *node, *tmp;
6994 struct tree_backref *back;
6995 struct btrfs_root *ref_root;
6996 struct btrfs_key key;
6997 struct btrfs_path path;
6998 struct extent_buffer *parent;
7003 rbtree_postorder_for_each_entry_safe(node, tmp,
7004 &rec->backref_tree, node) {
7007 if (!node->found_ref)
7009 if (node->full_backref)
7011 back = to_tree_backref(node);
7012 if (btrfs_header_owner(buf) == back->root)
7015 BUG_ON(rec->is_root);
7017 /* Try to find the block by searching the corresponding fs tree. */
7018 key.objectid = btrfs_header_owner(buf);
7019 key.type = BTRFS_ROOT_ITEM_KEY;
7020 key.offset = (u64)-1;
7022 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7023 if (IS_ERR(ref_root))
/* Use the first key of the block (item key or node key) as search target. */
7026 level = btrfs_header_level(buf);
7028 btrfs_item_key_to_cpu(buf, &key, 0);
7030 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop one level above buf so the parent pointer can be inspected. */
7032 btrfs_init_path(&path);
7033 path.lowest_level = level + 1;
7034 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7038 parent = path.nodes[level + 1];
7039 if (parent && buf->start == btrfs_node_blockptr(parent,
7040 path.slots[level + 1]))
7043 btrfs_release_path(&path);
7044 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full backrefs get separate treatment.
 * NOTE(review): presumably returns non-zero when the block belongs to the
 * extent tree - confirm the return values.
 */
7047 static int is_extent_tree_record(struct extent_record *rec)
7049 struct extent_backref *node, *tmp;
7050 struct tree_backref *back;
7053 rbtree_postorder_for_each_entry_safe(node, tmp,
7054 &rec->backref_tree, node) {
7057 back = to_tree_backref(node);
7058 if (node->full_backref)
7060 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * Looks up the extent record covering the given start and length in
 * @extent_cache, consults is_extent_tree_record() for it, and on the path
 * that reaches the end converts the parent key of the record to CPU order
 * and passes it to btrfs_add_corrupt_extent_record().
 */
7067 static int record_bad_block_io(struct btrfs_fs_info *info,
7068 struct cache_tree *extent_cache,
7071 struct extent_record *rec;
7072 struct cache_extent *cache;
7073 struct btrfs_key key;
7075 cache = lookup_cache_extent(extent_cache, start, len);
7079 rec = container_of(cache, struct extent_record, cache);
7080 if (!is_extent_tree_record(rec))
7083 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7084 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node: both btrfs_key_ptr entries are read out and written back in swapped
 * positions; the first node key is then available for
 * btrfs_fixup_low_keys() one level up.
 * Leaf: the payloads of both items are copied into temporary heap buffers
 * and written back crosswise, the item offsets and sizes are exchanged and
 * finally the two keys are swapped with btrfs_set_item_key_unsafe().
 */
7087 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7088 struct extent_buffer *buf, int slot)
7090 if (btrfs_header_level(buf)) {
7091 struct btrfs_key_ptr ptr1, ptr2;
7093 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7094 sizeof(struct btrfs_key_ptr));
7095 read_extent_buffer(buf, &ptr2,
7096 btrfs_node_key_ptr_offset(slot + 1),
7097 sizeof(struct btrfs_key_ptr));
7098 write_extent_buffer(buf, &ptr1,
7099 btrfs_node_key_ptr_offset(slot + 1),
7100 sizeof(struct btrfs_key_ptr));
7101 write_extent_buffer(buf, &ptr2,
7102 btrfs_node_key_ptr_offset(slot),
7103 sizeof(struct btrfs_key_ptr));
/*
 * NOTE(review): presumably only needed when the first slot changed - confirm
 * the guard around this key propagation.
 */
7105 struct btrfs_disk_key key;
7106 btrfs_node_key(buf, &key, 0);
7107 btrfs_fixup_low_keys(root, path, &key,
7108 btrfs_header_level(buf) + 1);
7111 struct btrfs_item *item1, *item2;
7112 struct btrfs_key k1, k2;
7113 char *item1_data, *item2_data;
7114 u32 item1_offset, item2_offset, item1_size, item2_size;
7116 item1 = btrfs_item_nr(slot);
7117 item2 = btrfs_item_nr(slot + 1);
7118 btrfs_item_key_to_cpu(buf, &k1, slot);
7119 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7120 item1_offset = btrfs_item_offset(buf, item1);
7121 item2_offset = btrfs_item_offset(buf, item2);
7122 item1_size = btrfs_item_size(buf, item1);
7123 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both payloads, each sized for its own item. */
7125 item1_data = malloc(item1_size);
7128 item2_data = malloc(item2_size);
7134 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7135 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes, yet item2_size bytes are
 * copied out of it (and the reverse for item2_data).  That is only safe when
 * both items have the same size - confirm.
 */
7137 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7138 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7142 btrfs_set_item_offset(buf, item1, item2_offset);
7143 btrfs_set_item_offset(buf, item2, item1_offset);
7144 btrfs_set_item_size(buf, item1, item2_size);
7145 btrfs_set_item_size(buf, item2, item1_size);
/*
 * Swap the keys; path->slots[0] is pointed at each item in turn before the
 * key setter is called (presumably that is how the setter picks the item).
 */
7147 path->slots[0] = slot;
7148 btrfs_set_item_key_unsafe(root, path, &k2);
7149 path->slots[0] = slot + 1;
7150 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair a block whose keys are out of order.
 *
 * Works on path->nodes[path->lowest_level]: each adjacent pair of keys is
 * read (both node-key and item-key readers are called, the choice between
 * them is not shown here) and compared with
 * btrfs_comp_cpu_keys().  Pairs that are not in ascending order are
 * presumably the ones handed to swap_values(); the buffer is marked dirty
 * afterwards.
 */
7155 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7157 struct extent_buffer *buf;
7158 struct btrfs_key k1, k2;
7160 int level = path->lowest_level;
7163 buf = path->nodes[level];
7164 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7166 btrfs_node_key_to_cpu(buf, &k1, i);
7167 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7169 btrfs_item_key_to_cpu(buf, &k1, i);
7170 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* A negative comparison result means k1 sorts before k2. */
7172 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7174 ret = swap_values(root, path, buf, i);
7177 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF, the types this repair can cope with
 * losing.  The removal is announced on stdout, the following item headers
 * are moved down by one, the item count is decremented and the buffer is
 * marked dirty.  The first item key can additionally be propagated to
 * level 1 through btrfs_fixup_low_keys().
 */
7183 static int delete_bogus_item(struct btrfs_root *root,
7184 struct btrfs_path *path,
7185 struct extent_buffer *buf, int slot)
7187 struct btrfs_key key;
7188 int nritems = btrfs_header_nritems(buf);
7190 btrfs_item_key_to_cpu(buf, &key, slot);
7192 /* These are all the keys we can deal with missing. */
7193 if (key.type != BTRFS_DIR_INDEX_KEY &&
7194 key.type != BTRFS_EXTENT_ITEM_KEY &&
7195 key.type != BTRFS_METADATA_ITEM_KEY &&
7196 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7197 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7200 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7201 (unsigned long long)key.objectid, key.type,
7202 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; the item data is left in place. */
7203 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7204 btrfs_item_nr_offset(slot + 1),
7205 sizeof(struct btrfs_item) *
7206 (nritems - slot - 1));
7207 btrfs_set_header_nritems(buf, nritems - 1);
/*
 * NOTE(review): presumably only done when the first slot was removed -
 * confirm the guard.
 */
7209 struct btrfs_disk_key disk_key;
7211 btrfs_item_key(buf, &disk_key, 0);
7212 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7214 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item exactly where its predecessor starts.  An item that ends
 * beyond that limit is handed to delete_bogus_item(); the messages printed
 * nearby cover the cases that cannot be fixed.  An item that ends short of
 * the limit has its data shifted up by the size of the gap, its offset
 * updated and the buffer marked dirty.
 */
7218 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7220 struct extent_buffer *buf;
7224 /* We should only get this for leaves */
7225 BUG_ON(path->lowest_level);
7226 buf = path->nodes[0];
7228 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7229 unsigned int shift = 0, offset;
/* First item: its end is measured against the end of the leaf data area. */
7231 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7232 BTRFS_LEAF_DATA_SIZE(root)) {
7233 if (btrfs_item_end_nr(buf, i) >
7234 BTRFS_LEAF_DATA_SIZE(root)) {
7235 ret = delete_bogus_item(root, path, buf, i);
7238 fprintf(stderr, "item is off the end of the "
7239 "leaf, can't fix\n");
7243 shift = BTRFS_LEAF_DATA_SIZE(root) -
7244 btrfs_item_end_nr(buf, i);
7245 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7246 btrfs_item_offset_nr(buf, i - 1)) {
7247 if (btrfs_item_end_nr(buf, i) >
7248 btrfs_item_offset_nr(buf, i - 1)) {
7249 ret = delete_bogus_item(root, path, buf, i);
7252 fprintf(stderr, "items overlap, can't fix\n");
7256 shift = btrfs_item_offset_nr(buf, i - 1) -
7257 btrfs_item_end_nr(buf, i);
/* Move the item data up by the computed gap and record the new offset. */
7262 printf("Shifting item nr %d by %u bytes in block %llu\n",
7263 i, shift, (unsigned long long)buf->start);
7264 offset = btrfs_item_offset_nr(buf, i);
7265 memmove_extent_buffer(buf,
7266 btrfs_leaf_data(buf) + offset + shift,
7267 btrfs_leaf_data(buf) + offset,
7268 btrfs_item_size_nr(buf, i));
7269 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7271 btrfs_mark_buffer_dirty(buf);
7275 * We may have moved things, in which case we want to exit so we don't
7276 * write those changes out. Once we have proper abort functionality in
7277 * progs this can be changed to something nicer.
7284 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7285 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  All roots referencing @buf are collected with
 * btrfs_find_all_roots(); for each of them the root is read, a transaction
 * is started and the first key of @buf is searched with COW enabled and
 * block checking skipped, so that the path ends at the level of @buf.
 * fix_key_order() or fix_item_offset() is then applied according to
 * @status and the transaction is committed.
 */
7287 static int try_to_fix_bad_block(struct btrfs_root *root,
7288 struct extent_buffer *buf,
7289 enum btrfs_tree_block_status status)
7291 struct btrfs_trans_handle *trans;
7292 struct ulist *roots;
7293 struct ulist_node *node;
7294 struct btrfs_root *search_root;
7295 struct btrfs_path path;
7296 struct ulist_iterator iter;
7297 struct btrfs_key root_key, key;
7300 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7301 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7304 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7308 btrfs_init_path(&path);
7309 ULIST_ITER_INIT(&iter);
/* Each ulist entry carries the objectid of one root referencing the block. */
7310 while ((node = ulist_next(roots, &iter))) {
7311 root_key.objectid = node->val;
7312 root_key.type = BTRFS_ROOT_ITEM_KEY;
7313 root_key.offset = (u64)-1;
7315 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7322 trans = btrfs_start_transaction(search_root, 0);
7323 if (IS_ERR(trans)) {
7324 ret = PTR_ERR(trans);
/*
 * Search down to the level of buf without validating blocks on the way;
 * the block under repair would fail that validation.
 */
7328 path.lowest_level = btrfs_header_level(buf);
7329 path.skip_check_block = 1;
7330 if (path.lowest_level)
7331 btrfs_node_key_to_cpu(buf, &key, 0);
7333 btrfs_item_key_to_cpu(buf, &key, 0);
7334 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7337 btrfs_commit_transaction(trans, search_root);
/* Pick the repair routine that matches the reported problem. */
7340 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7341 ret = fix_key_order(search_root, &path);
7342 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7343 ret = fix_item_offset(search_root, &path);
7345 btrfs_commit_transaction(trans, search_root);
7348 btrfs_release_path(&path);
7349 btrfs_commit_transaction(trans, search_root);
7352 btrfs_release_path(&path);
/*
 * Validate one tree block against its cached extent record.
 *
 * The record covering @buf is looked up in @extent_cache and updated with
 * the generation, first-key objectid and level taken from the block.  The
 * leaf or node checker is run with the parent key of the record; a block
 * that is not clean can be passed to try_to_fix_bad_block() and is reported
 * as bad on stderr if it still fails.  Further down the record is marked
 * content_checked, and its owner ref is either accepted because @flags
 * carries BTRFS_BLOCK_FLAG_FULL_BACKREF or verified with check_owner_ref().
 * Finally the record is offered to maybe_free_extent_rec().
 */
7356 static int check_block(struct btrfs_root *root,
7357 struct cache_tree *extent_cache,
7358 struct extent_buffer *buf, u64 flags)
7360 struct extent_record *rec;
7361 struct cache_extent *cache;
7362 struct btrfs_key key;
7363 enum btrfs_tree_block_status status;
7367 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7370 rec = container_of(cache, struct extent_record, cache);
7371 rec->generation = btrfs_header_generation(buf);
/* Remember the first key and the level of the block in the record. */
7373 level = btrfs_header_level(buf);
7374 if (btrfs_header_nritems(buf) > 0) {
7377 btrfs_item_key_to_cpu(buf, &key, 0);
7379 btrfs_node_key_to_cpu(buf, &key, 0);
7381 rec->info_objectid = key.objectid;
7383 rec->info_level = level;
7385 if (btrfs_is_leaf(buf))
7386 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7388 status = btrfs_check_node(root, &rec->parent_key, buf);
7390 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7392 status = try_to_fix_bad_block(root, buf, status);
7393 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7395 fprintf(stderr, "bad block %llu\n",
7396 (unsigned long long)buf->start);
7399 * Signal to callers we need to start the scan over
7400 * again since we'll have cowed blocks.
7405 rec->content_checked = 1;
7406 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7407 rec->owner_ref_checked = 1;
7409 ret = check_owner_ref(root, rec, buf);
7411 rec->owner_ref_checked = 1;
7415 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref of @rec.
 *
 * Walks the rec->backrefs list and compares each tree backref against
 * @parent and against @root; the full_backref flag is consulted ahead of
 * each comparison.
 * NOTE(review): this lookup walks the rec->backrefs list while
 * add_tree_backref() inserts into rec->backref_tree - confirm that both
 * containers are kept in sync.
 */
7420 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7421 u64 parent, u64 root)
7423 struct list_head *cur = rec->backrefs.next;
7424 struct extent_backref *node;
7425 struct tree_backref *back;
7427 while(cur != &rec->backrefs) {
7428 node = to_extent_backref(cur);
7432 back = to_tree_backref(node);
7434 if (!node->full_backref)
7436 if (parent == back->parent)
7439 if (node->full_backref)
7441 if (back->root == root)
/*
 * Allocate a tree backref for @rec with a zeroed embedded node.
 * One path stores @parent and sets full_backref to 1; the other path
 * clears full_backref.
 * NOTE(review): presumably the choice depends on @parent being non-zero and
 * the second path stores @root - confirm.
 */
7449 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7450 u64 parent, u64 root)
7452 struct tree_backref *ref = malloc(sizeof(*ref));
7456 memset(&ref->node, 0, sizeof(ref->node));
7458 ref->parent = parent;
7459 ref->node.full_backref = 1;
7462 ref->node.full_backref = 0;
/*
 * Look up an existing data backref of @rec.
 *
 * Walks the rec->backrefs list and compares each data backref against
 * @parent, and against @root, @owner and @offset; the full_backref flag is
 * consulted ahead of each comparison.  When @found_ref is set and the
 * candidate already has found_ref, a candidate whose bytes or disk_bytenr
 * differ from @bytes or @disk_bytenr gets special treatment.
 * NOTE(review): presumably such a candidate is skipped so that the caller
 * allocates a separate backref - confirm.
 */
7469 static struct data_backref *find_data_backref(struct extent_record *rec,
7470 u64 parent, u64 root,
7471 u64 owner, u64 offset,
7473 u64 disk_bytenr, u64 bytes)
7475 struct list_head *cur = rec->backrefs.next;
7476 struct extent_backref *node;
7477 struct data_backref *back;
7479 while(cur != &rec->backrefs) {
7480 node = to_extent_backref(cur);
7484 back = to_data_backref(node);
7486 if (!node->full_backref)
7488 if (parent == back->parent)
7491 if (node->full_backref)
7493 if (back->root == root && back->owner == owner &&
7494 back->offset == offset) {
7495 if (found_ref && node->found_ref &&
7496 (back->bytes != bytes ||
7497 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec with a zeroed embedded node that is
 * flagged is_data.  One path stores @parent and sets full_backref to 1;
 * the other stores @offset and clears full_backref.  The bytes field is
 * initialised from max_size and rec->max_size is raised to max_size when
 * that is larger.
 */
7507 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7508 u64 parent, u64 root,
7509 u64 owner, u64 offset,
7512 struct data_backref *ref = malloc(sizeof(*ref));
7516 memset(&ref->node, 0, sizeof(ref->node));
7517 ref->node.is_data = 1;
7520 ref->parent = parent;
7523 ref->node.full_backref = 1;
7527 ref->offset = offset;
7528 ref->node.full_backref = 0;
7530 ref->bytes = max_size;
/* Keep the record-wide maximum in step with the largest size seen. */
7533 if (max_size > rec->max_size)
7534 rec->max_size = max_size;
/*
 * Check if the type of extent matches with its chunk.
 *
 * The block group is looked up through global_info at rec->start.  A data
 * extent must sit in a DATA block group.  A metadata extent must sit in a
 * SYSTEM or METADATA block group; when the record has backrefs, the first
 * one in the tree decides which of the two is required (chunk tree root
 * means SYSTEM, anything else METADATA).  A mismatch, or a data backref on
 * a metadata extent, sets rec->wrong_chunk_type.
 */
7539 static void check_extent_type(struct extent_record *rec)
7541 struct btrfs_block_group_cache *bg_cache;
7543 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7547 /* data extent, check chunk directly*/
7548 if (!rec->metadata) {
7549 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7550 rec->wrong_chunk_type = 1;
7554 /* metadata extent, check the obvious case first */
7555 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7556 BTRFS_BLOCK_GROUP_METADATA))) {
7557 rec->wrong_chunk_type = 1;
7562 * Check SYSTEM extent, as it's also marked as metadata, we can only
7563 * make sure it's a SYSTEM extent by its backref
7565 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7566 struct extent_backref *node;
7567 struct tree_backref *tback;
7570 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7571 if (node->is_data) {
7572 /* tree block shouldn't have data backref */
7573 rec->wrong_chunk_type = 1;
7576 tback = container_of(node, struct tree_backref, node);
7578 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7579 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7581 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7582 if (!(bg_cache->flags & bg_type))
7583 rec->wrong_chunk_type = 1;
7588 * Allocate a new extent record, fill default values from @tmpl and insert into
7589 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7590 * the cache, otherwise it fails.
/*
 * Create the extent record described by @tmpl and insert it into
 * @extent_cache without looking for an existing entry first.
 *
 * tmpl->max_size must be non-zero (enforced by BUG_ON).  Most fields are
 * copied from @tmpl; duplicate count and the error flags start out cleared.
 * The new record is added to the global bytes_used counter, its
 * crossing_stripes flag can be computed from global_info->nodesize, and
 * check_extent_type() is run on it.
 */
7592 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7593 struct extent_record *tmpl)
7595 struct extent_record *rec;
7598 BUG_ON(tmpl->max_size == 0);
7599 rec = malloc(sizeof(*rec));
7602 rec->start = tmpl->start;
7603 rec->max_size = tmpl->max_size;
/* The record length is whichever of nr and max_size is larger. */
7604 rec->nr = max(tmpl->nr, tmpl->max_size);
7605 rec->found_rec = tmpl->found_rec;
7606 rec->content_checked = tmpl->content_checked;
7607 rec->owner_ref_checked = tmpl->owner_ref_checked;
7608 rec->num_duplicates = 0;
7609 rec->metadata = tmpl->metadata;
7610 rec->flag_block_full_backref = FLAG_UNSET;
7611 rec->bad_full_backref = 0;
7612 rec->crossing_stripes = 0;
7613 rec->wrong_chunk_type = 0;
7614 rec->is_root = tmpl->is_root;
7615 rec->refs = tmpl->refs;
7616 rec->extent_item_refs = tmpl->extent_item_refs;
7617 rec->parent_generation = tmpl->parent_generation;
7618 INIT_LIST_HEAD(&rec->backrefs);
7619 INIT_LIST_HEAD(&rec->dups);
7620 INIT_LIST_HEAD(&rec->list);
7621 rec->backref_tree = RB_ROOT;
7622 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache range uses tmpl->nr, which can be smaller than rec->nr above. */
7623 rec->cache.start = tmpl->start;
7624 rec->cache.size = tmpl->nr;
7625 ret = insert_cache_extent(extent_cache, &rec->cache);
7630 bytes_used += rec->nr;
/* NOTE(review): presumably evaluated for metadata extents only - confirm. */
7633 rec->crossing_stripes = check_crossing_stripes(global_info,
7634 rec->start, global_info->nodesize);
7635 check_extent_type(rec);
7640 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7642 * - refs - if found, increase refs
7643 * - is_root - if found, set
7644 * - content_checked - if found, set
7645 * - owner_ref_checked - if found, set
7647 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr) in
 * @extent_cache, or create a new record through add_extent_rec_nolookup().
 *
 * For an existing record: nr can be reset from the template; a template
 * flagged found_rec that starts elsewhere or hits a record that already has
 * found_rec is stored as a duplicate on rec->dups and the record is queued
 * on the global duplicate_extents list; extent_item_refs, content_checked,
 * owner_ref_checked, parent_key, parent_generation and max_size are
 * updated from the template; the stripe-crossing and chunk-type checks are
 * refreshed and the record is offered to maybe_free_extent_rec().
 */
7649 static int add_extent_rec(struct cache_tree *extent_cache,
7650 struct extent_record *tmpl)
7652 struct extent_record *rec;
7653 struct cache_extent *cache;
7657 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7659 rec = container_of(cache, struct extent_record, cache);
7663 rec->nr = max(tmpl->nr, tmpl->max_size);
7666 * We need to make sure to reset nr to whatever the extent
7667 * record says was the real size, this way we can compare it to
7670 if (tmpl->found_rec) {
7671 if (tmpl->start != rec->start || rec->found_rec) {
7672 struct extent_record *tmp;
/* Queue the record on the duplicate list once. */
7675 if (list_empty(&rec->list))
7676 list_add_tail(&rec->list,
7677 &duplicate_extents);
7680 * We have to do this song and dance in case we
7681 * find an extent record that falls inside of
7682 * our current extent record but does not have
7683 * the same objectid.
7685 tmp = malloc(sizeof(*tmp));
7688 tmp->start = tmpl->start;
7689 tmp->max_size = tmpl->max_size;
7692 tmp->metadata = tmpl->metadata;
7693 tmp->extent_item_refs = tmpl->extent_item_refs;
7694 INIT_LIST_HEAD(&tmp->list);
7695 list_add_tail(&tmp->list, &rec->dups);
7696 rec->num_duplicates++;
/* A second extent item ref count for the same block is reported. */
7703 if (tmpl->extent_item_refs && !dup) {
7704 if (rec->extent_item_refs) {
7705 fprintf(stderr, "block %llu rec "
7706 "extent_item_refs %llu, passed %llu\n",
7707 (unsigned long long)tmpl->start,
7708 (unsigned long long)
7709 rec->extent_item_refs,
7710 (unsigned long long)tmpl->extent_item_refs);
7712 rec->extent_item_refs = tmpl->extent_item_refs;
7716 if (tmpl->content_checked)
7717 rec->content_checked = 1;
7718 if (tmpl->owner_ref_checked)
7719 rec->owner_ref_checked = 1;
7720 memcpy(&rec->parent_key, &tmpl->parent_key,
7721 sizeof(tmpl->parent_key));
7722 if (tmpl->parent_generation)
7723 rec->parent_generation = tmpl->parent_generation;
7724 if (rec->max_size < tmpl->max_size)
7725 rec->max_size = tmpl->max_size;
7728 * A metadata extent can't cross stripe_len boundary, otherwise
7729 * kernel scrub won't be able to handle it.
7730 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7734 rec->crossing_stripes = check_crossing_stripes(
7735 global_info, rec->start,
7736 global_info->nodesize);
7737 check_extent_type(rec);
7738 maybe_free_extent_rec(extent_cache, rec);
/* No existing record covers the range: create a fresh one. */
7742 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Register a tree backref (@parent or @root) for the block at @bytenr.
 *
 * If no extent record covers @bytenr, a template record starting there is
 * inserted first and the lookup is repeated.  The matching backref is
 * searched with find_tree_backref() and allocated when missing.  The
 * backref is then flagged found_ref or found_extent_tree; a message on
 * stderr reports a flag that was already set.  The backref can be inserted
 * into rec->backref_tree, after which the chunk type check is refreshed and
 * the record is offered to maybe_free_extent_rec().
 * NOTE(review): presumably @found_ref selects which of the two flags is
 * set - confirm.
 */
7747 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7748 u64 parent, u64 root, int found_ref)
7750 struct extent_record *rec;
7751 struct tree_backref *back;
7752 struct cache_extent *cache;
7754 bool insert = false;
7756 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7758 struct extent_record tmpl;
7760 memset(&tmpl, 0, sizeof(tmpl));
7761 tmpl.start = bytenr;
7766 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7770 /* really a bug in the cache_extent implementation now */
7771 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7776 rec = container_of(cache, struct extent_record, cache);
7777 if (rec->start != bytenr) {
7779 * Several cause, from unaligned bytenr to over lapping extents
7784 back = find_tree_backref(rec, parent, root);
7786 back = alloc_tree_backref(rec, parent, root);
7793 if (back->node.found_ref) {
7794 fprintf(stderr, "Extent back ref already exists "
7795 "for %llu parent %llu root %llu \n",
7796 (unsigned long long)bytenr,
7797 (unsigned long long)parent,
7798 (unsigned long long)root);
7800 back->node.found_ref = 1;
7802 if (back->node.found_extent_tree) {
7803 fprintf(stderr, "Extent back ref already exists "
7804 "for %llu parent %llu root %llu \n",
7805 (unsigned long long)bytenr,
7806 (unsigned long long)parent,
7807 (unsigned long long)root);
7809 back->node.found_extent_tree = 1;
/* A non-zero rb_insert result only triggers a warning. */
7812 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7813 compare_extent_backref));
7814 check_extent_type(rec);
7815 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record with @max_size is
 * inserted first and the lookup is repeated.  rec->max_size is raised to
 * @max_size when needed.  The matching backref is searched with
 * find_data_backref() and allocated when missing.
 *
 * One path requires @num_refs to be 1, marks the backref found_ref, bumps
 * its found_ref counter, refreshes bytes and disk_bytenr (erasing the node
 * from the tree for re-insertion when they changed) and marks the record
 * content and owner checked.  The other path stores @num_refs and marks the
 * backref found_extent_tree, reporting on stderr when it already was.
 * Finally the record is offered to maybe_free_extent_rec().
 * NOTE(review): presumably @found_ref selects between the two paths -
 * confirm.
 */
7819 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7820 u64 parent, u64 root, u64 owner, u64 offset,
7821 u32 num_refs, int found_ref, u64 max_size)
7823 struct extent_record *rec;
7824 struct data_backref *back;
7825 struct cache_extent *cache;
7827 bool insert = false;
7829 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7831 struct extent_record tmpl;
7833 memset(&tmpl, 0, sizeof(tmpl));
7834 tmpl.start = bytenr;
7836 tmpl.max_size = max_size;
7838 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7842 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7847 rec = container_of(cache, struct extent_record, cache);
7848 if (rec->max_size < max_size)
7849 rec->max_size = max_size;
7852 * If found_ref is set then max_size is the real size and must match the
7853 * existing refs. So if we have already found a ref then we need to
7854 * make sure that this ref matches the existing one, otherwise we need
7855 * to add a new backref so we can notice that the backrefs don't match
7856 * and we need to figure out who is telling the truth. This is to
7857 * account for that awful fsync bug I introduced where we'd end up with
7858 * a btrfs_file_extent_item that would have its length include multiple
7859 * prealloc extents or point inside of a prealloc extent.
7861 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7864 back = alloc_data_backref(rec, parent, root, owner, offset,
7871 BUG_ON(num_refs != 1);
7872 if (back->node.found_ref)
7873 BUG_ON(back->bytes != max_size);
7874 back->node.found_ref = 1;
7875 back->found_ref += 1;
7876 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7877 back->bytes = max_size;
7878 back->disk_bytenr = bytenr;
7880 /* Need to reinsert if not already in the tree */
7882 rb_erase(&back->node.node, &rec->backref_tree);
7887 rec->content_checked = 1;
7888 rec->owner_ref_checked = 1;
7890 if (back->node.found_extent_tree) {
7891 fprintf(stderr, "Extent back ref already exists "
7892 "for %llu parent %llu root %llu "
7893 "owner %llu offset %llu num_refs %lu\n",
7894 (unsigned long long)bytenr,
7895 (unsigned long long)parent,
7896 (unsigned long long)root,
7897 (unsigned long long)owner,
7898 (unsigned long long)offset,
7899 (unsigned long)num_refs);
7901 back->num_refs = num_refs;
7902 back->node.found_extent_tree = 1;
/* A non-zero rb_insert result only triggers a warning. */
7905 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7906 compare_extent_backref));
7908 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the block [@bytenr, @size) for later processing: the range is
 * recorded in @seen and added to @pending.  The result of the @pending
 * insertion is ignored.
 * NOTE(review): presumably a failed @seen insertion (block already queued)
 * returns early - confirm.
 */
7912 static int add_pending(struct cache_tree *pending,
7913 struct cache_tree *seen, u64 bytenr, u32 size)
7916 ret = add_cache_extent(seen, bytenr, size);
7919 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store up to @bits_nr of
 * them in @bits.
 *
 * Sources as they appear here: an entry from @reada goes into bits[0];
 * @nodes is searched from a position up to 32768 bytes before @last and
 * again from offset 0; @pending is searched from offset 0.  Entries are
 * copied in cache order while slots remain.  When more than 8 slots are
 * still free, @pending is additionally searched from the end of bits[0]
 * and further extents are appended; a gap larger than 32768 bytes between
 * the running end offset and the next extent is tested for.
 * NOTE(review): presumably that gap test ends the append loop - confirm.
 */
7923 static int pick_next_pending(struct cache_tree *pending,
7924 struct cache_tree *reada,
7925 struct cache_tree *nodes,
7926 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): last is u64; on ABIs where unsigned long is 32 bits this
 * assignment narrows the value - confirm this is acceptable.
 */
7929 unsigned long node_start = last;
7930 struct cache_extent *cache;
7933 cache = search_cache_extent(reada, 0);
7935 bits[0].start = cache->start;
7936 bits[0].size = cache->size;
/* Back up a little so nodes just before the last position are picked up. */
7941 if (node_start > 32768)
7942 node_start -= 32768;
7944 cache = search_cache_extent(nodes, node_start);
7946 cache = search_cache_extent(nodes, 0);
7949 cache = search_cache_extent(pending, 0);
7954 bits[ret].start = cache->start;
7955 bits[ret].size = cache->size;
7956 cache = next_cache_extent(cache);
7958 } while (cache && ret < bits_nr);
7964 bits[ret].start = cache->start;
7965 bits[ret].size = cache->size;
7966 cache = next_cache_extent(cache);
7968 } while (cache && ret < bits_nr);
7970 if (bits_nr - ret > 8) {
7971 u64 lookup = bits[0].start + bits[0].size;
7972 struct cache_extent *next;
7973 next = search_cache_extent(pending, lookup);
7975 if (next->start - lookup > 32768)
7977 bits[ret].start = next->start;
7978 bits[ret].size = next->size;
7979 lookup = next->start + next->size;
7983 next = next_cache_extent(next);
/*
 * Destructor callback for one cached chunk record: maps @cache back to its
 * chunk_record and unlinks the record from its list and dextents links.
 * NOTE(review): confirm the record memory is released afterwards.
 */
7991 static void free_chunk_record(struct cache_extent *cache)
7993 struct chunk_record *rec;
7995 rec = container_of(cache, struct chunk_record, cache);
7996 list_del_init(&rec->list);
7997 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by passing free_chunk_record as the
 * per-extent destructor to cache_tree_free_extents().
 */
8001 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8003 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Destructor callback for one device record: maps the rb_node back to the
 * device_record that embeds it.
 * NOTE(review): confirm the record memory is released afterwards.
 */
8006 static void free_device_record(struct rb_node *node)
8008 struct device_record *rec;
8010 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation that pairs the name device_cache with
 * free_device_record as the per-node destructor.
 * NOTE(review): presumably expands to the tree teardown function for the
 * device cache - confirm against the macro definition.
 */
8014 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to the block group tree: its cache extent is inserted into
 * tree->tree and the record is appended to the tree->block_groups list.
 * NOTE(review): presumably the list insertion is skipped when the cache
 * insertion fails - confirm.
 */
8016 int insert_block_group_record(struct block_group_tree *tree,
8017 struct block_group_record *bg_rec)
8021 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8025 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Destructor callback for one cached block group record: maps @cache back
 * to its block_group_record and unlinks it from its list.
 * NOTE(review): confirm the record memory is released afterwards.
 */
8029 static void free_block_group_record(struct cache_extent *cache)
8031 struct block_group_record *rec;
8033 rec = container_of(cache, struct block_group_record, cache);
8034 list_del_init(&rec->list);
/*
 * Release every entry of the block group tree by passing
 * free_block_group_record as the per-extent destructor to
 * cache_tree_free_extents().
 */
8038 void free_block_group_tree(struct block_group_tree *tree)
8040 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to the device extent tree.  The cache extent is inserted with
 * insert_cache_extent2(), and the record is appended to both orphan lists
 * (no_chunk_orphans and no_device_orphans) of @tree.
 * NOTE(review): presumably the list insertions are skipped when the cache
 * insertion fails - confirm.
 */
8043 int insert_device_extent_record(struct device_extent_tree *tree,
8044 struct device_extent_record *de_rec)
8049 * Device extent is a bit different from the other extents, because
8050 * the extents which belong to the different devices may have the
8051 * same start and size, so we need use the special extent cache
8052 * search/insert functions.
8054 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8058 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8059 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Destructor callback for one cached device extent record: maps @cache back
 * to its device_extent_record and unlinks the chunk_list and device_list
 * links, each only when it is currently linked.
 * NOTE(review): confirm the record memory is released afterwards.
 */
8063 static void free_device_extent_record(struct cache_extent *cache)
8065 struct device_extent_record *rec;
8067 rec = container_of(cache, struct device_extent_record, cache);
8068 if (!list_empty(&rec->chunk_list))
8069 list_del_init(&rec->chunk_list);
8070 if (!list_empty(&rec->device_list))
8071 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent tree by passing
 * free_device_extent_record as the per-extent destructor to
 * cache_tree_free_extents().
 */
8075 void free_device_extent_tree(struct device_extent_tree *tree)
8077 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8080 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent ref item at @slot of @leaf into an in-memory backref.
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID leads to add_tree_backref();
 * the other call made here is add_data_backref() with the ref count of the
 * item.  In both calls key.objectid is the extent start and key.offset is
 * passed as the parent.
 */
8081 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8082 struct extent_buffer *leaf, int slot)
8084 struct btrfs_extent_ref_v0 *ref0;
8085 struct btrfs_key key;
8088 btrfs_item_key_to_cpu(leaf, &key, slot);
8089 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8090 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8091 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8094 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8095 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at the given slot of
 * @leaf.
 *
 * The record is zero-allocated with room for all stripes
 * (btrfs_chunk_record_size).  The cache range is [key->offset, chunk
 * length); the key fields, leaf generation and the chunk attributes are
 * copied, followed by devid, offset and device uuid of every stripe.
 * A failed allocation is reported on stderr.
 */
8101 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8102 struct btrfs_key *key,
8105 struct btrfs_chunk *ptr;
8106 struct chunk_record *rec;
8109 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8110 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8112 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8114 fprintf(stderr, "memory allocation failed\n");
8118 INIT_LIST_HEAD(&rec->list);
8119 INIT_LIST_HEAD(&rec->dextents);
/* The chunk is cached by its logical start (key offset) and length. */
8122 rec->cache.start = key->offset;
8123 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8125 rec->generation = btrfs_header_generation(leaf);
8127 rec->objectid = key->objectid;
8128 rec->type = key->type;
8129 rec->offset = key->offset;
8131 rec->length = rec->cache.size;
8132 rec->owner = btrfs_chunk_owner(leaf, ptr);
8133 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8134 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8135 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8136 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8137 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8138 rec->num_stripes = num_stripes;
8139 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, physical offset and device uuid. */
8141 for (i = 0; i < rec->num_stripes; ++i) {
8142 rec->stripes[i].devid =
8143 btrfs_stripe_devid_nr(leaf, ptr, i);
8144 rec->stripes[i].offset =
8145 btrfs_stripe_offset_nr(leaf, ptr, i);
8146 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8147 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at the given slot of @eb and cache it.
 *
 * The item is checked with btrfs_check_chunk_valid(); an error message
 * names the chunk that is being ignored.  A chunk_record is built with
 * btrfs_new_chunk_record() and inserted into @chunk_cache; a message on
 * stderr reports a chunk that already existed.
 */
8154 static int process_chunk_item(struct cache_tree *chunk_cache,
8155 struct btrfs_key *key, struct extent_buffer *eb,
8158 struct chunk_record *rec;
8159 struct btrfs_chunk *chunk;
8162 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8164 * Do extra check for this chunk item,
8166 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8167 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8168 * and owner<->key_type check.
8170 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8173 error("chunk(%llu, %llu) is not valid, ignore it",
8174 key->offset, btrfs_chunk_length(eb, chunk));
8177 rec = btrfs_new_chunk_record(eb, key, slot);
8178 ret = insert_cache_extent(chunk_cache, &rec->cache);
8180 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8181 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree using device_record_compare.
 *
 * The record stores the leaf generation, the key fields, and the device id,
 * total bytes and used bytes read from the item.  A failed allocation and
 * an already existing device are both reported on stderr.
 */
8188 static int process_device_item(struct rb_root *dev_cache,
8189 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8191 struct btrfs_dev_item *ptr;
8192 struct device_record *rec;
8195 ptr = btrfs_item_ptr(eb,
8196 slot, struct btrfs_dev_item);
8198 rec = malloc(sizeof(*rec));
8200 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is first taken from the key offset here and then
 * overwritten below with the id stored in the item.
 */
8204 rec->devid = key->offset;
8205 rec->generation = btrfs_header_generation(eb);
8207 rec->objectid = key->objectid;
8208 rec->type = key->type;
8209 rec->offset = key->offset;
8211 rec->devid = btrfs_device_id(eb, ptr);
8212 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8213 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8215 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8217 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at the
 * given slot of @leaf.
 *
 * The record is zero-allocated; the cache range is [key->objectid,
 * key->offset).  The key fields, the leaf generation and the block group
 * flags are copied and the list link is initialised.  A failed allocation
 * is reported on stderr.
 */
8224 struct block_group_record *
8225 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8228 struct btrfs_block_group_item *ptr;
8229 struct block_group_record *rec;
8231 rec = calloc(1, sizeof(*rec));
8233 fprintf(stderr, "memory allocation failed\n");
/* Key objectid is the start of the block group, key offset its length. */
8237 rec->cache.start = key->objectid;
8238 rec->cache.size = key->offset;
8240 rec->generation = btrfs_header_generation(leaf);
8242 rec->objectid = key->objectid;
8243 rec->type = key->type;
8244 rec->offset = key->offset;
8246 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8247 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8249 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb with
 * btrfs_new_block_group_record() and insert it into @block_group_cache.
 * The message after the insert reports a block group that already
 * "existed", printing the objectid and offset of the record.
 *
 * NOTE(review): return value and cleanup of the rejected record are not
 * shown here - confirm.
 */
8254 static int process_block_group_item(struct block_group_tree *block_group_cache,
8255 struct btrfs_key *key,
8256 struct extent_buffer *eb, int slot)
8258 struct block_group_record *rec;
8261 rec = btrfs_new_block_group_record(eb, key, slot);
8262 ret = insert_block_group_record(block_group_cache, rec);
8264 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8265 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record (calloc) describing the dev extent
 * item at @slot of @leaf.
 *
 * The cache entry of the record is keyed by (key->objectid, key->offset)
 * and sized by the dev extent length. The key fields, the leaf header
 * generation, the owning chunk objectid/offset and the extent length are
 * copied from the key and the on-disk item. Both list heads (chunk_list,
 * device_list) are initialised empty.
 *
 * Note that the record field is spelled chunk_objecteid; that is the name
 * used by this code, not a typo introduced here.
 *
 * NOTE(review): the action taken after the "memory allocation failed"
 * message is not shown here - confirm.
 */
8272 struct device_extent_record *
8273 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8274 struct btrfs_key *key, int slot)
8276 struct device_extent_record *rec;
8277 struct btrfs_dev_extent *ptr;
8279 rec = calloc(1, sizeof(*rec));
8281 fprintf(stderr, "memory allocation failed\n");
8285 rec->cache.objectid = key->objectid;
8286 rec->cache.start = key->offset;
8288 rec->generation = btrfs_header_generation(leaf);
8290 rec->objectid = key->objectid;
8291 rec->type = key->type;
8292 rec->offset = key->offset;
8294 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8295 rec->chunk_objecteid =
8296 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8298 btrfs_dev_extent_chunk_offset(leaf, ptr);
8299 rec->length = btrfs_dev_extent_length(leaf, ptr);
8300 rec->cache.size = rec->length;
8302 INIT_LIST_HEAD(&rec->chunk_list);
8303 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache.
 * The message after the insert reports a device extent that already
 * "existed", printing objectid, offset and length of the record.
 *
 * NOTE(review): return value and cleanup of the rejected record are not
 * shown here - confirm.
 */
8309 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8310 struct btrfs_key *key, struct extent_buffer *eb,
8313 struct device_extent_record *rec;
8316 rec = btrfs_new_device_extent_record(eb, key, slot);
8317 ret = insert_device_extent_record(dev_extent_cache, rec);
8320 "Device extent[%llu, %llu, %llu] existed.\n",
8321 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM or METADATA_ITEM at @slot of leaf @eb and feed it
 * into @extent_cache.
 *
 * An extent record (start, length, reference count, metadata flag) is added
 * with add_extent_rec(); then every inline reference stored after the item
 * is walked and turned into a tree or data backref with add_tree_backref()
 * or add_data_backref().
 *
 * Extents whose bytenr is not sector aligned, metadata extents whose length
 * differs from the node size, and data extents whose length is not sector
 * aligned are reported through error() as ignored.
 *
 * NOTE(review): the return statements of the error paths are not shown
 * here - confirm what callers receive for an ignored item.
 */
8328 static int process_extent_item(struct btrfs_root *root,
8329 struct cache_tree *extent_cache,
8330 struct extent_buffer *eb, int slot)
8332 struct btrfs_extent_item *ei;
8333 struct btrfs_extent_inline_ref *iref;
8334 struct btrfs_extent_data_ref *dref;
8335 struct btrfs_shared_data_ref *sref;
8336 struct btrfs_key key;
8337 struct extent_record tmpl;
8342 u32 item_size = btrfs_item_size_nr(eb, slot);
8348 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * The extent length is the node size for METADATA_ITEM keys and
 * key.offset otherwise.
 */
8350 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8352 num_bytes = root->fs_info->nodesize;
8354 num_bytes = key.offset;
8357 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8358 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8359 key.objectid, root->fs_info->sectorsize);
/*
 * Item smaller than struct btrfs_extent_item: only the v0 layout is
 * handled on this path, and only when BTRFS_COMPAT_EXTENT_TREE_V0 is
 * compiled in.
 */
8362 if (item_size < sizeof(*ei)) {
8363 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8364 struct btrfs_extent_item_v0 *ei0;
8365 BUG_ON(item_size != sizeof(*ei0));
8366 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8367 refs = btrfs_extent_refs_v0(eb, ei0);
8371 memset(&tmpl, 0, sizeof(tmpl));
8372 tmpl.start = key.objectid;
8373 tmpl.nr = num_bytes;
8374 tmpl.extent_item_refs = refs;
8375 tmpl.metadata = metadata;
8377 tmpl.max_size = num_bytes;
8379 return add_extent_rec(extent_cache, &tmpl);
8382 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8383 refs = btrfs_extent_refs(eb, ei);
8384 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8388 if (metadata && num_bytes != root->fs_info->nodesize) {
8389 error("ignore invalid metadata extent, length %llu does not equal to %u",
8390 num_bytes, root->fs_info->nodesize);
8393 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8394 error("ignore invalid data extent, length %llu is not aligned to %u",
8395 num_bytes, root->fs_info->sectorsize);
8399 memset(&tmpl, 0, sizeof(tmpl));
8400 tmpl.start = key.objectid;
8401 tmpl.nr = num_bytes;
8402 tmpl.extent_item_refs = refs;
8403 tmpl.metadata = metadata;
8405 tmpl.max_size = num_bytes;
8406 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item. For a tree block stored
 * under an EXTENT_ITEM key a btrfs_tree_block_info sits in between and is
 * skipped. The refs end at the end of the item.
 */
8408 ptr = (unsigned long)(ei + 1);
8409 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8410 key.type == BTRFS_EXTENT_ITEM_KEY)
8411 ptr += sizeof(struct btrfs_tree_block_info);
8413 end = (unsigned long)ei + item_size;
8415 iref = (struct btrfs_extent_inline_ref *)ptr;
8416 type = btrfs_extent_inline_ref_type(eb, iref);
8417 offset = btrfs_extent_inline_ref_offset(eb, iref);
8419 case BTRFS_TREE_BLOCK_REF_KEY:
8420 ret = add_tree_backref(extent_cache, key.objectid,
8424 "add_tree_backref failed (extent items tree block): %s",
8427 case BTRFS_SHARED_BLOCK_REF_KEY:
8428 ret = add_tree_backref(extent_cache, key.objectid,
8432 "add_tree_backref failed (extent items shared block): %s",
8435 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref from its offset field on. */
8436 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8437 add_data_backref(extent_cache, key.objectid, 0,
8438 btrfs_extent_data_ref_root(eb, dref),
8439 btrfs_extent_data_ref_objectid(eb,
8441 btrfs_extent_data_ref_offset(eb, dref),
8442 btrfs_extent_data_ref_count(eb, dref),
8445 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
8446 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8447 add_data_backref(extent_cache, key.objectid, offset,
8449 btrfs_shared_data_ref_count(eb, sref),
8453 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8454 key.objectid, key.type, num_bytes);
/* Advance by the size that belongs to this ref type. */
8457 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the byte range [@offset, @offset + @bytes) of block group
 * @cache is present as one exact entry in cache->free_space_ctl.
 *
 * For every superblock mirror (btrfs_sb_offset(i)) btrfs_rmap_block() is
 * called and each stripe it reports (logical[nr], stripe_len) is compared
 * with the range. Where a stripe overlaps, the range is trimmed at the
 * front or the back; when the stripe lies in the middle, the left part is
 * checked by a recursive call and the loop continues with the right part.
 *
 * What remains is looked up with btrfs_find_free_space(). A missing entry,
 * a different start offset or a different size is reported on stderr; the
 * entry is finally removed with unlink_free_space().
 *
 * NOTE(review): return codes and the freeing of logical[] are not shown
 * here - confirm.
 */
8464 static int check_cache_range(struct btrfs_root *root,
8465 struct btrfs_block_group_cache *cache,
8466 u64 offset, u64 bytes)
8468 struct btrfs_free_space *entry;
/* Carve every superblock mirror stripe out of the range being checked. */
8474 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8475 bytenr = btrfs_sb_offset(i);
8476 ret = btrfs_rmap_block(root->fs_info,
8477 cache->key.objectid, bytenr, 0,
8478 &logical, &nr, &stripe_len);
8483 if (logical[nr] + stripe_len <= offset)
8485 if (offset + bytes <= logical[nr])
8487 if (logical[nr] == offset) {
8488 if (stripe_len >= bytes) {
8492 bytes -= stripe_len;
8493 offset += stripe_len;
8494 } else if (logical[nr] < offset) {
8495 if (logical[nr] + stripe_len >=
8500 bytes = (offset + bytes) -
8501 (logical[nr] + stripe_len);
8502 offset = logical[nr] + stripe_len;
8505 * Could be tricky, the super may land in the
8506 * middle of the area we're checking. First
8507 * check the easiest case, it's at the end.
8509 if (logical[nr] + stripe_len >=
8511 bytes = logical[nr] - offset;
8515 /* Check the left side */
8516 ret = check_cache_range(root, cache,
8518 logical[nr] - offset);
8524 /* Now we continue with the right side */
8525 bytes = (offset + bytes) -
8526 (logical[nr] + stripe_len);
8527 offset = logical[nr] + stripe_len;
8534 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8536 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8537 offset, offset+bytes);
8541 if (entry->offset != offset) {
8542 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8547 if (entry->bytes != bytes) {
8548 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8549 bytes, entry->bytes, offset);
8553 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked from the start of the block group (but not
 * below BTRFS_SUPER_INFO_OFFSET). "last" tracks the end of the previous
 * extent; every gap between "last" and the next extent, and the gap up to
 * the end of the block group, is handed to check_cache_range(), which is
 * expected to find it in the free space ctl. After the walk a message is
 * printed if the free space rb-tree still holds entries.
 *
 * NOTE(review): the first operand of the final condition and the return
 * statements are not shown here - confirm.
 */
8558 static int verify_space_cache(struct btrfs_root *root,
8559 struct btrfs_block_group_cache *cache)
8561 struct btrfs_path path;
8562 struct extent_buffer *leaf;
8563 struct btrfs_key key;
8567 root = root->fs_info->extent_root;
/* Start at the block group start, but never below the superblock offset. */
8569 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8571 btrfs_init_path(&path);
8572 key.objectid = last;
8574 key.type = BTRFS_EXTENT_ITEM_KEY;
8575 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8580 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8581 ret = btrfs_next_leaf(root, &path);
8589 leaf = path.nodes[0];
8590 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or past the end of this block group end the walk. */
8591 if (key.objectid >= cache->key.offset + cache->key.objectid)
8593 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8594 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * No gap in front of this extent: just move "last" past it. The extent
 * length is key.offset for EXTENT_ITEM keys and the node size otherwise.
 */
8599 if (last == key.objectid) {
8600 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8601 last = key.objectid + key.offset;
8603 last = key.objectid + root->fs_info->nodesize;
/* Gap between the previous extent end and this extent. */
8608 ret = check_cache_range(root, cache, last,
8609 key.objectid - last);
8612 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8613 last = key.objectid + key.offset;
8615 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap up to the end of the block group. */
8619 if (last < cache->key.objectid + cache->key.offset)
8620 ret = check_cache_range(root, cache, last,
8621 cache->key.objectid +
8622 cache->key.offset - last);
8625 btrfs_release_path(&path);
8628 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8629 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of the block groups of the filesystem.
 *
 * Block groups are looked up one after another with
 * btrfs_lookup_first_block_group(), starting right after the primary
 * superblock (BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE). For each
 * one the free space ctl is created or emptied, free space is loaded from
 * the free space tree (when the FREE_SPACE_TREE compat_ro bit is set) or
 * from the free space cache, and the result is checked with
 * verify_space_cache().
 *
 * Returns -EINVAL when the local error counter is non-zero, 0 otherwise.
 * NOTE(review): the early return taken on a generation mismatch is not
 * shown here - confirm its value.
 */
8637 static int check_space_cache(struct btrfs_root *root)
8639 struct btrfs_block_group_cache *cache;
8640 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super generation
 * is reported as a cache that will be invalidated.
 */
8644 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8645 btrfs_super_generation(root->fs_info->super_copy) !=
8646 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8647 printf("cache and super generation don't match, space cache "
8648 "will be invalidated\n");
8652 if (ctx.progress_enabled) {
8653 ctx.tp = TASK_FREE_SPACE;
8654 task_start(ctx.info);
8658 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right after this block group. */
8662 start = cache->key.objectid + cache->key.offset;
8663 if (!cache->free_space_ctl) {
8664 if (btrfs_init_free_space_ctl(cache,
8665 root->fs_info->sectorsize)) {
8670 btrfs_remove_free_space_cache(cache);
/* Load free space from the free space tree or from the space cache. */
8673 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8674 ret = exclude_super_stripes(root, cache);
8676 fprintf(stderr, "could not exclude super stripes: %s\n",
8681 ret = load_free_space_tree(root->fs_info, cache);
8682 free_excluded_extents(root, cache);
8684 fprintf(stderr, "could not load free space tree: %s\n",
8691 ret = load_free_space_cache(root->fs_info, cache);
8696 ret = verify_space_cache(root, cache);
8698 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8699 cache->key.objectid);
8704 task_stop(ctx.info);
8706 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sectorsize block against the checksums stored in leaf
 * @eb starting at byte @leaf_offset.
 *
 * The whole range is read into one malloc() buffer with
 * read_extent_data(), possibly in several pieces. Each mismatch is printed
 * to stderr with the mirror number, the byte number and both checksum
 * values. The comparison of mirror with btrfs_num_copies() suggests the
 * read is retried on another mirror.
 *
 * NOTE(review): the retry itself, the reset of data_checked between reads,
 * the release of the buffer and the return codes are not shown here -
 * confirm.
 */
8709 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8710 u64 num_bytes, unsigned long leaf_offset,
8711 struct extent_buffer *eb) {
8713 struct btrfs_fs_info *fs_info = root->fs_info;
8715 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8717 unsigned long csum_offset;
8721 u64 data_checked = 0;
8727 if (num_bytes % fs_info->sectorsize)
8730 data = malloc(num_bytes);
8734 while (offset < num_bytes) {
8737 read_len = num_bytes - offset;
8738 /* read as much space once a time */
8739 ret = read_extent_data(fs_info, data + offset,
8740 bytenr + offset, &read_len, mirror);
8744 /* verify every 4k data's checksum */
8745 while (data_checked < read_len) {
8747 tmp = offset + data_checked;
8749 csum = btrfs_csum_data((char *)data + tmp,
8750 csum, fs_info->sectorsize);
8751 btrfs_csum_final(csum, (u8 *)&csum);
/*
 * One csum_size entry is stored per sector, so the expected value for
 * this sector sits at index tmp / sectorsize counted from leaf_offset.
 */
8753 csum_offset = leaf_offset +
8754 tmp / fs_info->sectorsize * csum_size;
8755 read_extent_buffer(eb, (char *)&csum_expected,
8756 csum_offset, csum_size);
8757 /* try another mirror */
8758 if (csum != csum_expected) {
8759 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8760 mirror, bytenr + tmp,
8761 csum, csum_expected);
8762 num_copies = btrfs_num_copies(root->fs_info,
8764 if (mirror < num_copies - 1) {
8769 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 *
 * The search starts at (@bytenr, EXTENT_ITEM, -1) and steps backwards to
 * the closest preceding item, skipping key types that sort after
 * EXTENT_ITEM. The walk then goes forward over the extent items; each one
 * that overlaps the range shrinks it from the front or the back. An extent
 * that lies in the middle of the range splits it: the right part is checked
 * by a recursive call and the walk continues with the left part.
 *
 * If bytes remain uncovered and no error was recorded, "There are no
 * extents for csum range" is printed.
 *
 * NOTE(review): the loop exits and return codes are not shown here -
 * confirm. The walk uses the passed-in root for btrfs_prev_leaf() and
 * btrfs_next_leaf() while the search uses fs_info->extent_root - verify
 * that callers pass a root for which this is equivalent.
 */
8778 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8781 struct btrfs_path path;
8782 struct extent_buffer *leaf;
8783 struct btrfs_key key;
8786 btrfs_init_path(&path);
/* Search for the largest possible key with this bytenr, then step back. */
8787 key.objectid = bytenr;
8788 key.type = BTRFS_EXTENT_ITEM_KEY;
8789 key.offset = (u64)-1;
8792 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8795 fprintf(stderr, "Error looking up extent record %d\n", ret);
8796 btrfs_release_path(&path);
8799 if (path.slots[0] > 0) {
8802 ret = btrfs_prev_leaf(root, &path);
8805 } else if (ret > 0) {
8812 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8815 * Block group items come before extent items if they have the same
8816 * bytenr, so walk back one more just in case. Dear future traveller,
8817 * first congrats on mastering time travel. Now if it's not too much
8818 * trouble could you go back to 2006 and tell Chris to make the
8819 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8820 * EXTENT_ITEM_KEY please?
8822 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8823 if (path.slots[0] > 0) {
8826 ret = btrfs_prev_leaf(root, &path);
8829 } else if (ret > 0) {
8834 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8838 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8839 ret = btrfs_next_leaf(root, &path);
8841 fprintf(stderr, "Error going to next leaf "
8843 btrfs_release_path(&path);
8849 leaf = path.nodes[0];
8850 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8851 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8855 if (key.objectid + key.offset < bytenr) {
8859 if (key.objectid > bytenr + num_bytes)
8862 if (key.objectid == bytenr) {
8863 if (key.offset >= num_bytes) {
8867 num_bytes -= key.offset;
8868 bytenr += key.offset;
8869 } else if (key.objectid < bytenr) {
8870 if (key.objectid + key.offset >= bytenr + num_bytes) {
8874 num_bytes = (bytenr + num_bytes) -
8875 (key.objectid + key.offset);
8876 bytenr = key.objectid + key.offset;
8878 if (key.objectid + key.offset < bytenr + num_bytes) {
8879 u64 new_start = key.objectid + key.offset;
8880 u64 new_bytes = bytenr + num_bytes - new_start;
8883 * Weird case, the extent is in the middle of
8884 * our range, we'll have to search one side
8885 * and then the other. Not sure if this happens
8886 * in real life, but no harm in coding it up
8887 * anyway just in case.
8889 btrfs_release_path(&path);
8890 ret = check_extent_exists(root, new_start,
8893 fprintf(stderr, "Right section didn't "
8897 num_bytes = key.objectid - bytenr;
8900 num_bytes = key.objectid - bytenr;
8907 if (num_bytes && !ret) {
8908 fprintf(stderr, "There are no extents for csum range "
8909 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8913 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that checksummed ranges are backed by
 * extent records.
 *
 * For every EXTENT_CSUM item the number of data bytes it covers is
 * computed. When the global check_data_csum is set, the data itself is
 * verified with check_extent_csums(). Consecutive items whose key.offset
 * continues the previous range are merged into one run (offset,
 * num_bytes); when a run ends it is passed to check_extent_exists() and a
 * message is printed if no extent record covers it.
 *
 * NOTE(review): the handling of the last run after the loop and the return
 * value are not shown here - confirm.
 */
8917 static int check_csums(struct btrfs_root *root)
8919 struct btrfs_path path;
8920 struct extent_buffer *leaf;
8921 struct btrfs_key key;
8922 u64 offset = 0, num_bytes = 0;
8923 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8927 unsigned long leaf_offset;
/* From here on root refers to the csum tree. */
8929 root = root->fs_info->csum_root;
8930 if (!extent_buffer_uptodate(root->node)) {
8931 fprintf(stderr, "No valid csum tree found\n");
8935 btrfs_init_path(&path);
8936 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8937 key.type = BTRFS_EXTENT_CSUM_KEY;
8939 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8941 fprintf(stderr, "Error searching csum tree %d\n", ret);
8942 btrfs_release_path(&path);
8946 if (ret > 0 && path.slots[0])
8951 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8952 ret = btrfs_next_leaf(root, &path);
8954 fprintf(stderr, "Error going to next leaf "
8961 leaf = path.nodes[0];
8963 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8964 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one csum_size entry per sector. */
8969 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8970 csum_size) * root->fs_info->sectorsize;
8971 if (!check_data_csum)
8972 goto skip_csum_check;
8973 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8974 ret = check_extent_csums(root, key.offset, data_len,
8980 offset = key.offset;
/*
 * This item does not continue the current run: check that the finished
 * run has an extent record, then start a new run at this item.
 */
8981 } else if (key.offset != offset + num_bytes) {
8982 ret = check_extent_exists(root, offset, num_bytes);
8984 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8985 "there is no extent record\n",
8986 offset, offset+num_bytes);
8989 offset = key.offset;
8992 num_bytes += data_len;
8996 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset, testing at each level whether @key is smaller.
 *
 * NOTE(review): the returned values are not shown here; presumably the
 * result is non-zero when @key sorts before @drop_key and zero otherwise -
 * confirm.
 */
9000 static int is_dropped_key(struct btrfs_key *key,
9001 struct btrfs_key *drop_key) {
9002 if (key->objectid < drop_key->objectid)
9004 else if (key->objectid == drop_key->objectid) {
9005 if (key->type < drop_key->type)
9007 else if (key->type == drop_key->type) {
9008 if (key->offset < drop_key->offset)
9016 * Here are the rules for FULL_BACKREF.
9018 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9019 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9021 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9022 * if it happened after the relocation occurred since we'll have dropped the
9023 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9024 * have no real way to know for sure.
9026 * We process the blocks one root at a time, and we start from the lowest root
9027 * objectid and go to the highest. So we can just lookup the owner backref for
9028 * the record and if we don't find it then we know it doesn't exist and we have
9031 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9032 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9033 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf, reached from the root described by @ri,
 * should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and store the result in
 * *flags.
 *
 * The extent record of the block is looked up in @extent_cache. The checks
 * below look at: whether the root objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node itself
 * (buf->start == ri->bytenr), whether the header has the RELOC flag,
 * whether the header owner equals the root objectid, and whether a tree
 * backref for the owner exists in the record.
 *
 * Either outcome is cross-checked against a decision already stored in
 * rec->flag_block_full_backref; a contradiction sets rec->bad_full_backref.
 *
 * NOTE(review): the jumps taken by the individual checks and the return
 * value are not shown here - confirm which check leads to which outcome.
 */
9035 static int calc_extent_flag(struct cache_tree *extent_cache,
9036 struct extent_buffer *buf,
9037 struct root_item_record *ri,
9040 struct extent_record *rec;
9041 struct cache_extent *cache;
9042 struct tree_backref *tback;
9045 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9046 /* we have added this extent before */
9050 rec = container_of(cache, struct extent_record, cache);
9053 * Except file/reloc tree, we can not have
9056 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9061 if (buf->start == ri->bytenr)
9064 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9067 owner = btrfs_header_owner(buf);
9068 if (owner == ri->objectid)
9071 tback = find_tree_backref(rec, 0, owner);
/*
 * Outcome without FULL_BACKREF: a record already marked as full backref
 * contradicts it.
 */
9076 if (rec->flag_block_full_backref != FLAG_UNSET &&
9077 rec->flag_block_full_backref != 0)
9078 rec->bad_full_backref = 1;
/*
 * Outcome with FULL_BACKREF: a record already marked as not full backref
 * contradicts it.
 */
9081 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9082 if (rec->flag_block_full_backref != FLAG_UNSET &&
9083 rec->flag_block_full_backref != 1)
9084 rec->bad_full_backref = 1;
/*
 * Print one line to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", using print_key_type()
 * for @key_type and print_objectid() for @rootid.
 */
9088 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9090 fprintf(stderr, "Invalid key type(");
9091 print_key_type(stderr, 0, key_type);
9092 fprintf(stderr, ") found in root(");
9093 print_objectid(stderr, rootid, 0);
9094 fprintf(stderr, ")\n");
9098 * Check if the key is valid with its extent buffer.
9100 * This is an early check in case an invalid key exists in an extent buffer.
9101 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 *
 * Rules shown below:
 *   DEV_ITEM, CHUNK_ITEM                      - chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP   - extent tree only
 *   ROOT_ITEM                                 - root tree only
 *   DEV_EXTENT                                - dev tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the label BTRFS_CSUM_TREE_OBJECTID is a tree objectid,
 * while every other label here is a key type and the parameter is named
 * key_type. Together with the "valid in csum and log tree" comment this
 * looks like BTRFS_EXTENT_CSUM_KEY may have been intended - confirm
 * against the full condition before changing it.
 * NOTE(review): return values are not shown here - confirm.
 */
9104 static int check_type_with_root(u64 rootid, u8 key_type)
9107 /* Only valid in chunk tree */
9108 case BTRFS_DEV_ITEM_KEY:
9109 case BTRFS_CHUNK_ITEM_KEY:
9110 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9113 /* valid in csum and log tree */
9114 case BTRFS_CSUM_TREE_OBJECTID:
9115 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9119 case BTRFS_EXTENT_ITEM_KEY:
9120 case BTRFS_METADATA_ITEM_KEY:
9121 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9122 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9125 case BTRFS_ROOT_ITEM_KEY:
9126 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9129 case BTRFS_DEV_EXTENT_KEY:
9130 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9136 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the tree walk.
 *
 * Steps shown below:
 *  1. pick_next_pending() selects block ranges into bits[]; readahead is
 *     issued for them and the first one is taken for processing and
 *     removed from the @pending, @reada and @nodes caches.
 *  2. The block is read with read_tree_block(); an unreadable block is
 *     recorded with record_bad_block_io().
 *  3. The FULL_BACKREF flag of the block is determined (extent tree lookup
 *     or calc_extent_flag()), sanity-checked against the owner and RELOC
 *     state, and stored in the extent record.
 *  4. check_block() is run on the buffer.
 *  5. For a leaf every item is dispatched by key type: extent and metadata
 *     items, chunk, device, block group and dev extent items, the backref
 *     key types, orphan items, and file extents (data backref plus byte
 *     accounting).
 *     For a node every child pointer gets an extent record and a tree
 *     backref and is queued through add_pending().
 *  6. Global btree size statistics are updated and the buffer is released.
 *
 * NOTE(review): several conditions, continue/goto statements and the return
 * value are not shown here - confirm the exact control flow.
 */
9140 static int run_next_block(struct btrfs_root *root,
9141 struct block_info *bits,
9144 struct cache_tree *pending,
9145 struct cache_tree *seen,
9146 struct cache_tree *reada,
9147 struct cache_tree *nodes,
9148 struct cache_tree *extent_cache,
9149 struct cache_tree *chunk_cache,
9150 struct rb_root *dev_cache,
9151 struct block_group_tree *block_group_cache,
9152 struct device_extent_tree *dev_extent_cache,
9153 struct root_item_record *ri)
9155 struct btrfs_fs_info *fs_info = root->fs_info;
9156 struct extent_buffer *buf;
9157 struct extent_record *rec = NULL;
9168 struct btrfs_key key;
9169 struct cache_extent *cache;
/* Select the next batch of block ranges and start readahead for them. */
9172 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9173 bits_nr, &reada_bits);
9178 for(i = 0; i < nritems; i++) {
9179 ret = add_cache_extent(reada, bits[i].start,
9184 /* fixme, get the parent transid */
9185 readahead_tree_block(fs_info, bits[i].start, 0);
/*
 * Only the first selected block is processed by this call; it is dropped
 * from the pending, readahead and nodes caches.
 */
9188 *last = bits[0].start;
9189 bytenr = bits[0].start;
9190 size = bits[0].size;
9192 cache = lookup_cache_extent(pending, bytenr, size);
9194 remove_cache_extent(pending, cache);
9197 cache = lookup_cache_extent(reada, bytenr, size);
9199 remove_cache_extent(reada, cache);
9202 cache = lookup_cache_extent(nodes, bytenr, size);
9204 remove_cache_extent(nodes, cache);
/*
 * When an extent record exists, the generation stored from the parent
 * pointer is passed to the read below.
 */
9207 cache = lookup_cache_extent(extent_cache, bytenr, size);
9209 rec = container_of(cache, struct extent_record, cache);
9210 gen = rec->parent_generation;
9213 /* fixme, get the real parent transid */
9214 buf = read_tree_block(root->fs_info, bytenr, gen);
9215 if (!extent_buffer_uptodate(buf)) {
9216 record_bad_block_io(root->fs_info,
9217 extent_cache, bytenr, size);
9221 nritems = btrfs_header_nritems(buf);
/*
 * Block flags come from the extent tree unless init_extent_tree is set;
 * calc_extent_flag() is used otherwise, and presumably also when the
 * lookup fails. If the flags cannot be calculated, FULL_BACKREF is assumed.
 */
9224 if (!init_extent_tree) {
9225 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9226 btrfs_header_level(buf), 1, NULL,
9229 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9231 fprintf(stderr, "Couldn't calc extent flags\n");
9232 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9237 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9239 fprintf(stderr, "Couldn't calc extent flags\n");
9240 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9244 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9246 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9247 ri->objectid == btrfs_header_owner(buf)) {
9249 * Ok we got to this block from it's original owner and
9250 * we have FULL_BACKREF set. Relocation can leave
9251 * converted blocks over so this is altogether possible,
9252 * however it's not possible if the generation > the
9253 * last snapshot, so check for this case.
9255 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9256 btrfs_header_generation(buf) > ri->last_snapshot) {
9257 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9258 rec->bad_full_backref = 1;
9263 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9264 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9265 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9266 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
9270 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9271 rec->flag_block_full_backref = 1;
9275 rec->flag_block_full_backref = 0;
9277 owner = btrfs_header_owner(buf);
9280 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the free space and dispatch every item by key type. */
9284 if (btrfs_is_leaf(buf)) {
9285 btree_space_waste += btrfs_leaf_free_space(root, buf);
9286 for (i = 0; i < nritems; i++) {
9287 struct btrfs_file_extent_item *fi;
9288 btrfs_item_key_to_cpu(buf, &key, i);
9290 * Check key type against the leaf owner.
9291 * Could filter quite a lot of early error if
9294 if (check_type_with_root(btrfs_header_owner(buf),
9296 fprintf(stderr, "ignoring invalid key\n");
9299 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9300 process_extent_item(root, extent_cache, buf,
9304 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9305 process_extent_item(root, extent_cache, buf,
9309 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9311 btrfs_item_size_nr(buf, i);
9314 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9315 process_chunk_item(chunk_cache, &key, buf, i);
9318 if (key.type == BTRFS_DEV_ITEM_KEY) {
9319 process_device_item(dev_cache, &key, buf, i);
9322 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9323 process_block_group_item(block_group_cache,
9327 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9328 process_device_extent_item(dev_extent_cache,
9333 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9334 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9335 process_extent_ref_v0(extent_cache, buf, i);
9342 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9343 ret = add_tree_backref(extent_cache,
9344 key.objectid, 0, key.offset, 0);
9347 "add_tree_backref failed (leaf tree block): %s",
9351 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9352 ret = add_tree_backref(extent_cache,
9353 key.objectid, key.offset, 0, 0);
9356 "add_tree_backref failed (leaf shared block): %s",
9360 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9361 struct btrfs_extent_data_ref *ref;
9362 ref = btrfs_item_ptr(buf, i,
9363 struct btrfs_extent_data_ref);
9364 add_data_backref(extent_cache,
9366 btrfs_extent_data_ref_root(buf, ref),
9367 btrfs_extent_data_ref_objectid(buf,
9369 btrfs_extent_data_ref_offset(buf, ref),
9370 btrfs_extent_data_ref_count(buf, ref),
9371 0, root->fs_info->sectorsize);
9374 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9375 struct btrfs_shared_data_ref *ref;
9376 ref = btrfs_item_ptr(buf, i,
9377 struct btrfs_shared_data_ref);
9378 add_data_backref(extent_cache,
9379 key.objectid, key.offset, 0, 0, 0,
9380 btrfs_shared_data_ref_count(buf, ref),
9381 0, root->fs_info->sectorsize);
9384 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9385 struct bad_item *bad;
9387 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9391 bad = malloc(sizeof(struct bad_item));
9394 INIT_LIST_HEAD(&bad->list);
9395 memcpy(&bad->key, &key,
9396 sizeof(struct btrfs_key));
9397 bad->root_id = owner;
9398 list_add_tail(&bad->list, &delete_items);
/*
 * The rest of the loop body deals with file extent items. The tests on
 * the inline type and on a zero disk bytenr appear to skip such items -
 * NOTE(review): the skipping statements are not shown here, confirm.
 */
9401 if (key.type != BTRFS_EXTENT_DATA_KEY)
9403 fi = btrfs_item_ptr(buf, i,
9404 struct btrfs_file_extent_item);
9405 if (btrfs_file_extent_type(buf, fi) ==
9406 BTRFS_FILE_EXTENT_INLINE)
9408 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9411 data_bytes_allocated +=
9412 btrfs_file_extent_disk_num_bytes(buf, fi);
9413 if (data_bytes_allocated < root->fs_info->sectorsize) {
9416 data_bytes_referenced +=
9417 btrfs_file_extent_num_bytes(buf, fi);
9418 add_data_backref(extent_cache,
9419 btrfs_file_extent_disk_bytenr(buf, fi),
9420 parent, owner, key.objectid, key.offset -
9421 btrfs_file_extent_offset(buf, fi), 1, 1,
9422 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record every child block pointer and queue it. */
9426 struct btrfs_key first_key;
9428 first_key.objectid = 0;
9431 btrfs_item_key_to_cpu(buf, &first_key, 0);
9432 level = btrfs_header_level(buf);
9433 for (i = 0; i < nritems; i++) {
9434 struct extent_record tmpl;
9436 ptr = btrfs_node_blockptr(buf, i);
9437 size = root->fs_info->nodesize;
9438 btrfs_node_key_to_cpu(buf, &key, i);
9440 if ((level == ri->drop_level)
9441 && is_dropped_key(&key, &ri->drop_key)) {
9446 memset(&tmpl, 0, sizeof(tmpl));
9447 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9448 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9453 tmpl.max_size = size;
9454 ret = add_extent_rec(extent_cache, &tmpl);
9458 ret = add_tree_backref(extent_cache, ptr, parent,
9462 "add_tree_backref failed (non-leaf block): %s",
/*
 * The child is queued on @nodes or on @pending. NOTE(review): the
 * selecting condition is not shown here, presumably based on level.
 */
9468 add_pending(nodes, seen, ptr, size);
9470 add_pending(pending, seen, ptr, size);
9473 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9474 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size statistics, split by owning tree. */
9476 total_btree_bytes += buf->len;
9477 if (fs_root_objectid(btrfs_header_owner(buf)))
9478 total_fs_tree_bytes += buf->len;
9479 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9480 total_extent_tree_bytes += buf->len;
9482 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for the tree walk and seed its
 * bookkeeping in @extent_cache.
 *
 * The block range is queued through add_pending() on @nodes when the header
 * level is above 0, and on @pending otherwise. An extent record
 * covering the block is added, followed by a tree backref. For the reloc
 * tree objectid, or for blocks older than BTRFS_MIXED_BACKREF_REV, the
 * backref uses the block start as parent; otherwise it uses parent 0 and
 * the root objectid.
 *
 * NOTE(review): part of the extent record template and the return value
 * are not shown here - confirm.
 */
9486 static int add_root_to_pending(struct extent_buffer *buf,
9487 struct cache_tree *extent_cache,
9488 struct cache_tree *pending,
9489 struct cache_tree *seen,
9490 struct cache_tree *nodes,
9493 struct extent_record tmpl;
9496 if (btrfs_header_level(buf) > 0)
9497 add_pending(nodes, seen, buf->start, buf->len);
9499 add_pending(pending, seen, buf->start, buf->len);
9501 memset(&tmpl, 0, sizeof(tmpl));
9502 tmpl.start = buf->start;
9507 tmpl.max_size = buf->len;
9508 add_extent_rec(extent_cache, &tmpl);
9510 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9511 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9512 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9515 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9520 /* as we fix the tree, we might be deleting blocks that
9521 * we're tracking for repair. This hook makes sure we
9522 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache (root->fs_info->fsck_extent_cache) in step
 * when an extent reference is dropped.
 *
 * The extent record for [@bytenr, +@num_bytes) is looked up. Depending on
 * whether @owner denotes data, the matching data or tree backref is found
 * and its counters and found_ref / found_extent_tree flags are reduced,
 * together with the refs and extent_item_refs of the record. Finally
 * maybe_free_extent_rec() is called on the record.
 *
 * NOTE(review): in both branches the backref is erased from the rb-tree
 * when found_extent_tree is clear but found_ref is still set. If the
 * intent is to drop backrefs that have neither flag left, the second test
 * would need to be negated - confirm before changing.
 * NOTE(review): the last parameter of the signature and the return value
 * are not shown here.
 */
9524 static int free_extent_hook(struct btrfs_trans_handle *trans,
9525 struct btrfs_root *root,
9526 u64 bytenr, u64 num_bytes, u64 parent,
9527 u64 root_objectid, u64 owner, u64 offset,
9530 struct extent_record *rec;
9531 struct cache_extent *cache;
9533 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
9535 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9536 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9540 rec = container_of(cache, struct extent_record, cache);
9542 struct data_backref *back;
9543 back = find_data_backref(rec, parent, root_objectid, owner,
9544 offset, 1, bytenr, num_bytes);
9547 if (back->node.found_ref) {
9548 back->found_ref -= refs_to_drop;
9550 rec->refs -= refs_to_drop;
9552 if (back->node.found_extent_tree) {
9553 back->num_refs -= refs_to_drop;
9554 if (rec->extent_item_refs)
9555 rec->extent_item_refs -= refs_to_drop;
/* Clear the flags once the corresponding counters reach zero. */
9557 if (back->found_ref == 0)
9558 back->node.found_ref = 0;
9559 if (back->num_refs == 0)
9560 back->node.found_extent_tree = 0;
9562 if (!back->node.found_extent_tree && back->node.found_ref) {
9563 rb_erase(&back->node.node, &rec->backref_tree);
9567 struct tree_backref *back;
9568 back = find_tree_backref(rec, parent, root_objectid);
9571 if (back->node.found_ref) {
9574 back->node.found_ref = 0;
9576 if (back->node.found_extent_tree) {
9577 if (rec->extent_item_refs)
9578 rec->extent_item_refs--;
9579 back->node.found_extent_tree = 0;
9581 if (!back->node.found_extent_tree && back->node.found_ref) {
9582 rb_erase(&back->node.node, &rec->backref_tree);
9586 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to extent bytenr.
 *
 * The extent tree is searched from the highest possible key for bytenr
 * downwards. Items whose objectid equals bytenr and whose type is an
 * extent item, metadata item or one of the backref key types are removed
 * with btrfs_del_item() and logged as "repair deleting extent record".
 * When the removed item is the EXTENT_ITEM or METADATA_ITEM itself,
 * btrfs_update_block_group() is called for its byte count (key.offset or
 * the node size).
 *
 * NOTE(review): part of the signature, the loop construct and the return
 * value are not shown here - confirm.
 */
9591 static int delete_extent_records(struct btrfs_trans_handle *trans,
9592 struct btrfs_root *root,
9593 struct btrfs_path *path,
9596 struct btrfs_key key;
9597 struct btrfs_key found_key;
9598 struct extent_buffer *leaf;
9603 key.objectid = bytenr;
9605 key.offset = (u64)-1;
9608 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9615 if (path->slots[0] == 0)
9621 leaf = path->nodes[0];
9622 slot = path->slots[0];
9624 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9625 if (found_key.objectid != bytenr)
/*
 * Any other key type is left alone: the search key is moved just below
 * the found key (previous offset, or previous type with maximum offset).
 */
9628 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9629 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9630 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9631 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9632 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9633 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9634 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9635 btrfs_release_path(path);
9636 if (found_key.type == 0) {
9637 if (found_key.offset == 0)
9639 key.offset = found_key.offset - 1;
9640 key.type = found_key.type;
9642 key.type = found_key.type - 1;
9643 key.offset = (u64)-1;
9647 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9648 found_key.objectid, found_key.type, found_key.offset);
9650 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9653 btrfs_release_path(path);
/* Removing the extent item itself also updates block group accounting. */
9655 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9656 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9657 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9658 found_key.offset : root->fs_info->nodesize;
9660 ret = btrfs_update_block_group(trans, root, bytenr,
9667 btrfs_release_path(path);
9672 * for a single backref, this will allocate a new extent
9673 * and add the backref to it.
/*
 * Create the extent tree items for one backref of `rec`.
 *
 * trans:     running transaction
 * info:      fs_info; its extent_root receives the new items
 * path:      scratch path, released again by this function
 * rec:       extent record being recreated (start, max_size, generation,
 *            info_objectid and info_level are read from it)
 * back:      backref to add; is_data and full_backref select the item layout
 * allocated: NOTE(review): presumably non-zero once the extent item itself
 *            has been inserted by an earlier call -- confirm
 * flags:     extra extent flags OR-ed into BTRFS_EXTENT_FLAG_TREE_BLOCK
 *
 * An extent item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is
 * inserted with a ref count of 0, followed by a btrfs_tree_block_info for
 * tree blocks, and the block group accounting is updated.  The reference
 * itself is then added through btrfs_inc_extent_ref(): once per found_ref
 * for data backrefs, once for tree backrefs.
 */
9675 static int record_extent(struct btrfs_trans_handle *trans,
9676 struct btrfs_fs_info *info,
9677 struct btrfs_path *path,
9678 struct extent_record *rec,
9679 struct extent_backref *back,
9680 int allocated, u64 flags)
9683 struct btrfs_root *extent_root = info->extent_root;
9684 struct extent_buffer *leaf;
9685 struct btrfs_key ins_key;
9686 struct btrfs_extent_item *ei;
9687 struct data_backref *dback;
9688 struct btrfs_tree_block_info *bi;
9691 rec->max_size = max_t(u64, rec->max_size,
9695 u32 item_size = sizeof(*ei);
9698 item_size += sizeof(*bi);
9700 ins_key.objectid = rec->start;
9701 ins_key.offset = rec->max_size;
9702 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9704 ret = btrfs_insert_empty_item(trans, extent_root, path,
9705 &ins_key, item_size);
9709 leaf = path->nodes[0];
9710 ei = btrfs_item_ptr(leaf, path->slots[0],
9711 struct btrfs_extent_item);
/* The ref count starts at 0; btrfs_inc_extent_ref() below raises it. */
9713 btrfs_set_extent_refs(leaf, ei, 0);
9714 btrfs_set_extent_generation(leaf, ei, rec->generation);
9716 if (back->is_data) {
9717 btrfs_set_extent_flags(leaf, ei,
9718 BTRFS_EXTENT_FLAG_DATA);
9720 struct btrfs_disk_key copy_key;
/* The tree block info sits directly behind the extent item. */
9722 bi = (struct btrfs_tree_block_info *)(ei + 1);
9723 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9726 btrfs_set_disk_key_objectid(&copy_key,
9727 rec->info_objectid);
9728 btrfs_set_disk_key_type(&copy_key, 0);
9729 btrfs_set_disk_key_offset(&copy_key, 0);
9731 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9732 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9734 btrfs_set_extent_flags(leaf, ei,
9735 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9738 btrfs_mark_buffer_dirty(leaf);
9739 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9740 rec->max_size, 1, 0);
9743 btrfs_release_path(path);
9746 if (back->is_data) {
9750 dback = to_data_backref(back);
9751 if (back->full_backref)
9752 parent = dback->parent;
9756 for (i = 0; i < dback->found_ref; i++) {
9757 /* if parent != 0, we're doing a full backref
9758 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9759 * just makes the backref allocator create a data
9762 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9763 rec->start, rec->max_size,
9767 BTRFS_FIRST_FREE_OBJECTID :
9773 fprintf(stderr, "adding new data backref"
9774 " on %llu %s %llu owner %llu"
9775 " offset %llu found %d\n",
9776 (unsigned long long)rec->start,
9777 back->full_backref ?
9779 back->full_backref ?
9780 (unsigned long long)parent :
9781 (unsigned long long)dback->root,
9782 (unsigned long long)dback->owner,
9783 (unsigned long long)dback->offset,
9787 struct tree_backref *tback;
9789 tback = to_tree_backref(back);
9790 if (back->full_backref)
9791 parent = tback->parent;
9795 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9796 rec->start, rec->max_size,
9797 parent, tback->root, 0, 0);
9798 fprintf(stderr, "adding new tree backref on "
9799 "start %llu len %llu parent %llu root %llu\n",
9800 rec->start, rec->max_size, parent, tback->root);
9803 btrfs_release_path(path);
/*
 * Scan the `entries` list for an extent_entry whose bytenr and bytes both
 * equal the requested values.
 *
 * NOTE(review): the return statements are not visible here; verify_backrefs()
 * tests the result with `!entry`, so presumably the matching entry is
 * returned and NULL when nothing matches -- confirm.
 */
9807 static struct extent_entry *find_entry(struct list_head *entries,
9808 u64 bytenr, u64 bytes)
9810 struct extent_entry *entry = NULL;
9812 list_for_each_entry(entry, entries, list) {
9813 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the extent_entry in `entries` that most backrefs agree on, judged
 * by comparing the per-entry `count` values.
 *
 * An entry whose `broken` equals its `count` is treated as untrustworthy.
 * A candidate that only ties with another entry's count is not accepted as
 * the winner, so the walk keeps searching in that case.
 *
 * NOTE(review): the return path is not visible here; presumably the winner
 * is returned, or NULL when no entry clearly wins -- confirm.
 */
9820 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9822 struct extent_entry *entry, *best = NULL, *prev = NULL;
9824 list_for_each_entry(entry, entries, list) {
9826 * If there are as many broken entries as entries then we know
9827 * not to trust this particular entry.
9829 if (entry->broken == entry->count)
9833 * Special case, when there are only two entries and 'best' is
9843 * If our current entry == best then we can't be sure our best
9844 * is really the best, so we need to keep searching.
9846 if (best && best->count == entry->count) {
9852 /* Prev == entry, not good enough, have to keep searching */
9853 if (!prev->broken && prev->count == entry->count)
9857 best = (prev->count > entry->count) ? prev : entry;
9858 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it points at the extent described by
 * `entry`.
 *
 * The fs tree named by dback->root is read, the EXTENT_DATA items of inode
 * dback->owner are searched starting at dback->offset, and the item whose
 * disk_bytenr/disk_num_bytes equal dback->disk_bytenr/dback->bytes is taken
 * as the one to fix.  A transaction is then started, the key is searched
 * again with cow enabled, and disk_bytenr, offset and disk_num_bytes are
 * updated from `entry`.  ram_bytes is only overwritten for uncompressed
 * extents.
 *
 * A compressed extent whose disk_bytenr differs from entry->bytenr, a ref
 * that ends past entry->bytenr + entry->bytes, and a ref that lies before
 * entry->bytenr are each reported on stderr (presumably bailing out without
 * a rewrite -- confirm).
 *
 * Returns ret if it is non-zero, otherwise the result of
 * btrfs_commit_transaction(); PTR_ERR(trans) is returned on the transaction
 * start failure path.
 */
9866 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9867 struct data_backref *dback, struct extent_entry *entry)
9869 struct btrfs_trans_handle *trans;
9870 struct btrfs_root *root;
9871 struct btrfs_file_extent_item *fi;
9872 struct extent_buffer *leaf;
9873 struct btrfs_key key;
9877 key.objectid = dback->root;
9878 key.type = BTRFS_ROOT_ITEM_KEY;
9879 key.offset = (u64)-1;
9880 root = btrfs_read_fs_root(info, &key);
9882 fprintf(stderr, "Couldn't find root for our ref\n");
9887 * The backref points to the original offset of the extent if it was
9888 * split, so we need to search down to the offset we have and then walk
9889 * forward until we find the backref we're looking for.
9891 key.objectid = dback->owner;
9892 key.type = BTRFS_EXTENT_DATA_KEY;
9893 key.offset = dback->offset;
9894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9896 fprintf(stderr, "Error looking up ref %d\n", ret);
9901 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9902 ret = btrfs_next_leaf(root, path);
9904 fprintf(stderr, "Couldn't find our ref, next\n");
9908 leaf = path->nodes[0];
9909 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9910 if (key.objectid != dback->owner ||
9911 key.type != BTRFS_EXTENT_DATA_KEY) {
9912 fprintf(stderr, "Couldn't find our ref, search\n");
9915 fi = btrfs_item_ptr(leaf, path->slots[0],
9916 struct btrfs_file_extent_item);
9917 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9918 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9920 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9925 btrfs_release_path(path);
/* The read-only lookup is finished; `key` now names the item to rewrite. */
9927 trans = btrfs_start_transaction(root, 1);
9929 return PTR_ERR(trans);
9932 * Ok we have the key of the file extent we want to fix, now we can cow
9933 * down to the thing and fix it.
9935 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9937 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9938 key.objectid, key.type, key.offset, ret);
9942 fprintf(stderr, "Well that's odd, we just found this key "
9943 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9948 leaf = path->nodes[0];
9949 fi = btrfs_item_ptr(leaf, path->slots[0],
9950 struct btrfs_file_extent_item);
9952 if (btrfs_file_extent_compression(leaf, fi) &&
9953 dback->disk_bytenr != entry->bytenr) {
9954 fprintf(stderr, "Ref doesn't match the record start and is "
9955 "compressed, please take a btrfs-image of this file "
9956 "system and send it to a btrfs developer so they can "
9957 "complete this functionality for bytenr %Lu\n",
9958 dback->disk_bytenr);
9963 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9964 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9965 } else if (dback->disk_bytenr > entry->bytenr) {
9966 u64 off_diff, offset;
9968 off_diff = dback->disk_bytenr - entry->bytenr;
9969 offset = btrfs_file_extent_offset(leaf, fi);
9970 if (dback->disk_bytenr + offset +
9971 btrfs_file_extent_num_bytes(leaf, fi) >
9972 entry->bytenr + entry->bytes) {
9973 fprintf(stderr, "Ref is past the entry end, please "
9974 "take a btrfs-image of this file system and "
9975 "send it to a btrfs developer, ref %Lu\n",
9976 dback->disk_bytenr);
9981 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9982 btrfs_set_file_extent_offset(leaf, fi, offset);
9983 } else if (dback->disk_bytenr < entry->bytenr) {
9986 offset = btrfs_file_extent_offset(leaf, fi);
9987 if (dback->disk_bytenr + offset < entry->bytenr) {
9988 fprintf(stderr, "Ref is before the entry start, please"
9989 " take a btrfs-image of this file system and "
9990 "send it to a btrfs developer, ref %Lu\n",
9991 dback->disk_bytenr);
9996 offset += dback->disk_bytenr;
9997 offset -= entry->bytenr;
9998 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9999 btrfs_set_file_extent_offset(leaf, fi, offset);
10002 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10005 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10006 * only do this if we aren't using compression, otherwise it's a
10009 if (!btrfs_file_extent_compression(leaf, fi))
10010 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10012 printf("ram bytes may be wrong?\n");
10013 btrfs_mark_buffer_dirty(leaf);
10015 err = btrfs_commit_transaction(trans, root);
10016 btrfs_release_path(path);
10017 return ret ? ret : err;
/*
 * Make the data backrefs of `rec` agree on where the extent lives.
 *
 * Each non-full data backref with found_ref set contributes its
 * (disk_bytenr, bytes) pair to a local list of extent_entry candidates.  If
 * there is at most one candidate and nothing mismatches the extent record,
 * there is nothing to repair.  Otherwise find_most_right_entry() picks a
 * winner; when the backrefs alone cannot decide, the extent record's own
 * (rec->start, rec->nr) is looked up, or added as a new candidate, and the
 * choice is made again.  Every backref that disagrees with the winner is
 * then handed to repair_ref().
 *
 * A winner whose bytenr differs from rec->start is not handled; a message
 * is printed instead.  The candidate list is emptied at the end.
 *
 * NOTE(review): the return statements are not visible here.  The comment
 * near the end says the cache must be dropped and rescanned after a repair,
 * and check_extent_refs() special-cases -EAGAIN, so presumably that is the
 * value returned after a successful repair -- confirm.
 */
10020 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10021 struct extent_record *rec)
10023 struct extent_backref *back, *tmp;
10024 struct data_backref *dback;
10025 struct extent_entry *entry, *best = NULL;
10026 LIST_HEAD(entries);
10027 int nr_entries = 0;
10028 int broken_entries = 0;
10030 short mismatch = 0;
10033 * Metadata is easy and the backrefs should always agree on bytenr and
10034 * size, if not we've got bigger issues.
10039 rbtree_postorder_for_each_entry_safe(back, tmp,
10040 &rec->backref_tree, node) {
10041 if (back->full_backref || !back->is_data)
10044 dback = to_data_backref(back);
10047 * We only pay attention to backrefs that we found a real
10050 if (dback->found_ref == 0)
10054 * For now we only catch when the bytes don't match, not the
10055 * bytenr. We can easily do this at the same time, but I want
10056 * to have a fs image to test on before we just add repair
10057 * functionality willy-nilly so we know we won't screw up the
10061 entry = find_entry(&entries, dback->disk_bytenr,
10064 entry = malloc(sizeof(struct extent_entry));
10069 memset(entry, 0, sizeof(*entry));
10070 entry->bytenr = dback->disk_bytenr;
10071 entry->bytes = dback->bytes;
10072 list_add_tail(&entry->list, &entries);
10077 * If we only have on entry we may think the entries agree when
10078 * in reality they don't so we have to do some extra checking.
10080 if (dback->disk_bytenr != rec->start ||
10081 dback->bytes != rec->nr || back->broken)
10084 if (back->broken) {
10092 /* Yay all the backrefs agree, carry on good sir */
10093 if (nr_entries <= 1 && !mismatch)
10096 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10097 "%Lu\n", rec->start);
10100 * First we want to see if the backrefs can agree amongst themselves who
10101 * is right, so figure out which one of the entries has the highest
10104 best = find_most_right_entry(&entries);
10107 * Ok so we may have an even split between what the backrefs think, so
10108 * this is where we use the extent ref to see what it thinks.
10111 entry = find_entry(&entries, rec->start, rec->nr);
10112 if (!entry && (!broken_entries || !rec->found_rec)) {
10113 fprintf(stderr, "Backrefs don't agree with each other "
10114 "and extent record doesn't agree with anybody,"
10115 " so we can't fix bytenr %Lu bytes %Lu\n",
10116 rec->start, rec->nr);
10119 } else if (!entry) {
10121 * Ok our backrefs were broken, we'll assume this is the
10122 * correct value and add an entry for this range.
10124 entry = malloc(sizeof(struct extent_entry));
10129 memset(entry, 0, sizeof(*entry));
10130 entry->bytenr = rec->start;
10131 entry->bytes = rec->nr;
10132 list_add_tail(&entry->list, &entries);
10136 best = find_most_right_entry(&entries);
10138 fprintf(stderr, "Backrefs and extent record evenly "
10139 "split on who is right, this is going to "
10140 "require user input to fix bytenr %Lu bytes "
10141 "%Lu\n", rec->start, rec->nr);
10148 * I don't think this can happen currently as we'll abort() if we catch
10149 * this case higher up, but in case somebody removes that we still can't
10150 * deal with it properly here yet, so just bail out of that's the case.
10152 if (best->bytenr != rec->start) {
10153 fprintf(stderr, "Extent start and backref starts don't match, "
10154 "please use btrfs-image on this file system and send "
10155 "it to a btrfs developer so they can make fsck fix "
10156 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10157 rec->start, rec->nr);
10163 * Ok great we all agreed on an extent record, let's go find the real
10164 * references and fix up the ones that don't match.
10166 rbtree_postorder_for_each_entry_safe(back, tmp,
10167 &rec->backref_tree, node) {
10168 if (back->full_backref || !back->is_data)
10171 dback = to_data_backref(back);
10174 * Still ignoring backrefs that don't have a real ref attached
10177 if (dback->found_ref == 0)
10180 if (dback->bytes == best->bytes &&
10181 dback->disk_bytenr == best->bytenr)
10184 ret = repair_ref(info, path, dback, best);
10190 * Ok we messed with the actual refs, which means we need to drop our
10191 * entire cache and go back and rescan. I know this is a huge pain and
10192 * adds a lot of extra work, but it's the only way to be safe. Once all
10193 * the backrefs agree we may not need to do anything to the extent
10198 while (!list_empty(&entries)) {
10199 entry = list_entry(entries.next, struct extent_entry, list);
10200 list_del_init(&entry->list);
/*
 * Sort out a record that was queued on duplicate_extents.
 *
 * The function leaves early when `rec` already has found_rec set or has
 * more than one duplicate.  Otherwise `rec` is removed from extent_cache and
 * its first duplicate (`good`) takes its place: good's cache range is set
 * from its own start/nr, it takes over rec->refs and rec's backrefs list,
 * and its check flags and num_duplicates are reset.  Records found
 * overlapping good's range are then removed from extent_cache: those with
 * found_rec set or with duplicates of their own are chained onto good->dups
 * (and good is queued on duplicate_extents), the others have their refs and
 * backrefs merged into good.  Finally good is inserted into extent_cache.
 *
 * Returns 0 when good ended up with duplicates and 1 when it did not.
 *
 * NOTE(review): this function moves the `backrefs` list, while the other
 * functions in this part of the file walk `backref_tree` -- confirm that
 * both members are meant to be used here.
 */
10206 static int process_duplicates(struct cache_tree *extent_cache,
10207 struct extent_record *rec)
10209 struct extent_record *good, *tmp;
10210 struct cache_extent *cache;
10214 * If we found a extent record for this extent then return, or if we
10215 * have more than one duplicate we are likely going to need to delete
10218 if (rec->found_rec || rec->num_duplicates > 1)
10221 /* Shouldn't happen but just in case */
10222 BUG_ON(!rec->num_duplicates);
10225 * So this happens if we end up with a backref that doesn't match the
10226 * actual extent entry. So either the backref is bad or the extent
10227 * entry is bad. Either way we want to have the extent_record actually
10228 * reflect what we found in the extent_tree, so we need to take the
10229 * duplicate out and use that as the extent_record since the only way we
10230 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10232 remove_cache_extent(extent_cache, &rec->cache);
10234 good = to_extent_record(rec->dups.next);
10235 list_del_init(&good->list);
10236 INIT_LIST_HEAD(&good->backrefs);
10237 INIT_LIST_HEAD(&good->dups);
10238 good->cache.start = good->start;
10239 good->cache.size = good->nr;
10240 good->content_checked = 0;
10241 good->owner_ref_checked = 0;
10242 good->num_duplicates = 0;
10243 good->refs = rec->refs;
10244 list_splice_init(&rec->backrefs, &good->backrefs);
10246 cache = lookup_cache_extent(extent_cache, good->start,
10250 tmp = container_of(cache, struct extent_record, cache);
10253 * If we find another overlapping extent and it's found_rec is
10254 * set then it's a duplicate and we need to try and delete
10257 if (tmp->found_rec || tmp->num_duplicates > 0) {
10258 if (list_empty(&good->list))
10259 list_add_tail(&good->list,
10260 &duplicate_extents);
10261 good->num_duplicates += tmp->num_duplicates + 1;
10262 list_splice_init(&tmp->dups, &good->dups);
10263 list_del_init(&tmp->list);
10264 list_add_tail(&tmp->list, &good->dups);
10265 remove_cache_extent(extent_cache, &tmp->cache);
10270 * Ok we have another non extent item backed extent rec, so lets
10271 * just add it to this extent and carry on like we did above.
10273 good->refs += tmp->refs;
10274 list_splice_init(&tmp->backrefs, &good->backrefs);
10275 remove_cache_extent(extent_cache, &tmp->cache);
10278 ret = insert_cache_extent(extent_cache, &good->cache);
10281 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of duplicate records from the extent tree.
 *
 * The duplicates on rec->dups are compared against a `good` record;
 * extents that only partially overlap are not handled and produce a
 * message.  `rec` and entries from rec->dups are collected on a local
 * delete list.  For each collected record with found_rec set, the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) is searched in the extent root and the
 * item removed with btrfs_del_item(), all inside one transaction.  A
 * metadata record is not expected here and is reported.  The delete list
 * and rec->dups are both emptied afterwards.
 *
 * rec->num_duplicates is cleared when ret and nr_del are both zero.
 * Returns ret if it is non-zero, otherwise nr_del.
 */
10284 static int delete_duplicate_records(struct btrfs_root *root,
10285 struct extent_record *rec)
10287 struct btrfs_trans_handle *trans;
10288 LIST_HEAD(delete_list);
10289 struct btrfs_path path;
10290 struct extent_record *tmp, *good, *n;
10293 struct btrfs_key key;
10295 btrfs_init_path(&path);
10298 /* Find the record that covers all of the duplicates. */
10299 list_for_each_entry(tmp, &rec->dups, list) {
10300 if (good->start < tmp->start)
10302 if (good->nr > tmp->nr)
10305 if (tmp->start + tmp->nr < good->start + good->nr) {
10306 fprintf(stderr, "Ok we have overlapping extents that "
10307 "aren't completely covered by each other, this "
10308 "is going to require more careful thought. "
10309 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10310 tmp->start, tmp->nr, good->start, good->nr);
10317 list_add_tail(&rec->list, &delete_list);
10319 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10322 list_move_tail(&tmp->list, &delete_list);
/* All deletions below operate on the extent root, not the root passed in. */
10325 root = root->fs_info->extent_root;
10326 trans = btrfs_start_transaction(root, 1);
10327 if (IS_ERR(trans)) {
10328 ret = PTR_ERR(trans);
10332 list_for_each_entry(tmp, &delete_list, list) {
10333 if (tmp->found_rec == 0)
10335 key.objectid = tmp->start;
10336 key.type = BTRFS_EXTENT_ITEM_KEY;
10337 key.offset = tmp->nr;
10339 /* Shouldn't happen but just in case */
10340 if (tmp->metadata) {
10341 fprintf(stderr, "Well this shouldn't happen, extent "
10342 "record overlaps but is metadata? "
10343 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10347 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10353 ret = btrfs_del_item(trans, root, &path);
10356 btrfs_release_path(&path);
10359 err = btrfs_commit_transaction(trans, root);
10363 while (!list_empty(&delete_list)) {
10364 tmp = to_extent_record(delete_list.next);
10365 list_del_init(&tmp->list);
10371 while (!list_empty(&rec->dups)) {
10372 tmp = to_extent_record(rec->dups.next);
10373 list_del_init(&tmp->list);
10377 btrfs_release_path(&path);
10379 if (!ret && !nr_del)
10380 rec->num_duplicates = 0;
10382 return ret ? ret : nr_del;
/*
 * Try to locate file extents for data backrefs of `rec` that were never
 * matched to a real reference.
 *
 * For each non-full data backref with found_ref == 0, the fs tree
 * dback->root is read and the key (dback->owner, BTRFS_EXTENT_DATA_KEY,
 * dback->offset) is searched.  When the item exists, its disk_bytenr and
 * disk_num_bytes are read.  If extent_cache holds a record at that bytenr
 * with found_rec set, the backref is left alone; otherwise found_ref is
 * incremented and the backref's disk_bytenr and bytes are overwritten with
 * the values from the file extent.
 *
 * A root that fails to read with -ENOENT is skipped; PTR_ERR(root) is
 * returned for other read errors.
 * NOTE(review): the final return is not visible here -- confirm.
 */
10385 static int find_possible_backrefs(struct btrfs_fs_info *info,
10386 struct btrfs_path *path,
10387 struct cache_tree *extent_cache,
10388 struct extent_record *rec)
10390 struct btrfs_root *root;
10391 struct extent_backref *back, *tmp;
10392 struct data_backref *dback;
10393 struct cache_extent *cache;
10394 struct btrfs_file_extent_item *fi;
10395 struct btrfs_key key;
10399 rbtree_postorder_for_each_entry_safe(back, tmp,
10400 &rec->backref_tree, node) {
10401 /* Don't care about full backrefs (poor unloved backrefs) */
10402 if (back->full_backref || !back->is_data)
10405 dback = to_data_backref(back);
10407 /* We found this one, we don't need to do a lookup */
10408 if (dback->found_ref)
10411 key.objectid = dback->root;
10412 key.type = BTRFS_ROOT_ITEM_KEY;
10413 key.offset = (u64)-1;
10415 root = btrfs_read_fs_root(info, &key);
10417 /* No root, definitely a bad ref, skip */
10418 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10420 /* Other err, exit */
10422 return PTR_ERR(root);
10424 key.objectid = dback->owner;
10425 key.type = BTRFS_EXTENT_DATA_KEY;
10426 key.offset = dback->offset;
10427 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10429 btrfs_release_path(path);
10432 /* Didn't find it, we can carry on */
10437 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10438 struct btrfs_file_extent_item);
10439 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10440 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10441 btrfs_release_path(path);
10442 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10444 struct extent_record *tmp;
10445 tmp = container_of(cache, struct extent_record, cache);
10448 * If we found an extent record for the bytenr for this
10449 * particular backref then we can't add it to our
10450 * current extent record. We only want to add backrefs
10451 * that don't have a corresponding extent item in the
10452 * extent tree since they likely belong to this record
10453 * and we need to fix it if it doesn't match bytenrs.
10455 if (tmp->found_rec)
10459 dback->found_ref += 1;
10460 dback->disk_bytenr = bytenr;
10461 dback->bytes = bytes;
10464 * Set this so the verify backref code knows not to trust the
10465 * values in this backref.
10474 * Record orphan data ref into corresponding root.
10476 * Return 0 if the extent item contains data ref and recorded.
10477 * Return 1 if the extent item contains no useful data ref
10478 * In that case, it may contain only shared_dataref or metadata backref
10479 * or the file extent exists(this should be handled by the extent bytenr
10480 * recovery routine)
10481 * Return <0 if something goes wrong.
/*
 * Queue orphan_data_extent entries on the owning fs roots for data backrefs
 * of `rec` that are present in the extent tree but have no matching file
 * extent.
 *
 * fs_info: used to read the destination fs root of each backref
 * rec:     extent record whose backref_tree is walked
 *
 * Only non-full data backrefs with found_extent_tree set and found_ref
 * clear are considered.  Backrefs whose root cannot be read are skipped.
 * Each new entry records dback->root/owner/offset together with the
 * record's cache range and is linked onto dest_root->orphan_data_extents.
 *
 * The visible return path yields !recorded_data_ref: 0 when at least one
 * entry was recorded, 1 when none was.
 */
10483 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10484 struct extent_record *rec)
10486 struct btrfs_key key;
10487 struct btrfs_root *dest_root;
10488 struct extent_backref *back, *tmp;
10489 struct data_backref *dback;
10490 struct orphan_data_extent *orphan;
10491 struct btrfs_path path;
10492 int recorded_data_ref = 0;
10497 btrfs_init_path(&path);
10498 rbtree_postorder_for_each_entry_safe(back, tmp,
10499 &rec->backref_tree, node) {
10500 if (back->full_backref || !back->is_data ||
10501 !back->found_extent_tree)
10503 dback = to_data_backref(back);
10504 if (dback->found_ref)
10506 key.objectid = dback->root;
10507 key.type = BTRFS_ROOT_ITEM_KEY;
10508 key.offset = (u64)-1;
10510 dest_root = btrfs_read_fs_root(fs_info, &key);
10512 /* For non-exist root we just skip it */
10513 if (IS_ERR(dest_root) || !dest_root)
10516 key.objectid = dback->owner;
10517 key.type = BTRFS_EXTENT_DATA_KEY;
10518 key.offset = dback->offset;
10520 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10521 btrfs_release_path(&path);
10523 * For ret < 0, it's OK since the fs-tree may be corrupted,
10524 * we need to record it for inode/file extent rebuild.
10525 * For ret > 0, we record it only for file extent rebuild.
10526 * For ret == 0, the file extent exists but only bytenr
10527 * mismatch, let the original bytenr fix routine to handle,
10533 orphan = malloc(sizeof(*orphan));
10538 INIT_LIST_HEAD(&orphan->list);
10539 orphan->root = dback->root;
10540 orphan->objectid = dback->owner;
10541 orphan->offset = dback->offset;
10542 orphan->disk_bytenr = rec->cache.start;
10543 orphan->disk_len = rec->cache.size;
/*
 * list_add() takes the new node first and the list head second.  The
 * arguments used to be swapped, which re-linked the root's list head onto
 * the freshly initialised entry and dropped every orphan queued earlier on
 * the same root.
 */
10544 list_add(&orphan->list, &dest_root->orphan_data_extents);
10545 recorded_data_ref = 1;
10548 btrfs_release_path(&path);
10550 return !recorded_data_ref;
10556 * when an incorrect extent item is found, this will delete
10557 * all of the existing entries for it and recreate them
10558 * based on what the tree scan found.
/*
 * Rebuild the extent tree references of `rec` from what the tree scan found.
 *
 * For a non-metadata record whose refs differ from extent_item_refs,
 * find_possible_backrefs() and verify_backrefs() run first.  A transaction
 * on the extent root is then started, delete_extent_records() removes the
 * existing items, info->corrupt_blocks is consulted for the record's range,
 * and record_extent() is called for backrefs that have found_ref set.
 * `flags` carries BTRFS_BLOCK_FLAG_FULL_BACKREF when
 * rec->flag_block_full_backref is set.  The transaction is committed, a
 * "Repaired extent references" message is printed and the path released.
 *
 * NOTE(review): the return statement is not visible here -- confirm.
 */
10560 static int fixup_extent_refs(struct btrfs_fs_info *info,
10561 struct cache_tree *extent_cache,
10562 struct extent_record *rec)
10564 struct btrfs_trans_handle *trans = NULL;
10566 struct btrfs_path path;
10567 struct cache_extent *cache;
10568 struct extent_backref *back, *tmp;
10572 if (rec->flag_block_full_backref)
10573 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10575 btrfs_init_path(&path);
10576 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10578 * Sometimes the backrefs themselves are so broken they don't
10579 * get attached to any meaningful rec, so first go back and
10580 * check any of our backrefs that we couldn't find and throw
10581 * them into the list if we find the backref so that
10582 * verify_backrefs can figure out what to do.
10584 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10589 /* step one, make sure all of the backrefs agree */
10590 ret = verify_backrefs(info, &path, rec);
10594 trans = btrfs_start_transaction(info->extent_root, 1);
10595 if (IS_ERR(trans)) {
10596 ret = PTR_ERR(trans);
10600 /* step two, delete all the existing records */
10601 ret = delete_extent_records(trans, info->extent_root, &path,
10607 /* was this block corrupt? If so, don't add references to it */
10608 cache = lookup_cache_extent(info->corrupt_blocks,
10609 rec->start, rec->max_size);
10615 /* step three, recreate all the refs we did find */
10616 rbtree_postorder_for_each_entry_safe(back, tmp,
10617 &rec->backref_tree, node) {
10619 * if we didn't find any references, don't create a
10620 * new extent record
10622 if (!back->found_ref)
10625 rec->bad_full_backref = 0;
10626 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10634 int err = btrfs_commit_transaction(trans, info->extent_root);
10640 fprintf(stderr, "Repaired extent references for %llu\n",
10641 (unsigned long long)rec->start);
10643 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of `rec` so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up as (rec->start, BTRFS_METADATA_ITEM_KEY,
 * rec->info_level) for metadata records and as (rec->start,
 * BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.  The transaction is
 * committed on every visible path, including the two lookup failure paths.
 * PTR_ERR(trans) is returned on the transaction start failure path.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored on
 * the two failure paths -- confirm that is intended.
 */
10647 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10648 struct extent_record *rec)
10650 struct btrfs_trans_handle *trans;
10651 struct btrfs_root *root = fs_info->extent_root;
10652 struct btrfs_path path;
10653 struct btrfs_extent_item *ei;
10654 struct btrfs_key key;
10658 key.objectid = rec->start;
10659 if (rec->metadata) {
10660 key.type = BTRFS_METADATA_ITEM_KEY;
10661 key.offset = rec->info_level;
10663 key.type = BTRFS_EXTENT_ITEM_KEY;
10664 key.offset = rec->max_size;
10667 trans = btrfs_start_transaction(root, 0);
10669 return PTR_ERR(trans);
10671 btrfs_init_path(&path);
10672 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10674 btrfs_release_path(&path);
10675 btrfs_commit_transaction(trans, root);
10678 fprintf(stderr, "Didn't find extent for %llu\n",
10679 (unsigned long long)rec->start);
10680 btrfs_release_path(&path);
10681 btrfs_commit_transaction(trans, root);
10685 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10686 struct btrfs_extent_item);
10687 flags = btrfs_extent_flags(path.nodes[0], ei);
10688 if (rec->flag_block_full_backref) {
10689 fprintf(stderr, "setting full backref on %llu\n",
10690 (unsigned long long)key.objectid);
10691 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10693 fprintf(stderr, "clearing full backref on %llu\n",
10694 (unsigned long long)key.objectid);
10695 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10697 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10698 btrfs_mark_buffer_dirty(path.nodes[0]);
10699 btrfs_release_path(&path);
10700 ret = btrfs_commit_transaction(trans, root);
10702 fprintf(stderr, "Repaired extent flags for %llu\n",
10703 (unsigned long long)rec->start);
10708 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * The search stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1).  The slot the search landed on
 * is checked first; if its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned for it.  A matching slot is removed with
 * btrfs_del_ptr().
 *
 * NOTE(review): the return statements are not visible here -- confirm.
 */
10709 static int prune_one_block(struct btrfs_trans_handle *trans,
10710 struct btrfs_fs_info *info,
10711 struct btrfs_corrupt_block *corrupt)
10714 struct btrfs_path path;
10715 struct extent_buffer *eb;
10719 int level = corrupt->level + 1;
10721 btrfs_init_path(&path);
10723 /* we want to stop at the parent to our busted block */
10724 path.lowest_level = level;
10726 ret = btrfs_search_slot(trans, info->extent_root,
10727 &corrupt->key, &path, -1, 1);
10732 eb = path.nodes[level];
10739 * hopefully the search gave us the block we want to prune,
10740 * lets try that first
10742 slot = path.slots[level];
10743 found = btrfs_node_blockptr(eb, slot);
10744 if (found == corrupt->cache.start)
10747 nritems = btrfs_header_nritems(eb);
10749 /* the search failed, lets scan this node and hope we find it */
10750 for (slot = 0; slot < nritems; slot++) {
10751 found = btrfs_node_blockptr(eb, slot);
10752 if (found == corrupt->cache.start)
10756 * we couldn't find the bad block. TODO, search all the nodes for pointers
10759 if (eb == info->extent_root->node) {
10764 btrfs_release_path(&path);
10769 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10770 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10773 btrfs_release_path(&path);
/*
 * Take entries from info->corrupt_blocks, call prune_one_block() on each
 * and remove the entry from the cache afterwards (presumably in a loop
 * until the cache is empty -- confirm).
 *
 * A transaction on the extent root is started for the work; PTR_ERR(trans)
 * is returned on the start failure path, and the result of
 * btrfs_commit_transaction() on the commit path.
 *
 * NOTE(review): the return value of prune_one_block() is ignored -- confirm
 * that this best-effort behaviour is intended.
 */
10777 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10779 struct btrfs_trans_handle *trans = NULL;
10780 struct cache_extent *cache;
10781 struct btrfs_corrupt_block *corrupt;
10784 cache = search_cache_extent(info->corrupt_blocks, 0);
10788 trans = btrfs_start_transaction(info->extent_root, 1);
10790 return PTR_ERR(trans);
10792 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10793 prune_one_block(trans, info, corrupt);
10794 remove_cache_extent(info->corrupt_blocks, cache);
10797 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges found in fs_info->free_space_cache, then
 * step through the block groups via btrfs_lookup_first_block_group(),
 * advancing `start` past each one (key.objectid + key.offset).
 *
 * NOTE(review): what is done to each block group is not visible here.
 */
10801 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10803 struct btrfs_block_group_cache *cache;
10808 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10809 &start, &end, EXTENT_DIRTY);
10812 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10817 cache = btrfs_lookup_first_block_group(fs_info, start);
10822 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and optionally repair, the problems recorded in extent_cache.
 *
 * When repairing, the ranges of all records and of all corrupt blocks are
 * first marked dirty in excluded_extents so that nothing is allocated from
 * them; corrupt blocks are pruned and the cached block groups reset.
 * Records queued on duplicate_extents are then handled through
 * process_duplicates() and delete_duplicate_records().
 *
 * Each record in the cache is checked for: multiple extent items, ref count
 * mismatch, backpointer mismatch, failed owner ref check, bad full backref,
 * metadata crossing a stripe boundary and chunk type mismatch.  Problems
 * are printed to stderr.  fixup_extent_refs() is called when `repair` and
 * `fix` are both set, and fixup_extent_flags() appears on the bad full
 * backref path.  Each record is removed from the cache and its backrefs
 * freed once processed.
 *
 * After the walk, an error other than -EAGAIN is reported as a failed
 * repair; on the path that follows, btrfs_fix_block_accounting() runs in
 * its own transaction on the extent root.
 *
 * NOTE(review): the return statements are not visible here -- confirm.
 */
10826 static int check_extent_refs(struct btrfs_root *root,
10827 struct cache_tree *extent_cache)
10829 struct extent_record *rec;
10830 struct cache_extent *cache;
10837 * if we're doing a repair, we have to make sure
10838 * we don't allocate from the problem extents.
10839 * In the worst case, this will be all the
10840 * extents in the FS
10842 cache = search_cache_extent(extent_cache, 0);
10844 rec = container_of(cache, struct extent_record, cache);
10845 set_extent_dirty(root->fs_info->excluded_extents,
10847 rec->start + rec->max_size - 1);
10848 cache = next_cache_extent(cache);
10851 /* pin down all the corrupted blocks too */
10852 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10854 set_extent_dirty(root->fs_info->excluded_extents,
10856 cache->start + cache->size - 1);
10857 cache = next_cache_extent(cache);
/* The result of prune_corrupt_blocks() is not checked here. */
10859 prune_corrupt_blocks(root->fs_info);
10860 reset_cached_block_groups(root->fs_info);
10863 reset_cached_block_groups(root->fs_info);
10866 * We need to delete any duplicate entries we find first otherwise we
10867 * could mess up the extent tree when we have backrefs that actually
10868 * belong to a different extent item and not the weird duplicate one.
10870 while (repair && !list_empty(&duplicate_extents)) {
10871 rec = to_extent_record(duplicate_extents.next);
10872 list_del_init(&rec->list);
10874 /* Sometimes we can find a backref before we find an actual
10875 * extent, so we need to process it a little bit to see if there
10876 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10877 * if this is a backref screwup. If we need to delete stuff
10878 * process_duplicates() will return 0, otherwise it will return
10881 if (process_duplicates(extent_cache, rec))
10883 ret = delete_duplicate_records(root, rec);
10887 * delete_duplicate_records will return the number of entries
10888 * deleted, so if it's greater than 0 then we know we actually
10889 * did something and we need to remove.
10902 cache = search_cache_extent(extent_cache, 0);
10905 rec = container_of(cache, struct extent_record, cache);
10906 if (rec->num_duplicates) {
10907 fprintf(stderr, "extent item %llu has multiple extent "
10908 "items\n", (unsigned long long)rec->start);
10912 if (rec->refs != rec->extent_item_refs) {
10913 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10914 (unsigned long long)rec->start,
10915 (unsigned long long)rec->nr);
10916 fprintf(stderr, "extent item %llu, found %llu\n",
10917 (unsigned long long)rec->extent_item_refs,
10918 (unsigned long long)rec->refs);
10919 ret = record_orphan_data_extents(root->fs_info, rec);
10925 if (all_backpointers_checked(rec, 1)) {
10926 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10927 (unsigned long long)rec->start,
10928 (unsigned long long)rec->nr);
10932 if (!rec->owner_ref_checked) {
10933 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10934 (unsigned long long)rec->start,
10935 (unsigned long long)rec->nr);
10940 if (repair && fix) {
10941 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10947 if (rec->bad_full_backref) {
10948 fprintf(stderr, "bad full backref, on [%llu]\n",
10949 (unsigned long long)rec->start);
10951 ret = fixup_extent_flags(root->fs_info, rec);
10959 * Although it's not a extent ref's problem, we reuse this
10960 * routine for error reporting.
10961 * No repair function yet.
10963 if (rec->crossing_stripes) {
10965 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10966 rec->start, rec->start + rec->max_size);
10970 if (rec->wrong_chunk_type) {
10972 "bad extent [%llu, %llu), type mismatch with chunk\n",
10973 rec->start, rec->start + rec->max_size);
10978 remove_cache_extent(extent_cache, cache);
10979 free_all_extent_backrefs(rec);
10980 if (!init_extent_tree && repair && (!cur_err || fix))
10981 clear_extent_dirty(root->fs_info->excluded_extents,
10983 rec->start + rec->max_size - 1);
10988 if (ret && ret != -EAGAIN) {
10989 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10992 struct btrfs_trans_handle *trans;
10994 root = root->fs_info->extent_root;
10995 trans = btrfs_start_transaction(root, 1);
10996 if (IS_ERR(trans)) {
10997 ret = PTR_ERR(trans);
11001 ret = btrfs_fix_block_accounting(trans, root);
11004 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the per-stripe length of a chunk from its total length.
 *
 * type:        block group flags; the BTRFS_BLOCK_GROUP_RAID* bits select
 *              the formula
 * length:      length of the chunk
 * num_stripes: number of stripes in the chunk
 *
 * RAID0 divides length by num_stripes, RAID10 divides twice the length by
 * num_stripes, RAID5 divides by num_stripes - 1 and RAID6 by
 * num_stripes - 2.  Any other type uses length unchanged.
 *
 * NOTE(review): num_stripes is not validated here; 0 for RAID0/RAID10, 1
 * for RAID5 or 2 for RAID6 would divide by zero -- confirm that callers
 * guarantee sane values.
 */
11016 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11020 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11021 stripe_size = length;
11022 stripe_size /= num_stripes;
11023 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11024 stripe_size = length * 2;
11025 stripe_size /= num_stripes;
11026 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11027 stripe_size = length;
11028 stripe_size /= (num_stripes - 1);
11029 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11030 stripe_size = length;
11031 stripe_size /= (num_stripes - 2);
11033 stripe_size = length;
11035 return stripe_size;
11039 * Check the chunk with its block group/dev list ref:
11040 * Return 0 if all refs seem valid.
11041 * Return 1 if only some refs seem valid and a later pass must rebuild the
11042 * rest, e.g. a missing block group that needs an extent tree search to rebuild.
11043 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.  The chunk is looked up in block_group_cache->tree, then each
 * of its stripes is looked up in dev_extent_cache->tree.  The format strings
 * below describe the mismatch and not-found conditions.
 */
11045 static int check_chunk_refs(struct chunk_record *chunk_rec,
11046 struct block_group_tree *block_group_cache,
11047 struct device_extent_tree *dev_extent_cache,
11050 struct cache_extent *block_group_item;
11051 struct block_group_record *block_group_rec;
11052 struct cache_extent *dev_extent_item;
11053 struct device_extent_record *dev_extent_rec;
/* NOTE(review): no assignment to metadump_v2 other than this initializer is visible in this excerpt. */
11057 int metadump_v2 = 0;
11061 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11063 chunk_rec->length);
11064 if (block_group_item) {
11065 block_group_rec = container_of(block_group_item,
11066 struct block_group_record,
/* Chunk length, offset and type flags are compared with the block group offset, objectid and flags fields. */
11068 if (chunk_rec->length != block_group_rec->offset ||
11069 chunk_rec->offset != block_group_rec->objectid ||
11071 chunk_rec->type_flags != block_group_rec->flags)) {
11074 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11075 chunk_rec->objectid,
11080 chunk_rec->type_flags,
11081 block_group_rec->objectid,
11082 block_group_rec->type,
11083 block_group_rec->offset,
11084 block_group_rec->offset,
11085 block_group_rec->objectid,
11086 block_group_rec->flags);
/* Unlink the block group record from its list and attach it to the chunk record. */
11089 list_del_init(&block_group_rec->list);
11090 chunk_rec->bg_rec = block_group_rec;
11095 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11096 chunk_rec->objectid,
11101 chunk_rec->type_flags);
/* Expected length of each stripe, derived from the chunk type flags, length and stripe count. */
11108 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11109 chunk_rec->num_stripes);
11110 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11111 devid = chunk_rec->stripes[i].devid;
11112 offset = chunk_rec->stripes[i].offset;
11113 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11114 devid, offset, length);
11115 if (dev_extent_item) {
11116 dev_extent_rec = container_of(dev_extent_item,
11117 struct device_extent_record,
/* The device extent must agree on devid, offset, owning chunk offset and length. */
11119 if (dev_extent_rec->objectid != devid ||
11120 dev_extent_rec->offset != offset ||
11121 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11122 dev_extent_rec->length != length) {
11125 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11126 chunk_rec->objectid,
11129 chunk_rec->stripes[i].devid,
11130 chunk_rec->stripes[i].offset,
11131 dev_extent_rec->objectid,
11132 dev_extent_rec->offset,
11133 dev_extent_rec->length);
/* Move the device extent record onto the dextents list of this chunk. */
11136 list_move(&dev_extent_rec->chunk_list,
11137 &chunk_rec->dextents);
11142 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11143 chunk_rec->objectid,
11146 chunk_rec->stripes[i].devid,
11147 chunk_rec->stripes[i].offset);
11154 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk record in chunk_cache, run check_chunk_refs() on it and
 * append the record to the good, rebuild or bad list according to the result
 * (zero, positive, negative).  A NULL list pointer skips that bucket.
 * Afterwards the entries on block_group_cache->block_groups and
 * dev_extent_cache->no_chunk_orphans are iterated; the format strings below
 * describe them as having no matching chunk.
 */
11155 int check_chunks(struct cache_tree *chunk_cache,
11156 struct block_group_tree *block_group_cache,
11157 struct device_extent_tree *dev_extent_cache,
11158 struct list_head *good, struct list_head *bad,
11159 struct list_head *rebuild, int silent)
11161 struct cache_extent *chunk_item;
11162 struct chunk_record *chunk_rec;
11163 struct block_group_record *bg_rec;
11164 struct device_extent_record *dext_rec;
11168 chunk_item = first_cache_extent(chunk_cache);
11169 while (chunk_item) {
11170 chunk_rec = container_of(chunk_item, struct chunk_record,
11172 err = check_chunk_refs(chunk_rec, block_group_cache,
11173 dev_extent_cache, silent);
/* Sort the chunk record by the sign of err; each output list is optional. */
11176 if (err == 0 && good)
11177 list_add_tail(&chunk_rec->list, good);
11178 if (err > 0 && rebuild)
11179 list_add_tail(&chunk_rec->list, rebuild);
11180 if (err < 0 && bad)
11181 list_add_tail(&chunk_rec->list, bad);
11182 chunk_item = next_cache_extent(chunk_item);
/* check_chunk_refs() unlinks matched block groups, so these are presumably the unmatched ones; verify. */
11185 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11188 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11196 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11200 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11201 dext_rec->objectid,
/*
 * Sum the lengths of the device extents found in dext_cache->tree starting
 * at dev_rec->devid and compare the total with dev_rec->byte_used.  The
 * format string below reports a difference between the two.
 */
11211 static int check_device_used(struct device_record *dev_rec,
11212 struct device_extent_tree *dext_cache)
11214 struct cache_extent *cache;
11215 struct device_extent_record *dev_extent_rec;
11216 u64 total_byte = 0;
11218 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11220 dev_extent_rec = container_of(cache,
11221 struct device_extent_record,
/* Test whether the extent still belongs to the device being checked. */
11223 if (dev_extent_rec->objectid != dev_rec->devid)
/* Take the extent off its device_list and account its length. */
11226 list_del_init(&dev_extent_rec->device_list);
11227 total_byte += dev_extent_rec->length;
11228 cache = next_cache_extent(cache);
11231 if (total_byte != dev_rec->byte_used) {
11233 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11234 total_byte, dev_rec->byte_used, dev_rec->objectid,
11235 dev_rec->type, dev_rec->offset);
11243 * Extra (optional) check for dev_item size to report possbile problem on a new
11246 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11248 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11250 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11251 devid, total_bytes, sectorsize);
11253 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11254 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11259 * Unlike device size alignment check above, some super total_bytes check
11260 * failure can lead to mount failure for newer kernel.
11262 * So this function will return the error for a fatal super total_bytes problem.
/*
 * Compare the superblock total_bytes with the sum of total_bytes over every
 * device on fs_info->fs_devices->devices.  A superblock value smaller than
 * the sum is reported with error(); alignment or equality problems are
 * reported with warning() only.
 */
11264 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11266 struct btrfs_device *dev;
11267 struct list_head *dev_list = &fs_info->fs_devices->devices;
11268 u64 total_bytes = 0;
11269 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
/* Accumulate the size of all devices. */
11271 list_for_each_entry(dev, dev_list, dev_list)
11272 total_bytes += dev->total_bytes;
11274 /* Important check, which can cause unmountable fs */
11275 if (super_bytes < total_bytes) {
11276 error("super total bytes %llu smaller than real device(s) size %llu",
11277 super_bytes, total_bytes);
11278 error("mounting this fs may fail for newer kernels");
11279 error("this can be fixed by 'btrfs rescue fix-device-size'");
11284 * Optional check, just to make everything aligned and match with each
11287 * For a btrfs-image restored fs, we don't need to check it anyway.
/* Test the superblock flags for either metadump flag. */
11289 if (btrfs_super_flags(fs_info->super_copy) &
11290 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
/* Both totals are tested for sectorsize alignment and for equality with each other. */
11292 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11293 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11294 super_bytes != total_bytes) {
11295 warning("minor unaligned/mismatch device size detected");
11297 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11302 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * For each device record in the dev_cache rb-tree, run check_device_used()
 * and check_dev_size_alignment() (with global_info->sectorsize).  Then
 * iterate dev_extent_cache->no_device_orphans; the format string below
 * reports each such device extent as having no device.
 */
11303 static int check_devices(struct rb_root *dev_cache,
11304 struct device_extent_tree *dev_extent_cache)
11306 struct rb_node *dev_node;
11307 struct device_record *dev_rec;
11308 struct device_extent_record *dext_rec;
11312 dev_node = rb_first(dev_cache);
11314 dev_rec = container_of(dev_node, struct device_record, node);
11315 err = check_device_used(dev_rec, dev_extent_cache);
/* The alignment check only warns; it has no return value to collect. */
11319 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11320 global_info->sectorsize);
11321 dev_node = rb_next(dev_node);
/* check_device_used() unlinks the extents it visits from device_list, so these are presumably the leftovers; verify. */
11323 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11326 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11327 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11334 static int add_root_item_to_list(struct list_head *head,
11335 u64 objectid, u64 bytenr, u64 last_snapshot,
11336 u8 level, u8 drop_level,
11337 struct btrfs_key *drop_key)
11340 struct root_item_record *ri_rec;
11341 ri_rec = malloc(sizeof(*ri_rec));
11344 ri_rec->bytenr = bytenr;
11345 ri_rec->objectid = objectid;
11346 ri_rec->level = level;
11347 ri_rec->drop_level = drop_level;
11348 ri_rec->last_snapshot = last_snapshot;
11350 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11351 list_add_tail(&ri_rec->list, head);
11356 static void free_root_item_list(struct list_head *list)
11358 struct root_item_record *ri_rec;
11360 while (!list_empty(list)) {
11361 ri_rec = list_first_entry(list, struct root_item_record,
11363 list_del_init(&ri_rec->list);
/*
 * Process a list of root_item_record entries.  For each record the root tree
 * block is read, queued with add_root_to_pending() and handed to
 * run_next_block(); the buffer is then released and the record unlinked.
 * run_next_block() is also called with a NULL record, presumably once the
 * list has been drained (the enclosing control flow is not visible here).
 */
11368 static int deal_root_from_list(struct list_head *list,
11369 struct btrfs_root *root,
11370 struct block_info *bits,
11372 struct cache_tree *pending,
11373 struct cache_tree *seen,
11374 struct cache_tree *reada,
11375 struct cache_tree *nodes,
11376 struct cache_tree *extent_cache,
11377 struct cache_tree *chunk_cache,
11378 struct rb_root *dev_cache,
11379 struct block_group_tree *block_group_cache,
11380 struct device_extent_tree *dev_extent_cache)
11385 while (!list_empty(list)) {
11386 struct root_item_record *rec;
11387 struct extent_buffer *buf;
/* Always work on the first record of the list. */
11388 rec = list_entry(list->next,
11389 struct root_item_record, list);
/* Read the root tree block of this record; a buffer that is not uptodate is released. */
11391 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11392 if (!extent_buffer_uptodate(buf)) {
11393 free_extent_buffer(buf);
11397 ret = add_root_to_pending(buf, extent_cache, pending,
11398 seen, nodes, rec->objectid);
11402 * To rebuild extent tree, we need deal with snapshot
11403 * one by one, otherwise we deal with node firstly which
11404 * can maximize readahead.
11407 ret = run_next_block(root, bits, bits_nr, &last,
11408 pending, seen, reada, nodes,
11409 extent_cache, chunk_cache,
11410 dev_cache, block_group_cache,
11411 dev_extent_cache, rec);
/* Done with this record: drop the buffer reference and unlink it. */
11415 free_extent_buffer(buf);
11416 list_del(&rec->list);
11422 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11423 reada, nodes, extent_cache, chunk_cache,
11424 dev_cache, block_group_cache,
11425 dev_extent_cache, NULL);
/*
 * Top-level pass over chunks and extents.  Initializes the local caches,
 * points fs_info at some of them, collects the tree root, the chunk root and
 * every ROOT_ITEM found in the tree root into the normal_trees or
 * dropping_trees list, processes both lists with deal_root_from_list(), and
 * then runs check_chunks(), check_extent_refs() and check_devices().  The
 * fs_info pointers are reset to NULL and the caches are freed at the end.
 */
11435 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11437 struct rb_root dev_cache;
11438 struct cache_tree chunk_cache;
11439 struct block_group_tree block_group_cache;
11440 struct device_extent_tree dev_extent_cache;
11441 struct cache_tree extent_cache;
11442 struct cache_tree seen;
11443 struct cache_tree pending;
11444 struct cache_tree reada;
11445 struct cache_tree nodes;
11446 struct extent_io_tree excluded_extents;
11447 struct cache_tree corrupt_blocks;
11448 struct btrfs_path path;
11449 struct btrfs_key key;
11450 struct btrfs_key found_key;
11452 struct block_info *bits;
11454 struct extent_buffer *leaf;
11456 struct btrfs_root_item ri;
11457 struct list_head dropping_trees;
11458 struct list_head normal_trees;
11459 struct btrfs_root *root1;
11460 struct btrfs_root *root;
11464 root = fs_info->fs_root;
11465 dev_cache = RB_ROOT;
11466 cache_tree_init(&chunk_cache);
11467 block_group_tree_init(&block_group_cache);
11468 device_extent_tree_init(&dev_extent_cache);
11470 cache_tree_init(&extent_cache);
11471 cache_tree_init(&seen);
11472 cache_tree_init(&pending);
11473 cache_tree_init(&nodes);
11474 cache_tree_init(&reada);
11475 cache_tree_init(&corrupt_blocks);
11476 extent_io_tree_init(&excluded_extents);
11477 INIT_LIST_HEAD(&dropping_trees);
11478 INIT_LIST_HEAD(&normal_trees);
/* Point fs_info at the local caches and install free_extent_hook. */
11481 fs_info->excluded_extents = &excluded_extents;
11482 fs_info->fsck_extent_cache = &extent_cache;
11483 fs_info->free_extent_hook = free_extent_hook;
11484 fs_info->corrupt_blocks = &corrupt_blocks;
11488 bits = malloc(bits_nr * sizeof(struct block_info));
11494 if (ctx.progress_enabled) {
11495 ctx.tp = TASK_EXTENTS;
11496 task_start(ctx.info);
/* Queue the tree root and then the chunk root on normal_trees. */
11500 root1 = fs_info->tree_root;
11501 level = btrfs_header_level(root1->node);
11502 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11503 root1->node->start, 0, level, 0, NULL);
11506 root1 = fs_info->chunk_root;
11507 level = btrfs_header_level(root1->node);
11508 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11509 root1->node->start, 0, level, 0, NULL);
/* Search the tree root for ROOT_ITEM keys. */
11512 btrfs_init_path(&path);
11515 key.type = BTRFS_ROOT_ITEM_KEY;
11516 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11520 leaf = path.nodes[0];
11521 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
11522 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11523 ret = btrfs_next_leaf(root, &path);
11526 leaf = path.nodes[0];
11527 slot = path.slots[0];
11529 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11530 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11531 unsigned long offset;
/* Copy the root item out of the leaf into ri. */
11534 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11535 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11536 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid puts the root on normal_trees; the other visible call puts it on dropping_trees with its drop key and drop level. */
11537 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11538 level = btrfs_root_level(&ri);
11539 ret = add_root_item_to_list(&normal_trees,
11540 found_key.objectid,
11541 btrfs_root_bytenr(&ri),
11542 last_snapshot, level,
11547 level = btrfs_root_level(&ri);
11548 objectid = found_key.objectid;
11549 btrfs_disk_key_to_cpu(&found_key,
11550 &ri.drop_progress);
11551 ret = add_root_item_to_list(&dropping_trees,
11553 btrfs_root_bytenr(&ri),
11554 last_snapshot, level,
11555 ri.drop_level, &found_key);
11562 btrfs_release_path(&path);
11565 * check_block can return -EAGAIN if it fixes something, please keep
11566 * this in mind when dealing with return values from these functions, if
11567 * we get -EAGAIN we want to fall through and restart the loop.
11569 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11570 &seen, &reada, &nodes, &extent_cache,
11571 &chunk_cache, &dev_cache, &block_group_cache,
11572 &dev_extent_cache);
11574 if (ret == -EAGAIN)
11578 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11579 &pending, &seen, &reada, &nodes,
11580 &extent_cache, &chunk_cache, &dev_cache,
11581 &block_group_cache, &dev_extent_cache);
11583 if (ret == -EAGAIN)
/* No good, bad or rebuild lists are requested here, and silent is 0. */
11588 ret = check_chunks(&chunk_cache, &block_group_cache,
11589 &dev_extent_cache, NULL, NULL, NULL, 0);
11591 if (ret == -EAGAIN)
11596 ret = check_extent_refs(root, &extent_cache);
11598 if (ret == -EAGAIN)
11603 ret = check_devices(&dev_cache, &dev_extent_cache);
11608 task_stop(ctx.info);
/* Detach the local caches from fs_info before they are freed. */
11610 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11611 extent_io_tree_cleanup(&excluded_extents);
11612 fs_info->fsck_extent_cache = NULL;
11613 fs_info->free_extent_hook = NULL;
11614 fs_info->corrupt_blocks = NULL;
11615 fs_info->excluded_extents = NULL;
11618 free_chunk_cache_tree(&chunk_cache);
11619 free_device_cache_tree(&dev_cache);
11620 free_block_group_tree(&block_group_cache);
11621 free_device_extent_tree(&dev_extent_cache);
11622 free_extent_cache_tree(&seen);
11623 free_extent_cache_tree(&pending);
11624 free_extent_cache_tree(&reada);
11625 free_extent_cache_tree(&nodes);
11626 free_root_item_list(&normal_trees);
11627 free_root_item_list(&dropping_trees);
/* NOTE(review): a second run of cleanup calls starts here and also frees the extent_cache records; presumably it belongs to a separate path (the -EAGAIN restart mentioned above) whose label is not visible in this excerpt. */
11630 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11631 free_extent_cache_tree(&seen);
11632 free_extent_cache_tree(&pending);
11633 free_extent_cache_tree(&reada);
11634 free_extent_cache_tree(&nodes);
11635 free_chunk_cache_tree(&chunk_cache);
11636 free_block_group_tree(&block_group_cache);
11637 free_device_cache_tree(&dev_cache);
11638 free_device_extent_tree(&dev_extent_cache);
11639 free_extent_record_cache(&extent_cache);
11640 free_root_item_list(&normal_trees);
11641 free_root_item_list(&dropping_trees);
11642 extent_io_tree_cleanup(&excluded_extents);
11646 static int check_extent_inline_ref(struct extent_buffer *eb,
11647 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11650 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11653 case BTRFS_TREE_BLOCK_REF_KEY:
11654 case BTRFS_EXTENT_DATA_REF_KEY:
11655 case BTRFS_SHARED_BLOCK_REF_KEY:
11656 case BTRFS_SHARED_DATA_REF_KEY:
11660 error("extent[%llu %u %llu] has unknown ref type: %d",
11661 key->objectid, key->type, key->offset, type);
11662 ret = UNKNOWN_TYPE;
11670 * Check backrefs of a tree block given by @bytenr or @eb.
11672 * @root: the root containing the @bytenr or @eb
11673 * @eb: tree block extent buffer, can be NULL
11674 * @bytenr: bytenr of the tree block to search
11675 * @level: tree level of the tree block
11676 * @owner: owner of the tree block
11678 * Return >0 for any error found and output error message
11679 * Return 0 for no error found
/*
 * Look up the extent or metadata item of the tree block at bytenr in the
 * extent tree and search it for a backref that matches root: first among the
 * inline refs of the item, then through a separate TREE_BLOCK_REF_KEY item
 * keyed by root->objectid.  Problems are recorded in err as BACKREF_* bits.
 */
11681 static int check_tree_block_ref(struct btrfs_root *root,
11682 struct extent_buffer *eb, u64 bytenr,
11683 int level, u64 owner, struct node_refs *nrefs)
11685 struct btrfs_key key;
11686 struct btrfs_root *extent_root = root->fs_info->extent_root;
11687 struct btrfs_path path;
11688 struct btrfs_extent_item *ei;
11689 struct btrfs_extent_inline_ref *iref;
11690 struct extent_buffer *leaf;
11695 int root_level = btrfs_header_level(root->node);
11697 u32 nodesize = root->fs_info->nodesize;
11700 int tree_reloc_root = 0;
/* Flag the case where root is a tree reloc root and bytenr is its own root node. */
11707 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11708 btrfs_header_bytenr(root->node) == bytenr)
11709 tree_reloc_root = 1;
11710 btrfs_init_path(&path);
11711 key.objectid = bytenr;
/* The key type depends on the SKINNY_METADATA incompat feature. */
11712 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11713 key.type = BTRFS_METADATA_ITEM_KEY;
11715 key.type = BTRFS_EXTENT_ITEM_KEY;
11716 key.offset = (u64)-1;
11718 /* Search for the backref in extent tree */
11719 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11721 err |= BACKREF_MISSING;
11724 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11726 err |= BACKREF_MISSING;
11730 leaf = path.nodes[0];
11731 slot = path.slots[0];
11732 btrfs_item_key_to_cpu(leaf, &key, slot);
11734 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* A METADATA_ITEM carries the level in key.offset; otherwise it is read from the btrfs_tree_block_info that follows the extent item. */
11736 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11737 skinny_level = (int)key.offset;
11738 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11740 struct btrfs_tree_block_info *info;
11742 info = (struct btrfs_tree_block_info *)(ei + 1);
11743 skinny_level = btrfs_tree_block_level(leaf, info);
11744 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11753 * Due to the feature of shared tree blocks, if the upper node
11754 * is a fs root or shared node, the extent of checked node may
11755 * not be updated until the next CoW.
11758 strict = should_check_extent_strictly(root, nrefs,
11760 if (!(btrfs_extent_flags(leaf, ei) &
11761 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11763 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11764 key.objectid, nodesize,
11765 BTRFS_EXTENT_FLAG_TREE_BLOCK);
/* NOTE(review): plain assignment rather than |= here and in the three checks below; confirm no earlier err bits can be lost. */
11766 err = BACKREF_MISMATCH;
11768 header_gen = btrfs_header_generation(eb);
11769 extent_gen = btrfs_extent_generation(leaf, ei);
11770 if (header_gen != extent_gen) {
11772 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11773 key.objectid, nodesize, header_gen,
11775 err = BACKREF_MISMATCH;
11777 if (level != skinny_level) {
11779 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11780 key.objectid, nodesize, level, skinny_level);
11781 err = BACKREF_MISMATCH;
11783 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11785 "extent[%llu %u] is referred by other roots than %llu",
11786 key.objectid, nodesize, root->objectid);
11787 err = BACKREF_MISMATCH;
11792 * Iterate the extent/metadata item to find the exact backref
11794 item_size = btrfs_item_size_nr(leaf, slot);
/* ptr starts at the first inline ref and end marks the end of the item. */
11795 ptr = (unsigned long)iref;
11796 end = (unsigned long)ei + item_size;
11798 while (ptr < end) {
11799 iref = (struct btrfs_extent_inline_ref *)ptr;
11800 type = btrfs_extent_inline_ref_type(leaf, iref);
11801 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11803 ret = check_extent_inline_ref(leaf, &key, iref);
/* For a TREE_BLOCK_REF the ref offset is compared with root->objectid, or with owner when the check is not strict. */
11808 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11809 if (offset == root->objectid)
11811 if (!strict && owner == offset)
11813 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11815 * Backref of tree reloc root points to itself, no need
11816 * to check backref any more.
11818 if (tree_reloc_root) {
11822 * Check if the backref points to valid
/* Recurse on the block the ref offset names, one level up, without an extent buffer. */
11825 found_ref = !check_tree_block_ref( root, NULL,
11826 offset, level + 1, owner,
/* Advance by the size of this ref type. */
11833 ptr += btrfs_extent_inline_ref_size(type);
11837 * Inlined extent item doesn't have what we need, check
11838 * TREE_BLOCK_REF_KEY
11841 btrfs_release_path(&path);
11842 key.objectid = bytenr;
11843 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11844 key.offset = root->objectid;
11846 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11851 err |= BACKREF_MISSING;
11853 btrfs_release_path(&path);
/* parent is only used to choose the wording of the message below. */
11854 if (nrefs && strict &&
11855 level < root_level && nrefs->full_backref[level + 1])
11856 parent = nrefs->bytenr[level + 1];
11857 if (eb && (err & BACKREF_MISSING))
11859 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11860 bytenr, nodesize, owner, level,
11861 parent ? "parent" : "root",
11862 parent ? parent : root->objectid);
11867 * If @err contains BACKREF_MISSING then add extent of the
11868 * file_extent_data_item.
11870 * Returns error bits after repair.
/*
 * Repair helper for the file extent item at pathp->slots[0].  The extent
 * item keyed (disk_bytenr, EXTENT_ITEM, disk_num_bytes) is searched for in
 * the extent tree and an empty one flagged as DATA can be inserted, after
 * which btrfs_inc_extent_ref() adds the data backref and BACKREF_MISSING is
 * cleared from err.
 */
11872 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11873 struct btrfs_root *root,
11874 struct btrfs_path *pathp,
11875 struct node_refs *nrefs,
11878 struct btrfs_file_extent_item *fi;
11879 struct btrfs_key fi_key;
11880 struct btrfs_key key;
11881 struct btrfs_extent_item *ei;
11882 struct btrfs_path path;
11883 struct btrfs_root *extent_root = root->fs_info->extent_root;
11884 struct extent_buffer *eb;
11896 eb = pathp->nodes[0];
11897 slot = pathp->slots[0];
11898 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11899 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Inline extents and extents with a zero disk_bytenr are singled out first. */
11901 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11902 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11905 file_offset = fi_key.offset;
11906 generation = btrfs_file_extent_generation(eb, fi);
11907 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11908 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11909 extent_offset = btrfs_file_extent_offset(eb, fi);
/* offset is the file offset minus the offset into the extent. */
11910 offset = file_offset - extent_offset;
11912 /* now repair only adds backref */
11913 if ((err & BACKREF_MISSING) == 0)
11916 /* search extent item */
11917 key.objectid = disk_bytenr;
11918 key.type = BTRFS_EXTENT_ITEM_KEY;
11919 key.offset = num_bytes;
11921 btrfs_init_path(&path);
11922 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11928 /* insert an extent item */
11930 key.objectid = disk_bytenr;
11931 key.type = BTRFS_EXTENT_ITEM_KEY;
11932 key.offset = num_bytes;
11933 size = sizeof(*ei);
11935 btrfs_release_path(&path);
11936 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11940 eb = path.nodes[0];
11941 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero refs; btrfs_inc_extent_ref() is called further down. */
11943 btrfs_set_extent_refs(eb, ei, 0);
11944 btrfs_set_extent_generation(eb, ei, generation);
11945 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11947 btrfs_mark_buffer_dirty(eb);
11948 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11950 btrfs_release_path(&path);
/* NOTE(review): eb was reassigned to path.nodes[0] above and that path has been released; confirm this reads the intended buffer. */
11953 if (nrefs->full_backref[0])
11954 parent = btrfs_header_bytenr(eb);
11958 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11960 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11964 "failed to increase extent data backref[%llu %llu] root %llu",
11965 disk_bytenr, num_bytes, root->objectid);
11968 printf("Add one extent data backref [%llu %llu]\n",
11969 disk_bytenr, num_bytes);
11972 err &= ~BACKREF_MISSING;
11975 error("can't repair root %llu extent data item[%llu %llu]",
11976 root->objectid, disk_bytenr, num_bytes);
11981 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11983 * Return >0 any error found and output error message
11984 * Return 0 for no error found
/*
 * Check the file extent item at pathp->slots[0].  The disk and logical byte
 * counts are tested for sectorsize alignment (and accounted when
 * account_bytes is set), then the matching extent item is looked up in the
 * extent tree and searched for a data backref: inline refs first, then an
 * EXTENT_DATA_REF item, then a SHARED_DATA_REF item.  Problems are recorded
 * in err as BYTES_UNALIGNED, BACKREF_MISMATCH and BACKREF_MISSING bits.
 */
11986 static int check_extent_data_item(struct btrfs_root *root,
11987 struct btrfs_path *pathp,
11988 struct node_refs *nrefs, int account_bytes)
11990 struct btrfs_file_extent_item *fi;
11991 struct extent_buffer *eb = pathp->nodes[0];
11992 struct btrfs_path path;
11993 struct btrfs_root *extent_root = root->fs_info->extent_root;
11994 struct btrfs_key fi_key;
11995 struct btrfs_key dbref_key;
11996 struct extent_buffer *leaf;
11997 struct btrfs_extent_item *ei;
11998 struct btrfs_extent_inline_ref *iref;
11999 struct btrfs_extent_data_ref *dref;
12002 u64 disk_num_bytes;
12003 u64 extent_num_bytes;
12010 int found_dbackref = 0;
12011 int slot = pathp->slots[0];
12016 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12017 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12019 /* Nothing to check for hole and inline data extents */
12020 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12021 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12024 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12025 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12026 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12028 /* Check unaligned disk_num_bytes and num_bytes */
12029 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12031 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12032 fi_key.objectid, fi_key.offset, disk_num_bytes,
12033 root->fs_info->sectorsize);
12034 err |= BYTES_UNALIGNED;
/* Only aligned sizes are added to the global byte counters. */
12035 } else if (account_bytes) {
12036 data_bytes_allocated += disk_num_bytes;
12038 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12040 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12041 fi_key.objectid, fi_key.offset, extent_num_bytes,
12042 root->fs_info->sectorsize);
12043 err |= BYTES_UNALIGNED;
12044 } else if (account_bytes) {
12045 data_bytes_referenced += extent_num_bytes;
12047 owner = btrfs_header_owner(eb);
12049 /* Check the extent item of the file extent in extent tree */
12050 btrfs_init_path(&path);
12051 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12052 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12053 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12055 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12059 leaf = path.nodes[0];
12060 slot = path.slots[0];
12061 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12063 extent_flags = btrfs_extent_flags(leaf, ei);
12065 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12067 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12068 disk_bytenr, disk_num_bytes,
12069 BTRFS_EXTENT_FLAG_DATA);
12070 err |= BACKREF_MISMATCH;
12073 /* Check data backref inside that extent item */
12074 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
/* Inline refs start right after the extent item header and run to the end of the item. */
12075 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12076 ptr = (unsigned long)iref;
12077 end = (unsigned long)ei + item_size;
12078 strict = should_check_extent_strictly(root, nrefs, -1);
12080 while (ptr < end) {
12081 iref = (struct btrfs_extent_inline_ref *)ptr;
12082 type = btrfs_extent_inline_ref_type(leaf, iref);
12083 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12085 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
/* An EXTENT_DATA_REF matches on root->objectid, or on the leaf owner when the check is not strict. */
12090 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12091 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12092 if (ref_root == root->objectid)
12093 found_dbackref = 1;
12094 else if (!strict && owner == ref_root)
12095 found_dbackref = 1;
/* A SHARED_DATA_REF is validated through check_tree_block_ref() on the block named by the ref offset. */
12096 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12097 found_dbackref = !check_tree_block_ref(root, NULL,
12098 btrfs_extent_inline_ref_offset(leaf, iref),
12102 if (found_dbackref)
12104 ptr += btrfs_extent_inline_ref_size(type);
12107 if (!found_dbackref) {
12108 btrfs_release_path(&path);
12110 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12111 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12112 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12113 dbref_key.offset = hash_extent_data_ref(root->objectid,
12114 fi_key.objectid, fi_key.offset);
12116 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12117 &dbref_key, &path, 0, 0);
12119 found_dbackref = 1;
12123 btrfs_release_path(&path);
12126 * Neither inlined nor EXTENT_DATA_REF found, try
12127 * SHARED_DATA_REF as last chance.
12129 dbref_key.objectid = disk_bytenr;
12130 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12131 dbref_key.offset = eb->start;
12133 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12134 &dbref_key, &path, 0, 0);
12136 found_dbackref = 1;
12142 if (!found_dbackref)
12143 err |= BACKREF_MISSING;
12144 btrfs_release_path(&path);
12145 if (err & BACKREF_MISSING) {
12146 error("data extent[%llu %llu] backref lost",
12147 disk_bytenr, disk_num_bytes);
12153 * Get real tree block level for the case like shared block
12154 * Return >= 0 as tree level
12155 * Return <0 for error
/*
 * Find the level of the tree block at bytenr from two sources: the extent
 * tree item (key.offset for a METADATA_ITEM, btrfs_tree_block_info
 * otherwise) and the header of the block itself, which is read with the
 * generation taken from the extent item.  The two levels are compared and
 * header_level is returned.
 */
12157 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12159 struct extent_buffer *eb;
12160 struct btrfs_path path;
12161 struct btrfs_key key;
12162 struct btrfs_extent_item *ei;
12169 /* Search extent tree for extent generation and level */
12170 key.objectid = bytenr;
12171 key.type = BTRFS_METADATA_ITEM_KEY;
12172 key.offset = (u64)-1;
12174 btrfs_init_path(&path);
12175 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12178 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12186 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12187 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12188 struct btrfs_extent_item);
12189 flags = btrfs_extent_flags(path.nodes[0], ei);
/* The extent item must carry the TREE_BLOCK flag. */
12190 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12195 /* Get transid for later read_tree_block() check */
12196 transid = btrfs_extent_generation(path.nodes[0], ei);
12198 /* Get backref level as one source */
12199 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12200 backref_level = key.offset;
12202 struct btrfs_tree_block_info *info;
12204 info = (struct btrfs_tree_block_info *)(ei + 1);
12205 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12207 btrfs_release_path(&path);
12209 /* Get level from tree block as an alternative source */
12210 eb = read_tree_block(fs_info, bytenr, transid);
12211 if (!extent_buffer_uptodate(eb)) {
12212 free_extent_buffer(eb);
12215 header_level = btrfs_header_level(eb);
12216 free_extent_buffer(eb);
/* The two sources are expected to agree. */
12218 if (header_level != backref_level)
12220 return header_level;
12223 btrfs_release_path(&path);
12228 * Check if a tree block backref is valid (points to a valid tree block)
12229 * if level == -1, level will be resolved
12230 * Return >0 for any error found and print error message
/*
 * Verify that root root_id contains the tree block at bytenr: read the root,
 * read the block to obtain its first key, then search the root for that key
 * with path.lowest_level set to level and compare the bytenr and level of
 * the node found.  Failures set REFERENCER_MISSING or REFERENCER_MISMATCH in
 * err.
 */
12232 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12233 u64 bytenr, int level)
12235 struct btrfs_root *root;
12236 struct btrfs_key key;
12237 struct btrfs_path path;
12238 struct extent_buffer *eb;
12239 struct extent_buffer *node;
12240 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12244 /* Query level for level == -1 special case */
12246 level = query_tree_block_level(fs_info, bytenr);
12248 err |= REFERENCER_MISSING;
/* Build the root item key used to read the referencing root. */
12252 key.objectid = root_id;
12253 key.type = BTRFS_ROOT_ITEM_KEY;
12254 key.offset = (u64)-1;
12256 root = btrfs_read_fs_root(fs_info, &key);
12257 if (IS_ERR(root)) {
12258 err |= REFERENCER_MISSING;
12262 /* Read out the tree block to get item/node key */
12263 eb = read_tree_block(fs_info, bytenr, 0);
12264 if (!extent_buffer_uptodate(eb)) {
12265 err |= REFERENCER_MISSING;
12266 free_extent_buffer(eb);
12270 /* Empty tree, no need to check key */
12271 if (!btrfs_header_nritems(eb) && !level) {
12272 free_extent_buffer(eb);
/* The first key is read as a node key or an item key; the condition choosing between them is not visible here. */
12277 btrfs_node_key_to_cpu(eb, &key, 0);
12279 btrfs_item_key_to_cpu(eb, &key, 0);
12281 free_extent_buffer(eb);
12283 btrfs_init_path(&path);
12284 path.lowest_level = level;
12285 /* Search with the first key, to ensure we can reach it */
12286 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12288 err |= REFERENCER_MISSING;
/* The node the search reached at this level must be the block being checked. */
12292 node = path.nodes[level];
12293 if (btrfs_header_bytenr(node) != bytenr) {
12295 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12296 bytenr, nodesize, bytenr,
12297 btrfs_header_bytenr(node));
12298 err |= REFERENCER_MISMATCH;
12300 if (btrfs_header_level(node) != level) {
12302 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12303 bytenr, nodesize, level,
12304 btrfs_header_level(node));
12305 err |= REFERENCER_MISMATCH;
12309 btrfs_release_path(&path);
12311 if (err & REFERENCER_MISSING) {
12313 error("extent [%llu %d] lost referencer (owner: %llu)",
12314 bytenr, nodesize, root_id);
12317 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12318 bytenr, nodesize, root_id, level);
12325 * Check if tree block @eb is tree reloc root.
12326 * Return 0 if it's not or any problem happens
12327 * Return 1 if it's a tree reloc root
/*
 * Read, without caching, the root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner of @eb) and compare
 * the bytenr of its root node with the bytenr of @eb.
 * The root read here is released again with btrfs_free_fs_root().
 */
12329 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12330 struct extent_buffer *eb)
12332 struct btrfs_root *tree_reloc_root;
12333 struct btrfs_key key;
12334 u64 bytenr = btrfs_header_bytenr(eb);
12335 u64 owner = btrfs_header_owner(eb);
/* The key offset of the reloc root is the owner recorded in the header of @eb */
12338 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12339 key.offset = owner;
12340 key.type = BTRFS_ROOT_ITEM_KEY;
12342 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12343 if (IS_ERR(tree_reloc_root))
12346 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12348 btrfs_free_fs_root(tree_reloc_root);
12353 * Check referencer for shared block backref
12354 * If level == -1, this function will resolve the level.
/*
 * Read the tree block at @parent and scan its node block pointers for one
 * equal to @bytenr. When no such pointer is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
12356 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12357 u64 parent, u64 bytenr, int level)
12359 struct extent_buffer *eb;
12361 int found_parent = 0;
12364 eb = read_tree_block(fs_info, parent, 0);
12365 if (!extent_buffer_uptodate(eb))
12369 level = query_tree_block_level(fs_info, bytenr);
12373 /* It's possible it's a tree reloc root */
12374 if (parent == bytenr) {
12375 if (is_tree_reloc_root(fs_info, eb))
/* The parent is expected to sit exactly one level above the child */
12380 if (level + 1 != btrfs_header_level(eb))
12383 nr = btrfs_header_nritems(eb);
12384 for (i = 0; i < nr; i++) {
12385 if (bytenr == btrfs_node_blockptr(eb, i)) {
12391 free_extent_buffer(eb);
12392 if (!found_parent) {
12394 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12395 bytenr, fs_info->nodesize, parent, level);
12396 return REFERENCER_MISSING;
12402 * Check referencer for normal (inlined) data ref
12403 * If len == 0, it will be resolved by searching in extent tree
/*
 * Count the file extent items of inode @objectid in root @root_id whose
 * disk bytenr is @bytenr, whose disk_num_bytes is @len and whose
 * (file offset - file extent offset) matches the backref offset, then
 * compare the number found with @count.
 * A count mismatch is reported and returned as REFERENCER_MISSING.
 */
12405 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12406 u64 root_id, u64 objectid, u64 offset,
12407 u64 bytenr, u64 len, u32 count)
12409 struct btrfs_root *root;
12410 struct btrfs_root *extent_root = fs_info->extent_root;
12411 struct btrfs_key key;
12412 struct btrfs_path path;
12413 struct extent_buffer *leaf;
12414 struct btrfs_file_extent_item *fi;
12415 u32 found_count = 0;
/* Look up the EXTENT_ITEM for @bytenr in the extent tree */
12420 key.objectid = bytenr;
12421 key.type = BTRFS_EXTENT_ITEM_KEY;
12422 key.offset = (u64)-1;
12424 btrfs_init_path(&path);
12425 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12428 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12431 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12432 if (key.objectid != bytenr ||
12433 key.type != BTRFS_EXTENT_ITEM_KEY)
12436 btrfs_release_path(&path);
/* Switch to the root that is supposed to reference the extent */
12438 key.objectid = root_id;
12439 key.type = BTRFS_ROOT_ITEM_KEY;
12440 key.offset = (u64)-1;
12441 btrfs_init_path(&path);
12443 root = btrfs_read_fs_root(fs_info, &key);
12447 key.objectid = objectid;
12448 key.type = BTRFS_EXTENT_DATA_KEY;
12450 * It can be nasty as data backref offset is
12451 * file offset - file extent offset, which is smaller or
12452 * equal to original backref offset. The only special case is
12453 * overflow. So we need to special check and do further search.
/* An offset with the top bit set starts the search from file offset 0 */
12455 key.offset = offset & (1ULL << 63) ? 0 : offset;
12457 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12462 * Search afterwards to get correct one
12463 * NOTE: As we must do a comprehensive check on the data backref to
12464 * make sure the dref count also matches, we must iterate all file
12465 * extents for that inode.
12468 leaf = path.nodes[0];
12469 slot = path.slots[0];
12471 if (slot >= btrfs_header_nritems(leaf))
12473 btrfs_item_key_to_cpu(leaf, &key, slot);
12474 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12476 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12478 * Except normal disk bytenr and disk num bytes, we still
12479 * need to do extra check on dbackref offset as
12480 * dbackref offset = file_offset - file_extent_offset
12482 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12483 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12484 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12489 ret = btrfs_next_item(root, &path);
12494 btrfs_release_path(&path);
12495 if (found_count != count) {
12497 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12498 bytenr, len, root_id, objectid, offset, count, found_count);
12499 return REFERENCER_MISSING;
12505 * Check if the referencer of a shared data backref exists
/*
 * Read the tree block at @parent and walk its items, testing each one's key
 * type against BTRFS_EXTENT_DATA_KEY and its file extent type against
 * BTRFS_FILE_EXTENT_INLINE, looking for a file extent whose disk bytenr
 * equals @bytenr.
 * When none is found an error is printed and REFERENCER_MISSING is returned.
 */
12507 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12508 u64 parent, u64 bytenr)
12510 struct extent_buffer *eb;
12511 struct btrfs_key key;
12512 struct btrfs_file_extent_item *fi;
12514 int found_parent = 0;
12517 eb = read_tree_block(fs_info, parent, 0);
12518 if (!extent_buffer_uptodate(eb))
12521 nr = btrfs_header_nritems(eb);
12522 for (i = 0; i < nr; i++) {
12523 btrfs_item_key_to_cpu(eb, &key, i);
12524 if (key.type != BTRFS_EXTENT_DATA_KEY)
12527 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12528 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12531 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12538 free_extent_buffer(eb);
12539 if (!found_parent) {
12540 error("shared extent %llu referencer lost (parent: %llu)",
12542 return REFERENCER_MISSING;
12548 * Only delete backref if REFERENCER_MISSING now
12550 * Returns <0 the extent was deleted
12551 * Returns >0 the backref was deleted but extent still exists, returned value
12552 * means error after repair
12553 * Returns 0 nothing happened
/*
 * When @err carries REFERENCER_MISSING or REFERENCER_MISMATCH, drop the
 * backref described by (@parent, @root_objectid, @owner, @offset) with
 * btrfs_free_extent(), then release @path and search @root again for the key
 * the path pointed at on entry.
 */
12555 static int repair_extent_item(struct btrfs_trans_handle *trans,
12556 struct btrfs_root *root, struct btrfs_path *path,
12557 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12558 u64 owner, u64 offset, int err)
12560 struct btrfs_key old_key;
/* Remember the current key so the path can be re-established afterwards */
12564 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12566 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12567 /* delete the backref */
12568 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12569 num_bytes, parent, root_objectid, owner, offset);
/*
 * NOTE(review): only REFERENCER_MISSING is cleared below although this
 * branch is also entered for REFERENCER_MISMATCH - confirm this is intended.
 */
12572 err &= ~REFERENCER_MISSING;
12573 printf("Delete backref in extent [%llu %llu]\n",
12574 bytenr, num_bytes);
12576 error("fail to delete backref in extent [%llu %llu]",
12577 bytenr, num_bytes);
12581 /* btrfs_free_extent may delete the extent */
12582 btrfs_release_path(path);
12583 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12593 * This function will check a given extent item, including its backref and
12594 * itself (like crossing stripe boundary and type)
12596 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Validate the extent or metadata item at path->slots[0]:
 *  - add its size to the global bytes_used counter,
 *  - report metadata crossing a stripe boundary,
 *  - walk the inline refs that follow the extent item (and, for old style
 *    EXTENT_ITEM metadata, the tree block info) and check each one with the
 *    check_*_backref() helper matching its type,
 *  - when repairing, hand the failing backref to repair_extent_item().
 *
 * NOTE(review): the repair guard tests err while the value passed to
 * repair_extent_item() as its error mask is ret - confirm how the two relate
 * at that point.
 */
12598 static int check_extent_item(struct btrfs_trans_handle *trans,
12599 struct btrfs_fs_info *fs_info,
12600 struct btrfs_path *path)
12602 struct btrfs_extent_item *ei;
12603 struct btrfs_extent_inline_ref *iref;
12604 struct btrfs_extent_data_ref *dref;
12605 struct extent_buffer *eb = path->nodes[0];
12608 int slot = path->slots[0];
12610 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12611 u32 item_size = btrfs_item_size_nr(eb, slot);
12621 struct btrfs_key key;
12625 btrfs_item_key_to_cpu(eb, &key, slot);
/* An EXTENT_ITEM carries its length in key.offset; otherwise nodesize is used */
12626 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12627 bytes_used += key.offset;
12628 num_bytes = key.offset;
12630 bytes_used += nodesize;
12631 num_bytes = nodesize;
12634 if (item_size < sizeof(*ei)) {
12636 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12637 * old thing when on disk format is still un-determined.
12638 * No need to care about it anymore
12640 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12644 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12645 flags = btrfs_extent_flags(eb, ei);
12647 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12649 if (metadata && check_crossing_stripes(global_info, key.objectid,
12651 error("bad metadata [%llu, %llu) crossing stripe boundary",
12652 key.objectid, key.objectid + nodesize);
12653 err |= CROSSING_STRIPE_BOUNDARY;
12656 ptr = (unsigned long)(ei + 1);
12658 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12659 /* Old EXTENT_ITEM metadata */
12660 struct btrfs_tree_block_info *info;
12662 info = (struct btrfs_tree_block_info *)ptr;
12663 level = btrfs_tree_block_level(eb, info);
12664 ptr += sizeof(struct btrfs_tree_block_info);
12666 /* New METADATA_ITEM */
12667 level = key.offset;
12669 end = (unsigned long)ei + item_size;
12672 /* Reached extent item end normally */
12676 /* Beyond extent item end, wrong item size */
12678 err |= ITEM_SIZE_MISMATCH;
12679 error("extent item at bytenr %llu slot %d has wrong size",
12688 /* Now check every backref in this extent item */
12689 iref = (struct btrfs_extent_inline_ref *)ptr;
12690 type = btrfs_extent_inline_ref_type(eb, iref);
12691 offset = btrfs_extent_inline_ref_offset(eb, iref);
12693 case BTRFS_TREE_BLOCK_REF_KEY:
12694 root_objectid = offset;
12696 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12700 case BTRFS_SHARED_BLOCK_REF_KEY:
12702 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12706 case BTRFS_EXTENT_DATA_REF_KEY:
12707 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12708 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12709 owner = btrfs_extent_data_ref_objectid(eb, dref);
12710 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12711 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12712 owner_offset, key.objectid, key.offset,
12713 btrfs_extent_data_ref_count(eb, dref));
12716 case BTRFS_SHARED_DATA_REF_KEY:
12718 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12722 error("extent[%llu %d %llu] has unknown ref type: %d",
12723 key.objectid, key.type, key.offset, type);
12724 ret = UNKNOWN_TYPE;
12729 if (err && repair) {
12730 ret = repair_extent_item(trans, fs_info->extent_root, path,
12731 key.objectid, num_bytes, parent, root_objectid,
12732 owner, owner_offset, ret);
12741 ptr += btrfs_extent_inline_ref_size(type);
12749 * Check if a dev extent item is referred correctly by its chunk
/*
 * Look up the chunk named by the dev extent at @slot of @eb, validate it with
 * btrfs_check_chunk_valid(), compare the chunk's stripe length with the dev
 * extent length and look for a stripe whose devid and offset equal the dev
 * extent key's objectid and offset.
 * When no matching chunk stripe is found an error is printed and
 * REFERENCER_MISSING is returned.
 */
12751 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12752 struct extent_buffer *eb, int slot)
12754 struct btrfs_root *chunk_root = fs_info->chunk_root;
12755 struct btrfs_dev_extent *ptr;
12756 struct btrfs_path path;
12757 struct btrfs_key chunk_key;
12758 struct btrfs_key devext_key;
12759 struct btrfs_chunk *chunk;
12760 struct extent_buffer *l;
12764 int found_chunk = 0;
12767 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12768 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12769 length = btrfs_dev_extent_length(eb, ptr);
/* The chunk key is rebuilt from the fields stored inside the dev extent */
12771 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12772 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12773 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12775 btrfs_init_path(&path);
12776 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12781 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12782 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12787 if (btrfs_stripe_length(fs_info, l, chunk) != length)
12790 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Look for the stripe that lives at this dev extent's (devid, offset) */
12791 for (i = 0; i < num_stripes; i++) {
12792 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12793 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12795 if (devid == devext_key.objectid &&
12796 offset == devext_key.offset) {
12802 btrfs_release_path(&path);
12803 if (!found_chunk) {
12805 "device extent[%llu, %llu, %llu] did not find the related chunk",
12806 devext_key.objectid, devext_key.offset, length);
12807 return REFERENCER_MISSING;
12813 * Check if the used space is correct with the dev item
/*
 * Sum the lengths of the dev extents recorded in the dev tree for the device
 * described by the dev item at @slot of @eb and compare the sum with the
 * item's bytes_used. A difference is reported as ACCOUNTING_MISMATCH; a failed
 * dev extent lookup is reported as REFERENCER_MISSING.
 * The device's total_bytes is finally passed to check_dev_size_alignment().
 */
12815 static int check_dev_item(struct btrfs_fs_info *fs_info,
12816 struct extent_buffer *eb, int slot)
12818 struct btrfs_root *dev_root = fs_info->dev_root;
12819 struct btrfs_dev_item *dev_item;
12820 struct btrfs_path path;
12821 struct btrfs_key key;
12822 struct btrfs_dev_extent *ptr;
12829 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12830 dev_id = btrfs_device_id(eb, dev_item);
12831 used = btrfs_device_bytes_used(eb, dev_item);
12832 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Dev extents are keyed by the device id */
12834 key.objectid = dev_id;
12835 key.type = BTRFS_DEV_EXTENT_KEY;
12838 btrfs_init_path(&path);
12839 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12841 btrfs_item_key_to_cpu(eb, &key, slot);
12842 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12843 key.objectid, key.type, key.offset);
12844 btrfs_release_path(&path);
12845 return REFERENCER_MISSING;
12848 /* Iterate dev_extents to calculate the used space of a device */
12850 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12853 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12854 if (key.objectid > dev_id)
12856 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12859 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12860 struct btrfs_dev_extent);
12861 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12863 ret = btrfs_next_item(dev_root, &path);
12867 btrfs_release_path(&path);
12869 if (used != total) {
/*
 * NOTE(review): key is reloaded from the dev item here, but the message below
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * the key fields - confirm which triple is meant to be shown.
 */
12870 btrfs_item_key_to_cpu(eb, &key, slot);
12872 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12873 total, used, BTRFS_ROOT_TREE_OBJECTID,
12874 BTRFS_DEV_EXTENT_KEY, dev_id);
12875 return ACCOUNTING_MISMATCH;
12877 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12883 * Check a block group item with its referencer (chunk) and its used space
12884 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb in two passes:
 *  1. search the chunk tree for the chunk item keyed by the block group's
 *     start and compare its length (REFERENCER_MISSING / REFERENCER_MISMATCH);
 *  2. walk the extent tree across the block group's range, summing the sizes
 *     of EXTENT_ITEM / METADATA_ITEM entries and flagging extents whose
 *     DATA / TREE_BLOCK flag disagrees with the block group flags
 *     (CHUNK_TYPE_MISMATCH).
 * The sum is compared with the item's used bytes (BG_ACCOUNTING_ERROR).
 */
12886 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12887 struct extent_buffer *eb, int slot)
12889 struct btrfs_root *extent_root = fs_info->extent_root;
12890 struct btrfs_root *chunk_root = fs_info->chunk_root;
12891 struct btrfs_block_group_item *bi;
12892 struct btrfs_block_group_item bg_item;
12893 struct btrfs_path path;
12894 struct btrfs_key bg_key;
12895 struct btrfs_key chunk_key;
12896 struct btrfs_key extent_key;
12897 struct btrfs_chunk *chunk;
12898 struct extent_buffer *leaf;
12899 struct btrfs_extent_item *ei;
12900 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12908 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12909 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
12910 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12911 used = btrfs_block_group_used(&bg_item);
12912 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the block group's start bytenr */
12914 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12915 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12916 chunk_key.offset = bg_key.objectid;
12918 btrfs_init_path(&path);
12919 /* Search for the referencer chunk */
12920 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12923 "block group[%llu %llu] did not find the related chunk item",
12924 bg_key.objectid, bg_key.offset);
12925 err |= REFERENCER_MISSING;
12927 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12928 struct btrfs_chunk);
12929 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12932 "block group[%llu %llu] related chunk item length does not match",
12933 bg_key.objectid, bg_key.offset);
12934 err |= REFERENCER_MISMATCH;
12937 btrfs_release_path(&path);
12939 /* Search from the block group bytenr */
12940 extent_key.objectid = bg_key.objectid;
12941 extent_key.type = 0;
12942 extent_key.offset = 0;
12944 btrfs_init_path(&path);
12945 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12949 /* Iterate extent tree to account used space */
12951 leaf = path.nodes[0];
12953 /* Search slot can point to the last item beyond leaf nritems */
12954 if (path.slots[0] >= btrfs_header_nritems(leaf))
12957 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first bytenr past the block group */
12958 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12961 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12962 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12964 if (extent_key.objectid < bg_key.objectid)
12967 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12970 total += extent_key.offset;
12972 ei = btrfs_item_ptr(leaf, path.slots[0],
12973 struct btrfs_extent_item);
12974 flags = btrfs_extent_flags(leaf, ei);
12975 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12976 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12978 "bad extent[%llu, %llu) type mismatch with chunk",
12979 extent_key.objectid,
12980 extent_key.objectid + extent_key.offset);
12981 err |= CHUNK_TYPE_MISMATCH;
12983 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12984 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12985 BTRFS_BLOCK_GROUP_METADATA))) {
12987 "bad extent[%llu, %llu) type mismatch with chunk",
12988 extent_key.objectid,
12989 extent_key.objectid + nodesize);
12990 err |= CHUNK_TYPE_MISMATCH;
12994 ret = btrfs_next_item(extent_root, &path);
13000 btrfs_release_path(&path);
13002 if (total != used) {
13004 "block group[%llu %llu] used %llu but extent items used %llu",
13005 bg_key.objectid, bg_key.offset, used, total);
13006 err |= BG_ACCOUNTING_ERROR;
13012 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13013 * FIXME: We still need to repair error of dev_item.
13015 * Returns error after repair.
/*
 * Repair helper for the chunk item at path->slots[0]. When @err carries
 * REFERENCER_MISSING, a block group is created for the chunk with
 * btrfs_make_block_group() using the chunk's type, key and length; the bit is
 * cleared from @err next to the "Added block group item" message.
 */
13017 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13018 struct btrfs_root *chunk_root,
13019 struct btrfs_path *path, int err)
13021 struct btrfs_chunk *chunk;
13022 struct btrfs_key chunk_key;
13023 struct extent_buffer *eb = path->nodes[0];
13025 int slot = path->slots[0];
13029 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13030 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13032 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb (see the initialiser of eb above) */
13033 type = btrfs_chunk_type(path->nodes[0], chunk);
13034 length = btrfs_chunk_length(eb, chunk);
13036 if (err & REFERENCER_MISSING) {
13037 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13038 type, chunk_key.objectid, chunk_key.offset, length);
13040 error("fail to add block group item[%llu %llu]",
13041 chunk_key.offset, length);
13044 err &= ~REFERENCER_MISSING;
13045 printf("Added block group item[%llu %llu]\n",
13046 chunk_key.offset, length);
13055 * Check a chunk item.
13056 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb:
 *  - validate it with btrfs_check_chunk_valid(),
 *  - search the extent tree for the block group item keyed
 *    (chunk offset, BTRFS_BLOCK_GROUP_ITEM_KEY, chunk length) and compare its
 *    flags with the chunk type,
 *  - for every stripe, search the dev tree for the dev extent keyed
 *    (stripe devid, BTRFS_DEV_EXTENT_KEY, stripe offset) and compare its chunk
 *    objectid, chunk offset and length with this chunk.
 */
13058 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13059 struct extent_buffer *eb, int slot)
13061 struct btrfs_root *extent_root = fs_info->extent_root;
13062 struct btrfs_root *dev_root = fs_info->dev_root;
13063 struct btrfs_path path;
13064 struct btrfs_key chunk_key;
13065 struct btrfs_key bg_key;
13066 struct btrfs_key devext_key;
13067 struct btrfs_chunk *chunk;
13068 struct extent_buffer *leaf;
13069 struct btrfs_block_group_item *bi;
13070 struct btrfs_block_group_item bg_item;
13071 struct btrfs_dev_extent *ptr;
13083 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13084 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13085 length = btrfs_chunk_length(eb, chunk);
13086 chunk_end = chunk_key.offset + length;
13087 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13090 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13092 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13095 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the chunk's start and length */
13097 bg_key.objectid = chunk_key.offset;
13098 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13099 bg_key.offset = length;
13101 btrfs_init_path(&path);
13102 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13105 "chunk[%llu %llu) did not find the related block group item",
13106 chunk_key.offset, chunk_end);
13107 err |= REFERENCER_MISSING;
13109 leaf = path.nodes[0];
13110 bi = btrfs_item_ptr(leaf, path.slots[0],
13111 struct btrfs_block_group_item);
13112 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13114 if (btrfs_block_group_flags(&bg_item) != type) {
13116 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13117 chunk_key.offset, chunk_end, type,
13118 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is recorded with the same bit as a missing
 * block group item; repair_chunk_item() reacts to REFERENCER_MISSING by
 * creating a block group - confirm that is the wanted repair in this case.
 */
13119 err |= REFERENCER_MISSING;
13123 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13124 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13125 for (i = 0; i < num_stripes; i++) {
/* Start every stripe lookup from a fresh path */
13126 btrfs_release_path(&path);
13127 btrfs_init_path(&path);
13128 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13129 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13130 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13132 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13135 goto not_match_dev;
13137 leaf = path.nodes[0];
13138 ptr = btrfs_item_ptr(leaf, path.slots[0],
13139 struct btrfs_dev_extent);
13140 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13141 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13142 if (objectid != chunk_key.objectid ||
13143 offset != chunk_key.offset ||
13144 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13145 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the start of
 * the range, whereas the other messages in this function print
 * chunk_key.offset - confirm which value is intended.
 */
13148 err |= BACKREF_MISSING;
13150 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13151 chunk_key.objectid, chunk_end, i);
13154 btrfs_release_path(&path);
/*
 * Delete the item @path currently points at: its key is saved, the path is
 * released and searched again under @trans with ins_len -1 and cow 1, and the
 * item is removed with btrfs_del_item(). When the path then sits at slot 0,
 * btrfs_prev_leaf() is called. Either a failure or a "Deleted" message with
 * the root objectid and key is printed.
 */
13159 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13160 struct btrfs_root *root,
13161 struct btrfs_path *path)
13163 struct btrfs_key key;
/* Save the key first: the path is released before the modifying search */
13166 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13167 btrfs_release_path(path);
13168 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13174 ret = btrfs_del_item(trans, root, path);
13178 if (path->slots[0] == 0)
13179 btrfs_prev_leaf(root, path);
13184 error("failed to delete root %llu item[%llu, %u, %llu]",
13185 root->objectid, key.objectid, key.type, key.offset);
13187 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13188 root->objectid, key.objectid, key.type, key.offset);
13193 * Main entry function to check known items and update related accounting info
/*
 * Dispatch on the key type of the item at path->slots[0] and run the matching
 * check_*() helper. Several cases follow the check with a repair helper
 * (repair_extent_data_item(), repair_chunk_item()) or with
 * delete_extent_tree_item() when the check reported a missing or mismatching
 * referencer. EXTENT_CSUM items only add their item size to total_csum_bytes.
 */
13195 static int check_leaf_items(struct btrfs_trans_handle *trans,
13196 struct btrfs_root *root, struct btrfs_path *path,
13197 struct node_refs *nrefs, int account_bytes)
13199 struct btrfs_fs_info *fs_info = root->fs_info;
13200 struct btrfs_key key;
13201 struct extent_buffer *eb;
13204 struct btrfs_extent_data_ref *dref;
13209 eb = path->nodes[0];
13210 slot = path->slots[0];
/* A slot at or past nritems is reported as an empty leaf */
13211 if (slot >= btrfs_header_nritems(eb)) {
13213 error("empty leaf [%llu %u] root %llu", eb->start,
13214 root->fs_info->nodesize, root->objectid);
13220 btrfs_item_key_to_cpu(eb, &key, slot);
13224 case BTRFS_EXTENT_DATA_KEY:
13225 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13227 ret = repair_extent_data_item(trans, root, path, nrefs,
13231 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13232 ret = check_block_group_item(fs_info, eb, slot);
13234 ret & REFERENCER_MISSING)
13235 ret = delete_extent_tree_item(trans, root, path);
13238 case BTRFS_DEV_ITEM_KEY:
13239 ret = check_dev_item(fs_info, eb, slot);
13242 case BTRFS_CHUNK_ITEM_KEY:
13243 ret = check_chunk_item(fs_info, eb, slot);
13245 ret = repair_chunk_item(trans, root, path, ret);
13248 case BTRFS_DEV_EXTENT_KEY:
13249 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours go through the same checker */
13252 case BTRFS_EXTENT_ITEM_KEY:
13253 case BTRFS_METADATA_ITEM_KEY:
13254 ret = check_extent_item(trans, fs_info, path);
13257 case BTRFS_EXTENT_CSUM_KEY:
13258 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The remaining cases are backrefs stored as items of their own */
13261 case BTRFS_TREE_BLOCK_REF_KEY:
13262 ret = check_tree_block_backref(fs_info, key.offset,
13265 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13266 ret = delete_extent_tree_item(trans, root, path);
13269 case BTRFS_EXTENT_DATA_REF_KEY:
13270 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13271 ret = check_extent_data_backref(fs_info,
13272 btrfs_extent_data_ref_root(eb, dref),
13273 btrfs_extent_data_ref_objectid(eb, dref),
13274 btrfs_extent_data_ref_offset(eb, dref),
13276 btrfs_extent_data_ref_count(eb, dref));
13278 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13279 ret = delete_extent_tree_item(trans, root, path);
13282 case BTRFS_SHARED_BLOCK_REF_KEY:
13283 ret = check_shared_block_backref(fs_info, key.offset,
13286 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13287 ret = delete_extent_tree_item(trans, root, path);
13290 case BTRFS_SHARED_DATA_REF_KEY:
13291 ret = check_shared_data_backref(fs_info, key.offset,
13294 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13295 ret = delete_extent_tree_item(trans, root, path);
/* Forward declaration: called by check_chunks_and_extents_v2() ahead of the definition further down */
13308 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13311 * Low memory usage version check_chunks_and_extents.
/*
 * Pin the metadata blocks and start a transaction on the extent root, then
 * run check_btrfs_root() on the chunk root, on the tree root and on every
 * tree whose ROOT_ITEM is found in the tree root, starting the iteration at
 * the extent tree's key. Reloc trees are read without caching and freed after
 * being checked. Block accounting is fixed with btrfs_fix_block_accounting()
 * and the transaction is committed on the extent root.
 */
13313 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13315 struct btrfs_trans_handle *trans = NULL;
13316 struct btrfs_path path;
13317 struct btrfs_key old_key;
13318 struct btrfs_key key;
13319 struct btrfs_root *root1;
13320 struct btrfs_root *root;
13321 struct btrfs_root *cur_root;
13325 root = fs_info->fs_root;
13328 /* pin every tree block to avoid extent overwrite */
13329 ret = pin_metadata_blocks(fs_info);
13331 error("failed to pin metadata blocks");
13334 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13335 if (IS_ERR(trans)) {
13336 error("failed to start transaction before check");
13337 return PTR_ERR(trans);
/* root1 is first the chunk root, then the tree root */
13341 root1 = root->fs_info->chunk_root;
13342 ret = check_btrfs_root(trans, root1, 0, 1);
13345 root1 = root->fs_info->tree_root;
13346 ret = check_btrfs_root(trans, root1, 0, 1);
13349 btrfs_init_path(&path);
13350 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13352 key.type = BTRFS_ROOT_ITEM_KEY;
13354 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13356 error("cannot find extent tree in tree_root");
13361 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13362 if (key.type != BTRFS_ROOT_ITEM_KEY)
13365 key.offset = (u64)-1;
13367 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13368 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13371 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13372 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): %lld is used for key.objectid here, other messages in this file print it with %llu */
13373 error("failed to read tree: %lld", key.objectid);
13377 ret = check_btrfs_root(trans, cur_root, 0, 1);
/* Only the uncached reloc roots are freed here */
13380 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13381 btrfs_free_fs_root(cur_root);
/* Re-establish the tree root path at old_key before moving to the next item */
13383 btrfs_release_path(&path);
13384 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13385 &old_key, &path, 0, 0);
13389 ret = btrfs_next_item(root1, &path);
13395 /* if repair, update block accounting */
13397 ret = btrfs_fix_block_accounting(trans, root);
13401 err &= ~BG_ACCOUNTING_ERROR;
13405 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13407 btrfs_release_path(&path);
/*
 * Entry point for the extent check: prints "checking extents" unless progress
 * reporting is enabled, runs the lowmem variant when check_mode is
 * CHECK_MODE_LOWMEM and the original variant otherwise, then, when repairing
 * and the check returned 0, calls btrfs_fix_device_and_super_size().
 */
13412 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13416 if (!ctx.progress_enabled)
13417 fprintf(stderr, "checking extents\n");
13418 if (check_mode == CHECK_MODE_LOWMEM)
13419 ret = check_chunks_and_extents_v2(fs_info);
13421 ret = check_chunks_and_extents(fs_info);
13423 /* Also repair device size related problems */
13424 if (repair && !ret) {
13425 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Give @root a freshly initialised root block c: the header of c is zeroed
 * and refilled (level, bytenr, generation, backref revision, owner, fsid and
 * chunk tree uuid) and the buffer is marked dirty. When c starts at the same
 * bytenr as the old root node, the root item's generation and level are
 * refreshed and written with btrfs_update_root(). The root is finally added
 * to the dirty list.
 * NOTE(review): how c is chosen depending on @overwrite is only partly
 * visible here (one path allocates with btrfs_alloc_free_block()) - confirm.
 */
13432 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13433 struct btrfs_root *root, int overwrite)
13435 struct extent_buffer *c;
13436 struct extent_buffer *old = root->node;
13439 struct btrfs_disk_key disk_key = {0,0,0};
13445 extent_buffer_get(c);
13448 c = btrfs_alloc_free_block(trans, root,
13449 root->fs_info->nodesize,
13450 root->root_key.objectid,
13451 &disk_key, level, 0, 0);
13454 extent_buffer_get(c);
/* Wipe the header, then rebuild every field a valid root block needs */
13458 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13459 btrfs_set_header_level(c, level);
13460 btrfs_set_header_bytenr(c, c->start);
13461 btrfs_set_header_generation(c, trans->transid);
13462 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13463 btrfs_set_header_owner(c, root->root_key.objectid);
13465 write_extent_buffer(c, root->fs_info->fsid,
13466 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13468 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13469 btrfs_header_chunk_tree_uuid(c),
13472 btrfs_mark_buffer_dirty(c);
13474 * this case can happen in the following case:
13476 * 1.overwrite previous root.
13478 * 2.reinit reloc data root, this is because we skip pin
13479 * down reloc data tree before which means we can allocate
13480 * same block bytenr here.
13482 if (old->start == c->start) {
13483 btrfs_set_root_generation(&root->root_item,
13485 root->root_item.level = btrfs_header_level(root->node);
13486 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13487 &root->root_key, &root->root_item);
13489 free_extent_buffer(c);
13493 free_extent_buffer(old);
13495 add_root_to_dirty_list(root);
/*
 * Recursively pin @eb and the tree blocks reachable from it.
 *  - A block whose range already has EXTENT_DIRTY set in
 *    fs_info->pinned_extents is tested for first, before anything is pinned.
 *  - ROOT_ITEMs are followed into the tree they point at (recursing with
 *    tree_root = 0), except for the extent tree, the tree reloc tree and the
 *    data reloc tree.
 *  - For node pointers, when level is 1 and @tree_root is 0 the child is
 *    pinned by bytenr with fs_info->nodesize without being read; otherwise it
 *    is read and the function recurses with the same @tree_root.
 */
13499 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13500 struct extent_buffer *eb, int tree_root)
13502 struct extent_buffer *tmp;
13503 struct btrfs_root_item *ri;
13504 struct btrfs_key key;
13506 int level = btrfs_header_level(eb);
13512 * If we have pinned this block before, don't pin it again.
13513 * This can not only avoid forever loop with broken filesystem
13514 * but also give us some speedups.
13516 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13517 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13520 btrfs_pin_extent(fs_info, eb->start, eb->len);
13522 nritems = btrfs_header_nritems(eb);
13523 for (i = 0; i < nritems; i++) {
13525 btrfs_item_key_to_cpu(eb, &key, i);
13526 if (key.type != BTRFS_ROOT_ITEM_KEY)
13528 /* Skip the extent root and reloc roots */
13529 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13530 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13531 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13533 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13534 bytenr = btrfs_disk_root_bytenr(eb, ri);
13537 * If at any point we start needing the real root we
13538 * will have to build a stump root for the root we are
13539 * in, but for now this doesn't actually use the root so
13540 * just pass in extent_root.
13542 tmp = read_tree_block(fs_info, bytenr, 0);
13543 if (!extent_buffer_uptodate(tmp)) {
13544 fprintf(stderr, "Error reading root block\n");
13547 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13548 free_extent_buffer(tmp);
13552 bytenr = btrfs_node_blockptr(eb, i);
13554 /* If we aren't the tree root don't read the block */
13555 if (level == 1 && !tree_root) {
13556 btrfs_pin_extent(fs_info, bytenr,
13557 fs_info->nodesize);
13561 tmp = read_tree_block(fs_info, bytenr, 0);
13562 if (!extent_buffer_uptodate(tmp)) {
13563 fprintf(stderr, "Error reading tree block\n");
13566 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13567 free_extent_buffer(tmp);
/*
 * Pin the chunk tree first (tree_root = 0), then the tree root
 * (tree_root = 1); the result of the second call is returned directly.
 */
13576 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13580 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13584 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree: the
 * avail_{data,metadata,system}_alloc_bits are reset to 0, then for every
 * CHUNK_ITEM found a block group is added with btrfs_add_block_group() using
 * the chunk's type, key and length, and the chunk's range is marked dirty in
 * fs_info->free_space_cache. Afterwards the block groups are looked up one
 * after another, each lookup starting at the end of the previous block group.
 */
13587 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13589 struct btrfs_block_group_cache *cache;
13590 struct btrfs_path path;
13591 struct extent_buffer *leaf;
13592 struct btrfs_chunk *chunk;
13593 struct btrfs_key key;
13597 btrfs_init_path(&path);
13599 key.type = BTRFS_CHUNK_ITEM_KEY;
13601 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13603 btrfs_release_path(&path);
13608 * We do this in case the block groups were screwed up and had alloc
13609 * bits that aren't actually set on the chunks. This happens with
13610 * restored images every time and could happen in real life I guess.
13612 fs_info->avail_data_alloc_bits = 0;
13613 fs_info->avail_metadata_alloc_bits = 0;
13614 fs_info->avail_system_alloc_bits = 0;
13616 /* First we need to create the in-memory block groups */
13618 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13619 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13621 btrfs_release_path(&path);
13629 leaf = path.nodes[0];
13630 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13631 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13636 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13637 btrfs_add_block_group(fs_info, 0,
13638 btrfs_chunk_type(leaf, chunk),
13639 key.objectid, key.offset,
13640 btrfs_chunk_length(leaf, chunk));
13641 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13642 key.offset + btrfs_chunk_length(leaf, chunk));
13647 cache = btrfs_lookup_first_block_group(fs_info, start);
13651 start = cache->key.objectid + cache->key.offset;
13654 btrfs_release_path(&path);
/*
 * Remove pending balance state from the tree root and reinitialize the data
 * relocation tree.
 *
 * trans:   running transaction used for all modifications.
 * fs_info: filesystem; fs_info->tree_root is the tree that is edited.
 *
 * Visible steps:
 *  - look up the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item with
 *    ins_len -1 and cow 1 and delete it with btrfs_del_item(),
 *  - look up root items with objectid BTRFS_TREE_RELOC_OBJECTID and delete
 *    them in batches (del_slot, del_nr) with btrfs_del_items(),
 *  - read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *    transaction, reinitialize it with btrfs_fsck_reinit_root() and create
 *    its root directory with objectid BTRFS_FIRST_FREE_OBJECTID.
 * NOTE(review): the conditions guarding the goto and the loop structure are
 * not visible in this listing.
 */
13658 static int reset_balance(struct btrfs_trans_handle *trans,
13659 struct btrfs_fs_info *fs_info)
13661 struct btrfs_root *root = fs_info->tree_root;
13662 struct btrfs_path path;
13663 struct extent_buffer *leaf;
13664 struct btrfs_key key;
13665 int del_slot, del_nr = 0;
13669 btrfs_init_path(&path);
13670 key.objectid = BTRFS_BALANCE_OBJECTID;
13671 key.type = BTRFS_BALANCE_ITEM_KEY;
13673 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13678 goto reinit_data_reloc;
13683 ret = btrfs_del_item(trans, root, &path);
13686 btrfs_release_path(&path);
/* Next target: the root items of the tree relocation trees. */
13688 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13689 key.type = BTRFS_ROOT_ITEM_KEY;
13691 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13695 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13700 ret = btrfs_del_items(trans, root, &path,
13707 btrfs_release_path(&path);
13710 ret = btrfs_search_slot(trans, root, &key, &path,
13717 leaf = path.nodes[0];
13718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13719 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13721 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13726 del_slot = path.slots[0];
13735 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13739 btrfs_release_path(&path);
/* From here on root refers to the data relocation tree, not the tree root. */
13742 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13743 key.type = BTRFS_ROOT_ITEM_KEY;
13744 key.offset = (u64)-1;
13745 root = btrfs_read_fs_root(fs_info, &key);
13746 if (IS_ERR(root)) {
13747 fprintf(stderr, "Error reading data reloc tree\n");
13748 ret = PTR_ERR(root);
13751 record_root_in_trans(trans, root);
13752 ret = btrfs_fsck_reinit_root(trans, root, 0);
13755 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13757 btrfs_release_path(&path);
/*
 * Recreate the extent tree from scratch inside the given transaction.
 *
 * trans:   running transaction.
 * fs_info: filesystem whose extent_root is reinitialized.
 *
 * Visible sequence:
 *  1. print a message for filesystems with the MIXED_GROUPS incompat flag,
 *  2. pin_metadata_blocks(),
 *  3. btrfs_free_block_groups() followed by reset_block_groups(),
 *  4. btrfs_fsck_reinit_root() on fs_info->extent_root,
 *  5. insert one item per block group (cache->key / cache->item) into the
 *     extent root, calling btrfs_extent_post_op() as part of that loop,
 *  6. reset_balance().
 * Each failing step prints a message to stderr.
 * NOTE(review): the returned values and early exits are not visible in this
 * listing.
 */
13761 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13762 struct btrfs_fs_info *fs_info)
13768 * The only reason we don't do this is because right now we're just
13769 * walking the trees we find and pinning down their bytes, we don't look
13770 * at any of the leaves. In order to do mixed groups we'd have to check
13771 * the leaves of any fs roots and pin down the bytes for any file
13772 * extents we find. Not hard but why do it if we don't have to?
13774 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13775 fprintf(stderr, "We don't support re-initing the extent tree "
13776 "for mixed block groups yet, please notify a btrfs "
13777 "developer you want to do this so they can add this "
13778 "functionality.\n");
13783 * first we need to walk all of the trees except the extent tree and pin
13784 * down the bytes that are in use so we don't overwrite any existing
13787 ret = pin_metadata_blocks(fs_info);
13789 fprintf(stderr, "error pinning down used bytes\n");
13794 * Need to drop all the block groups since we're going to recreate all
13797 btrfs_free_block_groups(fs_info);
13798 ret = reset_block_groups(fs_info);
13800 fprintf(stderr, "error resetting the block groups\n");
13804 /* Ok we can allocate now, reinit the extent root */
13805 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13807 fprintf(stderr, "extent root initialization failed\n");
13809 * When the transaction code is updated we should end the
13810 * transaction, but for now progs only knows about commit so
13811 * just return an error.
13817 * Now we have all the in-memory block groups setup so we can make
13818 * allocations properly, and the metadata we care about is safe since we
13819 * pinned all of it above.
13822 struct btrfs_block_group_cache *cache;
13824 cache = btrfs_lookup_first_block_group(fs_info, start);
13827 start = cache->key.objectid + cache->key.offset;
13828 ret = btrfs_insert_item(trans, fs_info->extent_root,
13829 &cache->key, &cache->item,
13830 sizeof(cache->item));
13832 fprintf(stderr, "Error adding block group\n");
13835 btrfs_extent_post_op(trans, fs_info->extent_root);
13838 ret = reset_balance(trans, fs_info);
13840 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of one metadata block by searching down to it with
 * cow enabled inside a fresh transaction.
 *
 * root: any root of the filesystem; only root->fs_info is used before the
 *       variable is overwritten with the root that owns eb.
 * eb:   extent buffer to recow; its header owner selects the root and its
 *       header level becomes path.lowest_level.
 *
 * Returns PTR_ERR() of the root or transaction lookup on failure (visible
 * return lines).
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line - confirm a failed commit cannot go unnoticed.
 */
13845 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13847 struct btrfs_path path;
13848 struct btrfs_trans_handle *trans;
13849 struct btrfs_key key;
13852 printf("Recowing metadata block %llu\n", eb->start);
13853 key.objectid = btrfs_header_owner(eb);
13854 key.type = BTRFS_ROOT_ITEM_KEY;
13855 key.offset = (u64)-1;
13857 root = btrfs_read_fs_root(root->fs_info, &key);
13858 if (IS_ERR(root)) {
13859 fprintf(stderr, "Couldn't find owner root %llu\n",
13861 return PTR_ERR(root);
13864 trans = btrfs_start_transaction(root, 1);
13866 return PTR_ERR(trans);
13868 btrfs_init_path(&path);
/* Stop the search at the level of eb; key is reused for the first key of eb. */
13869 path.lowest_level = btrfs_header_level(eb);
13870 if (path.lowest_level)
13871 btrfs_node_key_to_cpu(eb, &key, 0);
13873 btrfs_item_key_to_cpu(eb, &key, 0);
/* ins_len 0, cow 1. */
13875 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13876 btrfs_commit_transaction(trans, root);
13877 btrfs_release_path(&path);
/*
 * Delete one item that was recorded as bad, in its own transaction.
 *
 * root: any root of the filesystem; only root->fs_info is used before the
 *       variable is overwritten with the root identified by bad->root_id.
 * bad:  record holding the owning root id and the key of the item to delete.
 *
 * Returns PTR_ERR() of the root or transaction lookup on failure (visible
 * return lines).
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line - confirm this is intended.
 */
13881 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13883 struct btrfs_path path;
13884 struct btrfs_trans_handle *trans;
13885 struct btrfs_key key;
13888 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13889 bad->key.type, bad->key.offset);
13890 key.objectid = bad->root_id;
13891 key.type = BTRFS_ROOT_ITEM_KEY;
13892 key.offset = (u64)-1;
13894 root = btrfs_read_fs_root(root->fs_info, &key);
13895 if (IS_ERR(root)) {
13896 fprintf(stderr, "Couldn't find owner root %llu\n",
13898 return PTR_ERR(root);
13901 trans = btrfs_start_transaction(root, 1);
13903 return PTR_ERR(trans);
13905 btrfs_init_path(&path);
/* ins_len -1 and cow 1: the search prepares the path for a deletion. */
13906 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13912 ret = btrfs_del_item(trans, root, &path);
13914 btrfs_commit_transaction(trans, root);
13915 btrfs_release_path(&path);
/*
 * Clear the log tree pointer stored in the super block.
 *
 * root: root used to start and commit the transaction; the super block copy
 *       is reached through root->fs_info->super_copy.
 *
 * Sets both log_root and log_root_level of the super block copy to 0 and
 * commits. ret receives PTR_ERR(trans) when the transaction cannot be
 * started, otherwise the result of btrfs_commit_transaction().
 */
13919 static int zero_log_tree(struct btrfs_root *root)
13921 struct btrfs_trans_handle *trans;
13924 trans = btrfs_start_transaction(root, 1);
13925 if (IS_ERR(trans)) {
13926 ret = PTR_ERR(trans);
13929 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13930 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13931 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for one data range, one sector at a time.
 *
 * trans:     running transaction.
 * csum_root: checksum tree that receives the checksums.
 * buf:       scratch buffer; each iteration reads one sector into it.
 * start:     logical start of the range.
 * The range length (len in the body) is declared on a signature line that is
 * not visible in this listing.
 *
 * Each iteration reads data at start + offset with read_extent_data() and
 * passes it to btrfs_csum_file_block() together with start + len, then
 * advances offset by the sector size.
 */
13935 static int populate_csum(struct btrfs_trans_handle *trans,
13936 struct btrfs_root *csum_root, char *buf, u64 start,
13939 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13944 while (offset < len) {
/* The sector size is re-read from fs_info on every iteration. */
13945 sectorsize = fs_info->sectorsize;
13946 ret = read_extent_data(fs_info, buf, start + offset,
13950 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13951 start + offset, buf, sectorsize);
13954 offset += sectorsize;
/*
 * Generate checksums for the regular file extents of one fs/subvolume tree.
 *
 * trans:     running transaction.
 * csum_root: checksum tree to fill.
 * cur_root:  fs/subvolume tree whose items are scanned read-only.
 *
 * Allocates a one-sector buffer, walks the items of cur_root with
 * btrfs_next_item(), and for items of type BTRFS_EXTENT_DATA_KEY whose file
 * extent type is BTRFS_FILE_EXTENT_REG calls populate_csum() on the range
 * (disk_bytenr, disk_num_bytes). A result of -EEXIST from populate_csum() is
 * tested separately.
 * NOTE(review): the initial search key, the handling of -EEXIST and the
 * release of buf are not visible in this listing.
 */
13959 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13960 struct btrfs_root *csum_root,
13961 struct btrfs_root *cur_root)
13963 struct btrfs_path path;
13964 struct btrfs_key key;
13965 struct extent_buffer *node;
13966 struct btrfs_file_extent_item *fi;
13973 buf = malloc(cur_root->fs_info->sectorsize);
13977 btrfs_init_path(&path);
13981 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13984 /* Iterate all regular file extents and fill its csum */
13986 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13988 if (key.type != BTRFS_EXTENT_DATA_KEY)
13990 node = path.nodes[0];
13991 slot = path.slots[0];
13992 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13993 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk extent range is checksummed, not the file offset range. */
13995 start = btrfs_file_extent_disk_bytenr(node, fi);
13996 len = btrfs_file_extent_disk_num_bytes(node, fi);
13998 ret = populate_csum(trans, csum_root, buf, start, len);
13999 if (ret == -EEXIST)
14005 * TODO: if next leaf is corrupted, jump to nearest next valid
14008 ret = btrfs_next_item(cur_root, &path);
14018 btrfs_release_path(&path);
/*
 * Fill the checksum tree by visiting every fs/subvolume tree listed in the
 * tree root.
 *
 * trans:     running transaction.
 * csum_root: checksum tree to fill; also provides fs_info.
 *
 * Starts a read-only search in the tree root at the root item of
 * BTRFS_FS_TREE_OBJECTID and advances with btrfs_next_item(). Keys are
 * tested against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and
 * is_fstree(); for a selected key the root is read with key.offset set to
 * (u64)-1 and handed to fill_csum_tree_from_one_fs_root().
 * NOTE(review): the branch taken after the failed-read message, and whether
 * ret carries an error there, is not visible in this listing - confirm.
 */
14023 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14024 struct btrfs_root *csum_root)
14026 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14027 struct btrfs_path path;
14028 struct btrfs_root *tree_root = fs_info->tree_root;
14029 struct btrfs_root *cur_root;
14030 struct extent_buffer *node;
14031 struct btrfs_key key;
14035 btrfs_init_path(&path);
14036 key.objectid = BTRFS_FS_TREE_OBJECTID;
14038 key.type = BTRFS_ROOT_ITEM_KEY;
14039 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14048 node = path.nodes[0];
14049 slot = path.slots[0];
14050 btrfs_item_key_to_cpu(node, &key, slot);
14051 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14053 if (key.type != BTRFS_ROOT_ITEM_KEY)
14055 if (!is_fstree(key.objectid))
14057 key.offset = (u64)-1;
14059 cur_root = btrfs_read_fs_root(fs_info, &key);
/* Both an error pointer and NULL are treated as a failed read. */
14060 if (IS_ERR(cur_root) || !cur_root) {
14061 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14065 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14070 ret = btrfs_next_item(tree_root, &path);
14080 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the extent tree for data extents.
 *
 * trans:     running transaction.
 * csum_root: checksum tree to fill; the extent root is taken from
 *            csum_root->fs_info.
 *
 * Performs a read-only search of the extent root, allocates a one-sector
 * buffer, then walks the leaves. Items whose key type is not
 * BTRFS_EXTENT_ITEM_KEY or whose extent flags lack BTRFS_EXTENT_FLAG_DATA
 * are tested for and the remaining ones are passed to populate_csum() with
 * key.objectid as the range start.
 * NOTE(review): the length argument of populate_csum() and the release of
 * buf are not visible in this listing.
 */
14084 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14085 struct btrfs_root *csum_root)
14087 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14088 struct btrfs_path path;
14089 struct btrfs_extent_item *ei;
14090 struct extent_buffer *leaf;
14092 struct btrfs_key key;
14095 btrfs_init_path(&path);
14097 key.type = BTRFS_EXTENT_ITEM_KEY;
14099 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14101 btrfs_release_path(&path);
14105 buf = malloc(csum_root->fs_info->sectorsize);
14107 btrfs_release_path(&path);
/* Move to the next leaf once the slot runs past the last item. */
14112 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14113 ret = btrfs_next_leaf(extent_root, &path);
14121 leaf = path.nodes[0];
14123 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14124 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14129 ei = btrfs_item_ptr(leaf, path.slots[0],
14130 struct btrfs_extent_item);
14131 if (!(btrfs_extent_flags(leaf, ei) &
14132 BTRFS_EXTENT_FLAG_DATA)) {
14137 ret = populate_csum(trans, csum_root, buf, key.objectid,
14144 btrfs_release_path(&path);
14150 * Recalculate the csum and put it into the csum tree.
14152 * Extent tree init will wipe out all the extent info, so in that case, we
14153 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
14154 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two checksum tree fill strategies.
 *
 * trans:          running transaction.
 * csum_root:      checksum tree to fill.
 * search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                 fill_csum_tree_from_extent().
 *
 * Returns the result of the selected helper.
 */
14156 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14157 struct btrfs_root *csum_root,
14158 int search_fs_tree)
14160 if (search_fs_tree)
14161 return fill_csum_tree_from_fs(trans, csum_root);
14163 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-scope roots_info_cache.
 *
 * Removes every cache_extent from the tree, converting each one back to its
 * enclosing struct root_item_info, then frees the tree itself and resets the
 * pointer to NULL so the cache can be rebuilt later.
 * NOTE(review): the statement that releases each rii is not visible in this
 * listing.
 */
14166 static void free_roots_info_cache(void)
14168 if (!roots_info_cache)
14171 while (!cache_tree_empty(roots_info_cache)) {
14172 struct cache_extent *entry;
14173 struct root_item_info *rii;
14175 entry = first_cache_extent(roots_info_cache);
14178 remove_cache_extent(roots_info_cache, entry);
14179 rii = container_of(entry, struct root_item_info, cache_extent);
14183 free(roots_info_cache);
14184 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * that references that root.
 *
 * info: filesystem whose extent_root is scanned read-only.
 *
 * The file-scope roots_info_cache is allocated on first use. Only items of
 * type BTRFS_EXTENT_ITEM_KEY (with the TREE_BLOCK flag) or
 * BTRFS_METADATA_ITEM_KEY are examined, and only when their first inline ref
 * has type BTRFS_TREE_BLOCK_REF_KEY. The inline ref offset is used as the
 * root id and as the start of a size-1 cache extent.
 *
 * For each root the entry keeps level, bytenr, generation and node_count of
 * the block with the greatest level seen so far; (u8)-1 marks an entry that
 * has no level yet.
 * NOTE(review): the body of the equal-level branch and the error paths are
 * not visible in this listing.
 */
14187 static int build_roots_info_cache(struct btrfs_fs_info *info)
14190 struct btrfs_key key;
14191 struct extent_buffer *leaf;
14192 struct btrfs_path path;
14194 if (!roots_info_cache) {
14195 roots_info_cache = malloc(sizeof(*roots_info_cache));
14196 if (!roots_info_cache)
14198 cache_tree_init(roots_info_cache);
14201 btrfs_init_path(&path);
14203 key.type = BTRFS_EXTENT_ITEM_KEY;
14205 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14208 leaf = path.nodes[0];
14211 struct btrfs_key found_key;
14212 struct btrfs_extent_item *ei;
14213 struct btrfs_extent_inline_ref *iref;
14214 int slot = path.slots[0];
14219 struct cache_extent *entry;
14220 struct root_item_info *rii;
14222 if (slot >= btrfs_header_nritems(leaf)) {
14223 ret = btrfs_next_leaf(info->extent_root, &path);
14230 leaf = path.nodes[0];
14231 slot = path.slots[0];
14234 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14236 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14237 found_key.type != BTRFS_METADATA_ITEM_KEY)
14240 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14241 flags = btrfs_extent_flags(leaf, ei);
14243 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14244 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Metadata items carry the level in the key offset and the inline ref right
 * after the extent item; the other layout has a tree block info in between.
 */
14247 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14248 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14249 level = found_key.offset;
14251 struct btrfs_tree_block_info *binfo;
14253 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14254 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14255 level = btrfs_tree_block_level(leaf, binfo);
14259 * For a root extent, it must be of the following type and the
14260 * first (and only one) iref in the item.
14262 type = btrfs_extent_inline_ref_type(leaf, iref);
14263 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14266 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14267 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14269 rii = malloc(sizeof(struct root_item_info));
14274 rii->cache_extent.start = root_id;
14275 rii->cache_extent.size = 1;
14276 rii->level = (u8)-1;
14277 entry = &rii->cache_extent;
14278 ret = insert_cache_extent(roots_info_cache, entry);
14281 rii = container_of(entry, struct root_item_info,
14285 ASSERT(rii->cache_extent.start == root_id);
14286 ASSERT(rii->cache_extent.size == 1);
/* A higher level, or an entry without a level, replaces the recorded block. */
14288 if (level > rii->level || rii->level == (u8)-1) {
14289 rii->level = level;
14290 rii->bytenr = found_key.objectid;
14291 rii->gen = btrfs_extent_generation(leaf, ei);
14292 rii->node_count = 1;
14293 } else if (level == rii->level) {
14301 btrfs_release_path(&path);
/*
 * Compare one root item against the information collected in
 * roots_info_cache and optionally rewrite it.
 *
 * path:           path whose nodes[0]/slots[0] point at the root item.
 * root_key:       key of that root item; its objectid is the root id.
 * read_only_mode: non-zero means only detect a mismatch; zero means also
 *                 write the corrected bytenr, level and generation back into
 *                 the leaf.
 *
 * A message is printed when no cache entry exists for the root, when
 * node_count is not 1, when the stored bytenr/level/generation differ from
 * the cached values, and when the root item generation is newer than the
 * cached one. The mismatch message is suppressed when both read_only_mode
 * and the repair flag are set.
 * NOTE(review): the return values are not visible in this listing.
 */
14306 static int maybe_repair_root_item(struct btrfs_path *path,
14307 const struct btrfs_key *root_key,
14308 const int read_only_mode)
14310 const u64 root_id = root_key->objectid;
14311 struct cache_extent *entry;
14312 struct root_item_info *rii;
14313 struct btrfs_root_item ri;
14314 unsigned long offset;
14316 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14319 "Error: could not find extent items for root %llu\n",
14320 root_key->objectid);
14324 rii = container_of(entry, struct root_item_info, cache_extent);
14325 ASSERT(rii->cache_extent.start == root_id);
14326 ASSERT(rii->cache_extent.size == 1);
14328 if (rii->node_count != 1) {
14330 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
14335 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14336 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14338 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14339 btrfs_root_level(&ri) != rii->level ||
14340 btrfs_root_generation(&ri) != rii->gen) {
14343 * If we're in repair mode but our caller told us to not update
14344 * the root item, i.e. just check if it needs to be updated, don't
14345 * print this message, since the caller will call us again shortly
14346 * for the same root item without read only mode (the caller will
14347 * open a transaction first).
14349 if (!(read_only_mode && repair))
14351 "%sroot item for root %llu,"
14352 " current bytenr %llu, current gen %llu, current level %u,"
14353 " new bytenr %llu, new gen %llu, new level %u\n",
14354 (read_only_mode ? "" : "fixing "),
14356 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14357 btrfs_root_level(&ri),
14358 rii->bytenr, rii->gen, rii->level);
14360 if (btrfs_root_generation(&ri) > rii->gen) {
14362 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14363 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected copy back at the same offset in the leaf. */
14367 if (!read_only_mode) {
14368 btrfs_set_root_bytenr(&ri, rii->bytenr);
14369 btrfs_set_root_level(&ri, rii->level);
14370 btrfs_set_root_generation(&ri, rii->gen);
14371 write_extent_buffer(path->nodes[0], &ri,
14372 offset, sizeof(ri));
14382 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14383 * caused read-only snapshots to be corrupted if they were created at a moment
14384 * when the source subvolume/snapshot had orphan items. The issue was that the
14385 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14386 * node instead of the post orphan cleanup root node.
14387 * So this function, and its callees, just detects and fixes those cases. Even
14388 * though the regression was for read-only snapshots, this function applies to
14389 * any snapshot/subvolume root.
14390 * This must be run before any other repair code - not doing it so, makes other
14391 * repair code delete or modify backrefs in the extent tree for example, which
14392 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items of the tree root and check, or fix, each one against
 * the block information gathered by build_roots_info_cache().
 *
 * info: filesystem to process; info->tree_root is searched.
 *
 * The walk starts at (BTRFS_FIRST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY). Items
 * that are not root items, and the root item of BTRFS_TREE_RELOC_OBJECTID,
 * are tested for before maybe_repair_root_item() is called. That helper runs
 * in read-only mode whenever no transaction is open (trans is NULL).
 * The roots info cache is freed before the function finishes.
 * NOTE(review): the use of need_trans, the restart logic and the returned
 * count are not visible in this listing. The cmd_check caller prints the
 * result as a number of roots.
 */
14394 static int repair_root_items(struct btrfs_fs_info *info)
14396 struct btrfs_path path;
14397 struct btrfs_key key;
14398 struct extent_buffer *leaf;
14399 struct btrfs_trans_handle *trans = NULL;
14402 int need_trans = 0;
14404 btrfs_init_path(&path);
14406 ret = build_roots_info_cache(info);
14410 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14411 key.type = BTRFS_ROOT_ITEM_KEY;
14416 * Avoid opening and committing transactions if a leaf doesn't have
14417 * any root items that need to be fixed, so that we avoid rotating
14418 * backup roots unnecessarily.
14421 trans = btrfs_start_transaction(info->tree_root, 1);
14422 if (IS_ERR(trans)) {
14423 ret = PTR_ERR(trans);
14428 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14432 leaf = path.nodes[0];
14435 struct btrfs_key found_key;
/* End of leaf: remember the next key, drop the path, commit. */
14437 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14438 int no_more_keys = find_next_key(&path, &key);
14440 btrfs_release_path(&path);
14442 ret = btrfs_commit_transaction(trans,
14454 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14456 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14458 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Third argument is the read-only flag: 1 while no transaction is open. */
14461 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14465 if (!trans && repair) {
14468 btrfs_release_path(&path);
14478 free_roots_info_cache();
14479 btrfs_release_path(&path);
14481 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group and invalidate the
 * cache generation in the super block.
 *
 * fs_info: filesystem to operate on.
 *
 * Block groups are visited in order by advancing current to the end of each
 * group and calling btrfs_clear_free_space_cache() on it. Afterwards a
 * transaction on the tree root is used to set the super block
 * cache_generation to (u64)-1.
 * Returns PTR_ERR(trans) when that transaction cannot be started (visible
 * return line).
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line - confirm this is intended.
 */
14488 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14490 struct btrfs_trans_handle *trans;
14491 struct btrfs_block_group_cache *bg_cache;
14495 /* Clear all free space cache inodes and its extent data */
14497 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14500 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14503 current = bg_cache->key.objectid + bg_cache->key.offset;
14506 /* Don't forget to set cache_generation to -1 */
14507 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14508 if (IS_ERR(trans)) {
14509 error("failed to update super block cache generation");
14510 return PTR_ERR(trans);
14512 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14513 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache in the version requested on the command line.
 *
 * fs_info:       filesystem to operate on.
 * clear_version: 1 for the v1 cache, 2 for the free space tree (name taken
 *                from the body; its declaration line is not visible here).
 *
 * Version 1 reports a message instead of clearing when the FREE_SPACE_TREE
 * compat_ro flag is set, otherwise calls clear_free_space_cache().
 * Version 2 prints a notice when the flag is not set, otherwise calls
 * btrfs_clear_free_space_tree().
 * Progress and failures are reported with printf() and error().
 */
14518 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14523 if (clear_version == 1) {
14524 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14526 "free space cache v2 detected, use --clear-space-cache v2");
14530 printf("Clearing free space cache\n");
14531 ret = clear_free_space_cache(fs_info);
14533 error("failed to clear free space cache");
14536 printf("Free space cache cleared\n");
14538 } else if (clear_version == 2) {
14539 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14540 printf("no free space cache v2 to clear\n");
14544 printf("Clear free space cache v2\n");
14545 ret = btrfs_clear_free_space_tree(fs_info);
14547 error("failed to clear free space cache v2: %d", ret);
14550 printf("free space cache v2 cleared\n");
/*
 * Help text for the check command, one string per output line. cmd_check()
 * passes this array to usage() when the arguments are invalid.
 * Keep the option list in step with long_options in cmd_check().
 */
14557 const char * const cmd_check_usage[] = {
14558 "btrfs check [options] <device>",
14559 "Check structural integrity of a filesystem (unmounted).",
14560 "Check structural integrity of an unmounted filesystem. Verify internal",
14561 "trees' consistency and item connectivity. In the repair mode try to",
14562 "fix the problems found. ",
14563 "WARNING: the repair mode is considered dangerous",
14565 "-s|--super <superblock> use this superblock copy",
14566 "-b|--backup use the first valid backup root copy",
14567 "--force skip mount checks, repair is not possible",
14568 "--repair try to repair the filesystem",
14569 "--readonly run in read-only mode (default)",
14570 "--init-csum-tree create a new CRC tree",
14571 "--init-extent-tree create a new extent tree",
14572 "--mode <MODE> allows choice of memory/IO trade-offs",
14573 " where MODE is one of:",
14574 " original - read inodes and extents to memory (requires",
14575 " more memory, does less IO)",
14576 " lowmem - try to use less memory but read blocks again",
14578 "--check-data-csum verify checksums of data blocks",
14579 "-Q|--qgroup-report print a report on qgroup consistency",
14580 "-E|--subvol-extents <subvolid>",
14581 " print subvolume extents and sharing state",
14582 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14583 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14584 "-p|--progress indicate progress",
14585 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * argc, argv: command line; exactly one non-option argument (the device) is
 *             expected after the options.
 *
 * Visible phases:
 *  1. parse options with getopt_long() and accumulate ctree_flags,
 *  2. reject conflicting options and verify the mount status of the device,
 *  3. open the filesystem with open_ctree_fs_info() and verify that the
 *     tree, dev and chunk roots are readable,
 *  4. optional one-shot actions: clear space cache, zero the log tree in
 *     repair mode, qgroup report, subvolume extent report, and
 *     re-initialization of the extent and/or checksum tree,
 *  5. the checks proper: root items, chunks and extents, super size, free
 *     space cache or tree, fs roots, csums, root refs, recow of blocks with
 *     transid errors, deletion of bad items, quota groups,
 *  6. print the summary counters and release resources.
 * NOTE(review): the error paths, labels and the final return are not
 * visible in this listing.
 */
14589 int cmd_check(int argc, char **argv)
14591 struct cache_tree root_cache;
14592 struct btrfs_root *root;
14593 struct btrfs_fs_info *info;
14596 u64 tree_root_bytenr = 0;
14597 u64 chunk_root_bytenr = 0;
14598 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14602 int init_csum_tree = 0;
14604 int clear_space_cache = 0;
14605 int qgroup_report = 0;
14606 int qgroups_repaired = 0;
14607 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values above the char range for options that only have a long form. */
14612 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14613 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14614 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14615 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14616 GETOPT_VAL_FORCE };
14617 static const struct option long_options[] = {
14618 { "super", required_argument, NULL, 's' },
14619 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14620 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14621 { "init-csum-tree", no_argument, NULL,
14622 GETOPT_VAL_INIT_CSUM },
14623 { "init-extent-tree", no_argument, NULL,
14624 GETOPT_VAL_INIT_EXTENT },
14625 { "check-data-csum", no_argument, NULL,
14626 GETOPT_VAL_CHECK_CSUM },
14627 { "backup", no_argument, NULL, 'b' },
14628 { "subvol-extents", required_argument, NULL, 'E' },
14629 { "qgroup-report", no_argument, NULL, 'Q' },
14630 { "tree-root", required_argument, NULL, 'r' },
14631 { "chunk-root", required_argument, NULL,
14632 GETOPT_VAL_CHUNK_TREE },
14633 { "progress", no_argument, NULL, 'p' },
14634 { "mode", required_argument, NULL,
14636 { "clear-space-cache", required_argument, NULL,
14637 GETOPT_VAL_CLEAR_SPACE_CACHE},
14638 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14639 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string lists E without a trailing colon,
 * while the long form subvol-extents is required_argument and optarg is
 * parsed into subvolid below. With the short form optarg would be NULL -
 * confirm and consider adding the colon.
 */
14642 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14646 case 'a': /* ignored */ break;
14648 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14651 num = arg_strtou64(optarg);
14652 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14654 "super mirror should be less than %d",
14655 BTRFS_SUPER_MIRROR_MAX);
14658 bytenr = btrfs_sb_offset(((int)num));
14659 printf("using SB copy %llu, bytenr %llu\n", num,
14660 (unsigned long long)bytenr);
14666 subvolid = arg_strtou64(optarg);
14669 tree_root_bytenr = arg_strtou64(optarg);
14671 case GETOPT_VAL_CHUNK_TREE:
14672 chunk_root_bytenr = arg_strtou64(optarg);
14675 ctx.progress_enabled = true;
14679 usage(cmd_check_usage);
14680 case GETOPT_VAL_REPAIR:
14681 printf("enabling repair mode\n");
14683 ctree_flags |= OPEN_CTREE_WRITES;
14685 case GETOPT_VAL_READONLY:
14688 case GETOPT_VAL_INIT_CSUM:
14689 printf("Creating a new CRC tree\n");
14690 init_csum_tree = 1;
14692 ctree_flags |= OPEN_CTREE_WRITES;
14694 case GETOPT_VAL_INIT_EXTENT:
14695 init_extent_tree = 1;
14696 ctree_flags |= (OPEN_CTREE_WRITES |
14697 OPEN_CTREE_NO_BLOCK_GROUPS);
14700 case GETOPT_VAL_CHECK_CSUM:
14701 check_data_csum = 1;
14703 case GETOPT_VAL_MODE:
14704 check_mode = parse_check_mode(optarg);
14705 if (check_mode == CHECK_MODE_UNKNOWN) {
14706 error("unknown mode: %s", optarg);
14710 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14711 if (strcmp(optarg, "v1") == 0) {
14712 clear_space_cache = 1;
14713 } else if (strcmp(optarg, "v2") == 0) {
14714 clear_space_cache = 2;
14715 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14718 "invalid argument to --clear-space-cache, must be v1 or v2");
14721 ctree_flags |= OPEN_CTREE_WRITES;
14723 case GETOPT_VAL_FORCE:
/* Exactly one positional argument, the device, must remain. */
14729 if (check_argc_exact(argc - optind, 1))
14730 usage(cmd_check_usage);
14732 if (ctx.progress_enabled) {
14733 ctx.tp = TASK_NOTHING;
14734 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14737 /* This check is the only reason for --readonly to exist */
14738 if (readonly && repair) {
14739 error("repair options are not compatible with --readonly");
14744 * experimental and dangerous
14746 if (repair && check_mode == CHECK_MODE_LOWMEM)
14747 warning("low-memory mode repair support is only partial");
14750 cache_tree_init(&root_cache);
14752 ret = check_mounted(argv[optind]);
14755 error("could not check mount status: %s",
14761 "%s is currently mounted, use --force if you really intend to check the filesystem",
14769 error("repair and --force is not yet supported");
14776 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14780 "filesystem mounted, continuing because of --force");
14782 /* A block device is mounted in exclusive mode by kernel */
14783 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14786 /* only allow partial opening under repair mode */
14788 ctree_flags |= OPEN_CTREE_PARTIAL;
14790 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14791 chunk_root_bytenr, ctree_flags);
14793 error("cannot open file system");
/* Publish the opened filesystem through the file-scope global_info. */
14799 global_info = info;
14800 root = info->fs_root;
14801 uuid_unparse(info->super_copy->fsid, uuidbuf);
14803 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14806 * Check the bare minimum before starting anything else that could rely
14807 * on it, namely the tree roots, any local consistency checks
14809 if (!extent_buffer_uptodate(info->tree_root->node) ||
14810 !extent_buffer_uptodate(info->dev_root->node) ||
14811 !extent_buffer_uptodate(info->chunk_root->node)) {
14812 error("critical roots corrupted, unable to check the filesystem");
14818 if (clear_space_cache) {
14819 ret = do_clear_free_space_cache(info, clear_space_cache);
14825 * repair mode will force us to commit transaction which
14826 * will make us fail to load log tree when mounting.
14828 if (repair && btrfs_super_log_root(info->super_copy)) {
14829 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14835 ret = zero_log_tree(root);
14838 error("failed to zero log tree: %d", ret);
14843 if (qgroup_report) {
14844 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14846 ret = qgroup_verify_all(info);
14853 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14854 subvolid, argv[optind], uuidbuf);
14855 ret = print_extent_state(info, subvolid);
/* Both tree re-initializations share one transaction on the extent root. */
14860 if (init_extent_tree || init_csum_tree) {
14861 struct btrfs_trans_handle *trans;
14863 trans = btrfs_start_transaction(info->extent_root, 0);
14864 if (IS_ERR(trans)) {
14865 error("error starting transaction");
14866 ret = PTR_ERR(trans);
14871 if (init_extent_tree) {
14872 printf("Creating a new extent tree\n");
14873 ret = reinit_extent_tree(trans, info);
14879 if (init_csum_tree) {
14880 printf("Reinitialize checksum tree\n");
14881 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14883 error("checksum tree initialization failed: %d",
14890 ret = fill_csum_tree(trans, info->csum_root,
14894 error("checksum tree refilling failed: %d", ret);
14899 * Ok now we commit and run the normal fsck, which will add
14900 * extent entries for all of the items it finds.
14902 ret = btrfs_commit_transaction(trans, info->extent_root);
14907 if (!extent_buffer_uptodate(info->extent_root->node)) {
14908 error("critical: extent_root, unable to check the filesystem");
14913 if (!extent_buffer_uptodate(info->csum_root->node)) {
14914 error("critical: csum_root, unable to check the filesystem");
/* Root item repair is skipped when the extent tree was just recreated. */
14920 if (!init_extent_tree) {
14921 ret = repair_root_items(info);
14924 error("failed to repair root items: %s", strerror(-ret));
14928 fprintf(stderr, "Fixed %d roots.\n", ret);
14930 } else if (ret > 0) {
14932 "Found %d roots with an outdated root item.\n",
14935 "Please run a filesystem check with the option --repair to fix them.\n");
14942 ret = do_check_chunks_and_extents(info);
14946 "errors found in extent allocation tree or chunk allocation");
14948 /* Only re-check super size after we checked and repaired the fs */
14949 err |= !is_super_size_valid(info);
14951 if (!ctx.progress_enabled) {
14952 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14953 fprintf(stderr, "checking free space tree\n");
14955 fprintf(stderr, "checking free space cache\n");
14957 ret = check_space_cache(root);
14960 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14961 error("errors found in free space tree");
14963 error("errors found in free space cache");
14968 * We used to have to have these hole extents in between our real
14969 * extents so if we don't have this flag set we need to make sure there
14970 * are no gaps in the file extents for inodes, otherwise we can just
14971 * ignore it when this happens.
14973 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14974 ret = do_check_fs_roots(info, &root_cache);
14977 error("errors found in fs roots");
14981 fprintf(stderr, "checking csums\n");
14982 ret = check_csums(root);
14985 error("errors found in csum tree");
14989 fprintf(stderr, "checking root refs\n");
14990 /* For low memory mode, check_fs_roots_v2 handles root refs */
14991 if (check_mode != CHECK_MODE_LOWMEM) {
14992 ret = check_root_refs(root, &root_cache);
14995 error("errors found in root refs");
/* In repair mode, recow every block queued on fs_info->recow_ebs. */
15000 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15001 struct extent_buffer *eb;
15003 eb = list_first_entry(&root->fs_info->recow_ebs,
15004 struct extent_buffer, recow);
15005 list_del_init(&eb->recow);
15006 ret = recow_extent_buffer(root, eb);
15009 error("fails to fix transid errors");
/* Drain the file-scope delete_items list. */
15014 while (!list_empty(&delete_items)) {
15015 struct bad_item *bad;
15017 bad = list_first_entry(&delete_items, struct bad_item, list);
15018 list_del_init(&bad->list);
15020 ret = delete_bad_item(root, bad);
15026 if (info->quota_enabled) {
15027 fprintf(stderr, "checking quota groups\n");
15028 ret = qgroup_verify_all(info);
15031 error("failed to check quota groups");
15035 ret = repair_qgroups(info, &qgroups_repaired);
15038 error("failed to repair quota groups");
/* Blocks still queued for recow at this point are reported as errors. */
15044 if (!list_empty(&root->fs_info->recow_ebs)) {
15045 error("transid errors in file system");
15050 printf("found %llu bytes used, ",
15051 (unsigned long long)bytes_used);
15053 printf("error(s) found\n");
15055 printf("no error found\n");
15056 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15057 printf("total tree bytes: %llu\n",
15058 (unsigned long long)total_btree_bytes);
15059 printf("total fs tree bytes: %llu\n",
15060 (unsigned long long)total_fs_tree_bytes);
15061 printf("total extent tree bytes: %llu\n",
15062 (unsigned long long)total_extent_tree_bytes);
15063 printf("btree space waste bytes: %llu\n",
15064 (unsigned long long)btree_space_waste);
15065 printf("file data blocks allocated: %llu\n referenced %llu\n",
15066 (unsigned long long)data_bytes_allocated,
15067 (unsigned long long)data_bytes_referenced);
15069 free_qgroup_counts();
15070 free_root_recs_tree(&root_cache);
15074 if (ctx.progress_enabled)
15075 task_deinit(ctx.info);