2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits, one flag per bit so several can be OR-ed into one value.
 * Bit 0 is not assigned in this group.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* Bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* Bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* No inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1 << 21)	/* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, just removed the undetermined members
190 * and change it to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
258 enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  They are OR-ed into a backref's 'errors' field
 * and decoded into text by print_ref_error().  The trailing comment on each
 * line is the flag's value in hexadecimal.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Per-inode error bits.  They are OR-ed into an inode record's 'errors'
 * field and decoded into text by print_inode_error().  The trailing comment
 * on each line is the flag's value in hexadecimal.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each flag below occupies its own bit so that any combination can be
 * OR-ed into a single error value and still be told apart.
 *
 * CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable; it now uses the first
 * free bit, (1 << 9).  All other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will do the hole split and is much stricter than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exist.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Fold the records cached on @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  The function then walks
 * src_node->root_cache and, after that, src_node->inode_cache, inserting a
 * ptr_node for each entry into the matching cache of @dst_node.  If the
 * destination already has an entry at the same key (-EEXIST) the incoming
 * record is merged into the existing one with merge_inode_recs() and then
 * released.  At the end dst_node->current is moved to the inode @src_node
 * was working on when that inode number is the larger one.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
/* Drop one reference on the source node. */
1323 if (--src_node->refs == 0)
/* Remember which inode the source node was in the middle of. */
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
/* First pass handles the root caches; the inode caches follow below. */
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry may be unlinked. */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
/* The new ptr_node covers the same key range as the source entry. */
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* Destination already tracks this key: merge instead of inserting. */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* Root cache finished; switch both sides over to the inode cache. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
/* Advance dst's current inode if src had progressed further. */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
/* The inode dst was working on is finished: mark it checked. */
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for caches of ptr_node entries: recovers the ptr_node
 * that embeds @cache and releases its inode record with free_inode_rec().
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
/* @cache is a member of struct ptr_node; step back to the container. */
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode records with free_inode_ptr
 * as the per-entry callback (presumably this is what defines
 * free_inode_recs_tree(), used by enter_shared_node() - confirm against the
 * macro definition).
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node stored in the @shared cache tree for bytenr.
 * The lookup uses a range of one byte starting at bytenr, and the result is
 * converted from the embedded cache_extent back to its shared_node.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by @bytenr, initialise its root and
 * inode caches and insert it into the @shared cache tree.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
/* calloc() so that every field not set below starts out as zero/NULL. */
1416 node = calloc(1, sizeof(*node));
/* Key the node by block address; a size of 1 makes it a point entry. */
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Account for the walk reaching the tree block at @bytenr on @level.
 *
 * A block that is not yet tracked in wc->shared gets a new shared_node, which
 * becomes the active node of the walk.  For a block that is already tracked,
 * its cached records are either thrown away (when the active node is the
 * root level of a root whose root item has zero refs) or spliced into the
 * currently active node.  The shared_node is unlinked from wc->shared once
 * its reference count reaches zero.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* The active node must sit above the level being entered. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* Newly tracked block: register it and make it the active node. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/* Root item has no refs: drop a reference and free the cached records. */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the cached records over to the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unlink the node if it was the last. */
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk leaves @level.
 *
 * Nothing is done at the root level.  Otherwise the walk control's active
 * node is moved to a level above @level, and the records collected on the
 * node being left are spliced into the new active node unless that node is
 * the root level of a root whose root item has zero refs.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Search upwards from the level just above the one being left. */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The loop is expected to stop before running off the top. */
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and make level i the active one. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
/* splice_shared_node() drops one reference; another must remain. */
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide how root child_root_id relates to root @parent_root_id by searching
 * the tree root of root->fs_info.
 *
 * Step one looks for the exact ROOT_REF key (parent_root_id, child_root_id).
 * Step two scans the ROOT_BACKREF items of child_root_id and compares each
 * key offset with @parent_root_id.  The final return distinguishes "has some
 * other parent" (0) from "has no parent at all" (2).
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Forward reference: parent -> child. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Back references: walk every ROOT_BACKREF item of the child. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: continue in the next one. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* A different objectid or key type means the backrefs are exhausted. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For a backref key the offset holds the id of the referencing root. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
/*
 * Record the directory entries stored in item @slot of leaf @eb.
 *
 * One item can pack several btrfs_dir_item entries back to back; each one is
 * decoded and registered as an inode backref.  Entries that point at an inode
 * item go to the active node's inode cache, entries that point at a root item
 * go to its root cache, and anything else is reported and filed under
 * BTRFS_MULTIPLE_OBJECTIDS.  Name-length and name-hash problems are flagged on
 * the backref / on the current inode record.
 *
 * NOTE(review): namebuf is filled with read_extent_buffer() and later printed
 * with "%s"; no NUL termination is visible in this block and the buffer is
 * exactly BTRFS_NAME_LEN bytes - confirm the string is terminated, or print it
 * with a bounded "%.*s".
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
/* The record being worked on is the directory that owns this item. */
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed entries until the item's bytes are used up. */
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
/* Name would cross the item end or exceeds the name-length limit. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed-size header fits in what is left. */
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow directly after the btrfs_dir_item header. */
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
/* File the backref according to what the entry points at. */
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry. */
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item holding more than one entry is flagged as duplicate. */
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record the inode references stored in item @slot of leaf @eb.
 *
 * The item holds one or more btrfs_inode_ref entries, each followed by its
 * name.  Every entry is added to the active node's inode cache as a backref
 * of inode key->objectid, with key->offset passed as the directory and the
 * entry's index as the dir index.  A name that would cross the item boundary
 * or is longer than BTRFS_NAME_LEN is flagged with REF_ERR_NAME_TOO_LONG.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed refs until the item's bytes are used up. */
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left. */
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly after the btrfs_inode_ref header. */
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed ref. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record the extended inode references stored in item @slot of leaf @eb.
 *
 * Works like process_inode_ref(), except that each btrfs_inode_extref entry
 * carries its own parent directory id, which is passed to
 * add_inode_backref() in place of key->offset.  Names longer than
 * BTRFS_NAME_LEN are truncated to that length and flagged with
 * REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check of name_len against the
 * item boundary (total) is visible in this block - confirm.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed extrefs until the item's bytes are used up. */
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only what fits into namebuf. */
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow directly after the extref header. */
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Step over header and name to reach the next packed extref. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for items overlapping the byte range
 * [@start, @start + @len) and report the result through @found (presumably
 * the number of bytes in the range that have checksums - the accumulation
 * itself is not visible here, confirm).
 *
 * Each checksum item covers (item size / csum size) sectors starting at its
 * key offset, which is how csum_end is derived below.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
/* Size of one checksum as recorded in the super block. */
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: look at the preceding item, which may be a csum item
 * that starts before the range but still reaches into it.
 */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: continue in the next one. */
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Anything that is not a csum item ends the scan. */
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the byte range covered by this item's checksums. */
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
/* Overlap with the remaining range, capped at the remaining length. */
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Check one EXTENT_DATA item (@slot in leaf @eb) against the inode record
 * the active node is currently working on.
 *
 * The function tracks the contiguous file range seen so far in
 * rec->extent_start / rec->extent_end, flags overlaps and records holes,
 * validates the extent item's fields according to its type, adds to
 * rec->found_size and, for extents with a disk location that do not belong
 * to the data reloc tree, queries the checksum tree for the extent's bytes.
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
/* The item must belong to the inode currently being collected. */
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start the range at this item. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Compare where the previous extent ended with where this one starts. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location count towards found_size. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents: use the on-disk length for the csum lookup. */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Interpret the csum lookup result according to the extent type. */
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the per-type handler.
 *
 * Items are attributed to the active shared node of @wc.  Whenever the key's
 * objectid moves past the inode currently being collected, that inode record
 * is marked checked and a record for the new objectid is fetched from the
 * active node's inode cache.  The item is then dispatched on its key type.
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
/* Special case: active node is the root level of a root with zero refs. */
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get special treatment. */
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: close the old record, open a new one. */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item's key type. */
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/* Per-level cache, indexed by tree level (see update_nodes_refs()). */
/* address of the tree block last recorded at each level */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count looked up for that tree block */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero when the block at this level has to be checked in this root */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first inode item or the first change of
 * inode number, then check_inode_item() is run from that position and its
 * error bits are collected in err.  check_inode_item() may move @path to
 * another leaf; when that happens the nodes along the path are compared
 * against @nrefs and refreshed with update_nodes_refs() so that shared
 * blocks which do not need checking in this root can be skipped.
 */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
/* Position the slot at the end of the leaf. */
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
/* Same block as recorded for this level: nothing to refresh. */
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the path's buffers below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
/* Child address and the generation the parent expects it to have. */
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
/*
 * Validate @child against the pointer stored at @slot of @parent.
 *
 * Three properties are compared: the first key of the child with the key in
 * the parent slot, the child's own block address with the parent's block
 * pointer, and the child's header generation with the parent's pointer
 * generation.  Every mismatch is reported on stderr.  In all three messages
 * "wanted" is the value recorded in the parent and "have" is the value found
 * in the child.
 */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
/* wanted = generation in the parent pointer, have = child's header. */
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_node_ptr_generation(parent, slot),
2137 btrfs_header_generation(child));
2143 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144 * in every fs or file tree check. Here we find all its root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots listed in @roots should
 * be checked while walking @root.  A block with a single referencing root is
 * handled up front; otherwise @root's objectid is compared with the first
 * entry of the ulist's rb-tree.
 */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
/* Only one root references the block. */
2152 if (roots->nnodes == 1)
/* First entry of the rb-tree backing the ulist. */
2155 node = rb_first(&roots->root);
2156 u = rb_entry(node, struct ulist_node, rb_node);
2158 * current root id is not smallest, we skip it and let it be checked
2159 * in the fs or file tree who hash the smallest root id.
2161 if (root->objectid != u->val)
/*
 * Work out whether the extent of tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in the flags word
 * passed back to the caller.
 *
 * A few cases are decided from the root and the block header alone (root
 * objectid below BTRFS_FIRST_FREE_OBJECTID, @eb being the root's own block,
 * the RELOC header flag, header owner equal to the root).  Otherwise the
 * block's extent item is looked up in the extent tree and its flags and
 * inline references are examined.
 */
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2170 struct btrfs_root *extent_root = root->fs_info->extent_root;
2171 struct btrfs_root_item *ri = &root->root_item;
2172 struct btrfs_extent_inline_ref *iref;
2173 struct btrfs_extent_item *ei;
2174 struct btrfs_key key;
2175 struct btrfs_path *path = NULL;
2186 * Except file/reloc tree, we can not have FULL BACKREF MODE
2188 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root node of this tree. */
2192 if (eb->start == btrfs_root_bytenr(ri))
2195 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2198 owner = btrfs_header_owner(eb);
2199 if (owner == root->objectid)
2202 path = btrfs_alloc_path();
/* Search past the last possible key for this bytenr, then step back. */
2206 key.objectid = btrfs_header_bytenr(eb);
2208 key.offset = (u64)-1;
2210 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2217 ret = btrfs_previous_extent_item(extent_root, path,
2223 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent-tree leaf, not the block checked. */
2225 eb = path->nodes[0];
2226 slot = path->slots[0];
2227 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2229 flags = btrfs_extent_flags(eb, ei);
2230 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs start after the extent item and run to the item's end. */
2233 ptr = (unsigned long)(ei + 1);
2234 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a btrfs_tree_block_info before the inline refs. */
2236 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237 ptr += sizeof(struct btrfs_tree_block_info);
2240 /* Reached extent item ends normally */
2244 /* Beyond extent item end, wrong item size */
2246 error("extent item at bytenr %llu slot %d has wrong size",
2251 iref = (struct btrfs_extent_inline_ref *)ptr;
2252 offset = btrfs_extent_inline_ref_offset(eb, iref);
2253 type = btrfs_extent_inline_ref_type(eb, iref);
/* A normal tree block ref whose offset is the block's header owner. */
2255 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2257 ptr += btrfs_extent_inline_ref_size(type);
/* Result: clear or set the full-backref bit for the caller. */
2261 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267 btrfs_free_path(path);
2272 * for a tree node or leaf, we record its reference count, so later if we still
2273 * process this node or leaf, don't need to compute its reference count again.
2275 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the @nrefs entry for @level with information about the tree block
 * at @bytenr.
 *
 * If the entry already describes @bytenr it is not looked up again.
 * Otherwise, unless @bytenr is (u64)-1, the block's reference count is
 * fetched with btrfs_lookup_extent_info() and stored, the per-level flags
 * are reset, and need_check[level] is computed (from the set of roots
 * referencing the block, or inherited from the level above).  With
 * @check_all set and @eb given, full_backref[level] is derived through
 * calc_extent_flag_v2().
 */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278 struct extent_buffer *eb, struct node_refs *nrefs,
2279 u64 level, int check_all)
2281 struct ulist *roots;
2284 int root_level = btrfs_header_level(root->node);
/* Entry already describes this block. */
2288 if (nrefs->bytenr[level] == bytenr)
2291 if (bytenr != (u64)-1) {
2292 /* the return value of this function seems a mistake */
2293 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294 level, 1, &refs, &flags);
2296 if (ret < 0 && !check_all)
/* Start a fresh entry for the new block. */
2299 nrefs->bytenr[level] = bytenr;
2300 nrefs->refs[level] = refs;
2301 nrefs->full_backref[level] = 0;
2302 nrefs->checked[level] = 0;
/* Collect every root that references the block. */
2305 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2310 check = need_check(root, roots);
2312 nrefs->need_check[level] = check;
2315 nrefs->need_check[level] = 1;
/* The root node itself always has to be checked. */
2317 if (level == root_level) {
2318 nrefs->need_check[level] = 1;
2321 * The node refs may have not been
2322 * updated if upper needs checking (the
2323 * lowest root_objectid) the node can
2326 nrefs->need_check[level] =
2327 nrefs->need_check[level + 1];
2333 if (check_all && eb) {
2334 calc_extent_flag_v2(root, eb, &flags);
2335 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336 nrefs->full_backref[level] = 1;
2343 * @level if @level == -1 means extent data item
2344 * else normal tree block.
/*
 * Decide whether the extent at @level may be checked strictly, based on the
 * reference counts cached in @nrefs for the levels above it.  Levels outside
 * the range -1 .. root level and the root level itself are handled up front;
 * otherwise every level above @level is inspected for a reference count
 * greater than one.
 */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347 struct node_refs *nrefs, int level)
2349 int root_level = btrfs_header_level(root->node);
/* @level outside the valid range. */
2351 if (level > root_level || level < -1)
2353 if (level == root_level)
2356 * if the upper node is marked full backref, it should contain shared
2357 * backref of the parent (except owner == root->objectid).
/* Walk upwards, starting one level above @level. */
2359 while (++level <= root_level)
2360 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * Reference counts of the visited blocks are cached in @nrefs so that a block
 * is looked up in the extent tree only once, and blocks are announced to
 * enter_shared_node().  Leaves are handed to process_one_leaf().  Before
 * stepping into a child it is read (with readahead for its siblings) and
 * validated with check_child_node() and btrfs_check_leaf() /
 * btrfs_check_node(); a child that cannot be read is recorded as a corrupt
 * extent.  On the way out the slot of the current level is moved to the end
 * of its node.
 */
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367 struct walk_control *wc, int *level,
2368 struct node_refs *nrefs)
2370 enum btrfs_tree_block_status status;
2373 struct btrfs_fs_info *fs_info = root->fs_info;
2374 struct extent_buffer *next;
2375 struct extent_buffer *cur;
2379 WARN_ON(*level < 0);
2380 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count if @nrefs already describes this block. */
2382 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383 refs = nrefs->refs[*level];
2386 ret = btrfs_lookup_extent_info(NULL, root,
2387 path->nodes[*level]->start,
2388 *level, 1, &refs, NULL);
2393 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394 nrefs->refs[*level] = refs;
2398 ret = enter_shared_node(root, path->nodes[*level]->start,
2406 while (*level >= 0) {
2407 WARN_ON(*level < 0);
2408 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409 cur = path->nodes[*level];
/* The block's header level must match the level we think we are on. */
2411 if (btrfs_header_level(cur) != *level)
/* Every slot of this block has been visited. */
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2417 ret = process_one_leaf(root, cur, wc);
/* Child address and the generation the parent expects it to have. */
2422 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, one level further down. */
2425 if (bytenr == nrefs->bytenr[*level - 1]) {
2426 refs = nrefs->refs[*level - 1];
2428 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429 *level - 1, 1, &refs, NULL);
2433 nrefs->bytenr[*level - 1] = bytenr;
2434 nrefs->refs[*level - 1] = refs;
2439 ret = enter_shared_node(root, bytenr, refs,
2442 path->slots[*level]++;
/* Try the block cache first; fall back to reading from disk. */
2447 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449 free_extent_buffer(next);
2450 reada_walk_down(root, cur, path->slots[*level]);
2451 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: remember it as a corrupt extent. */
2452 if (!extent_buffer_uptodate(next)) {
2453 struct btrfs_key node_key;
2455 btrfs_node_key_to_cpu(path->nodes[*level],
2457 path->slots[*level]);
2458 btrfs_add_corrupt_extent_record(root->fs_info,
2460 path->nodes[*level]->start,
2461 root->fs_info->nodesize,
/* The child must agree with what the parent pointer says about it. */
2468 ret = check_child_node(cur, path->slots[*level], next);
2470 free_extent_buffer(next);
/* Structural sanity check of the child block itself. */
2475 if (btrfs_is_leaf(next))
2476 status = btrfs_check_leaf(root, NULL, next);
2478 status = btrfs_check_node(root, NULL, next);
2479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480 free_extent_buffer(next);
/* Step down: install the child in the path at the lower level. */
2485 *level = *level - 1;
2486 free_extent_buffer(path->nodes[*level]);
2487 path->nodes[*level] = next;
2488 path->slots[*level] = 0;
/* Mark the current level as fully consumed. */
2491 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2495 static int fs_root_objectid(u64 objectid);
2498 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the file-scope statistics:
 * its length goes into total_btree_bytes, and additionally into the fs-tree
 * or extent-tree totals when it belongs to one of those; the unused space
 * inside the block is added to btree_space_waste.
 */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2504 struct extent_buffer *eb = path->nodes[level];
2506 total_btree_bytes += eb->len;
2507 if (fs_root_objectid(root->objectid))
2508 total_fs_tree_bytes += eb->len;
2509 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510 total_extent_tree_bytes += eb->len;
/* Waste in a leaf is its free space. */
2513 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Waste in a node is the room taken by its unused key pointers. */
2515 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516 btrfs_header_nritems(eb));
2517 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2522 * This function only handles BACKREF_MISSING,
2523 * If corresponding extent item exists, increase the ref, else insert an extent
2526 * Returns error bits after repair.
/*
 * Repair a missing backref (BACKREF_MISSING in @err) for tree block @node.
 *
 * The extent tree is searched for the block's extent item.  When an extent
 * item has to be inserted (insert_extent), an empty one is created first:
 * a METADATA_ITEM with skinny metadata, otherwise an EXTENT_ITEM followed by
 * a btrfs_tree_block_info.  Afterwards one reference is added with
 * btrfs_inc_extent_ref(), using the upper level's block as parent when that
 * level is marked full backref and the block's owner differs from @root.
 * The cached state in @nrefs for @level is kept in step.
 *
 * Returns error bits after repair (BACKREF_MISSING is cleared on success).
 */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct extent_buffer *node,
2531 struct node_refs *nrefs, int level, int err)
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 struct btrfs_root *extent_root = fs_info->extent_root;
2535 struct btrfs_path path;
2536 struct btrfs_extent_item *ei;
2537 struct btrfs_tree_block_info *bi;
2538 struct btrfs_key key;
2539 struct extent_buffer *eb;
2540 u32 size = sizeof(*ei);
2541 u32 node_size = root->fs_info->nodesize;
2542 int insert_extent = 0;
2543 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544 int root_level = btrfs_header_level(root->node);
2549 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* Nothing to do unless a backref is actually missing. */
2552 if ((err & BACKREF_MISSING) == 0)
/* @level indexes the BTRFS_MAX_LEVEL-sized arrays in @nrefs. */
2555 WARN_ON(level >= BTRFS_MAX_LEVEL);
2558 btrfs_init_path(&path);
2559 bytenr = btrfs_header_bytenr(node);
2560 owner = btrfs_header_owner(node);
2561 generation = btrfs_header_generation(node);
/* Search past the last possible key for this bytenr, then step back. */
2563 key.objectid = bytenr;
2565 key.offset = (u64)-1;
2567 /* Search for the extent item */
2568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2578 /* calculate if the extent item flag is full backref or not */
2579 if (nrefs->full_backref[level] != 0)
2580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2582 /* insert an extent item */
2583 if (insert_extent) {
2584 struct btrfs_disk_key copy_key;
2586 generation = btrfs_header_generation(node);
/* Inherit full backref from the level above for foreign-owned blocks. */
2588 if (level < root_level && nrefs->full_backref[level + 1] &&
2589 owner != root->objectid) {
2590 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2593 key.objectid = bytenr;
/* Without skinny metadata the item also holds a tree_block_info. */
2594 if (!skinny_metadata) {
2595 key.type = BTRFS_EXTENT_ITEM_KEY;
2596 key.offset = node_size;
2597 size += sizeof(*bi);
2599 key.type = BTRFS_METADATA_ITEM_KEY;
2603 btrfs_release_path(&path);
2604 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start at zero refs; the ref is added by btrfs_inc_extent_ref() below. */
2612 btrfs_set_extent_refs(eb, ei, 0);
2613 btrfs_set_extent_generation(eb, ei, generation);
2614 btrfs_set_extent_flags(eb, ei, flags);
2616 if (!skinny_metadata) {
2617 bi = (struct btrfs_tree_block_info *)(ei + 1);
2618 memset_extent_buffer(eb, 0, (unsigned long)bi,
/* Block key stored in the tree_block_info: (root objectid, 0, 0). */
2620 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621 btrfs_set_disk_key_type(&copy_key, 0);
2622 btrfs_set_disk_key_offset(&copy_key, 0);
2624 btrfs_set_tree_block_level(eb, bi, level);
2625 btrfs_set_tree_block_key(eb, bi, &copy_key);
2627 btrfs_mark_buffer_dirty(eb);
2628 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
/* Keep the cached per-level state in step with the new item. */
2632 nrefs->refs[level] = 0;
2633 nrefs->full_backref[level] =
2634 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635 btrfs_release_path(&path);
/* Use a shared (parent-based) backref when the level above is full backref. */
2638 if (level < root_level && nrefs->full_backref[level + 1] &&
2639 owner != root->objectid)
2640 parent = nrefs->bytenr[level + 1];
2642 /* increase the ref */
2643 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644 parent, root->objectid, level, 0);
2646 nrefs->refs[level]++;
2648 btrfs_release_path(&path);
2651 "failed to repair tree block ref start %llu root %llu due to %s",
2652 bytenr, root->objectid, strerror(-ret));
2654 printf("Added one tree block ref start %llu %s %llu\n",
2655 bytenr, parent ? "parent" : "root",
2656 parent ? parent : root->objectid);
2657 err &= ~BACKREF_MISSING;
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664 unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666 struct extent_buffer *eb, u64 bytenr,
2667 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669 struct btrfs_root *root, struct btrfs_path *path,
2670 struct node_refs *nrefs, int account_bytes);
2673 * @trans just for lowmem repair mode
2674 * @check_all if not 0 then check all tree block backrefs and items
2675 * 0 then just check relationship of items in fs tree(s)
2677 * Returns >0 Found error, should continue
2678 * Returns <0 Fatal error, must exit the whole check
2679 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, checking as it goes.
 *
 * For every visited block the cached information in @nrefs is refreshed with
 * update_nodes_refs().  The block's own backref is verified with
 * check_tree_block_ref() and handed to repair_tree_block_ref(), and in
 * check_all mode its bytes are accounted.  Leaves are validated with
 * btrfs_check_leaf() and processed by process_one_leaf_v2() and
 * check_leaf_items(); nodes are validated with btrfs_check_node().  A child
 * is read (with readahead for its siblings), validated against its parent
 * pointer and then entered.  Outside check_all mode, children whose
 * need_check flag is clear are skipped.
 */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *root, struct btrfs_path *path,
2683 int *level, struct node_refs *nrefs, int ext_ref,
2687 enum btrfs_tree_block_status status;
2690 struct btrfs_fs_info *fs_info = root->fs_info;
2691 struct extent_buffer *next;
2692 struct extent_buffer *cur;
2696 int account_file_data = 0;
2698 WARN_ON(*level < 0);
2699 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure @nrefs describes the block the walk starts from. */
2701 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702 path->nodes[*level], nrefs, *level, check_all);
2706 while (*level >= 0) {
2707 WARN_ON(*level < 0);
2708 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709 cur = path->nodes[*level];
2710 bytenr = btrfs_header_bytenr(cur);
2711 check = nrefs->need_check[*level];
/* The block's header level must match the level we think we are on. */
2713 if (btrfs_header_level(cur) != *level)
2716 * Update bytes accounting and check tree block ref
2717 * NOTE: Doing accounting and check before checking nritems
2718 * is necessary because of empty node/leaf.
2720 if ((check_all && !nrefs->checked[*level]) ||
2721 (!check_all && nrefs->need_check[*level])) {
2722 ret = check_tree_block_ref(root, cur,
2723 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724 btrfs_header_owner(cur), nrefs);
2727 ret = repair_tree_block_ref(trans, root,
2728 path->nodes[*level], nrefs, *level, ret);
/* Account each block only in the root that is responsible for it. */
2731 if (check_all && nrefs->need_check[*level] &&
2732 nrefs->refs[*level]) {
2733 account_bytes(root, path, *level);
2734 account_file_data = 1;
2736 nrefs->checked[*level] = 1;
/* Every slot of this block has been visited. */
2739 if (path->slots[*level] >= btrfs_header_nritems(cur))
2742 /* Don't forget to check leaf/node validation */
2744 /* skip duplicate check */
2745 if (check || !check_all) {
2746 ret = btrfs_check_leaf(root, NULL, cur);
2747 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2755 ret = process_one_leaf_v2(root, path, nrefs,
2758 ret = check_leaf_items(trans, root, path,
2759 nrefs, account_file_data);
2763 if (check || !check_all) {
2764 ret = btrfs_check_node(root, NULL, cur);
2765 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child address and the generation the parent expects it to have. */
2772 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2775 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2780 * check all trees in check_chunks_and_extent_v2
2781 * check shared node once in check_fs_roots
2783 if (!check_all && !nrefs->need_check[*level - 1]) {
2784 path->slots[*level]++;
/* Try the block cache first; fall back to reading from disk. */
2788 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790 free_extent_buffer(next);
2791 reada_walk_down(root, cur, path->slots[*level]);
2792 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: remember it as a corrupt extent. */
2793 if (!extent_buffer_uptodate(next)) {
2794 struct btrfs_key node_key;
2796 btrfs_node_key_to_cpu(path->nodes[*level],
2798 path->slots[*level]);
2799 btrfs_add_corrupt_extent_record(fs_info,
2800 &node_key, path->nodes[*level]->start,
2801 fs_info->nodesize, *level);
/* The child must agree with what the parent pointer says about it. */
2807 ret = check_child_node(cur, path->slots[*level], next);
/* Structural sanity check of the child block itself. */
2812 if (btrfs_is_leaf(next))
2813 status = btrfs_check_leaf(root, NULL, next);
2815 status = btrfs_check_node(root, NULL, next);
2816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817 free_extent_buffer(next);
/* Step down: install the child in the path at the lower level. */
2822 *level = *level - 1;
2823 free_extent_buffer(path->nodes[*level]);
2824 path->nodes[*level] = next;
2825 path->slots[*level] = 0;
2826 account_file_data = 0;
/* bytenr == (u64)-1: only refresh full_backref[] for the new block. */
2828 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Walk back up the path starting at *level. The loop inspects each populated
 * level below BTRFS_MAX_LEVEL - 1 and tests whether its node still has a slot
 * after the current one. For an exhausted level the buffer at
 * path->nodes[*level] is released and the path entry cleared;
 * leave_shared_node() is called when that level is wc->active_node.
 */
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834 struct walk_control *wc, int *level)
2837 struct extent_buffer *leaf;
2839 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840 leaf = path->nodes[i];
2841 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2846 free_extent_buffer(path->nodes[*level]);
2847 path->nodes[*level] = NULL;
/* The walk must never climb above the active shared node. */
2848 BUG_ON(*level > wc->active_node);
2849 if (*level == wc->active_node)
2850 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without a walk_control: scans upwards from *level
 * for a node with a slot remaining after the current one, releasing the
 * buffer at path->nodes[*level] and clearing the path entry for exhausted
 * levels.
 */
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2861 struct extent_buffer *leaf;
2863 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864 leaf = path->nodes[i];
2865 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2870 free_extent_buffer(path->nodes[*level]);
2871 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a tree's root directory. The tests below
 * look at: presence of the inode item and absence of recorded errors,
 * nlink == 1 with found_link == 0, a non-empty backref list, and a first
 * backref that is an inode ref named ".." at index 0 with neither a dir
 * index nor a dir item.
 * NOTE(review): the return convention is not visible in this block; confirm
 * that any failed test yields a non-zero result.
 */
2878 static int check_root_dir(struct inode_record *rec)
2880 struct inode_backref *backref;
2883 if (!rec->found_inode_item || rec->errors)
2885 if (rec->nlink != 1 || rec->found_link != 0)
2887 if (list_empty(&rec->backrefs))
/* Only the first backref in the list is examined. */
2889 backref = to_inode_backref(rec->backrefs.next);
2890 if (!backref->found_inode_ref)
2892 if (backref->index != 0 || backref->namelen != 2 ||
2893 memcmp(backref->name, "..", 2))
2895 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Overwrite the size stored in the inode item of rec->ino with
 * rec->found_size, mark the leaf dirty, clear I_ERR_DIR_ISIZE_WRONG in
 * rec->errors and print a "reset isize" message. The path is released before
 * returning.
 */
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct btrfs_inode_item *ei;
2907 struct btrfs_key key;
/*
 * Search with offset (u64)-1; presumably this positions the path just past
 * the inode item so the item itself is in the preceding slot - confirm.
 */
2910 key.objectid = rec->ino;
2911 key.type = BTRFS_INODE_ITEM_KEY;
2912 key.offset = (u64)-1;
2914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2918 if (!path->slots[0]) {
/* Make sure the item found really belongs to the inode being repaired. */
2925 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926 if (key.objectid != rec->ino) {
2931 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932 struct btrfs_inode_item);
2933 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934 btrfs_mark_buffer_dirty(path->nodes[0]);
2935 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937 root->root_key.objectid);
2939 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino through btrfs_add_orphan_item(), release
 * the path and drop I_ERR_NO_ORPHAN_ITEM from rec->errors.
 * NOTE(review): confirm the error bit is only cleared when the insertion
 * succeeded.
 */
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2950 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951 btrfs_release_path(path);
2953 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Store rec->found_size as the nbytes value of the inode item of rec->ino,
 * mark the leaf dirty, clear I_ERR_FILE_NBYTES_WRONG and print a
 * "reset nbytes" message. The path is released before returning.
 */
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958 struct btrfs_root *root,
2959 struct btrfs_path *path,
2960 struct inode_record *rec)
2962 struct btrfs_inode_item *ei;
2963 struct btrfs_key key;
2966 key.objectid = rec->ino;
2967 key.type = BTRFS_INODE_ITEM_KEY;
2970 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2977 /* Since ret == 0, no need to check anything */
2978 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979 struct btrfs_inode_item);
2980 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981 btrfs_mark_buffer_dirty(path->nodes[0]);
2982 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983 printf("reset nbytes for ino %llu root %llu\n",
2984 rec->ino, root->root_key.objectid);
2986 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for inode rec->ino in directory
 * backref->dir, inside a transaction started and committed here.
 *
 * The new item is keyed (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index),
 * points at the inode item of rec->ino and carries the name from @backref.
 * Afterwards the backref is flagged as having a dir index and the cached
 * record of the parent directory has its found_size and isize error bit
 * re-evaluated.
 */
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991 struct cache_tree *inode_cache,
2992 struct inode_record *rec,
2993 struct inode_backref *backref)
2995 struct btrfs_path path;
2996 struct btrfs_trans_handle *trans;
2997 struct btrfs_dir_item *dir_item;
2998 struct extent_buffer *leaf;
2999 struct btrfs_key key;
3000 struct btrfs_disk_key disk_key;
3001 struct inode_record *dir_rec;
3002 unsigned long name_ptr;
/* Item payload: the dir item header followed directly by the name bytes. */
3003 u32 data_size = sizeof(*dir_item) + backref->namelen;
3006 trans = btrfs_start_transaction(root, 1);
3008 return PTR_ERR(trans);
3010 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011 (unsigned long long)rec->ino);
3013 btrfs_init_path(&path);
3014 key.objectid = backref->dir;
3015 key.type = BTRFS_DIR_INDEX_KEY;
3016 key.offset = backref->index;
3017 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3020 leaf = path.nodes[0];
3021 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of the target inode. */
3023 disk_key.objectid = cpu_to_le64(rec->ino);
3024 disk_key.type = BTRFS_INODE_ITEM_KEY;
3025 disk_key.offset = 0;
3027 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029 btrfs_set_dir_data_len(leaf, dir_item, 0);
3030 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written immediately after the fixed-size dir item. */
3031 name_ptr = (unsigned long)(dir_item + 1);
3032 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033 btrfs_mark_buffer_dirty(leaf);
3034 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not captured here. */
3035 btrfs_commit_transaction(trans, root);
3037 backref->found_dir_index = 1;
3038 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the parent's size and re-derive its isize error bit. */
3042 dir_rec->found_size += backref->namelen;
3043 if (dir_rec->found_size == dir_rec->isize &&
3044 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046 if (dir_rec->found_size != dir_rec->isize)
3047 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) from @root, inside a transaction started and committed here.
 * The entry is looked up with btrfs_lookup_dir_index() and removed with
 * either btrfs_del_item() or btrfs_delete_one_dir_name(); the condition that
 * selects between the two is not visible in this block.
 */
3052 static int delete_dir_index(struct btrfs_root *root,
3053 struct inode_backref *backref)
3055 struct btrfs_trans_handle *trans;
3056 struct btrfs_dir_item *di;
3057 struct btrfs_path path;
3060 trans = btrfs_start_transaction(root, 1);
3062 return PTR_ERR(trans);
3064 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065 (unsigned long long)backref->dir,
3066 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067 (unsigned long long)root->objectid);
3069 btrfs_init_path(&path);
3070 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071 backref->name, backref->namelen,
3072 backref->index, -1);
/* First exit path: release the path and commit. */
3075 btrfs_release_path(&path);
3076 btrfs_commit_transaction(trans, root);
3083 ret = btrfs_del_item(trans, root, &path);
3085 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3087 btrfs_release_path(&path);
3088 btrfs_commit_transaction(trans, root);
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root, u64 ino, u64 size,
3094 u64 nbytes, u64 nlink, u32 mode)
3096 struct btrfs_inode_item ii;
3097 time_t now = time(NULL);
3100 btrfs_set_stack_inode_size(&ii, size);
3101 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102 btrfs_set_stack_inode_nlink(&ii, nlink);
3103 btrfs_set_stack_inode_mode(&ii, mode);
3104 btrfs_set_stack_inode_generation(&ii, trans->transid);
3105 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3113 ret = btrfs_insert_inode(trans, root, ino, &ii);
3116 warning("root %llu inode %llu recreating inode item, this may "
3117 "be incomplete, please check permissions and content after "
3118 "the fsck completes.\n", (unsigned long long)root->objectid,
3119 (unsigned long long)ino);
/*
 * Lowmem-mode helper: create an inode item for @ino with size, nbytes and
 * nlink all 0. The mode is S_IFDIR when filetype is BTRFS_FT_DIR and S_IFREG
 * for every other type, always with permission bits 0755.
 */
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125 struct btrfs_root *root, u64 ino,
3128 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3130 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item for @rec from what the scan collected, inside a
 * transaction started and committed here.
 *
 * The link count is 1 for a root directory and rec->found_link otherwise.
 * A record with a dir item becomes a directory (size = rec->found_size);
 * anything else becomes a regular file (size = rec->extent_end). Permission
 * bits are 0755 in both cases.
 */
3133 static int create_inode_item(struct btrfs_root *root,
3134 struct inode_record *rec, int root_dir)
3136 struct btrfs_trans_handle *trans;
3142 trans = btrfs_start_transaction(root, 1);
3143 if (IS_ERR(trans)) {
3144 ret = PTR_ERR(trans);
3148 nlink = root_dir ? 1 : rec->found_link;
3149 if (rec->found_dir_item) {
3150 if (rec->found_file_extent)
3151 fprintf(stderr, "root %llu inode %llu has both a dir "
3152 "item and extents, unsure if it is a dir or a "
3153 "regular file so setting it as a directory\n",
3154 (unsigned long long)root->objectid,
3155 (unsigned long long)rec->ino);
3156 mode = S_IFDIR | 0755;
3157 size = rec->found_size;
/* NOTE(review): this condition is always true here; a plain else would do. */
3158 } else if (!rec->found_dir_item) {
3159 size = rec->extent_end;
3160 mode = S_IFREG | 0755;
3163 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3165 btrfs_commit_transaction(trans, root);
/*
 * Repair the backrefs of one inode record.
 *
 * Called in two modes selected by the delete flag tested below: in delete
 * mode bad dir index entries are removed, otherwise missing pieces are added
 * back (dir index, dir index/item pair, or the inode item itself).
 * Returns a non-zero ret on error, otherwise the value of `repaired`.
 */
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170 struct inode_record *rec,
3171 struct cache_tree *inode_cache,
3174 struct inode_backref *tmp, *backref;
3175 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3179 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory needs its inode item before anything else can be fixed. */
3180 if (!delete && rec->ino == root_dirid) {
3181 if (!rec->found_inode_item) {
3182 ret = create_inode_item(root, rec, 1);
3189 /* Index 0 for root dir's are special, don't mess with it */
3190 if (rec->ino == root_dirid && backref->index == 0)
3194 ((backref->found_dir_index && !backref->found_inode_ref) ||
3195 (backref->found_dir_index && backref->found_inode_ref &&
3196 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197 ret = delete_dir_index(root, backref);
3201 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
3206 if (!delete && !backref->found_dir_index &&
3207 backref->found_dir_item && backref->found_inode_ref) {
3208 ret = add_missing_dir_index(root, inode_cache, rec,
3213 if (backref->found_dir_item &&
3214 backref->found_dir_index) {
3215 if (!backref->errors &&
3216 backref->found_inode_ref) {
3217 list_del(&backref->list);
/* Only the inode ref exists: recreate both the dir index and the dir item. */
3224 if (!delete && (!backref->found_dir_index &&
3225 !backref->found_dir_item &&
3226 backref->found_inode_ref)) {
3227 struct btrfs_trans_handle *trans;
3228 struct btrfs_key location;
3230 ret = check_dir_conflict(root, backref->name,
3236 * let nlink fixing routine to handle it,
3237 * which can do it better.
3242 location.objectid = rec->ino;
3243 location.type = BTRFS_INODE_ITEM_KEY;
3244 location.offset = 0;
3246 trans = btrfs_start_transaction(root, 1);
3247 if (IS_ERR(trans)) {
3248 ret = PTR_ERR(trans);
3251 fprintf(stderr, "adding missing dir index/item pair "
3253 (unsigned long long)rec->ino);
3254 ret = btrfs_insert_dir_item(trans, root, backref->name,
3256 backref->dir, &location,
3257 imode_to_type(rec->imode),
3260 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing: rebuild it. */
3264 if (!delete && (backref->found_inode_ref &&
3265 backref->found_dir_index &&
3266 backref->found_dir_item &&
3267 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268 !rec->found_inode_item)) {
3269 ret = create_inode_item(root, rec, 0);
3276 return ret ? ret : repaired;
3280 * To determine the file type for nlink/inode_item repair
3282 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283 * Return -ENOENT if file type is not found.
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3287 struct inode_backref *backref;
3289 /* For inode item recovered case */
3290 if (rec->found_inode_item) {
3291 *type = imode_to_type(rec->imode);
/* Otherwise take the type from the first backref that has a dir index or dir item. */
3295 list_for_each_entry(backref, &rec->backrefs, list) {
3296 if (backref->found_dir_index || backref->found_dir_item) {
3297 *type = backref->filetype;
3305 * To determine the file name for nlink repair
3307 * Return 0 if file name is found, set name and namelen.
3308 * Return -ENOENT if file name is not found.
3310 static int find_file_name(struct inode_record *rec,
3311 char *name, int *namelen)
3313 struct inode_backref *backref;
/*
 * Use the first backref that carries any kind of reference. The copy is
 * exactly backref->namelen bytes and no terminator is appended;
 * NOTE(review): @name must be large enough - no bound is checked here.
 */
3315 list_for_each_entry(backref, &rec->backrefs, list) {
3316 if (backref->found_dir_index || backref->found_dir_item ||
3317 backref->found_inode_ref) {
3318 memcpy(name, backref->name, backref->namelen);
3319 *namelen = backref->namelen;
/*
 * Rebuild the link count of rec->ino from scratch: unlink every recorded
 * backref, drop the incomplete ones from the list, force the on-disk nlink
 * to 0, then re-add the remaining backrefs with btrfs_add_link().
 */
3326 /* Reset the nlink of the inode to the correct one */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root,
3329 struct btrfs_path *path,
3330 struct inode_record *rec)
3332 struct inode_backref *backref;
3333 struct inode_backref *tmp;
3334 struct btrfs_key key;
3335 struct btrfs_inode_item *inode_item;
3338 /* We don't believe this either, reset it and iterate backref */
3339 rec->found_link = 0;
3341 /* Remove all backref including the valid ones */
3342 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344 backref->index, backref->name,
3345 backref->namelen, 0);
3349 /* remove invalid backref, so it won't be added back */
3350 if (!(backref->found_dir_index &&
3351 backref->found_dir_item &&
3352 backref->found_inode_ref)) {
3353 list_del(&backref->list);
3360 /* Set nlink to 0 */
3361 key.objectid = rec->ino;
3362 key.type = BTRFS_INODE_ITEM_KEY;
3364 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3371 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372 struct btrfs_inode_item);
3373 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374 btrfs_mark_buffer_dirty(path->nodes[0]);
3375 btrfs_release_path(path);
3378 * Add back valid inode_ref/dir_item/dir_index,
3379 * add_link() will handle the nlink inc, so new nlink must be correct
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383 backref->name, backref->namelen,
3384 backref->filetype, &backref->index, 1, 0);
3389 btrfs_release_path(path);
/*
 * Report the highest objectid in use in @root through *highest_ino.
 *
 * Searches for an inode item key with objectid BTRFS_LAST_FREE_OBJECTID and
 * reads the key of the slot just before the position the search landed on.
 * A result at or above BTRFS_LAST_FREE_OBJECTID is tested for separately.
 * @path is (re)initialised here and released before returning.
 */
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_path *path,
3398 struct btrfs_key key, found_key;
3401 btrfs_init_path(path);
3402 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3404 key.type = BTRFS_INODE_ITEM_KEY;
3405 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* The slot before the search position holds the largest existing key. */
3407 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408 path->slots[0] - 1);
3409 *highest_ino = found_key.objectid;
3412 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3414 btrfs_release_path(path);
3419 * Link inode to dir 'lost+found'. Increase @ref_count.
3421 * Returns 0 means success.
3422 * Returns <0 means failure.
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *root,
3426 struct btrfs_path *path,
3427 u64 ino, char *namebuf, u32 name_len,
3428 u8 filetype, u64 *ref_count)
3430 char *dir_name = "lost+found";
/* Pick an inode number for the lost+found directory from the highest one in use. */
3435 btrfs_release_path(path);
3436 ret = get_highest_inode(trans, root, path, &lost_found_ino);
/* Create "lost+found" under the directory with objectid BTRFS_FIRST_FREE_OBJECTID. */
3441 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3445 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3448 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449 namebuf, name_len, filetype, NULL, 1, 0);
3451 * Add ".INO" suffix several times to handle case where
3452 * "FILENAME.INO" is already taken by another file.
3454 while (ret == -EEXIST) {
3456 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3458 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3462 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3464 name_len += count_digits(ino) + 1;
3465 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466 name_len, filetype, NULL, 1, 0);
3469 error("failed to link the inode %llu to %s dir: %s",
3470 ino, dir_name, strerror(-ret));
3475 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476 name_len, namebuf, dir_name);
3478 btrfs_release_path(path);
3480 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Fix the link count of rec->ino.
 *
 * The name and type of the file are salvaged from the backrefs first, with
 * the inode number as fallback name and a regular file as fallback type.
 * reset_nlink() then rebuilds the links; if no link is left afterwards the
 * inode is linked into lost+found. I_ERR_LINK_COUNT_WRONG is cleared in all
 * cases and the path released.
 */
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486 struct btrfs_root *root,
3487 struct btrfs_path *path,
3488 struct inode_record *rec)
3490 char namebuf[BTRFS_NAME_LEN] = {0};
3493 int name_recovered = 0;
3494 int type_recovered = 0;
3498 * Get file name and type first before these invalid inode ref
3499 * are deleted by remove_all_invalid_backref()
3501 name_recovered = !find_file_name(rec, namebuf, &namelen);
3502 type_recovered = !find_file_type(rec, &type);
/* No usable backref name: fall back to the decimal inode number. */
3504 if (!name_recovered) {
3505 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506 rec->ino, rec->ino);
3507 namelen = count_digits(rec->ino);
3508 sprintf(namebuf, "%llu", rec->ino);
3511 if (!type_recovered) {
3512 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3514 type = BTRFS_FT_REG_FILE;
/* Drop and re-add all links so that nlink matches the valid backrefs. */
3518 ret = reset_nlink(trans, root, path, rec);
3521 "Failed to reset nlink for inode %llu: %s\n",
3522 rec->ino, strerror(-ret));
3526 if (rec->found_link == 0) {
3527 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528 namebuf, namelen, type,
3529 (u64 *)&rec->found_link);
3533 printf("Fixed the nlink of inode %llu\n", rec->ino);
3536 * Clear the flag anyway, or we will loop forever for the same inode
3537 * as it will not be removed from the bad inode list and the dead loop
3540 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541 btrfs_release_path(path);
3546 * Check if there is any normal(reg or prealloc) file extent for given
3548 * This is used to determine the file type when neither its dir_index/item or
3549 * inode_item exists.
3551 * This will *NOT* report error, if any error happens, just consider it does
3552 * not have any normal file extent.
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3556 struct btrfs_path path;
3557 struct btrfs_key key;
3558 struct btrfs_key found_key;
3559 struct btrfs_file_extent_item *fi;
/* Read-only lookup (no transaction, no COW) of an EXTENT_DATA item. */
3563 btrfs_init_path(&path);
3565 key.type = BTRFS_EXTENT_DATA_KEY;
3568 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3573 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574 ret = btrfs_next_leaf(root, &path);
3581 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3583 if (found_key.objectid != ino ||
3584 found_key.type != BTRFS_EXTENT_DATA_KEY)
3586 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587 struct btrfs_file_extent_item);
3588 type = btrfs_file_extent_type(path.nodes[0], fi);
/* Any extent type other than inline counts as a normal file extent. */
3589 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595 btrfs_release_path(&path);
3599 static u32 btrfs_type_to_imode(u8 type)
3601 static u32 imode_by_btrfs_type[] = {
3602 [BTRFS_FT_REG_FILE] = S_IFREG,
3603 [BTRFS_FT_DIR] = S_IFDIR,
3604 [BTRFS_FT_CHRDEV] = S_IFCHR,
3605 [BTRFS_FT_BLKDEV] = S_IFBLK,
3606 [BTRFS_FT_FIFO] = S_IFIFO,
3607 [BTRFS_FT_SOCK] = S_IFSOCK,
3608 [BTRFS_FT_SYMLINK] = S_IFLNK,
3611 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec->ino.
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * guessed from what the scan found (file extents, dir items, orphan extents)
 * with a regular file as last resort. After btrfs_new_inode() the record is
 * updated, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set
 * so the nlink repair runs afterwards.
 */
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path,
3617 struct inode_record *rec)
3621 int type_recovered = 0;
3624 printf("Trying to rebuild inode:%llu\n", rec->ino);
3626 type_recovered = !find_file_type(rec, &filetype);
3629 * Try to determine inode type if type not found.
3631 * For found regular file extent, it must be FILE.
3632 * For found dir_item/index, it must be DIR.
3634 * For undetermined one, use FILE as fallback.
3637 * 1. If found backref(inode_index/item is already handled) to it,
3639 * Need new inode-inode ref structure to allow search for that.
3641 if (!type_recovered) {
3642 if (rec->found_file_extent &&
3643 find_normal_file_extent(root, rec->ino)) {
3645 filetype = BTRFS_FT_REG_FILE;
3646 } else if (rec->found_dir_item) {
3648 filetype = BTRFS_FT_DIR;
3649 } else if (!list_empty(&rec->orphan_extents)) {
3651 filetype = BTRFS_FT_REG_FILE;
3653 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3656 filetype = BTRFS_FT_REG_FILE;
/* Create the inode with the permission bits in `mode` plus the derived type bits. */
3660 ret = btrfs_new_inode(trans, root, rec->ino,
3661 mode | btrfs_type_to_imode(filetype));
3666 * Here inode rebuild is done, we only rebuild the inode item,
3667 * don't repair the nlink(like move to lost+found).
3668 * That is the job of nlink repair.
3670 * We just fill the record and return
3672 rec->found_dir_item = 1;
3673 rec->imode = mode | btrfs_type_to_imode(filetype);
3675 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676 /* Ensure the inode_nlinks repair function will be called */
3677 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan the target range is probed with btrfs_get_extent(). A
 * conflicting orphan is freed from the extent tree; otherwise a file extent
 * item is inserted for it and the record's size and hole bookkeeping are
 * updated. Each processed orphan is unlinked from the list, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683 struct btrfs_root *root,
3684 struct btrfs_path *path,
3685 struct inode_record *rec)
3687 struct orphan_data_extent *orphan;
3688 struct orphan_data_extent *tmp;
3691 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3693 * Check for conflicting file extents
3695 * Here we don't know whether the extents is compressed or not,
3696 * so we can only assume it not compressed nor data offset,
3697 * and use its disk_len as extent length.
3699 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700 orphan->offset, orphan->disk_len, 0);
3701 btrfs_release_path(path);
3706 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707 orphan->disk_bytenr, orphan->disk_len);
/* Conflict: give the extent back to the extent tree instead of linking it. */
3708 ret = btrfs_free_extent(trans,
3709 root->fs_info->extent_root,
3710 orphan->disk_bytenr, orphan->disk_len,
3711 0, root->objectid, orphan->objectid,
3716 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717 orphan->offset, orphan->disk_bytenr,
3718 orphan->disk_len, orphan->disk_len);
3722 /* Update file size info */
3723 rec->found_size += orphan->disk_len;
3724 if (rec->found_size == rec->nbytes)
3725 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3727 /* Update the file extent hole info too */
3728 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3732 if (RB_EMPTY_ROOT(&rec->holes))
3733 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3735 list_del(&orphan->list);
3738 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of rec->ino.
 *
 * Every hole recorded in rec->holes is punched with btrfs_punch_hole() and
 * then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty. A separate case punches one hole covering
 * [0, round_up(isize, sectorsize)) for a file that lost all its extents.
 */
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744 struct btrfs_root *root,
3745 struct btrfs_path *path,
3746 struct inode_record *rec)
3748 struct rb_node *node;
3749 struct file_extent_hole *hole;
3753 node = rb_first(&rec->holes);
3757 hole = rb_entry(node, struct file_extent_hole, node);
3758 ret = btrfs_punch_hole(trans, root, rec->ino,
3759 hole->start, hole->len);
3762 ret = del_file_extent_hole(&rec->holes, hole->start,
3766 if (RB_EMPTY_ROOT(&rec->holes))
3767 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first node: the tree was just modified. */
3768 node = rb_first(&rec->holes);
3770 /* special case for a file losing all its file extent */
3772 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773 round_up(rec->isize,
3774 root->fs_info->sectorsize));
3778 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only the error bits listed in the first test are handled. The repairs run
 * in a fixed order (inode item, orphan extents, extent holes, dir isize,
 * orphan item, nlink, nbytes) and each later step is skipped once an earlier
 * one has failed.
 */
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
3790 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791 I_ERR_NO_ORPHAN_ITEM |
3792 I_ERR_LINK_COUNT_WRONG |
3793 I_ERR_NO_INODE_ITEM |
3794 I_ERR_FILE_EXTENT_ORPHAN |
3795 I_ERR_FILE_EXTENT_DISCOUNT|
3796 I_ERR_FILE_NBYTES_WRONG)))
3800 * For nlink repair, it may create a dir and add link, so
3801 * 2 for parent(256)'s dir_index and dir_item
3802 * 2 for lost+found dir's inode_item and inode_ref
3803 * 1 for the new inode_ref of the file
3804 * 2 for lost+found dir's dir_index and dir_item for the file
3806 trans = btrfs_start_transaction(root, 7);
3808 return PTR_ERR(trans);
/* The inode item is rebuilt first; the later repairs look the item up. */
3810 btrfs_init_path(&path);
3811 if (rec->errors & I_ERR_NO_INODE_ITEM)
3812 ret = repair_inode_no_item(trans, root, &path, rec);
3813 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818 ret = repair_inode_isize(trans, root, &path, rec);
3819 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822 ret = repair_inode_nlinks(trans, root, &path, rec);
3823 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the commit result is not captured; the transaction is committed even after a failed repair. */
3825 btrfs_commit_transaction(trans, root);
3826 btrfs_release_path(&path);
/*
 * Check, and in repair mode fix, every inode record collected for @root.
 *
 * Backrefs are repaired first in several stages, then the root directory is
 * verified (and recreated when missing), and finally each remaining record
 * is drained from @inode_cache: repaired if possible, otherwise reported
 * together with its unresolved backrefs.
 * Returns -1 when errors were counted, 0 otherwise.
 */
3830 static int check_inode_recs(struct btrfs_root *root,
3831 struct cache_tree *inode_cache)
3833 struct cache_extent *cache;
3834 struct ptr_node *node;
3835 struct inode_record *rec;
3836 struct inode_backref *backref;
3841 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3843 if (btrfs_root_refs(&root->root_item) == 0) {
3844 if (!cache_tree_empty(inode_cache))
3845 fprintf(stderr, "warning line %d\n", __LINE__);
3850 * We need to repair backrefs first because we could change some of the
3851 * errors in the inode recs.
3853 * We also need to go through and delete invalid backrefs first and then
3854 * add the correct ones second. We do this because we may get EEXIST
3855 * when adding back the correct index because we hadn't yet deleted the
3858 * For example, if we were missing a dir index then the directories
3859 * isize would be wrong, so if we fixed the isize to what we thought it
3860 * would be and then fixed the backref we'd still have a invalid fs, so
3861 * we need to add back the dir index and then check to see if the isize
3866 if (stage == 3 && !err)
3869 cache = search_cache_extent(inode_cache, 0);
3870 while (repair && cache) {
3871 node = container_of(cache, struct ptr_node, cache);
3873 cache = next_cache_extent(cache);
3875 /* Need to free everything up and rescan */
3877 remove_cache_extent(inode_cache, &node->cache);
3879 free_inode_rec(rec);
3883 if (list_empty(&rec->backrefs))
3886 ret = repair_inode_backrefs(root, rec, inode_cache,
3900 rec = get_inode_rec(inode_cache, root_dirid, 0);
3901 BUG_ON(IS_ERR(rec));
3903 ret = check_root_dir(rec);
3905 fprintf(stderr, "root %llu root dir %llu error\n",
3906 (unsigned long long)root->root_key.objectid,
3907 (unsigned long long)root_dirid);
3908 print_inode_error(root, rec);
3913 struct btrfs_trans_handle *trans;
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 err = PTR_ERR(trans);
3922 "root %llu missing its root dir, recreating\n",
3923 (unsigned long long)root->objectid);
3925 ret = btrfs_make_root_dir(trans, root, root_dirid);
3928 btrfs_commit_transaction(trans, root);
3932 fprintf(stderr, "root %llu root dir %llu not found\n",
3933 (unsigned long long)root->root_key.objectid,
3934 (unsigned long long)root_dirid);
/* Final pass: drain the cache, repairing or reporting each remaining record. */
3938 cache = search_cache_extent(inode_cache, 0);
3941 node = container_of(cache, struct ptr_node, cache);
3943 remove_cache_extent(inode_cache, &node->cache);
3945 if (rec->ino == root_dirid ||
3946 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947 free_inode_rec(rec);
3951 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952 ret = check_orphan_item(root, rec->ino);
3954 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955 if (can_free_inode_rec(rec)) {
3956 free_inode_rec(rec);
/* Derive the remaining error bits from what the scan did not find. */
3961 if (!rec->found_inode_item)
3962 rec->errors |= I_ERR_NO_INODE_ITEM;
3963 if (rec->found_link != rec->nlink)
3964 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3966 ret = try_repair_inode(root, rec);
3967 if (ret == 0 && can_free_inode_rec(rec)) {
3968 free_inode_rec(rec);
3974 if (!(repair && ret == 0))
3976 print_inode_error(root, rec);
/* Report every backref still attached, flagging which references are missing. */
3977 list_for_each_entry(backref, &rec->backrefs, list) {
3978 if (!backref->found_dir_item)
3979 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980 if (!backref->found_dir_index)
3981 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982 if (!backref->found_inode_ref)
3983 backref->errors |= REF_ERR_NO_INODE_REF;
3984 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985 " namelen %u name %s filetype %d errors %x",
3986 (unsigned long long)backref->dir,
3987 (unsigned long long)backref->index,
3988 backref->namelen, backref->name,
3989 backref->filetype, backref->errors);
3990 print_ref_error(backref->errors);
3992 free_inode_rec(rec);
3994 return (error > 0) ? -1 : 0;
/*
 * Look up the root record for @objectid in @root_cache, creating and
 * inserting a zeroed one (covering [objectid, objectid + 1)) when none
 * exists.
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on failure.
 */
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4000 struct cache_extent *cache;
4001 struct root_record *rec = NULL;
4004 cache = lookup_cache_extent(root_cache, objectid, 1);
4006 rec = container_of(cache, struct root_record, cache);
4008 rec = calloc(1, sizeof(*rec));
4010 return ERR_PTR(-ENOMEM);
4011 rec->objectid = objectid;
4012 INIT_LIST_HEAD(&rec->backrefs);
4013 rec->cache.start = objectid;
4014 rec->cache.size = 1;
4016 ret = insert_cache_extent(root_cache, &rec->cache);
/* NOTE(review): no free(rec) is visible before this return - possible leak on insert failure. */
4018 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (ref_root, dir, name), or allocate a new
 * zeroed one and append it to rec->backrefs.
 * @index is stored in a newly created backref but is not part of the match.
 * The name is copied and NUL-terminated in the trailing storage of the
 * allocation.
 */
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024 u64 ref_root, u64 dir, u64 index,
4025 const char *name, int namelen)
4027 struct root_backref *backref;
4029 list_for_each_entry(backref, &rec->backrefs, list) {
4030 if (backref->ref_root != ref_root || backref->dir != dir ||
4031 backref->namelen != namelen)
4033 if (memcmp(name, backref->name, namelen))
/* Not found: allocate the backref plus room for the name and its terminator. */
4038 backref = calloc(1, sizeof(*backref) + namelen + 1);
4041 backref->ref_root = ref_root;
4043 backref->index = index;
4044 backref->namelen = namelen;
4045 memcpy(backref->name, name, namelen);
4046 backref->name[namelen] = '\0';
4047 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root record: resolves the record from its embedded
 * cache extent and unlinks every backref from rec->backrefs.
 */
4051 static void free_root_record(struct cache_extent *cache)
4053 struct root_record *rec;
4054 struct root_backref *backref;
4056 rec = container_of(cache, struct root_record, cache);
4057 while (!list_empty(&rec->backrefs)) {
4058 backref = to_root_backref(rec->backrefs.next);
4059 list_del(&backref->list);
/*
 * Presumably expands to the routine that frees a whole root_recs cache tree,
 * using free_root_record() for each entry - confirm against the macro
 * definition.
 */
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was seen.
 *
 * The matching root record and backref are looked up (or created), @errors
 * is OR-ed into the backref and the found_* flag for @item_type is set.
 * Duplicate ROOT_REF / ROOT_BACKREF items and index mismatches are recorded
 * as REF_ERR_* bits. A backref with both a forward ref and a dir item is
 * marked reachable.
 */
4068 static int add_root_backref(struct cache_tree *root_cache,
4069 u64 root_id, u64 ref_root, u64 dir, u64 index,
4070 const char *name, int namelen,
4071 int item_type, int errors)
4073 struct root_record *rec;
4074 struct root_backref *backref;
4076 rec = get_root_rec(root_cache, root_id);
4077 BUG_ON(IS_ERR(rec));
4078 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4081 backref->errors |= errors;
/* Every item type except DIR_ITEM takes part in the index comparison. */
4083 if (item_type != BTRFS_DIR_ITEM_KEY) {
4084 if (backref->found_dir_index || backref->found_back_ref ||
4085 backref->found_forward_ref) {
4086 if (backref->index != index)
4087 backref->errors |= REF_ERR_INDEX_UNMATCH;
4089 backref->index = index;
4093 if (item_type == BTRFS_DIR_ITEM_KEY) {
4094 if (backref->found_forward_ref)
4096 backref->found_dir_item = 1;
4097 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098 backref->found_dir_index = 1;
4099 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100 if (backref->found_forward_ref)
4101 backref->errors |= REF_ERR_DUP_ROOT_REF;
4102 else if (backref->found_dir_item)
4104 backref->found_forward_ref = 1;
4105 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106 if (backref->found_back_ref)
4107 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108 backref->found_back_ref = 1;
/* A root is reachable once both its forward ref and its dir item were seen. */
4113 if (backref->found_forward_ref && backref->found_dir_item)
4114 backref->reachable = 1;
/*
 * Drain the per-root inode records in @src_cache and turn their directory
 * backrefs into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise each
 * record is removed from @src_cache, checked with is_child_root(), and for
 * every backref that has a dir item and/or dir index a matching
 * add_root_backref() call is made with this root as the referencing root.
 * Each inode record is freed after processing.
 *
 * NOTE(review): the loop construct, the handling of is_child_root()'s result
 * and the return value are not visible in this view.
 */
4118 static int merge_root_recs(struct btrfs_root *root,
4119 struct cache_tree *src_cache,
4120 struct cache_tree *dst_cache)
4122 struct cache_extent *cache;
4123 struct ptr_node *node;
4124 struct inode_record *rec;
4125 struct inode_backref *backref;
4128 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129 free_inode_recs_tree(src_cache);
4134 cache = search_cache_extent(src_cache, 0);
4137 node = container_of(cache, struct ptr_node, cache);
4139 remove_cache_extent(src_cache, &node->cache);
4142 ret = is_child_root(root, root->objectid, rec->ino);
4148 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records in this cache are not expected to carry inode refs */
4149 BUG_ON(backref->found_inode_ref);
4150 if (backref->found_dir_item)
4151 add_root_backref(dst_cache, rec->ino,
4152 root->root_key.objectid, backref->dir,
4153 backref->index, backref->name,
4154 backref->namelen, BTRFS_DIR_ITEM_KEY,
4156 if (backref->found_dir_index)
4157 add_root_backref(dst_cache, rec->ino,
4158 root->root_key.objectid, backref->dir,
4159 backref->index, backref->name,
4160 backref->namelen, BTRFS_DIR_INDEX_KEY,
4164 free_inode_rec(rec);
/*
 * Validate the root reference graph collected in @root_cache.
 *
 * Starts from the record of BTRFS_FS_TREE_OBJECTID, then walks all records:
 * backrefs whose referencing root has no reference of its own are marked
 * unreachable.  A second walk reports fs trees in the free objectid range
 * that are unreferenced (after consulting check_orphan_item()), flags
 * backrefs missing a dir item, dir index, root backref or root ref, and
 * prints every unresolved reference to stderr.
 *
 * Returns 1 when any error was counted, 0 otherwise.
 *
 * NOTE(review): loop heads, counters and several branch bodies are not
 * visible in this view - confirm details against the full file.
 */
4171 static int check_root_refs(struct btrfs_root *root,
4172 struct cache_tree *root_cache)
4174 struct root_record *rec;
4175 struct root_record *ref_root;
4176 struct root_backref *backref;
4177 struct cache_extent *cache;
4183 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184 BUG_ON(IS_ERR(rec));
4187 /* fixme: this can not detect circular references */
4190 cache = search_cache_extent(root_cache, 0);
4194 rec = container_of(cache, struct root_record, cache);
4195 cache = next_cache_extent(cache);
4197 if (rec->found_ref == 0)
4200 list_for_each_entry(backref, &rec->backrefs, list) {
4201 if (!backref->reachable)
4204 ref_root = get_root_rec(root_cache,
4206 BUG_ON(IS_ERR(ref_root));
4207 if (ref_root->found_ref > 0)
4210 backref->reachable = 0;
4212 if (rec->found_ref == 0)
/* Second pass: report what is still unreferenced or inconsistent */
4218 cache = search_cache_extent(root_cache, 0);
4222 rec = container_of(cache, struct root_record, cache);
4223 cache = next_cache_extent(cache);
4225 if (rec->found_ref == 0 &&
4226 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228 ret = check_orphan_item(root->fs_info->tree_root,
4234 * If we don't have a root item then we likely just have
4235 * a dir item in a snapshot for this root but no actual
4236 * ref key or anything so it's meaningless.
4238 if (!rec->found_root_item)
4241 fprintf(stderr, "fs tree %llu not referenced\n",
4242 (unsigned long long)rec->objectid);
4246 if (rec->found_ref > 0 && !rec->found_root_item)
4248 list_for_each_entry(backref, &rec->backrefs, list) {
4249 if (!backref->found_dir_item)
4250 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251 if (!backref->found_dir_index)
4252 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253 if (!backref->found_back_ref)
4254 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255 if (!backref->found_forward_ref)
4256 backref->errors |= REF_ERR_NO_ROOT_REF;
4257 if (backref->reachable && backref->errors)
4264 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265 (unsigned long long)rec->objectid, rec->found_ref,
4266 rec->found_root_item ? "" : "not found");
4268 list_for_each_entry(backref, &rec->backrefs, list) {
4269 if (!backref->reachable)
4271 if (!backref->errors && rec->found_root_item)
4273 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274 " index %llu namelen %u name %s errors %x\n",
4275 (unsigned long long)backref->ref_root,
4276 (unsigned long long)backref->dir,
4277 (unsigned long long)backref->index,
4278 backref->namelen, backref->name,
4280 print_ref_error(backref->errors);
4283 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and register it through
 * add_root_backref().
 *
 * dirid, sequence (index) and name are read from the item; a name longer
 * than BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN and reported with
 * REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced root is
 * key->offset and the referencing root is key->objectid; in the other branch
 * the two are swapped.
 *
 * NOTE(review): local declarations and the return statement are not visible
 * in this view.
 */
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287 struct btrfs_key *key,
4288 struct cache_tree *root_cache)
4294 struct btrfs_root_ref *ref;
4295 char namebuf[BTRFS_NAME_LEN];
4298 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4300 dirid = btrfs_root_ref_dirid(eb, ref);
4301 index = btrfs_root_ref_sequence(eb, ref);
4302 name_len = btrfs_root_ref_name_len(eb, ref);
4304 if (name_len <= BTRFS_NAME_LEN) {
4308 len = BTRFS_NAME_LEN;
4309 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are stored directly behind the fixed-size ref header */
4311 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4313 if (key->type == BTRFS_ROOT_REF_KEY) {
4314 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315 index, namebuf, len, key->type, error);
4317 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block embedded in @cache.
 * NOTE(review): the statement that frees the record is not visible in this
 * view.
 */
4323 static void free_corrupt_block(struct cache_extent *cache)
4325 struct btrfs_corrupt_block *corrupt;
4327 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Presumably generates free_corrupt_blocks_tree(), used by check_fs_root() - confirm */
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4334 * Repair the btree of the given root.
4336 * The fix is to remove the node key in corrupt_blocks cache_tree.
4337 * and rebalance the tree.
4338 * After the fix, the btree should be writeable.
/*
 * Drop every tree block recorded in @corrupt_blocks from the btree of @root.
 *
 * Works inside one transaction in two passes over the cache tree:
 *  1. search for each recorded node key at the recorded level with ins_len 0
 *     (no balancing), delete the pointer with btrfs_del_ptr() and free the
 *     extent it pointed to;
 *  2. search for each key again with ins_len -1 so that btrfs_search_slot()
 *     rebalances the tree.
 * The transaction is committed at the end.
 *
 * NOTE(review): loop heads, error branches and the return statements are not
 * visible in this view - confirm against the full file.
 */
4340 static int repair_btree(struct btrfs_root *root,
4341 struct cache_tree *corrupt_blocks)
4343 struct btrfs_trans_handle *trans;
4344 struct btrfs_path path;
4345 struct btrfs_corrupt_block *corrupt;
4346 struct cache_extent *cache;
4347 struct btrfs_key key;
4352 if (cache_tree_empty(corrupt_blocks))
4355 trans = btrfs_start_transaction(root, 1);
4356 if (IS_ERR(trans)) {
4357 ret = PTR_ERR(trans);
4358 fprintf(stderr, "Error starting transaction: %s\n",
4362 btrfs_init_path(&path);
4363 cache = first_cache_extent(corrupt_blocks);
4365 corrupt = container_of(cache, struct btrfs_corrupt_block,
4367 level = corrupt->level;
/* Stop the search at the level of the corrupted block's parent pointer */
4368 path.lowest_level = level;
4369 key.objectid = corrupt->key.objectid;
4370 key.type = corrupt->key.type;
4371 key.offset = corrupt->key.offset;
4374 * Here we don't want to do any tree balance, since it may
4375 * cause a balance with corrupted brother leaf/node,
4376 * so ins_len set to 0 here.
4377 * Balance will be done after all corrupt node/leaf is deleted.
4379 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4382 offset = btrfs_node_blockptr(path.nodes[level],
4385 /* Remove the ptr */
4386 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4390 * Remove the corresponding extent
4391 * return value is not concerned.
4393 btrfs_release_path(&path);
4394 ret = btrfs_free_extent(trans, root, offset,
4395 root->fs_info->nodesize, 0,
4396 root->root_key.objectid, level - 1, 0);
4397 cache = next_cache_extent(cache);
4400 /* Balance the btree using btrfs_search_slot() */
4401 cache = first_cache_extent(corrupt_blocks);
4403 corrupt = container_of(cache, struct btrfs_corrupt_block,
4405 memcpy(&key, &corrupt->key, sizeof(key));
4406 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4409 /* return will always >0 since it won't find the item */
4411 btrfs_release_path(&path);
4412 cache = next_cache_extent(cache);
4415 btrfs_commit_transaction(trans, root);
4416 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree @root and merge what it references into
 * @root_cache.
 *
 * Outline of the visible steps:
 *  - install a function-local corrupt_blocks cache tree in root->fs_info for
 *    the duration of the call (reset to NULL before leaving);
 *  - for non tree-reloc roots, mark the root record as having a root item
 *    when the item's ref count is positive;
 *  - move orphan data extents of the root onto the matching inode records;
 *  - verify the root block itself, then walk the tree with
 *    walk_down_tree()/walk_up_tree(), resuming from drop_progress when the
 *    root is partially dropped;
 *  - print and try to repair recorded corrupt tree blocks;
 *  - merge root records and check the collected inode records.
 *
 * NOTE(review): many short lines (declarations, error branches, loop heads,
 * gotos, the return) are not visible in this view.  In particular it is not
 * visible here whether repair_btree() is gated on a repair flag - confirm.
 */
4420 static int check_fs_root(struct btrfs_root *root,
4421 struct cache_tree *root_cache,
4422 struct walk_control *wc)
4428 struct btrfs_path path;
4429 struct shared_node root_node;
4430 struct root_record *rec;
4431 struct btrfs_root_item *root_item = &root->root_item;
4432 struct cache_tree corrupt_blocks;
4433 struct orphan_data_extent *orphan;
4434 struct orphan_data_extent *tmp;
4435 enum btrfs_tree_block_status status;
4436 struct node_refs nrefs;
4439 * Reuse the corrupt_block cache tree to record corrupted tree block
4441 * Unlike the usage in extent tree check, here we do it in a per
4442 * fs/subvol tree base.
4444 cache_tree_init(&corrupt_blocks);
4445 root->fs_info->corrupt_blocks = &corrupt_blocks;
4447 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448 rec = get_root_rec(root_cache, root->root_key.objectid);
4449 BUG_ON(IS_ERR(rec));
4450 if (btrfs_root_refs(root_item) > 0)
4451 rec->found_root_item = 1;
4454 btrfs_init_path(&path);
4455 memset(&root_node, 0, sizeof(root_node));
4456 cache_tree_init(&root_node.root_cache);
4457 cache_tree_init(&root_node.inode_cache);
4458 memset(&nrefs, 0, sizeof(nrefs));
4460 /* Move the orphan extent record to corresponding inode_record */
4461 list_for_each_entry_safe(orphan, tmp,
4462 &root->orphan_data_extents, list) {
4463 struct inode_record *inode;
4465 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4467 BUG_ON(IS_ERR(inode));
4468 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469 list_move(&orphan->list, &inode->orphan_extents);
/* The walk starts at the root block's level with root_node as the active node */
4472 level = btrfs_header_level(root->node);
4473 memset(wc->nodes, 0, sizeof(wc->nodes));
4474 wc->nodes[level] = &root_node;
4475 wc->active_node = level;
4476 wc->root_level = level;
4478 /* We may not have checked the root block, lets do that now */
4479 if (btrfs_is_leaf(root->node))
4480 status = btrfs_check_leaf(root, NULL, root->node);
4482 status = btrfs_check_node(root, NULL, root->node);
4483 if (status != BTRFS_TREE_BLOCK_CLEAN)
4486 if (btrfs_root_refs(root_item) > 0 ||
4487 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488 path.nodes[level] = root->node;
4489 extent_buffer_get(root->node);
4490 path.slots[level] = 0;
4492 struct btrfs_key key;
4493 struct btrfs_disk_key found_key;
4495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496 level = root_item->drop_level;
4497 path.lowest_level = level;
4498 if (level > btrfs_header_level(root->node) ||
4499 level >= BTRFS_MAX_LEVEL) {
4500 error("ignoring invalid drop level: %u", level);
4503 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4506 btrfs_node_key(path.nodes[level], &found_key,
4508 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509 sizeof(found_key)));
4513 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519 wret = walk_up_tree(root, &path, wc, &level);
4526 btrfs_release_path(&path);
4528 if (!cache_tree_empty(&corrupt_blocks)) {
4529 struct cache_extent *cache;
4530 struct btrfs_corrupt_block *corrupt;
4532 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533 root->root_key.objectid);
4534 cache = first_cache_extent(&corrupt_blocks);
4536 corrupt = container_of(cache,
4537 struct btrfs_corrupt_block,
4539 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540 cache->start, corrupt->level,
4541 corrupt->key.objectid, corrupt->key.type,
4542 corrupt->key.offset);
4543 cache = next_cache_extent(cache);
4546 printf("Try to repair the btree for root %llu\n",
4547 root->root_key.objectid);
4548 ret = repair_btree(root, &corrupt_blocks);
4550 fprintf(stderr, "Failed to repair btree: %s\n",
4553 printf("Btree for root %llu is fixed\n",
4554 root->root_key.objectid);
4558 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4562 if (root_node.current) {
4563 root_node.current->checked = 1;
4564 maybe_free_inode_rec(&root_node.inode_cache,
4568 err = check_inode_recs(root, &root_node.inode_cache);
/* corrupt_blocks lives on this stack frame, so the fs_info pointer is cleared */
4572 free_corrupt_blocks_tree(&corrupt_blocks);
4573 root->fs_info->corrupt_blocks = NULL;
4574 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should check as
 * an fs root: the tree-reloc and data-reloc trees are special-cased, every
 * other id is decided by is_fstree().
 * NOTE(review): the value returned for the two reloc trees is on a line not
 * visible in this view.
 */
4578 static int fs_root_objectid(u64 objectid)
4580 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4583 return is_fstree(objectid);
/*
 * Iterate the tree root and check every fs/subvolume tree found in it.
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read (tree-reloc roots
 * without caching, and freed again afterwards) and handed to check_fs_root();
 * ROOT_REF / ROOT_BACKREF keys are handed to process_root_ref().  When
 * check_fs_root() returns -EAGAIN, or the tree root node changed underneath
 * the iteration, the collected root records are dropped and the path is
 * released.
 *
 * Progress reporting is started when ctx.progress_enabled is set.
 *
 * NOTE(review): loop heads, the restart labels/gotos, error accounting and
 * the return statement are not visible in this view.
 */
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587 struct cache_tree *root_cache)
4589 struct btrfs_path path;
4590 struct btrfs_key key;
4591 struct walk_control wc;
4592 struct extent_buffer *leaf, *tree_node;
4593 struct btrfs_root *tmp_root;
4594 struct btrfs_root *tree_root = fs_info->tree_root;
4598 if (ctx.progress_enabled) {
4599 ctx.tp = TASK_FS_ROOTS;
4600 task_start(ctx.info);
4604 * Just in case we made any changes to the extent tree that weren't
4605 * reflected into the free space cache yet.
4608 reset_cached_block_groups(fs_info);
4609 memset(&wc, 0, sizeof(wc));
4610 cache_tree_init(&wc.shared);
4611 btrfs_init_path(&path);
4616 key.type = BTRFS_ROOT_ITEM_KEY;
4617 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a change during the walk can be detected */
4622 tree_node = tree_root->node;
4624 if (tree_node != tree_root->node) {
4625 free_root_recs_tree(root_cache);
4626 btrfs_release_path(&path);
4629 leaf = path.nodes[0];
4630 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631 ret = btrfs_next_leaf(tree_root, &path);
4637 leaf = path.nodes[0];
4639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641 fs_root_objectid(key.objectid)) {
4642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643 tmp_root = btrfs_read_fs_root_no_cache(
4646 key.offset = (u64)-1;
4647 tmp_root = btrfs_read_fs_root(
4650 if (IS_ERR(tmp_root)) {
4654 ret = check_fs_root(tmp_root, root_cache, &wc);
4655 if (ret == -EAGAIN) {
4656 free_root_recs_tree(root_cache);
4657 btrfs_release_path(&path);
/* Only the uncached tree-reloc root is owned by this function */
4662 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663 btrfs_free_fs_root(tmp_root);
4664 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665 key.type == BTRFS_ROOT_BACKREF_KEY) {
4666 process_root_ref(leaf, path.slots[0], &key,
4673 btrfs_release_path(&path);
4675 free_extent_cache_tree(&wc.shared);
4676 if (!cache_tree_empty(&wc.shared))
4677 fprintf(stderr, "warning line %d\n", __LINE__);
4679 task_stop(ctx.info);
4685 * Find the @index according by @ino and name.
4686 * Notice:time efficiency is O(N)
4688 * @root: the root of the fs/file tree
4689 * @index_ret: the index as return value
4690 * @namebuf: the name to match
4691 * @name_len: the length of name to match
4692 * @file_type: the file_type of INODE_ITEM to match
4694 * Returns 0 if found and *@index_ret will be modified with right value
4695 * Returns< 0 not found and *@index_ret will be (u64)-1
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698 u64 *index_ret, char *namebuf, u32 name_len,
4701 struct btrfs_path path;
4702 struct extent_buffer *node;
4703 struct btrfs_dir_item *di;
4704 struct btrfs_key key;
4705 struct btrfs_key location;
4706 char name[BTRFS_NAME_LEN] = {0};
4718 /* search from the last index */
4719 key.objectid = dirid;
4720 key.offset = (u64)-1;
4721 key.type = BTRFS_DIR_INDEX_KEY;
4723 btrfs_init_path(&path);
4724 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4729 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4732 *index_ret = (64)-1;
4735 /* Check whether inode_id/filetype/name match */
4736 node = path.nodes[0];
4737 slot = path.slots[0];
4738 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4742 len = btrfs_dir_name_len(node, di);
4743 data_len = btrfs_dir_data_len(node, di);
4745 btrfs_dir_item_key_to_cpu(node, di, &location);
4746 if (location.objectid != location_id ||
4747 location.type != BTRFS_INODE_ITEM_KEY ||
4748 location.offset != 0)
4751 filetype = btrfs_dir_type(node, di);
4752 if (file_type != filetype)
4755 if (len > BTRFS_NAME_LEN)
4756 len = BTRFS_NAME_LEN;
4758 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759 if (len != name_len || strncmp(namebuf, name, len))
4762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763 *index_ret = key.offset;
4767 len += sizeof(*di) + data_len;
4768 di = (struct btrfs_dir_item *)((char *)di + len);
4774 btrfs_release_path(&path);
4779 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780 * INODE_REF/INODE_EXTREF match.
4782 * @root: the root of the fs/file tree
4783 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784 * value while find index
4785 * @location_key: location key of the struct btrfs_dir_item to match
4786 * @name: the name to match
4787 * @namelen: the length of name
4788 * @file_type: the type of file to match
4790 * Return 0 if no error occurred.
4791 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792 * DIR_ITEM/DIR_INDEX
4793 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM/DIR_INDEX described by @key and compare its entries
 * with @location_key, @name/@namelen and @file_type.
 *
 * When @key is a DIR_INDEX key whose offset is (u64)-1, the index is first
 * resolved through find_dir_index(), which writes the result into
 * key->offset.  Entry names longer than BTRFS_NAME_LEN are trimmed to
 * BTRFS_NAME_LEN (with a warning) before being compared.
 *
 * NOTE(review): the "name too long" warning is emitted after len has been
 * overwritten with BTRFS_NAME_LEN, so it reports the clamped value rather
 * than the on-disk length - confirm whether that is intended.
 * NOTE(review): local declarations, the match/next/out paths and the return
 * statement are not visible in this view.
 */
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797 struct btrfs_key *location_key, char *name,
4798 u32 namelen, u8 file_type)
4800 struct btrfs_path path;
4801 struct extent_buffer *node;
4802 struct btrfs_dir_item *di;
4803 struct btrfs_key location;
4804 char namebuf[BTRFS_NAME_LEN] = {0};
4813 /* get the index by traversing all index */
4814 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815 ret = find_dir_index(root, key->objectid,
4816 location_key->objectid, &key->offset,
4817 name, namelen, file_type);
4819 ret = DIR_INDEX_MISSING;
4823 btrfs_init_path(&path);
4824 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4826 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4831 /* Check whether inode_id/filetype/name match */
4832 node = path.nodes[0];
4833 slot = path.slots[0];
4834 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835 total = btrfs_item_size_nr(node, slot);
4836 while (cur < total) {
/* Assume a mismatch until an entry passes every comparison below */
4837 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4840 len = btrfs_dir_name_len(node, di);
4841 data_len = btrfs_dir_data_len(node, di);
4843 btrfs_dir_item_key_to_cpu(node, di, &location);
4844 if (location.objectid != location_key->objectid ||
4845 location.type != location_key->type ||
4846 location.offset != location_key->offset)
4849 filetype = btrfs_dir_type(node, di);
4850 if (file_type != filetype)
4853 if (len > BTRFS_NAME_LEN) {
4854 len = BTRFS_NAME_LEN;
4855 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4857 key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX",
4859 key->objectid, key->offset, len);
4861 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4863 if (len != namelen || strncmp(namebuf, name, len))
/* Advance past this entry: header, name and any trailing data */
4869 len += sizeof(*di) + data_len;
4870 di = (struct btrfs_dir_item *)((char *)di + len);
4875 btrfs_release_path(&path);
4880 * Prints inode ref error message
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883 u64 index, const char *namebuf, int name_len,
4884 u8 filetype, int err)
4889 /* root dir error */
4890 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4892 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893 root->objectid, key->objectid, key->offset, namebuf);
4898 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900 root->objectid, key->offset,
4901 btrfs_name_hash(namebuf, name_len),
4902 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4904 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906 root->objectid, key->offset, index,
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4912 * Insert the missing inode item.
4914 * Returns 0 means success.
4915 * Returns <0 means error.
/*
 * Insert an INODE_ITEM for @ino into @root inside its own transaction.
 *
 * The key is searched for first; the visible condition
 * "ret < 0 || !ret" leaves the insertion path on a search error and when the
 * item already exists.  Otherwise create_inode_item_lowmem() is called and
 * the transaction is committed.  A failure is reported with error().
 *
 * NOTE(review): the return value of create_inode_item_lowmem() does not
 * appear to be captured on the visible line - confirm against the full file.
 * Key setup, labels and the return statement are also not visible here.
 */
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4920 struct btrfs_key key;
4921 struct btrfs_trans_handle *trans;
4922 struct btrfs_path path;
4926 key.type = BTRFS_INODE_ITEM_KEY;
4929 btrfs_init_path(&path);
4930 trans = btrfs_start_transaction(root, 1);
4931 if (IS_ERR(trans)) {
4936 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4937 if (ret < 0 || !ret)
4940 /* insert inode item */
4941 create_inode_item_lowmem(trans, root, ino, filetype);
4944 btrfs_commit_transaction(trans, root);
4947 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948 root->objectid, ino);
4949 btrfs_release_path(&path);
4954 * The ternary means dir item, dir index and relative inode ref.
4955 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4958 * If two of three is missing or mismatched, delete the existing one.
4959 * If one of three is missing or mismatched, add the missing one.
4961 * returns 0 means success.
4962 * returns non-zero on error.
/*
 * Repair the (dir item, dir index, inode ref) triple of one directory entry.
 *
 * The DIR_INDEX, DIR_ITEM and INODE_REF error bits of the incoming error
 * bitmap are inspected to derive a "stage"; depending on it the existing
 * pieces are removed with btrfs_unlink() or the missing piece is added with
 * btrfs_add_link(), inside a single transaction.  The outcome is reported
 * with error() on failure or printf() on success ("Delete" for stage 2,
 * "Add" otherwise).
 *
 * NOTE(review): the stage bookkeeping, the branch conditions and any check
 * of the btrfs_start_transaction() result are on lines not visible in this
 * view - confirm against the full file.
 */
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965 u64 index, char *name, int name_len, u8 filetype,
4968 struct btrfs_trans_handle *trans;
4973 * stage shall be one of following valild values:
4974 * 0: Fine, nothing to do.
4975 * 1: One of three is wrong, so add missing one.
4976 * 2: Two of three is wrong, so delete existed one.
4978 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4982 if (err & (INODE_REF_MISSING))
4985 /* stage must be smaller than 3 */
4988 trans = btrfs_start_transaction(root, 1);
4990 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4995 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996 filetype, &index, 1, 1);
5000 btrfs_commit_transaction(trans, root);
5003 error("fail to repair inode %llu name %s filetype %u",
5004 ino, name, filetype);
5006 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007 stage == 2 ? "Delete" : "Add",
5008 ino, name, filetype);
5014 * Traverse the given INODE_REF and call find_dir_item() to find related
5015 * DIR_ITEM/DIR_INDEX.
5017 * @root: the root of the fs/file tree
5018 * @ref_key: the key of the INODE_REF
5019 * @path the path provides node and slot
5020 * @refs_ret: the count of INODE_REF
5021 * @mode: the st_mode of INODE_ITEM
5022 * @name_ret: returns with the first ref's name
5023 * @namelen_ret: len of the name_ret
5025 * Return 0 if no error occurred.
/*
 * Walk every entry of the INODE_REF item at @path and verify that a matching
 * DIR_INDEX and DIR_ITEM exist in the parent directory (ref_key->offset).
 *
 * For each entry the index and name are read (names longer than
 * BTRFS_NAME_LEN are clamped, with a warning), the first name is copied to
 * @name_ret when requested, and find_dir_item() is called once with a
 * DIR_INDEX key and once with a DIR_ITEM key.  A ref on the root directory
 * (objectid BTRFS_FIRST_FREE_OBJECTID) must be index 0, name ".." and point
 * at itself, otherwise both DIR_INDEX_MISSING and DIR_ITEM_MISSING are set
 * so that repair deletes it.  With repair enabled, errors are handed to
 * repair_ternary_lowmem(); after a repair the item is searched again because
 * the path may have changed.
 *
 * NOTE(review): repair_ternary_lowmem() and print_inode_ref_err() receive
 * name_len (the on-disk length), while find_dir_item() receives the clamped
 * len - confirm whether that is intended.
 * NOTE(review): declarations, loop/label structure, ref counting and the
 * return statement are not visible in this view.
 */
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028 struct btrfs_path *path, char *name_ret,
5029 u32 *namelen_ret, u64 *refs_ret, int mode)
5031 struct btrfs_key key;
5032 struct btrfs_key location;
5033 struct btrfs_inode_ref *ref;
5034 struct extent_buffer *node;
5035 char namebuf[BTRFS_NAME_LEN] = {0};
5045 int need_research = 0;
5053 /* since after repair, path and the dir item may be changed */
5054 if (need_research) {
5056 btrfs_release_path(path);
5057 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058 /* the item was deleted, let path point to the last checked item */
5060 if (path->slots[0] == 0)
5061 btrfs_prev_leaf(root, path);
5069 location.objectid = ref_key->objectid;
5070 location.type = BTRFS_INODE_ITEM_KEY;
5071 location.offset = 0;
5072 node = path->nodes[0];
5073 slot = path->slots[0];
/* namebuf is a char array, so the element count equals its size in bytes */
5075 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077 total = btrfs_item_size_nr(node, slot);
5080 /* Update inode ref count */
5083 index = btrfs_inode_ref_index(node, ref);
5084 name_len = btrfs_inode_ref_name_len(node, ref);
5086 if (name_len <= BTRFS_NAME_LEN) {
5089 len = BTRFS_NAME_LEN;
5090 warning("root %llu INODE_REF[%llu %llu] name too long",
5091 root->objectid, ref_key->objectid, ref_key->offset);
5094 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5096 /* copy the first name found to name_ret */
5097 if (refs == 1 && name_ret) {
5098 memcpy(name_ret, namebuf, len);
5102 /* Check root dir ref */
5103 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104 if (index != 0 || len != strlen("..") ||
5105 strncmp("..", namebuf, len) ||
5106 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107 /* set err bits then repair will delete the ref */
5108 err |= DIR_INDEX_MISSING;
5109 err |= DIR_ITEM_MISSING;
5114 /* Find related DIR_INDEX */
5115 key.objectid = ref_key->offset;
5116 key.type = BTRFS_DIR_INDEX_KEY;
5118 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119 imode_to_type(mode));
5121 /* Find related dir_item */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_DIR_ITEM_KEY;
5124 key.offset = btrfs_name_hash(namebuf, len);
5125 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126 imode_to_type(mode));
5128 if (tmp_err && repair) {
5129 ret = repair_ternary_lowmem(root, ref_key->offset,
5130 ref_key->objectid, index, namebuf,
5131 name_len, imode_to_type(mode),
5138 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139 imode_to_type(mode), tmp_err);
/* Step over this entry using the on-disk name length */
5141 len = sizeof(*ref) + name_len;
5142 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5153 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154 * DIR_ITEM/DIR_INDEX.
5156 * @root: the root of the fs/file tree
5157 * @ref_key: the key of the INODE_EXTREF
5158 * @refs: the count of INODE_EXTREF
5159 * @mode: the st_mode of INODE_ITEM
5161 * Return 0 if no error occurred.
/*
 * Walk every entry of the INODE_EXTREF item at (@node, @slot) and verify that
 * a matching DIR_INDEX and DIR_ITEM exist in the entry's parent directory.
 *
 * For each entry the name length, index and parent are read (names longer
 * than BTRFS_NAME_LEN are clamped, with a warning) and find_dir_item() is
 * called once with a DIR_INDEX key and once with a DIR_ITEM key.  An entry
 * with index 0 whose name differs from ".." is reported and flagged with
 * ROOT_DIR_ERROR.
 *
 * NOTE(review): unlike check_inode_ref(), the last argument handed to
 * find_dir_item() here is "mode" itself rather than imode_to_type(mode); the
 * header comment describes @mode as st_mode while find_dir_item() takes a
 * u8 file type - confirm whether a conversion is missing.
 * NOTE(review): the ".." comparison is bounded by name_len (on-disk length),
 * not the clamped len - confirm.
 * NOTE(review): declarations, loop structure, error accumulation and the
 * return statement are not visible in this view.
 */
5163 static int check_inode_extref(struct btrfs_root *root,
5164 struct btrfs_key *ref_key,
5165 struct extent_buffer *node, int slot, u64 *refs,
5168 struct btrfs_key key;
5169 struct btrfs_key location;
5170 struct btrfs_inode_extref *extref;
5171 char namebuf[BTRFS_NAME_LEN] = {0};
5181 location.objectid = ref_key->objectid;
5182 location.type = BTRFS_INODE_ITEM_KEY;
5183 location.offset = 0;
5185 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186 total = btrfs_item_size_nr(node, slot);
5189 /* update inode ref count */
5191 name_len = btrfs_inode_extref_name_len(node, extref);
5192 index = btrfs_inode_extref_index(node, extref);
5193 parent = btrfs_inode_extref_parent(node, extref);
5194 if (name_len <= BTRFS_NAME_LEN) {
5197 len = BTRFS_NAME_LEN;
5198 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199 root->objectid, ref_key->objectid, ref_key->offset);
5201 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5203 /* Check root dir ref name */
5204 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206 root->objectid, ref_key->objectid, ref_key->offset,
5208 err |= ROOT_DIR_ERROR;
5211 /* find related dir_index */
5212 key.objectid = parent;
5213 key.type = BTRFS_DIR_INDEX_KEY;
5215 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5218 /* find related dir_item */
5219 key.objectid = parent;
5220 key.type = BTRFS_DIR_ITEM_KEY;
5221 key.offset = btrfs_name_hash(namebuf, len);
5222 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* Step over this entry using the on-disk name length */
5225 len = sizeof(*extref) + name_len;
5226 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5236 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237 * DIR_ITEM/DIR_INDEX match.
5238 * Return with @index_ret.
5240 * @root: the root of the fs/file tree
5241 * @key: the key of the INODE_REF/INODE_EXTREF
5242 * @name: the name in the INODE_REF/INODE_EXTREF
5243 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5244 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5245 * value (u64)-1 means do not check index
5246 * @ext_ref: the EXTENDED_IREF feature
5248 * Return 0 if no error occurred.
5249 * Return >0 for error bitmap
/*
 * Search the INODE_REF item at @key, and afterwards the INODE_EXTREF item,
 * for an entry whose name equals @name/@namelen.
 *
 * When *@index_ret is not (u64)-1 an entry must also carry that index;
 * on a name match *@index_ret is overwritten with the entry's index.
 * INODE_REF entries whose name would run past the item or exceed
 * BTRFS_NAME_LEN trigger a warning and are compared with a clamped length.
 *
 * Side effect: for the INODE_EXTREF lookup the caller's @key is modified in
 * place - key->type becomes BTRFS_INODE_EXTREF_KEY and key->offset becomes
 * btrfs_extref_hash(dir_id, name, namelen), with dir_id taken from the
 * original key->offset.  Because of that, the "name too long" warning in the
 * extref loop always sees key->type == BTRFS_INODE_EXTREF_KEY.
 *
 * NOTE(review): declarations, the found/next/out control flow, the
 * @ext_ref gate and the return statement are not visible in this view.
 */
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252 char *name, int namelen, u64 *index_ret,
5253 unsigned int ext_ref)
5255 struct btrfs_path path;
5256 struct btrfs_inode_ref *ref;
5257 struct btrfs_inode_extref *extref;
5258 struct extent_buffer *node;
5259 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5272 btrfs_init_path(&path);
5273 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5275 ret = INODE_REF_MISSING;
5279 node = path.nodes[0];
5280 slot = path.slots[0];
5282 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283 total = btrfs_item_size_nr(node, slot);
5285 /* Iterate all entry of INODE_REF */
5286 while (cur < total) {
5287 ret = INODE_REF_MISSING;
5289 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290 ref_index = btrfs_inode_ref_index(node, ref);
5291 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Guard against a name that overruns the item or the local buffer */
5294 if (cur + sizeof(*ref) + ref_namelen > total ||
5295 ref_namelen > BTRFS_NAME_LEN) {
5296 warning("root %llu INODE %s[%llu %llu] name too long",
5298 key->type == BTRFS_INODE_REF_KEY ?
5300 key->objectid, key->offset);
5302 if (cur + sizeof(*ref) > total)
5304 len = min_t(u32, total - cur - sizeof(*ref),
5310 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5313 if (len != namelen || strncmp(ref_namebuf, name, len))
5316 *index_ret = ref_index;
5320 len = sizeof(*ref) + ref_namelen;
5321 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5326 /* Skip if not support EXTENDED_IREF feature */
5330 btrfs_release_path(&path);
5331 btrfs_init_path(&path);
/* From here on the caller's key describes the INODE_EXTREF item */
5333 dir_id = key->offset;
5334 key->type = BTRFS_INODE_EXTREF_KEY;
5335 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5337 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5339 ret = INODE_REF_MISSING;
5343 node = path.nodes[0];
5344 slot = path.slots[0];
5346 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5348 total = btrfs_item_size_nr(node, slot);
5350 /* Iterate all entry of INODE_EXTREF */
5351 while (cur < total) {
5352 ret = INODE_REF_MISSING;
5354 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355 ref_index = btrfs_inode_extref_index(node, extref);
5356 parent = btrfs_inode_extref_parent(node, extref);
5357 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5360 if (parent != dir_id)
5363 if (ref_namelen <= BTRFS_NAME_LEN) {
5366 len = BTRFS_NAME_LEN;
5367 warning("root %llu INODE %s[%llu %llu] name too long",
5369 key->type == BTRFS_INODE_REF_KEY ?
5371 key->objectid, key->offset);
5373 read_extent_buffer(node, ref_namebuf,
5374 (unsigned long)(extref + 1), len);
5376 if (len != namelen || strncmp(ref_namebuf, name, len))
5379 *index_ret = ref_index;
5384 len = sizeof(*extref) + ref_namelen;
5385 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5390 btrfs_release_path(&path);
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395 u64 ino, u64 index, const char *namebuf,
5396 int name_len, u8 filetype, int err)
5398 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400 root->objectid, key->objectid, key->offset, namebuf,
5402 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5405 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407 root->objectid, key->objectid, index, namebuf, filetype,
5408 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5411 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5413 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414 root->objectid, ino, index, namebuf, filetype,
5415 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5418 if (err & INODE_REF_MISSING)
5420 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421 root->objectid, ino, key->objectid, namebuf, filetype);
5426 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5428 * Returns error after repair
/*
 * Try to repair the problems recorded in the error bitmap of one directory
 * entry and hand back the bits that remain.
 *
 * A missing INODE_ITEM is repaired first through
 * repair_inode_item_missing(); any remaining non-INODE_ITEM bits are passed
 * to repair_ternary_lowmem().  The corresponding bits are cleared from the
 * bitmap after each step.
 *
 * NOTE(review): the conditions guarding the bit clearing (presumably a
 * successful repair) and the return statement are not visible in this view.
 */
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431 u64 index, u8 filetype, char *namebuf, u32 name_len,
5436 if (err & INODE_ITEM_MISSING) {
5437 ret = repair_inode_item_missing(root, ino, filetype);
5439 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5442 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444 name_len, filetype, err);
5446 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448 err &= ~(INODE_REF_MISSING);
/*
 * Accumulate name lengths over the items of key type @type that belong to
 * directory inode @ino.
 *
 * The search starts at offset (u64)-1 and steps backwards with
 * btrfs_previous_item(); every btrfs_dir_item inside an item is visited and
 * its name length is clamped to BTRFS_NAME_LEN.
 *
 * NOTE(review): the output parameter, the accumulation statement, loop
 * labels and the return statement are not visible in this view.
 */
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5457 struct btrfs_key key;
5458 struct btrfs_path path;
5460 struct btrfs_dir_item *di;
5470 key.offset = (u64)-1;
5472 btrfs_init_path(&path);
5473 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5478 /* if found, go to special case */
5483 ret = btrfs_previous_item(root, &path, ino, type);
5491 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5493 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5495 while (cur < total) {
5496 len = btrfs_dir_name_len(path.nodes[0], di);
5497 if (len > BTRFS_NAME_LEN)
5498 len = BTRFS_NAME_LEN;
5501 len += btrfs_dir_data_len(path.nodes[0], di);
5503 di = (struct btrfs_dir_item *)((char *)di + len);
5509 btrfs_release_path(&path);
/*
 * Compute the size of directory inode @ino in @root as the sum of what
 * __count_dir_isize() reports for its DIR_ITEM and its DIR_INDEX keys, and
 * store it in *@size. A failure is reported with error().
 */
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5520 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5524 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5528 *size = item_size + index_size;
5532 error("failed to count root %llu INODE[%llu] root size",
5533 root->objectid, ino);
5538 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5541 * @root: the root of the fs/file tree
5542 * @di_key: the key of the DIR_ITEM/DIR_INDEX being checked
5544 * @size: the st_size of the INODE_ITEM
5545 * @ext_ref: the EXTENDED_IREF feature
5547 * Return 0 if no error occurred.
5548 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry stored in the DIR_ITEM/DIR_INDEX item at @di_key: its
 * data_len and name length, the name hash (DIR_ITEM only), the INODE_ITEM
 * named by the entry's location, the INODE_REF via find_inode_ref() and the
 * counterpart DIR_INDEX/DIR_ITEM via find_dir_item(). Name lengths are
 * added to *@size. In repair mode repair_dir_item() is tried for the errors
 * found on an entry.
 *
 * NOTE(review): parts of this function are not visible in this excerpt; the
 * comments below describe only the statements that are shown.
 */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551 struct btrfs_path *path, u64 *size,
5552 unsigned int ext_ref)
5554 struct btrfs_dir_item *di;
5555 struct btrfs_inode_item *ii;
5556 struct btrfs_key key;
5557 struct btrfs_key location;
5558 struct extent_buffer *node;
5560 char namebuf[BTRFS_NAME_LEN] = {0};
5572 int need_research = 0;
5575 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576 * ignore index check.
5578 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579 index = di_key->offset;
5586 /* since after repair, path and the dir item may be changed */
5587 if (need_research) {
5589 err |= DIR_COUNT_AGAIN;
5590 btrfs_release_path(path);
5591 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592 /* the item was deleted, let path point the last checked item */
5594 if (path->slots[0] == 0)
5595 btrfs_prev_leaf(root, path);
5603 node = path->nodes[0];
5604 slot = path->slots[0];
5606 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607 total = btrfs_item_size_nr(node, slot);
5608 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5610 while (cur < total) {
5611 data_len = btrfs_dir_data_len(node, di);
5614 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5616 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617 di_key->objectid, di_key->offset, data_len);
5619 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are only warned about; len is capped. */
5620 if (name_len <= BTRFS_NAME_LEN) {
5623 len = BTRFS_NAME_LEN;
5624 warning("root %llu %s[%llu %llu] name too long",
5626 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627 di_key->objectid, di_key->offset);
/* NOTE(review): the raw on-disk name_len is added here, not the capped len. */
5629 (*size) += name_len;
5630 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5632 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset must equal the hash of the name. */
5634 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635 di_key->offset != btrfs_name_hash(namebuf, len)) {
5637 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638 root->objectid, di_key->objectid, di_key->offset,
5639 namebuf, len, filetype, di_key->offset,
5640 btrfs_name_hash(namebuf, len));
5643 btrfs_dir_item_key_to_cpu(node, di, &location);
5644 /* Ignore related ROOT_ITEM check */
5645 if (location.type == BTRFS_ROOT_ITEM_KEY)
5648 btrfs_release_path(path);
5649 /* Check relative INODE_ITEM(existence/filetype) */
5650 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5652 tmp_err |= INODE_ITEM_MISSING;
5656 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657 struct btrfs_inode_item);
5658 mode = btrfs_inode_mode(path->nodes[0], ii);
5659 if (imode_to_type(mode) != filetype) {
5660 tmp_err |= INODE_ITEM_MISMATCH;
5664 /* Check relative INODE_REF/INODE_EXTREF */
5665 key.objectid = location.objectid;
5666 key.type = BTRFS_INODE_REF_KEY;
5667 key.offset = di_key->objectid;
5668 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5671 /* check relative INDEX/ITEM */
5672 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY just above, so
 * unless find_inode_ref() rewrites the key this test is always false and
 * the BTRFS_DIR_ITEM_KEY assignment below would be used for both item
 * types. di_key->type looks like the intended operand - confirm.
 */
5673 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674 key.type = BTRFS_DIR_INDEX_KEY;
5677 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): this hashes name_len bytes of namebuf, which holds at most
 * BTRFS_NAME_LEN bytes; name_len may be larger (see the warning above). The
 * find_dir_item() call below passes name_len as well.
 */
5678 key.offset = btrfs_name_hash(namebuf, name_len);
5681 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682 name_len, filetype);
5683 /* find_dir_item may find index */
5684 if (key.type == BTRFS_DIR_INDEX_KEY)
5688 if (tmp_err && repair) {
5689 ret = repair_dir_item(root, di_key->objectid,
5690 location.objectid, index,
5691 imode_to_type(mode), namebuf,
/* A result different from tmp_err presumably means the repair changed something. */
5693 if (ret != tmp_err) {
5698 btrfs_release_path(path);
5699 print_dir_item_err(root, di_key, location.objectid, index,
5700 namebuf, name_len, filetype, tmp_err);
/* Step over this entry: header, name and data. */
5702 len = sizeof(*di) + name_len + data_len;
5703 di = (struct btrfs_dir_item *)((char *)di + len);
5706 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708 root->objectid, di_key->objectid,
/* Point @path back at @di_key. */
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/*
 * NOTE(review): this ORs a negative errno into err, which is otherwise used
 * as a bitmap of error flags (e.g. DIR_COUNT_AGAIN above).
 */
5718 err |= ret > 0 ? -ENOENT : ret;
5723 * Wrapper function of btrfs_punch_hole.
5725 * Returns 0 on success.
5726 * Returns non-zero on error.
/*
 * Start a transaction on @root, call btrfs_punch_hole() for inode @ino with
 * @start and len, print the outcome and commit the transaction.
 * If the transaction cannot be started, PTR_ERR(trans) is returned.
 */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5731 struct btrfs_trans_handle *trans;
5734 trans = btrfs_start_transaction(root, 1);
5736 return PTR_ERR(trans);
5738 ret = btrfs_punch_hole(trans, root, ino, start, len);
5740 error("failed to add hole [%llu, %llu] in inode [%llu]",
5743 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the result of btrfs_commit_transaction() is not checked. */
5746 btrfs_commit_transaction(trans, root);
5751 * Check file extent datasum/hole, update the size of the file extents,
5752 * check and update the last offset of the file extent.
5754 * @root: the root of fs/file tree.
5755 * @fkey: the key of the file extent.
5756 * @nodatasum: INODE_NODATASUM feature.
5757 * @size: the sum of all EXTENT_DATA items size for this inode.
5758 * @end: the offset of the last extent.
5760 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (@fkey, at @slot of @node) of an inode in @root.
 *
 * Inline extents are checked for being empty and, when uncompressed, for
 * their size against the item's inline length. Regular and prealloc extents
 * have their checksum coverage checked with count_csum_range() according to
 * @nodatasum and the extent type. Unless the no_holes flag is set, the item
 * must start at *@end. *@end and *@size are advanced by the extent's byte
 * count. Problems are reported with error() and recorded as bits in err.
 *
 * NOTE(review): parts of this function are not visible in this excerpt.
 */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763 struct extent_buffer *node, int slot,
5764 unsigned int nodatasum, u64 *size, u64 *end)
5766 struct btrfs_file_extent_item *fi;
5769 u64 extent_num_bytes;
5771 u64 csum_found; /* In byte size, sectorsize aligned */
5772 u64 search_start; /* Logical range start we search for csum */
5773 u64 search_len; /* Logical range len we search for csum */
5774 unsigned int extent_type;
5775 unsigned int is_hole;
5780 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5782 /* Check inline extent */
5783 extent_type = btrfs_file_extent_type(node, fi);
5784 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785 struct btrfs_item *e = btrfs_item_nr(slot);
5786 u32 item_inline_len;
5788 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790 compressed = btrfs_file_extent_compression(node, fi);
5791 if (extent_num_bytes == 0) {
5793 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794 root->objectid, fkey->objectid, fkey->offset);
5795 err |= FILE_EXTENT_ERROR;
/* The size comparison is only made for uncompressed inline data. */
5797 if (!compressed && extent_num_bytes != item_inline_len) {
5799 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800 root->objectid, fkey->objectid, fkey->offset,
5801 extent_num_bytes, item_inline_len);
5802 err |= FILE_EXTENT_ERROR;
5804 *end += extent_num_bytes;
5805 *size += extent_num_bytes;
5809 /* Check extent type */
5810 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812 err |= FILE_EXTENT_ERROR;
5813 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814 root->objectid, fkey->objectid, fkey->offset);
5818 /* Check REG_EXTENT/PREALLOC_EXTENT */
5819 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822 extent_offset = btrfs_file_extent_offset(node, fi);
5823 compressed = btrfs_file_extent_compression(node, fi);
/* A hole has neither a disk address nor a disk length. */
5824 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5827 * Check EXTENT_DATA csum
5829 * For plain (uncompressed) extent, we should only check the range
5830 * we're referring to, as it's possible that part of prealloc extent
5831 * has been written, and has csum:
5833 * |<--- Original large preallocated extent A ---->|
5834 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5837 * For compressed extent, we should check the whole range.
5840 search_start = disk_bytenr + extent_offset;
5841 search_len = extent_num_bytes;
5843 search_start = disk_bytenr;
5844 search_len = disk_num_bytes;
/*
 * NOTE(review): no initializer is visible for csum_found; the checks below
 * rely on count_csum_range() storing to it even when it fails - confirm.
 */
5846 ret = count_csum_range(root, search_start, search_len, &csum_found);
5847 if (csum_found > 0 && nodatasum) {
5848 err |= ODD_CSUM_ITEM;
5849 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850 root->objectid, fkey->objectid, fkey->offset);
5851 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852 !is_hole && (ret < 0 || csum_found < search_len)) {
5853 err |= CSUM_ITEM_MISSING;
5854 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855 root->objectid, fkey->objectid, fkey->offset,
5856 csum_found, search_len);
5857 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858 err |= ODD_CSUM_ITEM;
5859 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860 root->objectid, fkey->objectid, fkey->offset, csum_found);
5863 /* Check EXTENT_DATA hole */
5864 if (!no_holes && *end != fkey->offset) {
5866 ret = punch_extent_hole(root, fkey->objectid,
5867 *end, fkey->offset - *end);
/* The gap is reported unless repair mode punched the hole successfully. */
5868 if (!repair || ret) {
5869 err |= FILE_EXTENT_ERROR;
5871 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5872 root->objectid, fkey->objectid, fkey->offset, *end);
5876 *end += extent_num_bytes;
5878 *size += extent_num_bytes;
5884 * Set inode item nbytes to @nbytes
5886 * Returns 0 on success
5887 * Returns != 0 on error
/*
 * Rewrite the nbytes field of inode @ino's INODE_ITEM to @nbytes inside a
 * new transaction, then re-search @path for the key it pointed at on entry.
 */
5889 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5890 struct btrfs_path *path,
5891 u64 ino, u64 nbytes)
5893 struct btrfs_trans_handle *trans;
5894 struct btrfs_inode_item *ii;
5895 struct btrfs_key key;
5896 struct btrfs_key research_key;
/* Remember the key @path points at so it can be looked up again at the end. */
5900 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5903 key.type = BTRFS_INODE_ITEM_KEY;
5906 trans = btrfs_start_transaction(root, 1);
5907 if (IS_ERR(trans)) {
5908 ret = PTR_ERR(trans);
5913 btrfs_release_path(path);
5914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5922 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5923 struct btrfs_inode_item);
5924 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5925 btrfs_mark_buffer_dirty(path->nodes[0]);
5927 btrfs_commit_transaction(trans, root);
5930 error("failed to set nbytes in inode %llu root %llu",
5931 ino, root->root_key.objectid);
/*
 * NOTE(review): the newline sits in the middle of this message and there is
 * none at the end; repair_dir_isize_lowmem() prints its equivalent message
 * with the newline last.
 */
5933 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5934 root->root_key.objectid, nbytes);
/* Restore @path to the key it pointed at on entry. */
5937 btrfs_release_path(path);
5938 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5945 * Set directory inode isize to @isize.
5947 * Returns 0 on success.
5948 * Returns != 0 on error.
/*
 * Rewrite the size field of a directory inode's INODE_ITEM to isize inside a
 * new transaction, then re-search @path for the key it pointed at on entry.
 */
5950 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5951 struct btrfs_path *path,
5954 struct btrfs_trans_handle *trans;
5955 struct btrfs_inode_item *ii;
5956 struct btrfs_key key;
5957 struct btrfs_key research_key;
/* Remember the key @path points at so it can be looked up again at the end. */
5961 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5964 key.type = BTRFS_INODE_ITEM_KEY;
5967 trans = btrfs_start_transaction(root, 1);
5968 if (IS_ERR(trans)) {
5969 ret = PTR_ERR(trans);
5974 btrfs_release_path(path);
5975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5983 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5984 struct btrfs_inode_item);
5985 btrfs_set_inode_size(path->nodes[0], ii, isize);
5986 btrfs_mark_buffer_dirty(path->nodes[0]);
5988 btrfs_commit_transaction(trans, root);
5991 error("failed to set isize in inode %llu root %llu",
5992 ino, root->root_key.objectid);
5994 printf("Set isize in inode %llu root %llu to %llu\n",
5995 ino, root->root_key.objectid, isize);
/* Restore @path to the key it pointed at on entry. */
5997 btrfs_release_path(path);
5998 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6005 * Wrapper function for btrfs_add_orphan_item().
6007 * Returns 0 on success.
6008 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino with btrfs_add_orphan_item() inside a
 * new transaction, then re-search @path for the key it pointed at on entry.
 */
6010 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6011 struct btrfs_path *path, u64 ino)
6013 struct btrfs_trans_handle *trans;
6014 struct btrfs_key research_key;
/* Remember the key @path points at so it can be looked up again at the end. */
6018 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6020 trans = btrfs_start_transaction(root, 1);
6021 if (IS_ERR(trans)) {
6022 ret = PTR_ERR(trans);
6027 btrfs_release_path(path);
6028 ret = btrfs_add_orphan_item(trans, root, path, ino);
6030 btrfs_commit_transaction(trans, root);
6033 error("failed to add inode %llu as orphan item root %llu",
6034 ino, root->root_key.objectid);
6036 printf("Added inode %llu as orphan item root %llu\n",
6037 ino, root->root_key.objectid);
/* Restore @path to the key it pointed at on entry. */
6039 btrfs_release_path(path);
6040 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6046 /* Set inode_item nlink to @ref_count.
6047 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6049 * Returns 0 on success
/*
 * Set the nlink field of inode @ino's INODE_ITEM to @ref_count inside a new
 * transaction. When @ref_count is 0 the inode is first linked into
 * lost+found through link_inode_to_lostfound(), which receives &ref_count.
 * If no @name is supplied, the decimal inode number is used as the name.
 * Afterwards @path is re-searched for the key it pointed at on entry.
 */
6051 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6052 struct btrfs_path *path, u64 ino,
6053 const char *name, u32 namelen,
6054 u64 ref_count, u8 filetype, u64 *nlink)
6056 struct btrfs_trans_handle *trans;
6057 struct btrfs_inode_item *ii;
6058 struct btrfs_key key;
6059 struct btrfs_key old_key;
6060 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember the key @path points at so it can be looked up again at the end. */
6066 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6068 if (name && namelen) {
6069 ASSERT(namelen <= BTRFS_NAME_LEN);
6070 memcpy(namebuf, name, namelen);
/* Fallback name: the inode number printed in decimal. */
6073 sprintf(namebuf, "%llu", ino);
6074 name_len = count_digits(ino);
6075 printf("Can't find file name for inode %llu, use %s instead\n",
6079 trans = btrfs_start_transaction(root, 1);
6080 if (IS_ERR(trans)) {
6081 ret = PTR_ERR(trans);
6085 btrfs_release_path(path);
6086 /* if refs is 0, put it into lostfound */
6087 if (ref_count == 0) {
6088 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6089 name_len, filetype, &ref_count);
6094 /* reset inode_item's nlink to ref_count */
6096 key.type = BTRFS_INODE_ITEM_KEY;
6099 btrfs_release_path(path);
6100 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6106 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6107 struct btrfs_inode_item);
6108 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6109 btrfs_mark_buffer_dirty(path->nodes[0]);
6114 btrfs_commit_transaction(trans, root);
/*
 * NOTE(review): both messages below say "inode ... root ..." but pass
 * root->objectid first and ino second, so the two numbers are printed
 * swapped.
 */
6118 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6119 root->objectid, ino, namebuf, filetype);
6121 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6122 root->objectid, ino, namebuf, filetype);
/* Restore @path to the key it pointed at on entry. */
6125 btrfs_release_path(path);
6126 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6133 * Check INODE_ITEM and related ITEMs (the same inode number)
6134 * 1. check link count
6135 * 2. check inode ref/extref
6136 * 3. check dir item/index
6138 * @ext_ref: the EXTENDED_IREF feature
6140 * Return 0 if no error occurred.
6141 * Return >0 (an error bitmap) if an error was found or the traversal is done
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that share its objectid (INODE_REF, INODE_EXTREF, DIR_ITEM,
 * DIR_INDEX, EXTENT_DATA, XATTR_ITEM), then verify the nlink, size and
 * nbytes recorded in the inode item against what was counted. In repair
 * mode the *_lowmem repair helpers and punch_extent_hole() are tried before
 * an error is recorded.
 *
 * NOTE(review): parts of this function are not visible in this excerpt; the
 * comments below describe only the statements that are shown.
 */
6143 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6144 unsigned int ext_ref)
6146 struct extent_buffer *node;
6147 struct btrfs_inode_item *ii;
6148 struct btrfs_key key;
6149 struct btrfs_key last_key;
6158 u64 extent_size = 0;
6160 unsigned int nodatasum;
6164 char namebuf[BTRFS_NAME_LEN] = {0};
6167 node = path->nodes[0];
6168 slot = path->slots[0];
6170 btrfs_item_key_to_cpu(node, &key, slot);
6171 inode_id = key.objectid;
6173 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6174 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that are verified after the item scan. */
6180 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6181 isize = btrfs_inode_size(node, ii);
6182 nbytes = btrfs_inode_nbytes(node, ii);
6183 mode = btrfs_inode_mode(node, ii);
6184 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6185 nlink = btrfs_inode_nlink(node, ii);
6186 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/*
 * Remember the current key before advancing; it is used to restore @path
 * when LAST_ITEM is set (see below).
 */
6189 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6190 ret = btrfs_next_item(root, path);
6192 /* out will fill 'err' using current statistics */
6194 } else if (ret > 0) {
6199 node = path->nodes[0];
6200 slot = path->slots[0];
6201 btrfs_item_key_to_cpu(node, &key, slot);
/* Only items with this inode's objectid are dispatched below. */
6202 if (key.objectid != inode_id)
6206 case BTRFS_INODE_REF_KEY:
6207 ret = check_inode_ref(root, &key, path, namebuf,
6208 &name_len, &refs, mode);
6211 case BTRFS_INODE_EXTREF_KEY:
6212 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6213 warning("root %llu EXTREF[%llu %llu] isn't supported",
6214 root->objectid, key.objectid,
6216 ret = check_inode_extref(root, &key, node, slot, &refs,
6220 case BTRFS_DIR_ITEM_KEY:
6221 case BTRFS_DIR_INDEX_KEY:
6223 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6224 root->objectid, inode_id,
6225 imode_to_type(mode), key.objectid,
6228 ret = check_dir_item(root, &key, path, &size, ext_ref);
6231 case BTRFS_EXTENT_DATA_KEY:
6233 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6234 root->objectid, inode_id, key.objectid,
6237 ret = check_file_extent(root, &key, node, slot,
6238 nodatasum, &extent_size,
6242 case BTRFS_XATTR_ITEM_KEY:
6245 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6246 key.objectid, key.type, key.offset);
6251 if (err & LAST_ITEM) {
6252 btrfs_release_path(path);
6253 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6258 /* verify INODE_ITEM nlink/isize/nbytes */
6260 if (repair && (err & DIR_COUNT_AGAIN)) {
6261 err &= ~DIR_COUNT_AGAIN;
/* NOTE(review): the return value of count_dir_isize() is ignored here. */
6262 count_dir_isize(root, inode_id, &size);
6265 if ((nlink != 1 || refs != 1) && repair) {
6266 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6267 namebuf, name_len, refs, imode_to_type(mode),
6272 err |= LINK_COUNT_ERROR;
6273 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6274 root->objectid, inode_id, nlink);
6278 * Just a warning, as dir inode nbytes is just an
6279 * instructive value.
6281 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6282 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6283 root->objectid, inode_id,
6284 root->fs_info->nodesize);
6287 if (isize != size) {
6289 ret = repair_dir_isize_lowmem(root, path,
6291 if (!repair || ret) {
6294 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6295 root->objectid, inode_id, isize, size);
6299 if (nlink != refs) {
6301 ret = repair_inode_nlinks_lowmem(root, path,
6302 inode_id, namebuf, name_len, refs,
6303 imode_to_type(mode), &nlink);
6304 if (!repair || ret) {
6305 err |= LINK_COUNT_ERROR;
6307 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6308 root->objectid, inode_id, nlink, refs);
6310 } else if (!nlink) {
6312 ret = repair_inode_orphan_item_lowmem(root,
6314 if (!repair || ret) {
6316 error("root %llu INODE[%llu] is orphan item",
6317 root->objectid, inode_id);
/* With holes required, the extents must reach up to isize. */
6321 if (!nbytes && !no_holes && extent_end < isize) {
6323 ret = punch_extent_hole(root, inode_id,
6324 extent_end, isize - extent_end);
6325 if (!repair || ret) {
6326 err |= NBYTES_ERROR;
6328 "root %llu INODE[%llu] size %llu should have a file extent hole",
6329 root->objectid, inode_id, isize);
6333 if (nbytes != extent_size) {
6335 ret = repair_inode_nbytes_lowmem(root, path,
6336 inode_id, extent_size);
6337 if (!repair || ret) {
6338 err |= NBYTES_ERROR;
6340 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6341 root->objectid, inode_id, nbytes,
6347 if (err & LAST_ITEM)
6348 btrfs_next_item(root, path);
6353 * Insert the missing inode item and inode ref.
6355 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled when the directory entry that references the inode is repaired.
6356 * Root dir should be handled specially because root dir is the root of fs.
6358 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the bits set in @err: a missing INODE_REF is inserted with the name ".."
 * inside a new transaction, and a missing INODE_ITEM is recreated as a
 * directory through repair_inode_item_missing(). Bits are cleared from err
 * as the corresponding repair is handled.
 */
6360 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6362 struct btrfs_trans_handle *trans;
6363 struct btrfs_key key;
6364 struct btrfs_path path;
6365 int filetype = BTRFS_FT_DIR;
6368 btrfs_init_path(&path);
6370 if (err & INODE_REF_MISSING) {
/* The ref links the first inode to itself. */
6371 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6372 key.type = BTRFS_INODE_REF_KEY;
6373 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6375 trans = btrfs_start_transaction(root, 1);
6376 if (IS_ERR(trans)) {
6377 ret = PTR_ERR(trans);
6381 btrfs_release_path(&path);
6382 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6386 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6387 BTRFS_FIRST_FREE_OBJECTID,
6388 BTRFS_FIRST_FREE_OBJECTID, 0);
6392 printf("Add INODE_REF[%llu %llu] name %s\n",
6393 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6395 err &= ~INODE_REF_MISSING;
6398 error("fail to insert first inode's ref");
6399 btrfs_commit_transaction(trans, root);
6402 if (err & INODE_ITEM_MISSING) {
6403 ret = repair_inode_item_missing(root,
6404 BTRFS_FIRST_FREE_OBJECTID, filetype);
6407 err &= ~INODE_ITEM_MISSING;
6411 error("fail to repair first inode");
6412 btrfs_release_path(&path);
6417 * check first root dir's inode_item and inode_ref
6419 * returns 0 means no error
6420 * returns >0 means error
6421 * returns <0 means fatal error
/*
 * Check that @root has an INODE_ITEM for BTRFS_FIRST_FREE_OBJECTID whose
 * mode is a directory, and an inode ref named ".." pointing back at the
 * same objectid. repair_fs_first_inode() is called on the resulting error
 * bitmap (the guarding condition is not visible in this excerpt), and any
 * remaining errors are reported.
 *
 * Returns ret when it is negative, otherwise the error bitmap.
 */
6423 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6425 struct btrfs_path path;
6426 struct btrfs_key key;
6427 struct btrfs_inode_item *ii;
6433 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6434 key.type = BTRFS_INODE_ITEM_KEY;
6437 /* For root being dropped, we don't need to check first inode */
6438 if (btrfs_root_refs(&root->root_item) == 0 &&
6439 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6440 BTRFS_FIRST_FREE_OBJECTID)
6443 btrfs_init_path(&path);
6444 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6449 err |= INODE_ITEM_MISSING;
6451 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6452 struct btrfs_inode_item);
6453 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The first inode of a tree must be a directory. */
6454 if (imode_to_type(mode) != BTRFS_FT_DIR)
6455 err |= INODE_ITEM_MISMATCH;
6458 /* lookup first inode ref */
6459 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6460 key.type = BTRFS_INODE_REF_KEY;
6461 /* special index value */
6464 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6470 btrfs_release_path(&path);
6473 err = repair_fs_first_inode(root, err);
6475 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6476 error("root dir INODE_ITEM is %s",
6477 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6478 if (err & INODE_REF_MISSING)
6479 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
6481 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec: a template (match) is filled in and
 * searched for in rec->backref_tree with compare_extent_backref().
 * back starts out as NULL and is set from the node returned by rb_search().
 *
 * NOTE(review): the conditions choosing between @parent and @root in the
 * template are not visible in this excerpt.
 */
6484 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6485 u64 parent, u64 root)
6487 struct rb_node *node;
6488 struct tree_backref *back = NULL;
6489 struct tree_backref match = {
6496 match.parent = parent;
6497 match.node.full_backref = 1;
6502 node = rb_search(&rec->backref_tree, &match.node.node,
6503 (rb_compare_keys)compare_extent_backref, NULL);
6505 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec: a template (match) is filled in, including
 * found_ref and @disk_bytenr, and searched for in rec->backref_tree with
 * compare_extent_backref(). back starts out as NULL and is set from the node
 * returned by rb_search().
 *
 * NOTE(review): the remaining template fields and the conditions choosing
 * between @parent and @root are not visible in this excerpt.
 */
6510 static struct data_backref *find_data_backref(struct extent_record *rec,
6511 u64 parent, u64 root,
6512 u64 owner, u64 offset,
6514 u64 disk_bytenr, u64 bytes)
6516 struct rb_node *node;
6517 struct data_backref *back = NULL;
6518 struct data_backref match = {
6525 .found_ref = found_ref,
6526 .disk_bytenr = disk_bytenr,
6530 match.parent = parent;
6531 match.node.full_backref = 1;
6536 node = rb_search(&rec->backref_tree, &match.node.node,
6537 (rb_compare_keys)compare_extent_backref, NULL);
6539 back = to_data_backref(rb_node_to_extent_backref(node));
6544 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6545 * blocks and integrity of fs tree items.
6547 * @root: the root of the tree to be checked.
6548 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6549 * @account: if not 0, check the tree's tree blocks;
6550 * otherwise check the relationships between fs tree items, in which
6551 * case @root MUST be a fs tree root.
6552 * Returns 0 represents OK.
6553 * Returns not 0 represents error.
/*
 * Check @root by first calling check_fs_first_inode() and then walking the
 * tree with walk_down_tree_v2() / walk_up_tree_v2().
 *
 * The walk starts at the root node when the root item still has references
 * or its drop_progress objectid is 0; otherwise it resumes from the
 * drop_progress key at drop_level.
 *
 * NOTE(review): the conditions around the check_fs_first_inode() call and
 * the loop structure are not visible in this excerpt.
 */
6555 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6556 struct btrfs_root *root, unsigned int ext_ref,
6560 struct btrfs_path path;
6561 struct node_refs nrefs;
6562 struct btrfs_root_item *root_item = &root->root_item;
6567 memset(&nrefs, 0, sizeof(nrefs));
6570 * We need to manually check the first inode item (256)
6571 * As the following traversal function will only start from
6572 * the first inode item in the leaf, if inode item (256) is
6573 * missing we will skip it forever.
6575 ret = check_fs_first_inode(root, ext_ref);
6581 level = btrfs_header_level(root->node);
6582 btrfs_init_path(&path);
6584 if (btrfs_root_refs(root_item) > 0 ||
6585 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start from the root node; the path holds its own reference to it. */
6586 path.nodes[level] = root->node;
6587 path.slots[level] = 0;
6588 extent_buffer_get(root->node);
6590 struct btrfs_key key;
/* Resume from where the drop of this root had progressed to. */
6592 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6593 level = root_item->drop_level;
6594 path.lowest_level = level;
6595 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6602 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6603 ext_ref, check_all);
6607 /* if ret is negative, walk shall stop */
6613 ret = walk_up_tree_v2(root, &path, &level);
6615 /* Normal exit, reset ret to err */
6622 btrfs_release_path(&path);
6627 * Iterate all items in the tree and call check_inode_item() to check.
6629 * @root: the root of the tree to be checked.
6630 * @ext_ref: the EXTENDED_IREF feature
6632 * Return 0 if no error found.
6633 * Return <0 for error.
/*
 * Reset the cached block groups of @root's fs_info, then check @root with
 * check_btrfs_root(), passing a NULL transaction and 0 as the last argument.
 */
6635 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6637 reset_cached_block_groups(root->fs_info);
6638 return check_btrfs_root(NULL, root, ext_ref, 0);
6642 * Find the relative ref for root_ref and root_backref.
6644 * @root: the root of the root tree.
6645 * @ref_key: the key of the root ref.
6647 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF/ROOT_BACKREF item at @ref_key (@slot of @node) has
 * a counterpart of the opposite type with objectid and offset swapped, and
 * that both carry the same dirid, sequence, name length and name.
 *
 * A missing counterpart sets ROOT_REF_MISSING and a differing one sets
 * ROOT_REF_MISMATCH in err; both cases are reported with error().
 */
6649 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6650 struct extent_buffer *node, int slot)
6652 struct btrfs_path path;
6653 struct btrfs_key key;
6654 struct btrfs_root_ref *ref;
6655 struct btrfs_root_ref *backref;
6656 char ref_name[BTRFS_NAME_LEN] = {0};
6657 char backref_name[BTRFS_NAME_LEN] = {0};
6663 u32 backref_namelen;
6668 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6669 ref_dirid = btrfs_root_ref_dirid(node, ref);
6670 ref_seq = btrfs_root_ref_sequence(node, ref);
6671 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are warned about and read truncated. */
6673 if (ref_namelen <= BTRFS_NAME_LEN) {
6676 len = BTRFS_NAME_LEN;
6677 warning("%s[%llu %llu] ref_name too long",
6678 ref_key->type == BTRFS_ROOT_REF_KEY ?
6679 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6682 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6684 /* Find relative root_ref */
6685 key.objectid = ref_key->offset;
/* The sum minus the current type yields the opposite key type. */
6686 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6687 key.offset = ref_key->objectid;
6689 btrfs_init_path(&path);
6690 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6692 err |= ROOT_REF_MISSING;
6693 error("%s[%llu %llu] couldn't find relative ref",
6694 ref_key->type == BTRFS_ROOT_REF_KEY ?
6695 "ROOT_REF" : "ROOT_BACKREF",
6696 ref_key->objectid, ref_key->offset);
6700 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6701 struct btrfs_root_ref);
6702 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6703 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6704 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6706 if (backref_namelen <= BTRFS_NAME_LEN) {
6707 len = backref_namelen;
6709 len = BTRFS_NAME_LEN;
6710 warning("%s[%llu %llu] ref_name too long",
6711 key.type == BTRFS_ROOT_REF_KEY ?
6712 "ROOT_REF" : "ROOT_BACKREF",
6713 key.objectid, key.offset);
6715 read_extent_buffer(path.nodes[0], backref_name,
6716 (unsigned long)(backref + 1), len);
/*
 * len now holds the counterpart's capped name length; the names are only
 * compared when the two on-disk name lengths are equal.
 */
6718 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6719 ref_namelen != backref_namelen ||
6720 strncmp(ref_name, backref_name, len)) {
6721 err |= ROOT_REF_MISMATCH;
6722 error("%s[%llu %llu] mismatch relative ref",
6723 ref_key->type == BTRFS_ROOT_REF_KEY ?
6724 "ROOT_REF" : "ROOT_BACKREF",
6725 ref_key->objectid, ref_key->offset);
6728 btrfs_release_path(&path);
6733 * Check all fs/file tree in low_memory mode.
6735 * 1. for fs tree root item, call check_fs_root_v2()
6736 * 2. for fs tree root ref/backref, call check_root_ref()
6738 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at BTRFS_FS_TREE_OBJECTID. Each ROOT_ITEM
 * whose objectid passes fs_root_objectid() has its root read and checked
 * with check_fs_root_v2(); each ROOT_REF/ROOT_BACKREF is checked with
 * check_root_ref().
 *
 * NOTE(review): the loop structure and the error accumulation are not
 * visible in this excerpt.
 */
6740 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6742 struct btrfs_root *tree_root = fs_info->tree_root;
6743 struct btrfs_root *cur_root = NULL;
6744 struct btrfs_path path;
6745 struct btrfs_key key;
6746 struct extent_buffer *node;
6747 unsigned int ext_ref;
6752 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6754 btrfs_init_path(&path);
6755 key.objectid = BTRFS_FS_TREE_OBJECTID;
6757 key.type = BTRFS_ROOT_ITEM_KEY;
6759 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6763 } else if (ret > 0) {
6769 node = path.nodes[0];
6770 slot = path.slots[0];
6771 btrfs_item_key_to_cpu(node, &key, slot);
6772 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6774 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6775 fs_root_objectid(key.objectid)) {
/* Reloc tree roots are read uncached and freed explicitly after the check. */
6776 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6777 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6780 key.offset = (u64)-1;
6781 cur_root = btrfs_read_fs_root(fs_info, &key);
6784 if (IS_ERR(cur_root)) {
6785 error("Fail to read fs/subvol tree: %lld",
6791 ret = check_fs_root_v2(cur_root, ext_ref);
6794 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6795 btrfs_free_fs_root(cur_root);
6796 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6797 key.type == BTRFS_ROOT_BACKREF_KEY) {
6798 ret = check_root_ref(tree_root, &key, node, slot);
6802 ret = btrfs_next_item(tree_root, &path);
6812 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check: prints a progress line to stderr
 * unless progress reporting is enabled, then runs check_fs_roots_v2() in
 * lowmem mode or check_fs_roots() with @root_cache.
 */
6816 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6817 struct cache_tree *root_cache)
6821 if (!ctx.progress_enabled)
6822 fprintf(stderr, "checking fs roots\n");
6823 if (check_mode == CHECK_MODE_LOWMEM)
6824 ret = check_fs_roots_v2(fs_info);
6826 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref in rec->backref_tree and check it: present in the
 * extent tree, referenced back (tree backrefs), and for data backrefs the
 * local ref count, disk bytenr and byte count. The per-backref counts are
 * accumulated in found and finally compared with rec->refs. Mismatches are
 * printed to stderr.
 *
 * NOTE(review): the use of @print_errs and the return value are not visible
 * in this excerpt.
 */
6831 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6833 struct extent_backref *back, *tmp;
6834 struct tree_backref *tback;
6835 struct data_backref *dback;
6839 rbtree_postorder_for_each_entry_safe(back, tmp,
6840 &rec->backref_tree, node) {
/* The backref has no matching entry in the extent tree. */
6841 if (!back->found_extent_tree) {
6845 if (back->is_data) {
6846 dback = to_data_backref(back);
6847 fprintf(stderr, "Data backref %llu %s %llu"
6848 " owner %llu offset %llu num_refs %lu"
6849 " not found in extent tree\n",
6850 (unsigned long long)rec->start,
6851 back->full_backref ?
6853 back->full_backref ?
6854 (unsigned long long)dback->parent:
6855 (unsigned long long)dback->root,
6856 (unsigned long long)dback->owner,
6857 (unsigned long long)dback->offset,
6858 (unsigned long)dback->num_refs);
6860 tback = to_tree_backref(back);
6861 fprintf(stderr, "Tree backref %llu parent %llu"
6862 " root %llu not found in extent tree\n",
6863 (unsigned long long)rec->start,
6864 (unsigned long long)tback->parent,
6865 (unsigned long long)tback->root);
/* A tree backref that no tree block was found to reference. */
6868 if (!back->is_data && !back->found_ref) {
6872 tback = to_tree_backref(back);
6873 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6874 (unsigned long long)rec->start,
6875 back->full_backref ? "parent" : "root",
6876 back->full_backref ?
6877 (unsigned long long)tback->parent :
6878 (unsigned long long)tback->root, back);
6880 if (back->is_data) {
6881 dback = to_data_backref(back);
6882 if (dback->found_ref != dback->num_refs) {
6886 fprintf(stderr, "Incorrect local backref count"
6887 " on %llu %s %llu owner %llu"
6888 " offset %llu found %u wanted %u back %p\n",
6889 (unsigned long long)rec->start,
6890 back->full_backref ?
6892 back->full_backref ?
6893 (unsigned long long)dback->parent:
6894 (unsigned long long)dback->root,
6895 (unsigned long long)dback->owner,
6896 (unsigned long long)dback->offset,
6897 dback->found_ref, dback->num_refs, back);
6899 if (dback->disk_bytenr != rec->start) {
6903 fprintf(stderr, "Backref disk bytenr does not"
6904 " match extent record, bytenr=%llu, "
6905 "ref bytenr=%llu\n",
6906 (unsigned long long)rec->start,
6907 (unsigned long long)dback->disk_bytenr);
6910 if (dback->bytes != rec->nr) {
6914 fprintf(stderr, "Backref bytes do not match "
6915 "extent backref, bytenr=%llu, ref "
6916 "bytes=%llu, backref bytes=%llu\n",
6917 (unsigned long long)rec->start,
6918 (unsigned long long)rec->nr,
6919 (unsigned long long)dback->bytes);
/* Data backrefs contribute their found_ref count to the global total. */
6922 if (!back->is_data) {
6925 dback = to_data_backref(back);
6926 found += dback->found_ref;
6929 if (found != rec->refs) {
6933 fprintf(stderr, "Incorrect global backref count "
6934 "on %llu found %llu wanted %llu\n",
6935 (unsigned long long)rec->start,
6936 (unsigned long long)found,
6937 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Converts the rb_node into its containing extent_backref; presumably that
 * backref is then released -- TODO confirm against the full body.
 */
6943 static void __free_one_backref(struct rb_node *node)
6945 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down every backref hanging off @rec by passing rec->backref_tree to
 * rb_free_nodes() with __free_one_backref() as the per-node callback.
 */
6950 static void free_all_extent_backrefs(struct extent_record *rec)
6952 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, map it back to its
 * extent_record, unlink it from the cache and free all of its backrefs.
 * Presumably this repeats until the cache is empty and the record itself
 * is released as well -- TODO confirm against the full body.
 */
6955 static void free_extent_record_cache(struct cache_tree *extent_cache)
6957 struct cache_extent *cache;
6958 struct extent_record *rec;
6961 cache = first_cache_extent(extent_cache);
6964 rec = container_of(cache, struct extent_record, cache);
6965 remove_cache_extent(extent_cache, cache);
6966 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify for it.
 * All of the following must hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs and refs is non-zero,
 *  - there are no duplicates,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * When they do, the record is removed from the cache, its backrefs are
 * freed and it is unlinked from whatever list rec->list is on.
 */
6971 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6972 struct extent_record *rec)
6974 if (rec->content_checked && rec->owner_ref_checked &&
6975 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6976 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6977 !rec->bad_full_backref && !rec->crossing_stripes &&
6978 !rec->wrong_chunk_type) {
6979 remove_cache_extent(extent_cache, &rec->cache);
6980 free_all_extent_backrefs(rec);
6981 list_del_init(&rec->list);
/*
 * Check that tree block @buf really belongs to the root named in its header.
 *
 * Step 1: scan the backrefs of @rec, testing found_ref and full_backref and
 * comparing back->root with btrfs_header_owner(buf).  Presumably a
 * referenced, non-full backref with a matching root ends the check early --
 * TODO confirm.
 *
 * Step 2: read the owner root, search it for the first key of @buf with
 * path.lowest_level set to level + 1, and compare the block pointer in the
 * parent slot with buf->start.
 *
 * The final return yields 0 when the block was found and 1 otherwise.
 */
6987 static int check_owner_ref(struct btrfs_root *root,
6988 struct extent_record *rec,
6989 struct extent_buffer *buf)
6991 struct extent_backref *node, *tmp;
6992 struct tree_backref *back;
6993 struct btrfs_root *ref_root;
6994 struct btrfs_key key;
6995 struct btrfs_path path;
6996 struct extent_buffer *parent;
7001 rbtree_postorder_for_each_entry_safe(node, tmp,
7002 &rec->backref_tree, node) {
7005 if (!node->found_ref)
7007 if (node->full_backref)
7009 back = to_tree_backref(node);
7010 if (btrfs_header_owner(buf) == back->root)
7013 BUG_ON(rec->is_root);
7015 /* try to find the block by search corresponding fs tree */
7016 key.objectid = btrfs_header_owner(buf);
7017 key.type = BTRFS_ROOT_ITEM_KEY;
7018 key.offset = (u64)-1;
7020 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7021 if (IS_ERR(ref_root))
7024 level = btrfs_header_level(buf);
/* Use the first key of the block (item key or node key) as the search key. */
7026 btrfs_item_key_to_cpu(buf, &key, 0);
7028 btrfs_node_key_to_cpu(buf, &key, 0);
7030 btrfs_init_path(&path);
/* Stop one level above @buf so the parent pointer can be inspected. */
7031 path.lowest_level = level + 1;
7032 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7036 parent = path.nodes[level + 1];
7037 if (parent && buf->start == btrfs_node_blockptr(parent,
7038 path.slots[level + 1]))
7041 btrfs_release_path(&path);
7042 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec, testing full_backref and comparing each tree
 * backref's root with BTRFS_EXTENT_TREE_OBJECTID.  Presumably this reports
 * whether the record belongs to the extent tree -- TODO confirm the return
 * values against the full body.
 */
7045 static int is_extent_tree_record(struct extent_record *rec)
7047 struct extent_backref *node, *tmp;
7048 struct tree_backref *back;
7051 rbtree_postorder_for_each_entry_safe(node, tmp,
7052 &rec->backref_tree, node) {
7055 back = to_tree_backref(node);
7056 if (node->full_backref)
7058 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the extent record covering [start, start + len) in @extent_cache
 * and consult is_extent_tree_record() on it.  On the visible success path
 * the record's parent key is converted to CPU byte order and handed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 */
7065 static int record_bad_block_io(struct btrfs_fs_info *info,
7066 struct cache_tree *extent_cache,
7069 struct extent_record *rec;
7070 struct cache_extent *cache;
7071 struct btrfs_key key;
7073 cache = lookup_cache_extent(extent_cache, start, len);
7077 rec = container_of(cache, struct extent_record, cache);
7078 if (!is_extent_tree_record(rec))
7081 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7082 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back in swapped positions; btrfs_fixup_low_keys() is then called
 * with the block's first key (presumably only when @slot is 0 -- confirm).
 *
 * Leaf: both items' payloads are copied into temporary buffers, written back
 * at each other's offsets, the item offsets and sizes are exchanged and the
 * two keys are swapped through btrfs_set_item_key_unsafe().
 */
7085 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7086 struct extent_buffer *buf, int slot)
7088 if (btrfs_header_level(buf)) {
7089 struct btrfs_key_ptr ptr1, ptr2;
7091 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7092 sizeof(struct btrfs_key_ptr));
7093 read_extent_buffer(buf, &ptr2,
7094 btrfs_node_key_ptr_offset(slot + 1),
7095 sizeof(struct btrfs_key_ptr));
7096 write_extent_buffer(buf, &ptr1,
7097 btrfs_node_key_ptr_offset(slot + 1),
7098 sizeof(struct btrfs_key_ptr));
7099 write_extent_buffer(buf, &ptr2,
7100 btrfs_node_key_ptr_offset(slot),
7101 sizeof(struct btrfs_key_ptr));
7103 struct btrfs_disk_key key;
7104 btrfs_node_key(buf, &key, 0);
7105 btrfs_fixup_low_keys(root, path, &key,
7106 btrfs_header_level(buf) + 1);
7109 struct btrfs_item *item1, *item2;
7110 struct btrfs_key k1, k2;
7111 char *item1_data, *item2_data;
7112 u32 item1_offset, item2_offset, item1_size, item2_size;
7114 item1 = btrfs_item_nr(slot);
7115 item2 = btrfs_item_nr(slot + 1);
7116 btrfs_item_key_to_cpu(buf, &k1, slot);
7117 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7118 item1_offset = btrfs_item_offset(buf, item1);
7119 item2_offset = btrfs_item_offset(buf, item2);
7120 item1_size = btrfs_item_size(buf, item1);
7121 item2_size = btrfs_item_size(buf, item2);
7123 item1_data = malloc(item1_size);
7126 item2_data = malloc(item2_size);
7132 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7133 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and item2_data with item1_size).  This looks wrong whenever
 * the two item sizes differ -- confirm whether callers guarantee equal sizes.
 */
7135 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7136 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7140 btrfs_set_item_offset(buf, item1, item2_offset);
7141 btrfs_set_item_offset(buf, item2, item1_offset);
7142 btrfs_set_item_size(buf, item1, item2_size);
7143 btrfs_set_item_size(buf, item2, item1_size);
/* Swap the keys by pointing the path at each slot in turn. */
7145 path->slots[0] = slot;
7146 btrfs_set_item_key_unsafe(root, path, &k2);
7147 path->slots[0] = slot + 1;
7148 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level.  Adjacent keys
 * (node keys or item keys, depending on the block kind) are compared with
 * btrfs_comp_cpu_keys(); swap_values() is called on a pair and the buffer is
 * marked dirty.  Presumably pairs that already compare "less than" are
 * skipped -- TODO confirm.
 */
7153 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7155 struct extent_buffer *buf;
7156 struct btrfs_key k1, k2;
7158 int level = path->lowest_level;
7161 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an empty
 * block would make "nritems - 1" wrap around -- confirm this cannot happen.
 */
7162 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7164 btrfs_node_key_to_cpu(buf, &k1, i);
7165 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7167 btrfs_item_key_to_cpu(buf, &k1, i);
7168 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7170 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7172 ret = swap_values(root, path, buf, i);
7175 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF; presumably any other type is refused
 * (the branch body is not shown -- confirm).  The deletion is announced on
 * stdout, the following item headers are moved down by one, nritems is
 * decremented and the buffer is marked dirty.  btrfs_fixup_low_keys() is
 * called with the new first key (presumably only when @slot is 0 -- confirm).
 */
7181 static int delete_bogus_item(struct btrfs_root *root,
7182 struct btrfs_path *path,
7183 struct extent_buffer *buf, int slot)
7185 struct btrfs_key key;
7186 int nritems = btrfs_header_nritems(buf);
7188 btrfs_item_key_to_cpu(buf, &key, slot);
7190 /* These are all the keys we can deal with missing. */
7191 if (key.type != BTRFS_DIR_INDEX_KEY &&
7192 key.type != BTRFS_EXTENT_ITEM_KEY &&
7193 key.type != BTRFS_METADATA_ITEM_KEY &&
7194 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7195 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7198 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7199 (unsigned long long)key.objectid, key.type,
7200 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
7201 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7202 btrfs_item_nr_offset(slot + 1),
7203 sizeof(struct btrfs_item) *
7204 (nritems - slot - 1));
7205 btrfs_set_header_nritems(buf, nritems - 1);
7207 struct btrfs_disk_key disk_key;
7209 btrfs_item_key(buf, &disk_key, 0);
7210 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7212 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE(root), and every
 * later item is expected to end where the previous item's data starts.  When
 * an item ends beyond that point, delete_bogus_item() is tried and an error
 * is printed for the unfixable case.  When it ends before that point, the
 * gap is computed as "shift" and the item's data is moved up by that many
 * bytes, its offset is updated and the buffer is marked dirty.
 */
7216 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7218 struct extent_buffer *buf;
7222 /* We should only get this for leaves */
7223 BUG_ON(path->lowest_level);
7224 buf = path->nodes[0];
7226 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7227 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area. */
7229 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7230 BTRFS_LEAF_DATA_SIZE(root)) {
7231 if (btrfs_item_end_nr(buf, i) >
7232 BTRFS_LEAF_DATA_SIZE(root)) {
7233 ret = delete_bogus_item(root, path, buf, i);
7236 fprintf(stderr, "item is off the end of the "
7237 "leaf, can't fix\n");
7241 shift = BTRFS_LEAF_DATA_SIZE(root) -
7242 btrfs_item_end_nr(buf, i);
/* Later items: their end must coincide with the previous item's offset. */
7243 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7244 btrfs_item_offset_nr(buf, i - 1)) {
7245 if (btrfs_item_end_nr(buf, i) >
7246 btrfs_item_offset_nr(buf, i - 1)) {
7247 ret = delete_bogus_item(root, path, buf, i);
7250 fprintf(stderr, "items overlap, can't fix\n");
7254 shift = btrfs_item_offset_nr(buf, i - 1) -
7255 btrfs_item_end_nr(buf, i);
7260 printf("Shifting item nr %d by %u bytes in block %llu\n",
7261 i, shift, (unsigned long long)buf->start);
7262 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's payload up by "shift" bytes to close the gap. */
7263 memmove_extent_buffer(buf,
7264 btrfs_leaf_data(buf) + offset + shift,
7265 btrfs_leaf_data(buf) + offset,
7266 btrfs_item_size_nr(buf, i));
7267 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7269 btrfs_mark_buffer_dirty(buf);
7273 * We may have moved things, in which case we want to exit so we don't
7274 * write those changes out. Once we have proper abort functionality in
7275 * progs this can be changed to something nicer.
7282 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7283 * then just return -EIO.
/*
 * Repair driver for a bad tree block.  @status is tested against
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS, the
 * two cases a fixer exists for below.
 *
 * btrfs_find_all_roots() collects the roots that reference @buf.  For each
 * of them the root is read, a transaction is started, the tree is searched
 * down to the level of @buf with skip_check_block set, the matching fixer
 * (fix_key_order() or fix_item_offset()) is run and the transaction is
 * committed.
 *
 * NOTE(review): the visible btrfs_commit_transaction() calls do not check
 * their return value -- confirm this is intentional.
 */
7285 static int try_to_fix_bad_block(struct btrfs_root *root,
7286 struct extent_buffer *buf,
7287 enum btrfs_tree_block_status status)
7289 struct btrfs_trans_handle *trans;
7290 struct ulist *roots;
7291 struct ulist_node *node;
7292 struct btrfs_root *search_root;
7293 struct btrfs_path path;
7294 struct ulist_iterator iter;
7295 struct btrfs_key root_key, key;
7298 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7299 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7302 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7306 btrfs_init_path(&path);
7307 ULIST_ITER_INIT(&iter);
7308 while ((node = ulist_next(roots, &iter))) {
7309 root_key.objectid = node->val;
7310 root_key.type = BTRFS_ROOT_ITEM_KEY;
7311 root_key.offset = (u64)-1;
7313 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7320 trans = btrfs_start_transaction(search_root, 0);
7321 if (IS_ERR(trans)) {
7322 ret = PTR_ERR(trans);
/* Search down to the level of @buf; skip_check_block is set for the search. */
7326 path.lowest_level = btrfs_header_level(buf);
7327 path.skip_check_block = 1;
7328 if (path.lowest_level)
7329 btrfs_node_key_to_cpu(buf, &key, 0);
7331 btrfs_item_key_to_cpu(buf, &key, 0);
7332 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7335 btrfs_commit_transaction(trans, search_root);
7338 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7339 ret = fix_key_order(search_root, &path);
7340 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7341 ret = fix_item_offset(search_root, &path);
7343 btrfs_commit_transaction(trans, search_root);
7346 btrfs_release_path(&path);
7347 btrfs_commit_transaction(trans, search_root);
7350 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering the block is looked up in @extent_cache and its
 * generation, info_objectid (from the first key, when the block has items)
 * and info_level are filled in.  The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node().  A non-clean status leads to
 * try_to_fix_bad_block(); if that also fails, "bad block" is printed.
 *
 * Afterwards content_checked is set, owner_ref_checked is set either because
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or after consulting
 * check_owner_ref(), and maybe_free_extent_rec() gets a chance to drop the
 * record.
 */
7354 static int check_block(struct btrfs_root *root,
7355 struct cache_tree *extent_cache,
7356 struct extent_buffer *buf, u64 flags)
7358 struct extent_record *rec;
7359 struct cache_extent *cache;
7360 struct btrfs_key key;
7361 enum btrfs_tree_block_status status;
7365 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7368 rec = container_of(cache, struct extent_record, cache);
7369 rec->generation = btrfs_header_generation(buf);
7371 level = btrfs_header_level(buf);
7372 if (btrfs_header_nritems(buf) > 0) {
7375 btrfs_item_key_to_cpu(buf, &key, 0);
7377 btrfs_node_key_to_cpu(buf, &key, 0);
7379 rec->info_objectid = key.objectid;
7381 rec->info_level = level;
7383 if (btrfs_is_leaf(buf))
7384 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7386 status = btrfs_check_node(root, &rec->parent_key, buf);
7388 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() returns int but the result is stored
 * in an enum btrfs_tree_block_status and compared with
 * BTRFS_TREE_BLOCK_CLEAN; this presumably relies on CLEAN being 0 -- confirm.
 */
7390 status = try_to_fix_bad_block(root, buf, status);
7391 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7393 fprintf(stderr, "bad block %llu\n",
7394 (unsigned long long)buf->start);
7397 * Signal to callers we need to start the scan over
7398 * again since we'll have cowed blocks.
7403 rec->content_checked = 1;
7404 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7405 rec->owner_ref_checked = 1;
7407 ret = check_owner_ref(root, rec, buf);
7409 rec->owner_ref_checked = 1;
7413 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the rec->backrefs list for a tree backref matching @parent or @root.
 * Two comparisons are visible: @parent against back->parent, guarded by a
 * full_backref test, and @root against back->root, guarded by the opposite
 * test.  How the function picks between them is not shown here; presumably
 * it depends on whether @parent is non-zero -- TODO confirm.
 *
 * NOTE(review): this walks the rec->backrefs list while add_tree_backref()
 * visibly inserts new backrefs into rec->backref_tree; confirm that backrefs
 * are reachable through both containers.
 */
7418 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7419 u64 parent, u64 root)
7421 struct list_head *cur = rec->backrefs.next;
7422 struct extent_backref *node;
7423 struct tree_backref *back;
7425 while(cur != &rec->backrefs) {
7426 node = to_extent_backref(cur);
7430 back = to_tree_backref(node);
7432 if (!node->full_backref)
7434 if (parent == back->parent)
7437 if (node->full_backref)
7439 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.  One
 * visible path stores @parent and marks the node as a full backref; the
 * other clears full_backref (presumably storing @root instead -- confirm).
 */
7447 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7448 u64 parent, u64 root)
7450 struct tree_backref *ref = malloc(sizeof(*ref));
7454 memset(&ref->node, 0, sizeof(ref->node));
7456 ref->parent = parent;
7457 ref->node.full_backref = 1;
7460 ref->node.full_backref = 0;
/*
 * Search the rec->backrefs list for a data backref.  The visible comparisons
 * are @parent against back->parent, guarded by a full_backref test, and
 * (@root, @owner, @offset) against the stored triple, guarded by the
 * opposite test.  In the second case, when the caller's found_ref flag and
 * the node's found_ref are both set, a candidate whose bytes or disk_bytenr
 * differ from @bytes / @disk_bytenr gets special handling (presumably it is
 * skipped -- TODO confirm).
 */
7467 static struct data_backref *find_data_backref(struct extent_record *rec,
7468 u64 parent, u64 root,
7469 u64 owner, u64 offset,
7471 u64 disk_bytenr, u64 bytes)
7473 struct list_head *cur = rec->backrefs.next;
7474 struct extent_backref *node;
7475 struct data_backref *back;
7477 while(cur != &rec->backrefs) {
7478 node = to_extent_backref(cur);
7482 back = to_data_backref(node);
7484 if (!node->full_backref)
7486 if (parent == back->parent)
7489 if (node->full_backref)
7491 if (back->root == root && back->owner == owner &&
7492 back->offset == offset) {
7493 if (found_ref && node->found_ref &&
7494 (back->bytes != bytes ||
7495 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and flag it
 * as a data backref.  One visible path stores @parent and sets full_backref;
 * the other stores @offset and clears full_backref.  The backref's byte
 * count is set to max_size, and rec->max_size is raised to max_size when it
 * is smaller.
 */
7505 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7506 u64 parent, u64 root,
7507 u64 owner, u64 offset,
7510 struct data_backref *ref = malloc(sizeof(*ref));
7514 memset(&ref->node, 0, sizeof(ref->node));
7515 ref->node.is_data = 1;
7518 ref->parent = parent;
7521 ref->node.full_backref = 1;
7525 ref->offset = offset;
7526 ref->node.full_backref = 0;
7528 ref->bytes = max_size;
7531 if (max_size > rec->max_size)
7532 rec->max_size = max_size;
7536 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent lives in a block group whose
 * flags do not fit the kind of extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA,
 *  - a metadata extent needs SYSTEM or METADATA,
 *  - when backrefs exist, the first one decides between the two: a backref
 *    rooted in the chunk tree requires SYSTEM, any other tree backref
 *    requires METADATA, and a data backref on a metadata extent is always
 *    flagged.
 * The block group is looked up through the global fs_info (global_info).
 */
7537 static void check_extent_type(struct extent_record *rec)
7539 struct btrfs_block_group_cache *bg_cache;
7541 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7545 /* data extent, check chunk directly*/
7546 if (!rec->metadata) {
7547 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7548 rec->wrong_chunk_type = 1;
7552 /* metadata extent, check the obvious case first */
7553 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7554 BTRFS_BLOCK_GROUP_METADATA))) {
7555 rec->wrong_chunk_type = 1;
7560 * Check SYSTEM extent, as it's also marked as metadata, we can only
7561 * make sure it's a SYSTEM extent by its backref
7563 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7564 struct extent_backref *node;
7565 struct tree_backref *tback;
7568 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7569 if (node->is_data) {
7570 /* tree block shouldn't have data backref */
7571 rec->wrong_chunk_type = 1;
7574 tback = container_of(node, struct tree_backref, node);
7576 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7577 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7579 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7580 if (!(bg_cache->flags & bg_type))
7581 rec->wrong_chunk_type = 1;
7586 * Allocate a new extent record, fill default values from @tmpl and insert into
7587 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7588 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from the template: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key.  rec->nr becomes the larger of tmpl->nr
 * and tmpl->max_size, while the cache extent itself is sized with tmpl->nr.
 * The error flags start cleared, the list heads and backref tree start
 * empty, and flag_block_full_backref starts as FLAG_UNSET.
 *
 * After insertion the global bytes_used counter grows by rec->nr, the
 * stripe-crossing check is run with the fs nodesize (presumably only for
 * metadata records -- confirm) and check_extent_type() validates the chunk
 * type.  tmpl->max_size must be non-zero (BUG_ON otherwise).
 */
7590 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7591 struct extent_record *tmpl)
7593 struct extent_record *rec;
7596 BUG_ON(tmpl->max_size == 0);
7597 rec = malloc(sizeof(*rec));
7600 rec->start = tmpl->start;
7601 rec->max_size = tmpl->max_size;
7602 rec->nr = max(tmpl->nr, tmpl->max_size);
7603 rec->found_rec = tmpl->found_rec;
7604 rec->content_checked = tmpl->content_checked;
7605 rec->owner_ref_checked = tmpl->owner_ref_checked;
7606 rec->num_duplicates = 0;
7607 rec->metadata = tmpl->metadata;
7608 rec->flag_block_full_backref = FLAG_UNSET;
7609 rec->bad_full_backref = 0;
7610 rec->crossing_stripes = 0;
7611 rec->wrong_chunk_type = 0;
7612 rec->is_root = tmpl->is_root;
7613 rec->refs = tmpl->refs;
7614 rec->extent_item_refs = tmpl->extent_item_refs;
7615 rec->parent_generation = tmpl->parent_generation;
7616 INIT_LIST_HEAD(&rec->backrefs);
7617 INIT_LIST_HEAD(&rec->dups);
7618 INIT_LIST_HEAD(&rec->list);
7619 rec->backref_tree = RB_ROOT;
7620 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7621 rec->cache.start = tmpl->start;
7622 rec->cache.size = tmpl->nr;
7623 ret = insert_cache_extent(extent_cache, &rec->cache);
7628 bytes_used += rec->nr;
7631 rec->crossing_stripes = check_crossing_stripes(global_info,
7632 rec->start, global_info->nodesize);
7633 check_extent_type(rec);
7638 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7640 * - refs - if found, increase refs
7641 * - is_root - if found, set
7642 * - content_checked - if found, set
7643 * - owner_ref_checked - if found, set
7645 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when the lookup
 * finds nothing.
 *
 * For an existing record:
 *  - rec->nr is reset to max(tmpl->nr, tmpl->max_size),
 *  - when the template comes from an extent item (found_rec) that starts
 *    elsewhere or duplicates an already found record, the record is queued
 *    on the global duplicate_extents list and a small copy of the template
 *    is appended to rec->dups,
 *  - extent_item_refs is taken from the template, with a diagnostic when the
 *    record already had a value,
 *  - content_checked / owner_ref_checked are OR-ed in, parent_key is
 *    overwritten, parent_generation is taken when non-zero and max_size only
 *    ever grows,
 *  - the stripe-crossing and chunk-type checks are re-run and
 *    maybe_free_extent_rec() may drop the record.
 */
7647 static int add_extent_rec(struct cache_tree *extent_cache,
7648 struct extent_record *tmpl)
7650 struct extent_record *rec;
7651 struct cache_extent *cache;
7655 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7657 rec = container_of(cache, struct extent_record, cache);
7661 rec->nr = max(tmpl->nr, tmpl->max_size);
7664 * We need to make sure to reset nr to whatever the extent
7665 * record says was the real size, this way we can compare it to
7668 if (tmpl->found_rec) {
7669 if (tmpl->start != rec->start || rec->found_rec) {
7670 struct extent_record *tmp;
7673 if (list_empty(&rec->list))
7674 list_add_tail(&rec->list,
7675 &duplicate_extents);
7678 * We have to do this song and dance in case we
7679 * find an extent record that falls inside of
7680 * our current extent record but does not have
7681 * the same objectid.
7683 tmp = malloc(sizeof(*tmp));
7686 tmp->start = tmpl->start;
7687 tmp->max_size = tmpl->max_size;
7690 tmp->metadata = tmpl->metadata;
7691 tmp->extent_item_refs = tmpl->extent_item_refs;
7692 INIT_LIST_HEAD(&tmp->list);
7693 list_add_tail(&tmp->list, &rec->dups);
7694 rec->num_duplicates++;
7701 if (tmpl->extent_item_refs && !dup) {
7702 if (rec->extent_item_refs) {
7703 fprintf(stderr, "block %llu rec "
7704 "extent_item_refs %llu, passed %llu\n",
7705 (unsigned long long)tmpl->start,
7706 (unsigned long long)
7707 rec->extent_item_refs,
7708 (unsigned long long)tmpl->extent_item_refs);
7710 rec->extent_item_refs = tmpl->extent_item_refs;
7714 if (tmpl->content_checked)
7715 rec->content_checked = 1;
7716 if (tmpl->owner_ref_checked)
7717 rec->owner_ref_checked = 1;
7718 memcpy(&rec->parent_key, &tmpl->parent_key,
7719 sizeof(tmpl->parent_key));
7720 if (tmpl->parent_generation)
7721 rec->parent_generation = tmpl->parent_generation;
7722 if (rec->max_size < tmpl->max_size)
7723 rec->max_size = tmpl->max_size;
7726 * A metadata extent can't cross stripe_len boundary, otherwise
7727 * kernel scrub won't be able to handle it.
7728 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7732 rec->crossing_stripes = check_crossing_stripes(
7733 global_info, rec->start,
7734 global_info->nodesize);
7735 check_extent_type(rec);
7736 maybe_free_extent_rec(extent_cache, rec);
7740 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, one is created from a zeroed template
 * and looked up again.  The record must start exactly at @bytenr.  An
 * existing backref is searched with find_tree_backref() and a new one is
 * allocated with alloc_tree_backref() when needed.
 *
 * Two marking paths are visible: one sets found_ref and the other sets
 * found_extent_tree, each complaining on stderr when the flag was already
 * set.  Presumably @found_ref selects between them -- confirm.
 *
 * The backref is inserted into rec->backref_tree (presumably only when it
 * was newly allocated, tracked by "insert" -- confirm).  Finally the chunk
 * type is re-checked and maybe_free_extent_rec() may drop the record.
 */
7745 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7746 u64 parent, u64 root, int found_ref)
7748 struct extent_record *rec;
7749 struct tree_backref *back;
7750 struct cache_extent *cache;
7752 bool insert = false;
7754 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7756 struct extent_record tmpl;
7758 memset(&tmpl, 0, sizeof(tmpl));
7759 tmpl.start = bytenr;
7764 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7768 /* really a bug in cache_extent implement now */
7769 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7774 rec = container_of(cache, struct extent_record, cache);
7775 if (rec->start != bytenr) {
7777 * Several cause, from unaligned bytenr to over lapping extents
7782 back = find_tree_backref(rec, parent, root);
7784 back = alloc_tree_backref(rec, parent, root);
7791 if (back->node.found_ref) {
7792 fprintf(stderr, "Extent back ref already exists "
7793 "for %llu parent %llu root %llu \n",
7794 (unsigned long long)bytenr,
7795 (unsigned long long)parent,
7796 (unsigned long long)root);
7798 back->node.found_ref = 1;
7800 if (back->node.found_extent_tree) {
7801 fprintf(stderr, "Extent back ref already exists "
7802 "for %llu parent %llu root %llu \n",
7803 (unsigned long long)bytenr,
7804 (unsigned long long)parent,
7805 (unsigned long long)root);
7807 back->node.found_extent_tree = 1;
7810 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7811 compare_extent_backref));
7812 check_extent_type(rec);
7813 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, one is created from a zeroed template
 * carrying @max_size and looked up again.  rec->max_size only ever grows.
 * An existing backref is searched with find_data_backref() and a new one is
 * allocated with alloc_data_backref() when needed.
 *
 * Two marking paths are visible (presumably selected by @found_ref --
 * confirm):
 *  - the "found a reference" path requires num_refs == 1, bumps the
 *    backref's found_ref counter, refreshes bytes / disk_bytenr when they
 *    differ (erasing the node from the tree so that it can be re-inserted)
 *    and marks the record as content- and owner-checked,
 *  - the "seen in the extent tree" path stores @num_refs and sets
 *    found_extent_tree, complaining on stderr if that was already set.
 *
 * Finally the backref is inserted into rec->backref_tree when required and
 * maybe_free_extent_rec() may drop the record.
 */
7817 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7818 u64 parent, u64 root, u64 owner, u64 offset,
7819 u32 num_refs, int found_ref, u64 max_size)
7821 struct extent_record *rec;
7822 struct data_backref *back;
7823 struct cache_extent *cache;
7825 bool insert = false;
7827 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7829 struct extent_record tmpl;
7831 memset(&tmpl, 0, sizeof(tmpl));
7832 tmpl.start = bytenr;
7834 tmpl.max_size = max_size;
7836 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7840 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7845 rec = container_of(cache, struct extent_record, cache);
7846 if (rec->max_size < max_size)
7847 rec->max_size = max_size;
7850 * If found_ref is set then max_size is the real size and must match the
7851 * existing refs. So if we have already found a ref then we need to
7852 * make sure that this ref matches the existing one, otherwise we need
7853 * to add a new backref so we can notice that the backrefs don't match
7854 * and we need to figure out who is telling the truth. This is to
7855 * account for that awful fsync bug I introduced where we'd end up with
7856 * a btrfs_file_extent_item that would have its length include multiple
7857 * prealloc extents or point inside of a prealloc extent.
7859 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7862 back = alloc_data_backref(rec, parent, root, owner, offset,
7869 BUG_ON(num_refs != 1);
7870 if (back->node.found_ref)
7871 BUG_ON(back->bytes != max_size);
7872 back->node.found_ref = 1;
7873 back->found_ref += 1;
7874 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7875 back->bytes = max_size;
7876 back->disk_bytenr = bytenr;
7878 /* Need to reinsert if not already in the tree */
7880 rb_erase(&back->node.node, &rec->backref_tree);
7885 rec->content_checked = 1;
7886 rec->owner_ref_checked = 1;
7888 if (back->node.found_extent_tree) {
7889 fprintf(stderr, "Extent back ref already exists "
7890 "for %llu parent %llu root %llu "
7891 "owner %llu offset %llu num_refs %lu\n",
7892 (unsigned long long)bytenr,
7893 (unsigned long long)parent,
7894 (unsigned long long)root,
7895 (unsigned long long)owner,
7896 (unsigned long long)offset,
7897 (unsigned long)num_refs);
7899 back->num_refs = num_refs;
7900 back->node.found_extent_tree = 1;
7903 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7904 compare_extent_backref));
7906 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is first added
 * to the @seen tree and then to @pending.  Presumably the second insertion
 * is skipped when the range was already in @seen -- TODO confirm.  The
 * result of the insertion into @pending is not checked in the visible code.
 */
7910 static int add_pending(struct cache_tree *pending,
7911 struct cache_tree *seen, u64 bytenr, u32 size)
7914 ret = add_cache_extent(seen, bytenr, size);
7917 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits (at
 * most @bits_nr entries, as enforced by the two do/while loops).
 *
 * Visible strategy:
 *  1. the first extent in @reada, if any, is taken on its own;
 *  2. otherwise @nodes is searched starting 32768 bytes before @last,
 *     falling back to the start of @nodes;
 *  3. @pending is searched from offset 0 (presumably when @nodes is empty --
 *     confirm) and consecutive extents are copied out;
 *  4. when more than 8 slots remain free, extents from @pending that start
 *     within 32768 bytes of the end of the previously chosen one are
 *     appended.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last; where unsigned long is 32 bits wide the value would be truncated --
 * confirm whether that matters.
 */
7921 static int pick_next_pending(struct cache_tree *pending,
7922 struct cache_tree *reada,
7923 struct cache_tree *nodes,
7924 u64 last, struct block_info *bits, int bits_nr,
7927 unsigned long node_start = last;
7928 struct cache_extent *cache;
7931 cache = search_cache_extent(reada, 0);
7933 bits[0].start = cache->start;
7934 bits[0].size = cache->size;
7939 if (node_start > 32768)
7940 node_start -= 32768;
7942 cache = search_cache_extent(nodes, node_start);
7944 cache = search_cache_extent(nodes, 0);
7947 cache = search_cache_extent(pending, 0);
7952 bits[ret].start = cache->start;
7953 bits[ret].size = cache->size;
7954 cache = next_cache_extent(cache);
7956 } while (cache && ret < bits_nr);
7962 bits[ret].start = cache->start;
7963 bits[ret].size = cache->size;
7964 cache = next_cache_extent(cache);
7966 } while (cache && ret < bits_nr);
7968 if (bits_nr - ret > 8) {
7969 u64 lookup = bits[0].start + bits[0].size;
7970 struct cache_extent *next;
7971 next = search_cache_extent(pending, lookup);
7973 if (next->start - lookup > 32768)
7975 bits[ret].start = next->start;
7976 bits[ret].size = next->size;
7977 lookup = next->start + next->size;
7981 next = next_cache_extent(next);
/*
 * Callback passed to cache_tree_free_extents() by free_chunk_cache_tree().
 * Maps the cache extent back to its chunk_record and unlinks the record's
 * "list" and "dextents" list heads; the record is presumably freed
 * afterwards -- TODO confirm.
 */
7989 static void free_chunk_record(struct cache_extent *cache)
7991 struct chunk_record *rec;
7993 rec = container_of(cache, struct chunk_record, cache);
7994 list_del_init(&rec->list);
7995 list_del_init(&rec->dextents);
/*
 * Release every chunk record held in @chunk_cache by running
 * free_chunk_record() on each cache extent.
 */
7999 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8001 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback used by the FREE_RB_BASED_TREE(device_cache, ...)
 * instantiation.  Maps the rb_node back to its device_record; the record is
 * presumably freed afterwards -- TODO confirm.
 */
8004 static void free_device_record(struct rb_node *node)
8006 struct device_record *rec;
8008 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation tying the name "device_cache" to free_device_record().
 * Presumably it expands to the tree-freeing function for the device cache;
 * see the FREE_RB_BASED_TREE definition to confirm the generated name.
 */
8012 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: first into the cache-extent tree, then at the
 * tail of the tree's block_groups list.  Presumably the list insertion is
 * skipped when the cache insertion fails -- TODO confirm.
 */
8014 int insert_block_group_record(struct block_group_tree *tree,
8015 struct block_group_record *bg_rec)
8019 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8023 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Callback passed to cache_tree_free_extents() by free_block_group_tree().
 * Maps the cache extent back to its block_group_record and unlinks it from
 * its list; the record is presumably freed afterwards -- TODO confirm.
 */
8027 static void free_block_group_record(struct cache_extent *cache)
8029 struct block_group_record *rec;
8031 rec = container_of(cache, struct block_group_record, cache);
8032 list_del_init(&rec->list);
/*
 * Release every block group record held in @tree by running
 * free_block_group_record() on each cache extent.
 */
8036 void free_block_group_tree(struct block_group_tree *tree)
8038 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree using insert_cache_extent2(), then queue it on
 * both orphan lists (no_chunk_orphans and no_device_orphans).  Presumably
 * the list insertions are skipped when the cache insertion fails -- TODO
 * confirm.
 */
8041 int insert_device_extent_record(struct device_extent_tree *tree,
8042 struct device_extent_record *de_rec)
8047 * Device extent is a bit different from the other extents, because
8048 * the extents which belong to the different devices may have the
8049 * same start and size, so we need use the special extent cache
8050 * search/insert functions.
8052 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8056 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8057 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Callback passed to cache_tree_free_extents() by free_device_extent_tree().
 * Maps the cache extent back to its device_extent_record and unlinks it from
 * the chunk and device lists when it is still on them; the record is
 * presumably freed afterwards -- TODO confirm.
 */
8061 static void free_device_extent_record(struct cache_extent *cache)
8063 struct device_extent_record *rec;
8065 rec = container_of(cache, struct device_extent_record, cache);
8066 if (!list_empty(&rec->chunk_list))
8067 list_del_init(&rec->chunk_list);
8068 if (!list_empty(&rec->device_list))
8069 list_del_init(&rec->device_list);
/*
 * Release every device extent record held in @tree by running
 * free_device_extent_record() on each cache extent.
 */
8073 void free_device_extent_tree(struct device_extent_tree *tree)
8075 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8078 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0-format extent ref item at @slot of @leaf.  When the ref's
 * objectid is below BTRFS_FIRST_FREE_OBJECTID the item is recorded as a tree
 * backref; the other visible call records it as a data backref, using the
 * count from btrfs_ref_count_v0().  In both calls key.objectid is the extent
 * bytenr and key.offset is passed as the parent.
 */
8079 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8080 struct extent_buffer *leaf, int slot)
8082 struct btrfs_extent_ref_v0 *ref0;
8083 struct btrfs_key key;
8086 btrfs_item_key_to_cpu(leaf, &key, slot);
8087 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8088 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8089 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8092 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8093 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with room for the chunk's stripes, and
 * "memory allocation failed" is printed on the failure path.  The cache
 * extent covers [key->offset, key->offset + chunk length).  The key fields,
 * the leaf generation and the chunk attributes (owner, stripe_len, type,
 * io_width, io_align, sector_size, num_stripes, sub_stripes) are copied,
 * followed by devid, offset and device UUID for every stripe.
 */
8099 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8100 struct btrfs_key *key,
8103 struct btrfs_chunk *ptr;
8104 struct chunk_record *rec;
8107 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8108 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8110 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8112 fprintf(stderr, "memory allocation failed\n");
8116 INIT_LIST_HEAD(&rec->list);
8117 INIT_LIST_HEAD(&rec->dextents);
8120 rec->cache.start = key->offset;
8121 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8123 rec->generation = btrfs_header_generation(leaf);
8125 rec->objectid = key->objectid;
8126 rec->type = key->type;
8127 rec->offset = key->offset;
8129 rec->length = rec->cache.size;
8130 rec->owner = btrfs_chunk_owner(leaf, ptr);
8131 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8132 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8133 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8134 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8135 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8136 rec->num_stripes = num_stripes;
8137 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device UUID. */
8139 for (i = 0; i < rec->num_stripes; ++i) {
8140 rec->stripes[i].devid =
8141 btrfs_stripe_devid_nr(leaf, ptr, i);
8142 rec->stripes[i].offset =
8143 btrfs_stripe_offset_nr(leaf, ptr, i);
8144 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8145 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * btrfs_check_chunk_valid() is run first, and an error naming the chunk
 * offset and length is reported for an invalid chunk.  A chunk_record is
 * then built with btrfs_new_chunk_record() and inserted into the cache, and
 * "Chunk[...] existed." is printed when the insertion reports a conflict.
 */
8152 static int process_chunk_item(struct cache_tree *chunk_cache,
8153 struct btrfs_key *key, struct extent_buffer *eb,
8156 struct chunk_record *rec;
8157 struct btrfs_chunk *chunk;
8160 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8162 * Do extra check for this chunk item,
8164 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8165 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8166 * and owner<->key_type check.
8168 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8171 error("chunk(%llu, %llu) is not valid, ignore it",
8172 key->offset, btrfs_chunk_length(eb, chunk));
8175 rec = btrfs_new_chunk_record(eb, key, slot);
8176 ret = insert_cache_extent(chunk_cache, &rec->cache);
8178 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8179 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the rb-tree @dev_cache, ordered by device_record_compare.  A
 * "Device[...] existed." message is printed when the insertion reports a
 * conflict.
 *
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * below starts out uninitialised -- confirm all fields that are read later
 * are set here.
 */
8186 static int process_device_item(struct rb_root *dev_cache,
8187 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8189 struct btrfs_dev_item *ptr;
8190 struct device_record *rec;
8193 ptr = btrfs_item_ptr(eb,
8194 slot, struct btrfs_dev_item);
8196 rec = malloc(sizeof(*rec));
8198 fprintf(stderr, "memory allocation failed\n");
/* This devid value is overwritten below with the one from the dev item. */
8202 rec->devid = key->offset;
8203 rec->generation = btrfs_header_generation(eb);
8205 rec->objectid = key->objectid;
8206 rec->type = key->type;
8207 rec->offset = key->offset;
8209 rec->devid = btrfs_device_id(eb, ptr);
8210 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8211 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8213 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8215 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.  The cache extent covers [key->objectid, key->objectid +
 * key->offset).  The key fields, the leaf generation and the on-disk block
 * group flags are copied, and the list head is initialised.
 * "memory allocation failed" is printed on the allocation-failure path.
 */
8222 struct block_group_record *
8223 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8226 struct btrfs_block_group_item *ptr;
8227 struct block_group_record *rec;
8229 rec = calloc(1, sizeof(*rec));
8231 fprintf(stderr, "memory allocation failed\n");
8235 rec->cache.start = key->objectid;
8236 rec->cache.size = key->offset;
8238 rec->generation = btrfs_header_generation(leaf);
8240 rec->objectid = key->objectid;
8241 rec->type = key->type;
8242 rec->offset = key->offset;
8244 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8245 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8247 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache.  The "Block Group[...] existed." message follows the
 * insert, presumably for a duplicate record -- TODO confirm.
 */
8252 static int process_block_group_item(struct block_group_tree *block_group_cache,
8253 struct btrfs_key *key,
8254 struct extent_buffer *eb, int slot)
8256 struct block_group_record *rec;
8259 rec = btrfs_new_block_group_record(eb, key, slot);
8260 ret = insert_block_group_record(block_group_cache, rec);
8262 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8263 rec->objectid, rec->offset);
/*
 * Allocate a zero-filled device_extent_record for the dev extent item at
 * @slot of @leaf.
 *
 * The cache entry is keyed by key->objectid (cache.objectid) and key->offset
 * (cache.start), with cache.size set to the extent length.  The owning chunk
 * objectid, the chunk offset and the length are read from the on-disk item.
 * Both embedded list heads are initialized empty.
 */
8270 struct device_extent_record *
8271 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8272 struct btrfs_key *key, int slot)
8274 struct device_extent_record *rec;
8275 struct btrfs_dev_extent *ptr;
8277 rec = calloc(1, sizeof(*rec));
8279 fprintf(stderr, "memory allocation failed\n");
8283 rec->cache.objectid = key->objectid;
8284 rec->cache.start = key->offset;
8286 rec->generation = btrfs_header_generation(leaf);
8288 rec->objectid = key->objectid;
8289 rec->type = key->type;
8290 rec->offset = key->offset;
8292 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8293 rec->chunk_objecteid =
8294 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8296 btrfs_dev_extent_chunk_offset(leaf, ptr);
8297 rec->length = btrfs_dev_extent_length(leaf, ptr);
8298 rec->cache.size = rec->length;
8300 INIT_LIST_HEAD(&rec->chunk_list);
8301 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.  The "Device extent[...] existed." message follows
 * the insert, presumably for a duplicate record -- TODO confirm.
 */
8307 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8308 struct btrfs_key *key, struct extent_buffer *eb,
8311 struct device_extent_record *rec;
8314 rec = btrfs_new_device_extent_record(eb, key, slot);
8315 ret = insert_device_extent_record(dev_extent_cache, rec);
8318 "Device extent[%llu, %llu, %llu] existed.\n",
8319 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM or METADATA_ITEM at @slot of @eb into @extent_cache.
 *
 * An extent record is added for the item, then every inline reference stored
 * after the extent item is walked and turned into a tree backref or a data
 * backref.
 *
 * Length: METADATA_ITEM keys use fs_info->nodesize, other keys use
 * key.offset.
 *
 * Sanity checks that print an "ignore"/"ignoring" error:
 *   - extent bytenr not aligned to sectorsize
 *   - metadata extent whose length differs from nodesize
 *   - data extent whose length is not aligned to sectorsize
 *
 * Items smaller than struct btrfs_extent_item are treated as v0 extent items
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined; those carry no inline refs and
 * only the extent record is added.
 */
8326 static int process_extent_item(struct btrfs_root *root,
8327 struct cache_tree *extent_cache,
8328 struct extent_buffer *eb, int slot)
8330 struct btrfs_extent_item *ei;
8331 struct btrfs_extent_inline_ref *iref;
8332 struct btrfs_extent_data_ref *dref;
8333 struct btrfs_shared_data_ref *sref;
8334 struct btrfs_key key;
8335 struct extent_record tmpl;
8340 u32 item_size = btrfs_item_size_nr(eb, slot);
8346 btrfs_item_key_to_cpu(eb, &key, slot);
8348 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8350 num_bytes = root->fs_info->nodesize;
8352 num_bytes = key.offset;
8355 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8356 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8357 key.objectid, root->fs_info->sectorsize);
8360 if (item_size < sizeof(*ei)) {
8361 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8362 struct btrfs_extent_item_v0 *ei0;
8363 BUG_ON(item_size != sizeof(*ei0));
8364 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8365 refs = btrfs_extent_refs_v0(eb, ei0);
8369 memset(&tmpl, 0, sizeof(tmpl));
8370 tmpl.start = key.objectid;
8371 tmpl.nr = num_bytes;
8372 tmpl.extent_item_refs = refs;
8373 tmpl.metadata = metadata;
8375 tmpl.max_size = num_bytes;
8377 return add_extent_rec(extent_cache, &tmpl);
8380 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8381 refs = btrfs_extent_refs(eb, ei);
8382 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8386 if (metadata && num_bytes != root->fs_info->nodesize) {
8387 error("ignore invalid metadata extent, length %llu does not equal to %u",
8388 num_bytes, root->fs_info->nodesize);
8391 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8392 error("ignore invalid data extent, length %llu is not aligned to %u",
8393 num_bytes, root->fs_info->sectorsize);
8397 memset(&tmpl, 0, sizeof(tmpl));
8398 tmpl.start = key.objectid;
8399 tmpl.nr = num_bytes;
8400 tmpl.extent_item_refs = refs;
8401 tmpl.metadata = metadata;
8403 tmpl.max_size = num_bytes;
8404 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  A tree block keyed by
 * EXTENT_ITEM additionally has a btrfs_tree_block_info in front of them.
 */
8406 ptr = (unsigned long)(ei + 1);
8407 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8408 key.type == BTRFS_EXTENT_ITEM_KEY)
8409 ptr += sizeof(struct btrfs_tree_block_info);
8411 end = (unsigned long)ei + item_size;
8413 iref = (struct btrfs_extent_inline_ref *)ptr;
8414 type = btrfs_extent_inline_ref_type(eb, iref);
8415 offset = btrfs_extent_inline_ref_offset(eb, iref);
8417 case BTRFS_TREE_BLOCK_REF_KEY:
8418 ret = add_tree_backref(extent_cache, key.objectid,
8422 "add_tree_backref failed (extent items tree block): %s",
8425 case BTRFS_SHARED_BLOCK_REF_KEY:
8426 ret = add_tree_backref(extent_cache, key.objectid,
8430 "add_tree_backref failed (extent items shared block): %s",
8433 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref starting at its offset field. */
8434 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8435 add_data_backref(extent_cache, key.objectid, 0,
8436 btrfs_extent_data_ref_root(eb, dref),
8437 btrfs_extent_data_ref_objectid(eb,
8439 btrfs_extent_data_ref_offset(eb, dref),
8440 btrfs_extent_data_ref_count(eb, dref),
8443 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the whole inline ref header. */
8444 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8445 add_data_backref(extent_cache, key.objectid, offset,
8447 btrfs_shared_data_ref_count(eb, sref),
8451 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8452 key.objectid, key.type, num_bytes);
/* Step over this inline ref; its size depends on the ref type. */
8455 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) inside block group @cache
 * is described by a matching free space entry.
 *
 * Before the lookup, every superblock mirror is mapped into the block group
 * with btrfs_rmap_block() and any stripe that overlaps the range is carved
 * out of it: the range is trimmed at the front or at the end, and when a
 * stripe lands in the middle the left part is checked by a recursive call
 * and the walk continues with the right part.
 *
 * The remaining range is looked up with btrfs_find_free_space().  A missing
 * entry, a different entry offset, or a different entry size is reported on
 * stderr.  A matching entry is unlinked from the free space ctl.
 */
8462 static int check_cache_range(struct btrfs_root *root,
8463 struct btrfs_block_group_cache *cache,
8464 u64 offset, u64 bytes)
8466 struct btrfs_free_space *entry;
8472 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8473 bytenr = btrfs_sb_offset(i);
8474 ret = btrfs_rmap_block(root->fs_info,
8475 cache->key.objectid, bytenr, 0,
8476 &logical, &nr, &stripe_len);
8481 if (logical[nr] + stripe_len <= offset)
8483 if (offset + bytes <= logical[nr])
8485 if (logical[nr] == offset) {
8486 if (stripe_len >= bytes) {
8490 bytes -= stripe_len;
8491 offset += stripe_len;
8492 } else if (logical[nr] < offset) {
8493 if (logical[nr] + stripe_len >=
8498 bytes = (offset + bytes) -
8499 (logical[nr] + stripe_len);
8500 offset = logical[nr] + stripe_len;
8503 * Could be tricky, the super may land in the
8504 * middle of the area we're checking. First
8505 * check the easiest case, it's at the end.
8507 if (logical[nr] + stripe_len >=
8509 bytes = logical[nr] - offset;
8513 /* Check the left side */
8514 ret = check_cache_range(root, cache,
8516 logical[nr] - offset);
8522 /* Now we continue with the right side */
8523 bytes = (offset + bytes) -
8524 (logical[nr] + stripe_len);
8525 offset = logical[nr] + stripe_len;
8532 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8534 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8535 offset, offset+bytes);
8539 if (entry->offset != offset) {
8540 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8545 if (entry->bytes != bytes) {
8546 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8547 bytes, entry->bytes, offset);
8551 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the loaded free space entries of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group.  "last" tracks
 * the end of the previous allocated extent; every gap between "last" and the
 * next extent, and the tail up to the end of the block group, is passed to
 * check_cache_range().  Once the walk is done, entries still present in the
 * free space rbtree are reported on stderr.
 *
 * @root is replaced by fs_info->extent_root on entry.
 */
8556 static int verify_space_cache(struct btrfs_root *root,
8557 struct btrfs_block_group_cache *cache)
8559 struct btrfs_path path;
8560 struct extent_buffer *leaf;
8561 struct btrfs_key key;
8565 root = root->fs_info->extent_root;
8567 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8569 btrfs_init_path(&path);
8570 key.objectid = last;
8572 key.type = BTRFS_EXTENT_ITEM_KEY;
8573 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8578 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8579 ret = btrfs_next_leaf(root, &path);
8587 leaf = path.nodes[0];
8588 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the walk has left this block group. */
8589 if (key.objectid >= cache->key.offset + cache->key.objectid)
8591 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8592 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Extent starts exactly where the previous one ended: no gap to check.
 * EXTENT_ITEM keys carry the length in key.offset, the other accepted
 * key type advances by nodesize.
 */
8597 if (last == key.objectid) {
8598 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8599 last = key.objectid + key.offset;
8601 last = key.objectid + root->fs_info->nodesize;
/* Gap between the previous extent and this one must be free space. */
8606 ret = check_cache_range(root, cache, last,
8607 key.objectid - last);
8610 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8611 last = key.objectid + key.offset;
8613 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the last extent. */
8617 if (last < cache->key.objectid + cache->key.offset)
8618 ret = check_cache_range(root, cache, last,
8619 cache->key.objectid +
8620 cache->key.offset - last);
8623 btrfs_release_path(&path);
8626 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8627 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of every block group.
 *
 * If the superblock cache generation is set (not -1ULL) and differs from the
 * superblock generation, a message is printed that the space cache will be
 * invalidated.
 *
 * Block groups are visited with btrfs_lookup_first_block_group(), starting
 * right after the primary superblock.  For each one the free space ctl is
 * created if missing, any previously loaded entries are dropped, and then:
 *   - with the FREE_SPACE_TREE compat_ro feature, super stripes are excluded,
 *     load_free_space_tree() is called, and the exclusions are freed again;
 *   - otherwise load_free_space_cache() is called.
 * The result is checked with verify_space_cache().
 *
 * Progress reporting is started and stopped through the global task ctx.
 *
 * Returns -EINVAL if the local "error" counter is non-zero, 0 otherwise.
 */
8635 static int check_space_cache(struct btrfs_root *root)
8637 struct btrfs_block_group_cache *cache;
8638 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8642 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8643 btrfs_super_generation(root->fs_info->super_copy) !=
8644 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8645 printf("cache and super generation don't match, space cache "
8646 "will be invalidated\n");
8650 if (ctx.progress_enabled) {
8651 ctx.tp = TASK_FREE_SPACE;
8652 task_start(ctx.info);
8656 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup begins at the end of this block group. */
8660 start = cache->key.objectid + cache->key.offset;
8661 if (!cache->free_space_ctl) {
8662 if (btrfs_init_free_space_ctl(cache,
8663 root->fs_info->sectorsize)) {
8668 btrfs_remove_free_space_cache(cache);
8671 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8672 ret = exclude_super_stripes(root, cache);
8674 fprintf(stderr, "could not exclude super stripes: %s\n",
8679 ret = load_free_space_tree(root->fs_info, cache);
8680 free_excluded_extents(root, cache);
8682 fprintf(stderr, "could not load free space tree: %s\n",
8689 ret = load_free_space_cache(root->fs_info, cache);
8694 ret = verify_space_cache(root, cache);
8696 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8697 cache->key.objectid);
8702 task_stop(ctx.info);
8704 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of one csum item.
 *
 * @bytenr:      logical start of the data covered by the csum item
 * @num_bytes:   number of data bytes covered; tested against sectorsize
 *               alignment before anything is read
 * @leaf_offset: offset inside @eb where the stored checksums begin
 * @eb:          leaf holding the csum item
 *
 * A buffer of @num_bytes is allocated and filled through read_extent_data().
 * The data is then checksummed one sectorsize block at a time and compared
 * with the stored value read from @eb at
 *   leaf_offset + (block index) * csum_size.
 * A mismatch is printed with the mirror number, and btrfs_num_copies() is
 * consulted to see whether another mirror can be tried.
 */
8707 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8708 u64 num_bytes, unsigned long leaf_offset,
8709 struct extent_buffer *eb) {
8711 struct btrfs_fs_info *fs_info = root->fs_info;
8713 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8715 unsigned long csum_offset;
8719 u64 data_checked = 0;
8725 if (num_bytes % fs_info->sectorsize)
8728 data = malloc(num_bytes);
8732 while (offset < num_bytes) {
8735 read_len = num_bytes - offset;
8736 /* read as much space once a time */
8737 ret = read_extent_data(fs_info, data + offset,
8738 bytenr + offset, &read_len, mirror);
/* The step used below is fs_info->sectorsize rather than a fixed 4k. */
8742 /* verify every 4k data's checksum */
8743 while (data_checked < read_len) {
8745 tmp = offset + data_checked;
8747 csum = btrfs_csum_data((char *)data + tmp,
8748 csum, fs_info->sectorsize);
8749 btrfs_csum_final(csum, (u8 *)&csum);
8751 csum_offset = leaf_offset +
8752 tmp / fs_info->sectorsize * csum_size;
8753 read_extent_buffer(eb, (char *)&csum_expected,
8754 csum_offset, csum_size);
8755 /* try another mirror */
8756 if (csum != csum_expected) {
8757 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8758 mirror, bytenr + tmp,
8759 csum, csum_expected);
8760 num_copies = btrfs_num_copies(root->fs_info,
8762 if (mirror < num_copies - 1) {
8767 data_checked += fs_info->sectorsize;
/*
 * Check that the extent tree holds EXTENT_ITEMs covering the range
 * [@bytenr, @bytenr + num_bytes).
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) in fs_info->extent_root and
 * steps back to the preceding item, walking back further while the key type
 * sorts above EXTENT_ITEM.  The tree is then walked forward and the range is
 * trimmed by every overlapping EXTENT_ITEM: from the front, from the end, or,
 * when an extent sits in the middle, by checking the right-hand part with a
 * recursive call and keeping the left-hand part.
 *
 * If bytes remain uncovered and no error occurred, "There are no extents for
 * csum range" is printed.
 *
 * NOTE(review): the search uses root->fs_info->extent_root, but
 * btrfs_prev_leaf() and btrfs_next_leaf() are handed @root -- confirm the
 * callers always pass a root for which that is equivalent.
 */
8776 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8779 struct btrfs_path path;
8780 struct extent_buffer *leaf;
8781 struct btrfs_key key;
8784 btrfs_init_path(&path);
8785 key.objectid = bytenr;
8786 key.type = BTRFS_EXTENT_ITEM_KEY;
8787 key.offset = (u64)-1;
8790 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8793 fprintf(stderr, "Error looking up extent record %d\n", ret);
8794 btrfs_release_path(&path);
8797 if (path.slots[0] > 0) {
8800 ret = btrfs_prev_leaf(root, &path);
8803 } else if (ret > 0) {
8810 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8813 * Block group items come before extent items if they have the same
8814 * bytenr, so walk back one more just in case. Dear future traveller,
8815 * first congrats on mastering time travel. Now if it's not too much
8816 * trouble could you go back to 2006 and tell Chris to make the
8817 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8818 * EXTENT_ITEM_KEY please?
8820 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8821 if (path.slots[0] > 0) {
8824 ret = btrfs_prev_leaf(root, &path);
8827 } else if (ret > 0) {
8832 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8836 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8837 ret = btrfs_next_leaf(root, &path);
8839 fprintf(stderr, "Error going to next leaf "
8841 btrfs_release_path(&path);
8847 leaf = path.nodes[0];
8848 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8849 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8853 if (key.objectid + key.offset < bytenr) {
8857 if (key.objectid > bytenr + num_bytes)
8860 if (key.objectid == bytenr) {
8861 if (key.offset >= num_bytes) {
8865 num_bytes -= key.offset;
8866 bytenr += key.offset;
8867 } else if (key.objectid < bytenr) {
8868 if (key.objectid + key.offset >= bytenr + num_bytes) {
8872 num_bytes = (bytenr + num_bytes) -
8873 (key.objectid + key.offset);
8874 bytenr = key.objectid + key.offset;
8876 if (key.objectid + key.offset < bytenr + num_bytes) {
8877 u64 new_start = key.objectid + key.offset;
8878 u64 new_bytes = bytenr + num_bytes - new_start;
8881 * Weird case, the extent is in the middle of
8882 * our range, we'll have to search one side
8883 * and then the other. Not sure if this happens
8884 * in real life, but no harm in coding it up
8885 * anyway just in case.
8887 btrfs_release_path(&path);
8888 ret = check_extent_exists(root, new_start,
8891 fprintf(stderr, "Right section didn't "
8895 num_bytes = key.objectid - bytenr;
8898 num_bytes = key.objectid - bytenr;
8905 if (num_bytes && !ret) {
8906 fprintf(stderr, "There are no extents for csum range "
8907 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8911 btrfs_release_path(&path);
/*
 * Walk every EXTENT_CSUM item of the csum tree and cross-check it.
 *
 * @root is replaced by fs_info->csum_root on entry; if the root node of that
 * tree is not uptodate, "No valid csum tree found" is printed.
 *
 * Each csum item covers
 *   (item size / csum_size) * sectorsize
 * bytes of data starting at key.offset.  When the global check_data_csum is
 * set, the data itself is verified with check_extent_csums(); otherwise that
 * step is skipped.
 *
 * Consecutive items are accumulated into one run [offset, offset+num_bytes).
 * When the next item does not start at the end of the current run, the run
 * is checked against the extent tree with check_extent_exists() and a new
 * run begins at key.offset.
 */
8915 static int check_csums(struct btrfs_root *root)
8917 struct btrfs_path path;
8918 struct extent_buffer *leaf;
8919 struct btrfs_key key;
8920 u64 offset = 0, num_bytes = 0;
8921 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8925 unsigned long leaf_offset;
8927 root = root->fs_info->csum_root;
8928 if (!extent_buffer_uptodate(root->node)) {
8929 fprintf(stderr, "No valid csum tree found\n");
8933 btrfs_init_path(&path);
8934 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8935 key.type = BTRFS_EXTENT_CSUM_KEY;
8937 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8939 fprintf(stderr, "Error searching csum tree %d\n", ret);
8940 btrfs_release_path(&path);
8944 if (ret > 0 && path.slots[0])
8949 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8950 ret = btrfs_next_leaf(root, &path);
8952 fprintf(stderr, "Error going to next leaf "
8959 leaf = path.nodes[0];
8961 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8962 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this item: one csum per sectorsize block. */
8967 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8968 csum_size) * root->fs_info->sectorsize;
8969 if (!check_data_csum)
8970 goto skip_csum_check;
8971 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8972 ret = check_extent_csums(root, key.offset, data_len,
8978 offset = key.offset;
/* Discontinuity: verify the finished run, then start a new one here. */
8979 } else if (key.offset != offset + num_bytes) {
8980 ret = check_extent_exists(root, offset, num_bytes);
8982 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8983 "there is no extent record\n",
8984 offset, offset+num_bytes);
8987 offset = key.offset;
8990 num_bytes += data_len;
8994 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in (objectid, type, offset)
 * order: a later field is only looked at when all earlier fields are equal.
 * Presumably reports non-zero when @key sorts strictly before @drop_key --
 * TODO confirm against the return statements.
 */
8998 static int is_dropped_key(struct btrfs_key *key,
8999 struct btrfs_key *drop_key) {
9000 if (key->objectid < drop_key->objectid)
9002 else if (key->objectid == drop_key->objectid) {
9003 if (key->type < drop_key->type)
9005 else if (key->type == drop_key->type) {
9006 if (key->offset < drop_key->offset)
9014 * Here are the rules for FULL_BACKREF.
9016 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9017 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9019 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9020 * if it happened after the relocation occurred since we'll have dropped the
9021 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9022 * have no real way to know for sure.
9024 * We process the blocks one root at a time, and we start from the lowest root
9025 * objectid and go to the highest. So we can just lookup the owner backref for
9026 * the record and if we don't find it then we know it doesn't exist and we have
9029 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9030 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9031 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached through root item @ri, should
 * carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and OR that bit into the caller's
 * flags when it should.
 *
 * The extent record for buf->start is looked up in @extent_cache.  Inputs to
 * the decision visible here: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node of @ri, whether
 * the header has BTRFS_HEADER_FLAG_RELOC, whether the header owner equals
 * ri->objectid, and whether the record has a tree backref for the owner.
 *
 * When the record already has flag_block_full_backref set to a value that
 * disagrees with the outcome, rec->bad_full_backref is set.
 */
9033 static int calc_extent_flag(struct cache_tree *extent_cache,
9034 struct extent_buffer *buf,
9035 struct root_item_record *ri,
9038 struct extent_record *rec;
9039 struct cache_extent *cache;
9040 struct tree_backref *tback;
9043 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9044 /* we have added this extent before */
9048 rec = container_of(cache, struct extent_record, cache);
9051 * Except file/reloc tree, we can not have
9054 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9059 if (buf->start == ri->bytenr)
9062 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9065 owner = btrfs_header_owner(buf);
9066 if (owner == ri->objectid)
/* Look for a tree backref with parent 0 and root == header owner. */
9069 tback = find_tree_backref(rec, 0, owner);
/* Outcome "not full backref": flag a record that was previously marked otherwise. */
9074 if (rec->flag_block_full_backref != FLAG_UNSET &&
9075 rec->flag_block_full_backref != 0)
9076 rec->bad_full_backref = 1;
/* Outcome "full backref": set the bit and flag a record that disagrees. */
9079 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9080 if (rec->flag_block_full_backref != FLAG_UNSET &&
9081 rec->flag_block_full_backref != 1)
9082 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 *   Invalid key type(<key type>) found in root(<root objectid>)
 * using print_key_type() and print_objectid() for the two values.
 */
9086 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9088 fprintf(stderr, "Invalid key type(");
9089 print_key_type(stderr, 0, key_type);
9090 fprintf(stderr, ") found in root(");
9091 print_objectid(stderr, rootid, 0);
9092 fprintf(stderr, ")\n");
9096 * Check if the key is valid with its extent buffer.
9098 * This is an early check in case an invalid key exists in an extent buffer
9099 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check whether an item of @key_type is allowed in the tree @rootid.
 *
 * Rules visible here:
 *   DEV_ITEM, CHUNK_ITEM                       -> chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM -> extent tree only
 *   ROOT_ITEM                                  -> root tree only
 *   DEV_EXTENT                                 -> dev tree only
 *
 * report_mismatch_key_root() prints the offending pair, presumably only on
 * the mismatch path -- TODO confirm.
 */
9102 static int check_type_with_root(u64 rootid, u8 key_type)
9105 /* Only valid in chunk tree */
9106 case BTRFS_DEV_ITEM_KEY:
9107 case BTRFS_CHUNK_ITEM_KEY:
9108 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9111 /* valid in csum and log tree */
/*
 * NOTE(review): this label is a tree objectid, while every other label in
 * this function is a key type.  BTRFS_EXTENT_CSUM_KEY was probably meant.
 * Before changing it, confirm that the root condition below accepts the
 * csum tree, otherwise valid csum items would start being rejected.
 */
9112 case BTRFS_CSUM_TREE_OBJECTID:
9113 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9117 case BTRFS_EXTENT_ITEM_KEY:
9118 case BTRFS_METADATA_ITEM_KEY:
9119 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9120 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9123 case BTRFS_ROOT_ITEM_KEY:
9124 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9127 case BTRFS_DEV_EXTENT_KEY:
9128 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9134 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the tree described by @ri.
 *
 * Steps visible in this function:
 *   1. pick_next_pending() selects a batch of blocks into @bits; the batch
 *      is added to @reada and readahead is issued for each block.
 *   2. The first block of the batch is taken: it is removed from @pending,
 *      @reada and @nodes, and read with read_tree_block() using the parent
 *      generation stored in its extent record when one exists.  A block that
 *      is not uptodate is recorded through record_bad_block_io().
 *   3. The FULL_BACKREF state is determined, either from the extent tree via
 *      btrfs_lookup_extent_info() (when init_extent_tree is not set) with
 *      calc_extent_flag() as a fallback, or from calc_extent_flag() alone.
 *      Disagreement with what the owner, the RELOC header flag and
 *      ri->last_snapshot imply marks rec->bad_full_backref.
 *   4. check_block() is run on the buffer.
 *   5. Leaf: every item is checked with check_type_with_root() and then
 *      dispatched by key type to the matching process_*() helper or backref
 *      helper.  Orphan items are queued on the global delete_items list and
 *      regular file extents update the data byte counters and add a data
 *      backref.
 *      Node: every child pointer gets an extent record and a tree backref
 *      and is queued on @nodes or @pending.
 *   6. Global btree size and waste statistics are updated and the buffer is
 *      released.
 */
9138 static int run_next_block(struct btrfs_root *root,
9139 struct block_info *bits,
9142 struct cache_tree *pending,
9143 struct cache_tree *seen,
9144 struct cache_tree *reada,
9145 struct cache_tree *nodes,
9146 struct cache_tree *extent_cache,
9147 struct cache_tree *chunk_cache,
9148 struct rb_root *dev_cache,
9149 struct block_group_tree *block_group_cache,
9150 struct device_extent_tree *dev_extent_cache,
9151 struct root_item_record *ri)
9153 struct btrfs_fs_info *fs_info = root->fs_info;
9154 struct extent_buffer *buf;
9155 struct extent_record *rec = NULL;
9166 struct btrfs_key key;
9167 struct cache_extent *cache;
9170 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9171 bits_nr, &reada_bits);
9176 for(i = 0; i < nritems; i++) {
9177 ret = add_cache_extent(reada, bits[i].start,
9182 /* fixme, get the parent transid */
9183 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first block of the batch is processed in this call. */
9186 *last = bits[0].start;
9187 bytenr = bits[0].start;
9188 size = bits[0].size;
/* Drop the block from each of the three tracking trees. */
9190 cache = lookup_cache_extent(pending, bytenr, size);
9192 remove_cache_extent(pending, cache);
9195 cache = lookup_cache_extent(reada, bytenr, size);
9197 remove_cache_extent(reada, cache);
9200 cache = lookup_cache_extent(nodes, bytenr, size);
9202 remove_cache_extent(nodes, cache);
9205 cache = lookup_cache_extent(extent_cache, bytenr, size);
9207 rec = container_of(cache, struct extent_record, cache);
9208 gen = rec->parent_generation;
9211 /* fixme, get the real parent transid */
9212 buf = read_tree_block(root->fs_info, bytenr, gen);
9213 if (!extent_buffer_uptodate(buf)) {
9214 record_bad_block_io(root->fs_info,
9215 extent_cache, bytenr, size);
9219 nritems = btrfs_header_nritems(buf);
9222 if (!init_extent_tree) {
9223 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9224 btrfs_header_level(buf), 1, NULL,
9227 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9229 fprintf(stderr, "Couldn't calc extent flags\n");
9230 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9235 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9237 fprintf(stderr, "Couldn't calc extent flags\n");
9238 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9242 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9244 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9245 ri->objectid == btrfs_header_owner(buf)) {
9247 * Ok we got to this block from it's original owner and
9248 * we have FULL_BACKREF set. Relocation can leave
9249 * converted blocks over so this is altogether possible,
9250 * however it's not possible if the generation > the
9251 * last snapshot, so check for this case.
9253 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9254 btrfs_header_generation(buf) > ri->last_snapshot) {
9255 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9256 rec->bad_full_backref = 1;
9261 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9262 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9263 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9264 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
9268 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9269 rec->flag_block_full_backref = 1;
9273 rec->flag_block_full_backref = 0;
9275 owner = btrfs_header_owner(buf);
9278 ret = check_block(root, extent_cache, buf, flags);
9282 if (btrfs_is_leaf(buf)) {
9283 btree_space_waste += btrfs_leaf_free_space(root, buf);
9284 for (i = 0; i < nritems; i++) {
9285 struct btrfs_file_extent_item *fi;
9286 btrfs_item_key_to_cpu(buf, &key, i);
9288 * Check key type against the leaf owner.
9289 * Could filter quite a lot of early error if
9292 if (check_type_with_root(btrfs_header_owner(buf),
9294 fprintf(stderr, "ignoring invalid key\n");
9297 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9298 process_extent_item(root, extent_cache, buf,
9302 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9303 process_extent_item(root, extent_cache, buf,
9307 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9309 btrfs_item_size_nr(buf, i);
9312 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9313 process_chunk_item(chunk_cache, &key, buf, i);
9316 if (key.type == BTRFS_DEV_ITEM_KEY) {
9317 process_device_item(dev_cache, &key, buf, i);
9320 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9321 process_block_group_item(block_group_cache,
9325 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9326 process_device_extent_item(dev_extent_cache,
9331 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9332 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9333 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: key.objectid is the extent bytenr. */
9340 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9341 ret = add_tree_backref(extent_cache,
9342 key.objectid, 0, key.offset, 0);
9345 "add_tree_backref failed (leaf tree block): %s",
9349 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9350 ret = add_tree_backref(extent_cache,
9351 key.objectid, key.offset, 0, 0);
9354 "add_tree_backref failed (leaf shared block): %s",
9358 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9359 struct btrfs_extent_data_ref *ref;
9360 ref = btrfs_item_ptr(buf, i,
9361 struct btrfs_extent_data_ref);
9362 add_data_backref(extent_cache,
9364 btrfs_extent_data_ref_root(buf, ref),
9365 btrfs_extent_data_ref_objectid(buf,
9367 btrfs_extent_data_ref_offset(buf, ref),
9368 btrfs_extent_data_ref_count(buf, ref),
9369 0, root->fs_info->sectorsize);
9372 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9373 struct btrfs_shared_data_ref *ref;
9374 ref = btrfs_item_ptr(buf, i,
9375 struct btrfs_shared_data_ref);
9376 add_data_backref(extent_cache,
9377 key.objectid, key.offset, 0, 0, 0,
9378 btrfs_shared_data_ref_count(buf, ref),
9379 0, root->fs_info->sectorsize);
/* Orphan items are copied onto the global delete_items list. */
9382 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9383 struct bad_item *bad;
9385 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9389 bad = malloc(sizeof(struct bad_item));
9392 INIT_LIST_HEAD(&bad->list);
9393 memcpy(&bad->key, &key,
9394 sizeof(struct btrfs_key));
9395 bad->root_id = owner;
9396 list_add_tail(&bad->list, &delete_items);
/* From here on only regular (non-inline, non-hole) file extents matter. */
9399 if (key.type != BTRFS_EXTENT_DATA_KEY)
9401 fi = btrfs_item_ptr(buf, i,
9402 struct btrfs_file_extent_item);
9403 if (btrfs_file_extent_type(buf, fi) ==
9404 BTRFS_FILE_EXTENT_INLINE)
9406 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9409 data_bytes_allocated +=
9410 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this compares the running global total, not the
 * disk_num_bytes of this extent, against sectorsize -- confirm intent.
 */
9411 if (data_bytes_allocated < root->fs_info->sectorsize) {
9414 data_bytes_referenced +=
9415 btrfs_file_extent_num_bytes(buf, fi);
9416 add_data_backref(extent_cache,
9417 btrfs_file_extent_disk_bytenr(buf, fi),
9418 parent, owner, key.objectid, key.offset -
9419 btrfs_file_extent_offset(buf, fi), 1, 1,
9420 btrfs_file_extent_disk_num_bytes(buf, fi));
9424 struct btrfs_key first_key;
9426 first_key.objectid = 0;
9429 btrfs_item_key_to_cpu(buf, &first_key, 0);
9430 level = btrfs_header_level(buf);
9431 for (i = 0; i < nritems; i++) {
9432 struct extent_record tmpl;
9434 ptr = btrfs_node_blockptr(buf, i);
9435 size = root->fs_info->nodesize;
9436 btrfs_node_key_to_cpu(buf, &key, i);
/* Children below the drop progress key of a partially dropped root. */
9438 if ((level == ri->drop_level)
9439 && is_dropped_key(&key, &ri->drop_key)) {
9444 memset(&tmpl, 0, sizeof(tmpl));
9445 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9446 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9451 tmpl.max_size = size;
9452 ret = add_extent_rec(extent_cache, &tmpl);
9456 ret = add_tree_backref(extent_cache, ptr, parent,
9460 "add_tree_backref failed (non-leaf block): %s",
9466 add_pending(nodes, seen, ptr, size);
9468 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of this node count as wasted btree space. */
9471 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9472 nritems) * sizeof(struct btrfs_key_ptr);
9474 total_btree_bytes += buf->len;
9475 if (fs_root_objectid(btrfs_header_owner(buf)))
9476 total_fs_tree_bytes += buf->len;
9477 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9478 total_extent_tree_bytes += buf->len;
9480 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing and seed its extent
 * record.
 *
 * The block goes onto @nodes when its level is above 0 and onto @pending
 * otherwise.  An extent record starting at buf->start with max_size buf->len
 * is added to @extent_cache, followed by a tree backref:
 *   - for the reloc tree objectid, or a block whose backref revision is
 *     older than BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as
 *     parent;
 *   - otherwise it uses parent 0 and the tree objectid as root.
 */
9484 static int add_root_to_pending(struct extent_buffer *buf,
9485 struct cache_tree *extent_cache,
9486 struct cache_tree *pending,
9487 struct cache_tree *seen,
9488 struct cache_tree *nodes,
9491 struct extent_record tmpl;
9494 if (btrfs_header_level(buf) > 0)
9495 add_pending(nodes, seen, buf->start, buf->len);
9497 add_pending(pending, seen, buf->start, buf->len);
9499 memset(&tmpl, 0, sizeof(tmpl));
9500 tmpl.start = buf->start;
9505 tmpl.max_size = buf->len;
9506 add_extent_rec(extent_cache, &tmpl);
9508 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9509 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9510 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9513 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9518 /* as we fix the tree, we might be deleting blocks that
9519 * we're tracking for repair. This hook makes sure we
9520 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records in step with an extent ref being freed.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID selects the data backref path, anything below
 * selects the tree backref path.
 *
 * Data path: the matching data_backref has found_ref / num_refs and the
 * record counters lowered by refs_to_drop; a counter that reaches zero
 * clears the corresponding found_ref / found_extent_tree bit.
 * Tree path: the matching tree_backref has its found_ref and
 * found_extent_tree bits cleared, and extent_item_refs is decremented when
 * non-zero.
 *
 * Finally maybe_free_extent_rec() is given the chance to release the record.
 */
9522 static int free_extent_hook(struct btrfs_trans_handle *trans,
9523 struct btrfs_root *root,
9524 u64 bytenr, u64 num_bytes, u64 parent,
9525 u64 root_objectid, u64 owner, u64 offset,
9528 struct extent_record *rec;
9529 struct cache_extent *cache;
9531 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9533 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9534 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9538 rec = container_of(cache, struct extent_record, cache);
9540 struct data_backref *back;
9541 back = find_data_backref(rec, parent, root_objectid, owner,
9542 offset, 1, bytenr, num_bytes);
9545 if (back->node.found_ref) {
9546 back->found_ref -= refs_to_drop;
9548 rec->refs -= refs_to_drop;
9550 if (back->node.found_extent_tree) {
9551 back->num_refs -= refs_to_drop;
9552 if (rec->extent_item_refs)
9553 rec->extent_item_refs -= refs_to_drop;
9555 if (back->found_ref == 0)
9556 back->node.found_ref = 0;
9557 if (back->num_refs == 0)
9558 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while found_ref is still set;
 * "!found_ref" may have been intended here -- confirm.
 */
9560 if (!back->node.found_extent_tree && back->node.found_ref) {
9561 rb_erase(&back->node.node, &rec->backref_tree);
9565 struct tree_backref *back;
9566 back = find_tree_backref(rec, parent, root_objectid);
9569 if (back->node.found_ref) {
9572 back->node.found_ref = 0;
9574 if (back->node.found_extent_tree) {
9575 if (rec->extent_item_refs)
9576 rec->extent_item_refs--;
9577 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data path above -- confirm. */
9579 if (!back->node.found_extent_tree && back->node.found_ref) {
9580 rb_erase(&back->node.node, &rec->backref_tree);
9584 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent at bytenr.
 *
 * The extent tree is searched from (bytenr, <type>, -1) and the item in
 * front of the search position is examined.  Items whose objectid differs
 * from bytenr end the walk.  Items that are not one of
 *   EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF,
 *   EXTENT_REF_V0, SHARED_BLOCK_REF, SHARED_DATA_REF
 * are skipped by moving the search key to just before them.  Every other
 * item is announced on stderr and removed with btrfs_del_item().
 *
 * When the removed item is an EXTENT_ITEM or METADATA_ITEM, block group
 * accounting is updated for its size: key.offset for EXTENT_ITEM, nodesize
 * for METADATA_ITEM.
 */
9589 static int delete_extent_records(struct btrfs_trans_handle *trans,
9590 struct btrfs_root *root,
9591 struct btrfs_path *path,
9594 struct btrfs_key key;
9595 struct btrfs_key found_key;
9596 struct extent_buffer *leaf;
9601 key.objectid = bytenr;
9603 key.offset = (u64)-1;
9606 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9613 if (path->slots[0] == 0)
9619 leaf = path->nodes[0];
9620 slot = path->slots[0];
9622 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9623 if (found_key.objectid != bytenr)
9626 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9627 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9628 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9629 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9630 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9631 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9632 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Unrelated item: restart the search from the key just below it. */
9633 btrfs_release_path(path);
9634 if (found_key.type == 0) {
9635 if (found_key.offset == 0)
9637 key.offset = found_key.offset - 1;
9638 key.type = found_key.type;
9640 key.type = found_key.type - 1;
9641 key.offset = (u64)-1;
9645 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9646 found_key.objectid, found_key.type, found_key.offset);
9648 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9651 btrfs_release_path(path);
9653 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9654 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9655 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9656 found_key.offset : root->fs_info->nodesize;
9658 ret = btrfs_update_block_group(trans, root, bytenr,
9665 btrfs_release_path(path);
9670 * for a single backref, this will allocate a new extent
9671 * and add the backref to it.
/*
 * Write one backref of extent record @rec into the extent tree, creating the
 * extent item first when needed.
 *
 * @trans:     running transaction
 * @info:      fs_info; info->extent_root is the tree being modified
 * @path:      scratch path, released before returning
 * @rec:       extent record being repaired; rec->max_size is raised to at
 *             least a minimum size before use
 * @back:      backref to add; back->is_data selects data vs. tree handling
 * @allocated: whether the extent item already exists -- TODO confirm, the
 *             test on it is not part of the lines reviewed here
 * @flags:     extra extent flags ORed in for tree blocks
 *
 * New extent item: an EXTENT_ITEM keyed (rec->start, rec->max_size) is
 * inserted with refs 0 and rec->generation.  Data extents get
 * BTRFS_EXTENT_FLAG_DATA.  Tree blocks get a zeroed btrfs_tree_block_info
 * whose key is (rec->info_objectid, 0, 0) and whose level is
 * rec->info_level, plus BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  Block group
 * accounting is then updated for the new allocation.
 *
 * Backref: for data, btrfs_inc_extent_ref() is called once per found ref of
 * the data backref; for tree blocks it is called once with the tree backref
 * root.  A full backref passes the recorded parent.
 */
9673 static int record_extent(struct btrfs_trans_handle *trans,
9674 struct btrfs_fs_info *info,
9675 struct btrfs_path *path,
9676 struct extent_record *rec,
9677 struct extent_backref *back,
9678 int allocated, u64 flags)
9681 struct btrfs_root *extent_root = info->extent_root;
9682 struct extent_buffer *leaf;
9683 struct btrfs_key ins_key;
9684 struct btrfs_extent_item *ei;
9685 struct data_backref *dback;
9686 struct btrfs_tree_block_info *bi;
9689 rec->max_size = max_t(u64, rec->max_size,
9693 u32 item_size = sizeof(*ei);
9696 item_size += sizeof(*bi);
9698 ins_key.objectid = rec->start;
9699 ins_key.offset = rec->max_size;
9700 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9702 ret = btrfs_insert_empty_item(trans, extent_root, path,
9703 &ins_key, item_size);
9707 leaf = path->nodes[0];
9708 ei = btrfs_item_ptr(leaf, path->slots[0],
9709 struct btrfs_extent_item);
9711 btrfs_set_extent_refs(leaf, ei, 0);
9712 btrfs_set_extent_generation(leaf, ei, rec->generation);
9714 if (back->is_data) {
9715 btrfs_set_extent_flags(leaf, ei,
9716 BTRFS_EXTENT_FLAG_DATA);
9718 struct btrfs_disk_key copy_key;
9720 bi = (struct btrfs_tree_block_info *)(ei + 1);
9721 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9724 btrfs_set_disk_key_objectid(&copy_key,
9725 rec->info_objectid);
9726 btrfs_set_disk_key_type(&copy_key, 0);
9727 btrfs_set_disk_key_offset(&copy_key, 0);
9729 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9730 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9732 btrfs_set_extent_flags(leaf, ei,
9733 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9736 btrfs_mark_buffer_dirty(leaf);
9737 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9738 rec->max_size, 1, 0);
9741 btrfs_release_path(path);
9744 if (back->is_data) {
9748 dback = to_data_backref(back);
9749 if (back->full_backref)
9750 parent = dback->parent;
9754 for (i = 0; i < dback->found_ref; i++) {
9755 /* if parent != 0, we're doing a full backref
9756 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9757 * just makes the backref allocator create a data
9760 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9761 rec->start, rec->max_size,
9765 BTRFS_FIRST_FREE_OBJECTID :
9771 fprintf(stderr, "adding new data backref"
9772 " on %llu %s %llu owner %llu"
9773 " offset %llu found %d\n",
9774 (unsigned long long)rec->start,
9775 back->full_backref ?
9777 back->full_backref ?
9778 (unsigned long long)parent :
9779 (unsigned long long)dback->root,
9780 (unsigned long long)dback->owner,
9781 (unsigned long long)dback->offset,
9785 struct tree_backref *tback;
9787 tback = to_tree_backref(back);
9788 if (back->full_backref)
9789 parent = tback->parent;
9793 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9794 rec->start, rec->max_size,
9795 parent, tback->root, 0, 0);
9796 fprintf(stderr, "adding new tree backref on "
9797 "start %llu len %llu parent %llu root %llu\n",
9798 rec->start, rec->max_size, parent, tback->root);
9801 btrfs_release_path(path);
/*
 * find_entry - look for an extent_entry in @entries whose bytenr and bytes
 * both equal the requested values.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers test the result against NULL, so presumably the match is returned
 * and NULL otherwise -- confirm.
 */
9805 static struct extent_entry *find_entry(struct list_head *entries,
9806 u64 bytenr, u64 bytes)
9808 struct extent_entry *entry = NULL;
9810 list_for_each_entry(entry, entries, list) {
9811 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the entry that the most backrefs agree on.
 *
 * Walks @entries and compares the count fields:
 *  - an entry whose broken count equals its total count is not trusted;
 *  - an entry with the same count as the current best makes the choice
 *    ambiguous, so the search has to continue;
 *  - otherwise the entry with the larger count becomes the new best.
 *
 * NOTE(review): the final return is not visible here; callers treat a NULL
 * result as no clear winner -- confirm.
 */
9818 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9820 struct extent_entry *entry, *best = NULL, *prev = NULL;
9822 list_for_each_entry(entry, entries, list) {
9824 * If there are as many broken entries as entries then we know
9825 * not to trust this particular entry.
9827 if (entry->broken == entry->count)
9831 * Special case, when there are only two entries and 'best' is
9841 * If our current entry == best then we can't be sure our best
9842 * is really the best, so we need to keep searching.
9844 if (best && best->count == entry->count) {
9850 /* Prev == entry, not good enough, have to keep searching */
9851 if (!prev->broken && prev->count == entry->count)
9855 best = (prev->count > entry->count) ? prev : entry;
9856 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it matches @entry.
 *
 * @info:  filesystem being repaired
 * @path:  scratch path, released before returning
 * @dback: data backref whose file extent item disagrees with @entry
 * @entry: the (bytenr, bytes) pair chosen as correct
 *
 * Steps visible in the code:
 *  1. read the fs root named by dback->root;
 *  2. search read-only from (dback->owner, EXTENT_DATA, dback->offset) and
 *     walk forward until a file extent with dback->disk_bytenr and
 *     dback->bytes is found;
 *  3. start a transaction and search again with COW for the key found;
 *  4. refuse a compressed extent whose disk_bytenr differs from
 *     entry->bytenr;
 *  5. set disk_bytenr to entry->bytenr, adjusting the in-extent offset when
 *     the ref started before or after the entry, and refuse refs that would
 *     fall outside the entry;
 *  6. set disk_num_bytes to entry->bytes, and ram_bytes as well when the
 *     extent is not compressed;
 *  7. commit the transaction.
 *
 * Returns ret if it is non-zero, otherwise the result of the commit.
 */
9864 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9865 struct data_backref *dback, struct extent_entry *entry)
9867 struct btrfs_trans_handle *trans;
9868 struct btrfs_root *root;
9869 struct btrfs_file_extent_item *fi;
9870 struct extent_buffer *leaf;
9871 struct btrfs_key key;
/* Locate the subvolume root that owns the reference. */
9875 key.objectid = dback->root;
9876 key.type = BTRFS_ROOT_ITEM_KEY;
9877 key.offset = (u64)-1;
9878 root = btrfs_read_fs_root(info, &key);
9880 fprintf(stderr, "Couldn't find root for our ref\n");
9885 * The backref points to the original offset of the extent if it was
9886 * split, so we need to search down to the offset we have and then walk
9887 * forward until we find the backref we're looking for.
9889 key.objectid = dback->owner;
9890 key.type = BTRFS_EXTENT_DATA_KEY;
9891 key.offset = dback->offset;
9892 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9894 fprintf(stderr, "Error looking up ref %d\n", ret);
9899 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9900 ret = btrfs_next_leaf(root, path);
9902 fprintf(stderr, "Couldn't find our ref, next\n");
9906 leaf = path->nodes[0];
9907 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9908 if (key.objectid != dback->owner ||
9909 key.type != BTRFS_EXTENT_DATA_KEY) {
9910 fprintf(stderr, "Couldn't find our ref, search\n");
9913 fi = btrfs_item_ptr(leaf, path->slots[0],
9914 struct btrfs_file_extent_item);
9915 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9916 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9918 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9923 btrfs_release_path(path);
9925 trans = btrfs_start_transaction(root, 1);
9927 return PTR_ERR(trans);
9930 * Ok we have the key of the file extent we want to fix, now we can cow
9931 * down to the thing and fix it.
9933 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9935 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9936 key.objectid, key.type, key.offset, ret);
9940 fprintf(stderr, "Well that's odd, we just found this key "
9941 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9946 leaf = path->nodes[0];
9947 fi = btrfs_item_ptr(leaf, path->slots[0],
9948 struct btrfs_file_extent_item);
9950 if (btrfs_file_extent_compression(leaf, fi) &&
9951 dback->disk_bytenr != entry->bytenr) {
9952 fprintf(stderr, "Ref doesn't match the record start and is "
9953 "compressed, please take a btrfs-image of this file "
9954 "system and send it to a btrfs developer so they can "
9955 "complete this functionality for bytenr %Lu\n",
9956 dback->disk_bytenr);
9961 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9962 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9963 } else if (dback->disk_bytenr > entry->bytenr) {
9964 u64 off_diff, offset;
9966 off_diff = dback->disk_bytenr - entry->bytenr;
9967 offset = btrfs_file_extent_offset(leaf, fi);
9968 if (dback->disk_bytenr + offset +
9969 btrfs_file_extent_num_bytes(leaf, fi) >
9970 entry->bytenr + entry->bytes) {
9971 fprintf(stderr, "Ref is past the entry end, please "
9972 "take a btrfs-image of this file system and "
9973 "send it to a btrfs developer, ref %Lu\n",
9974 dback->disk_bytenr);
9979 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9980 btrfs_set_file_extent_offset(leaf, fi, offset);
9981 } else if (dback->disk_bytenr < entry->bytenr) {
9984 offset = btrfs_file_extent_offset(leaf, fi);
9985 if (dback->disk_bytenr + offset < entry->bytenr) {
9986 fprintf(stderr, "Ref is before the entry start, please"
9987 " take a btrfs-image of this file system and "
9988 "send it to a btrfs developer, ref %Lu\n",
9989 dback->disk_bytenr);
9994 offset += dback->disk_bytenr;
9995 offset -= entry->bytenr;
9996 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9997 btrfs_set_file_extent_offset(leaf, fi, offset);
10000 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10003 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10004 * only do this if we aren't using compression, otherwise it's a
10007 if (!btrfs_file_extent_compression(leaf, fi))
10008 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10010 printf("ram bytes may be wrong?\n");
10011 btrfs_mark_buffer_dirty(leaf);
10013 err = btrfs_commit_transaction(trans, root);
10014 btrfs_release_path(path);
10015 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on a single
 * (bytenr, bytes) pair, repairing file extent items that disagree.
 *
 * Pass 1: every data backref that is not a full backref and has
 *         found_ref != 0 is bucketed into a local list of extent_entry,
 *         one entry per distinct (disk_bytenr, bytes).  Backrefs that differ
 *         from (rec->start, rec->nr) or are flagged broken are tracked too.
 * If at most one entry exists and nothing mismatched, there is nothing to do.
 *
 * Otherwise find_most_right_entry() selects a winner.  When it cannot, the
 * extent record itself is consulted: if no entry equals (rec->start, rec->nr)
 * and either no broken backrefs were seen or rec->found_rec is unset, the
 * case is reported as unfixable; if broken backrefs were seen and the record
 * was found, an entry for the record is added and the selection is retried.
 * A winner whose bytenr differs from rec->start is reported and not handled.
 *
 * Pass 2: repair_ref() is called for each found data backref whose
 *         (disk_bytenr, bytes) differs from the winner.
 *
 * The local entry list is emptied before returning.
 *
 * NOTE(review): the return values are not visible in this excerpt; the
 * comment near the end says the caller must drop its cache and rescan after
 * refs were changed -- confirm how that is signalled.
 */
10018 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10019 struct extent_record *rec)
10021 struct extent_backref *back, *tmp;
10022 struct data_backref *dback;
10023 struct extent_entry *entry, *best = NULL;
10024 LIST_HEAD(entries);
10025 int nr_entries = 0;
10026 int broken_entries = 0;
10028 short mismatch = 0;
10031 * Metadata is easy and the backrefs should always agree on bytenr and
10032 * size, if not we've got bigger issues.
10037 rbtree_postorder_for_each_entry_safe(back, tmp,
10038 &rec->backref_tree, node) {
10039 if (back->full_backref || !back->is_data)
10042 dback = to_data_backref(back);
10045 * We only pay attention to backrefs that we found a real
10048 if (dback->found_ref == 0)
10052 * For now we only catch when the bytes don't match, not the
10053 * bytenr. We can easily do this at the same time, but I want
10054 * to have a fs image to test on before we just add repair
10055 * functionality willy-nilly so we know we won't screw up the
10059 entry = find_entry(&entries, dback->disk_bytenr,
10062 entry = malloc(sizeof(struct extent_entry));
10067 memset(entry, 0, sizeof(*entry));
10068 entry->bytenr = dback->disk_bytenr;
10069 entry->bytes = dback->bytes;
10070 list_add_tail(&entry->list, &entries);
10075 * If we only have on entry we may think the entries agree when
10076 * in reality they don't so we have to do some extra checking.
10078 if (dback->disk_bytenr != rec->start ||
10079 dback->bytes != rec->nr || back->broken)
10082 if (back->broken) {
10090 /* Yay all the backrefs agree, carry on good sir */
10091 if (nr_entries <= 1 && !mismatch)
10094 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10095 "%Lu\n", rec->start);
10098 * First we want to see if the backrefs can agree amongst themselves who
10099 * is right, so figure out which one of the entries has the highest
10102 best = find_most_right_entry(&entries);
10105 * Ok so we may have an even split between what the backrefs think, so
10106 * this is where we use the extent ref to see what it thinks.
10109 entry = find_entry(&entries, rec->start, rec->nr);
10110 if (!entry && (!broken_entries || !rec->found_rec)) {
10111 fprintf(stderr, "Backrefs don't agree with each other "
10112 "and extent record doesn't agree with anybody,"
10113 " so we can't fix bytenr %Lu bytes %Lu\n",
10114 rec->start, rec->nr);
10117 } else if (!entry) {
10119 * Ok our backrefs were broken, we'll assume this is the
10120 * correct value and add an entry for this range.
10122 entry = malloc(sizeof(struct extent_entry));
10127 memset(entry, 0, sizeof(*entry));
10128 entry->bytenr = rec->start;
10129 entry->bytes = rec->nr;
10130 list_add_tail(&entry->list, &entries);
10134 best = find_most_right_entry(&entries);
10136 fprintf(stderr, "Backrefs and extent record evenly "
10137 "split on who is right, this is going to "
10138 "require user input to fix bytenr %Lu bytes "
10139 "%Lu\n", rec->start, rec->nr);
10146 * I don't think this can happen currently as we'll abort() if we catch
10147 * this case higher up, but in case somebody removes that we still can't
10148 * deal with it properly here yet, so just bail out of that's the case.
10150 if (best->bytenr != rec->start) {
10151 fprintf(stderr, "Extent start and backref starts don't match, "
10152 "please use btrfs-image on this file system and send "
10153 "it to a btrfs developer so they can make fsck fix "
10154 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10155 rec->start, rec->nr);
10161 * Ok great we all agreed on an extent record, let's go find the real
10162 * references and fix up the ones that don't match.
10164 rbtree_postorder_for_each_entry_safe(back, tmp,
10165 &rec->backref_tree, node) {
10166 if (back->full_backref || !back->is_data)
10169 dback = to_data_backref(back);
10172 * Still ignoring backrefs that don't have a real ref attached
10175 if (dback->found_ref == 0)
10178 if (dback->bytes == best->bytes &&
10179 dback->disk_bytenr == best->bytenr)
10182 ret = repair_ref(info, path, dback, best);
10188 * Ok we messed with the actual refs, which means we need to drop our
10189 * entire cache and go back and rescan. I know this is a huge pain and
10190 * adds a lot of extra work, but it's the only way to be safe. Once all
10191 * the backrefs agree we may not need to do anything to the extent
10196 while (!list_empty(&entries)) {
10197 entry = list_entry(entries.next, struct extent_entry, list);
10198 list_del_init(&entry->list);
/*
 * process_duplicates - decide whether @rec really has duplicate extent items
 * or only a backref that was seen before its extent item.
 *
 * When @rec was not backed by an extent item and has exactly one duplicate,
 * the duplicate (called good below) replaces @rec in @extent_cache: it is
 * reset, inherits the ref count and backrefs of @rec, and then absorbs every
 * other cache entry overlapping its range.  Overlapping entries that have an
 * extent item or duplicates of their own are queued on good->dups and good
 * is put back on the global duplicate_extents list; entries without one
 * simply donate their refs and backrefs.
 *
 * Returns 1 when good ends up with no duplicates, 0 when duplicates remain
 * to be deleted.
 *
 * NOTE(review): the early exit for found_rec or more than one duplicate does
 * not show its return value in this excerpt.
 * NOTE(review): this function splices the backrefs list, while the other
 * functions in this part of the file walk backref_tree; confirm that the
 * list is still the structure that holds the backrefs.
 */
10204 static int process_duplicates(struct cache_tree *extent_cache,
10205 struct extent_record *rec)
10207 struct extent_record *good, *tmp;
10208 struct cache_extent *cache;
10212 * If we found a extent record for this extent then return, or if we
10213 * have more than one duplicate we are likely going to need to delete
10216 if (rec->found_rec || rec->num_duplicates > 1)
10219 /* Shouldn't happen but just in case */
10220 BUG_ON(!rec->num_duplicates);
10223 * So this happens if we end up with a backref that doesn't match the
10224 * actual extent entry. So either the backref is bad or the extent
10225 * entry is bad. Either way we want to have the extent_record actually
10226 * reflect what we found in the extent_tree, so we need to take the
10227 * duplicate out and use that as the extent_record since the only way we
10228 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10230 remove_cache_extent(extent_cache, &rec->cache);
10232 good = to_extent_record(rec->dups.next);
10233 list_del_init(&good->list);
10234 INIT_LIST_HEAD(&good->backrefs);
10235 INIT_LIST_HEAD(&good->dups);
10236 good->cache.start = good->start;
10237 good->cache.size = good->nr;
10238 good->content_checked = 0;
10239 good->owner_ref_checked = 0;
10240 good->num_duplicates = 0;
10241 good->refs = rec->refs;
10242 list_splice_init(&rec->backrefs, &good->backrefs);
10244 cache = lookup_cache_extent(extent_cache, good->start,
10248 tmp = container_of(cache, struct extent_record, cache);
10251 * If we find another overlapping extent and it's found_rec is
10252 * set then it's a duplicate and we need to try and delete
10255 if (tmp->found_rec || tmp->num_duplicates > 0) {
10256 if (list_empty(&good->list))
10257 list_add_tail(&good->list,
10258 &duplicate_extents);
10259 good->num_duplicates += tmp->num_duplicates + 1;
10260 list_splice_init(&tmp->dups, &good->dups);
10261 list_del_init(&tmp->list);
10262 list_add_tail(&tmp->list, &good->dups);
10263 remove_cache_extent(extent_cache, &tmp->cache);
10268 * Ok we have another non extent item backed extent rec, so lets
10269 * just add it to this extent and carry on like we did above.
10271 good->refs += tmp->refs;
10272 list_splice_init(&tmp->backrefs, &good->backrefs);
10273 remove_cache_extent(extent_cache, &tmp->cache);
10276 ret = insert_cache_extent(extent_cache, &good->cache);
10279 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove the redundant extent items recorded as
 * duplicates of @rec from the extent tree.
 *
 * The function looks for the one record that covers all duplicates, moves
 * the others (possibly including @rec itself) onto a local delete list, and
 * then, inside one transaction on the extent root, searches for and deletes
 * the (start, BTRFS_EXTENT_ITEM_KEY, nr) item of every listed record that
 * has found_rec set.  Metadata records and overlaps that are not fully
 * covered are reported instead of handled.  Both lists are emptied before
 * returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  When both are zero,
 * rec->num_duplicates is reset to 0.
 *
 * NOTE(review): the overlap message uses the format [%Lu-%Lu] but is given a
 * start and a length rather than a start and an end; confirm whether that is
 * intended.
 */
10282 static int delete_duplicate_records(struct btrfs_root *root,
10283 struct extent_record *rec)
10285 struct btrfs_trans_handle *trans;
10286 LIST_HEAD(delete_list);
10287 struct btrfs_path path;
10288 struct extent_record *tmp, *good, *n;
10291 struct btrfs_key key;
10293 btrfs_init_path(&path);
10296 /* Find the record that covers all of the duplicates. */
10297 list_for_each_entry(tmp, &rec->dups, list) {
10298 if (good->start < tmp->start)
10300 if (good->nr > tmp->nr)
10303 if (tmp->start + tmp->nr < good->start + good->nr) {
10304 fprintf(stderr, "Ok we have overlapping extents that "
10305 "aren't completely covered by each other, this "
10306 "is going to require more careful thought. "
10307 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10308 tmp->start, tmp->nr, good->start, good->nr);
10315 list_add_tail(&rec->list, &delete_list);
10317 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10320 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent tree, under a single transaction. */
10323 root = root->fs_info->extent_root;
10324 trans = btrfs_start_transaction(root, 1);
10325 if (IS_ERR(trans)) {
10326 ret = PTR_ERR(trans);
10330 list_for_each_entry(tmp, &delete_list, list) {
10331 if (tmp->found_rec == 0)
10333 key.objectid = tmp->start;
10334 key.type = BTRFS_EXTENT_ITEM_KEY;
10335 key.offset = tmp->nr;
10337 /* Shouldn't happen but just in case */
10338 if (tmp->metadata) {
10339 fprintf(stderr, "Well this shouldn't happen, extent "
10340 "record overlaps but is metadata? "
10341 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10345 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10351 ret = btrfs_del_item(trans, root, &path);
10354 btrfs_release_path(&path);
10357 err = btrfs_commit_transaction(trans, root);
10361 while (!list_empty(&delete_list)) {
10362 tmp = to_extent_record(delete_list.next);
10363 list_del_init(&tmp->list);
10369 while (!list_empty(&rec->dups)) {
10370 tmp = to_extent_record(rec->dups.next);
10371 list_del_init(&tmp->list);
10375 btrfs_release_path(&path);
10377 if (!ret && !nr_del)
10378 rec->num_duplicates = 0;
10380 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate the file extent items behind data
 * backrefs of @rec that were not found during the tree scan.
 *
 * For each data backref that is not a full backref and has found_ref == 0:
 *  - read the fs root named by dback->root (a missing root, -ENOENT, just
 *    skips the backref; any other error is returned);
 *  - search that root for (dback->owner, EXTENT_DATA, dback->offset);
 *  - if the item exists, read its disk_bytenr and disk_num_bytes and look
 *    the bytenr up in @extent_cache; a cached record with found_rec set
 *    owns that extent, so the backref is left alone;
 *  - otherwise bump dback->found_ref and copy bytenr and bytes into the
 *    backref so that verify_backrefs() can compare them.
 *
 * NOTE(review): the statement that flags the backref as not trustworthy and
 * the final return are not visible in this excerpt.
 * NOTE(review): the inner declaration named tmp has a different type than,
 * and shadows, the tmp cursor used by the enclosing iteration macro.
 */
10383 static int find_possible_backrefs(struct btrfs_fs_info *info,
10384 struct btrfs_path *path,
10385 struct cache_tree *extent_cache,
10386 struct extent_record *rec)
10388 struct btrfs_root *root;
10389 struct extent_backref *back, *tmp;
10390 struct data_backref *dback;
10391 struct cache_extent *cache;
10392 struct btrfs_file_extent_item *fi;
10393 struct btrfs_key key;
10397 rbtree_postorder_for_each_entry_safe(back, tmp,
10398 &rec->backref_tree, node) {
10399 /* Don't care about full backrefs (poor unloved backrefs) */
10400 if (back->full_backref || !back->is_data)
10403 dback = to_data_backref(back);
10405 /* We found this one, we don't need to do a lookup */
10406 if (dback->found_ref)
10409 key.objectid = dback->root;
10410 key.type = BTRFS_ROOT_ITEM_KEY;
10411 key.offset = (u64)-1;
10413 root = btrfs_read_fs_root(info, &key);
10415 /* No root, definitely a bad ref, skip */
10416 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10418 /* Other err, exit */
10420 return PTR_ERR(root);
10422 key.objectid = dback->owner;
10423 key.type = BTRFS_EXTENT_DATA_KEY;
10424 key.offset = dback->offset;
10425 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10427 btrfs_release_path(path);
10430 /* Didn't find it, we can carry on */
10435 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10436 struct btrfs_file_extent_item);
10437 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10438 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10439 btrfs_release_path(path);
10440 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10442 struct extent_record *tmp;
10443 tmp = container_of(cache, struct extent_record, cache);
10446 * If we found an extent record for the bytenr for this
10447 * particular backref then we can't add it to our
10448 * current extent record. We only want to add backrefs
10449 * that don't have a corresponding extent item in the
10450 * extent tree since they likely belong to this record
10451 * and we need to fix it if it doesn't match bytenrs.
10453 if (tmp->found_rec)
10457 dback->found_ref += 1;
10458 dback->disk_bytenr = bytenr;
10459 dback->bytes = bytes;
10462 * Set this so the verify backref code knows not to trust the
10463 * values in this backref.
10472 * Record orphan data ref into corresponding root.
10474 * Return 0 if the extent item contains data ref and recorded.
10475 * Return 1 if the extent item contains no useful data ref
10476 * In that case, it may contain only shared_dataref or metadata backref
10477 * or the file extent exists (this should be handled by the extent bytenr
10478 * recovery routine)
10479 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data extents that the extent tree
 * references but whose file extent item could not be found.
 *
 * For each data backref of @rec that is not a full backref, was seen in the
 * extent tree and has found_ref == 0, the owning fs root is read (roots that
 * cannot be read are skipped) and searched for
 * (dback->owner, EXTENT_DATA, dback->offset).  An orphan_data_extent
 * describing root, inode, file offset and the on-disk range of @rec is then
 * allocated and linked to that root.
 *
 * The last visible statement returns the negation of recorded_data_ref,
 * i.e. 0 when at least one orphan was recorded and 1 otherwise.
 *
 * NOTE(review): list helpers of this kind are normally called as
 * list_add(new, head).  The call below passes the list head embedded in
 * dest_root first and the freshly initialised orphan node second, which
 * looks reversed; if so, each call would drop the orphans already linked to
 * that root.  Confirm against the list helper used by this tree.
 */
10481 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10482 struct extent_record *rec)
10484 struct btrfs_key key;
10485 struct btrfs_root *dest_root;
10486 struct extent_backref *back, *tmp;
10487 struct data_backref *dback;
10488 struct orphan_data_extent *orphan;
10489 struct btrfs_path path;
10490 int recorded_data_ref = 0;
10495 btrfs_init_path(&path);
10496 rbtree_postorder_for_each_entry_safe(back, tmp,
10497 &rec->backref_tree, node) {
10498 if (back->full_backref || !back->is_data ||
10499 !back->found_extent_tree)
10501 dback = to_data_backref(back);
10502 if (dback->found_ref)
10504 key.objectid = dback->root;
10505 key.type = BTRFS_ROOT_ITEM_KEY;
10506 key.offset = (u64)-1;
10508 dest_root = btrfs_read_fs_root(fs_info, &key);
10510 /* For non-exist root we just skip it */
10511 if (IS_ERR(dest_root) || !dest_root)
10514 key.objectid = dback->owner;
10515 key.type = BTRFS_EXTENT_DATA_KEY;
10516 key.offset = dback->offset;
10518 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10519 btrfs_release_path(&path);
10521 * For ret < 0, it's OK since the fs-tree may be corrupted,
10522 * we need to record it for inode/file extent rebuild.
10523 * For ret > 0, we record it only for file extent rebuild.
10524 * For ret == 0, the file extent exists but only bytenr
10525 * mismatch, let the original bytenr fix routine to handle,
10531 orphan = malloc(sizeof(*orphan));
10536 INIT_LIST_HEAD(&orphan->list);
10537 orphan->root = dback->root;
10538 orphan->objectid = dback->owner;
10539 orphan->offset = dback->offset;
10540 orphan->disk_bytenr = rec->cache.start;
10541 orphan->disk_len = rec->cache.size;
10542 list_add(&dest_root->orphan_data_extents, &orphan->list);
10543 recorded_data_ref = 1;
10546 btrfs_release_path(&path);
10548 return !recorded_data_ref;
10554 * when an incorrect extent item is found, this will delete
10555 * all of the existing entries for it and recreate them
10556 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec from the
 * backrefs collected by the tree scan.
 *
 * Visible sequence:
 *  - for data extents whose ref count differs from the extent item,
 *    find_possible_backrefs() and then verify_backrefs() are run first so
 *    the backrefs agree with each other;
 *  - a transaction is started on the extent root;
 *  - delete_extent_records() removes the existing items;
 *  - the range is looked up in info->corrupt_blocks, since references must
 *    not be re-added to a corrupt block;
 *  - record_extent() is called for every backref with found_ref set, after
 *    clearing rec->bad_full_backref;
 *  - the transaction is committed and a message is printed.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is passed on to record_extent() when
 * rec->flag_block_full_backref is set.
 *
 * NOTE(review): the error paths and the return statement are not visible in
 * this excerpt.
 */
10558 static int fixup_extent_refs(struct btrfs_fs_info *info,
10559 struct cache_tree *extent_cache,
10560 struct extent_record *rec)
10562 struct btrfs_trans_handle *trans = NULL;
10564 struct btrfs_path path;
10565 struct cache_extent *cache;
10566 struct extent_backref *back, *tmp;
10570 if (rec->flag_block_full_backref)
10571 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10573 btrfs_init_path(&path);
10574 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10576 * Sometimes the backrefs themselves are so broken they don't
10577 * get attached to any meaningful rec, so first go back and
10578 * check any of our backrefs that we couldn't find and throw
10579 * them into the list if we find the backref so that
10580 * verify_backrefs can figure out what to do.
10582 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10587 /* step one, make sure all of the backrefs agree */
10588 ret = verify_backrefs(info, &path, rec);
10592 trans = btrfs_start_transaction(info->extent_root, 1);
10593 if (IS_ERR(trans)) {
10594 ret = PTR_ERR(trans);
10598 /* step two, delete all the existing records */
10599 ret = delete_extent_records(trans, info->extent_root, &path,
10605 /* was this block corrupt? If so, don't add references to it */
10606 cache = lookup_cache_extent(info->corrupt_blocks,
10607 rec->start, rec->max_size);
10613 /* step three, recreate all the refs we did find */
10614 rbtree_postorder_for_each_entry_safe(back, tmp,
10615 &rec->backref_tree, node) {
10617 * if we didn't find any references, don't create a
10618 * new extent record
10620 if (!back->found_ref)
10623 rec->bad_full_backref = 0;
10624 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10632 int err = btrfs_commit_transaction(trans, info->extent_root);
10638 fprintf(stderr, "Repaired extent references for %llu\n",
10639 (unsigned long long)rec->start);
10641 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of @rec so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up as (rec->start, METADATA_ITEM, rec->info_level) for
 * metadata records and as (rec->start, EXTENT_ITEM, rec->max_size)
 * otherwise.  The lookup, the flag update and the commit all happen in one
 * transaction on the extent root.  When the lookup fails or the item is
 * missing, the path is released and the transaction is still committed.
 *
 * NOTE(review): the results of those early commits are not stored; the
 * value returned on these paths is not visible in this excerpt.
 */
10645 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10646 struct extent_record *rec)
10648 struct btrfs_trans_handle *trans;
10649 struct btrfs_root *root = fs_info->extent_root;
10650 struct btrfs_path path;
10651 struct btrfs_extent_item *ei;
10652 struct btrfs_key key;
10656 key.objectid = rec->start;
10657 if (rec->metadata) {
10658 key.type = BTRFS_METADATA_ITEM_KEY;
10659 key.offset = rec->info_level;
10661 key.type = BTRFS_EXTENT_ITEM_KEY;
10662 key.offset = rec->max_size;
10665 trans = btrfs_start_transaction(root, 0);
10667 return PTR_ERR(trans);
10669 btrfs_init_path(&path);
10670 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10672 btrfs_release_path(&path);
10673 btrfs_commit_transaction(trans, root);
10676 fprintf(stderr, "Didn't find extent for %llu\n",
10677 (unsigned long long)rec->start);
10678 btrfs_release_path(&path);
10679 btrfs_commit_transaction(trans, root);
/* Read the current flags, flip only the full-backref bit, write them back. */
10683 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10684 struct btrfs_extent_item);
10685 flags = btrfs_extent_flags(path.nodes[0], ei);
10686 if (rec->flag_block_full_backref) {
10687 fprintf(stderr, "setting full backref on %llu\n",
10688 (unsigned long long)key.objectid);
10689 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10691 fprintf(stderr, "clearing full backref on %llu\n",
10692 (unsigned long long)key.objectid);
10693 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10695 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10696 btrfs_mark_buffer_dirty(path.nodes[0]);
10697 btrfs_release_path(&path);
10698 ret = btrfs_commit_transaction(trans, root);
10700 fprintf(stderr, "Repaired extent flags for %llu\n",
10701 (unsigned long long)rec->start);
10706 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the parent pointer to one corrupt block from the
 * extent tree.
 *
 * The search for corrupt->key is stopped one level above the corrupt block
 * (lowest_level = corrupt->level + 1) so that the path ends at its parent
 * node.  The slot chosen by the search is checked first; when its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned.
 * The matching pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): the handling of a pointer that cannot be found, including
 * the special case where the node is the extent root node, and the return
 * values are not visible in this excerpt.
 */
10707 static int prune_one_block(struct btrfs_trans_handle *trans,
10708 struct btrfs_fs_info *info,
10709 struct btrfs_corrupt_block *corrupt)
10712 struct btrfs_path path;
10713 struct extent_buffer *eb;
10717 int level = corrupt->level + 1;
10719 btrfs_init_path(&path);
10721 /* we want to stop at the parent to our busted block */
10722 path.lowest_level = level;
10724 ret = btrfs_search_slot(trans, info->extent_root,
10725 &corrupt->key, &path, -1, 1);
10730 eb = path.nodes[level];
10737 * hopefully the search gave us the block we want to prune,
10738 * lets try that first
10740 slot = path.slots[level];
10741 found = btrfs_node_blockptr(eb, slot);
10742 if (found == corrupt->cache.start)
10745 nritems = btrfs_header_nritems(eb);
10747 /* the search failed, lets scan this node and hope we find it */
10748 for (slot = 0; slot < nritems; slot++) {
10749 found = btrfs_node_blockptr(eb, slot);
10750 if (found == corrupt->cache.start)
10754 * we couldn't find the bad block. TODO, search all the nodes for pointers
10757 if (eb == info->extent_root->node) {
10762 btrfs_release_path(&path);
10767 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10768 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10771 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on the entries of
 * info->corrupt_blocks and drop each handled entry from that cache.
 *
 * A transaction on the extent root is started for the work and committed at
 * the end; the result of that commit is returned.
 *
 * NOTE(review): the loop structure is not visible in this excerpt.  The
 * return value of prune_one_block() is not stored, so a failed prune still
 * removes the entry from the cache -- confirm that this is intended.
 */
10775 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10777 struct btrfs_trans_handle *trans = NULL;
10778 struct cache_extent *cache;
10779 struct btrfs_corrupt_block *corrupt;
10782 cache = search_cache_extent(info->corrupt_blocks, 0);
10786 trans = btrfs_start_transaction(info->extent_root, 1);
10788 return PTR_ERR(trans);
10790 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10791 prune_one_block(trans, info, corrupt);
10792 remove_cache_extent(info->corrupt_blocks, cache);
10795 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - discard cached free space information.
 *
 * Two things are visible: every EXTENT_DIRTY range found in
 * fs_info->free_space_cache is cleared, and the block groups are then
 * visited in address order, advancing start to the end of each group
 * (key.objectid + key.offset).
 *
 * NOTE(review): what is changed on each block group is not visible in this
 * excerpt; presumably its cached state is reset -- confirm.
 */
10799 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10801 struct btrfs_block_group_cache *cache;
10806 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10807 &start, &end, EXTENT_DIRTY);
10810 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10815 cache = btrfs_lookup_first_block_group(fs_info, start);
10820 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - compare every cached extent record with what the tree
 * scan found, report mismatches and, in repair mode, fix them.
 *
 * Visible phases:
 *  1. Repair preparation: all ranges in @extent_cache and in
 *     fs_info->corrupt_blocks are marked dirty in fs_info->excluded_extents
 *     so nothing is allocated from them, corrupt blocks are pruned and the
 *     cached block groups are reset.
 *  2. Duplicate handling (repair only): each record on duplicate_extents is
 *     passed through process_duplicates() and, when needed,
 *     delete_duplicate_records().
 *  3. Main loop over @extent_cache: reports records with duplicates, ref
 *     count mismatches (recording orphan data extents), backpointer
 *     mismatches and failed owner checks; calls fixup_extent_refs() and
 *     fixup_extent_flags() for repairs; reports metadata crossing a stripe
 *     boundary and chunk type mismatches.  Each record is then removed from
 *     the cache and its backrefs are freed.
 *  4. Wrap-up: any error other than -EAGAIN aborts with a message; on the
 *     other visible path btrfs_fix_block_accounting() is run inside a
 *     transaction on the extent root.
 *
 * NOTE(review): the return value of prune_corrupt_blocks() is not stored.
 * NOTE(review): the conditions selecting each phase and the return
 * statements are only partly visible in this excerpt.
 */
10824 static int check_extent_refs(struct btrfs_root *root,
10825 struct cache_tree *extent_cache)
10827 struct extent_record *rec;
10828 struct cache_extent *cache;
10835 * if we're doing a repair, we have to make sure
10836 * we don't allocate from the problem extents.
10837 * In the worst case, this will be all the
10838 * extents in the FS
10840 cache = search_cache_extent(extent_cache, 0);
10842 rec = container_of(cache, struct extent_record, cache);
10843 set_extent_dirty(root->fs_info->excluded_extents,
10845 rec->start + rec->max_size - 1);
10846 cache = next_cache_extent(cache);
10849 /* pin down all the corrupted blocks too */
10850 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10852 set_extent_dirty(root->fs_info->excluded_extents,
10854 cache->start + cache->size - 1);
10855 cache = next_cache_extent(cache);
10857 prune_corrupt_blocks(root->fs_info);
10858 reset_cached_block_groups(root->fs_info);
10861 reset_cached_block_groups(root->fs_info);
10864 * We need to delete any duplicate entries we find first otherwise we
10865 * could mess up the extent tree when we have backrefs that actually
10866 * belong to a different extent item and not the weird duplicate one.
10868 while (repair && !list_empty(&duplicate_extents)) {
10869 rec = to_extent_record(duplicate_extents.next);
10870 list_del_init(&rec->list);
10872 /* Sometimes we can find a backref before we find an actual
10873 * extent, so we need to process it a little bit to see if there
10874 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10875 * if this is a backref screwup. If we need to delete stuff
10876 * process_duplicates() will return 0, otherwise it will return
10879 if (process_duplicates(extent_cache, rec))
10881 ret = delete_duplicate_records(root, rec);
10885 * delete_duplicate_records will return the number of entries
10886 * deleted, so if it's greater than 0 then we know we actually
10887 * did something and we need to remove.
10900 cache = search_cache_extent(extent_cache, 0);
10903 rec = container_of(cache, struct extent_record, cache);
10904 if (rec->num_duplicates) {
10905 fprintf(stderr, "extent item %llu has multiple extent "
10906 "items\n", (unsigned long long)rec->start);
10910 if (rec->refs != rec->extent_item_refs) {
10911 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10912 (unsigned long long)rec->start,
10913 (unsigned long long)rec->nr);
10914 fprintf(stderr, "extent item %llu, found %llu\n",
10915 (unsigned long long)rec->extent_item_refs,
10916 (unsigned long long)rec->refs);
10917 ret = record_orphan_data_extents(root->fs_info, rec);
10923 if (all_backpointers_checked(rec, 1)) {
10924 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10925 (unsigned long long)rec->start,
10926 (unsigned long long)rec->nr);
10930 if (!rec->owner_ref_checked) {
10931 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10932 (unsigned long long)rec->start,
10933 (unsigned long long)rec->nr);
10938 if (repair && fix) {
10939 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10945 if (rec->bad_full_backref) {
10946 fprintf(stderr, "bad full backref, on [%llu]\n",
10947 (unsigned long long)rec->start);
10949 ret = fixup_extent_flags(root->fs_info, rec);
10957 * Although it's not a extent ref's problem, we reuse this
10958 * routine for error reporting.
10959 * No repair function yet.
10961 if (rec->crossing_stripes) {
10963 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10964 rec->start, rec->start + rec->max_size);
10968 if (rec->wrong_chunk_type) {
10970 "bad extent [%llu, %llu), type mismatch with chunk\n",
10971 rec->start, rec->start + rec->max_size);
10976 remove_cache_extent(extent_cache, cache);
10977 free_all_extent_backrefs(rec);
10978 if (!init_extent_tree && repair && (!cur_err || fix))
10979 clear_extent_dirty(root->fs_info->excluded_extents,
10981 rec->start + rec->max_size - 1);
10986 if (ret && ret != -EAGAIN) {
10987 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10990 struct btrfs_trans_handle *trans;
10992 root = root->fs_info->extent_root;
10993 trans = btrfs_start_transaction(root, 1);
10994 if (IS_ERR(trans)) {
10995 ret = PTR_ERR(trans);
10999 ret = btrfs_fix_block_accounting(trans, root);
11002 ret = btrfs_commit_transaction(trans, root);
11014 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11018 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11019 stripe_size = length;
11020 stripe_size /= num_stripes;
11021 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11022 stripe_size = length * 2;
11023 stripe_size /= num_stripes;
11024 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11025 stripe_size = length;
11026 stripe_size /= (num_stripes - 1);
11027 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11028 stripe_size = length;
11029 stripe_size /= (num_stripes - 2);
11031 stripe_size = length;
11033 return stripe_size;
11037 * Check the chunk with its block group/dev list ref:
11038 * Return 0 if all refs seem valid.
11039 * Return 1 if only some refs seem valid and a later pass must rebuild the
11040 * rest, e.g. a missing block group rebuilt by searching the extent tree.
11041 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs(): cross-check one chunk record against the cached block
 * group records and device extent records.
 */
11043 static int check_chunk_refs(struct chunk_record *chunk_rec,
11044 struct block_group_tree *block_group_cache,
11045 struct device_extent_tree *dev_extent_cache,
11048 struct cache_extent *block_group_item;
11049 struct block_group_record *block_group_rec;
11050 struct cache_extent *dev_extent_item;
11051 struct device_extent_record *dev_extent_rec;
11055 int metadump_v2 = 0;
/* Step 1: find the block group for this chunk and compare length, offset and flags. */
11059 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11061 chunk_rec->length);
11062 if (block_group_item) {
11063 block_group_rec = container_of(block_group_item,
11064 struct block_group_record,
/* NOTE(review): the flags comparison looks conditional (presumably on metadump_v2) - confirm. */
11066 if (chunk_rec->length != block_group_rec->offset ||
11067 chunk_rec->offset != block_group_rec->objectid ||
11069 chunk_rec->type_flags != block_group_rec->flags)) {
11072 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11073 chunk_rec->objectid,
11078 chunk_rec->type_flags,
11079 block_group_rec->objectid,
11080 block_group_rec->type,
11081 block_group_rec->offset,
11082 block_group_rec->offset,
11083 block_group_rec->objectid,
11084 block_group_rec->flags);
/* Unlink the block group record from its list and remember it in the chunk record. */
11087 list_del_init(&block_group_rec->list);
11088 chunk_rec->bg_rec = block_group_rec;
11093 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11094 chunk_rec->objectid,
11099 chunk_rec->type_flags);
/* Step 2: every stripe must have a device extent of the computed stripe length. */
11106 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11107 chunk_rec->num_stripes);
11108 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11109 devid = chunk_rec->stripes[i].devid;
11110 offset = chunk_rec->stripes[i].offset;
11111 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11112 devid, offset, length);
11113 if (dev_extent_item) {
11114 dev_extent_rec = container_of(dev_extent_item,
11115 struct device_extent_record,
/* The device extent must agree on devid, offset, owning chunk offset and length. */
11117 if (dev_extent_rec->objectid != devid ||
11118 dev_extent_rec->offset != offset ||
11119 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11120 dev_extent_rec->length != length) {
11123 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11124 chunk_rec->objectid,
11127 chunk_rec->stripes[i].devid,
11128 chunk_rec->stripes[i].offset,
11129 dev_extent_rec->objectid,
11130 dev_extent_rec->offset,
11131 dev_extent_rec->length);
/* Move the device extent onto the chunk's own dextents list. */
11134 list_move(&dev_extent_rec->chunk_list,
11135 &chunk_rec->dextents);
11140 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11141 chunk_rec->objectid,
11144 chunk_rec->stripes[i].devid,
11145 chunk_rec->stripes[i].offset);
11152 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks(): run check_chunk_refs() on every chunk in @chunk_cache and
 * sort the chunk records onto the optional @good, @rebuild and @bad lists,
 * then report block groups and device extents left without a chunk.
 */
11153 int check_chunks(struct cache_tree *chunk_cache,
11154 struct block_group_tree *block_group_cache,
11155 struct device_extent_tree *dev_extent_cache,
11156 struct list_head *good, struct list_head *bad,
11157 struct list_head *rebuild, int silent)
11159 struct cache_extent *chunk_item;
11160 struct chunk_record *chunk_rec;
11161 struct block_group_record *bg_rec;
11162 struct device_extent_record *dext_rec;
11166 chunk_item = first_cache_extent(chunk_cache);
11167 while (chunk_item) {
11168 chunk_rec = container_of(chunk_item, struct chunk_record,
11170 err = check_chunk_refs(chunk_rec, block_group_cache,
11171 dev_extent_cache, silent);
/* err == 0 -> good, err > 0 -> rebuild, err < 0 -> bad; a NULL list pointer skips that category. */
11174 if (err == 0 && good)
11175 list_add_tail(&chunk_rec->list, good);
11176 if (err > 0 && rebuild)
11177 list_add_tail(&chunk_rec->list, rebuild);
11178 if (err < 0 && bad)
11179 list_add_tail(&chunk_rec->list, bad);
11180 chunk_item = next_cache_extent(chunk_item);
/* Block groups still on this list were not claimed by any chunk in check_chunk_refs(). */
11183 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11186 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Likewise for device extents remaining on the no_chunk_orphans list. */
11194 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11198 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11199 dext_rec->objectid,
11209 static int check_device_used(struct device_record *dev_rec,
11210 struct device_extent_tree *dext_cache)
11212 struct cache_extent *cache;
11213 struct device_extent_record *dev_extent_rec;
11214 u64 total_byte = 0;
11216 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11218 dev_extent_rec = container_of(cache,
11219 struct device_extent_record,
11221 if (dev_extent_rec->objectid != dev_rec->devid)
11224 list_del_init(&dev_extent_rec->device_list);
11225 total_byte += dev_extent_rec->length;
11226 cache = next_cache_extent(cache);
11229 if (total_byte != dev_rec->byte_used) {
11231 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11232 total_byte, dev_rec->byte_used, dev_rec->objectid,
11233 dev_rec->type, dev_rec->offset);
11241 * Extra (optional) check for dev_item size to report possbile problem on a new
11244 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11246 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11248 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11249 devid, total_bytes, sectorsize);
11251 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11252 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11257 * Unlike device size alignment check above, some super total_bytes check
11258 * failure can lead to mount failure for newer kernel.
11260 * So this function will return the error for a fatal super total_bytes problem.
11262 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11264 struct btrfs_device *dev;
11265 struct list_head *dev_list = &fs_info->fs_devices->devices;
11266 u64 total_bytes = 0;
11267 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11269 list_for_each_entry(dev, dev_list, dev_list)
11270 total_bytes += dev->total_bytes;
11272 /* Important check, which can cause unmountable fs */
11273 if (super_bytes < total_bytes) {
11274 error("super total bytes %llu smaller than real device(s) size %llu",
11275 super_bytes, total_bytes);
11276 error("mounting this fs may fail for newer kernels");
11277 error("this can be fixed by 'btrfs rescue fix-device-size'");
11282 * Optional check, just to make everything aligned and match with each
11285 * For a btrfs-image restored fs, we don't need to check it anyway.
11287 if (btrfs_super_flags(fs_info->super_copy) &
11288 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11290 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11291 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11292 super_bytes != total_bytes) {
11293 warning("minor unaligned/mismatch device size detected");
11295 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11300 /* check btrfs_dev_item -> btrfs_dev_extent */
11301 static int check_devices(struct rb_root *dev_cache,
11302 struct device_extent_tree *dev_extent_cache)
11304 struct rb_node *dev_node;
11305 struct device_record *dev_rec;
11306 struct device_extent_record *dext_rec;
11310 dev_node = rb_first(dev_cache);
11312 dev_rec = container_of(dev_node, struct device_record, node);
11313 err = check_device_used(dev_rec, dev_extent_cache);
11317 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11318 global_info->sectorsize);
11319 dev_node = rb_next(dev_node);
11321 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11324 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11325 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11332 static int add_root_item_to_list(struct list_head *head,
11333 u64 objectid, u64 bytenr, u64 last_snapshot,
11334 u8 level, u8 drop_level,
11335 struct btrfs_key *drop_key)
11338 struct root_item_record *ri_rec;
11339 ri_rec = malloc(sizeof(*ri_rec));
11342 ri_rec->bytenr = bytenr;
11343 ri_rec->objectid = objectid;
11344 ri_rec->level = level;
11345 ri_rec->drop_level = drop_level;
11346 ri_rec->last_snapshot = last_snapshot;
11348 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11349 list_add_tail(&ri_rec->list, head);
11354 static void free_root_item_list(struct list_head *list)
11356 struct root_item_record *ri_rec;
11358 while (!list_empty(list)) {
11359 ri_rec = list_first_entry(list, struct root_item_record,
11361 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list(): take the root_item_records off @list one at a time,
 * read each root's tree block, queue it with add_root_to_pending() and process
 * blocks through run_next_block().
 */
11366 static int deal_root_from_list(struct list_head *list,
11367 struct btrfs_root *root,
11368 struct block_info *bits,
11370 struct cache_tree *pending,
11371 struct cache_tree *seen,
11372 struct cache_tree *reada,
11373 struct cache_tree *nodes,
11374 struct cache_tree *extent_cache,
11375 struct cache_tree *chunk_cache,
11376 struct rb_root *dev_cache,
11377 struct block_group_tree *block_group_cache,
11378 struct device_extent_tree *dev_extent_cache)
11383 while (!list_empty(list)) {
11384 struct root_item_record *rec;
11385 struct extent_buffer *buf;
11386 rec = list_entry(list->next,
11387 struct root_item_record, list);
/* Read the tree block recorded for this root; an unreadable block is released again. */
11389 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11390 if (!extent_buffer_uptodate(buf)) {
11391 free_extent_buffer(buf);
11395 ret = add_root_to_pending(buf, extent_cache, pending,
11396 seen, nodes, rec->objectid);
11400 * To rebuild extent tree, we need deal with snapshot
11401 * one by one, otherwise we deal with node firstly which
11402 * can maximize readahead.
11405 ret = run_next_block(root, bits, bits_nr, &last,
11406 pending, seen, reada, nodes,
11407 extent_cache, chunk_cache,
11408 dev_cache, block_group_cache,
11409 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and take the record off the list. */
11413 free_extent_buffer(buf);
11414 list_del(&rec->list);
/* NOTE(review): this call passes a NULL record; presumably it drains blocks still pending - confirm. */
11420 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11421 reada, nodes, extent_cache, chunk_cache,
11422 dev_cache, block_group_cache,
11423 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents(): set up the local caches, collect every tree
 * root, walk them through deal_root_from_list() and then run check_chunks(),
 * check_extent_refs() and check_devices() on the collected records.
 */
11433 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11435 struct rb_root dev_cache;
11436 struct cache_tree chunk_cache;
11437 struct block_group_tree block_group_cache;
11438 struct device_extent_tree dev_extent_cache;
11439 struct cache_tree extent_cache;
11440 struct cache_tree seen;
11441 struct cache_tree pending;
11442 struct cache_tree reada;
11443 struct cache_tree nodes;
11444 struct extent_io_tree excluded_extents;
11445 struct cache_tree corrupt_blocks;
11446 struct btrfs_path path;
11447 struct btrfs_key key;
11448 struct btrfs_key found_key;
11450 struct block_info *bits;
11452 struct extent_buffer *leaf;
11454 struct btrfs_root_item ri;
11455 struct list_head dropping_trees;
11456 struct list_head normal_trees;
11457 struct btrfs_root *root1;
11458 struct btrfs_root *root;
11462 root = fs_info->fs_root;
11463 dev_cache = RB_ROOT;
11464 cache_tree_init(&chunk_cache);
11465 block_group_tree_init(&block_group_cache);
11466 device_extent_tree_init(&dev_extent_cache);
11468 cache_tree_init(&extent_cache);
11469 cache_tree_init(&seen);
11470 cache_tree_init(&pending);
11471 cache_tree_init(&nodes);
11472 cache_tree_init(&reada);
11473 cache_tree_init(&corrupt_blocks);
11474 extent_io_tree_init(&excluded_extents);
11475 INIT_LIST_HEAD(&dropping_trees);
11476 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free-extent hook through fs_info for the duration of the check. */
11479 fs_info->excluded_extents = &excluded_extents;
11480 fs_info->fsck_extent_cache = &extent_cache;
11481 fs_info->free_extent_hook = free_extent_hook;
11482 fs_info->corrupt_blocks = &corrupt_blocks;
11486 bits = malloc(bits_nr * sizeof(struct block_info));
11492 if (ctx.progress_enabled) {
11493 ctx.tp = TASK_EXTENTS;
11494 task_start(ctx.info);
/* The tree root and the chunk root are queued first, both as normal trees. */
11498 root1 = fs_info->tree_root;
11499 level = btrfs_header_level(root1->node);
11500 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11501 root1->node->start, 0, level, 0, NULL);
11504 root1 = fs_info->chunk_root;
11505 level = btrfs_header_level(root1->node);
11506 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11507 root1->node->start, 0, level, 0, NULL);
/*
 * Walk the ROOT_ITEMs of the tree root.  A root whose drop_progress objectid
 * is 0 goes onto normal_trees; any other root goes onto dropping_trees
 * together with its drop key and drop level.
 */
11510 btrfs_init_path(&path);
11513 key.type = BTRFS_ROOT_ITEM_KEY;
11514 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11518 leaf = path.nodes[0];
11519 slot = path.slots[0];
11520 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11521 ret = btrfs_next_leaf(root, &path);
11524 leaf = path.nodes[0];
11525 slot = path.slots[0];
11527 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11528 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11529 unsigned long offset;
11532 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11533 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11534 last_snapshot = btrfs_root_last_snapshot(&ri);
11535 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11536 level = btrfs_root_level(&ri);
11537 ret = add_root_item_to_list(&normal_trees,
11538 found_key.objectid,
11539 btrfs_root_bytenr(&ri),
11540 last_snapshot, level,
11545 level = btrfs_root_level(&ri);
11546 objectid = found_key.objectid;
11547 btrfs_disk_key_to_cpu(&found_key,
11548 &ri.drop_progress);
11549 ret = add_root_item_to_list(&dropping_trees,
11551 btrfs_root_bytenr(&ri),
11552 last_snapshot, level,
11553 ri.drop_level, &found_key);
11560 btrfs_release_path(&path);
11563 * check_block can return -EAGAIN if it fixes something, please keep
11564 * this in mind when dealing with return values from these functions, if
11565 * we get -EAGAIN we want to fall through and restart the loop.
11567 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11568 &seen, &reada, &nodes, &extent_cache,
11569 &chunk_cache, &dev_cache, &block_group_cache,
11570 &dev_extent_cache);
11572 if (ret == -EAGAIN)
11576 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11577 &pending, &seen, &reada, &nodes,
11578 &extent_cache, &chunk_cache, &dev_cache,
11579 &block_group_cache, &dev_extent_cache);
11581 if (ret == -EAGAIN)
/* All trees walked: verify chunks, extent references and devices in that order. */
11586 ret = check_chunks(&chunk_cache, &block_group_cache,
11587 &dev_extent_cache, NULL, NULL, NULL, 0);
11589 if (ret == -EAGAIN)
11594 ret = check_extent_refs(root, &extent_cache);
11596 if (ret == -EAGAIN)
11601 ret = check_devices(&dev_cache, &dev_extent_cache);
11606 task_stop(ctx.info);
/* Detach the local caches from fs_info again and release them. */
11608 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11609 extent_io_tree_cleanup(&excluded_extents);
11610 fs_info->fsck_extent_cache = NULL;
11611 fs_info->free_extent_hook = NULL;
11612 fs_info->corrupt_blocks = NULL;
11613 fs_info->excluded_extents = NULL;
11616 free_chunk_cache_tree(&chunk_cache);
11617 free_device_cache_tree(&dev_cache);
11618 free_block_group_tree(&block_group_cache);
11619 free_device_extent_tree(&dev_extent_cache);
11620 free_extent_cache_tree(&seen);
11621 free_extent_cache_tree(&pending);
11622 free_extent_cache_tree(&reada);
11623 free_extent_cache_tree(&nodes);
11624 free_root_item_list(&normal_trees);
11625 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence; unlike the one above it also frees the extent
 * record cache.  NOTE(review): presumably the -EAGAIN restart path - confirm
 * against the goto targets.
 */
11628 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11629 free_extent_cache_tree(&seen);
11630 free_extent_cache_tree(&pending);
11631 free_extent_cache_tree(&reada);
11632 free_extent_cache_tree(&nodes);
11633 free_chunk_cache_tree(&chunk_cache);
11634 free_block_group_tree(&block_group_cache);
11635 free_device_cache_tree(&dev_cache);
11636 free_device_extent_tree(&dev_extent_cache);
11637 free_extent_record_cache(&extent_cache);
11638 free_root_item_list(&normal_trees);
11639 free_root_item_list(&dropping_trees);
11640 extent_io_tree_cleanup(&excluded_extents);
11644 static int check_extent_inline_ref(struct extent_buffer *eb,
11645 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11648 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11651 case BTRFS_TREE_BLOCK_REF_KEY:
11652 case BTRFS_EXTENT_DATA_REF_KEY:
11653 case BTRFS_SHARED_BLOCK_REF_KEY:
11654 case BTRFS_SHARED_DATA_REF_KEY:
11658 error("extent[%llu %u %llu] has unknown ref type: %d",
11659 key->objectid, key->type, key->offset, type);
11660 ret = UNKNOWN_TYPE;
11668 * Check backrefs of a tree block given by @bytenr or @eb.
11670 * @root: the root containing the @bytenr or @eb
11671 * @eb: tree block extent buffer, can be NULL
11672 * @bytenr: bytenr of the tree block to search
11673 * @level: tree level of the tree block
11674 * @owner: owner of the tree block
11676 * Return >0 for any error found and output error message
11677 * Return 0 for no error found
/*
 * check_tree_block_ref(): look up the extent/metadata item of a tree block in
 * the extent tree and search its backrefs for one that matches @root.
 */
11679 static int check_tree_block_ref(struct btrfs_root *root,
11680 struct extent_buffer *eb, u64 bytenr,
11681 int level, u64 owner, struct node_refs *nrefs)
11683 struct btrfs_key key;
11684 struct btrfs_root *extent_root = root->fs_info->extent_root;
11685 struct btrfs_path path;
11686 struct btrfs_extent_item *ei;
11687 struct btrfs_extent_inline_ref *iref;
11688 struct extent_buffer *leaf;
11693 int root_level = btrfs_header_level(root->node);
11695 u32 nodesize = root->fs_info->nodesize;
11698 int tree_reloc_root = 0;
/* Remember when @bytenr is the root node of a tree reloc root; its shared backref points to itself. */
11705 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11706 btrfs_header_bytenr(root->node) == bytenr)
11707 tree_reloc_root = 1;
11708 btrfs_init_path(&path);
/* Search with the largest possible offset for @bytenr, then step back to its extent item. */
11709 key.objectid = bytenr;
11710 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11711 key.type = BTRFS_METADATA_ITEM_KEY;
11713 key.type = BTRFS_EXTENT_ITEM_KEY;
11714 key.offset = (u64)-1;
11716 /* Search for the backref in extent tree */
11717 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11719 err |= BACKREF_MISSING;
11722 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11724 err |= BACKREF_MISSING;
11728 leaf = path.nodes[0];
11729 slot = path.slots[0];
11730 btrfs_item_key_to_cpu(leaf, &key, slot);
11732 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM stores the level in key.offset and the inline refs follow
 * the extent item directly; otherwise a btrfs_tree_block_info sits in between
 * and carries the level.
 */
11734 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11735 skinny_level = (int)key.offset;
11736 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11738 struct btrfs_tree_block_info *info;
11740 info = (struct btrfs_tree_block_info *)(ei + 1);
11741 skinny_level = btrfs_tree_block_level(leaf, info);
11742 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11751 * Due to the feature of shared tree blocks, if the upper node
11752 * is a fs root or shared node, the extent of checked node may
11753 * not be updated until the next CoW.
11756 strict = should_check_extent_strictly(root, nrefs,
11758 if (!(btrfs_extent_flags(leaf, ei) &
11759 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11761 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11762 key.objectid, nodesize,
11763 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11764 err = BACKREF_MISMATCH;
11766 header_gen = btrfs_header_generation(eb);
11767 extent_gen = btrfs_extent_generation(leaf, ei);
11768 if (header_gen != extent_gen) {
11770 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11771 key.objectid, nodesize, header_gen,
11773 err = BACKREF_MISMATCH;
11775 if (level != skinny_level) {
11777 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11778 key.objectid, nodesize, level, skinny_level);
11779 err = BACKREF_MISMATCH;
11781 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11783 "extent[%llu %u] is referred by other roots than %llu",
11784 key.objectid, nodesize, root->objectid);
11785 err = BACKREF_MISMATCH;
11790 * Iterate the extent/metadata item to find the exact backref
11792 item_size = btrfs_item_size_nr(leaf, slot);
11793 ptr = (unsigned long)iref;
11794 end = (unsigned long)ei + item_size;
11796 while (ptr < end) {
11797 iref = (struct btrfs_extent_inline_ref *)ptr;
11798 type = btrfs_extent_inline_ref_type(leaf, iref);
11799 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11801 ret = check_extent_inline_ref(leaf, &key, iref);
11806 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11807 if (offset == root->objectid)
11809 if (!strict && owner == offset)
11811 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11813 * Backref of tree reloc root points to itself, no need
11814 * to check backref any more.
11816 if (tree_reloc_root) {
11820 * Check if the backref points to valid
11823 found_ref = !check_tree_block_ref( root, NULL,
11824 offset, level + 1, owner,
11831 ptr += btrfs_extent_inline_ref_size(type);
11835 * Inlined extent item doesn't have what we need, check
11836 * TREE_BLOCK_REF_KEY
11839 btrfs_release_path(&path);
11840 key.objectid = bytenr;
11841 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11842 key.offset = root->objectid;
11844 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11849 err |= BACKREF_MISSING;
11851 btrfs_release_path(&path);
/* For the error message: name the parent block when the upper level uses full backrefs, else the root. */
11852 if (nrefs && strict &&
11853 level < root_level && nrefs->full_backref[level + 1])
11854 parent = nrefs->bytenr[level + 1];
11855 if (eb && (err & BACKREF_MISSING))
11857 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11858 bytenr, nodesize, owner, level,
11859 parent ? "parent" : "root",
11860 parent ? parent : root->objectid);
11865 * If @err contains BACKREF_MISSING then add extent of the
11866 * file_extent_data_item.
11868 * Returns error bits after repair.
/*
 * repair_extent_data_item(): for the file extent item at @pathp, make sure an
 * extent item exists in the extent tree and add the missing data backref.
 */
11870 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11871 struct btrfs_root *root,
11872 struct btrfs_path *pathp,
11873 struct node_refs *nrefs,
11876 struct btrfs_file_extent_item *fi;
11877 struct btrfs_key fi_key;
11878 struct btrfs_key key;
11879 struct btrfs_extent_item *ei;
11880 struct btrfs_path path;
11881 struct btrfs_root *extent_root = root->fs_info->extent_root;
11882 struct extent_buffer *eb;
11894 eb = pathp->nodes[0];
11895 slot = pathp->slots[0];
11896 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11897 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
/* Inline extents and holes (disk_bytenr == 0) are filtered out here. */
11899 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11900 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11903 file_offset = fi_key.offset;
11904 generation = btrfs_file_extent_generation(eb, fi);
11905 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11906 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11907 extent_offset = btrfs_file_extent_offset(eb, fi);
/* File offset minus the offset into the extent; presumably the backref offset - confirm. */
11908 offset = file_offset - extent_offset;
11910 /* now repair only adds backref */
11911 if ((err & BACKREF_MISSING) == 0)
11914 /* search extent item */
11915 key.objectid = disk_bytenr;
11916 key.type = BTRFS_EXTENT_ITEM_KEY;
11917 key.offset = num_bytes;
11919 btrfs_init_path(&path);
11920 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11926 /* insert an extent item */
11928 key.objectid = disk_bytenr;
11929 key.type = BTRFS_EXTENT_ITEM_KEY;
11930 key.offset = num_bytes;
11931 size = sizeof(*ei);
11933 btrfs_release_path(&path);
11934 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11938 eb = path.nodes[0];
11939 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new extent item starts with zero refs; the reference is added by btrfs_inc_extent_ref() below. */
11941 btrfs_set_extent_refs(eb, ei, 0);
11942 btrfs_set_extent_generation(eb, ei, generation);
11943 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11945 btrfs_mark_buffer_dirty(eb);
11946 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11948 btrfs_release_path(&path);
/*
 * NOTE(review): eb was reassigned to path.nodes[0] of the extent tree search
 * above and that path has been released; confirm that the header bytenr read
 * here comes from the intended buffer (the leaf holding the file extent).
 */
11951 if (nrefs->full_backref[0])
11952 parent = btrfs_header_bytenr(eb);
11956 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11958 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11962 "failed to increase extent data backref[%llu %llu] root %llu",
11963 disk_bytenr, num_bytes, root->objectid);
11966 printf("Add one extent data backref [%llu %llu]\n",
11967 disk_bytenr, num_bytes);
/* Backref added: clear the bit in the error mask. */
11970 err &= ~BACKREF_MISSING;
11973 error("can't repair root %llu extent data item[%llu %llu]",
11974 root->objectid, disk_bytenr, num_bytes);
11979 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11981 * Return >0 for any error found and output error message
11982 * Return 0 for no error found
/*
 * check_extent_data_item(): validate the file extent item at @pathp (size
 * alignment) and look for its data backref in the extent tree, first inline
 * in the extent item, then as a keyed EXTENT_DATA_REF, then as a keyed
 * SHARED_DATA_REF.
 */
11984 static int check_extent_data_item(struct btrfs_root *root,
11985 struct btrfs_path *pathp,
11986 struct node_refs *nrefs, int account_bytes)
11988 struct btrfs_file_extent_item *fi;
11989 struct extent_buffer *eb = pathp->nodes[0];
11990 struct btrfs_path path;
11991 struct btrfs_root *extent_root = root->fs_info->extent_root;
11992 struct btrfs_key fi_key;
11993 struct btrfs_key dbref_key;
11994 struct extent_buffer *leaf;
11995 struct btrfs_extent_item *ei;
11996 struct btrfs_extent_inline_ref *iref;
11997 struct btrfs_extent_data_ref *dref;
12000 u64 disk_num_bytes;
12001 u64 extent_num_bytes;
12008 int found_dbackref = 0;
12009 int slot = pathp->slots[0];
12014 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12015 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12017 /* Nothing to check for hole and inline data extents */
12018 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12019 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12022 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12023 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12024 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12026 /* Check unaligned disk_num_bytes and num_bytes */
/* Aligned sizes are added to the global byte counters only when @account_bytes is set. */
12027 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12029 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12030 fi_key.objectid, fi_key.offset, disk_num_bytes,
12031 root->fs_info->sectorsize);
12032 err |= BYTES_UNALIGNED;
12033 } else if (account_bytes) {
12034 data_bytes_allocated += disk_num_bytes;
12036 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12038 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12039 fi_key.objectid, fi_key.offset, extent_num_bytes,
12040 root->fs_info->sectorsize);
12041 err |= BYTES_UNALIGNED;
12042 } else if (account_bytes) {
12043 data_bytes_referenced += extent_num_bytes;
12045 owner = btrfs_header_owner(eb);
12047 /* Check the extent item of the file extent in extent tree */
12048 btrfs_init_path(&path);
12049 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12050 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12051 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12053 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12057 leaf = path.nodes[0];
12058 slot = path.slots[0];
12059 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12061 extent_flags = btrfs_extent_flags(leaf, ei);
12063 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12065 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12066 disk_bytenr, disk_num_bytes,
12067 BTRFS_EXTENT_FLAG_DATA);
12068 err |= BACKREF_MISMATCH;
12071 /* Check data backref inside that extent item */
12072 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12073 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12074 ptr = (unsigned long)iref;
12075 end = (unsigned long)ei + item_size;
12076 strict = should_check_extent_strictly(root, nrefs, -1);
12078 while (ptr < end) {
12079 iref = (struct btrfs_extent_inline_ref *)ptr;
12080 type = btrfs_extent_inline_ref_type(leaf, iref);
12081 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12083 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
/*
 * A keyed data ref matches when its root is this root or, in non-strict mode,
 * the owner of the leaf.  A shared data ref is validated by checking the tree
 * block it names with check_tree_block_ref().
 */
12088 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12089 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12090 if (ref_root == root->objectid)
12091 found_dbackref = 1;
12092 else if (!strict && owner == ref_root)
12093 found_dbackref = 1;
12094 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12095 found_dbackref = !check_tree_block_ref(root, NULL,
12096 btrfs_extent_inline_ref_offset(leaf, iref),
12100 if (found_dbackref)
12102 ptr += btrfs_extent_inline_ref_size(type);
12105 if (!found_dbackref) {
12106 btrfs_release_path(&path);
12108 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12109 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12110 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12111 dbref_key.offset = hash_extent_data_ref(root->objectid,
12112 fi_key.objectid, fi_key.offset);
12114 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12115 &dbref_key, &path, 0, 0);
12117 found_dbackref = 1;
12121 btrfs_release_path(&path);
12124 * Neither inlined nor EXTENT_DATA_REF found, try
12125 * SHARED_DATA_REF as last chance.
/* The shared ref is keyed by the bytenr of the leaf that holds the file extent item. */
12127 dbref_key.objectid = disk_bytenr;
12128 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12129 dbref_key.offset = eb->start;
12131 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12132 &dbref_key, &path, 0, 0);
12134 found_dbackref = 1;
12140 if (!found_dbackref)
12141 err |= BACKREF_MISSING;
12142 btrfs_release_path(&path);
12143 if (err & BACKREF_MISSING) {
12144 error("data extent[%llu %llu] backref lost",
12145 disk_bytenr, disk_num_bytes);
12151 * Get real tree block level for the case like shared block
12152 * Return >= 0 as tree level
12153 * Return <0 for error
/*
 * query_tree_block_level(): determine the level of the tree block at @bytenr
 * from two sources, its extent/metadata item in the extent tree and the header
 * of the block itself.
 */
12155 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12157 struct extent_buffer *eb;
12158 struct btrfs_path path;
12159 struct btrfs_key key;
12160 struct btrfs_extent_item *ei;
12167 /* Search extent tree for extent generation and level */
12168 key.objectid = bytenr;
12169 key.type = BTRFS_METADATA_ITEM_KEY;
12170 key.offset = (u64)-1;
12172 btrfs_init_path(&path);
12173 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12176 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12184 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12185 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12186 struct btrfs_extent_item);
/* Only extents flagged as tree blocks are of interest here. */
12187 flags = btrfs_extent_flags(path.nodes[0], ei);
12188 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12193 /* Get transid for later read_tree_block() check */
12194 transid = btrfs_extent_generation(path.nodes[0], ei);
12196 /* Get backref level as one source */
/* A METADATA_ITEM keeps the level in key.offset; otherwise it is read from the btrfs_tree_block_info. */
12197 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12198 backref_level = key.offset;
12200 struct btrfs_tree_block_info *info;
12202 info = (struct btrfs_tree_block_info *)(ei + 1);
12203 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12205 btrfs_release_path(&path);
12207 /* Get level from tree block as an alternative source */
12208 eb = read_tree_block(fs_info, bytenr, transid);
12209 if (!extent_buffer_uptodate(eb)) {
12210 free_extent_buffer(eb);
12213 header_level = btrfs_header_level(eb);
12214 free_extent_buffer(eb);
/* Compare the level from the block header with the one from the extent tree. */
12216 if (header_level != backref_level)
12218 return header_level;
12221 btrfs_release_path(&path);
12226 * Check if a tree block backref is valid (points to a valid tree block)
12227 * if level == -1, level will be resolved
12228 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref(): verify that root @root_id really references the
 * tree block at @bytenr, by searching that root for the block's first key and
 * comparing the node found at @level.
 */
12230 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12231 u64 bytenr, int level)
12233 struct btrfs_root *root;
12234 struct btrfs_key key;
12235 struct btrfs_path path;
12236 struct extent_buffer *eb;
12237 struct extent_buffer *node;
12238 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12242 /* Query level for level == -1 special case */
12244 level = query_tree_block_level(fs_info, bytenr);
12246 err |= REFERENCER_MISSING;
/* Load the referencing root by its objectid. */
12250 key.objectid = root_id;
12251 key.type = BTRFS_ROOT_ITEM_KEY;
12252 key.offset = (u64)-1;
12254 root = btrfs_read_fs_root(fs_info, &key);
12255 if (IS_ERR(root)) {
12256 err |= REFERENCER_MISSING;
12260 /* Read out the tree block to get item/node key */
12261 eb = read_tree_block(fs_info, bytenr, 0);
12262 if (!extent_buffer_uptodate(eb)) {
12263 err |= REFERENCER_MISSING;
12264 free_extent_buffer(eb);
12268 /* Empty tree, no need to check key */
12269 if (!btrfs_header_nritems(eb) && !level) {
12270 free_extent_buffer(eb);
12275 btrfs_node_key_to_cpu(eb, &key, 0);
12277 btrfs_item_key_to_cpu(eb, &key, 0);
12279 free_extent_buffer(eb);
/* Stop the search at @level so that path.nodes[level] is the block this root holds for the key. */
12281 btrfs_init_path(&path);
12282 path.lowest_level = level;
12283 /* Search with the first key, to ensure we can reach it */
12284 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12286 err |= REFERENCER_MISSING;
12290 node = path.nodes[level];
12291 if (btrfs_header_bytenr(node) != bytenr) {
12293 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12294 bytenr, nodesize, bytenr,
12295 btrfs_header_bytenr(node));
12296 err |= REFERENCER_MISMATCH;
12298 if (btrfs_header_level(node) != level) {
12300 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12301 bytenr, nodesize, level,
12302 btrfs_header_level(node));
12303 err |= REFERENCER_MISMATCH;
12307 btrfs_release_path(&path);
/* Two message variants: one without and one with the level. */
12309 if (err & REFERENCER_MISSING) {
12311 error("extent [%llu %d] lost referencer (owner: %llu)",
12312 bytenr, nodesize, root_id);
12315 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12316 bytenr, nodesize, root_id, level);
12323 * Check if tree block @eb is tree reloc root.
12324 * Return 0 if it's not or any problem happens
12325 * Return 1 if it's a tree reloc root
/*
 * Decide whether @eb is the root node of a tree reloc root.
 *
 * Reads, without going through the root cache, the root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner of @eb) and compares
 * the bytenr of that root's node with the bytenr in @eb's header.  The
 * temporary root is released with btrfs_free_fs_root().
 */
12327 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12328 struct extent_buffer *eb)
12330 struct btrfs_root *tree_reloc_root;
12331 struct btrfs_key key;
12332 u64 bytenr = btrfs_header_bytenr(eb);
12333 u64 owner = btrfs_header_owner(eb);
/* The key offset carries the id of the tree that owns @eb */
12336 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12337 key.offset = owner;
12338 key.type = BTRFS_ROOT_ITEM_KEY;
12340 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12341 if (IS_ERR(tree_reloc_root))
12344 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12346 btrfs_free_fs_root(tree_reloc_root);
12351 * Check referencer for shared block backref
12352 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must hold a block
 * pointer to @bytenr.
 *
 * @parent is read with read_tree_block(); when needed the level of @bytenr is
 * resolved with query_tree_block_level().  A block that names itself as
 * parent is checked against is_tree_reloc_root().  The parent's header level
 * is compared with @level + 1 and its block pointers are scanned for @bytenr.
 * REFERENCER_MISSING is returned when no matching parent pointer is found.
 */
12354 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12355 u64 parent, u64 bytenr, int level)
12357 struct extent_buffer *eb;
12359 int found_parent = 0;
12362 eb = read_tree_block(fs_info, parent, 0);
12363 if (!extent_buffer_uptodate(eb))
12367 level = query_tree_block_level(fs_info, bytenr);
12371 /* It's possible it's a tree reloc root */
12372 if (parent == bytenr) {
12373 if (is_tree_reloc_root(fs_info, eb))
/* A parent must sit exactly one level above the block it references */
12378 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer in the parent for @bytenr */
12381 nr = btrfs_header_nritems(eb);
12382 for (i = 0; i < nr; i++) {
12383 if (bytenr == btrfs_node_blockptr(eb, i)) {
12389 free_extent_buffer(eb);
12390 if (!found_parent) {
12392 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12393 bytenr, fs_info->nodesize, parent, level);
12394 return REFERENCER_MISSING;
12400 * Check referencer for normal (inlined) data ref
12401 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref (root @root_id, inode @objectid, @offset) for
 * the data extent starting at @bytenr.
 *
 * Step 1: look up the EXTENT_ITEM for @bytenr in the extent tree, searching
 *         with offset (u64)-1 and stepping back with
 *         btrfs_previous_extent_item().
 *         NOTE(review): presumably this is where @len gets resolved when the
 *         caller passes 0 - confirm.
 * Step 2: read root @root_id and walk the EXTENT_DATA items of inode
 *         @objectid with btrfs_next_item(), testing each against @bytenr,
 *         @len and the backref offset.
 * Step 3: compare found_count with @count; a difference is reported and
 *         REFERENCER_MISSING is returned.
 */
12403 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12404 u64 root_id, u64 objectid, u64 offset,
12405 u64 bytenr, u64 len, u32 count)
12407 struct btrfs_root *root;
12408 struct btrfs_root *extent_root = fs_info->extent_root;
12409 struct btrfs_key key;
12410 struct btrfs_path path;
12411 struct extent_buffer *leaf;
12412 struct btrfs_file_extent_item *fi;
12413 u32 found_count = 0;
/* Search past any item for @bytenr, then walk back to its extent item */
12418 key.objectid = bytenr;
12419 key.type = BTRFS_EXTENT_ITEM_KEY;
12420 key.offset = (u64)-1;
12422 btrfs_init_path(&path);
12423 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12426 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12429 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12430 if (key.objectid != bytenr ||
12431 key.type != BTRFS_EXTENT_ITEM_KEY)
12434 btrfs_release_path(&path);
/* Now switch to the tree that is supposed to reference the extent */
12436 key.objectid = root_id;
12437 key.type = BTRFS_ROOT_ITEM_KEY;
12438 key.offset = (u64)-1;
12439 btrfs_init_path(&path);
12441 root = btrfs_read_fs_root(fs_info, &key);
12445 key.objectid = objectid;
12446 key.type = BTRFS_EXTENT_DATA_KEY;
12448 * It can be nasty as data backref offset is
12449 * file offset - file extent offset, which is smaller or
12450 * equal to original backref offset. The only special case is
12451 * overflow. So we need to special check and do further search.
/* When the top bit of @offset is set, start the search from file offset 0 */
12453 key.offset = offset & (1ULL << 63) ? 0 : offset;
12455 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12460 * Search afterwards to get correct one
12461 * NOTE: As we must do a comprehensive check on the data backref to
12462 * make sure the dref count also matches, we must iterate all file
12463 * extents for that inode.
12466 leaf = path.nodes[0];
12467 slot = path.slots[0];
/* Tests on the current position: slot within the leaf, leaf owned by @root_id */
12469 if (slot >= btrfs_header_nritems(leaf) ||
12470 btrfs_header_owner(leaf) != root_id)
12472 btrfs_item_key_to_cpu(leaf, &key, slot);
12473 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12475 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12477 * Except normal disk bytenr and disk num bytes, we still
12478 * need to do extra check on dbackref offset as
12479 * dbackref offset = file_offset - file_extent_offset
12481 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12482 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12483 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12488 ret = btrfs_next_item(root, &path);
12493 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded ref count */
12494 if (found_count != count) {
12496 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12497 bytenr, len, root_id, objectid, offset, count, found_count);
12498 return REFERENCER_MISSING;
12504 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must contain a file
 * extent item whose disk bytenr is @bytenr.
 *
 * The leaf is read with read_tree_block() and its items are scanned.  The
 * visible tests select EXTENT_DATA items, check for inline file extents, and
 * compare the disk bytenr with @bytenr.  REFERENCER_MISSING is returned when
 * no such item is found.
 */
12506 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12507 u64 parent, u64 bytenr)
12509 struct extent_buffer *eb;
12510 struct btrfs_key key;
12511 struct btrfs_file_extent_item *fi;
12513 int found_parent = 0;
12516 eb = read_tree_block(fs_info, parent, 0);
12517 if (!extent_buffer_uptodate(eb))
12520 nr = btrfs_header_nritems(eb);
12521 for (i = 0; i < nr; i++) {
12522 btrfs_item_key_to_cpu(eb, &key, i);
12523 if (key.type != BTRFS_EXTENT_DATA_KEY)
12526 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are tested for before the disk bytenr is read */
12527 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12530 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12537 free_extent_buffer(eb);
12538 if (!found_parent) {
12539 error("shared extent %llu referencer lost (parent: %llu)",
12541 return REFERENCER_MISSING;
12547 * Only delete backref if REFERENCER_MISSING now
12549 * Returns <0 the extent was deleted
12550 * Returns >0 the backref was deleted but extent still exists, returned value
12551 * means error after repair
12552 * Returns 0 nothing happened
/*
 * Repair step for one backref of an extent item.
 *
 * The key of the item @path currently points at is saved first.  If @err has
 * REFERENCER_MISSING or REFERENCER_MISMATCH set, the backref described by
 * (@bytenr, @num_bytes, @parent, @root_objectid, @owner, @offset) is dropped
 * with btrfs_free_extent().  @path is then released and the saved key is
 * searched again, because freeing the backref may have removed the extent
 * item itself.
 *
 * NOTE(review): the condition tests MISSING | MISMATCH but only
 * REFERENCER_MISSING is cleared from err afterwards - confirm that is
 * intended.
 * NOTE(review): btrfs_free_extent() is given root->fs_info->fs_root rather
 * than @root - confirm.
 */
12554 static int repair_extent_item(struct btrfs_trans_handle *trans,
12555 struct btrfs_root *root, struct btrfs_path *path,
12556 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12557 u64 owner, u64 offset, int err)
12559 struct btrfs_key old_key;
/* Remember where we are so the position can be re-found after the delete */
12563 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12565 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12566 /* delete the backref */
12567 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12568 num_bytes, parent, root_objectid, owner, offset);
12571 err &= ~REFERENCER_MISSING;
12572 printf("Delete backref in extent [%llu %llu]\n",
12573 bytenr, num_bytes);
12575 error("fail to delete backref in extent [%llu %llu]",
12576 bytenr, num_bytes);
12580 /* btrfs_free_extent may delete the extent */
12581 btrfs_release_path(path);
12582 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12592 * This function will check a given extent item, including its backref and
12593 * itself (like crossing stripe boundary and type)
12595 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM / METADATA_ITEM that @path points at, together with
 * every inline backref it carries.
 *
 * - Adds the extent size to the global bytes_used counter: key.offset for
 *   an EXTENT_ITEM key, nodesize on the other branch.
 * - Items smaller than struct btrfs_extent_item are reported as the
 *   unsupported COMPAT_EXTENT_TREE_V0 format.
 * - Metadata extents are tested with check_crossing_stripes().
 * - The inline refs that follow the extent item (and, for old-style
 *   metadata, the btrfs_tree_block_info) are walked one at a time and
 *   dispatched by type to the matching check_*_backref() helper; unknown
 *   types yield UNKNOWN_TYPE.
 * - With repair enabled, repair_extent_item() is called for a bad backref.
 */
12597 static int check_extent_item(struct btrfs_trans_handle *trans,
12598 struct btrfs_fs_info *fs_info,
12599 struct btrfs_path *path)
12601 struct btrfs_extent_item *ei;
12602 struct btrfs_extent_inline_ref *iref;
12603 struct btrfs_extent_data_ref *dref;
12604 struct extent_buffer *eb = path->nodes[0];
12607 int slot = path->slots[0];
12609 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12610 u32 item_size = btrfs_item_size_nr(eb, slot);
12620 struct btrfs_key key;
12624 btrfs_item_key_to_cpu(eb, &key, slot);
/* For EXTENT_ITEM the key offset is the extent length; otherwise nodesize is used */
12625 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12626 bytes_used += key.offset;
12627 num_bytes = key.offset;
12629 bytes_used += nodesize;
12630 num_bytes = nodesize;
12633 if (item_size < sizeof(*ei)) {
12635 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12636 * old thing when on disk format is still un-determined.
12637 * No need to care about it anymore
12639 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12643 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12644 flags = btrfs_extent_flags(eb, ei);
12646 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12648 if (metadata && check_crossing_stripes(global_info, key.objectid,
12650 error("bad metadata [%llu, %llu) crossing stripe boundary",
12651 key.objectid, key.objectid + nodesize);
12652 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the payload that follows the fixed-size extent item */
12655 ptr = (unsigned long)(ei + 1);
12657 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12658 /* Old EXTENT_ITEM metadata */
12659 struct btrfs_tree_block_info *info;
12661 info = (struct btrfs_tree_block_info *)ptr;
12662 level = btrfs_tree_block_level(eb, info);
12663 ptr += sizeof(struct btrfs_tree_block_info);
12665 /* New METADATA_ITEM */
12666 level = key.offset;
/* end marks the first byte past this item; ptr is measured against it */
12668 end = (unsigned long)ei + item_size;
12671 /* Reached extent item end normally */
12675 /* Beyond extent item end, wrong item size */
12677 err |= ITEM_SIZE_MISMATCH;
12678 error("extent item at bytenr %llu slot %d has wrong size",
12687 /* Now check every backref in this extent item */
12688 iref = (struct btrfs_extent_inline_ref *)ptr;
12689 type = btrfs_extent_inline_ref_type(eb, iref);
12690 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; offset is a root id or a parent bytenr */
12692 case BTRFS_TREE_BLOCK_REF_KEY:
12693 root_objectid = offset;
12695 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12699 case BTRFS_SHARED_BLOCK_REF_KEY:
12701 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12705 case BTRFS_EXTENT_DATA_REF_KEY:
12706 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12707 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12708 owner = btrfs_extent_data_ref_objectid(eb, dref);
12709 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12710 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12711 owner_offset, key.objectid, key.offset,
12712 btrfs_extent_data_ref_count(eb, dref));
12715 case BTRFS_SHARED_DATA_REF_KEY:
12717 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12721 error("extent[%llu %d %llu] has unknown ref type: %d",
12722 key.objectid, key.type, key.offset, type);
12723 ret = UNKNOWN_TYPE;
/* In repair mode, hand the result of the backref check to the repair helper */
12728 if (err && repair) {
12729 ret = repair_extent_item(trans, fs_info->extent_root, path,
12730 key.objectid, num_bytes, parent, root_objectid,
12731 owner, owner_offset, ret);
/* Advance to the next inline ref; its size depends on the ref type */
12740 ptr += btrfs_extent_inline_ref_size(type);
12748 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the DEV_EXTENT item at (@eb, @slot) is referenced by its chunk.
 *
 * The chunk key is built from the chunk objectid/offset stored in the dev
 * extent and looked up in the chunk tree.  The chunk is validated with
 * btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose (devid, offset)
 * equals the dev extent key's (objectid, offset).  REFERENCER_MISSING is
 * returned when no matching chunk stripe is found.
 */
12750 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12751 struct extent_buffer *eb, int slot)
12753 struct btrfs_root *chunk_root = fs_info->chunk_root;
12754 struct btrfs_dev_extent *ptr;
12755 struct btrfs_path path;
12756 struct btrfs_key chunk_key;
12757 struct btrfs_key devext_key;
12758 struct btrfs_chunk *chunk;
12759 struct extent_buffer *l;
12763 int found_chunk = 0;
12766 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12767 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12768 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to */
12770 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12771 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12772 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12774 btrfs_init_path(&path);
12775 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12780 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12781 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12786 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that lives at this device and physical offset */
12789 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12790 for (i = 0; i < num_stripes; i++) {
12791 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12792 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12794 if (devid == devext_key.objectid &&
12795 offset == devext_key.offset) {
12801 btrfs_release_path(&path);
12802 if (!found_chunk) {
12804 "device extent[%llu, %llu, %llu] did not find the related chunk",
12805 devext_key.objectid, devext_key.offset, length);
12806 return REFERENCER_MISSING;
12812 * Check if the used space is correct with the dev item
/*
 * Check the DEV_ITEM at (@eb, @slot) against the dev extents of that device.
 *
 * Reads the device id, bytes_used and total_bytes from the dev item, then
 * walks the DEV_EXTENT items of that device in the dev tree, summing their
 * lengths into total.  REFERENCER_MISSING is returned when the initial
 * search fails, ACCOUNTING_MISMATCH when the sum differs from bytes_used.
 * Finally check_dev_size_alignment() is called with total_bytes and the
 * sector size.
 *
 * NOTE(review): in the mismatch branch the item key is re-read into key, but
 * the message prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and
 * dev_id instead of key.objectid/key.type/key.offset - confirm which was
 * intended.
 */
12814 static int check_dev_item(struct btrfs_fs_info *fs_info,
12815 struct extent_buffer *eb, int slot)
12817 struct btrfs_root *dev_root = fs_info->dev_root;
12818 struct btrfs_dev_item *dev_item;
12819 struct btrfs_path path;
12820 struct btrfs_key key;
12821 struct btrfs_dev_extent *ptr;
12828 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12829 dev_id = btrfs_device_id(eb, dev_item);
12830 used = btrfs_device_bytes_used(eb, dev_item);
12831 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Dev extents are keyed by device id in the dev tree */
12833 key.objectid = dev_id;
12834 key.type = BTRFS_DEV_EXTENT_KEY;
12837 btrfs_init_path(&path);
12838 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12840 btrfs_item_key_to_cpu(eb, &key, slot);
12841 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12842 key.objectid, key.type, key.offset);
12843 btrfs_release_path(&path);
12844 return REFERENCER_MISSING;
12847 /* Iterate dev_extents to calculate the used space of a device */
12849 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12852 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12853 if (key.objectid > dev_id)
12855 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12858 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12859 struct btrfs_dev_extent);
12860 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12862 ret = btrfs_next_item(dev_root, &path);
12866 btrfs_release_path(&path);
12868 if (used != total) {
12869 btrfs_item_key_to_cpu(eb, &key, slot);
12871 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12872 total, used, BTRFS_ROOT_TREE_OBJECTID,
12873 BTRFS_DEV_EXTENT_KEY, dev_id);
12874 return ACCOUNTING_MISMATCH;
12876 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12882 * Check a block group item with its referencer (chunk) and its used space
12883 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at (@eb, @slot).
 *
 * 1. Look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    CHUNK_ITEM, block group start) in the chunk tree; a failed lookup sets
 *    REFERENCER_MISSING, a chunk length mismatch sets REFERENCER_MISMATCH.
 * 2. Walk the extent tree from the block group start, considering
 *    EXTENT_ITEM / METADATA_ITEM keys inside the block group range, and
 *    accumulate their sizes into total.  Each extent's DATA / TREE_BLOCK
 *    flag is compared with the block group flags (CHUNK_TYPE_MISMATCH).
 * 3. Compare total with the used value stored in the block group item
 *    (BG_ACCOUNTING_ERROR).
 */
12885 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12886 struct extent_buffer *eb, int slot)
12888 struct btrfs_root *extent_root = fs_info->extent_root;
12889 struct btrfs_root *chunk_root = fs_info->chunk_root;
12890 struct btrfs_block_group_item *bi;
12891 struct btrfs_block_group_item bg_item;
12892 struct btrfs_path path;
12893 struct btrfs_key bg_key;
12894 struct btrfs_key chunk_key;
12895 struct btrfs_key extent_key;
12896 struct btrfs_chunk *chunk;
12897 struct extent_buffer *leaf;
12898 struct btrfs_extent_item *ei;
12899 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into bg_item so its fields can be read directly */
12907 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12908 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12909 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12910 used = btrfs_block_group_used(&bg_item);
12911 bg_flags = btrfs_block_group_flags(&bg_item);
12913 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12914 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12915 chunk_key.offset = bg_key.objectid;
12917 btrfs_init_path(&path);
12918 /* Search for the referencer chunk */
12919 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12922 "block group[%llu %llu] did not find the related chunk item",
12923 bg_key.objectid, bg_key.offset);
12924 err |= REFERENCER_MISSING;
12926 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12927 struct btrfs_chunk);
12928 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12931 "block group[%llu %llu] related chunk item length does not match",
12932 bg_key.objectid, bg_key.offset);
12933 err |= REFERENCER_MISMATCH;
12936 btrfs_release_path(&path);
12938 /* Search from the block group bytenr */
12939 extent_key.objectid = bg_key.objectid;
12940 extent_key.type = 0;
12941 extent_key.offset = 0;
12943 btrfs_init_path(&path);
12944 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12948 /* Iterate extent tree to account used space */
12950 leaf = path.nodes[0];
12952 /* Search slot can point to the last item beyond leaf nritems */
12953 if (path.slots[0] >= btrfs_header_nritems(leaf))
12956 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past the block group */
12957 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12960 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12961 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12963 if (extent_key.objectid < bg_key.objectid)
12966 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12969 total += extent_key.offset;
12971 ei = btrfs_item_ptr(leaf, path.slots[0],
12972 struct btrfs_extent_item);
12973 flags = btrfs_extent_flags(leaf, ei);
/* The extent's kind must be allowed by the block group's type flags */
12974 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12975 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12977 "bad extent[%llu, %llu) type mismatch with chunk",
12978 extent_key.objectid,
12979 extent_key.objectid + extent_key.offset);
12980 err |= CHUNK_TYPE_MISMATCH;
12982 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12983 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12984 BTRFS_BLOCK_GROUP_METADATA))) {
12986 "bad extent[%llu, %llu) type mismatch with chunk",
12987 extent_key.objectid,
12988 extent_key.objectid + nodesize);
12989 err |= CHUNK_TYPE_MISMATCH;
12993 ret = btrfs_next_item(extent_root, &path);
12999 btrfs_release_path(&path);
13001 if (total != used) {
13003 "block group[%llu %llu] used %llu but extent items used %llu",
13004 bg_key.objectid, bg_key.offset, used, total);
13005 err |= BG_ACCOUNTING_ERROR;
13011 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13012 * FIXME: We still need to repair error of dev_item.
13014 * Returns error after repair.
/*
 * Repair step for the chunk item that @path points at.
 *
 * Reads the chunk's type and length; when @err contains REFERENCER_MISSING a
 * block group is created for it with btrfs_make_block_group().  The visible
 * branches print a message for failure and for success, and the success
 * branch clears REFERENCER_MISSING from err.
 */
13016 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13017 struct btrfs_root *chunk_root,
13018 struct btrfs_path *path, int err)
13020 struct btrfs_chunk *chunk;
13021 struct btrfs_key chunk_key;
13022 struct extent_buffer *eb = path->nodes[0];
13024 int slot = path->slots[0];
13028 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13029 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13031 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13032 type = btrfs_chunk_type(path->nodes[0], chunk);
13033 length = btrfs_chunk_length(eb, chunk);
/* The new block group covers [chunk_key.offset, chunk_key.offset + length) */
13035 if (err & REFERENCER_MISSING) {
13036 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13037 type, chunk_key.objectid, chunk_key.offset, length);
13039 error("fail to add block group item[%llu %llu]",
13040 chunk_key.offset, length);
13043 err &= ~REFERENCER_MISSING;
13044 printf("Added block group item[%llu %llu]\n",
13045 chunk_key.offset, length);
13054 * Check a chunk item.
13055 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at (@eb, @slot) against its block group item and its
 * dev extents.
 *
 * - The chunk is validated with btrfs_check_chunk_valid(); an invalid chunk
 *   sets BYTES_UNALIGNED | UNKNOWN_TYPE.
 * - The block group item keyed (chunk start, BLOCK_GROUP_ITEM, length) is
 *   looked up in the extent tree and its flags compared with the chunk type.
 * - For every stripe, the dev extent keyed (devid, DEV_EXTENT, stripe
 *   offset) is looked up in the dev tree and must point back at this chunk
 *   with a length equal to the stripe length; otherwise BACKREF_MISSING.
 *
 * NOTE(review): the stripe error message prints chunk_key.objectid as the
 * range start, while every other message here prints chunk_key.offset -
 * confirm whether that is a typo.
 * NOTE(review): a block group flags mismatch is recorded as
 * REFERENCER_MISSING rather than a mismatch bit - confirm.
 */
13057 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13058 struct extent_buffer *eb, int slot)
13060 struct btrfs_root *extent_root = fs_info->extent_root;
13061 struct btrfs_root *dev_root = fs_info->dev_root;
13062 struct btrfs_path path;
13063 struct btrfs_key chunk_key;
13064 struct btrfs_key bg_key;
13065 struct btrfs_key devext_key;
13066 struct btrfs_chunk *chunk;
13067 struct extent_buffer *leaf;
13068 struct btrfs_block_group_item *bi;
13069 struct btrfs_block_group_item bg_item;
13070 struct btrfs_dev_extent *ptr;
13082 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13083 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13084 length = btrfs_chunk_length(eb, chunk);
13085 chunk_end = chunk_key.offset + length;
13086 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13089 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13091 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13094 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed by chunk start and length */
13096 bg_key.objectid = chunk_key.offset;
13097 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13098 bg_key.offset = length;
13100 btrfs_init_path(&path);
13101 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13104 "chunk[%llu %llu) did not find the related block group item",
13105 chunk_key.offset, chunk_end);
13106 err |= REFERENCER_MISSING;
13108 leaf = path.nodes[0];
13109 bi = btrfs_item_ptr(leaf, path.slots[0],
13110 struct btrfs_block_group_item);
13111 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13113 if (btrfs_block_group_flags(&bg_item) != type) {
13115 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13116 chunk_key.offset, chunk_end, type,
13117 btrfs_block_group_flags(&bg_item));
13118 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent that points back at this chunk */
13122 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13123 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13124 for (i = 0; i < num_stripes; i++) {
13125 btrfs_release_path(&path);
13126 btrfs_init_path(&path);
13127 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13128 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13129 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13131 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13134 goto not_match_dev;
13136 leaf = path.nodes[0];
13137 ptr = btrfs_item_ptr(leaf, path.slots[0],
13138 struct btrfs_dev_extent);
13139 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13140 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13141 if (objectid != chunk_key.objectid ||
13142 offset != chunk_key.offset ||
13143 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13144 goto not_match_dev;
13147 err |= BACKREF_MISSING;
13149 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13150 chunk_key.objectid, chunk_end, i);
13153 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * The item's key is saved, @path is released, and the key is searched again
 * through @trans with ins_len = -1 and cow = 1 before btrfs_del_item() is
 * called.  When the resulting slot is 0, btrfs_prev_leaf() is called.  The
 * outcome is reported with error() or printf(), naming the root and the key.
 */
13158 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13159 struct btrfs_root *root,
13160 struct btrfs_path *path)
13162 struct btrfs_key key;
/* Re-search with the transaction so the item can be modified */
13165 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13166 btrfs_release_path(path);
13167 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13173 ret = btrfs_del_item(trans, root, path);
13177 if (path->slots[0] == 0)
13178 btrfs_prev_leaf(root, path);
13183 error("failed to delete root %llu item[%llu, %u, %llu]",
13184 root->objectid, key.objectid, key.type, key.offset);
13186 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13187 root->objectid, key.objectid, key.type, key.offset);
13192 * Main entry function to check known items and update related accounting info
/*
 * Check the item at @path in a leaf of @root, dispatching on its key type.
 *
 * Each known key type is routed to its dedicated checker (extent data,
 * block group, dev item, chunk, dev extent, extent/metadata item, and the
 * four keyed backref types); EXTENT_CSUM items only add their item size to
 * total_csum_bytes.  For several types a repair helper follows the check:
 * repair_extent_data_item(), repair_chunk_item(), or
 * delete_extent_tree_item() when the check result has the REFERENCER_MISSING
 * / REFERENCER_MISMATCH bits tested below.
 * A slot at or beyond the leaf's item count is reported as an empty leaf.
 */
13194 static int check_leaf_items(struct btrfs_trans_handle *trans,
13195 struct btrfs_root *root, struct btrfs_path *path,
13196 struct node_refs *nrefs, int account_bytes)
13198 struct btrfs_fs_info *fs_info = root->fs_info;
13199 struct btrfs_key key;
13200 struct extent_buffer *eb;
13203 struct btrfs_extent_data_ref *dref;
13208 eb = path->nodes[0];
13209 slot = path->slots[0];
13210 if (slot >= btrfs_header_nritems(eb)) {
13212 error("empty leaf [%llu %u] root %llu", eb->start,
13213 root->fs_info->nodesize, root->objectid);
13219 btrfs_item_key_to_cpu(eb, &key, slot);
/* One case per item type that this checker knows how to verify */
13223 case BTRFS_EXTENT_DATA_KEY:
13224 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13226 ret = repair_extent_data_item(trans, root, path, nrefs,
13230 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13231 ret = check_block_group_item(fs_info, eb, slot);
13233 ret & REFERENCER_MISSING)
13234 ret = delete_extent_tree_item(trans, root, path);
13237 case BTRFS_DEV_ITEM_KEY:
13238 ret = check_dev_item(fs_info, eb, slot);
13241 case BTRFS_CHUNK_ITEM_KEY:
13242 ret = check_chunk_item(fs_info, eb, slot);
13244 ret = repair_chunk_item(trans, root, path, ret);
13247 case BTRFS_DEV_EXTENT_KEY:
13248 ret = check_dev_extent_item(fs_info, eb, slot);
13251 case BTRFS_EXTENT_ITEM_KEY:
13252 case BTRFS_METADATA_ITEM_KEY:
13253 ret = check_extent_item(trans, fs_info, path);
13256 case BTRFS_EXTENT_CSUM_KEY:
13257 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed backref items: key.offset carries the root id or parent bytenr */
13260 case BTRFS_TREE_BLOCK_REF_KEY:
13261 ret = check_tree_block_backref(fs_info, key.offset,
13264 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13265 ret = delete_extent_tree_item(trans, root, path);
13268 case BTRFS_EXTENT_DATA_REF_KEY:
13269 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13270 ret = check_extent_data_backref(fs_info,
13271 btrfs_extent_data_ref_root(eb, dref),
13272 btrfs_extent_data_ref_objectid(eb, dref),
13273 btrfs_extent_data_ref_offset(eb, dref),
13275 btrfs_extent_data_ref_count(eb, dref));
13277 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13278 ret = delete_extent_tree_item(trans, root, path);
13281 case BTRFS_SHARED_BLOCK_REF_KEY:
13282 ret = check_shared_block_backref(fs_info, key.offset,
13285 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13286 ret = delete_extent_tree_item(trans, root, path);
13289 case BTRFS_SHARED_DATA_REF_KEY:
13290 ret = check_shared_data_backref(fs_info, key.offset,
13293 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13294 ret = delete_extent_tree_item(trans, root, path);
13308 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory driver for the chunk and extent checks.
 *
 * Starts a transaction on the extent root (error path returns
 * PTR_ERR(trans)), runs check_btrfs_root() on the chunk root and the tree
 * root, then iterates the ROOT_ITEMs of the tree root starting from the
 * extent tree's objectid and runs check_btrfs_root() on each tree read from
 * them.  Tree reloc roots are read without the cache and freed after use.
 * Afterwards btrfs_fix_block_accounting() is called, BG_ACCOUNTING_ERROR is
 * cleared from err, and the transaction is committed.
 *
 * NOTE(review): the "failed to read tree" message formats key.objectid with
 * %lld although other messages in this file print objectids with %llu -
 * confirm.
 */
13310 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13312 struct btrfs_trans_handle *trans = NULL;
13313 struct btrfs_path path;
13314 struct btrfs_key old_key;
13315 struct btrfs_key key;
13316 struct btrfs_root *root1;
13317 struct btrfs_root *root;
13318 struct btrfs_root *cur_root;
13322 root = fs_info->fs_root;
13325 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13326 if (IS_ERR(trans)) {
13327 error("failed to start transaction before check");
13328 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked before the per-root loop */
13332 root1 = root->fs_info->chunk_root;
13333 ret = check_btrfs_root(trans, root1, 0, 1);
13336 root1 = root->fs_info->tree_root;
13337 ret = check_btrfs_root(trans, root1, 0, 1);
13340 btrfs_init_path(&path);
13341 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13343 key.type = BTRFS_ROOT_ITEM_KEY;
13345 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13347 error("cannot find extent tree in tree_root");
13352 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13353 if (key.type != BTRFS_ROOT_ITEM_KEY)
13356 key.offset = (u64)-1;
/* Reloc roots bypass the root cache and are freed explicitly after the check */
13358 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13359 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13362 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13363 if (IS_ERR(cur_root) || !cur_root) {
13364 error("failed to read tree: %lld", key.objectid);
13368 ret = check_btrfs_root(trans, cur_root, 0, 1);
13371 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13372 btrfs_free_fs_root(cur_root);
/* Re-find the saved position in the tree root before moving to the next item */
13374 btrfs_release_path(&path);
13375 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13376 &old_key, &path, 0, 0);
13380 ret = btrfs_next_item(root1, &path);
13386 /* if repair, update block accounting */
13388 ret = btrfs_fix_block_accounting(trans, root);
13392 err &= ~BG_ACCOUNTING_ERROR;
13396 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13398 btrfs_release_path(&path);
/*
 * Entry point for the chunk/extent check.
 *
 * Prints "checking extents" to stderr when progress reporting is disabled,
 * then runs check_chunks_and_extents_v2() in CHECK_MODE_LOWMEM and
 * check_chunks_and_extents() otherwise.  When repair is set and the check
 * returned 0, btrfs_fix_device_and_super_size() is run as well.
 */
13403 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13407 if (!ctx.progress_enabled)
13408 fprintf(stderr, "checking extents\n");
13409 if (check_mode == CHECK_MODE_LOWMEM)
13410 ret = check_chunks_and_extents_v2(fs_info);
13412 ret = check_chunks_and_extents(fs_info);
13414 /* Also repair device size related problems */
13415 if (repair && !ret) {
13416 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Re-initialise the root node of @root as a freshly written tree block.
 *
 * A block c is obtained (one visible path allocates it with
 * btrfs_alloc_free_block(); @overwrite presumably selects reuse of the
 * current node - confirm).  Its header is zeroed and refilled with level,
 * bytenr, generation, backref revision, owner, fsid and chunk tree uuid,
 * and it is marked dirty.  When the new block has the same start as the old
 * root node, the root item's generation and level are updated and written
 * with btrfs_update_root().  Finally the root is put on the dirty list.
 */
13423 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13424 struct btrfs_root *root, int overwrite)
13426 struct extent_buffer *c;
13427 struct extent_buffer *old = root->node;
13430 struct btrfs_disk_key disk_key = {0,0,0};
13436 extent_buffer_get(c);
13439 c = btrfs_alloc_free_block(trans, root,
13440 root->fs_info->nodesize,
13441 root->root_key.objectid,
13442 &disk_key, level, 0, 0);
13445 extent_buffer_get(c);
/* Wipe the header, then rebuild each field */
13449 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13450 btrfs_set_header_level(c, level);
13451 btrfs_set_header_bytenr(c, c->start);
13452 btrfs_set_header_generation(c, trans->transid);
13453 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13454 btrfs_set_header_owner(c, root->root_key.objectid);
13456 write_extent_buffer(c, root->fs_info->fsid,
13457 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13459 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13460 btrfs_header_chunk_tree_uuid(c),
13463 btrfs_mark_buffer_dirty(c);
13465 * this case can happen in the following case:
13467 * 1.overwrite previous root.
13469 * 2.reinit reloc data root, this is because we skip pin
13470 * down reloc data tree before which means we can allocate
13471 * same block bytenr here.
13473 if (old->start == c->start) {
13474 btrfs_set_root_generation(&root->root_item,
13476 root->root_item.level = btrfs_header_level(root->node);
13477 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13478 &root->root_key, &root->root_item);
13480 free_extent_buffer(c);
13484 free_extent_buffer(old);
13486 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is tested for first; otherwise it is pinned with
 * btrfs_pin_extent().  One branch handles ROOT_ITEMs (filtering on the
 * extent tree and the two reloc tree ids): it reads the root block they
 * point at and recurses with tree_root = 0.  The other branch handles block
 * pointers: at level 1 outside the tree root they are pinned without being
 * read, otherwise the child is read and the function recurses with the same
 * @tree_root value.
 */
13490 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13491 struct extent_buffer *eb, int tree_root)
13493 struct extent_buffer *tmp;
13494 struct btrfs_root_item *ri;
13495 struct btrfs_key key;
13497 int level = btrfs_header_level(eb);
13503 * If we have pinned this block before, don't pin it again.
13504 * This can not only avoid forever loop with broken filesystem
13505 * but also give us some speedups.
13507 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13508 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13511 btrfs_pin_extent(fs_info, eb->start, eb->len);
13513 nritems = btrfs_header_nritems(eb);
13514 for (i = 0; i < nritems; i++) {
13516 btrfs_item_key_to_cpu(eb, &key, i);
13517 if (key.type != BTRFS_ROOT_ITEM_KEY)
13519 /* Skip the extent root and reloc roots */
13520 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13521 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13522 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* Follow the root item to the root block of the tree it describes */
13524 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13525 bytenr = btrfs_disk_root_bytenr(eb, ri);
13528 * If at any point we start needing the real root we
13529 * will have to build a stump root for the root we are
13530 * in, but for now this doesn't actually use the root so
13531 * just pass in extent_root.
13533 tmp = read_tree_block(fs_info, bytenr, 0);
13534 if (!extent_buffer_uptodate(tmp)) {
13535 fprintf(stderr, "Error reading root block\n");
13538 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13539 free_extent_buffer(tmp);
13543 bytenr = btrfs_node_blockptr(eb, i);
13545 /* If we aren't the tree root don't read the block */
13546 if (level == 1 && !tree_root) {
13547 btrfs_pin_extent(fs_info, bytenr,
13548 fs_info->nodesize);
13552 tmp = read_tree_block(fs_info, bytenr, 0);
13553 if (!extent_buffer_uptodate(tmp)) {
13554 fprintf(stderr, "Error reading tree block\n");
13557 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13558 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root = 0) and then those reachable
 * from the tree root (tree_root = 1) using pin_down_tree_blocks().  The
 * result of the second call is returned directly.
 */
13567 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13571 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13575 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * Clears the avail_{data,metadata,system}_alloc_bits masks, then walks the
 * chunk tree; for each CHUNK_ITEM it calls btrfs_add_block_group() with the
 * chunk's type, key and length and marks the range starting at key.offset
 * dirty in fs_info->free_space_cache.  A second visible step looks up block
 * groups with btrfs_lookup_first_block_group(), advancing start past each
 * one found.
 */
13578 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13580 struct btrfs_block_group_cache *cache;
13581 struct btrfs_path path;
13582 struct extent_buffer *leaf;
13583 struct btrfs_chunk *chunk;
13584 struct btrfs_key key;
13588 btrfs_init_path(&path);
13590 key.type = BTRFS_CHUNK_ITEM_KEY;
13592 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13594 btrfs_release_path(&path);
13599 * We do this in case the block groups were screwed up and had alloc
13600 * bits that aren't actually set on the chunks. This happens with
13601 * restored images every time and could happen in real life I guess.
13603 fs_info->avail_data_alloc_bits = 0;
13604 fs_info->avail_metadata_alloc_bits = 0;
13605 fs_info->avail_system_alloc_bits = 0;
13607 /* First we need to create the in-memory block groups */
/* When the slot runs past the current leaf, move on to the next leaf */
13609 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13610 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13612 btrfs_release_path(&path);
13620 leaf = path.nodes[0];
13621 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13622 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13627 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13628 btrfs_add_block_group(fs_info, 0,
13629 btrfs_chunk_type(leaf, chunk),
13630 key.objectid, key.offset,
13631 btrfs_chunk_length(leaf, chunk));
13632 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13633 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups: start jumps to the end of each one found */
13638 cache = btrfs_lookup_first_block_group(fs_info, start);
13642 start = cache->key.objectid + cache->key.offset;
13645 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root and reinitialize the data reloc
 * tree, all inside the caller supplied transaction.
 *
 * Visible steps:
 *  - search the tree root for the item with objectid BTRFS_BALANCE_OBJECTID
 *    and type BTRFS_BALANCE_ITEM_KEY and delete it with btrfs_del_item();
 *    one search outcome jumps straight to the reinit_data_reloc label (the
 *    guarding condition is not visible in this excerpt);
 *  - search for root items with objectid BTRFS_TREE_RELOC_OBJECTID and remove
 *    them in batches of del_nr items starting at del_slot through
 *    btrfs_del_items();
 *  - read the root keyed by BTRFS_DATA_RELOC_TREE_OBJECTID, record it in the
 *    transaction, reinitialize it with btrfs_fsck_reinit_root() and recreate
 *    its root directory with btrfs_make_root_dir().
 *
 * Note that the local variable root starts as fs_info->tree_root and is
 * reassigned to the data reloc root near the end.
 */
13649 static int reset_balance(struct btrfs_trans_handle *trans,
13650 struct btrfs_fs_info *fs_info)
13652 struct btrfs_root *root = fs_info->tree_root;
13653 struct btrfs_path path;
13654 struct extent_buffer *leaf;
13655 struct btrfs_key key;
13656 int del_slot, del_nr = 0;
13660 btrfs_init_path(&path);
13661 key.objectid = BTRFS_BALANCE_OBJECTID;
13662 key.type = BTRFS_BALANCE_ITEM_KEY;
13664 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13669 goto reinit_data_reloc;
13674 ret = btrfs_del_item(trans, root, &path);
13677 btrfs_release_path(&path);
13679 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13680 key.type = BTRFS_ROOT_ITEM_KEY;
13682 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13686 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13691 ret = btrfs_del_items(trans, root, &path,
13698 btrfs_release_path(&path);
13701 ret = btrfs_search_slot(trans, root, &key, &path,
13708 leaf = path.nodes[0];
13709 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13710 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13712 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13717 del_slot = path.slots[0];
13726 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13730 btrfs_release_path(&path);
13733 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13734 key.type = BTRFS_ROOT_ITEM_KEY;
13735 key.offset = (u64)-1;
13736 root = btrfs_read_fs_root(fs_info, &key);
13737 if (IS_ERR(root)) {
13738 fprintf(stderr, "Error reading data reloc tree\n");
13739 ret = PTR_ERR(root);
13742 record_root_in_trans(trans, root);
/* From here on root is the data reloc root, not the tree root. */
13743 ret = btrfs_fsck_reinit_root(trans, root, 0);
13746 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13748 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside the given transaction.
 *
 * Visible sequence:
 *  1. filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message on stderr (the returned code is not visible in this excerpt);
 *  2. pin_metadata_blocks() pins the metadata currently in use;
 *  3. the existing block groups are dropped with btrfs_free_block_groups()
 *     and recreated with reset_block_groups();
 *  4. the extent root is reinitialized with btrfs_fsck_reinit_root();
 *  5. every in-memory block group found through
 *     btrfs_lookup_first_block_group() has its item inserted into the extent
 *     root, followed by a call to btrfs_extent_post_op();
 *  6. reset_balance() clears any pending balance.
 *
 * Each failing step prints its own message to stderr. The error propagation
 * statements themselves are not visible here.
 */
13752 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13753 struct btrfs_fs_info *fs_info)
13759 * The only reason we don't do this is because right now we're just
13760 * walking the trees we find and pinning down their bytes, we don't look
13761 * at any of the leaves. In order to do mixed groups we'd have to check
13762 * the leaves of any fs roots and pin down the bytes for any file
13763 * extents we find. Not hard but why do it if we don't have to?
13765 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13766 fprintf(stderr, "We don't support re-initing the extent tree "
13767 "for mixed block groups yet, please notify a btrfs "
13768 "developer you want to do this so they can add this "
13769 "functionality.\n");
13774 * first we need to walk all of the trees except the extent tree and pin
13775 * down the bytes that are in use so we don't overwrite any existing
13778 ret = pin_metadata_blocks(fs_info);
13780 fprintf(stderr, "error pinning down used bytes\n");
13785 * Need to drop all the block groups since we're going to recreate all
13788 btrfs_free_block_groups(fs_info);
13789 ret = reset_block_groups(fs_info);
13791 fprintf(stderr, "error resetting the block groups\n");
13795 /* Ok we can allocate now, reinit the extent root */
13796 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13798 fprintf(stderr, "extent root initialization failed\n");
13800 * When the transaction code is updated we should end the
13801 * transaction, but for now progs only knows about commit so
13802 * just return an error.
13808 * Now we have all the in-memory block groups setup so we can make
13809 * allocations properly, and the metadata we care about is safe since we
13810 * pinned all of it above.
13813 struct btrfs_block_group_cache *cache;
13815 cache = btrfs_lookup_first_block_group(fs_info, start);
13818 start = cache->key.objectid + cache->key.offset;
13819 ret = btrfs_insert_item(trans, fs_info->extent_root,
13820 &cache->key, &cache->item,
13821 sizeof(cache->item));
13823 fprintf(stderr, "Error adding block group\n");
13826 btrfs_extent_post_op(trans, fs_info->extent_root);
13829 ret = reset_balance(trans, fs_info);
13831 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW a single metadata block.
 *
 * The root parameter is only used to reach fs_info: it is immediately
 * replaced by the root that owns eb, looked up from btrfs_header_owner(eb).
 * A transaction is started on that root, then btrfs_search_slot() is run
 * for the first key of eb (node key for a level above zero, item key for a
 * leaf) with path.lowest_level set to the level of eb and the last argument
 * set to 1 (presumably the cow flag, which would make the search COW the
 * path down to that level; confirm against btrfs_search_slot).
 *
 * The transaction is committed afterwards and the return value of the commit
 * is not captured. Lookup and transaction start failures return PTR_ERR().
 */
13836 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13838 struct btrfs_path path;
13839 struct btrfs_trans_handle *trans;
13840 struct btrfs_key key;
13843 printf("Recowing metadata block %llu\n", eb->start);
13844 key.objectid = btrfs_header_owner(eb);
13845 key.type = BTRFS_ROOT_ITEM_KEY;
13846 key.offset = (u64)-1;
13848 root = btrfs_read_fs_root(root->fs_info, &key);
13849 if (IS_ERR(root)) {
13850 fprintf(stderr, "Couldn't find owner root %llu\n",
13852 return PTR_ERR(root);
13855 trans = btrfs_start_transaction(root, 1);
13857 return PTR_ERR(trans);
13859 btrfs_init_path(&path);
/* Stop the search at the level of eb instead of descending to a leaf. */
13860 path.lowest_level = btrfs_header_level(eb);
13861 if (path.lowest_level)
13862 btrfs_node_key_to_cpu(eb, &key, 0);
13864 btrfs_item_key_to_cpu(eb, &key, 0);
13866 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13867 btrfs_commit_transaction(trans, root);
13868 btrfs_release_path(&path);
/*
 * Delete one item recorded as bad from the root it belongs to.
 *
 * The root parameter is only used to reach fs_info: it is replaced by the
 * root identified by bad->root_id. A transaction is started on that root,
 * the item is located with btrfs_search_slot() using bad->key, removed with
 * btrfs_del_item(), and the transaction is committed. The return value of
 * the commit is not captured.
 *
 * Lookup and transaction start failures return PTR_ERR(). How the search
 * result is checked before the delete is not visible in this excerpt.
 */
13872 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13874 struct btrfs_path path;
13875 struct btrfs_trans_handle *trans;
13876 struct btrfs_key key;
13879 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13880 bad->key.type, bad->key.offset);
13881 key.objectid = bad->root_id;
13882 key.type = BTRFS_ROOT_ITEM_KEY;
13883 key.offset = (u64)-1;
13885 root = btrfs_read_fs_root(root->fs_info, &key);
13886 if (IS_ERR(root)) {
13887 fprintf(stderr, "Couldn't find owner root %llu\n",
13889 return PTR_ERR(root);
13892 trans = btrfs_start_transaction(root, 1);
13894 return PTR_ERR(trans);
13896 btrfs_init_path(&path);
13897 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13903 ret = btrfs_del_item(trans, root, &path);
13905 btrfs_commit_transaction(trans, root);
13906 btrfs_release_path(&path);
/*
 * Clear the log tree pointer stored in the super block.
 *
 * Starts a transaction on root, sets both log_root and log_root_level in
 * fs_info->super_copy to 0, and commits the transaction. ret receives
 * PTR_ERR(trans) when the transaction cannot be started, otherwise the
 * result of btrfs_commit_transaction().
 */
13910 static int zero_log_tree(struct btrfs_root *root)
13912 struct btrfs_trans_handle *trans;
13915 trans = btrfs_start_transaction(root, 1);
13916 if (IS_ERR(trans)) {
13917 ret = PTR_ERR(trans);
13920 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13921 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13922 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum one data extent, one sector at a time.
 *
 * While offset is below len, a sector is read at start + offset into buf
 * with read_extent_data() and handed to btrfs_csum_file_block() together
 * with start + len and start + offset. offset then advances by
 * fs_info->sectorsize.
 *
 * buf is supplied by the caller; the callers in this file allocate it with
 * a size of one sector. The remaining parameters of the signature and the
 * error checks after each call are not visible in this excerpt.
 */
13926 static int populate_csum(struct btrfs_trans_handle *trans,
13927 struct btrfs_root *csum_root, char *buf, u64 start,
13930 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13935 while (offset < len) {
13936 sectorsize = fs_info->sectorsize;
13937 ret = read_extent_data(fs_info, buf, start + offset,
13941 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13942 start + offset, buf, sectorsize);
13945 offset += sectorsize;
/*
 * Populate the csum tree from the file extents of a single fs/subvolume
 * root.
 *
 * A buffer of one sector is allocated, cur_root is searched without a
 * transaction, and the items are iterated with btrfs_next_item(). Only items
 * of type BTRFS_EXTENT_DATA_KEY whose file extent type is
 * BTRFS_FILE_EXTENT_REG reach populate_csum(), which is called with the
 * extent disk bytenr and disk num bytes.
 *
 * A return of -EEXIST from populate_csum() is special-cased; the handling
 * is not visible in this excerpt (presumably it is tolerated, since several
 * file extents can share one disk extent; confirm).
 */
13950 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13951 struct btrfs_root *csum_root,
13952 struct btrfs_root *cur_root)
13954 struct btrfs_path path;
13955 struct btrfs_key key;
13956 struct extent_buffer *node;
13957 struct btrfs_file_extent_item *fi;
13964 buf = malloc(cur_root->fs_info->sectorsize);
13968 btrfs_init_path(&path);
13972 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13975 /* Iterate all regular file extents and fill its csum */
13977 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13979 if (key.type != BTRFS_EXTENT_DATA_KEY)
13981 node = path.nodes[0];
13982 slot = path.slots[0];
13983 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13984 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13986 start = btrfs_file_extent_disk_bytenr(node, fi);
13987 len = btrfs_file_extent_disk_num_bytes(node, fi);
13989 ret = populate_csum(trans, csum_root, buf, start, len);
13990 if (ret == -EEXIST)
13996 * TODO: if next leaf is corrupted, jump to nearest next valid
13999 ret = btrfs_next_item(cur_root, &path);
14009 btrfs_release_path(&path);
/*
 * Populate the csum tree by walking every fs/subvolume tree.
 *
 * The tree root is searched starting at the root item of
 * BTRFS_FS_TREE_OBJECTID and iterated with btrfs_next_item(). Each item is
 * filtered on three conditions: objectid above BTRFS_LAST_FREE_OBJECTID,
 * type other than BTRFS_ROOT_ITEM_KEY, and is_fstree() returning false. The
 * action taken for each filter is not visible in this excerpt.
 *
 * For items passing the filters, key.offset is set to (u64)-1, the root is
 * read with btrfs_read_fs_root(), and fill_csum_tree_from_one_fs_root() is
 * run on it. A root that cannot be read is reported on stderr.
 */
14014 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14015 struct btrfs_root *csum_root)
14017 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14018 struct btrfs_path path;
14019 struct btrfs_root *tree_root = fs_info->tree_root;
14020 struct btrfs_root *cur_root;
14021 struct extent_buffer *node;
14022 struct btrfs_key key;
14026 btrfs_init_path(&path);
14027 key.objectid = BTRFS_FS_TREE_OBJECTID;
14029 key.type = BTRFS_ROOT_ITEM_KEY;
14030 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14039 node = path.nodes[0];
14040 slot = path.slots[0];
14041 btrfs_item_key_to_cpu(node, &key, slot);
14042 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14044 if (key.type != BTRFS_ROOT_ITEM_KEY)
14046 if (!is_fstree(key.objectid))
14048 key.offset = (u64)-1;
14050 cur_root = btrfs_read_fs_root(fs_info, &key);
14051 if (IS_ERR(cur_root) || !cur_root) {
14052 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14056 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14061 ret = btrfs_next_item(tree_root, &path);
14071 btrfs_release_path(&path);
/*
 * Populate the csum tree from the data extents recorded in the extent tree.
 *
 * The extent root is searched without a transaction, a buffer of one sector
 * is allocated, and the leaves are walked with btrfs_next_leaf(). Items
 * whose type is not BTRFS_EXTENT_ITEM_KEY, or whose extent flags lack
 * BTRFS_EXTENT_FLAG_DATA, take separate branches that are not visible here
 * (presumably they are skipped). For the remaining items populate_csum() is
 * called with key.objectid as the start of the extent.
 */
14075 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14076 struct btrfs_root *csum_root)
14078 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14079 struct btrfs_path path;
14080 struct btrfs_extent_item *ei;
14081 struct extent_buffer *leaf;
14083 struct btrfs_key key;
14086 btrfs_init_path(&path);
14088 key.type = BTRFS_EXTENT_ITEM_KEY;
14090 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14092 btrfs_release_path(&path);
14096 buf = malloc(csum_root->fs_info->sectorsize);
14098 btrfs_release_path(&path);
14103 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14104 ret = btrfs_next_leaf(extent_root, &path);
14112 leaf = path.nodes[0];
14114 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14115 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14120 ei = btrfs_item_ptr(leaf, path.slots[0],
14121 struct btrfs_extent_item);
14122 if (!(btrfs_extent_flags(leaf, ei) &
14123 BTRFS_EXTENT_FLAG_DATA)) {
14128 ret = populate_csum(trans, csum_root, buf, key.objectid,
14135 btrfs_release_path(&path);
14141 * Recalculate the csum and put it into the csum tree.
14143 * Extent tree init will wipe out all the extent info, so in that case, we
14144 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
14145 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher: a non-zero search_fs_tree selects fill_csum_tree_from_fs(),
 * zero selects fill_csum_tree_from_extent(). The result of the selected
 * helper is returned unchanged.
 */
14147 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14148 struct btrfs_root *csum_root,
14149 int search_fs_tree)
14151 if (search_fs_tree)
14152 return fill_csum_tree_from_fs(trans, csum_root);
14154 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-level roots_info_cache.
 *
 * Nothing is done when the cache pointer is NULL. Otherwise entries are
 * taken one by one with first_cache_extent(), removed from the tree and
 * converted back to their enclosing struct root_item_info (the statement
 * that releases each entry is not visible in this excerpt). Finally the
 * cache tree itself is freed and the pointer reset to NULL, so a later
 * build_roots_info_cache() allocates a fresh tree.
 */
14157 static void free_roots_info_cache(void)
14159 if (!roots_info_cache)
14162 while (!cache_tree_empty(roots_info_cache)) {
14163 struct cache_extent *entry;
14164 struct root_item_info *rii;
14166 entry = first_cache_extent(roots_info_cache);
14169 remove_cache_extent(roots_info_cache, entry);
14170 rii = container_of(entry, struct root_item_info, cache_extent);
14174 free(roots_info_cache);
14175 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest level tree
 * block found for that root.
 *
 * roots_info_cache is allocated and initialized on first use. The extent
 * root is then walked item by item. Only BTRFS_EXTENT_ITEM_KEY and
 * BTRFS_METADATA_ITEM_KEY items are considered, and extent items must carry
 * BTRFS_EXTENT_FLAG_TREE_BLOCK. For metadata items the level comes from
 * found_key.offset and the inline ref follows the extent item directly. For
 * extent items the level comes from the btrfs_tree_block_info that sits
 * between the extent item and the inline ref.
 *
 * Only items whose first inline ref is of type BTRFS_TREE_BLOCK_REF_KEY are
 * used; the ref offset is taken as the root id. A cache entry keyed by that
 * root id (size 1) is looked up or created. When the level of the block is
 * higher than the recorded one, or nothing is recorded yet, the entry takes
 * the bytenr, generation and level of this block and node_count is reset
 * to 1. The branch for an equal level is not visible in this excerpt
 * (presumably it bumps node_count; confirm).
 */
14178 static int build_roots_info_cache(struct btrfs_fs_info *info)
14181 struct btrfs_key key;
14182 struct extent_buffer *leaf;
14183 struct btrfs_path path;
14185 if (!roots_info_cache) {
14186 roots_info_cache = malloc(sizeof(*roots_info_cache));
14187 if (!roots_info_cache)
14189 cache_tree_init(roots_info_cache);
14192 btrfs_init_path(&path);
14194 key.type = BTRFS_EXTENT_ITEM_KEY;
14196 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14199 leaf = path.nodes[0];
14202 struct btrfs_key found_key;
14203 struct btrfs_extent_item *ei;
14204 struct btrfs_extent_inline_ref *iref;
14205 int slot = path.slots[0];
14210 struct cache_extent *entry;
14211 struct root_item_info *rii;
14213 if (slot >= btrfs_header_nritems(leaf)) {
14214 ret = btrfs_next_leaf(info->extent_root, &path);
14221 leaf = path.nodes[0];
14222 slot = path.slots[0];
14225 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14227 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14228 found_key.type != BTRFS_METADATA_ITEM_KEY)
14231 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14232 flags = btrfs_extent_flags(leaf, ei);
14234 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14235 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14238 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14239 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14240 level = found_key.offset;
14242 struct btrfs_tree_block_info *binfo;
14244 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14245 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14246 level = btrfs_tree_block_level(leaf, binfo);
14250 * For a root extent, it must be of the following type and the
14251 * first (and only one) iref in the item.
14253 type = btrfs_extent_inline_ref_type(leaf, iref);
14254 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14257 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14258 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14260 rii = malloc(sizeof(struct root_item_info));
14265 rii->cache_extent.start = root_id;
14266 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry with no level recorded yet; the level comparison further down tests for it. */
14267 rii->level = (u8)-1;
14268 entry = &rii->cache_extent;
14269 ret = insert_cache_extent(roots_info_cache, entry);
14272 rii = container_of(entry, struct root_item_info,
14276 ASSERT(rii->cache_extent.start == root_id);
14277 ASSERT(rii->cache_extent.size == 1);
14279 if (level > rii->level || rii->level == (u8)-1) {
14280 rii->level = level;
14281 rii->bytenr = found_key.objectid;
14282 rii->gen = btrfs_extent_generation(leaf, ei);
14283 rii->node_count = 1;
14284 } else if (level == rii->level) {
14292 btrfs_release_path(&path);
/*
 * Compare one root item against the information collected in
 * roots_info_cache and optionally rewrite it.
 *
 * path must point at the root item in the leaf path->nodes[0] at slot
 * path->slots[0]; root_key is the key of that item. The cache entry for
 * root_key->objectid is looked up; a missing entry, or an entry whose
 * node_count is not 1, is reported as an error (the returned codes are not
 * visible in this excerpt).
 *
 * The on-disk root item is copied into a local struct and its bytenr, level
 * and generation are compared with the cached values. On a mismatch:
 *  - a message is printed unless read_only_mode and the global repair flag
 *    are both set;
 *  - a root item generation newer than the cached one is reported separately
 *    (the follow-up action is not visible here);
 *  - when read_only_mode is zero, the three fields are overwritten with the
 *    cached values and the item is written back into the leaf with
 *    write_extent_buffer().
 */
14297 static int maybe_repair_root_item(struct btrfs_path *path,
14298 const struct btrfs_key *root_key,
14299 const int read_only_mode)
14301 const u64 root_id = root_key->objectid;
14302 struct cache_extent *entry;
14303 struct root_item_info *rii;
14304 struct btrfs_root_item ri;
14305 unsigned long offset;
14307 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14310 "Error: could not find extent items for root %llu\n",
14311 root_key->objectid);
14315 rii = container_of(entry, struct root_item_info, cache_extent);
14316 ASSERT(rii->cache_extent.start == root_id);
14317 ASSERT(rii->cache_extent.size == 1);
14319 if (rii->node_count != 1) {
14321 "Error: could not find btree root extent for root %llu\n",
14326 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
/* Work on a local copy of the root item; it is written back only in the update branch. */
14327 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14329 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14330 btrfs_root_level(&ri) != rii->level ||
14331 btrfs_root_generation(&ri) != rii->gen) {
14334 * If we're in repair mode but our caller told us to not update
14335 * the root item, i.e. just check if it needs to be updated, don't
14336 * print this message, since the caller will call us again shortly
14337 * for the same root item without read only mode (the caller will
14338 * open a transaction first).
14340 if (!(read_only_mode && repair))
14342 "%sroot item for root %llu,"
14343 " current bytenr %llu, current gen %llu, current level %u,"
14344 " new bytenr %llu, new gen %llu, new level %u\n",
14345 (read_only_mode ? "" : "fixing "),
14347 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14348 btrfs_root_level(&ri),
14349 rii->bytenr, rii->gen, rii->level);
14351 if (btrfs_root_generation(&ri) > rii->gen) {
14353 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14354 root_id, btrfs_root_generation(&ri), rii->gen);
14358 if (!read_only_mode) {
14359 btrfs_set_root_bytenr(&ri, rii->bytenr);
14360 btrfs_set_root_level(&ri, rii->level);
14361 btrfs_set_root_generation(&ri, rii->gen);
14362 write_extent_buffer(path->nodes[0], &ri,
14363 offset, sizeof(ri));
14373 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14374 * caused read-only snapshots to be corrupted if they were created at a moment
14375 * when the source subvolume/snapshot had orphan items. The issue was that the
14376 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14377 * node instead of the post orphan cleanup root node.
14378 * So this function, and its callees, just detects and fixes those cases. Even
14379 * though the regression was for read-only snapshots, this function applies to
14380 * any snapshot/subvolume root.
14381 * This must be run before any other repair code - not doing so makes other
14382 * repair code delete or modify backrefs in the extent tree for example, which
14383 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in the tree root and check, or fix, each one through
 * maybe_repair_root_item().
 *
 * build_roots_info_cache() is run first. The walk starts at the root item
 * key with objectid BTRFS_FIRST_FREE_OBJECTID. Items that are not of type
 * BTRFS_ROOT_ITEM_KEY, and the item with objectid BTRFS_TREE_RELOC_OBJECTID,
 * take separate branches whose bodies are not visible in this excerpt
 * (presumably they are skipped).
 *
 * maybe_repair_root_item() is called in read-only mode while no transaction
 * is open (trans is NULL) and in update mode once one is. When a check made
 * without a transaction is followed by the global repair flag being set,
 * the path is released; the statements that follow are not visible here
 * (presumably the leaf is revisited with a transaction; confirm).
 *
 * On the way out the roots info cache is freed, the path released and the
 * transaction committed. The meaning of the return value is not visible in
 * this excerpt.
 */
14385 static int repair_root_items(struct btrfs_fs_info *info)
14387 struct btrfs_path path;
14388 struct btrfs_key key;
14389 struct extent_buffer *leaf;
14390 struct btrfs_trans_handle *trans = NULL;
14393 int need_trans = 0;
14395 btrfs_init_path(&path);
14397 ret = build_roots_info_cache(info);
14401 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14402 key.type = BTRFS_ROOT_ITEM_KEY;
14407 * Avoid opening and committing transactions if a leaf doesn't have
14408 * any root items that need to be fixed, so that we avoid rotating
14409 * backup roots unnecessarily.
14412 trans = btrfs_start_transaction(info->tree_root, 1);
14413 if (IS_ERR(trans)) {
14414 ret = PTR_ERR(trans);
14419 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14423 leaf = path.nodes[0];
14426 struct btrfs_key found_key;
14428 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14429 int no_more_keys = find_next_key(&path, &key);
14431 btrfs_release_path(&path);
14433 ret = btrfs_commit_transaction(trans,
14445 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14447 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14449 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14452 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14456 if (!trans && repair) {
14459 btrfs_release_path(&path);
14469 free_roots_info_cache();
14470 btrfs_release_path(&path);
14472 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation stored in the super block.
 *
 * Block groups are visited in order through
 * btrfs_lookup_first_block_group(), calling btrfs_clear_free_space_cache()
 * on each and advancing past the end of the group. Afterwards a transaction
 * is started on the tree root, the super block cache_generation is set to
 * (u64)-1 and the transaction is committed. The return value of the commit
 * is not captured. A failure to start the transaction returns PTR_ERR().
 */
14479 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14481 struct btrfs_trans_handle *trans;
14482 struct btrfs_block_group_cache *bg_cache;
14486 /* Clear all free space cache inodes and its extent data */
14488 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14491 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14494 current = bg_cache->key.objectid + bg_cache->key.offset;
14497 /* Don't forget to set cache_generation to -1 */
14498 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14499 if (IS_ERR(trans)) {
14500 error("failed to update super block cache generation");
14501 return PTR_ERR(trans);
14503 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14504 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache of the requested version.
 *
 * clear_version 1: when the filesystem has the FREE_SPACE_TREE compat_ro
 * flag, a message pointing at the v2 option is emitted instead of clearing;
 * otherwise clear_free_space_cache() is run.
 *
 * clear_version 2: when the FREE_SPACE_TREE flag is absent there is nothing
 * to clear and a notice is printed; otherwise
 * btrfs_clear_free_space_tree() is run.
 *
 * Progress and failures are reported with printf() and error(). The second
 * parameter of the signature and the returned codes are not visible in this
 * excerpt.
 */
14509 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14514 if (clear_version == 1) {
14515 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14517 "free space cache v2 detected, use --clear-space-cache v2");
14521 printf("Clearing free space cache\n");
14522 ret = clear_free_space_cache(fs_info);
14524 error("failed to clear free space cache");
14527 printf("Free space cache cleared\n");
14529 } else if (clear_version == 2) {
14530 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14531 printf("no free space cache v2 to clear\n");
14535 printf("Clear free space cache v2\n");
14536 ret = btrfs_clear_free_space_tree(fs_info);
14538 error("failed to clear free space cache v2: %d", ret);
14541 printf("free space cache v2 cleared\n");
/*
 * Help text of the check command, passed to usage() by cmd_check().
 *
 * The entries are, in order: the synopsis, a one-line summary, the longer
 * description, and one entry per command line option. Continuation entries
 * that begin with spaces extend the description of the preceding option.
 */
14548 const char * const cmd_check_usage[] = {
14549 "btrfs check [options] <device>",
14550 "Check structural integrity of a filesystem (unmounted).",
14551 "Check structural integrity of an unmounted filesystem. Verify internal",
14552 "trees' consistency and item connectivity. In the repair mode try to",
14553 "fix the problems found. ",
14554 "WARNING: the repair mode is considered dangerous",
14556 "-s|--super <superblock> use this superblock copy",
14557 "-b|--backup use the first valid backup root copy",
14558 "--force skip mount checks, repair is not possible",
14559 "--repair try to repair the filesystem",
14560 "--readonly run in read-only mode (default)",
14561 "--init-csum-tree create a new CRC tree",
14562 "--init-extent-tree create a new extent tree",
14563 "--mode <MODE> allows choice of memory/IO trade-offs",
14564 " where MODE is one of:",
14565 " original - read inodes and extents to memory (requires",
14566 " more memory, does less IO)",
14567 " lowmem - try to use less memory but read blocks again",
14569 "--check-data-csum verify checksums of data blocks",
14570 "-Q|--qgroup-report print a report on qgroup consistency",
14571 "-E|--subvol-extents <subvolid>",
14572 " print subvolume extents and sharing state",
14573 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14574 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14575 "-p|--progress indicate progress",
14576 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
14580 int cmd_check(int argc, char **argv)
14582 struct cache_tree root_cache;
14583 struct btrfs_root *root;
14584 struct btrfs_fs_info *info;
14587 u64 tree_root_bytenr = 0;
14588 u64 chunk_root_bytenr = 0;
14589 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14593 int init_csum_tree = 0;
14595 int clear_space_cache = 0;
14596 int qgroup_report = 0;
14597 int qgroups_repaired = 0;
14598 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14603 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14604 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14605 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14606 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14607 GETOPT_VAL_FORCE };
14608 static const struct option long_options[] = {
14609 { "super", required_argument, NULL, 's' },
14610 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14611 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14612 { "init-csum-tree", no_argument, NULL,
14613 GETOPT_VAL_INIT_CSUM },
14614 { "init-extent-tree", no_argument, NULL,
14615 GETOPT_VAL_INIT_EXTENT },
14616 { "check-data-csum", no_argument, NULL,
14617 GETOPT_VAL_CHECK_CSUM },
14618 { "backup", no_argument, NULL, 'b' },
14619 { "subvol-extents", required_argument, NULL, 'E' },
14620 { "qgroup-report", no_argument, NULL, 'Q' },
14621 { "tree-root", required_argument, NULL, 'r' },
14622 { "chunk-root", required_argument, NULL,
14623 GETOPT_VAL_CHUNK_TREE },
14624 { "progress", no_argument, NULL, 'p' },
14625 { "mode", required_argument, NULL,
14627 { "clear-space-cache", required_argument, NULL,
14628 GETOPT_VAL_CLEAR_SPACE_CACHE},
14629 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14630 { NULL, 0, NULL, 0}
14633 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14637 case 'a': /* ignored */ break;
14639 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14642 num = arg_strtou64(optarg);
14643 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14645 "super mirror should be less than %d",
14646 BTRFS_SUPER_MIRROR_MAX);
14649 bytenr = btrfs_sb_offset(((int)num));
14650 printf("using SB copy %llu, bytenr %llu\n", num,
14651 (unsigned long long)bytenr);
14657 subvolid = arg_strtou64(optarg);
14660 tree_root_bytenr = arg_strtou64(optarg);
14662 case GETOPT_VAL_CHUNK_TREE:
14663 chunk_root_bytenr = arg_strtou64(optarg);
14666 ctx.progress_enabled = true;
14670 usage(cmd_check_usage);
14671 case GETOPT_VAL_REPAIR:
14672 printf("enabling repair mode\n");
14674 ctree_flags |= OPEN_CTREE_WRITES;
14676 case GETOPT_VAL_READONLY:
14679 case GETOPT_VAL_INIT_CSUM:
14680 printf("Creating a new CRC tree\n");
14681 init_csum_tree = 1;
14683 ctree_flags |= OPEN_CTREE_WRITES;
14685 case GETOPT_VAL_INIT_EXTENT:
14686 init_extent_tree = 1;
14687 ctree_flags |= (OPEN_CTREE_WRITES |
14688 OPEN_CTREE_NO_BLOCK_GROUPS);
14691 case GETOPT_VAL_CHECK_CSUM:
14692 check_data_csum = 1;
14694 case GETOPT_VAL_MODE:
14695 check_mode = parse_check_mode(optarg);
14696 if (check_mode == CHECK_MODE_UNKNOWN) {
14697 error("unknown mode: %s", optarg);
14701 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14702 if (strcmp(optarg, "v1") == 0) {
14703 clear_space_cache = 1;
14704 } else if (strcmp(optarg, "v2") == 0) {
14705 clear_space_cache = 2;
14706 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14709 "invalid argument to --clear-space-cache, must be v1 or v2");
14712 ctree_flags |= OPEN_CTREE_WRITES;
14714 case GETOPT_VAL_FORCE:
14720 if (check_argc_exact(argc - optind, 1))
14721 usage(cmd_check_usage);
14723 if (ctx.progress_enabled) {
14724 ctx.tp = TASK_NOTHING;
14725 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14728 /* This check is the only reason for --readonly to exist */
14729 if (readonly && repair) {
14730 error("repair options are not compatible with --readonly");
14735 * experimental and dangerous
14737 if (repair && check_mode == CHECK_MODE_LOWMEM)
14738 warning("low-memory mode repair support is only partial");
14741 cache_tree_init(&root_cache);
14743 ret = check_mounted(argv[optind]);
14746 error("could not check mount status: %s",
14752 "%s is currently mounted, use --force if you really intend to check the filesystem",
14760 error("repair and --force is not yet supported");
14767 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14771 "filesystem mounted, continuing because of --force");
14773 /* A block device is mounted in exclusive mode by kernel */
14774 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14777 /* only allow partial opening under repair mode */
14779 ctree_flags |= OPEN_CTREE_PARTIAL;
14781 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14782 chunk_root_bytenr, ctree_flags);
14784 error("cannot open file system");
14790 global_info = info;
14791 root = info->fs_root;
14792 uuid_unparse(info->super_copy->fsid, uuidbuf);
14794 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14797 * Check the bare minimum before starting anything else that could rely
14798 * on it, namely the tree roots, any local consistency checks
14800 if (!extent_buffer_uptodate(info->tree_root->node) ||
14801 !extent_buffer_uptodate(info->dev_root->node) ||
14802 !extent_buffer_uptodate(info->chunk_root->node)) {
14803 error("critical roots corrupted, unable to check the filesystem");
14809 if (clear_space_cache) {
14810 ret = do_clear_free_space_cache(info, clear_space_cache);
14816 * repair mode will force us to commit transaction which
14817 * will make us fail to load log tree when mounting.
14819 if (repair && btrfs_super_log_root(info->super_copy)) {
14820 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14826 ret = zero_log_tree(root);
14829 error("failed to zero log tree: %d", ret);
14834 if (qgroup_report) {
14835 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14837 ret = qgroup_verify_all(info);
14844 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14845 subvolid, argv[optind], uuidbuf);
14846 ret = print_extent_state(info, subvolid);
14851 if (init_extent_tree || init_csum_tree) {
14852 struct btrfs_trans_handle *trans;
14854 trans = btrfs_start_transaction(info->extent_root, 0);
14855 if (IS_ERR(trans)) {
14856 error("error starting transaction");
14857 ret = PTR_ERR(trans);
14862 if (init_extent_tree) {
14863 printf("Creating a new extent tree\n");
14864 ret = reinit_extent_tree(trans, info);
14870 if (init_csum_tree) {
14871 printf("Reinitialize checksum tree\n");
14872 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14874 error("checksum tree initialization failed: %d",
14881 ret = fill_csum_tree(trans, info->csum_root,
14885 error("checksum tree refilling failed: %d", ret);
14890 * Ok now we commit and run the normal fsck, which will add
14891 * extent entries for all of the items it finds.
14893 ret = btrfs_commit_transaction(trans, info->extent_root);
14898 if (!extent_buffer_uptodate(info->extent_root->node)) {
14899 error("critical: extent_root, unable to check the filesystem");
14904 if (!extent_buffer_uptodate(info->csum_root->node)) {
14905 error("critical: csum_root, unable to check the filesystem");
14911 if (!init_extent_tree) {
14912 ret = repair_root_items(info);
14915 error("failed to repair root items: %s", strerror(-ret));
14919 fprintf(stderr, "Fixed %d roots.\n", ret);
14921 } else if (ret > 0) {
14923 "Found %d roots with an outdated root item.\n",
14926 "Please run a filesystem check with the option --repair to fix them.\n");
14933 ret = do_check_chunks_and_extents(info);
14937 "errors found in extent allocation tree or chunk allocation");
14939 /* Only re-check super size after we checked and repaired the fs */
14940 err |= !is_super_size_valid(info);
14942 if (!ctx.progress_enabled) {
14943 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14944 fprintf(stderr, "checking free space tree\n");
14946 fprintf(stderr, "checking free space cache\n");
14948 ret = check_space_cache(root);
14951 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14952 error("errors found in free space tree");
14954 error("errors found in free space cache");
14959 * We used to have to have these hole extents in between our real
14960 * extents so if we don't have this flag set we need to make sure there
14961 * are no gaps in the file extents for inodes, otherwise we can just
14962 * ignore it when this happens.
14964 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14965 ret = do_check_fs_roots(info, &root_cache);
14968 error("errors found in fs roots");
14972 fprintf(stderr, "checking csums\n");
14973 ret = check_csums(root);
14976 error("errors found in csum tree");
14980 fprintf(stderr, "checking root refs\n");
14981 /* For low memory mode, check_fs_roots_v2 handles root refs */
14982 if (check_mode != CHECK_MODE_LOWMEM) {
14983 ret = check_root_refs(root, &root_cache);
14986 error("errors found in root refs");
14991 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14992 struct extent_buffer *eb;
14994 eb = list_first_entry(&root->fs_info->recow_ebs,
14995 struct extent_buffer, recow);
14996 list_del_init(&eb->recow);
14997 ret = recow_extent_buffer(root, eb);
15000 error("fails to fix transid errors");
15005 while (!list_empty(&delete_items)) {
15006 struct bad_item *bad;
15008 bad = list_first_entry(&delete_items, struct bad_item, list);
15009 list_del_init(&bad->list);
15011 ret = delete_bad_item(root, bad);
15017 if (info->quota_enabled) {
15018 fprintf(stderr, "checking quota groups\n");
15019 ret = qgroup_verify_all(info);
15022 error("failed to check quota groups");
15026 ret = repair_qgroups(info, &qgroups_repaired);
15029 error("failed to repair quota groups");
15035 if (!list_empty(&root->fs_info->recow_ebs)) {
15036 error("transid errors in file system");
15041 printf("found %llu bytes used, ",
15042 (unsigned long long)bytes_used);
15044 printf("error(s) found\n");
15046 printf("no error found\n");
15047 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15048 printf("total tree bytes: %llu\n",
15049 (unsigned long long)total_btree_bytes);
15050 printf("total fs tree bytes: %llu\n",
15051 (unsigned long long)total_fs_tree_bytes);
15052 printf("total extent tree bytes: %llu\n",
15053 (unsigned long long)total_extent_tree_bytes);
15054 printf("btree space waste bytes: %llu\n",
15055 (unsigned long long)btree_space_waste);
15056 printf("file data blocks allocated: %llu\n referenced %llu\n",
15057 (unsigned long long)data_bytes_allocated,
15058 (unsigned long long)data_bytes_referenced);
15060 free_qgroup_counts();
15061 free_root_recs_tree(&root_cache);
15065 if (ctx.progress_enabled)
15066 task_deinit(ctx.info);