2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope checker state: running byte counters (all zero-initialized),
 * work lists, option flags and the progress-reporting context.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* Read by maybe_free_inode_rec(): when set, the file extent gap check is skipped. */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check mode selection. Only the default alias is visible in this excerpt;
 * parse_check_mode() additionally returns CHECK_MODE_LOWMEM and
 * CHECK_MODE_UNKNOWN.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common part embedded (as member `node`) in both struct data_backref and
 * struct tree_backref. is_data tells the two apart: compare_data_backref()
 * warns when it is clear, compare_tree_backref() warns when it is set.
 * Only the bit flags are visible in this excerpt.
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
/* Map an rb_node embedded as extent_backref::node back to its container. */
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref of a data extent. Embeds the common extent_backref as `node`;
 * the remaining members (compare_data_backref() reads parent, owner, offset,
 * disk_bytenr, bytes and found_ref) are not visible in this excerpt.
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bit flags; each macro below is a distinct single bit (bits 1..21),
 * so several of them can be OR-ed into one value.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
136 #define BG_ACCOUNTING_ERROR (1<<21) /* Block group accounting error */
/* Get the data_backref that embeds the given extent_backref as `node`. */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
/*
 * Ordering callback for two data backrefs stored in an rb tree.
 *
 * Keys are compared in this order: parent (shared storage with root, so this
 * covers both), then - unless back1 is a full backref - owner and offset,
 * and, only when both entries have found_ref set, disk_bytenr and bytes.
 * Warns if either node is not flagged as a data backref.
 *
 * The return statements fall on lines not shown in this excerpt; presumably
 * the usual positive/negative/zero comparator result - TODO confirm.
 */
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
/* disk_bytenr/bytes only take part when both sides have found_ref set */
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and changed to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of an orphan data extent. Only the list linkage is
 * visible in this excerpt; print_orphan_data_extents() also reads objectid,
 * offset and disk_bytenr.
 */
194 struct orphan_data_extent {
195 struct list_head list;
/*
 * Backref of a tree block. Embeds the common extent_backref as `node`;
 * the remaining members (compare_tree_backref() reads parent) are not
 * visible in this excerpt.
 */
203 struct tree_backref {
204 struct extent_backref node;
/* Get the tree_backref that embeds the given extent_backref as `node`. */
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
/*
 * Ordering callback for two tree backrefs: compares the parent field only
 * (shared storage with root, so this covers both). Warns if either node is
 * flagged as a data backref.
 *
 * The return statements are on lines not shown in this excerpt; presumably
 * the usual positive/negative/zero comparator result - TODO confirm.
 */
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
/*
 * Ordering callback for mixed backrefs: compares is_data first, then
 * full_backref, and finally delegates to compare_data_backref() or
 * compare_tree_backref(). The condition choosing between the two delegates
 * is not visible in this excerpt - presumably is_data.
 */
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * The value 2 fits the field's 2-bit width and differs from both 0 and 1.
 */
258 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record. Holds list linkages, an rb tree of backrefs
 * (backref_tree), a cache_extent for lookup, and status bit flags. Some
 * members are not visible in this excerpt.
 */
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
/* linkage used by to_extent_record() */
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
/* two bits wide so it can also hold FLAG_UNSET (2) */
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
/* Map a list_head embedded as extent_record::list back to its container. */
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
/*
 * One (dir, name) reference to an inode, linked into inode_record::backrefs.
 * The found_* bits record which item kinds were seen for it (set in
 * add_inode_backref()). Other members used elsewhere in this file - dir,
 * index, namelen, filetype, ref_type, errors and the trailing name storage
 * (allocated as sizeof(struct) + namelen + 1) - are not visible here.
 */
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
/* Map a list_head embedded as inode_backref::list back to its container. */
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record about a root item. Only the list linkage and drop_key
 * are visible in this excerpt.
 */
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Per-backref error bits, OR-ed into the backref's `errors` field and
 * decoded by print_ref_error(). Each macro is a distinct single bit (0..12).
 */
322 #define REF_ERR_NO_DIR_ITEM (1 << 0)
323 #define REF_ERR_NO_DIR_INDEX (1 << 1)
324 #define REF_ERR_NO_INODE_REF (1 << 2)
325 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
326 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
327 #define REF_ERR_DUP_INODE_REF (1 << 5)
328 #define REF_ERR_INDEX_UNMATCH (1 << 6)
329 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
330 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
331 #define REF_ERR_NO_ROOT_REF (1 << 9)
332 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
333 #define REF_ERR_DUP_ROOT_REF (1 << 11)
334 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent map, kept in an rb tree. Members are not
 * visible in this excerpt; the hole helpers below use node (rb linkage),
 * start and len.
 */
336 struct file_extent_hole {
/*
 * Everything collected about one inode while walking a tree: its backrefs,
 * status bit flags, the rb tree of file extent holes and a list of orphan
 * data extents. Scalar members used elsewhere in this file (ino, refs,
 * errors, nlink, found_link, isize, nbytes, imode, found_size, extent_start,
 * extent_end) are not visible in this excerpt.
 */
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Per-inode error bits, OR-ed into inode_record::errors and decoded by
 * print_inode_error(). Each macro is a distinct single bit (0..14).
 */
370 #define I_ERR_NO_INODE_ITEM (1 << 0)
371 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
372 #define I_ERR_DUP_INODE_ITEM (1 << 2)
373 #define I_ERR_DUP_DIR_INDEX (1 << 3)
374 #define I_ERR_ODD_DIR_ITEM (1 << 4)
375 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
376 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
377 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
378 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
379 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
380 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
381 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
382 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
383 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
384 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root, list-linked, with bit flags recording which
 * related items were found and whether it is reachable. Other members are
 * not visible in this excerpt.
 */
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
/* Map a list_head embedded as root_backref::list back to its container. */
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
/*
 * Tree-walk state: a cache tree of shared nodes plus one shared_node slot
 * per tree level. enter_shared_node() also uses active_node and root_level,
 * which are not visible in this excerpt.
 */
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
/* List-linked extent entry; only the list linkage is visible in this excerpt. */
445 struct extent_entry {
450 struct list_head list;
/*
 * Information gathered about a root item, indexed through the embedded
 * cache_extent. The members the two inner comments describe are on lines
 * not shown in this excerpt.
 */
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
/*
 * Low-memory mode extent error bits. Every flag owns a distinct bit so that
 * several errors can be OR-ed into one value and still be told apart.
 *
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable; it now uses the first
 * free bit (9). All other values are unchanged.
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress printer. `p` is the struct task_ctx. Starts a periodic task with
 * a period of 1000 (annotated as 1s), then prints the description of the
 * current phase (indexed by priv->tp) followed by a rotating indicator
 * character and a carriage return, and waits for the next period.
 *
 * The loop structure, the `count` variable and the TASK_NOTHING handling are
 * on lines not shown in this excerpt.
 */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
/* count % 4 cycles through the four work_indicator characters */
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
/*
 * Companion of print_status_check() by name; the body is not visible in
 * this excerpt - TODO confirm what it prints and returns.
 */
506 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode.
 *
 * @str: mode name; must not be NULL (passed straight to strcmp()).
 *
 * Matching is exact and case-sensitive: "lowmem" selects CHECK_MODE_LOWMEM,
 * "orig" and "original" both select CHECK_MODE_ORIGINAL, and anything else
 * yields CHECK_MODE_UNKNOWN.
 */
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
/*
 * Compatibility function to allow reuse of old code.
 *
 * Looks at the lowest hole in the tree (rb_first). The return statements
 * are not visible in this excerpt; callers compare the result against the
 * inode size, so presumably it is the start of that first hole, with a
 * sentinel for an empty tree - TODO confirm.
 */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Insertion comparator for file extent holes: orders by start and, for
 * equal starts, by length, so that the longer hole becomes the one others
 * are merged into. add_file_extent_hole() relies on this never reporting
 * equality. The return statements are not visible in this excerpt.
 */
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be contiguous holes.
/*
 * Insert a hole and merge it with its neighbours.
 *
 * The remaining parameters are on a line not shown in this excerpt; callers
 * pass (holes, start, len). A new node is allocated and inserted, then:
 *  - if the previous hole touches or overlaps it, the new hole is extended
 *    backwards to the previous start and the previous node is erased;
 *  - following holes that touch or overlap are absorbed one after another.
 *
 * Returns an int that callers test for failure; the exact values are not
 * visible here.
 *
 * NOTE(review): the previous-hole merge sets len to (new end - prev start)
 * without checking whether prev already extends past the new end, unlike
 * the next-hole merge which does check. If prev fully contains the new
 * hole, coverage appears to shrink - confirm whether callers can hit this.
 */
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
/* only grow when next reaches at least as far as the current end */
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
/*
 * Search comparator: `data` is a struct file_extent_hole carrying the
 * position being looked up, `node` is the tree node under test. The lookup
 * distinguishes "start lies before the node's hole", "start lies inside
 * [hole->start, hole->start + hole->len)" and the remaining case. Where
 * `start` is assigned and what is returned are on lines not shown here.
 */
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will split the hole and is much more restrictive than add.
/*
 * Remove the range [start, start + len) from the hole tree.
 *
 * The remaining parameters are on a line not shown in this excerpt. The
 * range must lie inside a single existing hole: the hole containing `start`
 * is looked up, and a range running past that hole's end is rejected. The
 * hole is then erased and whatever remains before and/or after the removed
 * range is re-added through add_file_extent_hole().
 *
 * Error values and the handling of a failed lookup are not visible here.
 */
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exists.
/* leading remainder: part of the hole before the deleted range */
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
/* trailing remainder: part of the hole after the deleted range */
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to `dst`, walking the source in order
 * with rb_first()/rb_next(). Each hole goes through add_file_extent_hole(),
 * so it is merged with whatever `dst` already holds. The second parameter
 * and the error handling are on lines not shown in this excerpt.
 */
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
/*
 * Empty a hole tree: repeatedly take the first node and erase it from the
 * tree. Releasing the node's memory presumably happens on a line not shown
 * in this excerpt - TODO confirm.
 */
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
/* re-read the first node: the previous one has just been erased */
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark `root` as touched by transaction `trans`, once per transaction.
 * If root->last_trans differs from trans->transid, the root is flagged
 * track_dirty, last_trans is updated, the current node becomes the commit
 * root and a reference is taken on it via extent_buffer_get().
 */
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into a BTRFS_FT_* value using
 * a lookup table indexed by (imode & S_IFMT) >> S_SHIFT. File types without
 * a table entry map to 0, since the static table is zero-initialized.
 * S_SHIFT is defined on a line not shown in this excerpt.
 *
 * NOTE(review): the table has S_IFMT >> S_SHIFT elements, while a mode with
 * all S_IFMT bits set produces exactly that value as index, one past the
 * end - confirm callers never pass such a mode.
 */
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb tree comparator for device records: orders by devid. The return
 * statements are not visible in this excerpt; presumably positive, negative
 * and zero for greater, smaller and equal - TODO confirm.
 */
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode record.
 *
 * The scalar part is copied with memcpy(), then the three containers are
 * reset and rebuilt so the clone shares no nodes with the original:
 * backrefs (each copied including its trailing name, namelen + 1 bytes),
 * orphan data extents, and file extent holes.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) if the record itself cannot
 * be allocated. Failures while copying the containers lead to the cleanup
 * code at the end; the labels, frees and final return are on lines not
 * shown in this excerpt.
 *
 * NOTE(review): the cleanup loop over rec->orphan_extents reuses the
 * struct inode_backref cursors (orig/tmp) although that list holds
 * struct orphan_data_extent entries. This only works if `list` sits at the
 * same offset in both structs - confirm, or use dedicated cursors.
 */
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy copied the original's list heads and tree root; start empty */
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* cleanup path: tear down whatever was copied so far */
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
/*
 * Print the orphan data extents of one tree to stdout: a header line naming
 * the tree, then one line per extent with inode, offset, disk bytenr and
 * disk length. Does nothing visible for an empty list (the early return is
 * on a line not shown). The second parameter, the tree id printed in the
 * header, is on a line not shown in this excerpt.
 */
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr.
 *
 * Prints "root <id> inode <ino> errors <hex>" followed by one ", <text>"
 * fragment per I_ERR_* bit set in rec->errors, then a newline. For a reloc
 * tree the line is prefixed with "reloc" and the id printed is the key
 * offset (the corresponding fs root) instead of the reloc objectid.
 *
 * Afterwards the orphan data extents are listed if I_ERR_FILE_EXTENT_ORPHAN
 * is set, and the file extent holes if I_ERR_FILE_EXTENT_DISCOUNT is set.
 * Some lines of the hole-printing part are not visible in this excerpt.
 */
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
/* fallback line; presumably used when no hole was recorded - TODO confirm */
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
/*
 * Append a textual description of backref errors to stderr.
 *
 * @errors: bitmask of REF_ERR_* flags.
 *
 * Writes one ", <text>" fragment per set bit, in bit order, and always ends
 * the line with a newline. The caller is expected to have printed the
 * leading part of the line.
 */
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
/*
 * Look up, and if necessary create, the inode record for an inode number.
 *
 * The remaining parameters are on a line not shown in this excerpt; callers
 * pass (inode_cache, ino, mod).
 *
 * If the cache already has an entry and the record is shared (refs > 1)
 * while `mod` is set, the entry is replaced by a private clone before use.
 * Otherwise a zeroed record is allocated, its extent_start set to (u64)-1
 * and its containers initialized, and it is inserted into the cache keyed
 * by `ino` with size 1.
 *
 * Returns the record, ERR_PTR(-ENOMEM) on allocation failure, or
 * ERR_PTR(-EEXIST) if the insertion fails. The branch conditions and the
 * handling of BTRFS_FREE_INO_OBJECTID are on lines not shown here.
 */
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
/* shared record about to be modified: work on a private copy */
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
/*
 * Empty a list of orphan data extents: unlink entries from the head until
 * the list is empty. Releasing each entry's memory presumably happens on a
 * line not shown in this excerpt - TODO confirm.
 */
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
/*
 * Drop one reference to an inode record. While references remain after the
 * decrement nothing else is torn down (the early return is on a line not
 * shown). Otherwise all backrefs are unlinked and the orphan data extents
 * and file extent holes are released. Freeing the backrefs and the record
 * itself presumably happens on lines not shown - TODO confirm.
 */
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record is fully verified and clean: no errors, the
 * record is checked, its inode item was found, the stored link count equals
 * the number of links found, and no backrefs are left pending. The return
 * values are on lines not shown; presumably non-zero when all of these hold.
 */
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish the checks that are possible for an inode record and release it
 * from the cache if it turned out clean.
 *
 * Steps, each guarded by the conditions visible below:
 *  1. Nothing is done until the inode item has been found.
 *  2. Backrefs for which both DIR_ITEM and DIR_INDEX were seen get their
 *     file type compared against the inode mode; error-free backrefs with
 *     an inode ref are unlinked once the link counts agree.
 *  3. Once the record is checked and not merging, per-type consistency
 *     errors are recorded: directory size and stray file extents for
 *     directories; stray dir items, nbytes and extent coverage for regular
 *     files and symlinks; csum presence against the nodatasum flag.
 *  4. If can_free_inode_rec() agrees, the record is removed from
 *     inode_cache and freed.
 *
 * Early returns and the freeing of unlinked backrefs are on lines not shown
 * in this excerpt.
 */
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* extent coverage is only enforced for linked inodes when no_holes is off */
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
/*
 * Search `root` for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY) with a read-only btrfs_search_slot() and release
 * the path afterwards. The key offset - presumably `ino` - and the
 * translation of the search result into the return value are on lines not
 * shown in this excerpt.
 */
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
/*
 * Record an INODE_ITEM found at `slot` of leaf `eb` in the node's current
 * inode record.
 *
 * The current record must be for key->objectid and must not be shared
 * (BUG otherwise). A second inode item for the same record is flagged as
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are copied
 * from the item, the NODATASUM inode flag is examined, found_inode_item is
 * set, and an inode with nlink == 0 is flagged I_ERR_NO_ORPHAN_ITEM.
 * Finally maybe_free_inode_rec() gets a chance to release the record.
 *
 * Return statements are on lines not shown in this excerpt.
 */
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of `rec` matching (dir, name, namelen), creating it if
 * it does not exist yet.
 *
 * Existing entries match when dir and namelen are equal and the first
 * namelen bytes of the name compare equal. A new entry is allocated with
 * room for the name plus a terminating NUL, its fixed part is zeroed, the
 * name is copied and terminated, and it is appended to rec->backrefs.
 *
 * The `name` parameter, the special case for BTRFS_MULTIPLE_OBJECTIDS, the
 * allocation failure handling and the return statements are on lines not
 * shown in this excerpt.
 */
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
/* zero only the fixed-size part; the name bytes are filled in just below */
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of kind `itemtype` referencing inode `ino` under
 * directory `dir` with the given name was seen.
 *
 * The inode record is fetched in modifying mode (BUG on failure) and the
 * matching backref is looked up or created. `errors` is OR-ed into the
 * backref, then, depending on the item type:
 *  - DIR_INDEX: flags duplicates, index mismatch against a seen inode ref
 *    and file type mismatch against a seen dir item; stores index and
 *    file type.
 *  - DIR_ITEM: flags duplicates and file type mismatch against a seen dir
 *    index; stores the file type.
 *  - INODE_REF / INODE_EXTREF: flags duplicates and index mismatch against
 *    a seen dir index; stores index and ref type.
 * Finally maybe_free_inode_rec() gets a chance to release the record.
 *
 * Link accounting, handling of other item types and the return value are
 * on lines not shown in this excerpt.
 */
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about `src` into `dst`, which lives in `dst_cache`.
 *
 * Backrefs are replayed through add_inode_backref(), once per item kind
 * that was found for them. Found-flags are OR-ed, holes are copied when
 * src's first gap lies before dst's, link and size counts are added, and
 * the extent ranges are combined: an overlap is flagged as
 * I_ERR_FILE_EXTENT_OVERLAP and a gap between the two ranges is added as a
 * hole. Errors are OR-ed. Inode item fields are taken from src only if dst
 * has none yet; if both have one, I_ERR_DUP_INODE_ITEM is set.
 *
 * dir_count (subtracted from src->found_link), the merging flag handling
 * and the return paths are on lines not shown in this excerpt.
 */
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
/* (u64)-1 marks "no extent seen yet", as set in get_inode_rec() */
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or merge the inode records of `src_node` into `dst_node`.
 *
 * One reference on src_node is dropped first. Both caches are processed in
 * turn, root_cache first and then inode_cache. For each entry a new
 * ptr_node is inserted into the destination; if the destination already has
 * a record for that inode (-EEXIST), the two are merged with
 * merge_inode_recs(), the merged record is marked checked and handed to
 * maybe_free_inode_rec(), and the source record is released.
 *
 * Afterwards, if src_node had a current inode and it is beyond
 * dst_node's current one (or dst_node has none), dst_node's current record
 * is finished and replaced by the record for that inode.
 *
 * How the result of the refs decrement selects between moving and sharing
 * entries, and the return value, are on lines not shown in this excerpt.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
1323 if (--src_node->refs == 0)
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* advance before the current entry may be removed from src */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* second pass: after the root caches, handle the inode caches */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor callback for one cache entry of an inode record tree: recover
 * the enclosing ptr_node from @cache and free the inode record.
 * NOTE(review): the assignment of rec is not shown here; presumably it is
 * taken from the ptr_node. Confirm against the full file.
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record trees with
 * free_inode_ptr as the per-entry destructor (presumably this is what
 * defines free_inode_recs_tree(), which is called further down).
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for @bytenr in the @shared cache tree,
 * using a one byte wide lookup range.
 * NOTE(review): the handling of a failed lookup and the return statements
 * are not shown in this listing.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node keyed by @bytenr (range size 1), initialise
 * its root and inode caches and insert it into the @shared tree.
 * NOTE(review): the allocation failure check, the use of @refs and the
 * return statement are not shown in this listing.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk reaches the shared tree block at @bytenr on @level.
 * On a first visit a shared_node is registered and made the active node;
 * on a later visit the records collected earlier are spliced into the
 * currently active node.
 * NOTE(review): the branch conditions and return values between the
 * visible statements are not shown here; confirm against the full file.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* A shared block must sit below the currently active level. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* Register the block and make it the active node for its level. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/*
 * The active node is the root level and the root item has zero refs:
 * only drop a reference, and release the cached records on the last one.
 */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the records of the shared block to the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the shared block that is the active node:
 * a higher level becomes active again and, when applicable, the records
 * gathered below are spliced into it.
 * NOTE(review): the loop body that picks the new active level and the
 * final refcount handling are not shown in this listing.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Search upwards from the level above @level. */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
/* Splice unless the new active node is the root level of a zero-ref root. */
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide whether root @child_root_id is referenced from @parent_root_id by
 * consulting the tree root: first a direct ROOT_REF lookup keyed by
 * (parent, child), then a scan of the ROOT_BACKREF items of the child.
 * The final result distinguishes "has some other parent" (0) from
 * "has no parent at all" (2).
 * NOTE(review): the early returns after the searches are not shown here.
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Step 1: exact lookup of the ROOT_REF item (parent -> child). */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Step 2: walk the ROOT_BACKREF items of the child. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Past the last slot of this leaf: move on to the next leaf. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items no longer belong to the backrefs of the child. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* The backref offset names the parent root. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the item at @slot and record an
 * inode backref for each one in the inode cache of @active_node. In the
 * call below @key->objectid is the inode and @key->offset the directory.
 * Names that cross the item boundary or exceed BTRFS_NAME_LEN are clamped
 * and flagged with REF_ERR_NAME_TOO_LONG.
 * NOTE(review): declarations, the else branch and the return statement are
 * not shown in this listing.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed part of the ref fits in the remaining item. */
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed ref structure directly. */
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Step over this ref using the on-disk name length, not the clamped one. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the item at @slot and record an
 * inode backref for each one. Unlike a plain inode ref, the parent
 * directory is read from the extref itself rather than from the key offset.
 * Names longer than BTRFS_NAME_LEN are clamped and flagged with
 * REF_ERR_NAME_TOO_LONG.
 * NOTE(review): no check against the item boundary is visible in this
 * listing, unlike in process_inode_ref; confirm whether one is needed.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: read only what fits into namebuf. */
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Step over this extref using the on-disk name length. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len) and report the covered amount through @found.
 * NOTE(review): the statements that advance start/len and accumulate into
 * @found are not shown in this listing; the description of @found is
 * inferred from the signature and the visible size computation.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the previous item may be a csum item that starts before
 * the range but still covers part of it, so inspect it as well.
 */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This csum item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
/*
 * Each csum_size bytes of item payload cover one sector of data, which
 * gives the end of the range covered by this item.
 */
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the inode currently tracked by
 * @active_node: maintain the extent_start/extent_end window, record holes
 * and overlaps, sanity check the extent fields (I_ERR_BAD_FILE_EXTENT),
 * accumulate found_size and compare checksum coverage for extents that
 * have a disk bytenr.
 * NOTE(review): several conditions and the return statements are not shown
 * in this listing; confirm details against the full file.
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Low bits that must be clear for a sector aligned length. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* First extent seen for this inode: start the window at its offset. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Compare the previous extent end with the start of this extent. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a sector multiple for extent_end. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the uncompressed extent. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents that have a disk bytenr add to found_size. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original mode fs tree check:
 * keep the active node current inode record in step with the key objectid
 * and dispatch each item to the handler for its key type.
 * NOTE(review): the statements following the early conditions (returns and
 * continues) and the switch header are not shown in this listing.
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
/* Special case: the active node is the root level of a zero-ref root. */
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get special treatment here. */
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * The key belongs to a later inode than the current record: mark the
 * old record as checked and switch to the record of the new inode.
 */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping used while walking a tree in the v2 walkers.
 * Every array is indexed by tree level.
 */
/* bytenr of the tree block last recorded for each level */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count recorded for that tree block */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero if the block at this level has to be checked by this root */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers used before their definitions. */
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
/*
 * Check the inodes found in the leaf at path->nodes[0] by calling
 * check_inode_item() repeatedly, then refresh the per-level node refs when
 * the path has moved on to a different leaf.
 * NOTE(review): the loop structure, gotos and return statements are not
 * shown in this listing; the return contract is the one listed below.
 */
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember the starting leaf so that a leaf switch can be detected. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
/* This level still points at the block already recorded in nrefs. */
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the extent buffers held below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * @slot and continuing to the last pointer of the node.
 * NOTE(review): an early exit based on the level read below is presumably
 * present but is not shown in this listing.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
/* Block address and expected generation of child i. */
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_header_generation(child),
2137 btrfs_node_ptr_generation(parent, slot));
2143 * For a tree node or leaf that is shared, we do not need to iterate it in
2144 * every fs or file tree check. Here we find all of its root ids, and only
2145 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the one that has to check a tree block, given
 * the list @roots of root ids referring to it. The first entry of the
 * rb-tree in @roots is compared with the objectid of @root.
 * NOTE(review): the return statements are not shown in this listing;
 * presumably non-zero means "check it here". Confirm against the full file.
 */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
2153 * @roots can be empty if it belongs to tree reloc tree
2154 * In that case, we should always check the leaf, as we can't use
2155 * the tree owner to ensure some other root will check it.
2157 if (roots->nnodes == 1 || roots->nnodes == 0)
2160 node = rb_first(&roots->root);
2161 u = rb_entry(node, struct ulist_node, rb_node);
2163 * current root id is not smallest, we skip it and let it be checked
2164 * in the fs or file tree who hash the smallest root id.
2166 if (root->objectid != u->val)
/*
 * Work out whether the tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in the flags
 * output accordingly. Cheap checks on the root and the block header come
 * first; otherwise the extent item of the block is looked up in the extent
 * tree and its flags and inline refs are inspected.
 * NOTE(review): the third parameter, the goto targets and the return value
 * are not shown in this listing; which exit each check takes must be
 * confirmed against the full file.
 */
2172 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2175 struct btrfs_root *extent_root = root->fs_info->extent_root;
2176 struct btrfs_root_item *ri = &root->root_item;
2177 struct btrfs_extent_inline_ref *iref;
2178 struct btrfs_extent_item *ei;
2179 struct btrfs_key key;
2180 struct btrfs_path *path = NULL;
2191 * Except file/reloc tree, we can not have FULL BACKREF MODE
2193 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The block is the root node of this tree. */
2197 if (eb->start == btrfs_root_bytenr(ri))
/* The block header carries the RELOC flag. */
2200 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
/* The block is owned by the root being checked. */
2203 owner = btrfs_header_owner(eb);
2204 if (owner == root->objectid)
/* Slow path: find the extent item describing this block. */
2207 path = btrfs_alloc_path();
2211 key.objectid = btrfs_header_bytenr(eb);
2213 key.offset = (u64)-1;
2215 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2222 ret = btrfs_previous_extent_item(extent_root, path,
2228 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent tree leaf, not the input block. */
2230 eb = path->nodes[0];
2231 slot = path->slots[0];
2232 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2234 flags = btrfs_extent_flags(eb, ei);
2235 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs start after the extent item and run to the item end. */
2238 ptr = (unsigned long)(ei + 1);
2239 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keyed items also carry a tree block info header. */
2241 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2242 ptr += sizeof(struct btrfs_tree_block_info);
2245 /* Reached extent item ends normally */
2249 /* Beyond extent item end, wrong item size */
2251 error("extent item at bytenr %llu slot %d has wrong size",
2256 iref = (struct btrfs_extent_inline_ref *)ptr;
2257 offset = btrfs_extent_inline_ref_offset(eb, iref);
2258 type = btrfs_extent_inline_ref_type(eb, iref);
/* Look for a plain tree block ref that names the header owner. */
2260 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2262 ptr += btrfs_extent_inline_ref_size(type);
2266 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2270 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2272 btrfs_free_path(path);
2277 * For a tree node or leaf, we record its reference count, so that if we
2278 * process this node or leaf again later, we do not need to compute it again.
2280 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached per-level information in @nrefs for the tree block at
 * @bytenr: reference count, need_check and, when @check_all and @eb are
 * both set, the full_backref marker.
 * Nothing is recomputed when @nrefs already describes @bytenr at @level.
 * NOTE(review): the branch structure around the need_check assignments and
 * the return statements are not shown in this listing.
 */
2282 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2283 struct extent_buffer *eb, struct node_refs *nrefs,
2284 u64 level, int check_all)
2286 struct ulist *roots;
2289 int root_level = btrfs_header_level(root->node);
/* Already cached for this level. */
2293 if (nrefs->bytenr[level] == bytenr)
2296 if (bytenr != (u64)-1) {
2297 /* the return value of this function seems a mistake */
2298 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2299 level, 1, &refs, &flags);
2301 if (ret < 0 && !check_all)
/* Reset the cached state of this level for the new block. */
2304 nrefs->bytenr[level] = bytenr;
2305 nrefs->refs[level] = refs;
2306 nrefs->full_backref[level] = 0;
2307 nrefs->checked[level] = 0;
/* Collect the roots referring to the block and let need_check() decide. */
2310 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2315 check = need_check(root, roots);
2317 nrefs->need_check[level] = check;
2320 nrefs->need_check[level] = 1;
2322 if (level == root_level) {
2323 nrefs->need_check[level] = 1;
2326 * The node refs may have not been
2327 * updated if upper needs checking (the
2328 * lowest root_objectid) the node can
2331 nrefs->need_check[level] =
2332 nrefs->need_check[level + 1];
/* Record whether the block should be in full backref mode. */
2338 if (check_all && eb) {
2339 calc_extent_flag_v2(root, eb, &flags);
2340 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2341 nrefs->full_backref[level] = 1;
2348 * @level if @level == -1 means extent data item
2349 * else normal tree block.
/*
 * Decide whether the extent at @level may be checked strictly, based on
 * the reference counts cached in @nrefs for all levels above it.
 * NOTE(review): the return statements are not shown in this listing, so
 * the value returned for each case must be confirmed against the full file.
 */
2351 static int should_check_extent_strictly(struct btrfs_root *root,
2352 struct node_refs *nrefs, int level)
2354 int root_level = btrfs_header_level(root->node);
/* Levels outside [-1, root_level] are not valid input. */
2356 if (level > root_level || level < -1)
/* The root level has no upper nodes to inspect. */
2358 if (level == root_level)
2361 * if the upper node is marked full backref, it should contain shared
2362 * backref of the parent (except owner == root->objectid).
/* Look for an upper level whose block has more than one reference. */
2364 while (++level <= root_level)
2365 if (nrefs->refs[level] > 1)
/*
 * Original mode walker: descend from path->nodes[*level] towards the
 * leaves, processing each leaf with process_one_leaf() and validating each
 * child block before stepping into it. Reference counts are cached in
 * @nrefs, and enter_shared_node() is called for blocks handled as shared.
 * NOTE(review): the conditions guarding enter_shared_node(), the error
 * paths and the return statements are not shown in this listing.
 */
2371 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2372 struct walk_control *wc, int *level,
2373 struct node_refs *nrefs)
2375 enum btrfs_tree_block_status status;
2378 struct btrfs_fs_info *fs_info = root->fs_info;
2379 struct extent_buffer *next;
2380 struct extent_buffer *cur;
2384 WARN_ON(*level < 0);
2385 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count of the starting block when available. */
2387 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2388 refs = nrefs->refs[*level];
2391 ret = btrfs_lookup_extent_info(NULL, root,
2392 path->nodes[*level]->start,
2393 *level, 1, &refs, NULL);
2398 nrefs->bytenr[*level] = path->nodes[*level]->start;
2399 nrefs->refs[*level] = refs;
2403 ret = enter_shared_node(root, path->nodes[*level]->start,
2411 while (*level >= 0) {
2412 WARN_ON(*level < 0);
2413 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2414 cur = path->nodes[*level];
/* The header level must agree with the position in the path. */
2416 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2419 if (path->slots[*level] >= btrfs_header_nritems(cur))
2422 ret = process_one_leaf(root, cur, wc);
/* Interior node: pick up the child pointer of the current slot. */
2427 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2428 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Reuse the cached ref count of the child when available. */
2430 if (bytenr == nrefs->bytenr[*level - 1]) {
2431 refs = nrefs->refs[*level - 1];
2433 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2434 *level - 1, 1, &refs, NULL);
2438 nrefs->bytenr[*level - 1] = bytenr;
2439 nrefs->refs[*level - 1] = refs;
2444 ret = enter_shared_node(root, bytenr, refs,
2447 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2452 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2453 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2454 free_extent_buffer(next);
2455 reada_walk_down(root, cur, path->slots[*level]);
2456 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2457 if (!extent_buffer_uptodate(next)) {
/* Unreadable child: record it as a corrupt extent. */
2458 struct btrfs_key node_key;
2460 btrfs_node_key_to_cpu(path->nodes[*level],
2462 path->slots[*level]);
2463 btrfs_add_corrupt_extent_record(root->fs_info,
2465 path->nodes[*level]->start,
2466 root->fs_info->nodesize,
/* Verify key, block pointer and generation against the parent slot. */
2473 ret = check_child_node(cur, path->slots[*level], next);
2475 free_extent_buffer(next);
/* Structural sanity check of the child itself. */
2480 if (btrfs_is_leaf(next))
2481 status = btrfs_check_leaf(root, NULL, next);
2483 status = btrfs_check_node(root, NULL, next);
2484 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2485 free_extent_buffer(next);
/* Step down one level and start at the first slot of the child. */
2490 *level = *level - 1;
2491 free_extent_buffer(path->nodes[*level]);
2492 path->nodes[*level] = next;
2493 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2496 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; used by account_bytes() below. */
2500 static int fs_root_objectid(u64 objectid);
2503 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the global byte counters:
 * total btree bytes, fs tree bytes (when the root is an fs root), extent
 * tree bytes (when the block is owned by the extent tree) and the amount
 * of unused space inside the block.
 * NOTE(review): the condition choosing between the leaf and the node
 * formula for wasted space is not shown in this listing.
 */
2505 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2509 struct extent_buffer *eb = path->nodes[level];
2511 total_btree_bytes += eb->len;
2512 if (fs_root_objectid(root->objectid))
2513 total_fs_tree_bytes += eb->len;
2514 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2515 total_extent_tree_bytes += eb->len;
/* Leaf formula: the free space reported for the leaf counts as waste. */
2518 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Node formula: every unused key pointer slot counts as waste. */
2520 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2521 btrfs_header_nritems(eb));
2522 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2527 * This function only handles BACKREF_MISSING,
2528 * If corresponding extent item exists, increase the ref, else insert an extent
2531 * Returns error bits after repair.
2533 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2534 struct btrfs_root *root,
2535 struct extent_buffer *node,
2536 struct node_refs *nrefs, int level, int err)
2538 struct btrfs_fs_info *fs_info = root->fs_info;
2539 struct btrfs_root *extent_root = fs_info->extent_root;
2540 struct btrfs_path path;
2541 struct btrfs_extent_item *ei;
2542 struct btrfs_tree_block_info *bi;
2543 struct btrfs_key key;
2544 struct extent_buffer *eb;
2545 u32 size = sizeof(*ei);
2546 u32 node_size = root->fs_info->nodesize;
2547 int insert_extent = 0;
2548 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2549 int root_level = btrfs_header_level(root->node);
2554 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2557 if ((err & BACKREF_MISSING) == 0)
2560 WARN_ON(level > BTRFS_MAX_LEVEL);
2563 btrfs_init_path(&path);
2564 bytenr = btrfs_header_bytenr(node);
2565 owner = btrfs_header_owner(node);
2566 generation = btrfs_header_generation(node);
2568 key.objectid = bytenr;
2570 key.offset = (u64)-1;
2572 /* Search for the extent item */
2573 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2579 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2583 /* calculate if the extent item flag is full backref or not */
2584 if (nrefs->full_backref[level] != 0)
2585 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2587 /* insert an extent item */
2588 if (insert_extent) {
2589 struct btrfs_disk_key copy_key;
2591 generation = btrfs_header_generation(node);
2593 if (level < root_level && nrefs->full_backref[level + 1] &&
2594 owner != root->objectid) {
2595 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2598 key.objectid = bytenr;
2599 if (!skinny_metadata) {
2600 key.type = BTRFS_EXTENT_ITEM_KEY;
2601 key.offset = node_size;
2602 size += sizeof(*bi);
2604 key.type = BTRFS_METADATA_ITEM_KEY;
2608 btrfs_release_path(&path);
2609 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2615 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2617 btrfs_set_extent_refs(eb, ei, 0);
2618 btrfs_set_extent_generation(eb, ei, generation);
2619 btrfs_set_extent_flags(eb, ei, flags);
2621 if (!skinny_metadata) {
2622 bi = (struct btrfs_tree_block_info *)(ei + 1);
2623 memset_extent_buffer(eb, 0, (unsigned long)bi,
2625 btrfs_set_disk_key_objectid(©_key, root->objectid);
2626 btrfs_set_disk_key_type(©_key, 0);
2627 btrfs_set_disk_key_offset(©_key, 0);
2629 btrfs_set_tree_block_level(eb, bi, level);
2630 btrfs_set_tree_block_key(eb, bi, ©_key);
2632 btrfs_mark_buffer_dirty(eb);
2633 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2634 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2637 nrefs->refs[level] = 0;
2638 nrefs->full_backref[level] =
2639 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640 btrfs_release_path(&path);
2643 if (level < root_level && nrefs->full_backref[level + 1] &&
2644 owner != root->objectid)
2645 parent = nrefs->bytenr[level + 1];
2647 /* increase the ref */
2648 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649 parent, root->objectid, level, 0);
2651 nrefs->refs[level]++;
2653 btrfs_release_path(&path);
2656 "failed to repair tree block ref start %llu root %llu due to %s",
2657 bytenr, root->objectid, strerror(-ret));
2659 printf("Added one tree block ref start %llu %s %llu\n",
2660 bytenr, parent ? "parent" : "root",
2661 parent ? parent : root->objectid);
2662 err &= ~BACKREF_MISSING;
/* Forward declarations for checkers that are used before their definitions. */
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669 unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671 struct extent_buffer *eb, u64 bytenr,
2672 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674 struct btrfs_root *root, struct btrfs_path *path,
2675 struct node_refs *nrefs, int account_bytes);
2678 * @trans just for lowmem repair mode
2679 * @check all if not 0 then check all tree block backrefs and items
2680 * 0 then just check relationship of items in fs tree(s)
2682 * Returns >0 Found error, should continue
2683 * Returns <0 Fatal error, must exit the whole check
2684 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, checking every tree
 * block met on the way.
 *
 * Per block the loop:
 *  - verifies the block's backref with check_tree_block_ref() and hands the
 *    result to repair_tree_block_ref(),
 *  - accounts the block's bytes when check_all is set and the block still
 *    needs checking and has refs,
 *  - validates the block itself with btrfs_check_leaf()/btrfs_check_node(),
 *  - for leaves, runs process_one_leaf_v2() and check_leaf_items(),
 *  - for nodes, looks up (or reads) the child at path->slots[*level],
 *    validates it and steps one level down.
 *
 * *level and @path are updated in place as the walk moves down.
 */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687 struct btrfs_root *root, struct btrfs_path *path,
2688 int *level, struct node_refs *nrefs, int ext_ref,
2692 enum btrfs_tree_block_status status;
2695 struct btrfs_fs_info *fs_info = root->fs_info;
2696 struct extent_buffer *next;
2697 struct extent_buffer *cur;
/* Set when the current block was byte-accounted; forwarded to check_leaf_items(). */
2701 int account_file_data = 0;
2703 WARN_ON(*level < 0);
2704 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Feed the block the walk starts from into nrefs before entering the loop. */
2706 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707 path->nodes[*level], nrefs, *level, check_all);
2711 while (*level >= 0) {
2712 WARN_ON(*level < 0);
2713 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714 cur = path->nodes[*level];
2715 bytenr = btrfs_header_bytenr(cur);
/* Snapshot need_check now: checked[] is modified further down. */
2716 check = nrefs->need_check[*level];
/* The level stored in the block header must match the level we walked to. */
2718 if (btrfs_header_level(cur) != *level)
2721 * Update bytes accounting and check tree block ref
2722 * NOTE: Doing accounting and check before checking nritems
2723 * is necessary because of empty node/leaf.
2725 if ((check_all && !nrefs->checked[*level]) ||
2726 (!check_all && nrefs->need_check[*level])) {
2727 ret = check_tree_block_ref(root, cur,
2728 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729 btrfs_header_owner(cur), nrefs);
2732 ret = repair_tree_block_ref(trans, root,
2733 path->nodes[*level], nrefs, *level, ret);
2736 if (check_all && nrefs->need_check[*level] &&
2737 nrefs->refs[*level]) {
2738 account_bytes(root, path, *level);
2739 account_file_data = 1;
2741 nrefs->checked[*level] = 1;
/* Slot past the last item: nothing left to visit in this block. */
2744 if (path->slots[*level] >= btrfs_header_nritems(cur))
2747 /* Don't forgot to check leaf/node validation */
2749 /* skip duplicate check */
2750 if (check || !check_all) {
2751 ret = btrfs_check_leaf(root, NULL, cur);
2752 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2760 ret = process_one_leaf_v2(root, path, nrefs,
2763 ret = check_leaf_items(trans, root, path,
2764 nrefs, account_file_data);
2768 if (check || !check_all) {
2769 ret = btrfs_check_node(root, NULL, cur);
2770 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child block address and generation taken from the current node slot. */
2777 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2780 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2785 * check all trees in check_chunks_and_extent_v2
2786 * check shared node once in check_fs_roots
2788 if (!check_all && !nrefs->need_check[*level - 1]) {
2789 path->slots[*level]++;
/* Try the in-memory copy first; fall back to reading the block from disk. */
2793 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795 free_extent_buffer(next);
2796 reada_walk_down(root, cur, path->slots[*level]);
2797 next = read_tree_block(fs_info, bytenr, ptr_gen);
2798 if (!extent_buffer_uptodate(next)) {
2799 struct btrfs_key node_key;
/* Unreadable child: record the parent slot's key as a corrupt extent. */
2801 btrfs_node_key_to_cpu(path->nodes[*level],
2803 path->slots[*level]);
2804 btrfs_add_corrupt_extent_record(fs_info,
2805 &node_key, path->nodes[*level]->start,
2806 fs_info->nodesize, *level);
2812 ret = check_child_node(cur, path->slots[*level], next);
2817 if (btrfs_is_leaf(next))
2818 status = btrfs_check_leaf(root, NULL, next);
2820 status = btrfs_check_node(root, NULL, next);
2821 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822 free_extent_buffer(next);
/* Step down: the child replaces whatever buffer the path held at that level. */
2827 *level = *level - 1;
2828 free_extent_buffer(path->nodes[*level]);
2829 path->nodes[*level] = next;
2830 path->slots[*level] = 0;
2831 account_file_data = 0;
2833 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Walk back up @path starting at *level.
 *
 * Each level is tested for a further unvisited slot
 * (slots[i] + 1 < nritems).  Levels handled by the remaining statements have
 * their extent buffer released and their path entry cleared; when the level
 * being left equals wc->active_node, leave_shared_node() is called for it.
 */
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839 struct walk_control *wc, int *level)
2842 struct extent_buffer *leaf;
2844 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, "leaf" is the block at level i, which may be a node. */
2845 leaf = path->nodes[i];
2846 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2851 free_extent_buffer(path->nodes[*level]);
2852 path->nodes[*level] = NULL;
/* The walk must never be above the currently active shared node. */
2853 BUG_ON(*level > wc->active_node);
2854 if (*level == wc->active_node)
2855 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree(): the same upward scan for a level with an
 * unvisited slot and the same release/clear of the path entry, but without
 * any walk_control / shared-node bookkeeping.
 */
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2866 struct extent_buffer *leaf;
2868 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, "leaf" is the block at level i, which may be a node. */
2869 leaf = path->nodes[i];
2870 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2875 free_extent_buffer(path->nodes[*level]);
2876 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a tree's root directory.
 *
 * Conditions tested, in order:
 *  - inode item missing, or the record already carries errors
 *  - nlink != 1, or any link was found (found_link != 0)
 *  - no backref at all
 *  - first backref has no inode ref
 *  - first backref is not index 0 with the 2-byte name ".."
 *  - first backref has a dir index or dir item
 *
 * NOTE(review): the outcome of each test is not spelled out here; presumably
 * any of them being true makes the check fail -- confirm.
 */
2883 static int check_root_dir(struct inode_record *rec)
2885 struct inode_backref *backref;
2888 if (!rec->found_inode_item || rec->errors)
2890 if (rec->nlink != 1 || rec->found_link != 0)
2892 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2894 backref = to_inode_backref(rec->backrefs.next);
2895 if (!backref->found_inode_ref)
2897 if (backref->index != 0 || backref->namelen != 2 ||
2898 memcmp(backref->name, "..", 2))
2900 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of rec->ino with
 * rec->found_size.
 *
 * The search key uses offset (u64)-1, so the lookup is not an exact match;
 * the key actually found is read back and compared with rec->ino before the
 * item is modified.  After the write the leaf is marked dirty,
 * I_ERR_DIR_ISIZE_WRONG is cleared from the record and @path is released.
 */
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root, struct btrfs_path *path,
2909 struct inode_record *rec)
2911 struct btrfs_inode_item *ei;
2912 struct btrfs_key key;
2915 key.objectid = rec->ino;
2916 key.type = BTRFS_INODE_ITEM_KEY;
2917 key.offset = (u64)-1;
2919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is special-cased: there is no earlier item in this leaf to fall back to. */
2923 if (!path->slots[0]) {
2930 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931 if (key.objectid != rec->ino) {
2936 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937 struct btrfs_inode_item);
2938 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939 btrfs_mark_buffer_dirty(path->nodes[0]);
2940 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942 root->root_key.objectid);
2944 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino with btrfs_add_orphan_item(),
 * release @path, and clear I_ERR_NO_ORPHAN_ITEM from the record.
 */
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949 struct btrfs_root *root,
2950 struct btrfs_path *path,
2951 struct inode_record *rec)
2955 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956 btrfs_release_path(path);
2958 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size, mark the leaf dirty, clear I_ERR_FILE_NBYTES_WRONG from
 * the record and release @path.
 */
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963 struct btrfs_root *root,
2964 struct btrfs_path *path,
2965 struct inode_record *rec)
2967 struct btrfs_inode_item *ei;
2968 struct btrfs_key key;
2971 key.objectid = rec->ino;
2972 key.type = BTRFS_INODE_ITEM_KEY;
2975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2982 /* Since ret == 0, no need to check anything */
2983 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984 struct btrfs_inode_item);
2985 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986 btrfs_mark_buffer_dirty(path->nodes[0]);
2987 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988 printf("reset nbytes for ino %llu root %llu\n",
2989 rec->ino, root->root_key.objectid);
2991 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for @backref in its own transaction.
 *
 * The new item is keyed (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index),
 * points at the inode item of rec->ino, carries the backref's name and has
 * no extra data (data_len 0).  The transaction is committed right away.
 *
 * Afterwards the in-memory state is brought in line: the backref is flagged
 * found_dir_index, the parent directory record grows by namelen in
 * found_size, and its I_ERR_DIR_ISIZE_WRONG flag is recomputed from
 * found_size vs. isize.
 */
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996 struct cache_tree *inode_cache,
2997 struct inode_record *rec,
2998 struct inode_backref *backref)
3000 struct btrfs_path path;
3001 struct btrfs_trans_handle *trans;
3002 struct btrfs_dir_item *dir_item;
3003 struct extent_buffer *leaf;
3004 struct btrfs_key key;
3005 struct btrfs_disk_key disk_key;
3006 struct inode_record *dir_rec;
3007 unsigned long name_ptr;
/* Item payload: the dir item header immediately followed by the name bytes. */
3008 u32 data_size = sizeof(*dir_item) + backref->namelen;
3011 trans = btrfs_start_transaction(root, 1);
3013 return PTR_ERR(trans);
3015 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016 (unsigned long long)rec->ino);
3018 btrfs_init_path(&path);
3019 key.objectid = backref->dir;
3020 key.type = BTRFS_DIR_INDEX_KEY;
3021 key.offset = backref->index;
3022 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3025 leaf = path.nodes[0];
3026 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored inside the dir item: the inode item of rec->ino. */
3028 disk_key.objectid = cpu_to_le64(rec->ino);
3029 disk_key.type = BTRFS_INODE_ITEM_KEY;
3030 disk_key.offset = 0;
3032 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034 btrfs_set_dir_data_len(leaf, dir_item, 0);
3035 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name lives directly after the fixed-size dir item header. */
3036 name_ptr = (unsigned long)(dir_item + 1);
3037 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038 btrfs_mark_buffer_dirty(leaf);
3039 btrfs_release_path(&path);
/* NOTE(review): the commit result is not checked here. */
3040 btrfs_commit_transaction(trans, root);
3042 backref->found_dir_index = 1;
3043 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044 BUG_ON(IS_ERR(dir_rec));
3047 dir_rec->found_size += backref->namelen;
3048 if (dir_rec->found_size == dir_rec->isize &&
3049 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051 if (dir_rec->found_size != dir_rec->isize)
3052 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the bad DIR_INDEX item described by @backref, in its own
 * transaction.
 *
 * The item is looked up with btrfs_lookup_dir_index() by directory, name
 * and index.  Depending on the lookup result the path is either released
 * and the transaction committed without deleting anything, or the entry is
 * removed with btrfs_del_item() / btrfs_delete_one_dir_name() before the
 * commit.
 */
3057 static int delete_dir_index(struct btrfs_root *root,
3058 struct inode_backref *backref)
3060 struct btrfs_trans_handle *trans;
3061 struct btrfs_dir_item *di;
3062 struct btrfs_path path;
3065 trans = btrfs_start_transaction(root, 1);
3067 return PTR_ERR(trans);
3069 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070 (unsigned long long)backref->dir,
3071 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072 (unsigned long long)root->objectid);
3074 btrfs_init_path(&path);
3075 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076 backref->name, backref->namelen,
3077 backref->index, -1);
/* Early-exit branch: nothing is deleted, but the transaction is still committed. */
3080 btrfs_release_path(&path);
3081 btrfs_commit_transaction(trans, root);
3088 ret = btrfs_del_item(trans, root, &path);
3090 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3092 btrfs_release_path(&path);
3093 btrfs_commit_transaction(trans, root);
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root, u64 ino, u64 size,
3099 u64 nbytes, u64 nlink, u32 mode)
3101 struct btrfs_inode_item ii;
3102 time_t now = time(NULL);
3105 btrfs_set_stack_inode_size(&ii, size);
3106 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107 btrfs_set_stack_inode_nlink(&ii, nlink);
3108 btrfs_set_stack_inode_mode(&ii, mode);
3109 btrfs_set_stack_inode_generation(&ii, trans->transid);
3110 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3118 ret = btrfs_insert_inode(trans, root, ino, &ii);
3121 warning("root %llu inode %llu recreating inode item, this may "
3122 "be incomplete, please check permissions and content after "
3123 "the fsck completes.\n", (unsigned long long)root->objectid,
3124 (unsigned long long)ino);
/*
 * Create an empty inode item (size, nbytes and nlink all 0) for @ino.
 * The mode is S_IFDIR when the file type is BTRFS_FT_DIR and S_IFREG for
 * anything else, always with 0755 permission bits.
 */
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130 struct btrfs_root *root, u64 ino,
3133 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3135 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Re-create the inode item for @rec in its own transaction.
 *
 * Link count: 1 when @root_dir is set, otherwise rec->found_link.
 * Type/size: a record with a dir item becomes a directory of size
 * rec->found_size (with a message if it also has file extents); any other
 * record becomes a regular file of size rec->extent_end.  Both get 0755
 * permission bits.  nbytes is taken from rec->nbytes.
 */
3138 static int create_inode_item(struct btrfs_root *root,
3139 struct inode_record *rec, int root_dir)
3141 struct btrfs_trans_handle *trans;
3147 trans = btrfs_start_transaction(root, 1);
3148 if (IS_ERR(trans)) {
3149 ret = PTR_ERR(trans);
3153 nlink = root_dir ? 1 : rec->found_link;
3154 if (rec->found_dir_item) {
3155 if (rec->found_file_extent)
3156 fprintf(stderr, "root %llu inode %llu has both a dir "
3157 "item and extents, unsure if it is a dir or a "
3158 "regular file so setting it as a directory\n",
3159 (unsigned long long)root->objectid,
3160 (unsigned long long)rec->ino);
3161 mode = S_IFDIR | 0755;
3162 size = rec->found_size;
/* NOTE(review): this condition is always true when reached; a plain else would be equivalent. */
3163 } else if (!rec->found_dir_item) {
3164 size = rec->extent_end;
3165 mode = S_IFREG | 0755;
3168 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
/* NOTE(review): the commit result is not checked here. */
3170 btrfs_commit_transaction(trans, root);
/*
 * Try to fix the backrefs of one inode record.
 *
 * Called in two flavours, selected by the delete flag: a pass that removes
 * bad dir index items, and a pass (delete == 0) that adds what is missing.
 * For every backref on rec->backrefs the following cases are handled:
 *
 *  - root directory without inode item (non-delete pass): re-create the
 *    inode item via create_inode_item(..., 1)
 *  - index 0 of the root directory is never touched
 *  - dir index without inode ref, or with an index mismatch: delete the
 *    dir index and drop the backref
 *  - dir item + inode ref but no dir index: add the dir index
 *  - inode ref only: insert the missing dir index/item pair, unless
 *    check_dir_conflict() objects
 *  - complete, matching backref but no inode item: re-create the inode item
 *
 * Returns the error of the failing step if there was one, otherwise the
 * number kept in "repaired".
 */
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175 struct inode_record *rec,
3176 struct cache_tree *inode_cache,
3179 struct inode_backref *tmp, *backref;
3180 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe iteration: entries are unlinked from the list inside the loop. */
3184 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185 if (!delete && rec->ino == root_dirid) {
3186 if (!rec->found_inode_item) {
3187 ret = create_inode_item(root, rec, 1);
3194 /* Index 0 for root dir's are special, don't mess with it */
3195 if (rec->ino == root_dirid && backref->index == 0)
3199 ((backref->found_dir_index && !backref->found_inode_ref) ||
3200 (backref->found_dir_index && backref->found_inode_ref &&
3201 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202 ret = delete_dir_index(root, backref);
3206 list_del(&backref->list);
3211 if (!delete && !backref->found_dir_index &&
3212 backref->found_dir_item && backref->found_inode_ref) {
3213 ret = add_missing_dir_index(root, inode_cache, rec,
/* A backref that is now complete and error-free needs no further tracking. */
3218 if (backref->found_dir_item &&
3219 backref->found_dir_index) {
3220 if (!backref->errors &&
3221 backref->found_inode_ref) {
3222 list_del(&backref->list);
3229 if (!delete && (!backref->found_dir_index &&
3230 !backref->found_dir_item &&
3231 backref->found_inode_ref)) {
3232 struct btrfs_trans_handle *trans;
3233 struct btrfs_key location;
3235 ret = check_dir_conflict(root, backref->name,
3241 * let nlink fixing routine to handle it,
3242 * which can do it better.
3247 location.objectid = rec->ino;
3248 location.type = BTRFS_INODE_ITEM_KEY;
3249 location.offset = 0;
3251 trans = btrfs_start_transaction(root, 1);
3252 if (IS_ERR(trans)) {
3253 ret = PTR_ERR(trans);
3256 fprintf(stderr, "adding missing dir index/item pair "
3258 (unsigned long long)rec->ino);
3259 ret = btrfs_insert_dir_item(trans, root, backref->name,
3261 backref->dir, &location,
3262 imode_to_type(rec->imode),
3265 btrfs_commit_transaction(trans, root);
3269 if (!delete && (backref->found_inode_ref &&
3270 backref->found_dir_index &&
3271 backref->found_dir_item &&
3272 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273 !rec->found_inode_item)) {
3274 ret = create_inode_item(root, rec, 0);
3281 return ret ? ret : repaired;
3285 * To determine the file type for nlink/inode_item repair
3287 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* type of @rec and store it in *type.
 *
 * If the inode item was found, the type is derived from rec->imode.
 * Otherwise the backrefs are scanned and the filetype of the first one that
 * has a dir index or dir item is used.
 */
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3292 struct inode_backref *backref;
3294 /* For inode item recovered case */
3295 if (rec->found_inode_item) {
3296 *type = imode_to_type(rec->imode);
3300 list_for_each_entry(backref, &rec->backrefs, list) {
3301 if (backref->found_dir_index || backref->found_dir_item) {
3302 *type = backref->filetype;
3310 * To determine the file name for nlink repair
3312 * Return 0 if file name is found, set name and namelen.
3313 * Return -ENOENT if file name is not found.
/*
 * Copy a usable file name for @rec into @name and its length into *namelen.
 *
 * The name comes from the first backref that has a dir index, a dir item or
 * an inode ref.  The copy is not NUL-terminated and is not bounded here;
 * the caller must supply a buffer large enough for backref->namelen bytes.
 */
3315 static int find_file_name(struct inode_record *rec,
3316 char *name, int *namelen)
3318 struct inode_backref *backref;
3320 list_for_each_entry(backref, &rec->backrefs, list) {
3321 if (backref->found_dir_index || backref->found_dir_item ||
3322 backref->found_inode_ref) {
3323 memcpy(name, backref->name, backref->namelen);
3324 *namelen = backref->namelen;
3331 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch:
 *
 *  1. reset rec->found_link to 0
 *  2. btrfs_unlink() every backref; backrefs that are not fully backed by
 *     dir index + dir item + inode ref are also dropped from the list
 *  3. set nlink in the inode item to 0
 *  4. btrfs_add_link() every backref that is still on the list
 */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root,
3334 struct btrfs_path *path,
3335 struct inode_record *rec)
3337 struct inode_backref *backref;
3338 struct inode_backref *tmp;
3339 struct btrfs_key key;
3340 struct btrfs_inode_item *inode_item;
3343 /* We don't believe this either, reset it and iterate backref */
3344 rec->found_link = 0;
3346 /* Remove all backref including the valid ones */
3347 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349 backref->index, backref->name,
3350 backref->namelen, 0);
3354 /* remove invalid backref, so it won't be added back */
3355 if (!(backref->found_dir_index &&
3356 backref->found_dir_item &&
3357 backref->found_inode_ref)) {
3358 list_del(&backref->list);
3365 /* Set nlink to 0 */
3366 key.objectid = rec->ino;
3367 key.type = BTRFS_INODE_ITEM_KEY;
3369 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3376 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377 struct btrfs_inode_item);
3378 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379 btrfs_mark_buffer_dirty(path->nodes[0]);
3380 btrfs_release_path(path);
3383 * Add back valid inode_ref/dir_item/dir_index,
3384 * add_link() will handle the nlink inc, so new nlink must be correct
3386 list_for_each_entry(backref, &rec->backrefs, list) {
/* &backref->index is passed by address, so btrfs_add_link() may update it. */
3387 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388 backref->name, backref->namelen,
3389 backref->filetype, &backref->index, 1, 0);
3394 btrfs_release_path(path);
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399 struct btrfs_root *root,
3400 struct btrfs_path *path,
3403 struct btrfs_key key, found_key;
3406 btrfs_init_path(path);
3407 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3409 key.type = BTRFS_INODE_ITEM_KEY;
3410 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3412 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413 path->slots[0] - 1);
3414 *highest_ino = found_key.objectid;
3417 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3419 btrfs_release_path(path);
3424 * Link inode to dir 'lost+found'. Increase @ref_count.
3426 * Returns 0 means success.
3427 * Returns <0 means failure.
/*
 * Give inode @ino a link inside the "lost+found" directory of @root.
 *
 * The directory is created with btrfs_mkdir() under
 * BTRFS_FIRST_FREE_OBJECTID, using an inode number derived from
 * get_highest_inode().  The file is then linked under @namebuf; while the
 * name collides (-EEXIST) a ".INO" suffix is appended in place and the link
 * is retried, as long as the result still fits into BTRFS_NAME_LEN.
 *
 * @namebuf is modified when a suffix is added, so it must be writable and
 * at least BTRFS_NAME_LEN bytes long.
 */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430 struct btrfs_root *root,
3431 struct btrfs_path *path,
3432 u64 ino, char *namebuf, u32 name_len,
3433 u8 filetype, u64 *ref_count)
3435 char *dir_name = "lost+found";
3440 btrfs_release_path(path);
3441 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3446 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3450 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3453 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454 namebuf, name_len, filetype, NULL, 1, 0);
3456 * Add ".INO" suffix several times to handle case where
3457 * "FILENAME.INO" is already taken by another file.
3459 while (ret == -EEXIST) {
3461 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3463 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
/* Append at the current end of the name; the size argument bounds the write to the buffer. */
3467 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3469 name_len += count_digits(ino) + 1;
3470 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471 name_len, filetype, NULL, 1, 0);
3474 error("failed to link the inode %llu to %s dir: %s",
3475 ino, dir_name, strerror(-ret));
3480 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481 name_len, namebuf, dir_name);
3483 btrfs_release_path(path);
3485 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair a wrong link count for @rec.
 *
 * Name and type are collected from the backrefs first, because
 * reset_nlink() drops invalid backrefs; fallbacks are the decimal inode
 * number as name and BTRFS_FT_REG_FILE as type.  After reset_nlink() an
 * inode that ended up with no link at all is moved into lost+found.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared afterwards in any case, see the
 * comment near the end of the function.
 */
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491 struct btrfs_root *root,
3492 struct btrfs_path *path,
3493 struct inode_record *rec)
3495 char namebuf[BTRFS_NAME_LEN] = {0};
3498 int name_recovered = 0;
3499 int type_recovered = 0;
3503 * Get file name and type first before these invalid inode ref
3504 * are deleted by remove_all_invalid_backref()
/* Both helpers are negated on their result, so the flags hold 1 on success. */
3506 name_recovered = !find_file_name(rec, namebuf, &namelen);
3507 type_recovered = !find_file_type(rec, &type);
3509 if (!name_recovered) {
3510 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511 rec->ino, rec->ino);
3512 namelen = count_digits(rec->ino);
3513 sprintf(namebuf, "%llu", rec->ino);
3516 if (!type_recovered) {
3517 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3519 type = BTRFS_FT_REG_FILE;
3523 ret = reset_nlink(trans, root, path, rec);
3526 "Failed to reset nlink for inode %llu: %s\n",
3527 rec->ino, strerror(-ret));
3531 if (rec->found_link == 0) {
/*
 * NOTE(review): the cast lets the callee access found_link as a u64.
 * That is only safe if the field really is 64 bits wide -- confirm
 * against struct inode_record.
 */
3532 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533 namebuf, namelen, type,
3534 (u64 *)&rec->found_link);
3538 printf("Fixed the nlink of inode %llu\n", rec->ino);
3541 * Clear the flag anyway, or we will loop forever for the same inode
3542 * as it will not be removed from the bad inode list and the dead loop
3545 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546 btrfs_release_path(path);
3551 * Check if there is any normal(reg or prealloc) file extent for given
3553 * This is used to determine the file type when neither its dir_index/item or
3554 * inode_item exists.
3556 * This will *NOT* report error, if any error happens, just consider it does
3557 * not have any normal file extent.
/*
 * Look for a non-inline file extent item of inode @ino in @root.
 *
 * Read-only search (no transaction, no COW).  When the search lands past
 * the last item of a leaf the next leaf is tried.  Only items whose key is
 * (@ino, BTRFS_EXTENT_DATA_KEY, *) are considered, and an item counts as
 * "normal" when its extent type is not BTRFS_FILE_EXTENT_INLINE.
 */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3561 struct btrfs_path path;
3562 struct btrfs_key key;
3563 struct btrfs_key found_key;
3564 struct btrfs_file_extent_item *fi;
3568 btrfs_init_path(&path);
3570 key.type = BTRFS_EXTENT_DATA_KEY;
3573 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3578 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579 ret = btrfs_next_leaf(root, &path);
3586 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3588 if (found_key.objectid != ino ||
3589 found_key.type != BTRFS_EXTENT_DATA_KEY)
3591 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592 struct btrfs_file_extent_item);
3593 type = btrfs_file_extent_type(path.nodes[0], fi);
3594 if (type != BTRFS_FILE_EXTENT_INLINE) {
3600 btrfs_release_path(&path);
3604 static u32 btrfs_type_to_imode(u8 type)
3606 static u32 imode_by_btrfs_type[] = {
3607 [BTRFS_FT_REG_FILE] = S_IFREG,
3608 [BTRFS_FT_DIR] = S_IFDIR,
3609 [BTRFS_FT_CHRDEV] = S_IFCHR,
3610 [BTRFS_FT_BLKDEV] = S_IFBLK,
3611 [BTRFS_FT_FIFO] = S_IFIFO,
3612 [BTRFS_FT_SOCK] = S_IFSOCK,
3613 [BTRFS_FT_SYMLINK] = S_IFLNK,
3616 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed, in this order: regular file if a non-inline file extent exists,
 * directory if a dir item was found, regular file if orphan extents are
 * attached, and regular file as the last-resort fallback.
 *
 * Only the inode item is created (btrfs_new_inode()); the link count is
 * left to the nlink repair, which is forced to run by setting
 * I_ERR_LINK_COUNT_WRONG on the record.
 */
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620 struct btrfs_root *root,
3621 struct btrfs_path *path,
3622 struct inode_record *rec)
3626 int type_recovered = 0;
3629 printf("Trying to rebuild inode:%llu\n", rec->ino);
3631 type_recovered = !find_file_type(rec, &filetype);
3634 * Try to determine inode type if type not found.
3636 * For found regular file extent, it must be FILE.
3637 * For found dir_item/index, it must be DIR.
3639 * For undetermined one, use FILE as fallback.
3642 * 1. If found backref(inode_index/item is already handled) to it,
3644 * Need new inode-inode ref structure to allow search for that.
3646 if (!type_recovered) {
3647 if (rec->found_file_extent &&
3648 find_normal_file_extent(root, rec->ino)) {
3650 filetype = BTRFS_FT_REG_FILE;
3651 } else if (rec->found_dir_item) {
3653 filetype = BTRFS_FT_DIR;
3654 } else if (!list_empty(&rec->orphan_extents)) {
3656 filetype = BTRFS_FT_REG_FILE;
3658 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3661 filetype = BTRFS_FT_REG_FILE;
3665 ret = btrfs_new_inode(trans, root, rec->ino,
3666 mode | btrfs_type_to_imode(filetype));
3671 * Here inode rebuild is done, we only rebuild the inode item,
3672 * don't repair the nlink(like move to lost+found).
3673 * That is the job of nlink repair.
3675 * We just fill the record and return
/*
 * NOTE(review): this sets found_dir_item although an inode item was just
 * created and I_ERR_NO_INODE_ITEM is cleared below -- confirm that
 * found_inode_item was not intended.
 */
3677 rec->found_dir_item = 1;
3678 rec->imode = mode | btrfs_type_to_imode(filetype);
3680 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681 /* Ensure the inode_nlinks repair function will be called */
3682 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to their inode.
 *
 * For every orphan extent the target file range is probed with
 * btrfs_get_extent().  A conflicting orphan is released with
 * btrfs_free_extent(); otherwise a file extent item is inserted with
 * disk_len used both as on-disk and as file length.  The record is updated
 * along the way: found_size grows, the matching hole is removed, and the
 * NBYTES_WRONG / EXTENT_DISCOUNT flags are cleared once they no longer
 * apply.  Handled orphans are unlinked from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688 struct btrfs_root *root,
3689 struct btrfs_path *path,
3690 struct inode_record *rec)
3692 struct orphan_data_extent *orphan;
3693 struct orphan_data_extent *tmp;
/* _safe iteration: each orphan is removed from the list once handled. */
3696 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3698 * Check for conflicting file extents
3700 * Here we don't know whether the extents is compressed or not,
3701 * so we can only assume it not compressed nor data offset,
3702 * and use its disk_len as extent length.
3704 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705 orphan->offset, orphan->disk_len, 0);
3706 btrfs_release_path(path);
3711 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712 orphan->disk_bytenr, orphan->disk_len);
3713 ret = btrfs_free_extent(trans,
3714 root->fs_info->extent_root,
3715 orphan->disk_bytenr, orphan->disk_len,
3716 0, root->objectid, orphan->objectid,
3721 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722 orphan->offset, orphan->disk_bytenr,
3723 orphan->disk_len, orphan->disk_len);
3727 /* Update file size info */
3728 rec->found_size += orphan->disk_len;
3729 if (rec->found_size == rec->nbytes)
3730 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3732 /* Update the file extent hole info too */
3733 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3737 if (RB_EMPTY_ROOT(&rec->holes))
3738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3740 list_del(&orphan->list);
3743 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with explicit holes.
 *
 * Every range recorded in rec->holes is punched with btrfs_punch_hole() and
 * then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A file that lost all of its extents is handled
 * separately by punching one hole from offset 0 over isize rounded up to
 * the sector size.
 */
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749 struct btrfs_root *root,
3750 struct btrfs_path *path,
3751 struct inode_record *rec)
3753 struct rb_node *node;
3754 struct file_extent_hole *hole;
3758 node = rb_first(&rec->holes);
3762 hole = rb_entry(node, struct file_extent_hole, node);
3763 ret = btrfs_punch_hole(trans, root, rec->ino,
3764 hole->start, hole->len);
3767 ret = del_file_extent_hole(&rec->holes, hole->start,
3771 if (RB_EMPTY_ROOT(&rec->holes))
3772 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from its first node. */
3773 node = rb_first(&rec->holes);
3775 /* special case for a file losing all its file extent */
3777 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778 round_up(rec->isize,
3779 root->fs_info->sectorsize));
3783 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only the error bits listed in the first test are repairable here.  The
 * individual repairs run in a fixed order (missing inode item, orphan
 * extents, extent holes, dir isize, orphan item, nlink, nbytes) and each one
 * is skipped once an earlier one has failed.  The transaction is committed
 * whatever the outcome.
 */
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3791 struct btrfs_trans_handle *trans;
3792 struct btrfs_path path;
3795 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796 I_ERR_NO_ORPHAN_ITEM |
3797 I_ERR_LINK_COUNT_WRONG |
3798 I_ERR_NO_INODE_ITEM |
3799 I_ERR_FILE_EXTENT_ORPHAN |
3800 I_ERR_FILE_EXTENT_DISCOUNT|
3801 I_ERR_FILE_NBYTES_WRONG)))
3805 * For nlink repair, it may create a dir and add link, so
3806 * 2 for parent(256)'s dir_index and dir_item
3807 * 2 for lost+found dir's inode_item and inode_ref
3808 * 1 for the new inode_ref of the file
3809 * 2 for lost+found dir's dir_index and dir_item for the file
3811 trans = btrfs_start_transaction(root, 7);
3813 return PTR_ERR(trans);
3815 btrfs_init_path(&path);
/* The order matters: rebuilding the inode item sets I_ERR_LINK_COUNT_WRONG for the nlink step. */
3816 if (rec->errors & I_ERR_NO_INODE_ITEM)
3817 ret = repair_inode_no_item(trans, root, &path, rec);
3818 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823 ret = repair_inode_isize(trans, root, &path, rec);
3824 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827 ret = repair_inode_nlinks(trans, root, &path, rec);
3828 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the commit result is ignored and the path is released only after the commit. */
3830 btrfs_commit_transaction(trans, root);
3831 btrfs_release_path(&path);
/*
 * Evaluate (and in repair mode fix) all inode records collected for one
 * fs tree, consuming @inode_cache in the process.
 *
 * Phases:
 *  1. Trees with a zero root refcount are not checked further; leftover
 *     records only trigger a warning.
 *  2. In repair mode, backrefs are repaired in stages via
 *     repair_inode_backrefs().
 *  3. The root directory record is verified with check_root_dir(); a
 *     missing root dir is re-created with btrfs_make_root_dir() in repair
 *     mode.
 *  4. Every remaining record is removed from the cache, gets its
 *     NO_INODE_ITEM / LINK_COUNT_WRONG flags set where applicable, is handed
 *     to try_repair_inode() in repair mode, and is reported together with
 *     its unresolved backrefs if problems remain.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3835 static int check_inode_recs(struct btrfs_root *root,
3836 struct cache_tree *inode_cache)
3838 struct cache_extent *cache;
3839 struct ptr_node *node;
3840 struct inode_record *rec;
3841 struct inode_backref *backref;
3846 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3848 if (btrfs_root_refs(&root->root_item) == 0) {
3849 if (!cache_tree_empty(inode_cache))
3850 fprintf(stderr, "warning line %d\n", __LINE__);
3855 * We need to repair backrefs first because we could change some of the
3856 * errors in the inode recs.
3858 * We also need to go through and delete invalid backrefs first and then
3859 * add the correct ones second. We do this because we may get EEXIST
3860 * when adding back the correct index because we hadn't yet deleted the
3863 * For example, if we were missing a dir index then the directories
3864 * isize would be wrong, so if we fixed the isize to what we thought it
3865 * would be and then fixed the backref we'd still have a invalid fs, so
3866 * we need to add back the dir index and then check to see if the isize
3871 if (stage == 3 && !err)
3874 cache = search_cache_extent(inode_cache, 0);
3875 while (repair && cache) {
3876 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor first: the current entry may be removed below. */
3878 cache = next_cache_extent(cache);
3880 /* Need to free everything up and rescan */
3882 remove_cache_extent(inode_cache, &node->cache);
3884 free_inode_rec(rec);
3888 if (list_empty(&rec->backrefs))
3891 ret = repair_inode_backrefs(root, rec, inode_cache,
3905 rec = get_inode_rec(inode_cache, root_dirid, 0);
3906 BUG_ON(IS_ERR(rec));
3908 ret = check_root_dir(rec);
3910 fprintf(stderr, "root %llu root dir %llu error\n",
3911 (unsigned long long)root->root_key.objectid,
3912 (unsigned long long)root_dirid);
3913 print_inode_error(root, rec);
3918 struct btrfs_trans_handle *trans;
3920 trans = btrfs_start_transaction(root, 1);
3921 if (IS_ERR(trans)) {
3922 err = PTR_ERR(trans);
3927 "root %llu missing its root dir, recreating\n",
3928 (unsigned long long)root->objectid);
3930 ret = btrfs_make_root_dir(trans, root, root_dirid);
3933 btrfs_commit_transaction(trans, root);
3937 fprintf(stderr, "root %llu root dir %llu not found\n",
3938 (unsigned long long)root->root_key.objectid,
3939 (unsigned long long)root_dirid);
/* Final pass: drain the cache, one record per iteration. */
3943 cache = search_cache_extent(inode_cache, 0);
3946 node = container_of(cache, struct ptr_node, cache);
3948 remove_cache_extent(inode_cache, &node->cache);
/* The root dir was handled above; the orphan objectid is not a real inode. */
3950 if (rec->ino == root_dirid ||
3951 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952 free_inode_rec(rec);
3956 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957 ret = check_orphan_item(root, rec->ino);
3959 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960 if (can_free_inode_rec(rec)) {
3961 free_inode_rec(rec);
3966 if (!rec->found_inode_item)
3967 rec->errors |= I_ERR_NO_INODE_ITEM;
3968 if (rec->found_link != rec->nlink)
3969 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3971 ret = try_repair_inode(root, rec);
3972 if (ret == 0 && can_free_inode_rec(rec)) {
3973 free_inode_rec(rec);
3979 if (!(repair && ret == 0))
3981 print_inode_error(root, rec);
/* Derive the per-backref error bits from the found_* flags before printing them. */
3982 list_for_each_entry(backref, &rec->backrefs, list) {
3983 if (!backref->found_dir_item)
3984 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985 if (!backref->found_dir_index)
3986 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987 if (!backref->found_inode_ref)
3988 backref->errors |= REF_ERR_NO_INODE_REF;
3989 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990 " namelen %u name %s filetype %d errors %x",
3991 (unsigned long long)backref->dir,
3992 (unsigned long long)backref->index,
3993 backref->namelen, backref->name,
3994 backref->filetype, backref->errors);
3995 print_ref_error(backref->errors);
3997 free_inode_rec(rec);
3999 return (error > 0) ? -1 : 0;
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4005 struct cache_extent *cache;
4006 struct root_record *rec = NULL;
4009 cache = lookup_cache_extent(root_cache, objectid, 1);
4011 rec = container_of(cache, struct root_record, cache);
4013 rec = calloc(1, sizeof(*rec));
4015 return ERR_PTR(-ENOMEM);
4016 rec->objectid = objectid;
4017 INIT_LIST_HEAD(&rec->backrefs);
4018 rec->cache.start = objectid;
4019 rec->cache.size = 1;
4021 ret = insert_cache_extent(root_cache, &rec->cache);
4023 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or
 * allocate a new one and append it to rec->backrefs.
 *
 * Matching compares ref_root, dir, namelen and the name bytes; @index is
 * not part of the comparison and is only stored on a newly created backref.
 * The name is copied into the allocation together with a terminating NUL.
 */
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029 u64 ref_root, u64 dir, u64 index,
4030 const char *name, int namelen)
4032 struct root_backref *backref;
4034 list_for_each_entry(backref, &rec->backrefs, list) {
4035 if (backref->ref_root != ref_root || backref->dir != dir ||
4036 backref->namelen != namelen)
4038 if (memcmp(name, backref->name, namelen))
/* One allocation: the struct, the name bytes and the trailing NUL. */
4043 backref = calloc(1, sizeof(*backref) + namelen + 1);
4046 backref->ref_root = ref_root;
4048 backref->index = index;
4049 backref->namelen = namelen;
4050 memcpy(backref->name, name, namelen);
4051 backref->name[namelen] = '\0';
4052 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor callback for one cache entry holding a struct root_record:
 * detaches every backref from rec->backrefs, one at a time from the head
 * of the list.
 */
4056 static void free_root_record(struct cache_extent *cache)
4058 struct root_record *rec;
4059 struct root_backref *backref;
4061 rec = container_of(cache, struct root_record, cache);
4062 while (!list_empty(&rec->backrefs)) {
4063 backref = to_root_backref(rec->backrefs.next);
4064 list_del(&backref->list);
/*
 * Macro invocation that ties free_root_record() to the "root_recs" tree.
 * NOTE(review): presumably this expands to the tree-wide free helper for
 * root records -- confirm against the macro definition.
 */
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing root @root_id was
 * seen, and merge it into the matching root backref.
 *
 * @errors is OR-ed into the backref.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index is either compared with the one already
 * recorded (a mismatch sets REF_ERR_INDEX_UNMATCH) or stored.  The
 * found_* flag matching the item type is then set; a ROOT_REF or
 * ROOT_BACKREF seen twice sets the corresponding REF_ERR_DUP_* bit.
 * A backref that has both the forward ref and the dir item is marked
 * reachable.
 */
4073 static int add_root_backref(struct cache_tree *root_cache,
4074 u64 root_id, u64 ref_root, u64 dir, u64 index,
4075 const char *name, int namelen,
4076 int item_type, int errors)
4078 struct root_record *rec;
4079 struct root_backref *backref;
4081 rec = get_root_rec(root_cache, root_id);
4082 BUG_ON(IS_ERR(rec));
4083 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4086 backref->errors |= errors;
4088 if (item_type != BTRFS_DIR_ITEM_KEY) {
4089 if (backref->found_dir_index || backref->found_back_ref ||
4090 backref->found_forward_ref) {
4091 if (backref->index != index)
4092 backref->errors |= REF_ERR_INDEX_UNMATCH;
4094 backref->index = index;
4098 if (item_type == BTRFS_DIR_ITEM_KEY) {
4099 if (backref->found_forward_ref)
4101 backref->found_dir_item = 1;
4102 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103 backref->found_dir_index = 1;
4104 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105 if (backref->found_forward_ref)
4106 backref->errors |= REF_ERR_DUP_ROOT_REF;
4107 else if (backref->found_dir_item)
4109 backref->found_forward_ref = 1;
4110 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111 if (backref->found_back_ref)
4112 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113 backref->found_back_ref = 1;
4118 if (backref->found_forward_ref && backref->found_dir_item)
4119 backref->reachable = 1;
/*
 * Fold the records gathered for one tree (@src_cache) into the global
 * root cache (@dst_cache).
 *
 * For the tree-reloc root the source records are simply freed. Otherwise
 * each ptr_node is removed from @src_cache and its inode record is tested
 * with is_child_root(); every backref with a dir item and/or dir index is
 * forwarded to add_root_backref() as BTRFS_DIR_ITEM_KEY and/or
 * BTRFS_DIR_INDEX_KEY, keyed by rec->ino as the referenced root and
 * root->root_key.objectid as the referencing root. Backrefs must not
 * carry found_inode_ref (BUG_ON). The inode record is freed afterwards.
 *
 * NOTE(review): loop control, handling of the is_child_root() result and
 * the return statements are not present in this listing.
 */
4123 static int merge_root_recs(struct btrfs_root *root,
4124 struct cache_tree *src_cache,
4125 struct cache_tree *dst_cache)
4127 struct cache_extent *cache;
4128 struct ptr_node *node;
4129 struct inode_record *rec;
4130 struct inode_backref *backref;
4133 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134 free_inode_recs_tree(src_cache);
4139 cache = search_cache_extent(src_cache, 0);
4142 node = container_of(cache, struct ptr_node, cache);
4144 remove_cache_extent(src_cache, &node->cache);
4147 ret = is_child_root(root, root->objectid, rec->ino);
4153 list_for_each_entry(backref, &rec->backrefs, list) {
4154 BUG_ON(backref->found_inode_ref);
4155 if (backref->found_dir_item)
4156 add_root_backref(dst_cache, rec->ino,
4157 root->root_key.objectid, backref->dir,
4158 backref->index, backref->name,
4159 backref->namelen, BTRFS_DIR_ITEM_KEY,
4161 if (backref->found_dir_index)
4162 add_root_backref(dst_cache, rec->ino,
4163 root->root_key.objectid, backref->dir,
4164 backref->index, backref->name,
4165 backref->namelen, BTRFS_DIR_INDEX_KEY,
4169 free_inode_rec(rec);
/*
 * Cross-check the root records collected in @root_cache and report the
 * inconsistencies on stderr.
 *
 * Visible phases:
 *  1. The record for BTRFS_FS_TREE_OBJECTID is looked up (must succeed).
 *  2. A pass over all records revisits reachable backrefs and clears
 *     backref->reachable for some of them, based on the found_ref count
 *     of the referencing root.
 *  3. A second pass handles unreferenced subvolume ids (orphan item
 *     check, "not referenced" message), fills in REF_ERR_NO_* bits for
 *     every missing item kind and prints unresolved refs.
 *
 * Returns 1 when at least one error was counted, 0 otherwise.
 *
 * NOTE(review): loop control, the found_ref bookkeeping and the error
 * counter updates are not present in this listing.
 */
4176 static int check_root_refs(struct btrfs_root *root,
4177 struct cache_tree *root_cache)
4179 struct root_record *rec;
4180 struct root_record *ref_root;
4181 struct root_backref *backref;
4182 struct cache_extent *cache;
4188 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189 BUG_ON(IS_ERR(rec));
4192 /* fixme: this can not detect circular references */
4195 cache = search_cache_extent(root_cache, 0);
4199 rec = container_of(cache, struct root_record, cache);
4200 cache = next_cache_extent(cache);
4202 if (rec->found_ref == 0)
4205 list_for_each_entry(backref, &rec->backrefs, list) {
4206 if (!backref->reachable)
4209 ref_root = get_root_rec(root_cache,
4211 BUG_ON(IS_ERR(ref_root));
4212 if (ref_root->found_ref > 0)
4215 backref->reachable = 0;
4217 if (rec->found_ref == 0)
4223 cache = search_cache_extent(root_cache, 0);
4227 rec = container_of(cache, struct root_record, cache);
4228 cache = next_cache_extent(cache);
4230 if (rec->found_ref == 0 &&
4231 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233 ret = check_orphan_item(root->fs_info->tree_root,
4239 * If we don't have a root item then we likely just have
4240 * a dir item in a snapshot for this root but no actual
4241 * ref key or anything so it's meaningless.
4243 if (!rec->found_root_item)
4246 fprintf(stderr, "fs tree %llu not referenced\n",
4247 (unsigned long long)rec->objectid);
4251 if (rec->found_ref > 0 && !rec->found_root_item)
/* Turn every missing item kind into its REF_ERR_NO_* error bit. */
4253 list_for_each_entry(backref, &rec->backrefs, list) {
4254 if (!backref->found_dir_item)
4255 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256 if (!backref->found_dir_index)
4257 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258 if (!backref->found_back_ref)
4259 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260 if (!backref->found_forward_ref)
4261 backref->errors |= REF_ERR_NO_ROOT_REF;
4262 if (backref->reachable && backref->errors)
4269 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270 (unsigned long long)rec->objectid, rec->found_ref,
4271 rec->found_root_item ? "" : "not found");
4273 list_for_each_entry(backref, &rec->backrefs, list) {
4274 if (!backref->reachable)
4276 if (!backref->errors && rec->found_root_item)
4278 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279 " index %llu namelen %u name %s errors %x\n",
4280 (unsigned long long)backref->ref_root,
4281 (unsigned long long)backref->dir,
4282 (unsigned long long)backref->index,
4283 backref->namelen, backref->name,
4285 print_ref_error(backref->errors);
4288 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and feed it to
 * add_root_backref().
 *
 * The directory id, sequence (index) and name are read from the item. A
 * name longer than BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN and
 * reported through REF_ERR_NAME_TOO_LONG.
 *
 * For a ROOT_REF key the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type handled here
 * the two are swapped.
 *
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and is passed
 * with an explicit length, so it is not guaranteed to be NUL-terminated.
 */
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292 struct btrfs_key *key,
4293 struct cache_tree *root_cache)
4299 struct btrfs_root_ref *ref;
4300 char namebuf[BTRFS_NAME_LEN];
4303 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4305 dirid = btrfs_root_ref_dirid(eb, ref);
4306 index = btrfs_root_ref_sequence(eb, ref);
4307 name_len = btrfs_root_ref_name_len(eb, ref);
4309 if (name_len <= BTRFS_NAME_LEN) {
4313 len = BTRFS_NAME_LEN;
4314 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size btrfs_root_ref header. */
4316 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4318 if (key->type == BTRFS_ROOT_REF_KEY) {
4319 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320 index, namebuf, len, key->type, error);
4322 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block stored in a cache tree;
 * the record is recovered from its embedded cache_extent.
 *
 * NOTE(review): the line that releases the record is not present in this
 * listing.
 */
4328 static void free_corrupt_block(struct cache_extent *cache)
4330 struct btrfs_corrupt_block *corrupt;
4332 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Presumably defines free_corrupt_blocks_tree(), used by check_fs_root(). */
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4339 * Repair the btree of the given root.
4341 * The fix is to remove the node key in corrupt_blocks cache_tree
4342 * and rebalance the tree.
4343 * After the fix, the btree should be writeable.
/*
 * Drop every tree block recorded in @corrupt_blocks from @root.
 *
 * Nothing is done for an empty cache tree. Otherwise, inside a single
 * transaction:
 *  - first pass: for each record, search down to corrupt->level for
 *    corrupt->key (ins_len 0, cow 1), read the block pointer stored at
 *    that slot, remove the pointer with btrfs_del_ptr() and free the
 *    extent it referenced;
 *  - second pass: search each key again with ins_len -1 so that
 *    btrfs_search_slot() rebalances the tree.
 * The transaction is then committed and the path released.
 *
 * A failure to start the transaction is reported on stderr.
 *
 * NOTE(review): error checks after the individual calls, loop control
 * and the return statements are not present in this listing.
 */
4345 static int repair_btree(struct btrfs_root *root,
4346 struct cache_tree *corrupt_blocks)
4348 struct btrfs_trans_handle *trans;
4349 struct btrfs_path path;
4350 struct btrfs_corrupt_block *corrupt;
4351 struct cache_extent *cache;
4352 struct btrfs_key key;
4357 if (cache_tree_empty(corrupt_blocks))
4360 trans = btrfs_start_transaction(root, 1);
4361 if (IS_ERR(trans)) {
4362 ret = PTR_ERR(trans);
4363 fprintf(stderr, "Error starting transaction: %s\n",
4367 btrfs_init_path(&path);
4368 cache = first_cache_extent(corrupt_blocks);
4370 corrupt = container_of(cache, struct btrfs_corrupt_block,
4372 level = corrupt->level;
4373 path.lowest_level = level;
4374 key.objectid = corrupt->key.objectid;
4375 key.type = corrupt->key.type;
4376 key.offset = corrupt->key.offset;
4379 * Here we don't want to do any tree balance, since it may
4380 * cause a balance with corrupted brother leaf/node,
4381 * so ins_len set to 0 here.
4382 * Balance will be done after all corrupt node/leaf is deleted.
4384 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4387 offset = btrfs_node_blockptr(path.nodes[level],
4390 /* Remove the ptr */
4391 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4395 * Remove the corresponding extent
4396 * return value is not concerned.
4398 btrfs_release_path(&path);
4399 ret = btrfs_free_extent(trans, root, offset,
4400 root->fs_info->nodesize, 0,
4401 root->root_key.objectid, level - 1, 0);
4402 cache = next_cache_extent(cache);
4405 /* Balance the btree using btrfs_search_slot() */
4406 cache = first_cache_extent(corrupt_blocks);
4408 corrupt = container_of(cache, struct btrfs_corrupt_block,
4410 memcpy(&key, &corrupt->key, sizeof(key));
4411 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4414 /* return will always >0 since it won't find the item */
4416 btrfs_release_path(&path);
4417 cache = next_cache_extent(cache);
4420 btrfs_commit_transaction(trans, root);
4421 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * Outline of the visible steps:
 *  - install a per-call corrupt_blocks cache tree in root->fs_info;
 *  - unless this is the tree-reloc root, look up its root record and mark
 *    found_root_item when the root item has a non-zero ref count;
 *  - move each orphan data extent of the root onto the inode record it
 *    belongs to and flag that inode with I_ERR_FILE_EXTENT_ORPHAN;
 *  - validate the root block itself with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - start the walk at the root block, or, for a partially dropped root
 *    (zero refs and non-zero drop_progress), at the drop_progress key
 *    after validating drop_level;
 *  - walk the tree with walk_down_tree() / walk_up_tree();
 *  - list any corrupted tree blocks that were recorded and try to fix
 *    them with repair_btree();
 *  - merge the collected root records into @root_cache and check the
 *    collected inode records;
 *  - free the corrupt-block tree, reset fs_info->corrupt_blocks and free
 *    the orphan data extents.
 *
 * NOTE(review): the conditions guarding several of these steps, the walk
 * loop and the return statements are not present in this listing.
 */
4425 static int check_fs_root(struct btrfs_root *root,
4426 struct cache_tree *root_cache,
4427 struct walk_control *wc)
4433 struct btrfs_path path;
4434 struct shared_node root_node;
4435 struct root_record *rec;
4436 struct btrfs_root_item *root_item = &root->root_item;
4437 struct cache_tree corrupt_blocks;
4438 struct orphan_data_extent *orphan;
4439 struct orphan_data_extent *tmp;
4440 enum btrfs_tree_block_status status;
4441 struct node_refs nrefs;
4444 * Reuse the corrupt_block cache tree to record corrupted tree block
4446 * Unlike the usage in extent tree check, here we do it in a per
4447 * fs/subvol tree base.
4449 cache_tree_init(&corrupt_blocks);
4450 root->fs_info->corrupt_blocks = &corrupt_blocks;
4452 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453 rec = get_root_rec(root_cache, root->root_key.objectid);
4454 BUG_ON(IS_ERR(rec));
4455 if (btrfs_root_refs(root_item) > 0)
4456 rec->found_root_item = 1;
4459 btrfs_init_path(&path);
4460 memset(&root_node, 0, sizeof(root_node));
4461 cache_tree_init(&root_node.root_cache);
4462 cache_tree_init(&root_node.inode_cache);
4463 memset(&nrefs, 0, sizeof(nrefs));
4465 /* Move the orphan extent record to corresponding inode_record */
4466 list_for_each_entry_safe(orphan, tmp,
4467 &root->orphan_data_extents, list) {
4468 struct inode_record *inode;
4470 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4472 BUG_ON(IS_ERR(inode));
4473 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk control with the root block as the active shared node. */
4477 level = btrfs_header_level(root->node);
4478 memset(wc->nodes, 0, sizeof(wc->nodes));
4479 wc->nodes[level] = &root_node;
4480 wc->active_node = level;
4481 wc->root_level = level;
4483 /* We may not have checked the root block, lets do that now */
4484 if (btrfs_is_leaf(root->node))
4485 status = btrfs_check_leaf(root, NULL, root->node);
4487 status = btrfs_check_node(root, NULL, root->node);
4488 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root, or no drop in progress: walk from the root block. */
4491 if (btrfs_root_refs(root_item) > 0 ||
4492 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493 path.nodes[level] = root->node;
4494 extent_buffer_get(root->node);
4495 path.slots[level] = 0;
4497 struct btrfs_key key;
4498 struct btrfs_disk_key found_key;
4500 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501 level = root_item->drop_level;
4502 path.lowest_level = level;
4503 if (level > btrfs_header_level(root->node) ||
4504 level >= BTRFS_MAX_LEVEL) {
4505 error("ignoring invalid drop level: %u", level);
4508 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4511 btrfs_node_key(path.nodes[level], &found_key,
4513 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514 sizeof(found_key)));
4518 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4524 wret = walk_up_tree(root, &path, wc, &level);
4531 btrfs_release_path(&path);
4533 if (!cache_tree_empty(&corrupt_blocks)) {
4534 struct cache_extent *cache;
4535 struct btrfs_corrupt_block *corrupt;
4537 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538 root->root_key.objectid);
4539 cache = first_cache_extent(&corrupt_blocks);
4541 corrupt = container_of(cache,
4542 struct btrfs_corrupt_block,
4544 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545 cache->start, corrupt->level,
4546 corrupt->key.objectid, corrupt->key.type,
4547 corrupt->key.offset);
4548 cache = next_cache_extent(cache);
4551 printf("Try to repair the btree for root %llu\n",
4552 root->root_key.objectid);
4553 ret = repair_btree(root, &corrupt_blocks);
4555 fprintf(stderr, "Failed to repair btree: %s\n",
4558 printf("Btree for root %llu is fixed\n",
4559 root->root_key.objectid);
4563 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4567 if (root_node.current) {
4568 root_node.current->checked = 1;
4569 maybe_free_inode_rec(&root_node.inode_cache,
4573 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-call state installed at the top of the function. */
4577 free_corrupt_blocks_tree(&corrupt_blocks);
4578 root->fs_info->corrupt_blocks = NULL;
4579 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk.
 * The tree-reloc and data-reloc tree ids are tested explicitly; every
 * other id is decided by is_fstree().
 *
 * NOTE(review): the value returned for the two reloc ids is on a line
 * that is not present in this listing.
 */
4583 static int fs_root_objectid(u64 objectid)
4585 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4588 return is_fstree(objectid);
/*
 * Walk the tree of tree roots and check every fs/subvolume tree found.
 *
 * For each ROOT_ITEM whose objectid passes fs_root_objectid() the root is
 * read (uncached for the tree-reloc root, which is freed again after the
 * check) and handed to check_fs_root(). ROOT_REF and ROOT_BACKREF items
 * are handed to process_root_ref().
 *
 * When check_fs_root() returns -EAGAIN, or when tree_root->node is found
 * to differ from the node remembered before the loop, the collected root
 * records are freed and the path released; this looks like a restart of
 * the scan, but the jump itself is not visible here.
 *
 * Progress reporting is started when ctx.progress_enabled is set and
 * stopped with task_stop() at the end.
 *
 * NOTE(review): loop control, error handling after the individual calls
 * and the return statements are not present in this listing.
 */
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592 struct cache_tree *root_cache)
4594 struct btrfs_path path;
4595 struct btrfs_key key;
4596 struct walk_control wc;
4597 struct extent_buffer *leaf, *tree_node;
4598 struct btrfs_root *tmp_root;
4599 struct btrfs_root *tree_root = fs_info->tree_root;
4603 if (ctx.progress_enabled) {
4604 ctx.tp = TASK_FS_ROOTS;
4605 task_start(ctx.info);
4609 * Just in case we made any changes to the extent tree that weren't
4610 * reflected into the free space cache yet.
4613 reset_cached_block_groups(fs_info);
4614 memset(&wc, 0, sizeof(wc));
4615 cache_tree_init(&wc.shared);
4616 btrfs_init_path(&path);
4621 key.type = BTRFS_ROOT_ITEM_KEY;
4622 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4627 tree_node = tree_root->node;
4629 if (tree_node != tree_root->node) {
4630 free_root_recs_tree(root_cache);
4631 btrfs_release_path(&path);
4634 leaf = path.nodes[0];
4635 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636 ret = btrfs_next_leaf(tree_root, &path);
4642 leaf = path.nodes[0];
4644 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646 fs_root_objectid(key.objectid)) {
4647 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648 tmp_root = btrfs_read_fs_root_no_cache(
4651 key.offset = (u64)-1;
4652 tmp_root = btrfs_read_fs_root(
4655 if (IS_ERR(tmp_root)) {
4659 ret = check_fs_root(tmp_root, root_cache, &wc);
4660 if (ret == -EAGAIN) {
4661 free_root_recs_tree(root_cache);
4662 btrfs_release_path(&path);
4667 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668 btrfs_free_fs_root(tmp_root);
4669 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670 key.type == BTRFS_ROOT_BACKREF_KEY) {
4671 process_root_ref(leaf, path.slots[0], &key,
4678 btrfs_release_path(&path);
4680 free_extent_cache_tree(&wc.shared);
4681 if (!cache_tree_empty(&wc.shared))
4682 fprintf(stderr, "warning line %d\n", __LINE__);
4684 task_stop(ctx.info);
4690 * Find the @index according to @ino and name.
4691 * Notice: time efficiency is O(N)
4693 * @root: the root of the fs/file tree
4694 * @index_ret: the index as return value
4695 * @namebuf: the name to match
4696 * @name_len: the length of name to match
4697 * @file_type: the file_type of INODE_ITEM to match
4699 * Returns 0 if found and *@index_ret will be modified with right value
4700 * Returns <0 if not found, and *@index_ret will be (u64)-1
/*
 * Scan the DIR_INDEX items of directory @dirid from the highest index
 * downwards, looking for an entry whose location is the INODE_ITEM of
 * @location_id and whose file type and name equal @file_type and
 * @namebuf/@name_len.
 *
 * On a match *@index_ret receives the offset of the DIR_INDEX key. When
 * the scan runs out of items *@index_ret is set to the (u64)-1 sentinel.
 *
 * Fix: the sentinel was written as "(64)-1", which is the integer 63 and
 * therefore a plausible index rather than the "no index" marker that
 * find_dir_item() tests with "key->offset == (u64)-1".
 *
 * Names longer than BTRFS_NAME_LEN are clamped to BTRFS_NAME_LEN before
 * being compared.
 *
 * NOTE(review): loop control, several declarations and the return
 * statements are not present in this listing.
 */
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703 u64 *index_ret, char *namebuf, u32 name_len,
4706 struct btrfs_path path;
4707 struct extent_buffer *node;
4708 struct btrfs_dir_item *di;
4709 struct btrfs_key key;
4710 struct btrfs_key location;
4711 char name[BTRFS_NAME_LEN] = {0};
4723 /* search from the last index */
4724 key.objectid = dirid;
4725 key.offset = (u64)-1;
4726 key.type = BTRFS_DIR_INDEX_KEY;
4728 btrfs_init_path(&path);
4729 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4734 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/* No more DIR_INDEX items: report "not found" with the u64 sentinel. */
4737 *index_ret = (u64)-1;
4740 /* Check whether inode_id/filetype/name match */
4741 node = path.nodes[0];
4742 slot = path.slots[0];
4743 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744 total = btrfs_item_size_nr(node, slot);
4745 while (cur < total) {
4747 len = btrfs_dir_name_len(node, di);
4748 data_len = btrfs_dir_data_len(node, di);
4750 btrfs_dir_item_key_to_cpu(node, di, &location);
4751 if (location.objectid != location_id ||
4752 location.type != BTRFS_INODE_ITEM_KEY ||
4753 location.offset != 0)
4756 filetype = btrfs_dir_type(node, di);
4757 if (file_type != filetype)
4760 if (len > BTRFS_NAME_LEN)
4761 len = BTRFS_NAME_LEN;
4763 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764 if (len != name_len || strncmp(namebuf, name, len))
4767 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768 *index_ret = key.offset;
/* Step over this entry: header, name and trailing data. */
4772 len += sizeof(*di) + data_len;
4773 di = (struct btrfs_dir_item *)((char *)di + len);
4779 btrfs_release_path(&path);
4784 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785 * INODE_REF/INODE_EXTREF match.
4787 * @root: the root of the fs/file tree
4788 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789 * value while find index
4790 * @location_key: location key of the struct btrfs_dir_item to match
4791 * @name: the name to match
4792 * @namelen: the length of name
4793 * @file_type: the type of file to match
4795 * Return 0 if no error occurred.
4796 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797 * DIR_ITEM/DIR_INDEX
4798 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM or DIR_INDEX identified by @key and compare its
 * entries with @location_key, @file_type and @name/@namelen.
 *
 * A DIR_INDEX key whose offset is (u64)-1 is resolved first through
 * find_dir_index(), which writes the discovered index into key->offset.
 *
 * Every entry packed into the item is examined; the result is preset to
 * the *_MISMATCH code of the key type for each entry. Names longer than
 * BTRFS_NAME_LEN are clamped and a warning is emitted.
 *
 * NOTE(review): the warning prints len after it has been clamped, so it
 * always shows BTRFS_NAME_LEN rather than the on-disk length.
 * NOTE(review): loop control, several declarations and the return
 * statements are not present in this listing.
 */
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802 struct btrfs_key *location_key, char *name,
4803 u32 namelen, u8 file_type)
4805 struct btrfs_path path;
4806 struct extent_buffer *node;
4807 struct btrfs_dir_item *di;
4808 struct btrfs_key location;
4809 char namebuf[BTRFS_NAME_LEN] = {0};
4818 /* get the index by traversing all index */
4819 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820 ret = find_dir_index(root, key->objectid,
4821 location_key->objectid, &key->offset,
4822 name, namelen, file_type);
4824 ret = DIR_INDEX_MISSING;
4828 btrfs_init_path(&path);
4829 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4831 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4836 /* Check whether inode_id/filetype/name match */
4837 node = path.nodes[0];
4838 slot = path.slots[0];
4839 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840 total = btrfs_item_size_nr(node, slot);
4841 while (cur < total) {
4842 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4845 len = btrfs_dir_name_len(node, di);
4846 data_len = btrfs_dir_data_len(node, di);
4848 btrfs_dir_item_key_to_cpu(node, di, &location);
4849 if (location.objectid != location_key->objectid ||
4850 location.type != location_key->type ||
4851 location.offset != location_key->offset)
4854 filetype = btrfs_dir_type(node, di);
4855 if (file_type != filetype)
4858 if (len > BTRFS_NAME_LEN) {
4859 len = BTRFS_NAME_LEN;
4860 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4862 key->type == BTRFS_DIR_ITEM_KEY ?
4863 "DIR_ITEM" : "DIR_INDEX",
4864 key->objectid, key->offset, len);
4866 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4868 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry: header, name and trailing data. */
4874 len += sizeof(*di) + data_len;
4875 di = (struct btrfs_dir_item *)((char *)di + len);
4880 btrfs_release_path(&path);
4885 * Prints inode ref error message
/*
 * Report the problems found for one INODE_REF.
 *
 * @root:     tree the ref belongs to
 * @key:      key of the INODE_REF (objectid = inode, offset = parent dir)
 * @index:    dir index stored in the ref
 * @namebuf:  name stored in the ref
 * @name_len: length of @namebuf, used to compute the DIR_ITEM hash
 * @filetype: file type derived from the inode mode
 * @err:      bitmap of DIR_ITEM_* and DIR_INDEX_* error bits
 *
 * A ref that belongs to the root directory (BTRFS_FIRST_FREE_OBJECTID)
 * gets its own message. Otherwise one message is printed for the
 * DIR_ITEM and one for the DIR_INDEX, each saying whether the item is
 * mismatched or missing.
 *
 * Fix: the DIR INDEX message picked its wording by testing
 * DIR_ITEM_MISMATCH, so a mismatched index was reported as "missing"
 * (and the reverse) whenever the two items disagreed. It now tests
 * DIR_INDEX_MISMATCH.
 *
 * NOTE(review): @namebuf is printed with %s; callers pass a buffer of
 * BTRFS_NAME_LEN bytes that is not NUL-terminated when the name fills it.
 */
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888 u64 index, const char *namebuf, int name_len,
4889 u8 filetype, int err)
4894 /* root dir error */
4895 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4897 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898 root->objectid, key->objectid, key->offset, namebuf);
4903 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905 root->objectid, key->offset,
4906 btrfs_name_hash(namebuf, name_len),
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4909 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911 root->objectid, key->offset, index,
4912 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4917 * Insert the missing inode item.
4919 * Returns 0 means success.
4920 * Returns <0 means error.
/*
 * Create the INODE_ITEM for @ino in @root when it does not exist.
 *
 * A transaction is started and the INODE_ITEM key is searched with
 * ins_len 1 and cow 1. The search result decides whether the function
 * bails out (error, or item already present) or goes on to
 * create_inode_item_lowmem() and commits the transaction. A failure is
 * reported with the root and inode numbers. The path is released before
 * returning.
 *
 * NOTE(review): the remaining key setup, the jump targets and the return
 * statement are not present in this listing.
 */
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4925 struct btrfs_key key;
4926 struct btrfs_trans_handle *trans;
4927 struct btrfs_path path;
4931 key.type = BTRFS_INODE_ITEM_KEY;
4934 btrfs_init_path(&path);
4935 trans = btrfs_start_transaction(root, 1);
4936 if (IS_ERR(trans)) {
4941 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4942 if (ret < 0 || !ret)
4945 /* insert inode item */
4946 create_inode_item_lowmem(trans, root, ino, filetype);
4949 btrfs_commit_transaction(trans, root);
4952 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953 root->objectid, ino);
4954 btrfs_release_path(&path);
4959 * The ternary means dir item, dir index and relative inode ref.
4960 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4963 * If two of three is missing or mismatched, delete the existing one.
4964 * If one of three is missing or mismatched, add the missing one.
4966 * Returns 0 on success.
4967 * Returns non-zero on error.
/*
 * Repair the (DIR_ITEM, DIR_INDEX, INODE_REF) triple that links inode
 * @ino into directory @dir_ino under @name.
 *
 * The three groups of bits in @err (dir index, dir item, inode ref) are
 * inspected one after another to derive a stage value, which selects
 * between removing the link with btrfs_unlink() and recreating it with
 * btrfs_add_link(). The work runs in its own transaction, and the
 * outcome is reported with error() or printf().
 *
 * NOTE(review): the stage bookkeeping, the conditions selecting unlink
 * versus add, and the return statements are not present in this listing.
 * NOTE(review): @name is printed with %s; callers pass a buffer that may
 * not be NUL-terminated when the name is BTRFS_NAME_LEN bytes long.
 */
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970 u64 index, char *name, int name_len, u8 filetype,
4973 struct btrfs_trans_handle *trans;
4978 * stage shall be one of following valild values:
4979 * 0: Fine, nothing to do.
4980 * 1: One of three is wrong, so add missing one.
4981 * 2: Two of three is wrong, so delete existed one.
4983 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4985 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4987 if (err & (INODE_REF_MISSING))
4990 /* stage must be smaller than 3 */
4993 trans = btrfs_start_transaction(root, 1);
4995 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
5000 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001 filetype, &index, 1, 1);
5005 btrfs_commit_transaction(trans, root);
5008 error("fail to repair inode %llu name %s filetype %u",
5009 ino, name, filetype);
5011 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012 stage == 2 ? "Delete" : "Add",
5013 ino, name, filetype);
5019 * Traverse the given INODE_REF and call find_dir_item() to find related
5020 * DIR_ITEM/DIR_INDEX.
5022 * @root: the root of the fs/file tree
5023 * @ref_key: the key of the INODE_REF
5024 * @path: the path provides node and slot
5025 * @refs_ret: the count of INODE_REF
5026 * @mode: the st_mode of INODE_ITEM
5027 * @name_ret: returns with the first ref's name
5028 * @namelen_ret: len of the name_ret
5030 * Return 0 if no error occurred.
/*
 * Check every name packed into the INODE_REF item that @path points at.
 *
 * For each entry the index and name are read (names longer than
 * BTRFS_NAME_LEN are clamped with a warning). The first name found is
 * copied to @name_ret when it is non-NULL. A ref of the root directory
 * (BTRFS_FIRST_FREE_OBJECTID) must have index 0, name ".." and point back
 * at itself; otherwise both DIR_INDEX_MISSING and DIR_ITEM_MISSING are
 * raised so that repair removes it. For other refs the matching
 * DIR_INDEX and DIR_ITEM are looked up through find_dir_item() with the
 * file type derived from @mode.
 *
 * When errors are found and repair is enabled, repair_ternary_lowmem()
 * is called. After a repair the item is searched again because the path
 * and the dir item may have changed. Errors are printed through
 * print_inode_ref_err().
 *
 * NOTE(review): the repair and print calls receive name_len, the
 * on-disk length, while namebuf only holds len bytes; the two differ
 * when the name was clamped.
 * NOTE(review): loop control, the ref counting, several declarations and
 * the return statements are not present in this listing.
 */
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033 struct btrfs_path *path, char *name_ret,
5034 u32 *namelen_ret, u64 *refs_ret, int mode)
5036 struct btrfs_key key;
5037 struct btrfs_key location;
5038 struct btrfs_inode_ref *ref;
5039 struct extent_buffer *node;
5040 char namebuf[BTRFS_NAME_LEN] = {0};
5050 int need_research = 0;
5058 /* since after repair, path and the dir item may be changed */
5059 if (need_research) {
5061 btrfs_release_path(path);
5062 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063 /* the item was deleted, let path point to the last checked item */
5065 if (path->slots[0] == 0)
5066 btrfs_prev_leaf(root, path);
5074 location.objectid = ref_key->objectid;
5075 location.type = BTRFS_INODE_ITEM_KEY;
5076 location.offset = 0;
5077 node = path->nodes[0];
5078 slot = path->slots[0];
/* namebuf is a char array, so its element count equals its byte size. */
5080 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082 total = btrfs_item_size_nr(node, slot);
5085 /* Update inode ref count */
5088 index = btrfs_inode_ref_index(node, ref);
5089 name_len = btrfs_inode_ref_name_len(node, ref);
5091 if (name_len <= BTRFS_NAME_LEN) {
5094 len = BTRFS_NAME_LEN;
5095 warning("root %llu INODE_REF[%llu %llu] name too long",
5096 root->objectid, ref_key->objectid, ref_key->offset);
5099 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5101 /* copy the first name found to name_ret */
5102 if (refs == 1 && name_ret) {
5103 memcpy(name_ret, namebuf, len);
5107 /* Check root dir ref */
5108 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109 if (index != 0 || len != strlen("..") ||
5110 strncmp("..", namebuf, len) ||
5111 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112 /* set err bits then repair will delete the ref */
5113 err |= DIR_INDEX_MISSING;
5114 err |= DIR_ITEM_MISSING;
5119 /* Find related DIR_INDEX */
5120 key.objectid = ref_key->offset;
5121 key.type = BTRFS_DIR_INDEX_KEY;
5123 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124 imode_to_type(mode));
5126 /* Find related dir_item */
5127 key.objectid = ref_key->offset;
5128 key.type = BTRFS_DIR_ITEM_KEY;
5129 key.offset = btrfs_name_hash(namebuf, len);
5130 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131 imode_to_type(mode));
5133 if (tmp_err && repair) {
5134 ret = repair_ternary_lowmem(root, ref_key->offset,
5135 ref_key->objectid, index, namebuf,
5136 name_len, imode_to_type(mode),
5143 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144 imode_to_type(mode), tmp_err);
/* Advance to the next ref packed into the same item. */
5146 len = sizeof(*ref) + name_len;
5147 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5158 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159 * DIR_ITEM/DIR_INDEX.
5161 * @root: the root of the fs/file tree
5162 * @ref_key: the key of the INODE_EXTREF
5163 * @refs: the count of INODE_EXTREF
5164 * @mode: the st_mode of INODE_ITEM
5166 * Return 0 if no error occurred.
/*
 * Check every entry packed into the INODE_EXTREF item at @slot of @node.
 *
 * For each entry the name length, index and parent directory are read
 * (names longer than BTRFS_NAME_LEN are clamped with a warning). An
 * entry with index 0 whose name differs from ".." raises ROOT_DIR_ERROR.
 * The matching DIR_INDEX and DIR_ITEM of the parent are then looked up
 * through find_dir_item().
 *
 * Fix: find_dir_item() takes a u8 file type that it compares with
 * btrfs_dir_type(), but this function handed it the raw st_mode. The
 * mode is now converted with imode_to_type(), as check_inode_ref() does
 * for the same lookups.
 *
 * NOTE(review): loop control, the ref counting, several declarations,
 * the error accumulation and the return statements are not present in
 * this listing.
 */
5168 static int check_inode_extref(struct btrfs_root *root,
5169 struct btrfs_key *ref_key,
5170 struct extent_buffer *node, int slot, u64 *refs,
5173 struct btrfs_key key;
5174 struct btrfs_key location;
5175 struct btrfs_inode_extref *extref;
5176 char namebuf[BTRFS_NAME_LEN] = {0};
5186 location.objectid = ref_key->objectid;
5187 location.type = BTRFS_INODE_ITEM_KEY;
5188 location.offset = 0;
5190 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191 total = btrfs_item_size_nr(node, slot);
5194 /* update inode ref count */
5196 name_len = btrfs_inode_extref_name_len(node, extref);
5197 index = btrfs_inode_extref_index(node, extref);
5198 parent = btrfs_inode_extref_parent(node, extref);
5199 if (name_len <= BTRFS_NAME_LEN) {
5202 len = BTRFS_NAME_LEN;
5203 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204 root->objectid, ref_key->objectid, ref_key->offset);
5206 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5208 /* Check root dir ref name */
5209 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211 root->objectid, ref_key->objectid, ref_key->offset,
5213 err |= ROOT_DIR_ERROR;
5216 /* find related dir_index */
5217 key.objectid = parent;
5218 key.type = BTRFS_DIR_INDEX_KEY;
/* find_dir_item() expects a directory entry file type, not st_mode. */
5220 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
5223 /* find related dir_item */
5224 key.objectid = parent;
5225 key.type = BTRFS_DIR_ITEM_KEY;
5226 key.offset = btrfs_name_hash(namebuf, len);
5227 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* Advance to the next extref packed into the same item. */
5230 len = sizeof(*extref) + name_len;
5231 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5241 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242 * DIR_ITEM/DIR_INDEX match.
5243 * Return with @index_ret.
5245 * @root: the root of the fs/file tree
5246 * @key: the key of the INODE_REF/INODE_EXTREF
5247 * @name: the name in the INODE_REF/INODE_EXTREF
5248 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5249 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5250 * value (u64)-1 means do not check index
5251 * @ext_ref: the EXTENDED_IREF feature
5253 * Return 0 if no error occurred.
5254 * Return >0 for error bitmap
/*
 * Search for an inode ref that matches @name/@namelen and, unless
 * *@index_ret is (u64)-1, the given index.
 *
 * Phase 1 looks up the INODE_REF item at @key and walks its entries. An
 * entry whose name would run past the item, or is longer than
 * BTRFS_NAME_LEN, triggers a warning and its length is bounded by what
 * is left in the item. On a match *@index_ret receives the index of the
 * entry.
 *
 * Phase 2 rewrites @key in place into the INODE_EXTREF key (type and
 * hashed offset) and walks that item the same way, additionally
 * requiring the parent of the entry to equal the original key->offset.
 * A comment in the body says the phase is skipped without EXTENDED_IREF
 * support; the test itself is not visible in this listing.
 *
 * The result is preset to INODE_REF_MISSING for a failed search and for
 * each entry examined.
 *
 * NOTE(review): because @key is modified in phase 2, callers see the
 * EXTREF type and offset afterwards.
 * NOTE(review): loop control, several declarations and the return
 * statements are not present in this listing.
 */
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257 char *name, int namelen, u64 *index_ret,
5258 unsigned int ext_ref)
5260 struct btrfs_path path;
5261 struct btrfs_inode_ref *ref;
5262 struct btrfs_inode_extref *extref;
5263 struct extent_buffer *node;
5264 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5277 btrfs_init_path(&path);
5278 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5280 ret = INODE_REF_MISSING;
5284 node = path.nodes[0];
5285 slot = path.slots[0];
5287 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288 total = btrfs_item_size_nr(node, slot);
5290 /* Iterate all entry of INODE_REF */
5291 while (cur < total) {
5292 ret = INODE_REF_MISSING;
5294 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295 ref_index = btrfs_inode_ref_index(node, ref);
5296 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5299 if (cur + sizeof(*ref) + ref_namelen > total ||
5300 ref_namelen > BTRFS_NAME_LEN) {
5301 warning("root %llu INODE %s[%llu %llu] name too long",
5303 key->type == BTRFS_INODE_REF_KEY ?
5305 key->objectid, key->offset);
5307 if (cur + sizeof(*ref) > total)
5309 len = min_t(u32, total - cur - sizeof(*ref),
5315 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5318 if (len != namelen || strncmp(ref_namebuf, name, len))
5321 *index_ret = ref_index;
5325 len = sizeof(*ref) + ref_namelen;
5326 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5331 /* Skip if not support EXTENDED_IREF feature */
5335 btrfs_release_path(&path);
5336 btrfs_init_path(&path);
/* Re-key the search: extrefs are keyed by a hash of parent dir and name. */
5338 dir_id = key->offset;
5339 key->type = BTRFS_INODE_EXTREF_KEY;
5340 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5342 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5344 ret = INODE_REF_MISSING;
5348 node = path.nodes[0];
5349 slot = path.slots[0];
5351 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5353 total = btrfs_item_size_nr(node, slot);
5355 /* Iterate all entry of INODE_EXTREF */
5356 while (cur < total) {
5357 ret = INODE_REF_MISSING;
5359 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360 ref_index = btrfs_inode_extref_index(node, extref);
5361 parent = btrfs_inode_extref_parent(node, extref);
5362 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5365 if (parent != dir_id)
5368 if (ref_namelen <= BTRFS_NAME_LEN) {
5371 len = BTRFS_NAME_LEN;
5372 warning("root %llu INODE %s[%llu %llu] name too long",
5374 key->type == BTRFS_INODE_REF_KEY ?
5376 key->objectid, key->offset);
5378 read_extent_buffer(node, ref_namebuf,
5379 (unsigned long)(extref + 1), len);
5381 if (len != namelen || strncmp(ref_namebuf, name, len))
5384 *index_ret = ref_index;
5389 len = sizeof(*extref) + ref_namelen;
5390 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5395 btrfs_release_path(&path);
/*
 * Report the problems found for one DIR_ITEM/DIR_INDEX entry.
 *
 * @root:     tree the entry belongs to
 * @key:      key of the DIR_ITEM or DIR_INDEX being checked
 * @ino:      inode the entry points at
 * @index:    dir index of the entry
 * @namebuf:  name of the entry
 * @name_len: length of @namebuf (not used by the visible lines)
 * @filetype: file type stored in the entry
 * @err:      bitmap of DIR_ITEM_*, DIR_INDEX_*, INODE_ITEM_* and
 *            INODE_REF_MISSING bits
 *
 * One message is printed per affected item kind.
 *
 * Fixes:
 *  - the DIR INDEX message chose its wording by testing
 *    DIR_ITEM_MISMATCH; it now tests DIR_INDEX_MISMATCH, so that a
 *    mismatched index is no longer reported as "missing";
 *  - the word "mismatch" was misspelled "mismath" in three messages.
 *
 * NOTE(review): @namebuf is printed with %s; callers pass a buffer of
 * BTRFS_NAME_LEN bytes that is not NUL-terminated when the name fills it.
 */
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400 u64 ino, u64 index, const char *namebuf,
5401 int name_len, u8 filetype, int err)
5403 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405 root->objectid, key->objectid, key->offset, namebuf,
5407 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
5410 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412 root->objectid, key->objectid, index, namebuf, filetype,
5413 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5416 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5418 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419 root->objectid, ino, index, namebuf, filetype,
5420 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5423 if (err & INODE_REF_MISSING)
5425 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426 root->objectid, ino, key->objectid, namebuf, filetype);
5431 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5433 * Returns error after repair
/*
 * Dispatch the repairs for one directory entry according to @err.
 *
 * INODE_ITEM_MISSING is handled first through
 * repair_inode_item_missing(); the INODE_ITEM_* bits are cleared from
 * the error bitmap on a line whose guarding condition is not visible.
 * Any remaining non-INODE_ITEM bits are passed to
 * repair_ternary_lowmem(), after which the DIR_INDEX_*, DIR_ITEM_* and
 * INODE_REF_MISSING bits are cleared the same way.
 *
 * NOTE(review): the checks of the two repair results and the return
 * statement are not present in this listing.
 */
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436 u64 index, u8 filetype, char *namebuf, u32 name_len,
5441 if (err & INODE_ITEM_MISSING) {
5442 ret = repair_inode_item_missing(root, ino, filetype);
5444 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5447 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449 name_len, filetype, err);
5451 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type (DIR_ITEM or DIR_INDEX) that belong
 * to directory @ino, starting from the highest offset and stepping
 * backwards with btrfs_previous_item(), and visit every entry packed
 * into each item. Name lengths above BTRFS_NAME_LEN are clamped.
 *
 * NOTE(review): the accumulation of the size, the output parameter, loop
 * control and the return statements are not present in this listing.
 */
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5462 struct btrfs_key key;
5463 struct btrfs_path path;
5465 struct btrfs_dir_item *di;
5475 key.offset = (u64)-1;
5477 btrfs_init_path(&path);
5478 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5483 /* if found, go to special case */
5488 ret = btrfs_previous_item(root, &path, ino, type);
5496 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5498 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5500 while (cur < total) {
5501 len = btrfs_dir_name_len(path.nodes[0], di);
5502 if (len > BTRFS_NAME_LEN)
5503 len = BTRFS_NAME_LEN;
5506 len += btrfs_dir_data_len(path.nodes[0], di);
5508 di = (struct btrfs_dir_item *)((char *)di + len);
5514 btrfs_release_path(&path);
/*
 * Compute the size of directory inode @ino in @root and store it in *size.
 *
 * The value is the sum of what __count_dir_isize() reports for the inode's
 * DIR_ITEM keys and for its DIR_INDEX keys.
 */
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5525 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5529 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5533 *size = item_size + index_size;
/* Error report; the condition leading here is not visible in this excerpt. */
5537 error("failed to count root %llu INODE[%llu] root size",
5538 root->objectid, ino);
5543 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5546 * @root: the root of the fs/file tree
5547 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5549 * @size: the st_size of the INODE_ITEM
5550 * @ext_ref: the EXTENDED_IREF feature
5552 * Return 0 if no error occurred.
5553 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Validate every entry packed in the DIR_ITEM/DIR_INDEX item that @path
 * points at: data_len, name length and name hash, the INODE_ITEM the entry
 * refers to, the matching INODE_REF/INODE_EXTREF and the sibling
 * DIR_INDEX/DIR_ITEM. The on-disk name_len of each entry is added to *size.
 */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556 struct btrfs_path *path, u64 *size,
5557 unsigned int ext_ref)
5559 struct btrfs_dir_item *di;
5560 struct btrfs_inode_item *ii;
5561 struct btrfs_key key;
5562 struct btrfs_key location;
5563 struct extent_buffer *node;
5565 char namebuf[BTRFS_NAME_LEN] = {0};
5577 int need_research = 0;
5580 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581 * ignore index check.
5583 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584 index = di_key->offset;
5591 /* since after repair, path and the dir item may be changed */
5592 if (need_research) {
5594 err |= DIR_COUNT_AGAIN;
5595 btrfs_release_path(path);
5596 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597 /* the item was deleted, let path point the last checked item */
5599 if (path->slots[0] == 0)
5600 btrfs_prev_leaf(root, path);
/* (Re)load the item and start from its first packed entry. */
5608 node = path->nodes[0];
5609 slot = path->slots[0];
5611 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the element count equals its byte size. */
5613 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5615 while (cur < total) {
5616 data_len = btrfs_dir_data_len(node, di);
5619 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5621 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622 di_key->objectid, di_key->offset, data_len);
/*
 * name_len keeps the on-disk value; len is the length used for copying the
 * name and is set to BTRFS_NAME_LEN when the on-disk name is too long.
 */
5624 name_len = btrfs_dir_name_len(node, di);
5625 if (name_len <= BTRFS_NAME_LEN) {
5628 len = BTRFS_NAME_LEN;
5629 warning("root %llu %s[%llu %llu] name too long",
5631 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632 di_key->objectid, di_key->offset);
/* The size accumulates the on-disk name_len, not the clamped len. */
5634 (*size) += name_len;
5635 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5637 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset has to equal the hash of the name. */
5639 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640 di_key->offset != btrfs_name_hash(namebuf, len)) {
5642 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643 root->objectid, di_key->objectid, di_key->offset,
5644 namebuf, len, filetype, di_key->offset,
5645 btrfs_name_hash(namebuf, len));
5648 btrfs_dir_item_key_to_cpu(node, di, &location);
5649 /* Ignore related ROOT_ITEM check */
5650 if (location.type == BTRFS_ROOT_ITEM_KEY)
5653 btrfs_release_path(path);
5654 /* Check relative INODE_ITEM(existence/filetype) */
5655 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5657 tmp_err |= INODE_ITEM_MISSING;
5661 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662 struct btrfs_inode_item);
5663 mode = btrfs_inode_mode(path->nodes[0], ii);
5664 if (imode_to_type(mode) != filetype) {
5665 tmp_err |= INODE_ITEM_MISMATCH;
5669 /* Check relative INODE_REF/INODE_EXTREF */
5670 key.objectid = location.objectid;
5671 key.type = BTRFS_INODE_REF_KEY;
5672 key.offset = di_key->objectid;
5673 tmp_err |= find_inode_ref(root, &key, namebuf, len,
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY just above, so unless
 * find_inode_ref() rewrites it to BTRFS_DIR_ITEM_KEY the test below can never
 * be true; it looks like di_key->type was intended - confirm.
 * NOTE(review): the hash and find_dir_item() below use name_len while namebuf
 * only holds len bytes when the name was too long - confirm this is intended.
 */
5676 /* check relative INDEX/ITEM */
5677 key.objectid = di_key->objectid;
5678 if (key.type == BTRFS_DIR_ITEM_KEY) {
5679 key.type = BTRFS_DIR_INDEX_KEY;
5682 key.type = BTRFS_DIR_ITEM_KEY;
5683 key.offset = btrfs_name_hash(namebuf, name_len);
5686 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687 name_len, filetype);
5688 /* find_dir_item may find index */
5689 if (key.type == BTRFS_DIR_INDEX_KEY)
/*
 * In repair mode hand the collected flags to repair_dir_item(), which returns
 * the flags that remain. The file type passed is derived from mode.
 */
5693 if (tmp_err && repair) {
5694 ret = repair_dir_item(root, di_key->objectid,
5695 location.objectid, index,
5696 imode_to_type(mode), namebuf,
5698 if (ret != tmp_err) {
5703 btrfs_release_path(path);
5704 print_dir_item_err(root, di_key, location.objectid, index,
5705 namebuf, name_len, filetype, tmp_err);
/* Step to the next entry packed in the same item. */
5707 len = sizeof(*di) + name_len + data_len;
5708 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is expected to hold exactly one entry. */
5711 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713 root->objectid, di_key->objectid,
/*
 * Point path at di_key again before returning.
 * NOTE(review): a negative errno is ORed into err, which otherwise carries
 * flag bits - confirm callers cope with that.
 */
5720 btrfs_release_path(path);
5721 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5723 err |= ret > 0 ? -ENOENT : ret;
5728 * Wrapper function of btrfs_punch_hole.
5730 * Returns 0 means success.
5731 * Returns not 0 means error.
/*
 * Run btrfs_punch_hole() for inode @ino inside a transaction of its own and
 * print the outcome.
 */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5736 struct btrfs_trans_handle *trans;
/* The error encoded in the handle is returned as is on the failure path. */
5739 trans = btrfs_start_transaction(root, 1);
5741 return PTR_ERR(trans);
5743 ret = btrfs_punch_hole(trans, root, ino, start, len);
5745 error("failed to add hole [%llu, %llu] in inode [%llu]",
/* The pair printed in brackets is start and length, not start and end. */
5748 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5751 btrfs_commit_transaction(trans, root);
5756 * Check file extent datasum/hole, update the size of the file extents,
5757 * check and update the last offset of the file extent.
5759 * @root: the root of fs/file tree.
5760 * @fkey: the key of the file extent.
5761 * @nodatasum: INODE_NODATASUM feature.
5762 * @size: the sum of all EXTENT_DATA items size for this inode.
5763 * @end: the offset of the last extent.
5765 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline extent sanity, extent type, checksum
 * coverage and contiguity with the previous extent. *size and *end are
 * advanced by the extent length.
 */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768 struct extent_buffer *node, int slot,
5769 unsigned int nodatasum, u64 *size, u64 *end)
5771 struct btrfs_file_extent_item *fi;
5774 u64 extent_num_bytes;
5776 u64 csum_found; /* In byte size, sectorsize aligned */
5777 u64 search_start; /* Logical range start we search for csum */
5778 u64 search_len; /* Logical range len we search for csum */
5779 unsigned int extent_type;
5780 unsigned int is_hole;
5785 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5787 /* Check inline extent */
5788 extent_type = btrfs_file_extent_type(node, fi);
5789 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790 struct btrfs_item *e = btrfs_item_nr(slot);
5791 u32 item_inline_len;
5793 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795 compressed = btrfs_file_extent_compression(node, fi);
5796 if (extent_num_bytes == 0) {
5798 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799 root->objectid, fkey->objectid, fkey->offset);
5800 err |= FILE_EXTENT_ERROR;
/* Only an uncompressed inline extent must match the item's inline length. */
5802 if (!compressed && extent_num_bytes != item_inline_len) {
5804 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805 root->objectid, fkey->objectid, fkey->offset,
5806 extent_num_bytes, item_inline_len);
5807 err |= FILE_EXTENT_ERROR;
/* Inline data is accounted in both the end offset and the size. */
5809 *end += extent_num_bytes;
5810 *size += extent_num_bytes;
5814 /* Check extent type */
5815 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817 err |= FILE_EXTENT_ERROR;
5818 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819 root->objectid, fkey->objectid, fkey->offset);
5823 /* Check REG_EXTENT/PREALLOC_EXTENT */
5824 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827 extent_offset = btrfs_file_extent_offset(node, fi);
5828 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with both disk_bytenr and disk_num_bytes zero is a hole. */
5829 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5832 * Check EXTENT_DATA csum
5834 * For plain (uncompressed) extent, we should only check the range
5835 * we're referring to, as it's possible that part of prealloc extent
5836 * has been written, and has csum:
5838 * |<--- Original large preallocated extent A ---->|
5839 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5842 * For compressed extent, we should check the whole range.
5845 search_start = disk_bytenr + extent_offset;
5846 search_len = extent_num_bytes;
5848 search_start = disk_bytenr;
5849 search_len = disk_num_bytes;
/*
 * csum_found is then judged three ways: any csum on a nodatasum inode is odd,
 * a regular non-hole extent needs csums for the whole searched range, and a
 * prealloc extent must not have any.
 */
5851 ret = count_csum_range(root, search_start, search_len, &csum_found);
5852 if (csum_found > 0 && nodatasum) {
5853 err |= ODD_CSUM_ITEM;
5854 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855 root->objectid, fkey->objectid, fkey->offset);
5856 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857 !is_hole && (ret < 0 || csum_found < search_len)) {
5858 err |= CSUM_ITEM_MISSING;
5859 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860 root->objectid, fkey->objectid, fkey->offset,
5861 csum_found, search_len);
5862 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863 err |= ODD_CSUM_ITEM;
5864 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865 root->objectid, fkey->objectid, fkey->offset, csum_found);
/*
 * When no_holes is not set, this extent has to start exactly where the
 * previous one ended (*end); the gap is the range handed to
 * punch_extent_hole().
 */
5868 /* Check EXTENT_DATA hole */
5869 if (!no_holes && *end != fkey->offset) {
5871 ret = punch_extent_hole(root, fkey->objectid,
5872 *end, fkey->offset - *end);
5873 if (!repair || ret) {
5874 err |= FILE_EXTENT_ERROR;
5876 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5877 root->objectid, fkey->objectid, fkey->offset, *end);
/* NOTE(review): any condition guarding the *size update is not visible here. */
5881 *end += extent_num_bytes;
5883 *size += extent_num_bytes;
5889 * Set inode item nbytes to @nbytes
5891 * Returns 0 on success
5892 * Returns != 0 on error
/*
 * Rewrite the nbytes field of the INODE_ITEM of @ino inside a transaction,
 * then point @path back at the item it referenced on entry.
 */
5894 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5895 struct btrfs_path *path,
5896 u64 ino, u64 nbytes)
5898 struct btrfs_trans_handle *trans;
5899 struct btrfs_inode_item *ii;
5900 struct btrfs_key key;
5901 struct btrfs_key research_key;
/* Remember where path points; it is released and reused for the update. */
5905 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5908 key.type = BTRFS_INODE_ITEM_KEY;
5911 trans = btrfs_start_transaction(root, 1);
5912 if (IS_ERR(trans)) {
5913 ret = PTR_ERR(trans);
/* Search with the transaction handle because the leaf is modified below. */
5918 btrfs_release_path(path);
5919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5927 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5928 struct btrfs_inode_item);
5929 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5930 btrfs_mark_buffer_dirty(path->nodes[0]);
5932 btrfs_commit_transaction(trans, root);
5935 error("failed to set nbytes in inode %llu root %llu",
5936 ino, root->root_key.objectid);
/*
 * NOTE(review): the "\n" sits in the middle of the format string, so
 * " to <nbytes>" starts a new line and the message has no trailing newline;
 * it looks like the newline was meant to be at the end - confirm.
 */
5938 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5939 root->root_key.objectid, nbytes);
/* Restore path to the key saved on entry. */
5942 btrfs_release_path(path);
5943 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5950 * Set directory inode isize to @isize.
5952 * Returns 0 on success.
5953 * Returns != 0 on error.
/*
 * Rewrite the size field of the INODE_ITEM of a directory inode inside a
 * transaction, then point @path back at the item it referenced on entry.
 */
5955 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5956 struct btrfs_path *path,
5959 struct btrfs_trans_handle *trans;
5960 struct btrfs_inode_item *ii;
5961 struct btrfs_key key;
5962 struct btrfs_key research_key;
/* Remember where path points; it is released and reused for the update. */
5966 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5969 key.type = BTRFS_INODE_ITEM_KEY;
5972 trans = btrfs_start_transaction(root, 1);
5973 if (IS_ERR(trans)) {
5974 ret = PTR_ERR(trans);
/* Search with the transaction handle because the leaf is modified below. */
5979 btrfs_release_path(path);
5980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5988 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5989 struct btrfs_inode_item);
5990 btrfs_set_inode_size(path->nodes[0], ii, isize);
5991 btrfs_mark_buffer_dirty(path->nodes[0]);
5993 btrfs_commit_transaction(trans, root);
5996 error("failed to set isize in inode %llu root %llu",
5997 ino, root->root_key.objectid);
5999 printf("Set isize in inode %llu root %llu to %llu\n",
6000 ino, root->root_key.objectid, isize);
/* Restore path to the key saved on entry. */
6002 btrfs_release_path(path);
6003 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6010 * Wrapper function for btrfs_add_orphan_item().
6012 * Returns 0 on success.
6013 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino via btrfs_add_orphan_item() inside a
 * transaction, then point @path back at the item it referenced on entry.
 */
6015 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6016 struct btrfs_path *path, u64 ino)
6018 struct btrfs_trans_handle *trans;
6019 struct btrfs_key research_key;
/* Remember where path points; it is handed to btrfs_add_orphan_item(). */
6023 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6025 trans = btrfs_start_transaction(root, 1);
6026 if (IS_ERR(trans)) {
6027 ret = PTR_ERR(trans);
6032 btrfs_release_path(path);
6033 ret = btrfs_add_orphan_item(trans, root, path, ino);
6035 btrfs_commit_transaction(trans, root);
6038 error("failed to add inode %llu as orphan item root %llu",
6039 ino, root->root_key.objectid);
6041 printf("Added inode %llu as orphan item root %llu\n",
6042 ino, root->root_key.objectid);
/* Restore path to the key saved on entry. */
6044 btrfs_release_path(path);
6045 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6051 /* Set inode_item nlink to @ref_count.
6052 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6054 * Returns 0 on success
/*
 * Set the nlink field of the INODE_ITEM of @ino to @ref_count inside a
 * transaction; with @ref_count == 0 the inode is first linked into lost+found
 * through link_inode_to_lostfound(). @path is re-pointed at its original key
 * afterwards.
 */
6056 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6057 struct btrfs_path *path, u64 ino,
6058 const char *name, u32 namelen,
6059 u64 ref_count, u8 filetype, u64 *nlink)
6061 struct btrfs_trans_handle *trans;
6062 struct btrfs_inode_item *ii;
6063 struct btrfs_key key;
6064 struct btrfs_key old_key;
6065 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember where path points so it can be restored at the end. */
6071 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/*
 * Use the caller's name when one is given, otherwise the decimal inode number.
 * NOTE(review): with namelen == BTRFS_NAME_LEN the copy fills namebuf
 * completely, leaving no terminating NUL for the %s uses below - confirm.
 */
6073 if (name && namelen) {
6074 ASSERT(namelen <= BTRFS_NAME_LEN);
6075 memcpy(namebuf, name, namelen);
6078 sprintf(namebuf, "%llu", ino);
6079 name_len = count_digits(ino);
6080 printf("Can't find file name for inode %llu, use %s instead\n",
6084 trans = btrfs_start_transaction(root, 1);
6085 if (IS_ERR(trans)) {
6086 ret = PTR_ERR(trans);
6090 btrfs_release_path(path);
6091 /* if refs is 0, put it into lostfound */
6092 if (ref_count == 0) {
6093 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6094 name_len, filetype, &ref_count);
6099 /* reset inode_item's nlink to ref_count */
6101 key.type = BTRFS_INODE_ITEM_KEY;
6104 btrfs_release_path(path);
6105 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6111 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6112 struct btrfs_inode_item);
6113 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6114 btrfs_mark_buffer_dirty(path->nodes[0]);
6119 btrfs_commit_transaction(trans, root);
/*
 * NOTE(review): both messages below say "inode %llu root %llu" but pass
 * root->objectid before ino, so the two numbers are printed swapped - confirm
 * and fix the argument order.
 */
6123 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6124 root->objectid, ino, namebuf, filetype);
6126 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6127 root->objectid, ino, namebuf, filetype);
/* Restore path to the key saved on entry. */
6130 btrfs_release_path(path);
6131 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6138 * Check INODE_ITEM and related ITEMs (the same inode number)
6139 * 1. check link count
6140 * 2. check inode ref/extref
6141 * 3. check dir item/index
6143 * @ext_ref: the EXTENDED_IREF feature
6145 * Return 0 if no error occurred.
6146 * Return >0 (an error bitmap) on error or when the traversal is done
/*
 * Check the INODE_ITEM that @path points at together with the items that
 * follow it under the same objectid, then compare the gathered numbers (refs,
 * directory size, extent size) with the nlink, size and nbytes fields of the
 * inode.
 */
6148 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6149 unsigned int ext_ref)
6151 struct extent_buffer *node;
6152 struct btrfs_inode_item *ii;
6153 struct btrfs_key key;
6154 struct btrfs_key last_key;
6163 u64 extent_size = 0;
6165 unsigned int nodatasum;
6169 char namebuf[BTRFS_NAME_LEN] = {0};
6172 node = path->nodes[0];
6173 slot = path->slots[0];
6175 btrfs_item_key_to_cpu(node, &key, slot);
6176 inode_id = key.objectid;
/* Items under the orphan objectid are stepped over. */
6178 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6179 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified after the walk. */
6185 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6186 isize = btrfs_inode_size(node, ii);
6187 nbytes = btrfs_inode_nbytes(node, ii);
6188 mode = btrfs_inode_mode(node, ii);
6189 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6190 nlink = btrfs_inode_nlink(node, ii);
6191 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* last_key records the current item before stepping to the next one. */
6194 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6195 ret = btrfs_next_item(root, path);
6197 /* out will fill 'err' using current statistics */
6199 } else if (ret > 0) {
/* An item with a different objectid belongs to another inode. */
6204 node = path->nodes[0];
6205 slot = path->slots[0];
6206 btrfs_item_key_to_cpu(node, &key, slot);
6207 if (key.objectid != inode_id)
/* Dispatch on the item type to the matching checker. */
6211 case BTRFS_INODE_REF_KEY:
6212 ret = check_inode_ref(root, &key, path, namebuf,
6213 &name_len, &refs, mode);
6216 case BTRFS_INODE_EXTREF_KEY:
6217 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6218 warning("root %llu EXTREF[%llu %llu] isn't supported",
6219 root->objectid, key.objectid,
6221 ret = check_inode_extref(root, &key, node, slot, &refs,
6225 case BTRFS_DIR_ITEM_KEY:
6226 case BTRFS_DIR_INDEX_KEY:
6228 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6229 root->objectid, inode_id,
6230 imode_to_type(mode), key.objectid,
6233 ret = check_dir_item(root, &key, path, &size, ext_ref);
6236 case BTRFS_EXTENT_DATA_KEY:
6238 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6239 root->objectid, inode_id, key.objectid,
6242 ret = check_file_extent(root, &key, node, slot,
6243 nodatasum, &extent_size,
6247 case BTRFS_XATTR_ITEM_KEY:
6250 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6251 key.objectid, key.type, key.offset);
/* With LAST_ITEM set, path is moved back to the last item seen. */
6256 if (err & LAST_ITEM) {
6257 btrfs_release_path(path);
6258 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6263 /* verify INODE_ITEM nlink/isize/nbytes */
/* After a repair flagged DIR_COUNT_AGAIN, the directory size is recounted. */
6265 if (repair && (err & DIR_COUNT_AGAIN)) {
6266 err &= ~DIR_COUNT_AGAIN;
6267 count_dir_isize(root, inode_id, &size);
/* Per the error text below, this link-count check concerns directory inodes. */
6270 if ((nlink != 1 || refs != 1) && repair) {
6271 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6272 namebuf, name_len, refs, imode_to_type(mode),
6277 err |= LINK_COUNT_ERROR;
6278 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6279 root->objectid, inode_id, nlink);
6283 * Just a warning, as dir inode nbytes is just an
6284 * instructive value.
6286 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6287 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6288 root->objectid, inode_id,
6289 root->fs_info->nodesize);
/* The recorded size has to match the size accumulated from the dir entries. */
6292 if (isize != size) {
6294 ret = repair_dir_isize_lowmem(root, path,
6296 if (!repair || ret) {
6299 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6300 root->objectid, inode_id, isize, size);
/* nlink has to equal the number of refs found during the walk. */
6304 if (nlink != refs) {
6306 ret = repair_inode_nlinks_lowmem(root, path,
6307 inode_id, namebuf, name_len, refs,
6308 imode_to_type(mode), &nlink);
6309 if (!repair || ret) {
6310 err |= LINK_COUNT_ERROR;
6312 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6313 root->objectid, inode_id, nlink, refs);
6315 } else if (!nlink) {
6317 ret = repair_inode_orphan_item_lowmem(root,
6319 if (!repair || ret) {
6321 error("root %llu INODE[%llu] is orphan item",
6322 root->objectid, inode_id);
/* Zero nbytes with extents ending before isize calls for a trailing hole. */
6326 if (!nbytes && !no_holes && extent_end < isize) {
6328 ret = punch_extent_hole(root, inode_id,
6329 extent_end, isize - extent_end);
6330 if (!repair || ret) {
6331 err |= NBYTES_ERROR;
6333 "root %llu INODE[%llu] size %llu should have a file extent hole",
6334 root->objectid, inode_id, isize);
/* nbytes has to equal the size accumulated from the file extents. */
6338 if (nbytes != extent_size) {
6340 ret = repair_inode_nbytes_lowmem(root, path,
6341 inode_id, extent_size);
6342 if (!repair || ret) {
6343 err |= NBYTES_ERROR;
6345 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6346 root->objectid, inode_id, nbytes,
/* With LAST_ITEM set, path is advanced by one item before returning. */
6352 if (err & LAST_ITEM)
6353 btrfs_next_item(root, path);
6358 * Insert the missing inode item and inode ref.
6360 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
6361 * Root dir should be handled specially because root dir is the root of fs.
6363 * returns err (>0 or 0) after repair
/*
 * Repair the root directory inode (BTRFS_FIRST_FREE_OBJECTID) of @root:
 * insert its ".." INODE_REF and/or recreate its INODE_ITEM, depending on the
 * bits set in @err.
 */
6365 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6367 struct btrfs_trans_handle *trans;
6368 struct btrfs_key key;
6369 struct btrfs_path path;
6370 int filetype = BTRFS_FT_DIR;
6373 btrfs_init_path(&path);
/* The root dir refers to itself: objectid and offset are both the first ino. */
6375 if (err & INODE_REF_MISSING) {
6376 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6377 key.type = BTRFS_INODE_REF_KEY;
6378 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6380 trans = btrfs_start_transaction(root, 1);
6381 if (IS_ERR(trans)) {
6382 ret = PTR_ERR(trans);
6386 btrfs_release_path(&path);
6387 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* The ref is inserted with the name ".." and index 0. */
6391 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6392 BTRFS_FIRST_FREE_OBJECTID,
6393 BTRFS_FIRST_FREE_OBJECTID, 0);
6397 printf("Add INODE_REF[%llu %llu] name %s\n",
6398 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6400 err &= ~INODE_REF_MISSING;
6403 error("fail to insert first inode's ref");
6404 btrfs_commit_transaction(trans, root);
/* The INODE_ITEM is recreated as a directory (filetype is BTRFS_FT_DIR). */
6407 if (err & INODE_ITEM_MISSING) {
6408 ret = repair_inode_item_missing(root,
6409 BTRFS_FIRST_FREE_OBJECTID, filetype);
6412 err &= ~INODE_ITEM_MISSING;
6416 error("fail to repair first inode");
6417 btrfs_release_path(&path);
6422 * check first root dir's inode_item and inode_ref
6424 * returns 0 means no error
6425 * returns >0 means error
6426 * returns <0 means fatal error
/*
 * Verify that the root directory inode (BTRFS_FIRST_FREE_OBJECTID) of @root
 * has a directory-typed INODE_ITEM and a ".." INODE_REF, calling
 * repair_fs_first_inode() on the repair path.
 */
6428 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6430 struct btrfs_path path;
6431 struct btrfs_key key;
6432 struct btrfs_inode_item *ii;
6438 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6439 key.type = BTRFS_INODE_ITEM_KEY;
6442 /* For root being dropped, we don't need to check first inode */
6443 if (btrfs_root_refs(&root->root_item) == 0 &&
6444 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6445 BTRFS_FIRST_FREE_OBJECTID)
6448 btrfs_init_path(&path);
6449 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6454 err |= INODE_ITEM_MISSING;
/* When the item exists, its mode must describe a directory. */
6456 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6457 struct btrfs_inode_item);
6458 mode = btrfs_inode_mode(path.nodes[0], ii);
6459 if (imode_to_type(mode) != BTRFS_FT_DIR)
6460 err |= INODE_ITEM_MISMATCH;
6463 /* lookup first inode ref */
6464 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6465 key.type = BTRFS_INODE_REF_KEY;
6466 /* special index value */
6469 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6475 btrfs_release_path(&path);
6478 err = repair_fs_first_inode(root, err);
/* Report whatever is still flagged in err. */
6480 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6481 error("root dir INODE_ITEM is %s",
6482 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6483 if (err & INODE_REF_MISSING)
6484 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the error bitmap. */
6486 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A stack-allocated match key is filled in and handed to rb_search() with
 * compare_extent_backref() as the comparator. back starts out as NULL; the
 * condition guarding the final conversion is not visible in this excerpt.
 */
6489 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6490 u64 parent, u64 root)
6492 struct rb_node *node;
6493 struct tree_backref *back = NULL;
6494 struct tree_backref match = {
/* These two assignments mark the key as a full backref keyed by parent. */
6501 match.parent = parent;
6502 match.node.full_backref = 1;
6507 node = rb_search(&rec->backref_tree, &match.node.node,
6508 (rb_compare_keys)compare_extent_backref, NULL);
6510 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A stack-allocated match key is filled in (found_ref and disk_bytenr are
 * among the initialized fields) and handed to rb_search() with
 * compare_extent_backref() as the comparator. back starts out as NULL; the
 * condition guarding the final conversion is not visible in this excerpt.
 */
6515 static struct data_backref *find_data_backref(struct extent_record *rec,
6516 u64 parent, u64 root,
6517 u64 owner, u64 offset,
6519 u64 disk_bytenr, u64 bytes)
6521 struct rb_node *node;
6522 struct data_backref *back = NULL;
6523 struct data_backref match = {
6530 .found_ref = found_ref,
6531 .disk_bytenr = disk_bytenr,
/* These two assignments mark the key as a full backref keyed by parent. */
6535 match.parent = parent;
6536 match.node.full_backref = 1;
6541 node = rb_search(&rec->backref_tree, &match.node.node,
6542 (rb_compare_keys)compare_extent_backref, NULL);
6544 back = to_data_backref(rb_node_to_extent_backref(node));
6549 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6550 * blocks and integrity of fs tree items.
6552 * @root: the root of the tree to be checked.
6553 * @ext_ref whether the EXTENDED_IREF feature is enabled or not.
6554 * @check_all if NOT 0 means check the tree (including tree)'s treeblocks.
6555 * otherwise means check fs tree(s) items relationship and
6556 * @root MUST be a fs tree root.
6557 * Returns 0 represents OK.
6558 * Returns not 0 represents error.
/*
 * Check one tree: first the root directory inode via check_fs_first_inode(),
 * then the tree itself by alternating walk_down_tree_v2() and
 * walk_up_tree_v2() over a path that starts either at the root node or at the
 * recorded drop_progress key.
 */
6560 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6561 struct btrfs_root *root, unsigned int ext_ref,
6565 struct btrfs_path path;
6566 struct node_refs nrefs;
6567 struct btrfs_root_item *root_item = &root->root_item;
6572 memset(&nrefs, 0, sizeof(nrefs));
6575 * We need to manually check the first inode item (256)
6576 * As the following traversal function will only start from
6577 * the first inode item in the leaf, if inode item (256) is
6578 * missing we will skip it forever.
6580 ret = check_fs_first_inode(root, ext_ref);
6586 level = btrfs_header_level(root->node);
6587 btrfs_init_path(&path);
/*
 * A root that is still referenced, or has no drop progress recorded, is
 * walked from slot 0 of its root node; the extra reference taken on the node
 * is owned by the path.
 */
6589 if (btrfs_root_refs(root_item) > 0 ||
6590 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6591 path.nodes[level] = root->node;
6592 path.slots[level] = 0;
6593 extent_buffer_get(root->node);
6595 struct btrfs_key key;
/* Otherwise the walk starts at the drop_progress key, at drop_level. */
6597 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6598 level = root_item->drop_level;
6599 path.lowest_level = level;
6600 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6607 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6608 ext_ref, check_all);
6612 /* if ret is negative, walk shall stop */
6618 ret = walk_up_tree_v2(root, &path, &level);
6620 /* Normal exit, reset ret to err */
6627 btrfs_release_path(&path);
6632 * Iterate all items in the tree and call check_inode_item() to check.
6634 * @root: the root of the tree to be checked.
6635 * @ext_ref: the EXTENDED_IREF feature
6637 * Return 0 if no error found.
6638 * Return <0 for error.
6640 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6642 reset_cached_block_groups(root->fs_info);
6643 return check_btrfs_root(NULL, root, ext_ref, 0);
6647 * Find the relative ref for root_ref and root_backref.
6649 * @root: the root of the root tree.
6650 * @ref_key: the key of the root ref.
6652 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF/ROOT_BACKREF item at (@node, @slot) has a
 * counterpart of the opposite type whose dirid, sequence, name length and
 * name agree with it.
 */
6654 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6655 struct extent_buffer *node, int slot)
6657 struct btrfs_path path;
6658 struct btrfs_key key;
6659 struct btrfs_root_ref *ref;
6660 struct btrfs_root_ref *backref;
6661 char ref_name[BTRFS_NAME_LEN] = {0};
6662 char backref_name[BTRFS_NAME_LEN] = {0};
6668 u32 backref_namelen;
6673 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6674 ref_dirid = btrfs_root_ref_dirid(node, ref);
6675 ref_seq = btrfs_root_ref_sequence(node, ref);
6676 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read truncated, with a warning. */
6678 if (ref_namelen <= BTRFS_NAME_LEN) {
6681 len = BTRFS_NAME_LEN;
6682 warning("%s[%llu %llu] ref_name too long",
6683 ref_key->type == BTRFS_ROOT_REF_KEY ?
6684 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6687 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
/*
 * The counterpart key swaps objectid and offset; the sum minus the current
 * type turns ROOT_REF into ROOT_BACKREF and the other way round.
 */
6689 /* Find relative root_ref */
6690 key.objectid = ref_key->offset;
6691 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6692 key.offset = ref_key->objectid;
6694 btrfs_init_path(&path);
6695 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6697 err |= ROOT_REF_MISSING;
6698 error("%s[%llu %llu] couldn't find relative ref",
6699 ref_key->type == BTRFS_ROOT_REF_KEY ?
6700 "ROOT_REF" : "ROOT_BACKREF",
6701 ref_key->objectid, ref_key->offset);
6705 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6706 struct btrfs_root_ref);
6707 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6708 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6709 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* From here on len is the (clamped) length of the counterpart's name. */
6711 if (backref_namelen <= BTRFS_NAME_LEN) {
6712 len = backref_namelen;
6714 len = BTRFS_NAME_LEN;
6715 warning("%s[%llu %llu] ref_name too long",
6716 key.type == BTRFS_ROOT_REF_KEY ?
6717 "ROOT_REF" : "ROOT_BACKREF",
6718 key.objectid, key.offset);
6720 read_extent_buffer(path.nodes[0], backref_name,
6721 (unsigned long)(backref + 1), len);
/* The names are compared over len bytes after the length fields matched. */
6723 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6724 ref_namelen != backref_namelen ||
6725 strncmp(ref_name, backref_name, len)) {
6726 err |= ROOT_REF_MISMATCH;
6727 error("%s[%llu %llu] mismatch relative ref",
6728 ref_key->type == BTRFS_ROOT_REF_KEY ?
6729 "ROOT_REF" : "ROOT_BACKREF",
6730 ref_key->objectid, ref_key->offset);
6733 btrfs_release_path(&path);
6738 * Check all fs/file tree in low_memory mode.
6740 * 1. for fs tree root item, call check_fs_root_v2()
6741 * 2. for fs tree root ref/backref, call check_root_ref()
6743 * Return 0 if no error occurred.
/*
 * Walk the root tree starting at the ROOT_ITEM of BTRFS_FS_TREE_OBJECTID and
 * check every fs/subvolume tree with check_fs_root_v2() and every
 * ROOT_REF/ROOT_BACKREF item with check_root_ref().
 */
6745 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6747 struct btrfs_root *tree_root = fs_info->tree_root;
6748 struct btrfs_root *cur_root = NULL;
6749 struct btrfs_path path;
6750 struct btrfs_key key;
6751 struct extent_buffer *node;
6752 unsigned int ext_ref;
6757 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6759 btrfs_init_path(&path);
6760 key.objectid = BTRFS_FS_TREE_OBJECTID;
6762 key.type = BTRFS_ROOT_ITEM_KEY;
6764 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6768 } else if (ret > 0) {
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the range examined. */
6774 node = path.nodes[0];
6775 slot = path.slots[0];
6776 btrfs_item_key_to_cpu(node, &key, slot);
6777 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
/*
 * The reloc tree is read with the no-cache variant and freed right after the
 * check below; other roots are read with btrfs_read_fs_root() using offset
 * (u64)-1.
 */
6779 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6780 fs_root_objectid(key.objectid)) {
6781 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6782 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6785 key.offset = (u64)-1;
6786 cur_root = btrfs_read_fs_root(fs_info, &key);
6789 if (IS_ERR(cur_root)) {
6790 error("Fail to read fs/subvol tree: %lld",
6796 ret = check_fs_root_v2(cur_root, ext_ref);
6799 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6800 btrfs_free_fs_root(cur_root);
6801 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6802 key.type == BTRFS_ROOT_BACKREF_KEY) {
6803 ret = check_root_ref(tree_root, &key, node, slot);
6807 ret = btrfs_next_item(tree_root, &path);
6817 btrfs_release_path(&path);
6821 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6822 struct cache_tree *root_cache)
6826 if (!ctx.progress_enabled)
6827 fprintf(stderr, "checking fs roots\n");
6828 if (check_mode == CHECK_MODE_LOWMEM)
6829 ret = check_fs_roots_v2(fs_info);
6831 ret = check_fs_roots(fs_info, root_cache);
/*
 * Go through every backref recorded for @rec and look for inconsistencies
 * between what was found while scanning and what the extent tree says; the
 * messages below are written to stderr.
 * NOTE(review): the lines that use @print_errs and set the return value are
 * not visible in this excerpt.
 */
6836 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6838 struct extent_backref *back, *tmp;
6839 struct tree_backref *tback;
6840 struct data_backref *dback;
6844 rbtree_postorder_for_each_entry_safe(back, tmp,
6845 &rec->backref_tree, node) {
/* Case 1: the backref has no counterpart in the extent tree. */
6846 if (!back->found_extent_tree) {
6850 if (back->is_data) {
6851 dback = to_data_backref(back);
6852 fprintf(stderr, "Data backref %llu %s %llu"
6853 " owner %llu offset %llu num_refs %lu"
6854 " not found in extent tree\n",
6855 (unsigned long long)rec->start,
6856 back->full_backref ?
6858 back->full_backref ?
6859 (unsigned long long)dback->parent:
6860 (unsigned long long)dback->root,
6861 (unsigned long long)dback->owner,
6862 (unsigned long long)dback->offset,
6863 (unsigned long)dback->num_refs);
6865 tback = to_tree_backref(back);
6866 fprintf(stderr, "Tree backref %llu parent %llu"
6867 " root %llu not found in extent tree\n",
6868 (unsigned long long)rec->start,
6869 (unsigned long long)tback->parent,
6870 (unsigned long long)tback->root);
/* Case 2: a tree backref that nothing referenced back. */
6873 if (!back->is_data && !back->found_ref) {
6877 tback = to_tree_backref(back);
6878 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6879 (unsigned long long)rec->start,
6880 back->full_backref ? "parent" : "root",
6881 back->full_backref ?
6882 (unsigned long long)tback->parent :
6883 (unsigned long long)tback->root, back);
/* Case 3: data backref whose found count, disk bytenr or size disagree. */
6885 if (back->is_data) {
6886 dback = to_data_backref(back);
6887 if (dback->found_ref != dback->num_refs) {
6891 fprintf(stderr, "Incorrect local backref count"
6892 " on %llu %s %llu owner %llu"
6893 " offset %llu found %u wanted %u back %p\n",
6894 (unsigned long long)rec->start,
6895 back->full_backref ?
6897 back->full_backref ?
6898 (unsigned long long)dback->parent:
6899 (unsigned long long)dback->root,
6900 (unsigned long long)dback->owner,
6901 (unsigned long long)dback->offset,
6902 dback->found_ref, dback->num_refs, back);
6904 if (dback->disk_bytenr != rec->start) {
6908 fprintf(stderr, "Backref disk bytenr does not"
6909 " match extent record, bytenr=%llu, "
6910 "ref bytenr=%llu\n",
6911 (unsigned long long)rec->start,
6912 (unsigned long long)dback->disk_bytenr);
6915 if (dback->bytes != rec->nr) {
6919 fprintf(stderr, "Backref bytes do not match "
6920 "extent backref, bytenr=%llu, ref "
6921 "bytes=%llu, backref bytes=%llu\n",
6922 (unsigned long long)rec->start,
6923 (unsigned long long)rec->nr,
6924 (unsigned long long)dback->bytes);
/* Data backrefs add their found_ref to the running total. */
6927 if (!back->is_data) {
6930 dback = to_data_backref(back);
6931 found += dback->found_ref;
/* The total found has to match the reference count of the extent record. */
6934 if (found != rec->refs) {
6938 fprintf(stderr, "Incorrect global backref count "
6939 "on %llu found %llu wanted %llu\n",
6940 (unsigned long long)rec->start,
6941 (unsigned long long)found,
6942 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Maps @node back to its struct extent_backref (presumably so that it can be
 * released -- confirm against the complete body).
 */
6948 static void __free_one_backref(struct rb_node *node)
6950 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Dispose of every backref attached to @rec by passing rec->backref_tree to
 * rb_free_nodes() with __free_one_backref as the per-node callback.
 */
6955 static void free_all_extent_backrefs(struct extent_record *rec)
6957 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down the extent records held in @extent_cache: take the first cache
 * extent, map it to its extent_record, unlink it from the cache and free all
 * of its backrefs.
 * NOTE(review): presumably repeated until the cache is empty, with the
 * record itself freed afterwards -- confirm against the complete body.
 */
6960 static void free_extent_record_cache(struct cache_tree *extent_cache)
6962 struct cache_extent *cache;
6963 struct extent_record *rec;
6966 cache = first_cache_extent(extent_cache);
6969 rec = container_of(cache, struct extent_record, cache);
6970 remove_cache_extent(extent_cache, cache);
6971 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it remains to be checked.
 *
 * The record is removed only when all of the following hold:
 *  - content_checked and owner_ref_checked are set;
 *  - extent_item_refs equals refs and refs is greater than zero;
 *  - there are no duplicates (num_duplicates == 0);
 *  - all_backpointers_checked(rec, 0) yields 0;
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * On removal the record is unlinked from the cache, its backrefs are freed
 * and it is taken off whatever list rec->list is on.
 */
6976 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6977 struct extent_record *rec)
6979 if (rec->content_checked && rec->owner_ref_checked &&
6980 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6981 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6982 !rec->bad_full_backref && !rec->crossing_stripes &&
6983 !rec->wrong_chunk_type) {
6984 remove_cache_extent(extent_cache, &rec->cache);
6985 free_all_extent_backrefs(rec);
6986 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is really referenced by the tree named in
 * its header owner field.
 *
 * First pass: scan the backrefs of @rec, looking at entries with found_ref
 * set and comparing back->root of a tree backref with
 * btrfs_header_owner(buf).
 * Second pass: read the fs root identified by the header owner, search it
 * for the first key of @buf with path.lowest_level set to level + 1, and
 * compare the block pointer in the parent slot with buf->start.
 *
 * Returns 0 when the second pass finds the block and 1 when it does not.
 * NOTE(review): the outcome of a first-pass match and of a failed root read
 * is decided on lines not shown here -- confirm.
 */
6992 static int check_owner_ref(struct btrfs_root *root,
6993 struct extent_record *rec,
6994 struct extent_buffer *buf)
6996 struct extent_backref *node, *tmp;
6997 struct tree_backref *back;
6998 struct btrfs_root *ref_root;
6999 struct btrfs_key key;
7000 struct btrfs_path path;
7001 struct extent_buffer *parent;
7006 rbtree_postorder_for_each_entry_safe(node, tmp,
7007 &rec->backref_tree, node) {
7010 if (!node->found_ref)
7012 if (node->full_backref)
7014 back = to_tree_backref(node);
7015 if (btrfs_header_owner(buf) == back->root)
7018 BUG_ON(rec->is_root);
7020 /* try to find the block by search corresponding fs tree */
7021 key.objectid = btrfs_header_owner(buf);
7022 key.type = BTRFS_ROOT_ITEM_KEY;
7023 key.offset = (u64)-1;
7025 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7026 if (IS_ERR(ref_root))
7029 level = btrfs_header_level(buf);
/* Use the first key stored in the block as the search key. */
7031 btrfs_item_key_to_cpu(buf, &key, 0);
7033 btrfs_node_key_to_cpu(buf, &key, 0);
7035 btrfs_init_path(&path);
/* Stop the search one level above the block so its parent is in the path. */
7036 path.lowest_level = level + 1;
7037 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7041 parent = path.nodes[level + 1];
7042 if (parent && buf->start == btrfs_node_blockptr(parent,
7043 path.slots[level + 1]))
7046 btrfs_release_path(&path);
7047 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; the full_backref flag of each entry is
 * examined before the root comparison.
 * NOTE(review): the values returned for a match, for a full backref and for
 * no match are set on lines not shown here -- confirm before relying on
 * them. record_bad_block_io() treats a zero result as "not an extent tree
 * record".
 */
7050 static int is_extent_tree_record(struct extent_record *rec)
7052 struct extent_backref *node, *tmp;
7053 struct tree_backref *back;
7056 rbtree_postorder_for_each_entry_safe(node, tmp,
7057 &rec->backref_tree, node) {
7060 back = to_tree_backref(node);
7061 if (node->full_backref)
7063 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt-extent record for the range [@start, @start + @len).
 *
 * Looks the range up in @extent_cache, maps the hit to its extent_record,
 * and consults is_extent_tree_record(); when processing continues, the
 * parent key of the record is converted to CPU byte order and passed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): what happens on a cache miss or when
 * is_extent_tree_record() yields 0 is decided on lines not shown -- confirm.
 */
7070 static int record_bad_block_io(struct btrfs_fs_info *info,
7071 struct cache_tree *extent_cache,
7074 struct extent_record *rec;
7075 struct cache_extent *cache;
7076 struct btrfs_key key;
7078 cache = lookup_cache_extent(extent_cache, start, len);
7082 rec = container_of(cache, struct extent_record, cache);
7083 if (!is_extent_tree_record(rec))
7086 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7087 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two struct btrfs_key_ptr entries are read
 * out and written back in swapped positions; the first key of the node is
 * then pushed up with btrfs_fixup_low_keys() (presumably only when @slot is
 * 0 -- the guarding condition is not visible here).
 *
 * Leaf: both item payloads are copied into temporary malloc buffers, written
 * back at each other's offset, the item offsets and sizes are exchanged, and
 * the two keys are rewritten through btrfs_set_item_key_unsafe() using
 * path->slots[0] as the cursor (left at @slot + 1 afterwards).
 *
 * NOTE(review): the write-back of item1_data uses item2_size as its length
 * and vice versa; if the two sizes differ the copy is longer than the
 * smaller buffer. Confirm whether equal sizes are guaranteed by callers.
 */
7090 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7091 struct extent_buffer *buf, int slot)
7093 if (btrfs_header_level(buf)) {
7094 struct btrfs_key_ptr ptr1, ptr2;
7096 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7097 sizeof(struct btrfs_key_ptr));
7098 read_extent_buffer(buf, &ptr2,
7099 btrfs_node_key_ptr_offset(slot + 1),
7100 sizeof(struct btrfs_key_ptr));
7101 write_extent_buffer(buf, &ptr1,
7102 btrfs_node_key_ptr_offset(slot + 1),
7103 sizeof(struct btrfs_key_ptr));
7104 write_extent_buffer(buf, &ptr2,
7105 btrfs_node_key_ptr_offset(slot),
7106 sizeof(struct btrfs_key_ptr));
7108 struct btrfs_disk_key key;
7109 btrfs_node_key(buf, &key, 0);
7110 btrfs_fixup_low_keys(root, path, &key,
7111 btrfs_header_level(buf) + 1);
7114 struct btrfs_item *item1, *item2;
7115 struct btrfs_key k1, k2;
7116 char *item1_data, *item2_data;
7117 u32 item1_offset, item2_offset, item1_size, item2_size;
7119 item1 = btrfs_item_nr(slot);
7120 item2 = btrfs_item_nr(slot + 1);
7121 btrfs_item_key_to_cpu(buf, &k1, slot);
7122 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7123 item1_offset = btrfs_item_offset(buf, item1);
7124 item2_offset = btrfs_item_offset(buf, item2);
7125 item1_size = btrfs_item_size(buf, item1);
7126 item2_size = btrfs_item_size(buf, item2);
7128 item1_data = malloc(item1_size);
7131 item2_data = malloc(item2_size);
7137 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7138 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Each payload is written at the other offset, with the other size. */
7140 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7141 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7145 btrfs_set_item_offset(buf, item1, item2_offset);
7146 btrfs_set_item_offset(buf, item2, item1_offset);
7147 btrfs_set_item_size(buf, item1, item2_size);
7148 btrfs_set_item_size(buf, item2, item1_size);
7150 path->slots[0] = slot;
7151 btrfs_set_item_key_unsafe(root, path, &k2);
7152 path->slots[0] = slot + 1;
7153 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Every adjacent pair (i, i + 1) is read with the node or the item key
 * accessor and compared with btrfs_comp_cpu_keys(); pairs that do not
 * compare as "less than" are exchanged through swap_values(). The buffer is
 * marked dirty afterwards.
 * NOTE(review): how a swap_values() failure is handled and whether the scan
 * restarts after a swap is decided on lines not shown -- confirm.
 */
7158 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7160 struct extent_buffer *buf;
7161 struct btrfs_key k1, k2;
7163 int level = path->lowest_level;
7166 buf = path->nodes[level];
7167 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7169 btrfs_node_key_to_cpu(buf, &k1, i);
7170 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7172 btrfs_item_key_to_cpu(buf, &k1, i);
7173 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already ordered. */
7175 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7177 ret = swap_values(root, path, buf, i);
7180 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only keys of type DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF
 * and EXTENT_DATA_REF are eligible; any other type takes a path that is not
 * visible here (presumably an error return -- confirm).
 * For an eligible key a message is printed to stdout, the item headers after
 * @slot are moved down by one with memmove_extent_buffer(), nritems is
 * decremented and the buffer is marked dirty. The first key of the leaf is
 * pushed up with btrfs_fixup_low_keys() (presumably only when @slot is 0).
 * Only the item header array is moved by the visible code; the item data
 * area is not touched here.
 */
7186 static int delete_bogus_item(struct btrfs_root *root,
7187 struct btrfs_path *path,
7188 struct extent_buffer *buf, int slot)
7190 struct btrfs_key key;
7191 int nritems = btrfs_header_nritems(buf);
7193 btrfs_item_key_to_cpu(buf, &key, slot);
7195 /* These are all the keys we can deal with missing. */
7196 if (key.type != BTRFS_DIR_INDEX_KEY &&
7197 key.type != BTRFS_EXTENT_ITEM_KEY &&
7198 key.type != BTRFS_METADATA_ITEM_KEY &&
7199 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7200 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7203 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7204 (unsigned long long)key.objectid, key.type,
7205 (unsigned long long)key.offset, slot, buf->start);
7206 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7207 btrfs_item_nr_offset(slot + 1),
7208 sizeof(struct btrfs_item) *
7209 (nritems - slot - 1));
7210 btrfs_set_header_nritems(buf, nritems - 1);
7212 struct btrfs_disk_key disk_key;
7214 btrfs_item_key(buf, &disk_key, 0);
7215 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7217 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout, as checked below: the end of item 0 equals
 * BTRFS_LEAF_DATA_SIZE(root), and the end of item i equals the offset of
 * item i - 1. For every item that violates this:
 *  - if its end lies beyond the expected position, delete_bogus_item() is
 *    tried; the messages "item is off the end of the leaf, can't fix" and
 *    "items overlap, can't fix" belong to this branch;
 *  - otherwise the gap is computed as shift, the payload is moved up by that
 *    many bytes with memmove_extent_buffer(), the item offset is updated and
 *    the buffer is marked dirty.
 * Must be called on a leaf: BUG_ON fires when path->lowest_level is set.
 * NOTE(review): return values and the loop restart after a deletion are on
 * lines not shown here -- confirm.
 */
7221 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7223 struct extent_buffer *buf;
7227 /* We should only get this for leaves */
7228 BUG_ON(path->lowest_level);
7229 buf = path->nodes[0];
7231 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7232 unsigned int shift = 0, offset;
7234 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7235 BTRFS_LEAF_DATA_SIZE(root)) {
7236 if (btrfs_item_end_nr(buf, i) >
7237 BTRFS_LEAF_DATA_SIZE(root)) {
7238 ret = delete_bogus_item(root, path, buf, i);
7241 fprintf(stderr, "item is off the end of the "
7242 "leaf, can't fix\n");
7246 shift = BTRFS_LEAF_DATA_SIZE(root) -
7247 btrfs_item_end_nr(buf, i);
7248 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7249 btrfs_item_offset_nr(buf, i - 1)) {
7250 if (btrfs_item_end_nr(buf, i) >
7251 btrfs_item_offset_nr(buf, i - 1)) {
7252 ret = delete_bogus_item(root, path, buf, i);
7255 fprintf(stderr, "items overlap, can't fix\n");
7259 shift = btrfs_item_offset_nr(buf, i - 1) -
7260 btrfs_item_end_nr(buf, i);
7265 printf("Shifting item nr %d by %u bytes in block %llu\n",
7266 i, shift, (unsigned long long)buf->start);
7267 offset = btrfs_item_offset_nr(buf, i);
7268 memmove_extent_buffer(buf,
7269 btrfs_leaf_data(buf) + offset + shift,
7270 btrfs_leaf_data(buf) + offset,
7271 btrfs_item_size_nr(buf, i));
7272 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7274 btrfs_mark_buffer_dirty(buf);
7278 * We may have moved things, in which case we want to exit so we don't
7279 * write those changes out. Once we have proper abort functionality in
7280 * progs this can be changed to something nicer.
7287 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7288 * then just return -EIO.
/*
 * Repair driver for a tree block that failed its sanity check.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled; any other @status leaves through a path not shown here.
 * btrfs_find_all_roots() collects the ids of the roots referencing
 * buf->start. For each id the fs root is read, a transaction is started and
 * btrfs_search_slot() is run for the first key of @buf with
 * path.lowest_level set to the level of the block, skip_check_block set and
 * a final argument of 1 (the cow flag in the btrfs_search_slot() signature
 * -- confirm). The path is then handed to fix_key_order() or
 * fix_item_offset() depending on @status, and the transaction is committed.
 * NOTE(review): error paths, the freeing of the ulist and the final return
 * value are on lines not shown -- confirm.
 */
7290 static int try_to_fix_bad_block(struct btrfs_root *root,
7291 struct extent_buffer *buf,
7292 enum btrfs_tree_block_status status)
7294 struct btrfs_trans_handle *trans;
7295 struct ulist *roots;
7296 struct ulist_node *node;
7297 struct btrfs_root *search_root;
7298 struct btrfs_path path;
7299 struct ulist_iterator iter;
7300 struct btrfs_key root_key, key;
7303 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7304 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7307 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7311 btrfs_init_path(&path);
7312 ULIST_ITER_INIT(&iter);
7313 while ((node = ulist_next(roots, &iter))) {
/* node->val carries the objectid of a root that references the block. */
7314 root_key.objectid = node->val;
7315 root_key.type = BTRFS_ROOT_ITEM_KEY;
7316 root_key.offset = (u64)-1;
7318 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7325 trans = btrfs_start_transaction(search_root, 0);
7326 if (IS_ERR(trans)) {
7327 ret = PTR_ERR(trans);
7331 path.lowest_level = btrfs_header_level(buf);
7332 path.skip_check_block = 1;
7333 if (path.lowest_level)
7334 btrfs_node_key_to_cpu(buf, &key, 0);
7336 btrfs_item_key_to_cpu(buf, &key, 0);
7337 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7340 btrfs_commit_transaction(trans, search_root);
7343 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7344 ret = fix_key_order(search_root, &path);
7345 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7346 ret = fix_item_offset(search_root, &path);
7348 btrfs_commit_transaction(trans, search_root);
7351 btrfs_release_path(&path);
7352 btrfs_commit_transaction(trans, search_root);
7355 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering [buf->start, buf->len) is looked up in @extent_cache
 * and receives the header generation, the objectid of the first key (when
 * the block has items) and the block level. The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key. A
 * non-clean status is passed to try_to_fix_bad_block(), whose int result is
 * stored back into status and compared with BTRFS_TREE_BLOCK_CLEAN; a
 * "bad block" message is printed on stderr when repair did not help.
 * On the clean path content_checked is set, and owner_ref_checked is set
 * either because @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF or after
 * consulting check_owner_ref(). Finally maybe_free_extent_rec() is given a
 * chance to drop the record.
 * NOTE(review): return values, and whether repair is attempted only in a
 * repair mode, are decided on lines not shown -- confirm.
 */
7359 static int check_block(struct btrfs_root *root,
7360 struct cache_tree *extent_cache,
7361 struct extent_buffer *buf, u64 flags)
7363 struct extent_record *rec;
7364 struct cache_extent *cache;
7365 struct btrfs_key key;
7366 enum btrfs_tree_block_status status;
7370 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7373 rec = container_of(cache, struct extent_record, cache);
7374 rec->generation = btrfs_header_generation(buf);
7376 level = btrfs_header_level(buf);
7377 if (btrfs_header_nritems(buf) > 0) {
7380 btrfs_item_key_to_cpu(buf, &key, 0);
7382 btrfs_node_key_to_cpu(buf, &key, 0);
7384 rec->info_objectid = key.objectid;
7386 rec->info_level = level;
7388 if (btrfs_is_leaf(buf))
7389 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7391 status = btrfs_check_node(root, &rec->parent_key, buf);
7393 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7395 status = try_to_fix_bad_block(root, buf, status);
7396 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7398 fprintf(stderr, "bad block %llu\n",
7399 (unsigned long long)buf->start);
7402 * Signal to callers we need to start the scan over
7403 * again since we'll have cowed blocks.
7408 rec->content_checked = 1;
7409 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7410 rec->owner_ref_checked = 1;
7412 ret = check_owner_ref(root, rec, buf);
7414 rec->owner_ref_checked = 1;
7418 maybe_free_extent_rec(extent_cache, rec);
/*
 * Find the tree backref of @rec that matches (@parent, @root).
 *
 * Iterates the list headed at rec->backrefs. Entries with full_backref set
 * are matched on back->parent == @parent, the others on
 * back->root == @root; which of the two comparisons applies is presumably
 * selected by whether @parent is non-zero (the selecting condition is not
 * visible here).
 * NOTE(review): this walks the list rec->backrefs, whereas
 * add_tree_backref() inserts new backrefs into rec->backref_tree. Confirm
 * that the list is populated, otherwise this lookup never matches.
 */
7423 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7424 u64 parent, u64 root)
7426 struct list_head *cur = rec->backrefs.next;
7427 struct extent_backref *node;
7428 struct tree_backref *back;
7430 while(cur != &rec->backrefs) {
7431 node = to_extent_backref(cur);
7435 back = to_tree_backref(node);
7437 if (!node->full_backref)
7439 if (parent == back->parent)
7442 if (node->full_backref)
7444 if (back->root == root)
/*
 * Allocate a tree backref with malloc() and zero its embedded node.
 *
 * Two initialisations are visible: one stores @parent and sets
 * node.full_backref to 1, the other clears node.full_backref (presumably
 * storing @root on a line not shown). Only ref->node is zeroed by memset;
 * the remaining fields hold whatever malloc returned until assigned.
 * NOTE(review): linking the new backref to @rec and the NULL return on
 * allocation failure are not visible here -- confirm.
 */
7452 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7453 u64 parent, u64 root)
7455 struct tree_backref *ref = malloc(sizeof(*ref));
7459 memset(&ref->node, 0, sizeof(ref->node));
7461 ref->parent = parent;
7462 ref->node.full_backref = 1;
7465 ref->node.full_backref = 0;
/*
 * Find the data backref of @rec that matches the given identity.
 *
 * Iterates the list headed at rec->backrefs. Full backrefs are matched on
 * back->parent == @parent; the others on root, owner and offset. For the
 * latter, when found_ref is requested and the candidate already has
 * node->found_ref set, a candidate whose bytes or disk_bytenr differ from
 * @bytes / @disk_bytenr is rejected (the action taken is on a line not
 * shown, presumably a continue).
 * NOTE(review): the found_ref parameter used at the bottom is declared on a
 * line not shown. As with find_tree_backref(), this walks the list
 * rec->backrefs while add_data_backref() inserts into rec->backref_tree --
 * confirm the list is populated.
 */
7472 static struct data_backref *find_data_backref(struct extent_record *rec,
7473 u64 parent, u64 root,
7474 u64 owner, u64 offset,
7476 u64 disk_bytenr, u64 bytes)
7478 struct list_head *cur = rec->backrefs.next;
7479 struct extent_backref *node;
7480 struct data_backref *back;
7482 while(cur != &rec->backrefs) {
7483 node = to_extent_backref(cur);
7487 back = to_data_backref(node);
7489 if (!node->full_backref)
7491 if (parent == back->parent)
7494 if (node->full_backref)
7496 if (back->root == root && back->owner == owner &&
7497 back->offset == offset) {
7498 if (found_ref && node->found_ref &&
7499 (back->bytes != bytes ||
7500 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref with malloc(), zero its embedded node and mark it
 * as data (node.is_data = 1).
 *
 * Two initialisations are visible: one stores @parent and sets
 * node.full_backref to 1, the other stores @offset and clears
 * node.full_backref (root and owner are presumably stored on lines not
 * shown). ref->bytes is set to max_size, and rec->max_size is raised when
 * max_size exceeds it.
 * NOTE(review): max_size is a parameter declared on a line not shown;
 * linking to @rec and allocation-failure handling are not visible either.
 */
7510 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7511 u64 parent, u64 root,
7512 u64 owner, u64 offset,
7515 struct data_backref *ref = malloc(sizeof(*ref));
7519 memset(&ref->node, 0, sizeof(ref->node));
7520 ref->node.is_data = 1;
7523 ref->parent = parent;
7526 ref->node.full_backref = 1;
7530 ref->offset = offset;
7531 ref->node.full_backref = 0;
7533 ref->bytes = max_size;
7536 if (max_size > rec->max_size)
7537 rec->max_size = max_size;
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group whose
 * flags do not fit the extent:
 *  - a data extent (rec->metadata unset) needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA, and when the record has
 *    backrefs the first one in rec->backref_tree decides which: a data
 *    backref is always wrong, a tree backref rooted in
 *    BTRFS_CHUNK_TREE_OBJECTID demands SYSTEM, any other root METADATA.
 * The block group is found via btrfs_lookup_first_block_group() on
 * global_info (the handling of a failed lookup is not visible here).
 */
7541 /* Check if the type of extent matches with its chunk */
7542 static void check_extent_type(struct extent_record *rec)
7544 struct btrfs_block_group_cache *bg_cache;
7546 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7550 /* data extent, check chunk directly*/
7551 if (!rec->metadata) {
7552 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7553 rec->wrong_chunk_type = 1;
7557 /* metadata extent, check the obvious case first */
7558 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7559 BTRFS_BLOCK_GROUP_METADATA))) {
7560 rec->wrong_chunk_type = 1;
7565 * Check SYSTEM extent, as it's also marked as metadata, we can only
7566 * make sure it's a SYSTEM extent by its backref
7568 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7569 struct extent_backref *node;
7570 struct tree_backref *tback;
7573 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7574 if (node->is_data) {
7575 /* tree block shouldn't have data backref */
7576 rec->wrong_chunk_type = 1;
7579 tback = container_of(node, struct tree_backref, node);
7581 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7582 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7584 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7585 if (!(bg_cache->flags & bg_type))
7586 rec->wrong_chunk_type = 1;
7591 * Allocate a new extent record, fill default values from @tmpl and insert into
7592 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7593 * the cache, otherwise it fails.
/*
 * Field handling, as visible below:
 *  - copied from @tmpl: start, max_size, found_rec, content_checked,
 *    owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 *    parent_generation, parent_key;
 *  - nr is max(tmpl->nr, tmpl->max_size), while cache.size is tmpl->nr;
 *  - num_duplicates, bad_full_backref, crossing_stripes and
 *    wrong_chunk_type start at 0, flag_block_full_backref at FLAG_UNSET;
 *  - the three list heads and backref_tree start empty.
 * tmpl->max_size == 0 trips BUG_ON. After a successful insert into
 * @extent_cache the global bytes_used grows by rec->nr, crossing_stripes is
 * computed with check_crossing_stripes() using global_info->nodesize
 * (presumably for metadata records only -- the guard is not visible) and
 * check_extent_type() is run.
 */
7595 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7596 struct extent_record *tmpl)
7598 struct extent_record *rec;
7601 BUG_ON(tmpl->max_size == 0);
7602 rec = malloc(sizeof(*rec));
7605 rec->start = tmpl->start;
7606 rec->max_size = tmpl->max_size;
7607 rec->nr = max(tmpl->nr, tmpl->max_size);
7608 rec->found_rec = tmpl->found_rec;
7609 rec->content_checked = tmpl->content_checked;
7610 rec->owner_ref_checked = tmpl->owner_ref_checked;
7611 rec->num_duplicates = 0;
7612 rec->metadata = tmpl->metadata;
7613 rec->flag_block_full_backref = FLAG_UNSET;
7614 rec->bad_full_backref = 0;
7615 rec->crossing_stripes = 0;
7616 rec->wrong_chunk_type = 0;
7617 rec->is_root = tmpl->is_root;
7618 rec->refs = tmpl->refs;
7619 rec->extent_item_refs = tmpl->extent_item_refs;
7620 rec->parent_generation = tmpl->parent_generation;
7621 INIT_LIST_HEAD(&rec->backrefs);
7622 INIT_LIST_HEAD(&rec->dups);
7623 INIT_LIST_HEAD(&rec->list);
7624 rec->backref_tree = RB_ROOT;
7625 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7626 rec->cache.start = tmpl->start;
7627 rec->cache.size = tmpl->nr;
7628 ret = insert_cache_extent(extent_cache, &rec->cache);
7633 bytes_used += rec->nr;
7636 rec->crossing_stripes = check_crossing_stripes(global_info,
7637 rec->start, global_info->nodesize);
7638 check_extent_type(rec);
7643 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7645 * - refs - if found, increase refs
7646 * - is_root - if found, set
7647 * - content_checked - if found, set
7648 * - owner_ref_checked - if found, set
7650 * If not found, create a new one, initialize and insert.
/*
 * Implementation notes, as visible below.
 *
 * The range [tmpl->start, tmpl->nr) is looked up in @extent_cache. On a hit:
 *  - rec->nr is reset to max(tmpl->nr, tmpl->max_size);
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, the template is treated as a duplicate: the record
 *    is queued on duplicate_extents (if not already on a list) and a new
 *    extent_record carrying start, max_size, metadata and extent_item_refs
 *    is appended to rec->dups, bumping rec->num_duplicates;
 *  - a non-duplicate template with extent_item_refs overwrites
 *    rec->extent_item_refs, after printing both values on stderr when the
 *    record already had one;
 *  - content_checked / owner_ref_checked are only ever set, never cleared;
 *    parent_key is always copied; parent_generation and max_size are only
 *    raised or replaced by non-zero / larger template values;
 *  - crossing_stripes and the chunk type are re-evaluated and
 *    maybe_free_extent_rec() is given a chance to drop the record.
 * The call to add_extent_rec_nolookup() at the bottom is the path for a
 * cache miss.
 */
7652 static int add_extent_rec(struct cache_tree *extent_cache,
7653 struct extent_record *tmpl)
7655 struct extent_record *rec;
7656 struct cache_extent *cache;
7660 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7662 rec = container_of(cache, struct extent_record, cache);
7666 rec->nr = max(tmpl->nr, tmpl->max_size);
7669 * We need to make sure to reset nr to whatever the extent
7670 * record says was the real size, this way we can compare it to
7673 if (tmpl->found_rec) {
7674 if (tmpl->start != rec->start || rec->found_rec) {
7675 struct extent_record *tmp;
7678 if (list_empty(&rec->list))
7679 list_add_tail(&rec->list,
7680 &duplicate_extents);
7683 * We have to do this song and dance in case we
7684 * find an extent record that falls inside of
7685 * our current extent record but does not have
7686 * the same objectid.
7688 tmp = malloc(sizeof(*tmp));
7691 tmp->start = tmpl->start;
7692 tmp->max_size = tmpl->max_size;
7695 tmp->metadata = tmpl->metadata;
7696 tmp->extent_item_refs = tmpl->extent_item_refs;
7697 INIT_LIST_HEAD(&tmp->list);
7698 list_add_tail(&tmp->list, &rec->dups);
7699 rec->num_duplicates++;
7706 if (tmpl->extent_item_refs && !dup) {
7707 if (rec->extent_item_refs) {
7708 fprintf(stderr, "block %llu rec "
7709 "extent_item_refs %llu, passed %llu\n",
7710 (unsigned long long)tmpl->start,
7711 (unsigned long long)
7712 rec->extent_item_refs,
7713 (unsigned long long)tmpl->extent_item_refs);
7715 rec->extent_item_refs = tmpl->extent_item_refs;
7719 if (tmpl->content_checked)
7720 rec->content_checked = 1;
7721 if (tmpl->owner_ref_checked)
7722 rec->owner_ref_checked = 1;
7723 memcpy(&rec->parent_key, &tmpl->parent_key,
7724 sizeof(tmpl->parent_key));
7725 if (tmpl->parent_generation)
7726 rec->parent_generation = tmpl->parent_generation;
7727 if (rec->max_size < tmpl->max_size)
7728 rec->max_size = tmpl->max_size;
7731 * A metadata extent can't cross stripe_len boundary, otherwise
7732 * kernel scrub won't be able to handle it.
7733 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7737 rec->crossing_stripes = check_crossing_stripes(
7738 global_info, rec->start,
7739 global_info->nodesize);
7740 check_extent_type(rec);
7741 maybe_free_extent_rec(extent_cache, rec);
7745 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; on a miss a template
 * starting at @bytenr is added with add_extent_rec_nolookup() and the lookup
 * is repeated. A record whose start differs from @bytenr is rejected (the
 * exact return is not visible here). The matching backref is searched with
 * find_tree_backref() and allocated with alloc_tree_backref() when absent.
 * Two flag updates follow, each preceded by an "already exists" message on
 * stderr if the flag was set before: node.found_ref and
 * node.found_extent_tree (presumably selected by @found_ref).
 * A backref is inserted into rec->backref_tree (presumably only when it was
 * newly allocated, tracked by the local insert flag); WARN_ON fires if
 * rb_insert() reports failure. Finally the chunk type is re-checked and
 * maybe_free_extent_rec() may drop the record.
 */
7750 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7751 u64 parent, u64 root, int found_ref)
7753 struct extent_record *rec;
7754 struct tree_backref *back;
7755 struct cache_extent *cache;
7757 bool insert = false;
7759 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7761 struct extent_record tmpl;
7763 memset(&tmpl, 0, sizeof(tmpl));
7764 tmpl.start = bytenr;
7769 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7773 /* really a bug in cache_extent implement now */
7774 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7779 rec = container_of(cache, struct extent_record, cache);
7780 if (rec->start != bytenr) {
7782 * Several cause, from unaligned bytenr to over lapping extents
7787 back = find_tree_backref(rec, parent, root);
7789 back = alloc_tree_backref(rec, parent, root);
7796 if (back->node.found_ref) {
7797 fprintf(stderr, "Extent back ref already exists "
7798 "for %llu parent %llu root %llu \n",
7799 (unsigned long long)bytenr,
7800 (unsigned long long)parent,
7801 (unsigned long long)root);
7803 back->node.found_ref = 1;
7805 if (back->node.found_extent_tree) {
7806 fprintf(stderr, "Extent back ref already exists "
7807 "for %llu parent %llu root %llu \n",
7808 (unsigned long long)bytenr,
7809 (unsigned long long)parent,
7810 (unsigned long long)root);
7812 back->node.found_extent_tree = 1;
7815 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7816 compare_extent_backref));
7817 check_extent_type(rec);
7818 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; on a miss a template with
 * start = @bytenr and max_size = @max_size is added with
 * add_extent_rec_nolookup() and the lookup is repeated. rec->max_size is
 * raised to @max_size when smaller. The backref is searched with
 * find_data_backref() and allocated with alloc_data_backref() when absent.
 *
 * Two update paths are visible (presumably selected by @found_ref):
 *  - reference side: @num_refs must be 1 (BUG_ON); if the backref was
 *    already found its bytes must equal @max_size (BUG_ON); node.found_ref
 *    is set, the found_ref counter incremented, bytes / disk_bytenr are
 *    refreshed (erasing the node from rec->backref_tree first in a case
 *    whose guard is not visible) and the record is marked content_checked
 *    and owner_ref_checked;
 *  - extent tree side: an "already exists" message is printed if
 *    node.found_extent_tree was set, then num_refs is stored and the flag
 *    set.
 * A backref is inserted into rec->backref_tree (presumably only when the
 * local insert flag is set); WARN_ON fires if rb_insert() reports failure.
 * maybe_free_extent_rec() may then drop the record.
 */
7822 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7823 u64 parent, u64 root, u64 owner, u64 offset,
7824 u32 num_refs, int found_ref, u64 max_size)
7826 struct extent_record *rec;
7827 struct data_backref *back;
7828 struct cache_extent *cache;
7830 bool insert = false;
7832 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7834 struct extent_record tmpl;
7836 memset(&tmpl, 0, sizeof(tmpl));
7837 tmpl.start = bytenr;
7839 tmpl.max_size = max_size;
7841 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7845 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7850 rec = container_of(cache, struct extent_record, cache);
7851 if (rec->max_size < max_size)
7852 rec->max_size = max_size;
7855 * If found_ref is set then max_size is the real size and must match the
7856 * existing refs. So if we have already found a ref then we need to
7857 * make sure that this ref matches the existing one, otherwise we need
7858 * to add a new backref so we can notice that the backrefs don't match
7859 * and we need to figure out who is telling the truth. This is to
7860 * account for that awful fsync bug I introduced where we'd end up with
7861 * a btrfs_file_extent_item that would have its length include multiple
7862 * prealloc extents or point inside of a prealloc extent.
7864 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7867 back = alloc_data_backref(rec, parent, root, owner, offset,
7874 BUG_ON(num_refs != 1);
7875 if (back->node.found_ref)
7876 BUG_ON(back->bytes != max_size);
7877 back->node.found_ref = 1;
7878 back->found_ref += 1;
7879 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7880 back->bytes = max_size;
7881 back->disk_bytenr = bytenr;
7883 /* Need to reinsert if not already in the tree */
7885 rb_erase(&back->node.node, &rec->backref_tree);
7890 rec->content_checked = 1;
7891 rec->owner_ref_checked = 1;
7893 if (back->node.found_extent_tree) {
7894 fprintf(stderr, "Extent back ref already exists "
7895 "for %llu parent %llu root %llu "
7896 "owner %llu offset %llu num_refs %lu\n",
7897 (unsigned long long)bytenr,
7898 (unsigned long long)parent,
7899 (unsigned long long)root,
7900 (unsigned long long)owner,
7901 (unsigned long long)offset,
7902 (unsigned long)num_refs);
7904 back->num_refs = num_refs;
7905 back->node.found_extent_tree = 1;
7908 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7909 compare_extent_backref));
7911 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [@bytenr, @bytenr + @size) for later processing.
 *
 * The range is first added to @seen; its result is kept in ret (presumably a
 * failure there, e.g. a range already seen, ends the function before the
 * second insert -- confirm). The range is then added to @pending; the result
 * of that second add_cache_extent() call is not checked.
 */
7915 static int add_pending(struct cache_tree *pending,
7916 struct cache_tree *seen, u64 bytenr, u32 size)
7919 ret = add_cache_extent(seen, bytenr, size);
7922 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries; the count is kept in ret).
 *
 * Visible selection steps:
 *  - the first extent of @reada, if any, is placed in bits[0];
 *  - otherwise @nodes is searched starting at @last backed off by 32768
 *    (when @last is larger than that), falling back to the start of @nodes
 *    and then to the start of @pending;
 *  - consecutive extents are copied into @bits until the tree or @bits_nr
 *    is exhausted;
 *  - when more than 8 slots remain free, extents from @pending that start
 *    within 32768 bytes of the end of the previously picked one are
 *    appended.
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last, which narrows the value where unsigned long is 32 bits wide.
 * Branch structure and return statements are on lines not shown -- confirm.
 */
7926 static int pick_next_pending(struct cache_tree *pending,
7927 struct cache_tree *reada,
7928 struct cache_tree *nodes,
7929 u64 last, struct block_info *bits, int bits_nr,
7932 unsigned long node_start = last;
7933 struct cache_extent *cache;
7936 cache = search_cache_extent(reada, 0);
7938 bits[0].start = cache->start;
7939 bits[0].size = cache->size;
7944 if (node_start > 32768)
7945 node_start -= 32768;
7947 cache = search_cache_extent(nodes, node_start);
7949 cache = search_cache_extent(nodes, 0);
7952 cache = search_cache_extent(pending, 0);
7957 bits[ret].start = cache->start;
7958 bits[ret].size = cache->size;
7959 cache = next_cache_extent(cache);
7961 } while (cache && ret < bits_nr);
7967 bits[ret].start = cache->start;
7968 bits[ret].size = cache->size;
7969 cache = next_cache_extent(cache);
7971 } while (cache && ret < bits_nr);
7973 if (bits_nr - ret > 8) {
7974 u64 lookup = bits[0].start + bits[0].size;
7975 struct cache_extent *next;
7976 next = search_cache_extent(pending, lookup);
7978 if (next->start - lookup > 32768)
7980 bits[ret].start = next->start;
7981 bits[ret].size = next->size;
7982 lookup = next->start + next->size;
7986 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps @cache to its
 * chunk_record and unlinks it from the lists its list and dextents members
 * are on (the record is presumably freed on a line not shown).
 */
7994 static void free_chunk_record(struct cache_extent *cache)
7996 struct chunk_record *rec;
7998 rec = container_of(cache, struct chunk_record, cache);
7999 list_del_init(&rec->list);
8000 list_del_init(&rec->dextents);
/*
 * Release every chunk record in @chunk_cache by running
 * cache_tree_free_extents() with free_chunk_record as the per-extent
 * callback.
 */
8004 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8006 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback used when tearing down the device cache: maps @node to
 * its device_record (presumably to free it on a line not shown).
 */
8009 static void free_device_record(struct rb_node *node)
8011 struct device_record *rec;
8013 rec = container_of(node, struct device_record, node);
/*
 * Expands the FREE_RB_BASED_TREE macro for device_cache with
 * free_device_record as the node destructor.
 * NOTE(review): presumably this defines the tree-freeing helper for the
 * device cache; check the macro definition for the generated name.
 */
8017 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: insert its cache extent into tree->tree and append
 * the record to tree->block_groups.
 * NOTE(review): the list append is presumably skipped when
 * insert_cache_extent() fails, with its result returned -- confirm.
 */
8019 int insert_block_group_record(struct block_group_tree *tree,
8020 struct block_group_record *bg_rec)
8024 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8028 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps @cache
 * to its block_group_record and unlinks it from the list it is on (the
 * record is presumably freed on a line not shown).
 */
8032 static void free_block_group_record(struct cache_extent *cache)
8034 struct block_group_record *rec;
8036 rec = container_of(cache, struct block_group_record, cache);
8037 list_del_init(&rec->list);
/*
 * Release every block group record in @tree by running
 * cache_tree_free_extents() on tree->tree with free_block_group_record as
 * the per-extent callback.
 */
8041 void free_block_group_tree(struct block_group_tree *tree)
8043 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree using insert_cache_extent2(), then append it to both
 * orphan lists of the tree (no_chunk_orphans via chunk_list and
 * no_device_orphans via device_list).
 * NOTE(review): the list appends are presumably skipped when the insert
 * fails, with its result returned -- confirm.
 */
8046 int insert_device_extent_record(struct device_extent_tree *tree,
8047 struct device_extent_record *de_rec)
8052 * Device extent is a bit different from the other extents, because
8053 * the extents which belong to the different devices may have the
8054 * same start and size, so we need use the special extent cache
8055 * search/insert functions.
8057 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8061 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8062 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: maps @cache
 * to its device_extent_record and unlinks chunk_list and device_list, each
 * only when it is currently linked (the record is presumably freed on a
 * line not shown).
 */
8066 static void free_device_extent_record(struct cache_extent *cache)
8068 struct device_extent_record *rec;
8070 rec = container_of(cache, struct device_extent_record, cache);
8071 if (!list_empty(&rec->chunk_list))
8072 list_del_init(&rec->chunk_list);
8073 if (!list_empty(&rec->device_list))
8074 list_del_init(&rec->device_list);
/*
 * Release every device extent record in @tree by running
 * cache_tree_free_extents() on tree->tree with free_device_extent_record as
 * the per-extent callback.
 */
8078 void free_device_extent_tree(struct device_extent_tree *tree)
8080 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8083 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf (this function sits
 * under the BTRFS_COMPAT_EXTENT_TREE_V0 conditional).
 *
 * When the objectid stored in the ref is below BTRFS_FIRST_FREE_OBJECTID the
 * item is recorded with add_tree_backref(); the add_data_backref() call is
 * the other path and receives key.objectid as bytenr, key.offset as parent,
 * zero for root, owner and offset, the v0 ref count as num_refs, and zero
 * for found_ref and max_size.
 */
8084 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8085 struct extent_buffer *leaf, int slot)
8087 struct btrfs_extent_ref_v0 *ref0;
8088 struct btrfs_key key;
8091 btrfs_item_key_to_cpu(leaf, &key, slot);
8092 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8093 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8094 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8097 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8098 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at slot of @leaf.
 *
 * The record is calloc-ed with btrfs_chunk_record_size(num_stripes) so that
 * it can hold one stripe entry per stripe of the chunk. Filled in from @key:
 * cache.start and offset (key->offset), objectid, type. Filled in from the
 * item: cache.size / length (chunk length), owner, stripe_len, type_flags,
 * io_width, io_align, sector_size, num_stripes, sub_stripes, and for every
 * stripe its devid, offset and dev_uuid. generation comes from the leaf
 * header. Both list heads start empty.
 * On allocation failure "memory allocation failed" is printed on stderr;
 * what follows is on a line not shown (callers in this file use the result
 * without a NULL check -- confirm).
 */
8104 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8105 struct btrfs_key *key,
8108 struct btrfs_chunk *ptr;
8109 struct chunk_record *rec;
8112 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8113 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8115 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8117 fprintf(stderr, "memory allocation failed\n");
8121 INIT_LIST_HEAD(&rec->list);
8122 INIT_LIST_HEAD(&rec->dextents);
8125 rec->cache.start = key->offset;
8126 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8128 rec->generation = btrfs_header_generation(leaf);
8130 rec->objectid = key->objectid;
8131 rec->type = key->type;
8132 rec->offset = key->offset;
8134 rec->length = rec->cache.size;
8135 rec->owner = btrfs_chunk_owner(leaf, ptr);
8136 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8137 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8138 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8139 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8140 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8141 rec->num_stripes = num_stripes;
8142 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8144 for (i = 0; i < rec->num_stripes; ++i) {
8145 rec->stripes[i].devid =
8146 btrfs_stripe_devid_nr(leaf, ptr, i);
8147 rec->stripes[i].offset =
8148 btrfs_stripe_offset_nr(leaf, ptr, i);
8149 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8150 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error() as "not valid, ignore it". A valid one
 * is turned into a chunk_record by btrfs_new_chunk_record() and inserted
 * into @chunk_cache; a failed insert is reported on stderr as
 * "Chunk[...] existed.".
 * NOTE(review): return values and the disposal of a rejected record are on
 * lines not shown -- confirm.
 */
8157 static int process_chunk_item(struct cache_tree *chunk_cache,
8158 struct btrfs_key *key, struct extent_buffer *eb,
8161 struct chunk_record *rec;
8162 struct btrfs_chunk *chunk;
8165 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8167 * Do extra check for this chunk item,
8169 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8170 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8171 * and owner<->key_type check.
8173 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8176 error("chunk(%llu, %llu) is not valid, ignore it",
8177 key->offset, btrfs_chunk_length(eb, chunk));
8180 rec = btrfs_new_chunk_record(eb, key, slot);
8181 ret = insert_cache_extent(chunk_cache, &rec->cache);
8183 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8184 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * @dev_cache, ordered by device_record_compare.
 *
 * From @key: objectid, type, offset. From the item: devid, total_byte,
 * byte_used. generation comes from the leaf header. A failed insert is
 * reported on stderr as "Device[...] existed.".
 * The record comes from malloc(), so fields not assigned here are
 * uninitialised. rec->devid is assigned twice: first from key->offset, then
 * overwritten with btrfs_device_id(); only the second value survives.
 * NOTE(review): return values and the disposal of a rejected record are on
 * lines not shown -- confirm.
 */
8191 static int process_device_item(struct rb_root *dev_cache,
8192 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8194 struct btrfs_dev_item *ptr;
8195 struct device_record *rec;
8198 ptr = btrfs_item_ptr(eb,
8199 slot, struct btrfs_dev_item);
8201 rec = malloc(sizeof(*rec));
8203 fprintf(stderr, "memory allocation failed\n");
8207 rec->devid = key->offset;
8208 rec->generation = btrfs_header_generation(eb);
8210 rec->objectid = key->objectid;
8211 rec->type = key->type;
8212 rec->offset = key->offset;
8214 rec->devid = btrfs_device_id(eb, ptr);
8215 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8216 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8218 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8220 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised (calloc) block_group_record from the block group
 * item at slot of @leaf.
 *
 * The cache extent covers [key->objectid, key->objectid + key->offset);
 * objectid, type and offset mirror @key, generation comes from the leaf
 * header, and flags is read from the item. The list head starts empty.
 * On allocation failure "memory allocation failed" is printed on stderr;
 * what follows is on a line not shown.
 */
8227 struct block_group_record *
8228 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8231 struct btrfs_block_group_item *ptr;
8232 struct block_group_record *rec;
8234 rec = calloc(1, sizeof(*rec));
8236 fprintf(stderr, "memory allocation failed\n");
8240 rec->cache.start = key->objectid;
8241 rec->cache.size = key->offset;
8243 rec->generation = btrfs_header_generation(leaf);
8245 rec->objectid = key->objectid;
8246 rec->type = key->type;
8247 rec->offset = key->offset;
8249 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8250 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8252 INIT_LIST_HEAD(&rec->list);
/*
 * Record the BLOCK_GROUP_ITEM found at @slot of leaf @eb in
 * @block_group_cache: build a record with btrfs_new_block_group_record() and
 * insert it with insert_block_group_record(). The "existed" message reports a
 * failed insert.
 */
8257 static int process_block_group_item(struct block_group_tree *block_group_cache,
8258 struct btrfs_key *key,
8259 struct extent_buffer *eb, int slot)
8261 struct block_group_record *rec;
8264 rec = btrfs_new_block_group_record(eb, key, slot);
8265 ret = insert_block_group_record(block_group_cache, rec);
8267 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8268 rec->objectid, rec->offset);
/*
 * Allocate a zero-initialised device_extent_record for the DEV_EXTENT item at
 * @slot of @leaf.
 *
 * The cache entry is keyed by (key->objectid, key->offset) and sized by the
 * extent length read from the item. The key fields, the leaf header
 * generation and the owning chunk's objectid/offset from the item are stored
 * as well. Both list heads of the record start empty.
 */
8275 struct device_extent_record *
8276 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8277 struct btrfs_key *key, int slot)
8279 struct device_extent_record *rec;
8280 struct btrfs_dev_extent *ptr;
8282 rec = calloc(1, sizeof(*rec));
8284 fprintf(stderr, "memory allocation failed\n");
8288 rec->cache.objectid = key->objectid;
8289 rec->cache.start = key->offset;
8291 rec->generation = btrfs_header_generation(leaf);
8293 rec->objectid = key->objectid;
8294 rec->type = key->type;
8295 rec->offset = key->offset;
/* Remaining fields come from the on-disk dev extent item. */
8297 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8298 rec->chunk_objecteid =
8299 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8301 btrfs_dev_extent_chunk_offset(leaf, ptr);
8302 rec->length = btrfs_dev_extent_length(leaf, ptr);
/* The cached range covers exactly the device extent's length. */
8303 rec->cache.size = rec->length;
8305 INIT_LIST_HEAD(&rec->chunk_list);
8306 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the DEV_EXTENT item found at @slot of leaf @eb in @dev_extent_cache:
 * build a record with btrfs_new_device_extent_record() and insert it with
 * insert_device_extent_record(). The "existed" message reports a failed
 * insert.
 */
8312 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8313 struct btrfs_key *key, struct extent_buffer *eb,
8316 struct device_extent_record *rec;
8319 rec = btrfs_new_device_extent_record(eb, key, slot);
8320 ret = insert_device_extent_record(dev_extent_cache, rec);
8323 "Device extent[%llu, %llu, %llu] existed.\n",
8324 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM / METADATA_ITEM at @slot of leaf @eb into
 * @extent_cache.
 *
 * An extent record (start, length, reference count, metadata flag) is added
 * with add_extent_rec(). The inline references stored after the extent item
 * are then walked and a tree or data backref is added for each one.
 * Extents whose bytenr or length fail the alignment/size checks below are
 * reported with error() as ignored.
 */
8331 static int process_extent_item(struct btrfs_root *root,
8332 struct cache_tree *extent_cache,
8333 struct extent_buffer *eb, int slot)
8335 struct btrfs_extent_item *ei;
8336 struct btrfs_extent_inline_ref *iref;
8337 struct btrfs_extent_data_ref *dref;
8338 struct btrfs_shared_data_ref *sref;
8339 struct btrfs_key key;
8340 struct extent_record tmpl;
8345 u32 item_size = btrfs_item_size_nr(eb, slot);
8351 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * Extent length: nodesize for METADATA_ITEM keys, key.offset for the
 * other case handled here.
 */
8353 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8355 num_bytes = root->fs_info->nodesize;
8357 num_bytes = key.offset;
8360 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8361 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8362 key.objectid, root->fs_info->sectorsize);
/*
 * An item smaller than struct btrfs_extent_item is treated as the old v0
 * layout when BTRFS_COMPAT_EXTENT_TREE_V0 is defined; only the reference
 * count is taken from it and no inline refs are parsed.
 */
8365 if (item_size < sizeof(*ei)) {
8366 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8367 struct btrfs_extent_item_v0 *ei0;
8368 BUG_ON(item_size != sizeof(*ei0));
8369 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8370 refs = btrfs_extent_refs_v0(eb, ei0);
8374 memset(&tmpl, 0, sizeof(tmpl));
8375 tmpl.start = key.objectid;
8376 tmpl.nr = num_bytes;
8377 tmpl.extent_item_refs = refs;
8378 tmpl.metadata = metadata;
8380 tmpl.max_size = num_bytes;
8382 return add_extent_rec(extent_cache, &tmpl);
8385 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8386 refs = btrfs_extent_refs(eb, ei);
8387 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Sanity checks on the length before the extent is recorded. */
8391 if (metadata && num_bytes != root->fs_info->nodesize) {
8392 error("ignore invalid metadata extent, length %llu does not equal to %u",
8393 num_bytes, root->fs_info->nodesize);
8396 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8397 error("ignore invalid data extent, length %llu is not aligned to %u",
8398 num_bytes, root->fs_info->sectorsize);
8402 memset(&tmpl, 0, sizeof(tmpl));
8403 tmpl.start = key.objectid;
8404 tmpl.nr = num_bytes;
8405 tmpl.extent_item_refs = refs;
8406 tmpl.metadata = metadata;
8408 tmpl.max_size = num_bytes;
8409 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item. For a tree block keyed by
 * EXTENT_ITEM a struct btrfs_tree_block_info sits in between and is skipped.
 */
8411 ptr = (unsigned long)(ei + 1);
8412 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8413 key.type == BTRFS_EXTENT_ITEM_KEY)
8414 ptr += sizeof(struct btrfs_tree_block_info);
/* End of the item; the inline refs occupy [ptr, end). */
8416 end = (unsigned long)ei + item_size;
8418 iref = (struct btrfs_extent_inline_ref *)ptr;
8419 type = btrfs_extent_inline_ref_type(eb, iref);
8420 offset = btrfs_extent_inline_ref_offset(eb, iref);
8422 case BTRFS_TREE_BLOCK_REF_KEY:
8423 ret = add_tree_backref(extent_cache, key.objectid,
8427 "add_tree_backref failed (extent items tree block): %s",
8430 case BTRFS_SHARED_BLOCK_REF_KEY:
8431 ret = add_tree_backref(extent_cache, key.objectid,
8435 "add_tree_backref failed (extent items shared block): %s",
8438 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref starting at its offset field. */
8439 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8440 add_data_backref(extent_cache, key.objectid, 0,
8441 btrfs_extent_data_ref_root(eb, dref),
8442 btrfs_extent_data_ref_objectid(eb,
8444 btrfs_extent_data_ref_offset(eb, dref),
8445 btrfs_extent_data_ref_count(eb, dref),
8448 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref (its count) is stored directly after the inline ref. */
8449 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8450 add_data_backref(extent_cache, key.objectid, offset,
8452 btrfs_shared_data_ref_count(eb, sref),
8456 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8457 key.objectid, key.type, num_bytes);
/* Step to the next inline ref; its size depends on the ref type. */
8460 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by one free space entry with exactly that offset and size.
 *
 * For every superblock mirror, btrfs_rmap_block() yields the logical
 * addresses it occupies inside the block group; the range is trimmed (or
 * split, with a recursive call for the left part) so that those stripes are
 * left out. The remaining range is looked up with btrfs_find_free_space();
 * mismatches are reported on stderr, and a matching entry is unlinked from
 * the free space ctl.
 */
8467 static int check_cache_range(struct btrfs_root *root,
8468 struct btrfs_block_group_cache *cache,
8469 u64 offset, u64 bytes)
8471 struct btrfs_free_space *entry;
8477 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8478 bytenr = btrfs_sb_offset(i);
8479 ret = btrfs_rmap_block(root->fs_info,
8480 cache->key.objectid, bytenr, 0,
8481 &logical, &nr, &stripe_len);
/* Stripe lies entirely before, respectively after, the range being checked. */
8486 if (logical[nr] + stripe_len <= offset)
8488 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: cut it off the front. */
8490 if (logical[nr] == offset) {
8491 if (stripe_len >= bytes) {
8495 bytes -= stripe_len;
8496 offset += stripe_len;
/* Stripe starts before the range: keep only what follows the stripe. */
8497 } else if (logical[nr] < offset) {
8498 if (logical[nr] + stripe_len >=
8503 bytes = (offset + bytes) -
8504 (logical[nr] + stripe_len);
8505 offset = logical[nr] + stripe_len;
8508 * Could be tricky, the super may land in the
8509 * middle of the area we're checking. First
8510 * check the easiest case, it's at the end.
8512 if (logical[nr] + stripe_len >=
8514 bytes = logical[nr] - offset;
8518 /* Check the left side */
8519 ret = check_cache_range(root, cache,
8521 logical[nr] - offset);
8527 /* Now we continue with the right side */
8528 bytes = (offset + bytes) -
8529 (logical[nr] + stripe_len);
8530 offset = logical[nr] + stripe_len;
/* Whatever is left must match a free space entry exactly. */
8537 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8539 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8540 offset, offset+bytes);
8544 if (entry->offset != offset) {
8545 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8550 if (entry->bytes != bytes) {
8551 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8552 bytes, entry->bytes, offset);
/* Remove the verified entry so leftovers can be detected by the caller. */
8556 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the loaded free space of block group @cache against the extent
 * tree.
 *
 * EXTENT_ITEM / METADATA_ITEM keys inside the block group are walked in
 * order; every gap between the end of the previous extent (@last) and the
 * next extent, plus the tail of the block group, is checked with
 * check_cache_range(). Entries still left in free_space_ctl afterwards are
 * reported on stderr.
 */
8561 static int verify_space_cache(struct btrfs_root *root,
8562 struct btrfs_block_group_cache *cache)
8564 struct btrfs_path path;
8565 struct extent_buffer *leaf;
8566 struct btrfs_key key;
/* All lookups below go to the extent tree, whatever root was passed in. */
8570 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
8572 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8574 btrfs_init_path(&path);
8575 key.objectid = last;
8577 key.type = BTRFS_EXTENT_ITEM_KEY;
8578 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8583 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8584 ret = btrfs_next_leaf(root, &path);
8592 leaf = path.nodes[0];
8593 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key is at or beyond the end of this block group. */
8594 if (key.objectid >= cache->key.offset + cache->key.objectid)
8596 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8597 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Extent begins exactly at @last, so there is no gap to check; move @last
 * past it (key.offset bytes for EXTENT_ITEM, nodesize in the other case).
 */
8602 if (last == key.objectid) {
8603 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8604 last = key.objectid + key.offset;
8606 last = key.objectid + root->fs_info->nodesize;
/* Gap [last, key.objectid) has to be present as free space. */
8611 ret = check_cache_range(root, cache, last,
8612 key.objectid - last);
8615 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8616 last = key.objectid + key.offset;
8618 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the last extent. */
8622 if (last < cache->key.objectid + cache->key.offset)
8623 ret = check_cache_range(root, cache, last,
8624 cache->key.objectid +
8625 cache->key.offset - last);
8628 btrfs_release_path(&path);
8631 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8632 fprintf(stderr, "There are still entries left in the space "
/*
 * Check the free space information of the block groups.
 *
 * If the super block's cache generation is set (not -1ULL) but differs from
 * the super generation, a message says the space cache will be invalidated.
 * Otherwise block groups are looked up starting at
 * BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; for each one the free
 * space is loaded either from the free space tree (when the FREE_SPACE_TREE
 * compat_ro bit is set) or from the free space cache, and then checked with
 * verify_space_cache().
 *
 * Returns -EINVAL if the local error counter is non-zero, 0 otherwise.
 */
8640 static int check_space_cache(struct btrfs_root *root)
8642 struct btrfs_block_group_cache *cache;
8643 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8647 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8648 btrfs_super_generation(root->fs_info->super_copy) !=
8649 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8650 printf("cache and super generation don't match, space cache "
8651 "will be invalidated\n");
/* Optional progress reporting for this phase. */
8655 if (ctx.progress_enabled) {
8656 ctx.tp = TASK_FREE_SPACE;
8657 task_start(ctx.info);
8661 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup continues right after this block group. */
8665 start = cache->key.objectid + cache->key.offset;
8666 if (!cache->free_space_ctl) {
8667 if (btrfs_init_free_space_ctl(cache,
8668 root->fs_info->sectorsize)) {
/* NOTE(review): presumably drops previously loaded entries when the ctl already existed -- confirm. */
8673 btrfs_remove_free_space_cache(cache);
8676 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* Super stripes are excluded only around loading the free space tree. */
8677 ret = exclude_super_stripes(root, cache);
8679 fprintf(stderr, "could not exclude super stripes: %s\n",
8684 ret = load_free_space_tree(root->fs_info, cache);
8685 free_excluded_extents(root, cache);
8687 fprintf(stderr, "could not load free space tree: %s\n",
8694 ret = load_free_space_cache(root->fs_info, cache);
8699 ret = verify_space_cache(root, cache);
8701 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8702 cache->key.objectid);
8707 task_stop(ctx.info);
8709 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range [@bytenr, @bytenr + @num_bytes).
 *
 * The data is read with read_extent_data() into a temporary buffer and
 * checksummed one fs_info->sectorsize piece at a time. Each result is
 * compared with the expected checksum read from @eb, starting at
 * @leaf_offset and advancing csum_size bytes per sector. A mismatch is
 * reported on stderr and, when btrfs_num_copies() says more copies exist,
 * another mirror is tried.
 */
8712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8713 u64 num_bytes, unsigned long leaf_offset,
8714 struct extent_buffer *eb) {
8716 struct btrfs_fs_info *fs_info = root->fs_info;
8718 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8720 unsigned long csum_offset;
8724 u64 data_checked = 0;
/* Only whole sectors can be checksummed. */
8730 if (num_bytes % fs_info->sectorsize)
8733 data = malloc(num_bytes);
8737 while (offset < num_bytes) {
8740 read_len = num_bytes - offset;
8741 /* read as much space once a time */
8742 ret = read_extent_data(fs_info, data + offset,
8743 bytenr + offset, &read_len, mirror);
8747 /* verify every 4k data's checksum */
/* The step actually used below is fs_info->sectorsize. */
8748 while (data_checked < read_len) {
8750 tmp = offset + data_checked;
8752 csum = btrfs_csum_data((char *)data + tmp,
8753 csum, fs_info->sectorsize);
8754 btrfs_csum_final(csum, (u8 *)&csum);
/* Position of the expected checksum for this sector inside the csum item. */
8756 csum_offset = leaf_offset +
8757 tmp / fs_info->sectorsize * csum_size;
8758 read_extent_buffer(eb, (char *)&csum_expected,
8759 csum_offset, csum_size);
8760 /* try another mirror */
8761 if (csum != csum_expected) {
8762 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8763 mirror, bytenr + tmp,
8764 csum, csum_expected);
8765 num_copies = btrfs_num_copies(root->fs_info,
8767 if (mirror < num_copies - 1) {
8772 data_checked += fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * preceding item, then walks forward. Each extent found trims the part of
 * the range it covers; an extent in the middle of the range leads to a
 * recursive call for the right-hand part. If bytes are still uncovered at
 * the end (and no error occurred), "There are no extents for csum range" is
 * printed.
 */
8781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8784 struct btrfs_path path;
8785 struct extent_buffer *leaf;
8786 struct btrfs_key key;
8789 btrfs_init_path(&path);
/* Largest possible key for this bytenr, so the search lands just after it. */
8790 key.objectid = bytenr;
8791 key.type = BTRFS_EXTENT_ITEM_KEY;
8792 key.offset = (u64)-1;
8795 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8798 fprintf(stderr, "Error looking up extent record %d\n", ret);
8799 btrfs_release_path(&path);
/* Step back one item, crossing into the previous leaf if needed. */
8802 if (path.slots[0] > 0) {
8805 ret = btrfs_prev_leaf(root, &path);
8808 } else if (ret > 0) {
8815 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8818 * Block group items come before extent items if they have the same
8819 * bytenr, so walk back one more just in case. Dear future traveller,
8820 * first congrats on mastering time travel. Now if it's not too much
8821 * trouble could you go back to 2006 and tell Chris to make the
8822 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8823 * EXTENT_ITEM_KEY please?
8825 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8826 if (path.slots[0] > 0) {
8829 ret = btrfs_prev_leaf(root, &path);
8832 } else if (ret > 0) {
8837 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk over the items from the position found above. */
8841 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8842 ret = btrfs_next_leaf(root, &path);
8844 fprintf(stderr, "Error going to next leaf "
8846 btrfs_release_path(&path);
8852 leaf = path.nodes[0];
8853 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8854 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range, respectively starts after it. */
8858 if (key.objectid + key.offset < bytenr) {
8862 if (key.objectid > bytenr + num_bytes)
/* Extent starts at the range start: consume it from the front. */
8865 if (key.objectid == bytenr) {
8866 if (key.offset >= num_bytes) {
8870 num_bytes -= key.offset;
8871 bytenr += key.offset;
/* Extent starts before the range: keep what lies beyond its end. */
8872 } else if (key.objectid < bytenr) {
8873 if (key.objectid + key.offset >= bytenr + num_bytes) {
8877 num_bytes = (bytenr + num_bytes) -
8878 (key.objectid + key.offset);
8879 bytenr = key.objectid + key.offset;
8881 if (key.objectid + key.offset < bytenr + num_bytes) {
8882 u64 new_start = key.objectid + key.offset;
8883 u64 new_bytes = bytenr + num_bytes - new_start;
8886 * Weird case, the extent is in the middle of
8887 * our range, we'll have to search one side
8888 * and then the other. Not sure if this happens
8889 * in real life, but no harm in coding it up
8890 * anyway just in case.
8892 btrfs_release_path(&path);
8893 ret = check_extent_exists(root, new_start,
8896 fprintf(stderr, "Right section didn't "
8900 num_bytes = key.objectid - bytenr;
8903 num_bytes = key.objectid - bytenr;
8910 if (num_bytes && !ret) {
8911 fprintf(stderr, "There are no extents for csum range "
8912 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8916 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.
 *
 * When check_data_csum is set, the data covered by each item is verified
 * with check_extent_csums(). Independently of that, consecutive csum items
 * are accumulated into a run [offset, offset + num_bytes); when the next
 * item does not continue the run, check_extent_exists() is asked whether
 * extent records cover it, and a missing record is reported on stderr.
 */
8920 static int check_csums(struct btrfs_root *root)
8922 struct btrfs_path path;
8923 struct extent_buffer *leaf;
8924 struct btrfs_key key;
8925 u64 offset = 0, num_bytes = 0;
8926 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8930 unsigned long leaf_offset;
/* From here on @root is the csum tree. */
8932 root = root->fs_info->csum_root;
8933 if (!extent_buffer_uptodate(root->node)) {
8934 fprintf(stderr, "No valid csum tree found\n");
8938 btrfs_init_path(&path);
8939 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8940 key.type = BTRFS_EXTENT_CSUM_KEY;
8942 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8944 fprintf(stderr, "Error searching csum tree %d\n", ret);
8945 btrfs_release_path(&path);
8949 if (ret > 0 && path.slots[0])
8954 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8955 ret = btrfs_next_leaf(root, &path);
8957 fprintf(stderr, "Error going to next leaf "
8964 leaf = path.nodes[0];
8966 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8967 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this item: one sector per stored checksum. */
8972 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8973 csum_size) * root->fs_info->sectorsize;
8974 if (!check_data_csum)
8975 goto skip_csum_check;
8976 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8977 ret = check_extent_csums(root, key.offset, data_len,
8983 offset = key.offset;
/* This item does not continue the current run: check the run, start a new one. */
8984 } else if (key.offset != offset + num_bytes) {
8985 ret = check_extent_exists(root, offset, num_bytes);
8987 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8988 "there is no extent record\n",
8989 offset, offset+num_bytes);
8992 offset = key.offset;
8995 num_bytes += data_len;
8999 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset; a later field is only looked at when the earlier ones are equal.
 * NOTE(review): presumably returns non-zero when @key sorts strictly before
 * @drop_key and 0 otherwise -- confirm against the return statements.
 */
9003 static int is_dropped_key(struct btrfs_key *key,
9004 struct btrfs_key *drop_key) {
9005 if (key->objectid < drop_key->objectid)
9007 else if (key->objectid == drop_key->objectid) {
9008 if (key->type < drop_key->type)
9010 else if (key->type == drop_key->type) {
9011 if (key->offset < drop_key->offset)
9019 * Here are the rules for FULL_BACKREF.
9021 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9022 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9024 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9025 * if it happened after the relocation occurred since we'll have dropped the
9026 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9027 * have no real way to know for sure.
9029 * We process the blocks one root at a time, and we start from the lowest root
9030 * objectid and go to the highest. So we can just lookup the owner backref for
9031 * the record and if we don't find it then we know it doesn't exist and we have
9034 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9035 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9036 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and store the result in *flags.
 *
 * The extent record for buf->start is looked up in @extent_cache. The
 * decision looks at the root id in @ri, whether @buf is that root's node,
 * the RELOC header flag, the header owner and whether a tree backref for the
 * owner exists. If the outcome disagrees with a flag_block_full_backref
 * value already stored in the record (anything but FLAG_UNSET), the record
 * is marked bad_full_backref.
 */
9038 static int calc_extent_flag(struct cache_tree *extent_cache,
9039 struct extent_buffer *buf,
9040 struct root_item_record *ri,
9043 struct extent_record *rec;
9044 struct cache_extent *cache;
9045 struct tree_backref *tback;
9048 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9049 /* we have added this extent before */
9053 rec = container_of(cache, struct extent_record, cache);
9056 * Except file/reloc tree, we can not have
9059 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @buf is the root node of the tree described by @ri. */
9064 if (buf->start == ri->bytenr)
9067 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9070 owner = btrfs_header_owner(buf);
9071 if (owner == ri->objectid)
/* Look for a non-shared (parent == 0) tree backref from the header owner. */
9074 tback = find_tree_backref(rec, 0, owner);
/* Outcome "normal backref": conflicts with a stored value other than 0. */
9079 if (rec->flag_block_full_backref != FLAG_UNSET &&
9080 rec->flag_block_full_backref != 0)
9081 rec->bad_full_backref = 1;
/* Outcome "full backref": conflicts with a stored value other than 1. */
9084 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9085 if (rec->flag_block_full_backref != FLAG_UNSET &&
9086 rec->flag_block_full_backref != 1)
9087 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() and print_objectid() to render @key_type and @rootid.
 */
9091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9093 fprintf(stderr, "Invalid key type(");
9094 print_key_type(stderr, 0, key_type);
9095 fprintf(stderr, ") found in root(");
9096 print_objectid(stderr, rootid, 0);
9097 fprintf(stderr, ")\n");
9101 * Check if the key is valid with its extent buffer.
9103 * This is an early check in case an invalid key exists in an extent buffer
9104 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 *
 * Only the key types listed below are restricted; each group is tied to the
 * tree(s) named in its condition. A mismatch is reported through
 * report_mismatch_key_root().
 */
9107 static int check_type_with_root(u64 rootid, u8 key_type)
9110 /* Only valid in chunk tree */
9111 case BTRFS_DEV_ITEM_KEY:
9112 case BTRFS_CHUNK_ITEM_KEY:
9113 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9116 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a *_KEY item type, but this one is
 * a tree objectid constant. It looks like BTRFS_EXTENT_CSUM_KEY was intended;
 * as written the csum key type is presumably never matched -- confirm.
 */
9117 case BTRFS_CSUM_TREE_OBJECTID:
9118 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9122 case BTRFS_EXTENT_ITEM_KEY:
9123 case BTRFS_METADATA_ITEM_KEY:
9124 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9125 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9128 case BTRFS_ROOT_ITEM_KEY:
9129 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9132 case BTRFS_DEV_EXTENT_KEY:
9133 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9139 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next queued tree block.
 *
 * pick_next_pending() selects a batch of blocks into @bits; readahead is
 * issued for the batch and the first block is read with read_tree_block().
 * The block is removed from the @pending, @reada and @nodes caches, its
 * FULL_BACKREF state is determined and stored in its extent record, and the
 * block is checked with check_block().
 *
 * For a leaf, every item is dispatched on its key type to the matching
 * process_*() helper or backref recorder, and file extents are accounted.
 * For a node, each child pointer gets an extent record and a tree backref
 * and is queued in @nodes or @pending.
 *
 * Global btree statistics (total bytes, fs/extent tree bytes, wasted space)
 * are updated along the way.
 */
9143 static int run_next_block(struct btrfs_root *root,
9144 struct block_info *bits,
9147 struct cache_tree *pending,
9148 struct cache_tree *seen,
9149 struct cache_tree *reada,
9150 struct cache_tree *nodes,
9151 struct cache_tree *extent_cache,
9152 struct cache_tree *chunk_cache,
9153 struct rb_root *dev_cache,
9154 struct block_group_tree *block_group_cache,
9155 struct device_extent_tree *dev_extent_cache,
9156 struct root_item_record *ri)
9158 struct btrfs_fs_info *fs_info = root->fs_info;
9159 struct extent_buffer *buf;
9160 struct extent_record *rec = NULL;
9171 struct btrfs_key key;
9172 struct cache_extent *cache;
9175 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9176 bits_nr, &reada_bits);
/* Track the picked blocks in @reada and start reading them ahead. */
9181 for(i = 0; i < nritems; i++) {
9182 ret = add_cache_extent(reada, bits[i].start,
9187 /* fixme, get the parent transid */
9188 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first picked block is processed in this call. */
9191 *last = bits[0].start;
9192 bytenr = bits[0].start;
9193 size = bits[0].size;
/* The block is being handled now: drop it from the three queues. */
9195 cache = lookup_cache_extent(pending, bytenr, size);
9197 remove_cache_extent(pending, cache);
9200 cache = lookup_cache_extent(reada, bytenr, size);
9202 remove_cache_extent(reada, cache);
9205 cache = lookup_cache_extent(nodes, bytenr, size);
9207 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the expected parent generation. */
9210 cache = lookup_cache_extent(extent_cache, bytenr, size);
9212 rec = container_of(cache, struct extent_record, cache);
9213 gen = rec->parent_generation;
9216 /* fixme, get the real parent transid */
9217 buf = read_tree_block(root->fs_info, bytenr, gen);
9218 if (!extent_buffer_uptodate(buf)) {
9219 record_bad_block_io(root->fs_info,
9220 extent_cache, bytenr, size);
9224 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being re-initialised, the flags are looked up
 * in the extent tree first; calc_extent_flag() computes them from the
 * collected records instead.
 */
9227 if (!init_extent_tree) {
9228 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9229 btrfs_header_level(buf), 1, NULL,
9232 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9234 fprintf(stderr, "Couldn't calc extent flags\n");
9235 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9240 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9242 fprintf(stderr, "Couldn't calc extent flags\n");
9243 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * Cross-check FULL_BACKREF against the root being walked.
 * NOTE(review): rec starts as NULL and is only assigned from the
 * extent_cache lookup above; confirm it cannot be NULL where it is
 * dereferenced below.
 */
9247 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9249 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9250 ri->objectid == btrfs_header_owner(buf)) {
9252 * Ok we got to this block from it's original owner and
9253 * we have FULL_BACKREF set. Relocation can leave
9254 * converted blocks over so this is altogether possible,
9255 * however it's not possible if the generation > the
9256 * last snapshot, so check for this case.
9258 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9259 btrfs_header_generation(buf) > ri->last_snapshot) {
9260 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9261 rec->bad_full_backref = 1;
9266 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9267 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9268 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9269 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
9273 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9274 rec->flag_block_full_backref = 1;
9278 rec->flag_block_full_backref = 0;
9280 owner = btrfs_header_owner(buf);
9283 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account free space as waste and dispatch every item by key type. */
9287 if (btrfs_is_leaf(buf)) {
9288 btree_space_waste += btrfs_leaf_free_space(root, buf);
9289 for (i = 0; i < nritems; i++) {
9290 struct btrfs_file_extent_item *fi;
9291 btrfs_item_key_to_cpu(buf, &key, i);
9293 * Check key type against the leaf owner.
9294 * Could filter quite a lot of early error if
9297 if (check_type_with_root(btrfs_header_owner(buf),
9299 fprintf(stderr, "ignoring invalid key\n");
9302 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9303 process_extent_item(root, extent_cache, buf,
9307 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9308 process_extent_item(root, extent_cache, buf,
9312 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9314 btrfs_item_size_nr(buf, i);
9317 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9318 process_chunk_item(chunk_cache, &key, buf, i);
9321 if (key.type == BTRFS_DEV_ITEM_KEY) {
9322 process_device_item(dev_cache, &key, buf, i);
9325 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9326 process_block_group_item(block_group_cache,
9330 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9331 process_device_extent_item(dev_extent_cache,
9336 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9338 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: the reference data is in the key itself. */
9345 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9346 ret = add_tree_backref(extent_cache,
9347 key.objectid, 0, key.offset, 0);
9350 "add_tree_backref failed (leaf tree block): %s",
9354 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9355 ret = add_tree_backref(extent_cache,
9356 key.objectid, key.offset, 0, 0);
9359 "add_tree_backref failed (leaf shared block): %s",
9363 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9364 struct btrfs_extent_data_ref *ref;
9365 ref = btrfs_item_ptr(buf, i,
9366 struct btrfs_extent_data_ref);
9367 add_data_backref(extent_cache,
9369 btrfs_extent_data_ref_root(buf, ref),
9370 btrfs_extent_data_ref_objectid(buf,
9372 btrfs_extent_data_ref_offset(buf, ref),
9373 btrfs_extent_data_ref_count(buf, ref),
9374 0, root->fs_info->sectorsize);
9377 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9378 struct btrfs_shared_data_ref *ref;
9379 ref = btrfs_item_ptr(buf, i,
9380 struct btrfs_shared_data_ref);
9381 add_data_backref(extent_cache,
9382 key.objectid, key.offset, 0, 0, 0,
9383 btrfs_shared_data_ref_count(buf, ref),
9384 0, root->fs_info->sectorsize);
/* Orphan items are collected on the global delete_items list as bad items. */
9387 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9388 struct bad_item *bad;
9390 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9394 bad = malloc(sizeof(struct bad_item));
9397 INIT_LIST_HEAD(&bad->list);
9398 memcpy(&bad->key, &key,
9399 sizeof(struct btrfs_key));
9400 bad->root_id = owner;
9401 list_add_tail(&bad->list, &delete_items);
/*
 * The rest of the loop body deals with EXTENT_DATA items; inline extents
 * and extents with disk_bytenr 0 are tested for separately below.
 */
9404 if (key.type != BTRFS_EXTENT_DATA_KEY)
9406 fi = btrfs_item_ptr(buf, i,
9407 struct btrfs_file_extent_item);
9408 if (btrfs_file_extent_type(buf, fi) ==
9409 BTRFS_FILE_EXTENT_INLINE)
9411 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9414 data_bytes_allocated +=
9415 btrfs_file_extent_disk_num_bytes(buf, fi);
9416 if (data_bytes_allocated < root->fs_info->sectorsize) {
9419 data_bytes_referenced +=
9420 btrfs_file_extent_num_bytes(buf, fi);
/* The backref offset is the file offset minus the offset into the extent. */
9421 add_data_backref(extent_cache,
9422 btrfs_file_extent_disk_bytenr(buf, fi),
9423 parent, owner, key.objectid, key.offset -
9424 btrfs_file_extent_offset(buf, fi), 1, 1,
9425 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child pointer. */
9429 struct btrfs_key first_key;
9431 first_key.objectid = 0;
9434 btrfs_item_key_to_cpu(buf, &first_key, 0);
9435 level = btrfs_header_level(buf);
9436 for (i = 0; i < nritems; i++) {
9437 struct extent_record tmpl;
9439 ptr = btrfs_node_blockptr(buf, i);
9440 size = root->fs_info->nodesize;
9441 btrfs_node_key_to_cpu(buf, &key, i);
/* Children tested against the root's drop_key at drop_level. */
9443 if ((level == ri->drop_level)
9444 && is_dropped_key(&key, &ri->drop_key)) {
9449 memset(&tmpl, 0, sizeof(tmpl));
9450 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9451 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9456 tmpl.max_size = size;
9457 ret = add_extent_rec(extent_cache, &tmpl);
9461 ret = add_tree_backref(extent_cache, ptr, parent,
9465 "add_tree_backref failed (non-leaf block): %s",
9471 add_pending(nodes, seen, ptr, size);
9473 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted space. */
9476 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9477 nritems) * sizeof(struct btrfs_key_ptr);
9479 total_btree_bytes += buf->len;
9480 if (fs_root_objectid(btrfs_header_owner(buf)))
9481 total_fs_tree_bytes += buf->len;
9482 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9483 total_extent_tree_bytes += buf->len;
9485 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing.
 *
 * The block is added to @nodes when its level is above 0 and to @pending in
 * the other add_pending() call. An extent record covering buf->start /
 * buf->len is added to @extent_cache, followed by a tree backref: keyed by
 * the block's own start as parent for the reloc tree or for blocks older
 * than BTRFS_MIXED_BACKREF_REV, and keyed by @objectid in the other call.
 */
9489 static int add_root_to_pending(struct extent_buffer *buf,
9490 struct cache_tree *extent_cache,
9491 struct cache_tree *pending,
9492 struct cache_tree *seen,
9493 struct cache_tree *nodes,
9496 struct extent_record tmpl;
9499 if (btrfs_header_level(buf) > 0)
9500 add_pending(nodes, seen, buf->start, buf->len);
9502 add_pending(pending, seen, buf->start, buf->len);
9504 memset(&tmpl, 0, sizeof(tmpl));
9505 tmpl.start = buf->start;
9510 tmpl.max_size = buf->len;
9511 add_extent_rec(extent_cache, &tmpl);
9513 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9514 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9515 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9518 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9523 /* as we fix the tree, we might be deleting blocks that
9524 * we're tracking for repair. This hook makes sure we
9525 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent is freed: keep the fsck extent cache
 * (fs_info->fsck_extent_cache) in step with the change.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up,
 * the matching data or tree backref is found and its counters/flags are
 * reduced, and maybe_free_extent_rec() is then given the chance to drop the
 * record.
 */
9527 static int free_extent_hook(struct btrfs_trans_handle *trans,
9528 struct btrfs_root *root,
9529 u64 bytenr, u64 num_bytes, u64 parent,
9530 u64 root_objectid, u64 owner, u64 offset,
9533 struct extent_record *rec;
9534 struct cache_extent *cache;
9536 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
9538 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9539 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9543 rec = container_of(cache, struct extent_record, cache);
/* Data extent: adjust reference counts by refs_to_drop. */
9545 struct data_backref *back;
9546 back = find_data_backref(rec, parent, root_objectid, owner,
9547 offset, 1, bytenr, num_bytes);
9550 if (back->node.found_ref) {
9551 back->found_ref -= refs_to_drop;
9553 rec->refs -= refs_to_drop;
9555 if (back->node.found_extent_tree) {
9556 back->num_refs -= refs_to_drop;
9557 if (rec->extent_item_refs)
9558 rec->extent_item_refs -= refs_to_drop;
9560 if (back->found_ref == 0)
9561 back->node.found_ref = 0;
9562 if (back->num_refs == 0)
9563 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased while found_ref is still set; confirm
 * whether "!found_ref" (backref no longer seen anywhere) was intended.
 */
9565 if (!back->node.found_extent_tree && back->node.found_ref) {
9566 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block: the backref flags are simply cleared. */
9570 struct tree_backref *back;
9571 back = find_tree_backref(rec, parent, root_objectid);
9574 if (back->node.found_ref) {
9577 back->node.found_ref = 0;
9579 if (back->node.found_extent_tree) {
9580 if (rec->extent_item_refs)
9581 rec->extent_item_refs--;
9582 back->node.found_extent_tree = 0;
/* NOTE(review): same erase condition as in the data case -- confirm. */
9584 if (!back->node.found_extent_tree && back->node.found_ref) {
9585 rb_erase(&back->node.node, &rec->backref_tree);
9589 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that belong to bytenr.
 *
 * The search starts at (bytenr, <type>, -1) and works towards smaller keys.
 * Items whose objectid is bytenr and whose type is one of the extent item or
 * backref key types listed below are deleted with btrfs_del_item(); for
 * other types the search key is moved just below the found key. When an
 * EXTENT_ITEM or METADATA_ITEM is deleted, btrfs_update_block_group() is
 * called for the extent's bytes.
 */
9594 static int delete_extent_records(struct btrfs_trans_handle *trans,
9595 struct btrfs_root *root,
9596 struct btrfs_path *path,
9599 struct btrfs_key key;
9600 struct btrfs_key found_key;
9601 struct extent_buffer *leaf;
9606 key.objectid = bytenr;
9608 key.offset = (u64)-1;
9611 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9618 if (path->slots[0] == 0)
9624 leaf = path->nodes[0];
9625 slot = path->slots[0];
9627 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9628 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key just below it. */
9631 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9632 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9633 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9634 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9635 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9636 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9637 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9638 btrfs_release_path(path);
9639 if (found_key.type == 0) {
9640 if (found_key.offset == 0)
9642 key.offset = found_key.offset - 1;
9643 key.type = found_key.type;
9645 key.type = found_key.type - 1;
9646 key.offset = (u64)-1;
9650 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9651 found_key.objectid, found_key.type, found_key.offset);
9653 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9656 btrfs_release_path(path);
/* Extent length: key offset for EXTENT_ITEM, nodesize for METADATA_ITEM. */
9658 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9659 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9660 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9661 found_key.offset : root->fs_info->nodesize;
9663 ret = btrfs_update_block_group(trans, root, bytenr,
9670 btrfs_release_path(path);
9675 * for a single backref, this will allocate a new extent
9676 * and add the backref to it.
/*
 * Recreate the extent-tree state for one backref of @rec.
 *
 * @trans:     running transaction
 * @info:      fs_info; its extent_root receives all insertions
 * @path:      caller-supplied path, released after each tree operation
 * @rec:       extent record being rebuilt (start, max_size, generation,
 *             info_objectid and info_level are read from it)
 * @back:      the backref to add; is_data and full_backref select the
 *             data or tree-block handling below
 * @allocated: NOTE(review): how this flag is consumed is not shown on the
 *             lines reviewed - confirm against the complete function
 * @flags:     extra bits OR-ed into the tree-block extent flags
 *
 * The visible code inserts an item keyed (rec->start, BTRFS_EXTENT_ITEM_KEY,
 * rec->max_size) with a zero ref count and rec->generation, flags it either
 * as data or as a tree block (filling a btrfs_tree_block_info from
 * rec->info_objectid and rec->info_level), and accounts the range through
 * btrfs_update_block_group().  It then calls btrfs_inc_extent_ref() for the
 * backref: once per dback->found_ref for a data backref, once for a tree
 * backref.  For a full backref the parent stored in the backref is passed
 * as the parent argument.
 */
9678 static int record_extent(struct btrfs_trans_handle *trans,
9679 struct btrfs_fs_info *info,
9680 struct btrfs_path *path,
9681 struct extent_record *rec,
9682 struct extent_backref *back,
9683 int allocated, u64 flags)
9686 struct btrfs_root *extent_root = info->extent_root;
9687 struct extent_buffer *leaf;
9688 struct btrfs_key ins_key;
9689 struct btrfs_extent_item *ei;
9690 struct data_backref *dback;
9691 struct btrfs_tree_block_info *bi;
9694 rec->max_size = max_t(u64, rec->max_size,
9698 u32 item_size = sizeof(*ei);
9701 item_size += sizeof(*bi);
9703 ins_key.objectid = rec->start;
9704 ins_key.offset = rec->max_size;
9705 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9707 ret = btrfs_insert_empty_item(trans, extent_root, path,
9708 &ins_key, item_size);
9712 leaf = path->nodes[0];
9713 ei = btrfs_item_ptr(leaf, path->slots[0],
9714 struct btrfs_extent_item);
9716 btrfs_set_extent_refs(leaf, ei, 0);
9717 btrfs_set_extent_generation(leaf, ei, rec->generation);
9719 if (back->is_data) {
9720 btrfs_set_extent_flags(leaf, ei,
9721 BTRFS_EXTENT_FLAG_DATA);
/* copy_key is a local disk key; every setter below takes its address */
9723 struct btrfs_disk_key copy_key;
9725 bi = (struct btrfs_tree_block_info *)(ei + 1);
9726 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9729 btrfs_set_disk_key_objectid(&copy_key,
9730 rec->info_objectid);
9731 btrfs_set_disk_key_type(&copy_key, 0);
9732 btrfs_set_disk_key_offset(&copy_key, 0);
9734 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9735 btrfs_set_tree_block_key(leaf, bi, &copy_key);
9737 btrfs_set_extent_flags(leaf, ei,
9738 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9741 btrfs_mark_buffer_dirty(leaf);
9742 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9743 rec->max_size, 1, 0);
9746 btrfs_release_path(path);
9749 if (back->is_data) {
9753 dback = to_data_backref(back);
9754 if (back->full_backref)
9755 parent = dback->parent;
9759 for (i = 0; i < dback->found_ref; i++) {
9760 /* if parent != 0, we're doing a full backref
9761 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9762 * just makes the backref allocator create a data
9765 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9766 rec->start, rec->max_size,
9770 BTRFS_FIRST_FREE_OBJECTID :
9776 fprintf(stderr, "adding new data backref"
9777 " on %llu %s %llu owner %llu"
9778 " offset %llu found %d\n",
9779 (unsigned long long)rec->start,
9780 back->full_backref ?
9782 back->full_backref ?
9783 (unsigned long long)parent :
9784 (unsigned long long)dback->root,
9785 (unsigned long long)dback->owner,
9786 (unsigned long long)dback->offset,
9790 struct tree_backref *tback;
9792 tback = to_tree_backref(back);
9793 if (back->full_backref)
9794 parent = tback->parent;
9798 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9799 rec->start, rec->max_size,
9800 parent, tback->root, 0, 0);
9801 fprintf(stderr, "adding new tree backref on "
9802 "start %llu len %llu parent %llu root %llu\n",
9803 rec->start, rec->max_size, parent, tback->root);
9806 btrfs_release_path(path);
/*
 * Search @entries for the extent_entry whose bytenr and bytes both equal
 * the values passed in.
 *
 * NOTE(review): the statements that hand back the result are not among the
 * lines reviewed; presumably the matching entry is returned, or NULL when
 * nothing matches - confirm.
 */
9810 static struct extent_entry *find_entry(struct list_head *entries,
9811 u64 bytenr, u64 bytes)
9813 struct extent_entry *entry = NULL;
9815 list_for_each_entry(entry, entries, list) {
9816 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk @entries and pick the entry that most backrefs agree on, comparing
 * candidates by their count field.
 *
 * An entry whose broken counter equals its count is tested for first; the
 * in-line comment says such an entry is not to be trusted.  Equal counts
 * between the current entry and the running best (or the previous entry)
 * are treated as inconclusive, so the search keeps going.
 *
 * NOTE(review): the return statement is not among the lines reviewed;
 * presumably the running best is returned, and it starts out as NULL.
 */
9823 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9825 struct extent_entry *entry, *best = NULL, *prev = NULL;
9827 list_for_each_entry(entry, entries, list) {
9829 * If there are as many broken entries as entries then we know
9830 * not to trust this particular entry.
9832 if (entry->broken == entry->count)
9836 * Special case, when there are only two entries and 'best' is
9846 * If our current entry == best then we can't be sure our best
9847 * is really the best, so we need to keep searching.
9849 if (best && best->count == entry->count) {
9855 /* Prev == entry, not good enough, have to keep searching */
9856 if (!prev->broken && prev->count == entry->count)
9860 best = (prev->count > entry->count) ? prev : entry;
9861 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with @entry, the
 * (bytenr, bytes) pair chosen as correct for the extent.
 *
 * @info:  fs_info used to read the fs root named by dback->root
 * @path:  caller-supplied path, used for both searches and released
 *         before returning
 * @dback: data backref whose file extent item is to be fixed
 * @entry: the extent_entry the file extent item should describe
 *
 * Steps visible in the code:
 *  - read-only search at (dback->owner, EXTENT_DATA, dback->offset), moving
 *    to the next leaf when the slot runs off the current one, until a file
 *    extent with disk_bytenr == dback->disk_bytenr and disk_num_bytes ==
 *    dback->bytes is seen;
 *  - start a transaction and search for that key again with cow enabled;
 *  - complain about compressed extents whose disk_bytenr differs from
 *    entry->bytenr (that case is reported as unsupported);
 *  - set disk_bytenr to entry->bytenr, recomputing the file extent offset
 *    when the old bytenr lay above or below the entry start, then set
 *    disk_num_bytes to entry->bytes and, for uncompressed extents only,
 *    ram_bytes to entry->bytes;
 *  - mark the leaf dirty and commit the transaction.
 *
 * Returns ret when it is non-zero, otherwise the commit result.  A failed
 * btrfs_start_transaction() is returned as PTR_ERR(trans).
 */
9869 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9870 struct data_backref *dback, struct extent_entry *entry)
9872 struct btrfs_trans_handle *trans;
9873 struct btrfs_root *root;
9874 struct btrfs_file_extent_item *fi;
9875 struct extent_buffer *leaf;
9876 struct btrfs_key key;
9880 key.objectid = dback->root;
9881 key.type = BTRFS_ROOT_ITEM_KEY;
9882 key.offset = (u64)-1;
9883 root = btrfs_read_fs_root(info, &key);
9885 fprintf(stderr, "Couldn't find root for our ref\n");
9890 * The backref points to the original offset of the extent if it was
9891 * split, so we need to search down to the offset we have and then walk
9892 * forward until we find the backref we're looking for.
9894 key.objectid = dback->owner;
9895 key.type = BTRFS_EXTENT_DATA_KEY;
9896 key.offset = dback->offset;
9897 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9899 fprintf(stderr, "Error looking up ref %d\n", ret);
9904 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9905 ret = btrfs_next_leaf(root, path);
9907 fprintf(stderr, "Couldn't find our ref, next\n");
9911 leaf = path->nodes[0];
9912 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9913 if (key.objectid != dback->owner ||
9914 key.type != BTRFS_EXTENT_DATA_KEY) {
9915 fprintf(stderr, "Couldn't find our ref, search\n");
9918 fi = btrfs_item_ptr(leaf, path->slots[0],
9919 struct btrfs_file_extent_item);
9920 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9921 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9923 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9928 btrfs_release_path(path);
9930 trans = btrfs_start_transaction(root, 1);
9932 return PTR_ERR(trans);
9935 * Ok we have the key of the file extent we want to fix, now we can cow
9936 * down to the thing and fix it.
9938 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9940 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9941 key.objectid, key.type, key.offset, ret);
9945 fprintf(stderr, "Well that's odd, we just found this key "
9946 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9951 leaf = path->nodes[0];
9952 fi = btrfs_item_ptr(leaf, path->slots[0],
9953 struct btrfs_file_extent_item);
9955 if (btrfs_file_extent_compression(leaf, fi) &&
9956 dback->disk_bytenr != entry->bytenr) {
9957 fprintf(stderr, "Ref doesn't match the record start and is "
9958 "compressed, please take a btrfs-image of this file "
9959 "system and send it to a btrfs developer so they can "
9960 "complete this functionality for bytenr %Lu\n",
9961 dback->disk_bytenr);
9966 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9967 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9968 } else if (dback->disk_bytenr > entry->bytenr) {
9969 u64 off_diff, offset;
9971 off_diff = dback->disk_bytenr - entry->bytenr;
9972 offset = btrfs_file_extent_offset(leaf, fi);
9973 if (dback->disk_bytenr + offset +
9974 btrfs_file_extent_num_bytes(leaf, fi) >
9975 entry->bytenr + entry->bytes) {
9976 fprintf(stderr, "Ref is past the entry end, please "
9977 "take a btrfs-image of this file system and "
9978 "send it to a btrfs developer, ref %Lu\n",
9979 dback->disk_bytenr);
9984 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9985 btrfs_set_file_extent_offset(leaf, fi, offset);
9986 } else if (dback->disk_bytenr < entry->bytenr) {
9989 offset = btrfs_file_extent_offset(leaf, fi);
9990 if (dback->disk_bytenr + offset < entry->bytenr) {
9991 fprintf(stderr, "Ref is before the entry start, please"
9992 " take a btrfs-image of this file system and "
9993 "send it to a btrfs developer, ref %Lu\n",
9994 dback->disk_bytenr);
9999 offset += dback->disk_bytenr;
10000 offset -= entry->bytenr;
10001 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10002 btrfs_set_file_extent_offset(leaf, fi, offset);
10005 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10008 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10009 * only do this if we aren't using compression, otherwise it's a
10012 if (!btrfs_file_extent_compression(leaf, fi))
10013 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10015 printf("ram bytes may be wrong?\n");
10016 btrfs_mark_buffer_dirty(leaf);
10018 err = btrfs_commit_transaction(trans, root);
10019 btrfs_release_path(path);
10020 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (disk_bytenr, bytes).
 *
 * @info: fs_info handed through to repair_ref()
 * @path: scratch path handed through to repair_ref()
 * @rec:  extent record whose backref_tree is examined
 *
 * Only data backrefs that are not full backrefs and whose found_ref is
 * non-zero are looked at.  Each distinct (dback->disk_bytenr, dback->bytes)
 * pair gets an extent_entry on a local list; a backref that differs from
 * (rec->start, rec->nr) or is marked broken is tested for separately.  The
 * "all agree" case is nr_entries <= 1 with no mismatch.
 *
 * Otherwise find_most_right_entry() is asked for a winner.  The extent
 * record's own (rec->start, rec->nr) is looked up in the list as a tie
 * breaker and, when it is absent but broken entries exist and
 * rec->found_rec is set, added as a fresh entry before asking again.  A
 * winner whose bytenr differs from rec->start is reported as unsupported.
 * Every remaining backref that disagrees with the winner is passed to
 * repair_ref().  The local entry list is emptied at the end.
 *
 * NOTE(review): the return statements are not among the lines reviewed;
 * the in-line comment near the end says the caches must be dropped and the
 * scan repeated once refs were modified - confirm how that is signalled.
 */
10023 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10024 struct extent_record *rec)
10026 struct extent_backref *back, *tmp;
10027 struct data_backref *dback;
10028 struct extent_entry *entry, *best = NULL;
10029 LIST_HEAD(entries);
10030 int nr_entries = 0;
10031 int broken_entries = 0;
10033 short mismatch = 0;
10036 * Metadata is easy and the backrefs should always agree on bytenr and
10037 * size, if not we've got bigger issues.
10042 rbtree_postorder_for_each_entry_safe(back, tmp,
10043 &rec->backref_tree, node) {
10044 if (back->full_backref || !back->is_data)
10047 dback = to_data_backref(back);
10050 * We only pay attention to backrefs that we found a real
10053 if (dback->found_ref == 0)
10057 * For now we only catch when the bytes don't match, not the
10058 * bytenr. We can easily do this at the same time, but I want
10059 * to have a fs image to test on before we just add repair
10060 * functionality willy-nilly so we know we won't screw up the
10064 entry = find_entry(&entries, dback->disk_bytenr,
10067 entry = malloc(sizeof(struct extent_entry));
10072 memset(entry, 0, sizeof(*entry));
10073 entry->bytenr = dback->disk_bytenr;
10074 entry->bytes = dback->bytes;
10075 list_add_tail(&entry->list, &entries);
10080 * If we only have on entry we may think the entries agree when
10081 * in reality they don't so we have to do some extra checking.
10083 if (dback->disk_bytenr != rec->start ||
10084 dback->bytes != rec->nr || back->broken)
10087 if (back->broken) {
10095 /* Yay all the backrefs agree, carry on good sir */
10096 if (nr_entries <= 1 && !mismatch)
10099 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10100 "%Lu\n", rec->start);
10103 * First we want to see if the backrefs can agree amongst themselves who
10104 * is right, so figure out which one of the entries has the highest
10107 best = find_most_right_entry(&entries);
10110 * Ok so we may have an even split between what the backrefs think, so
10111 * this is where we use the extent ref to see what it thinks.
10114 entry = find_entry(&entries, rec->start, rec->nr);
10115 if (!entry && (!broken_entries || !rec->found_rec)) {
10116 fprintf(stderr, "Backrefs don't agree with each other "
10117 "and extent record doesn't agree with anybody,"
10118 " so we can't fix bytenr %Lu bytes %Lu\n",
10119 rec->start, rec->nr);
10122 } else if (!entry) {
10124 * Ok our backrefs were broken, we'll assume this is the
10125 * correct value and add an entry for this range.
10127 entry = malloc(sizeof(struct extent_entry));
10132 memset(entry, 0, sizeof(*entry));
10133 entry->bytenr = rec->start;
10134 entry->bytes = rec->nr;
10135 list_add_tail(&entry->list, &entries);
10139 best = find_most_right_entry(&entries);
10141 fprintf(stderr, "Backrefs and extent record evenly "
10142 "split on who is right, this is going to "
10143 "require user input to fix bytenr %Lu bytes "
10144 "%Lu\n", rec->start, rec->nr);
10151 * I don't think this can happen currently as we'll abort() if we catch
10152 * this case higher up, but in case somebody removes that we still can't
10153 * deal with it properly here yet, so just bail out of that's the case.
10155 if (best->bytenr != rec->start) {
10156 fprintf(stderr, "Extent start and backref starts don't match, "
10157 "please use btrfs-image on this file system and send "
10158 "it to a btrfs developer so they can make fsck fix "
10159 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10160 rec->start, rec->nr);
10166 * Ok great we all agreed on an extent record, let's go find the real
10167 * references and fix up the ones that don't match.
10169 rbtree_postorder_for_each_entry_safe(back, tmp,
10170 &rec->backref_tree, node) {
10171 if (back->full_backref || !back->is_data)
10174 dback = to_data_backref(back);
10177 * Still ignoring backrefs that don't have a real ref attached
10180 if (dback->found_ref == 0)
10183 if (dback->bytes == best->bytes &&
10184 dback->disk_bytenr == best->bytenr)
10187 ret = repair_ref(info, path, dback, best);
10193 * Ok we messed with the actual refs, which means we need to drop our
10194 * entire cache and go back and rescan. I know this is a huge pain and
10195 * adds a lot of extra work, but it's the only way to be safe. Once all
10196 * the backrefs agree we may not need to do anything to the extent
10201 while (!list_empty(&entries)) {
10202 entry = list_entry(entries.next, struct extent_entry, list);
10203 list_del_init(&entry->list);
10209 static int process_duplicates(struct cache_tree *extent_cache,
10210 struct extent_record *rec)
10212 struct extent_record *good, *tmp;
10213 struct cache_extent *cache;
10217 * If we found a extent record for this extent then return, or if we
10218 * have more than one duplicate we are likely going to need to delete
10221 if (rec->found_rec || rec->num_duplicates > 1)
10224 /* Shouldn't happen but just in case */
10225 BUG_ON(!rec->num_duplicates);
10228 * So this happens if we end up with a backref that doesn't match the
10229 * actual extent entry. So either the backref is bad or the extent
10230 * entry is bad. Either way we want to have the extent_record actually
10231 * reflect what we found in the extent_tree, so we need to take the
10232 * duplicate out and use that as the extent_record since the only way we
10233 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10235 remove_cache_extent(extent_cache, &rec->cache);
10237 good = to_extent_record(rec->dups.next);
10238 list_del_init(&good->list);
10239 INIT_LIST_HEAD(&good->backrefs);
10240 INIT_LIST_HEAD(&good->dups);
10241 good->cache.start = good->start;
10242 good->cache.size = good->nr;
10243 good->content_checked = 0;
10244 good->owner_ref_checked = 0;
10245 good->num_duplicates = 0;
10246 good->refs = rec->refs;
10247 list_splice_init(&rec->backrefs, &good->backrefs);
10249 cache = lookup_cache_extent(extent_cache, good->start,
10253 tmp = container_of(cache, struct extent_record, cache);
10256 * If we find another overlapping extent and it's found_rec is
10257 * set then it's a duplicate and we need to try and delete
10260 if (tmp->found_rec || tmp->num_duplicates > 0) {
10261 if (list_empty(&good->list))
10262 list_add_tail(&good->list,
10263 &duplicate_extents);
10264 good->num_duplicates += tmp->num_duplicates + 1;
10265 list_splice_init(&tmp->dups, &good->dups);
10266 list_del_init(&tmp->list);
10267 list_add_tail(&tmp->list, &good->dups);
10268 remove_cache_extent(extent_cache, &tmp->cache);
10273 * Ok we have another non extent item backed extent rec, so lets
10274 * just add it to this extent and carry on like we did above.
10276 good->refs += tmp->refs;
10277 list_splice_init(&tmp->backrefs, &good->backrefs);
10278 remove_cache_extent(extent_cache, &tmp->cache);
10281 ret = insert_cache_extent(extent_cache, &good->cache);
10284 return good->num_duplicates ? 0 : 1;
10209 static int process_duplicates(struct cache_tree *extent_cache,
10210 struct extent_record *rec)
10212 struct extent_record *good, *tmp;
10213 struct cache_extent *cache;
10217 * If we found a extent record for this extent then return, or if we
10218 * have more than one duplicate we are likely going to need to delete
10221 if (rec->found_rec || rec->num_duplicates > 1)
10224 /* Shouldn't happen but just in case */
10225 BUG_ON(!rec->num_duplicates);
10228 * So this happens if we end up with a backref that doesn't match the
10229 * actual extent entry. So either the backref is bad or the extent
10230 * entry is bad. Either way we want to have the extent_record actually
10231 * reflect what we found in the extent_tree, so we need to take the
10232 * duplicate out and use that as the extent_record since the only way we
10233 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10235 remove_cache_extent(extent_cache, &rec->cache);
10237 good = to_extent_record(rec->dups.next);
10238 list_del_init(&good->list);
10239 INIT_LIST_HEAD(&good->backrefs);
10240 INIT_LIST_HEAD(&good->dups);
10241 good->cache.start = good->start;
10242 good->cache.size = good->nr;
10243 good->content_checked = 0;
10244 good->owner_ref_checked = 0;
10245 good->num_duplicates = 0;
10246 good->refs = rec->refs;
10247 list_splice_init(&rec->backrefs, &good->backrefs);
10249 cache = lookup_cache_extent(extent_cache, good->start,
10253 tmp = container_of(cache, struct extent_record, cache);
10256 * If we find another overlapping extent and it's found_rec is
10257 * set then it's a duplicate and we need to try and delete
10260 if (tmp->found_rec || tmp->num_duplicates > 0) {
10261 if (list_empty(&good->list))
10262 list_add_tail(&good->list,
10263 &duplicate_extents);
10264 good->num_duplicates += tmp->num_duplicates + 1;
10265 list_splice_init(&tmp->dups, &good->dups);
10266 list_del_init(&tmp->list);
10267 list_add_tail(&tmp->list, &good->dups);
10268 remove_cache_extent(extent_cache, &tmp->cache);
10273 * Ok we have another non extent item backed extent rec, so lets
10274 * just add it to this extent and carry on like we did above.
10276 good->refs += tmp->refs;
10277 list_splice_init(&tmp->backrefs, &good->backrefs);
10278 remove_cache_extent(extent_cache, &tmp->cache);
10281 ret = insert_cache_extent(extent_cache, &good->cache);
10284 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs recorded as duplicates of @rec.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction starts
 * @rec:  extent record whose dups list holds the overlapping records
 *
 * The first loop looks for the record that covers all of the duplicates and
 * complains when two records overlap without one fully covering the other.
 * @rec and the entries of rec->dups are then queued on a local delete_list.
 * Inside one transaction each queued record is tested for found_rec == 0 and
 * for being metadata (reported as unexpected) before a key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) is searched with cow and the item is
 * deleted.  Afterwards both delete_list and rec->dups are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates is
 * reset to 0 only when ret and nr_del are both zero.
 *
 * NOTE(review): the overlap message prints start and nr inside a
 * "[%Lu-%Lu]" bracket, which reads like a start-end range although the
 * second value is a length.
 */
10287 static int delete_duplicate_records(struct btrfs_root *root,
10288 struct extent_record *rec)
10290 struct btrfs_trans_handle *trans;
10291 LIST_HEAD(delete_list);
10292 struct btrfs_path path;
10293 struct extent_record *tmp, *good, *n;
10296 struct btrfs_key key;
10298 btrfs_init_path(&path);
10301 /* Find the record that covers all of the duplicates. */
10302 list_for_each_entry(tmp, &rec->dups, list) {
10303 if (good->start < tmp->start)
10305 if (good->nr > tmp->nr)
10308 if (tmp->start + tmp->nr < good->start + good->nr) {
10309 fprintf(stderr, "Ok we have overlapping extents that "
10310 "aren't completely covered by each other, this "
10311 "is going to require more careful thought. "
10312 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10313 tmp->start, tmp->nr, good->start, good->nr);
10320 list_add_tail(&rec->list, &delete_list);
10322 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10325 list_move_tail(&tmp->list, &delete_list);
/* All deletions go through the extent root, whatever root was passed in. */
10328 root = root->fs_info->extent_root;
10329 trans = btrfs_start_transaction(root, 1);
10330 if (IS_ERR(trans)) {
10331 ret = PTR_ERR(trans);
10335 list_for_each_entry(tmp, &delete_list, list) {
10336 if (tmp->found_rec == 0)
10338 key.objectid = tmp->start;
10339 key.type = BTRFS_EXTENT_ITEM_KEY;
10340 key.offset = tmp->nr;
10342 /* Shouldn't happen but just in case */
10343 if (tmp->metadata) {
10344 fprintf(stderr, "Well this shouldn't happen, extent "
10345 "record overlaps but is metadata? "
10346 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10350 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10356 ret = btrfs_del_item(trans, root, &path);
10359 btrfs_release_path(&path);
10362 err = btrfs_commit_transaction(trans, root);
10366 while (!list_empty(&delete_list)) {
10367 tmp = to_extent_record(delete_list.next);
10368 list_del_init(&tmp->list);
10374 while (!list_empty(&rec->dups)) {
10375 tmp = to_extent_record(rec->dups.next);
10376 list_del_init(&tmp->list);
10380 btrfs_release_path(&path);
10382 if (!ret && !nr_del)
10383 rec->num_duplicates = 0;
10385 return ret ? ret : nr_del;
/*
 * Look up the file extent behind every data backref of @rec that the scan
 * did not find, so that later backref verification has something to
 * compare.
 *
 * @info:         fs_info used to read the fs root named by each backref
 * @path:         scratch path, released after every lookup
 * @extent_cache: cache of extent records, consulted by the bytenr read
 *                from the file extent item
 * @rec:          extent record whose backref_tree is walked
 *
 * Full backrefs, non-data backrefs and backrefs that already have
 * found_ref set are tested for and left alone.  For the rest the root
 * dback->root is read (-ENOENT is tolerated, any other error pointer is
 * returned as PTR_ERR(root)) and (dback->owner, EXTENT_DATA, dback->offset)
 * is searched read-only.  When the item exists its disk_bytenr and
 * disk_num_bytes are read; if the extent cache holds a record at that
 * bytenr with found_rec set the backref is not adopted, otherwise
 * found_ref is incremented and disk_bytenr/bytes are copied into the
 * backref.
 *
 * NOTE(review): the final in-line comment announces a marker telling the
 * verify code not to trust this backref, but the statement that sets it is
 * not among the lines reviewed.
 */
10388 static int find_possible_backrefs(struct btrfs_fs_info *info,
10389 struct btrfs_path *path,
10390 struct cache_tree *extent_cache,
10391 struct extent_record *rec)
10393 struct btrfs_root *root;
10394 struct extent_backref *back, *tmp;
10395 struct data_backref *dback;
10396 struct cache_extent *cache;
10397 struct btrfs_file_extent_item *fi;
10398 struct btrfs_key key;
10402 rbtree_postorder_for_each_entry_safe(back, tmp,
10403 &rec->backref_tree, node) {
10404 /* Don't care about full backrefs (poor unloved backrefs) */
10405 if (back->full_backref || !back->is_data)
10408 dback = to_data_backref(back);
10410 /* We found this one, we don't need to do a lookup */
10411 if (dback->found_ref)
10414 key.objectid = dback->root;
10415 key.type = BTRFS_ROOT_ITEM_KEY;
10416 key.offset = (u64)-1;
10418 root = btrfs_read_fs_root(info, &key);
10420 /* No root, definitely a bad ref, skip */
10421 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10423 /* Other err, exit */
10425 return PTR_ERR(root);
10427 key.objectid = dback->owner;
10428 key.type = BTRFS_EXTENT_DATA_KEY;
10429 key.offset = dback->offset;
10430 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10432 btrfs_release_path(path);
10435 /* Didn't find it, we can carry on */
10440 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10441 struct btrfs_file_extent_item);
10442 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10443 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10444 btrfs_release_path(path);
10445 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10447 struct extent_record *tmp;
10448 tmp = container_of(cache, struct extent_record, cache);
10451 * If we found an extent record for the bytenr for this
10452 * particular backref then we can't add it to our
10453 * current extent record. We only want to add backrefs
10454 * that don't have a corresponding extent item in the
10455 * extent tree since they likely belong to this record
10456 * and we need to fix it if it doesn't match bytenrs.
10458 if (tmp->found_rec)
10462 dback->found_ref += 1;
10463 dback->disk_bytenr = bytenr;
10464 dback->bytes = bytes;
10467 * Set this so the verify backref code knows not to trust the
10468 * values in this backref.
10477 * Record orphan data ref into corresponding root.
10479 * Return 0 if the extent item contains data ref and recorded.
10480 * Return 1 if the extent item contains no useful data ref
10481 * In that case, it may contain only shared_dataref or metadata backref
10482 * or the file extent exists (this should be handled by the extent bytenr
10483 * recovery routine)
10484 * Return <0 if something goes wrong.
/*
 * Queue an orphan_data_extent on the owning root for every data backref of
 * @rec that exists in the extent tree but has no file extent behind it.
 *
 * @fs_info: fs_info used to read the root named by each backref
 * @rec:     extent record; rec->cache.start and rec->cache.size become the
 *           disk_bytenr and disk_len of each orphan
 *
 * Full backrefs, non-data backrefs and backrefs without found_extent_tree
 * are tested for first, as are backrefs that already have found_ref set.
 * A root that cannot be read (error pointer or NULL) is tested for as well.
 * The key (dback->owner, EXTENT_DATA, dback->offset) is then searched
 * read-only in the destination root; the in-line comment explains how the
 * search result decides whether the orphan is recorded.
 *
 * Each orphan is linked onto dest_root->orphan_data_extents.  The new node
 * must be the first argument of list_add() and the list head the second;
 * with the two swapped, a second orphan for the same root would detach the
 * earlier one from the root's list.
 *
 * Returns !recorded_data_ref on the visible return path, i.e. 0 once at
 * least one orphan was queued and 1 otherwise.
 */
10486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10487 struct extent_record *rec)
10489 struct btrfs_key key;
10490 struct btrfs_root *dest_root;
10491 struct extent_backref *back, *tmp;
10492 struct data_backref *dback;
10493 struct orphan_data_extent *orphan;
10494 struct btrfs_path path;
10495 int recorded_data_ref = 0;
10500 btrfs_init_path(&path);
10501 rbtree_postorder_for_each_entry_safe(back, tmp,
10502 &rec->backref_tree, node) {
10503 if (back->full_backref || !back->is_data ||
10504 !back->found_extent_tree)
10506 dback = to_data_backref(back);
10507 if (dback->found_ref)
10509 key.objectid = dback->root;
10510 key.type = BTRFS_ROOT_ITEM_KEY;
10511 key.offset = (u64)-1;
10513 dest_root = btrfs_read_fs_root(fs_info, &key);
10515 /* For non-exist root we just skip it */
10516 if (IS_ERR(dest_root) || !dest_root)
10519 key.objectid = dback->owner;
10520 key.type = BTRFS_EXTENT_DATA_KEY;
10521 key.offset = dback->offset;
10523 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10524 btrfs_release_path(&path);
10526 * For ret < 0, it's OK since the fs-tree may be corrupted,
10527 * we need to record it for inode/file extent rebuild.
10528 * For ret > 0, we record it only for file extent rebuild.
10529 * For ret == 0, the file extent exists but only bytenr
10530 * mismatch, let the original bytenr fix routine to handle,
10536 orphan = malloc(sizeof(*orphan));
10541 INIT_LIST_HEAD(&orphan->list);
10542 orphan->root = dback->root;
10543 orphan->objectid = dback->owner;
10544 orphan->offset = dback->offset;
10545 orphan->disk_bytenr = rec->cache.start;
10546 orphan->disk_len = rec->cache.size;
10547 list_add(&orphan->list, &dest_root->orphan_data_extents);
10548 recorded_data_ref = 1;
10551 btrfs_release_path(&path);
10553 return !recorded_data_ref;
10559 * when an incorrect extent item is found, this will delete
10560 * all of the existing entries for it and recreate them
10561 * based on what the tree scan found.
/*
 * Rebuild the extent-tree entries of @rec from the backrefs collected by
 * the scan.
 *
 * @info:         fs_info; info->extent_root hosts the transaction and
 *                info->corrupt_blocks is consulted before re-adding refs
 * @extent_cache: cache of extent records, handed to
 *                find_possible_backrefs()
 * @rec:          the extent record to repair
 *
 * Visible sequence:
 *  - BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed on when
 *    rec->flag_block_full_backref is set;
 *  - for non-metadata records whose refs differ from extent_item_refs,
 *    find_possible_backrefs() and then verify_backrefs() are run;
 *  - inside a transaction on the extent root delete_extent_records() is
 *    called, the corrupt-block cache is looked up for
 *    [rec->start, rec->start + rec->max_size), and record_extent() is called
 *    for each backref after a found_ref test, clearing
 *    rec->bad_full_backref on the way;
 *  - the transaction is committed and a "Repaired extent references"
 *    message is printed.
 *
 * NOTE(review): the error checks, the handling of the corrupt-block hit and
 * the return statement are not among the lines reviewed.
 */
10563 static int fixup_extent_refs(struct btrfs_fs_info *info,
10564 struct cache_tree *extent_cache,
10565 struct extent_record *rec)
10567 struct btrfs_trans_handle *trans = NULL;
10569 struct btrfs_path path;
10570 struct cache_extent *cache;
10571 struct extent_backref *back, *tmp;
10575 if (rec->flag_block_full_backref)
10576 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10578 btrfs_init_path(&path);
10579 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10581 * Sometimes the backrefs themselves are so broken they don't
10582 * get attached to any meaningful rec, so first go back and
10583 * check any of our backrefs that we couldn't find and throw
10584 * them into the list if we find the backref so that
10585 * verify_backrefs can figure out what to do.
10587 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10592 /* step one, make sure all of the backrefs agree */
10593 ret = verify_backrefs(info, &path, rec);
10597 trans = btrfs_start_transaction(info->extent_root, 1);
10598 if (IS_ERR(trans)) {
10599 ret = PTR_ERR(trans);
10603 /* step two, delete all the existing records */
10604 ret = delete_extent_records(trans, info->extent_root, &path,
10610 /* was this block corrupt? If so, don't add references to it */
10611 cache = lookup_cache_extent(info->corrupt_blocks,
10612 rec->start, rec->max_size);
10618 /* step three, recreate all the refs we did find */
10619 rbtree_postorder_for_each_entry_safe(back, tmp,
10620 &rec->backref_tree, node) {
10622 * if we didn't find any references, don't create a
10623 * new extent record
10625 if (!back->found_ref)
10628 rec->bad_full_backref = 0;
10629 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10637 int err = btrfs_commit_transaction(trans, info->extent_root);
10643 fprintf(stderr, "Repaired extent references for %llu\n",
10644 (unsigned long long)rec->start);
10646 btrfs_release_path(&path);
/*
 * Bring the BTRFS_BLOCK_FLAG_FULL_BACKREF bit of an extent item in line
 * with rec->flag_block_full_backref.
 *
 * @fs_info: fs_info; the item is searched in fs_info->extent_root
 * @rec:     extent record naming the item and the wanted flag state
 *
 * The item is searched with cow inside a transaction.  On the two early
 * paths visible below (search problem, item not found) the path is released
 * and the transaction is still committed; the result of that commit is not
 * assigned to anything.  On success the flags are rewritten, the leaf is
 * marked dirty and ret holds the result of the final commit.  A failed
 * btrfs_start_transaction() is returned as PTR_ERR(trans).
 */
10650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10651 struct extent_record *rec)
10653 struct btrfs_trans_handle *trans;
10654 struct btrfs_root *root = fs_info->extent_root;
10655 struct btrfs_path path;
10656 struct btrfs_extent_item *ei;
10657 struct btrfs_key key;
/* Metadata records are keyed by level, other records by length. */
10661 key.objectid = rec->start;
10662 if (rec->metadata) {
10663 key.type = BTRFS_METADATA_ITEM_KEY;
10664 key.offset = rec->info_level;
10666 key.type = BTRFS_EXTENT_ITEM_KEY;
10667 key.offset = rec->max_size;
10670 trans = btrfs_start_transaction(root, 0);
10672 return PTR_ERR(trans);
10674 btrfs_init_path(&path);
10675 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10677 btrfs_release_path(&path);
10678 btrfs_commit_transaction(trans, root);
10681 fprintf(stderr, "Didn't find extent for %llu\n",
10682 (unsigned long long)rec->start);
10683 btrfs_release_path(&path);
10684 btrfs_commit_transaction(trans, root);
10688 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10689 struct btrfs_extent_item);
10690 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Set or clear only the FULL_BACKREF bit; other flag bits are kept. */
10691 if (rec->flag_block_full_backref) {
10692 fprintf(stderr, "setting full backref on %llu\n",
10693 (unsigned long long)key.objectid);
10694 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696 fprintf(stderr, "clearing full backref on %llu\n",
10697 (unsigned long long)key.objectid);
10698 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10700 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10701 btrfs_mark_buffer_dirty(path.nodes[0]);
10702 btrfs_release_path(&path);
10703 ret = btrfs_commit_transaction(trans, root);
10705 fprintf(stderr, "Repaired extent flags for %llu\n",
10706 (unsigned long long)rec->start);
10711 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    fs_info; only info->extent_root is searched
 * @corrupt: describes the bad block: its key, its level and, through
 *           corrupt->cache.start, its bytenr
 *
 * The search is stopped one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1) so that path.nodes[level] is the
 * node expected to hold the pointer.  The slot returned by the search is
 * checked first; if its block pointer is not corrupt->cache.start every slot
 * of that node is scanned.  The node is also compared with the extent root
 * node, and the pointer found is removed with btrfs_del_ptr().
 *
 * NOTE(review): the control flow between these steps and the return value
 * are not among the lines reviewed.
 */
10712 static int prune_one_block(struct btrfs_trans_handle *trans,
10713 struct btrfs_fs_info *info,
10714 struct btrfs_corrupt_block *corrupt)
10717 struct btrfs_path path;
10718 struct extent_buffer *eb;
10722 int level = corrupt->level + 1;
10724 btrfs_init_path(&path);
10726 /* we want to stop at the parent to our busted block */
10727 path.lowest_level = level;
10729 ret = btrfs_search_slot(trans, info->extent_root,
10730 &corrupt->key, &path, -1, 1);
10735 eb = path.nodes[level];
10742 * hopefully the search gave us the block we want to prune,
10743 * lets try that first
10745 slot = path.slots[level];
10746 found = btrfs_node_blockptr(eb, slot);
10747 if (found == corrupt->cache.start)
10750 nritems = btrfs_header_nritems(eb);
10752 /* the search failed, lets scan this node and hope we find it */
10753 for (slot = 0; slot < nritems; slot++) {
10754 found = btrfs_node_blockptr(eb, slot);
10755 if (found == corrupt->cache.start)
10759 * we couldn't find the bad block. TODO, search all the nodes for pointers
10762 if (eb == info->extent_root->node) {
10767 btrfs_release_path(&path);
10772 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10773 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10776 btrfs_release_path(&path);
/*
 * Run prune_one_block() on the entries of info->corrupt_blocks and drop
 * each entry from that cache afterwards.
 *
 * The transaction on info->extent_root is started inside the function and
 * committed on the visible return path; a failed start is returned as
 * PTR_ERR(trans).  The return value of prune_one_block() is not stored.
 *
 * NOTE(review): the loop construct and the condition under which the
 * transaction is started are not among the lines reviewed.
 */
10780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10782 struct btrfs_trans_handle *trans = NULL;
10783 struct cache_extent *cache;
10784 struct btrfs_corrupt_block *corrupt;
10787 cache = search_cache_extent(info->corrupt_blocks, 0);
10791 trans = btrfs_start_transaction(info->extent_root, 1);
10793 return PTR_ERR(trans);
10795 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10796 prune_one_block(trans, info, corrupt);
10797 remove_cache_extent(info->corrupt_blocks, cache);
10800 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache and
 * then step through the block groups, advancing start past each one
 * (key.objectid + key.offset).
 *
 * NOTE(review): what is changed on each block group is not among the lines
 * reviewed; judging by the function name, presumably its cached state is
 * reset - confirm.
 */
10804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10806 struct btrfs_block_group_cache *cache;
10811 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10812 &start, &end, EXTENT_DIRTY);
10815 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10820 cache = btrfs_lookup_first_block_group(fs_info, start);
10825 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and when repairing try to fix, every problem recorded on the
 * extent records in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is used
 *                throughout and root is later replaced by the extent root
 * @extent_cache: cache of extent records built by the scan; records are
 *                removed from it as they are processed
 *
 * Visible phases:
 *  1. The range of every extent record and of every corrupt block is marked
 *     dirty in fs_info->excluded_extents, corrupt blocks are pruned and the
 *     cached block groups are reset (the in-line comment ties this to
 *     repair mode: problem extents must not be allocated from).
 *  2. While repairing, each record on the global duplicate_extents list is
 *     passed to process_duplicates() and, depending on its result, to
 *     delete_duplicate_records().
 *  3. Each remaining record is checked for: multiple extent items, a ref
 *     count that differs from extent_item_refs (which also calls
 *     record_orphan_data_extents()), backpointer mismatch, failed owner ref
 *     check, bad full backref, metadata crossing a stripe boundary and a
 *     chunk type mismatch.  With repair and fix set fixup_extent_refs() is
 *     called; a bad full backref leads to fixup_extent_flags().  The record
 *     is then removed from the cache, its backrefs are freed, and its range
 *     is cleared from excluded_extents when
 *     !init_extent_tree && repair && (!cur_err || fix).
 *  4. If ret is set and is not -EAGAIN a failure message is printed;
 *     the other visible branch runs btrfs_fix_block_accounting() in a
 *     transaction on the extent root and commits it.
 *
 * NOTE(review): the return value of prune_corrupt_blocks() is not stored,
 * and the function's own return statements are not among the lines
 * reviewed.
 */
10829 static int check_extent_refs(struct btrfs_root *root,
10830 struct cache_tree *extent_cache)
10832 struct extent_record *rec;
10833 struct cache_extent *cache;
10840 * if we're doing a repair, we have to make sure
10841 * we don't allocate from the problem extents.
10842 * In the worst case, this will be all the
10843 * extents in the FS
10845 cache = search_cache_extent(extent_cache, 0);
10847 rec = container_of(cache, struct extent_record, cache);
10848 set_extent_dirty(root->fs_info->excluded_extents,
10850 rec->start + rec->max_size - 1);
10851 cache = next_cache_extent(cache);
10854 /* pin down all the corrupted blocks too */
10855 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10857 set_extent_dirty(root->fs_info->excluded_extents,
10859 cache->start + cache->size - 1);
10860 cache = next_cache_extent(cache);
10862 prune_corrupt_blocks(root->fs_info);
10863 reset_cached_block_groups(root->fs_info);
10866 reset_cached_block_groups(root->fs_info);
10869 * We need to delete any duplicate entries we find first otherwise we
10870 * could mess up the extent tree when we have backrefs that actually
10871 * belong to a different extent item and not the weird duplicate one.
10873 while (repair && !list_empty(&duplicate_extents)) {
10874 rec = to_extent_record(duplicate_extents.next);
10875 list_del_init(&rec->list);
10877 /* Sometimes we can find a backref before we find an actual
10878 * extent, so we need to process it a little bit to see if there
10879 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10880 * if this is a backref screwup. If we need to delete stuff
10881 * process_duplicates() will return 0, otherwise it will return
10884 if (process_duplicates(extent_cache, rec))
10886 ret = delete_duplicate_records(root, rec);
10890 * delete_duplicate_records will return the number of entries
10891 * deleted, so if it's greater than 0 then we know we actually
10892 * did something and we need to remove.
10905 cache = search_cache_extent(extent_cache, 0);
10908 rec = container_of(cache, struct extent_record, cache);
10909 if (rec->num_duplicates) {
10910 fprintf(stderr, "extent item %llu has multiple extent "
10911 "items\n", (unsigned long long)rec->start);
10915 if (rec->refs != rec->extent_item_refs) {
10916 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10917 (unsigned long long)rec->start,
10918 (unsigned long long)rec->nr);
10919 fprintf(stderr, "extent item %llu, found %llu\n",
10920 (unsigned long long)rec->extent_item_refs,
10921 (unsigned long long)rec->refs);
10922 ret = record_orphan_data_extents(root->fs_info, rec);
10928 if (all_backpointers_checked(rec, 1)) {
10929 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10930 (unsigned long long)rec->start,
10931 (unsigned long long)rec->nr);
10935 if (!rec->owner_ref_checked) {
10936 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10937 (unsigned long long)rec->start,
10938 (unsigned long long)rec->nr);
10943 if (repair && fix) {
10944 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10950 if (rec->bad_full_backref) {
10951 fprintf(stderr, "bad full backref, on [%llu]\n",
10952 (unsigned long long)rec->start);
10954 ret = fixup_extent_flags(root->fs_info, rec);
10962 * Although it's not a extent ref's problem, we reuse this
10963 * routine for error reporting.
10964 * No repair function yet.
10966 if (rec->crossing_stripes) {
10968 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10969 rec->start, rec->start + rec->max_size);
10973 if (rec->wrong_chunk_type) {
10975 "bad extent [%llu, %llu), type mismatch with chunk\n",
10976 rec->start, rec->start + rec->max_size);
10981 remove_cache_extent(extent_cache, cache);
10982 free_all_extent_backrefs(rec);
10983 if (!init_extent_tree && repair && (!cur_err || fix))
10984 clear_extent_dirty(root->fs_info->excluded_extents,
10986 rec->start + rec->max_size - 1);
10991 if (ret && ret != -EAGAIN) {
10992 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10995 struct btrfs_trans_handle *trans;
10997 root = root->fs_info->extent_root;
10998 trans = btrfs_start_transaction(root, 1);
10999 if (IS_ERR(trans)) {
11000 ret = PTR_ERR(trans);
11004 ret = btrfs_fix_block_accounting(trans, root);
11007 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the per-stripe length of a chunk from its profile bits.
 *
 * @type:        chunk type flags; only the RAID0, RAID10, RAID5 and RAID6
 *               bits are inspected, in that order
 * @length:      length of the chunk
 * @num_stripes: number of stripes of the chunk
 *
 * RAID0:  length / num_stripes
 * RAID10: (length * 2) / num_stripes
 * RAID5:  length / (num_stripes - 1)
 * RAID6:  length / (num_stripes - 2)
 * any other type: length
 *
 * NOTE(review): no divisor is checked on the lines reviewed, so
 * num_stripes == 0 (RAID0/RAID10), 1 (RAID5) or 2 (RAID6) would divide by
 * zero; presumably callers validate num_stripes beforehand - confirm.
 */
11019 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11023 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11024 stripe_size = length;
11025 stripe_size /= num_stripes;
11026 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11027 stripe_size = length * 2;
11028 stripe_size /= num_stripes;
11029 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11030 stripe_size = length;
11031 stripe_size /= (num_stripes - 1);
11032 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11033 stripe_size = length;
11034 stripe_size /= (num_stripes - 2);
11036 stripe_size = length;
11038 return stripe_size;
11042 * Check the chunk with its block group/dev list ref:
11043 * Return 0 if all refs seems valid.
11044 * Return 1 if part of refs seems valid, need later check for rebuild ref
11045 * like missing block group and needs to search extent tree to rebuild them.
11046 * Return -1 if essential refs are missing and unable to rebuild.
11048 static int check_chunk_refs(struct chunk_record *chunk_rec,
11049 struct block_group_tree *block_group_cache,
11050 struct device_extent_tree *dev_extent_cache,
11053 struct cache_extent *block_group_item;
11054 struct block_group_record *block_group_rec;
11055 struct cache_extent *dev_extent_item;
11056 struct device_extent_record *dev_extent_rec;
11060 int metadump_v2 = 0;
11064 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11066 chunk_rec->length);
11067 if (block_group_item) {
11068 block_group_rec = container_of(block_group_item,
11069 struct block_group_record,
11071 if (chunk_rec->length != block_group_rec->offset ||
11072 chunk_rec->offset != block_group_rec->objectid ||
11074 chunk_rec->type_flags != block_group_rec->flags)) {
11077 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11078 chunk_rec->objectid,
11083 chunk_rec->type_flags,
11084 block_group_rec->objectid,
11085 block_group_rec->type,
11086 block_group_rec->offset,
11087 block_group_rec->offset,
11088 block_group_rec->objectid,
11089 block_group_rec->flags);
11092 list_del_init(&block_group_rec->list);
11093 chunk_rec->bg_rec = block_group_rec;
11098 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11099 chunk_rec->objectid,
11104 chunk_rec->type_flags);
11111 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11112 chunk_rec->num_stripes);
11113 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11114 devid = chunk_rec->stripes[i].devid;
11115 offset = chunk_rec->stripes[i].offset;
11116 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11117 devid, offset, length);
11118 if (dev_extent_item) {
11119 dev_extent_rec = container_of(dev_extent_item,
11120 struct device_extent_record,
11122 if (dev_extent_rec->objectid != devid ||
11123 dev_extent_rec->offset != offset ||
11124 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11125 dev_extent_rec->length != length) {
11128 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11129 chunk_rec->objectid,
11132 chunk_rec->stripes[i].devid,
11133 chunk_rec->stripes[i].offset,
11134 dev_extent_rec->objectid,
11135 dev_extent_rec->offset,
11136 dev_extent_rec->length);
11139 list_move(&dev_extent_rec->chunk_list,
11140 &chunk_rec->dextents);
11145 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11146 chunk_rec->objectid,
11149 chunk_rec->stripes[i].devid,
11150 chunk_rec->stripes[i].offset);
11157 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11158 int check_chunks(struct cache_tree *chunk_cache,
11159 struct block_group_tree *block_group_cache,
11160 struct device_extent_tree *dev_extent_cache,
11161 struct list_head *good, struct list_head *bad,
11162 struct list_head *rebuild, int silent)
11164 struct cache_extent *chunk_item;
11165 struct chunk_record *chunk_rec;
11166 struct block_group_record *bg_rec;
11167 struct device_extent_record *dext_rec;
11171 chunk_item = first_cache_extent(chunk_cache);
11172 while (chunk_item) {
11173 chunk_rec = container_of(chunk_item, struct chunk_record,
11175 err = check_chunk_refs(chunk_rec, block_group_cache,
11176 dev_extent_cache, silent);
11179 if (err == 0 && good)
11180 list_add_tail(&chunk_rec->list, good);
11181 if (err > 0 && rebuild)
11182 list_add_tail(&chunk_rec->list, rebuild);
11183 if (err < 0 && bad)
11184 list_add_tail(&chunk_rec->list, bad);
11185 chunk_item = next_cache_extent(chunk_item);
11188 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11191 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11199 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11203 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11204 dext_rec->objectid,
11214 static int check_device_used(struct device_record *dev_rec,
11215 struct device_extent_tree *dext_cache)
11217 struct cache_extent *cache;
11218 struct device_extent_record *dev_extent_rec;
11219 u64 total_byte = 0;
11221 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11223 dev_extent_rec = container_of(cache,
11224 struct device_extent_record,
11226 if (dev_extent_rec->objectid != dev_rec->devid)
11229 list_del_init(&dev_extent_rec->device_list);
11230 total_byte += dev_extent_rec->length;
11231 cache = next_cache_extent(cache);
11234 if (total_byte != dev_rec->byte_used) {
11236 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11237 total_byte, dev_rec->byte_used, dev_rec->objectid,
11238 dev_rec->type, dev_rec->offset);
11246 * Extra (optional) check for dev_item size to report possbile problem on a new
11249 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11251 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11253 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11254 devid, total_bytes, sectorsize);
11256 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11257 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11262 * Unlike device size alignment check above, some super total_bytes check
11263 * failure can lead to mount failure for newer kernel.
11265 * So this function will return the error for a fatal super total_bytes problem.
11267 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11269 struct btrfs_device *dev;
11270 struct list_head *dev_list = &fs_info->fs_devices->devices;
11271 u64 total_bytes = 0;
11272 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11274 list_for_each_entry(dev, dev_list, dev_list)
11275 total_bytes += dev->total_bytes;
11277 /* Important check, which can cause unmountable fs */
11278 if (super_bytes < total_bytes) {
11279 error("super total bytes %llu smaller than real device(s) size %llu",
11280 super_bytes, total_bytes);
11281 error("mounting this fs may fail for newer kernels");
11282 error("this can be fixed by 'btrfs rescue fix-device-size'");
11287 * Optional check, just to make everything aligned and match with each
11290 * For a btrfs-image restored fs, we don't need to check it anyway.
11292 if (btrfs_super_flags(fs_info->super_copy) &
11293 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11295 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11296 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11297 super_bytes != total_bytes) {
11298 warning("minor unaligned/mismatch device size detected");
11300 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11305 /* check btrfs_dev_item -> btrfs_dev_extent */
11306 static int check_devices(struct rb_root *dev_cache,
11307 struct device_extent_tree *dev_extent_cache)
11309 struct rb_node *dev_node;
11310 struct device_record *dev_rec;
11311 struct device_extent_record *dext_rec;
11315 dev_node = rb_first(dev_cache);
11317 dev_rec = container_of(dev_node, struct device_record, node);
11318 err = check_device_used(dev_rec, dev_extent_cache);
11322 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11323 global_info->sectorsize);
11324 dev_node = rb_next(dev_node);
11326 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11329 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11330 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11337 static int add_root_item_to_list(struct list_head *head,
11338 u64 objectid, u64 bytenr, u64 last_snapshot,
11339 u8 level, u8 drop_level,
11340 struct btrfs_key *drop_key)
11343 struct root_item_record *ri_rec;
11344 ri_rec = malloc(sizeof(*ri_rec));
11347 ri_rec->bytenr = bytenr;
11348 ri_rec->objectid = objectid;
11349 ri_rec->level = level;
11350 ri_rec->drop_level = drop_level;
11351 ri_rec->last_snapshot = last_snapshot;
11353 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11354 list_add_tail(&ri_rec->list, head);
11359 static void free_root_item_list(struct list_head *list)
11361 struct root_item_record *ri_rec;
11363 while (!list_empty(list)) {
11364 ri_rec = list_first_entry(list, struct root_item_record,
11366 list_del_init(&ri_rec->list);
11371 static int deal_root_from_list(struct list_head *list,
11372 struct btrfs_root *root,
11373 struct block_info *bits,
11375 struct cache_tree *pending,
11376 struct cache_tree *seen,
11377 struct cache_tree *reada,
11378 struct cache_tree *nodes,
11379 struct cache_tree *extent_cache,
11380 struct cache_tree *chunk_cache,
11381 struct rb_root *dev_cache,
11382 struct block_group_tree *block_group_cache,
11383 struct device_extent_tree *dev_extent_cache)
11388 while (!list_empty(list)) {
11389 struct root_item_record *rec;
11390 struct extent_buffer *buf;
11391 rec = list_entry(list->next,
11392 struct root_item_record, list);
11394 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11395 if (!extent_buffer_uptodate(buf)) {
11396 free_extent_buffer(buf);
11400 ret = add_root_to_pending(buf, extent_cache, pending,
11401 seen, nodes, rec->objectid);
11405 * To rebuild extent tree, we need deal with snapshot
11406 * one by one, otherwise we deal with node firstly which
11407 * can maximize readahead.
11410 ret = run_next_block(root, bits, bits_nr, &last,
11411 pending, seen, reada, nodes,
11412 extent_cache, chunk_cache,
11413 dev_cache, block_group_cache,
11414 dev_extent_cache, rec);
11418 free_extent_buffer(buf);
11419 list_del(&rec->list);
11425 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11426 reada, nodes, extent_cache, chunk_cache,
11427 dev_cache, block_group_cache,
11428 dev_extent_cache, NULL);
11438 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11440 struct rb_root dev_cache;
11441 struct cache_tree chunk_cache;
11442 struct block_group_tree block_group_cache;
11443 struct device_extent_tree dev_extent_cache;
11444 struct cache_tree extent_cache;
11445 struct cache_tree seen;
11446 struct cache_tree pending;
11447 struct cache_tree reada;
11448 struct cache_tree nodes;
11449 struct extent_io_tree excluded_extents;
11450 struct cache_tree corrupt_blocks;
11451 struct btrfs_path path;
11452 struct btrfs_key key;
11453 struct btrfs_key found_key;
11455 struct block_info *bits;
11457 struct extent_buffer *leaf;
11459 struct btrfs_root_item ri;
11460 struct list_head dropping_trees;
11461 struct list_head normal_trees;
11462 struct btrfs_root *root1;
11463 struct btrfs_root *root;
11467 root = fs_info->fs_root;
11468 dev_cache = RB_ROOT;
11469 cache_tree_init(&chunk_cache);
11470 block_group_tree_init(&block_group_cache);
11471 device_extent_tree_init(&dev_extent_cache);
11473 cache_tree_init(&extent_cache);
11474 cache_tree_init(&seen);
11475 cache_tree_init(&pending);
11476 cache_tree_init(&nodes);
11477 cache_tree_init(&reada);
11478 cache_tree_init(&corrupt_blocks);
11479 extent_io_tree_init(&excluded_extents);
11480 INIT_LIST_HEAD(&dropping_trees);
11481 INIT_LIST_HEAD(&normal_trees);
11484 fs_info->excluded_extents = &excluded_extents;
11485 fs_info->fsck_extent_cache = &extent_cache;
11486 fs_info->free_extent_hook = free_extent_hook;
11487 fs_info->corrupt_blocks = &corrupt_blocks;
11491 bits = malloc(bits_nr * sizeof(struct block_info));
11497 if (ctx.progress_enabled) {
11498 ctx.tp = TASK_EXTENTS;
11499 task_start(ctx.info);
11503 root1 = fs_info->tree_root;
11504 level = btrfs_header_level(root1->node);
11505 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11506 root1->node->start, 0, level, 0, NULL);
11509 root1 = fs_info->chunk_root;
11510 level = btrfs_header_level(root1->node);
11511 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11512 root1->node->start, 0, level, 0, NULL);
11515 btrfs_init_path(&path);
11518 key.type = BTRFS_ROOT_ITEM_KEY;
11519 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11523 leaf = path.nodes[0];
11524 slot = path.slots[0];
11525 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11526 ret = btrfs_next_leaf(root, &path);
11529 leaf = path.nodes[0];
11530 slot = path.slots[0];
11532 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11533 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11534 unsigned long offset;
11537 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11538 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11539 last_snapshot = btrfs_root_last_snapshot(&ri);
11540 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11541 level = btrfs_root_level(&ri);
11542 ret = add_root_item_to_list(&normal_trees,
11543 found_key.objectid,
11544 btrfs_root_bytenr(&ri),
11545 last_snapshot, level,
11550 level = btrfs_root_level(&ri);
11551 objectid = found_key.objectid;
11552 btrfs_disk_key_to_cpu(&found_key,
11553 &ri.drop_progress);
11554 ret = add_root_item_to_list(&dropping_trees,
11556 btrfs_root_bytenr(&ri),
11557 last_snapshot, level,
11558 ri.drop_level, &found_key);
11565 btrfs_release_path(&path);
11568 * check_block can return -EAGAIN if it fixes something, please keep
11569 * this in mind when dealing with return values from these functions, if
11570 * we get -EAGAIN we want to fall through and restart the loop.
11572 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11573 &seen, &reada, &nodes, &extent_cache,
11574 &chunk_cache, &dev_cache, &block_group_cache,
11575 &dev_extent_cache);
11577 if (ret == -EAGAIN)
11581 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11582 &pending, &seen, &reada, &nodes,
11583 &extent_cache, &chunk_cache, &dev_cache,
11584 &block_group_cache, &dev_extent_cache);
11586 if (ret == -EAGAIN)
11591 ret = check_chunks(&chunk_cache, &block_group_cache,
11592 &dev_extent_cache, NULL, NULL, NULL, 0);
11594 if (ret == -EAGAIN)
11599 ret = check_extent_refs(root, &extent_cache);
11601 if (ret == -EAGAIN)
11606 ret = check_devices(&dev_cache, &dev_extent_cache);
11611 task_stop(ctx.info);
11613 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11614 extent_io_tree_cleanup(&excluded_extents);
11615 fs_info->fsck_extent_cache = NULL;
11616 fs_info->free_extent_hook = NULL;
11617 fs_info->corrupt_blocks = NULL;
11618 fs_info->excluded_extents = NULL;
11621 free_chunk_cache_tree(&chunk_cache);
11622 free_device_cache_tree(&dev_cache);
11623 free_block_group_tree(&block_group_cache);
11624 free_device_extent_tree(&dev_extent_cache);
11625 free_extent_cache_tree(&seen);
11626 free_extent_cache_tree(&pending);
11627 free_extent_cache_tree(&reada);
11628 free_extent_cache_tree(&nodes);
11629 free_root_item_list(&normal_trees);
11630 free_root_item_list(&dropping_trees);
11633 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11634 free_extent_cache_tree(&seen);
11635 free_extent_cache_tree(&pending);
11636 free_extent_cache_tree(&reada);
11637 free_extent_cache_tree(&nodes);
11638 free_chunk_cache_tree(&chunk_cache);
11639 free_block_group_tree(&block_group_cache);
11640 free_device_cache_tree(&dev_cache);
11641 free_device_extent_tree(&dev_extent_cache);
11642 free_extent_record_cache(&extent_cache);
11643 free_root_item_list(&normal_trees);
11644 free_root_item_list(&dropping_trees);
11645 extent_io_tree_cleanup(&excluded_extents);
11649 static int check_extent_inline_ref(struct extent_buffer *eb,
11650 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11653 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11656 case BTRFS_TREE_BLOCK_REF_KEY:
11657 case BTRFS_EXTENT_DATA_REF_KEY:
11658 case BTRFS_SHARED_BLOCK_REF_KEY:
11659 case BTRFS_SHARED_DATA_REF_KEY:
11663 error("extent[%llu %u %llu] has unknown ref type: %d",
11664 key->objectid, key->type, key->offset, type);
11665 ret = UNKNOWN_TYPE;
11673 * Check backrefs of a tree block given by @bytenr or @eb.
11675 * @root: the root containing the @bytenr or @eb
11676 * @eb: tree block extent buffer, can be NULL
11677 * @bytenr: bytenr of the tree block to search
11678 * @level: tree level of the tree block
11679 * @owner: owner of the tree block
11681 * Return >0 for any error found and output error message
11682 * Return 0 for no error found
11684 static int check_tree_block_ref(struct btrfs_root *root,
11685 struct extent_buffer *eb, u64 bytenr,
11686 int level, u64 owner, struct node_refs *nrefs)
11688 struct btrfs_key key;
11689 struct btrfs_root *extent_root = root->fs_info->extent_root;
11690 struct btrfs_path path;
11691 struct btrfs_extent_item *ei;
11692 struct btrfs_extent_inline_ref *iref;
11693 struct extent_buffer *leaf;
11698 int root_level = btrfs_header_level(root->node);
11700 u32 nodesize = root->fs_info->nodesize;
11709 btrfs_init_path(&path);
11710 key.objectid = bytenr;
11711 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11712 key.type = BTRFS_METADATA_ITEM_KEY;
11714 key.type = BTRFS_EXTENT_ITEM_KEY;
11715 key.offset = (u64)-1;
11717 /* Search for the backref in extent tree */
11718 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11720 err |= BACKREF_MISSING;
11723 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11725 err |= BACKREF_MISSING;
11729 leaf = path.nodes[0];
11730 slot = path.slots[0];
11731 btrfs_item_key_to_cpu(leaf, &key, slot);
11733 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11735 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11736 skinny_level = (int)key.offset;
11737 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11739 struct btrfs_tree_block_info *info;
11741 info = (struct btrfs_tree_block_info *)(ei + 1);
11742 skinny_level = btrfs_tree_block_level(leaf, info);
11743 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11752 * Due to the feature of shared tree blocks, if the upper node
11753 * is a fs root or shared node, the extent of checked node may
11754 * not be updated until the next CoW.
11757 strict = should_check_extent_strictly(root, nrefs,
11759 if (!(btrfs_extent_flags(leaf, ei) &
11760 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11762 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11763 key.objectid, nodesize,
11764 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11765 err = BACKREF_MISMATCH;
11767 header_gen = btrfs_header_generation(eb);
11768 extent_gen = btrfs_extent_generation(leaf, ei);
11769 if (header_gen != extent_gen) {
11771 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11772 key.objectid, nodesize, header_gen,
11774 err = BACKREF_MISMATCH;
11776 if (level != skinny_level) {
11778 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11779 key.objectid, nodesize, level, skinny_level);
11780 err = BACKREF_MISMATCH;
11782 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11784 "extent[%llu %u] is referred by other roots than %llu",
11785 key.objectid, nodesize, root->objectid);
11786 err = BACKREF_MISMATCH;
11791 * Iterate the extent/metadata item to find the exact backref
11793 item_size = btrfs_item_size_nr(leaf, slot);
11794 ptr = (unsigned long)iref;
11795 end = (unsigned long)ei + item_size;
11797 while (ptr < end) {
11798 iref = (struct btrfs_extent_inline_ref *)ptr;
11799 type = btrfs_extent_inline_ref_type(leaf, iref);
11800 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11802 ret = check_extent_inline_ref(leaf, &key, iref);
11807 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11808 if (offset == root->objectid)
11810 if (!strict && owner == offset)
11812 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11814 * Backref of tree reloc root points to itself, no need
11815 * to check backref any more.
11817 * This may be an error of loop backref, but extent tree
11818 * checker should have already handled it.
11819 * Here we only need to avoid infinite iteration.
11821 if (offset == bytenr) {
11825 * Check if the backref points to valid
11828 found_ref = !check_tree_block_ref( root, NULL,
11829 offset, level + 1, owner,
11836 ptr += btrfs_extent_inline_ref_size(type);
11840 * Inlined extent item doesn't have what we need, check
11841 * TREE_BLOCK_REF_KEY
11844 btrfs_release_path(&path);
11845 key.objectid = bytenr;
11846 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11847 key.offset = root->objectid;
11849 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11854 * Finally check SHARED BLOCK REF, any found will be good
11855 * Here we're not doing comprehensive extent backref checking,
11856 * only need to ensure there is some extent referring to this
11860 btrfs_release_path(&path);
11861 key.objectid = bytenr;
11862 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11863 key.offset = (u64)-1;
11865 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11867 err |= BACKREF_MISSING;
11870 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11872 err |= BACKREF_MISSING;
11878 err |= BACKREF_MISSING;
11880 btrfs_release_path(&path);
11881 if (nrefs && strict &&
11882 level < root_level && nrefs->full_backref[level + 1])
11883 parent = nrefs->bytenr[level + 1];
11884 if (eb && (err & BACKREF_MISSING))
11886 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11887 bytenr, nodesize, owner, level,
11888 parent ? "parent" : "root",
11889 parent ? parent : root->objectid);
11894 * If @err contains BACKREF_MISSING then add extent of the
11895 * file_extent_data_item.
11897 * Returns error bits after reapir.
11899 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11900 struct btrfs_root *root,
11901 struct btrfs_path *pathp,
11902 struct node_refs *nrefs,
11905 struct btrfs_file_extent_item *fi;
11906 struct btrfs_key fi_key;
11907 struct btrfs_key key;
11908 struct btrfs_extent_item *ei;
11909 struct btrfs_path path;
11910 struct btrfs_root *extent_root = root->fs_info->extent_root;
11911 struct extent_buffer *eb;
11923 eb = pathp->nodes[0];
11924 slot = pathp->slots[0];
11925 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11926 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11928 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11929 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11932 file_offset = fi_key.offset;
11933 generation = btrfs_file_extent_generation(eb, fi);
11934 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11935 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11936 extent_offset = btrfs_file_extent_offset(eb, fi);
11937 offset = file_offset - extent_offset;
11939 /* now repair only adds backref */
11940 if ((err & BACKREF_MISSING) == 0)
11943 /* search extent item */
11944 key.objectid = disk_bytenr;
11945 key.type = BTRFS_EXTENT_ITEM_KEY;
11946 key.offset = num_bytes;
11948 btrfs_init_path(&path);
11949 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11955 /* insert an extent item */
11957 key.objectid = disk_bytenr;
11958 key.type = BTRFS_EXTENT_ITEM_KEY;
11959 key.offset = num_bytes;
11960 size = sizeof(*ei);
11962 btrfs_release_path(&path);
11963 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11967 eb = path.nodes[0];
11968 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11970 btrfs_set_extent_refs(eb, ei, 0);
11971 btrfs_set_extent_generation(eb, ei, generation);
11972 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11974 btrfs_mark_buffer_dirty(eb);
11975 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11977 btrfs_release_path(&path);
11980 if (nrefs->full_backref[0])
11981 parent = btrfs_header_bytenr(eb);
11985 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11987 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11991 "failed to increase extent data backref[%llu %llu] root %llu",
11992 disk_bytenr, num_bytes, root->objectid);
11995 printf("Add one extent data backref [%llu %llu]\n",
11996 disk_bytenr, num_bytes);
11999 err &= ~BACKREF_MISSING;
12002 error("can't repair root %llu extent data item[%llu %llu]",
12003 root->objectid, disk_bytenr, num_bytes);
12008 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12010 * Return >0 any error found and output error message
12011 * Return 0 for no error found
12013 static int check_extent_data_item(struct btrfs_root *root,
12014 struct btrfs_path *pathp,
12015 struct node_refs *nrefs, int account_bytes)
12017 struct btrfs_file_extent_item *fi;
12018 struct extent_buffer *eb = pathp->nodes[0];
12019 struct btrfs_path path;
12020 struct btrfs_root *extent_root = root->fs_info->extent_root;
12021 struct btrfs_key fi_key;
12022 struct btrfs_key dbref_key;
12023 struct extent_buffer *leaf;
12024 struct btrfs_extent_item *ei;
12025 struct btrfs_extent_inline_ref *iref;
12026 struct btrfs_extent_data_ref *dref;
12029 u64 disk_num_bytes;
12030 u64 extent_num_bytes;
12037 int found_dbackref = 0;
12038 int slot = pathp->slots[0];
12043 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12044 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12046 /* Nothing to check for hole and inline data extents */
12047 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12048 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12051 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12052 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12053 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12054 offset = btrfs_file_extent_offset(eb, fi);
12056 /* Check unaligned disk_num_bytes and num_bytes */
12057 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12059 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12060 fi_key.objectid, fi_key.offset, disk_num_bytes,
12061 root->fs_info->sectorsize);
12062 err |= BYTES_UNALIGNED;
12063 } else if (account_bytes) {
12064 data_bytes_allocated += disk_num_bytes;
12066 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12068 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12069 fi_key.objectid, fi_key.offset, extent_num_bytes,
12070 root->fs_info->sectorsize);
12071 err |= BYTES_UNALIGNED;
12072 } else if (account_bytes) {
12073 data_bytes_referenced += extent_num_bytes;
12075 owner = btrfs_header_owner(eb);
12077 /* Check the extent item of the file extent in extent tree */
12078 btrfs_init_path(&path);
12079 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12080 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12081 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12083 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12087 leaf = path.nodes[0];
12088 slot = path.slots[0];
12089 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12091 extent_flags = btrfs_extent_flags(leaf, ei);
12093 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12095 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12096 disk_bytenr, disk_num_bytes,
12097 BTRFS_EXTENT_FLAG_DATA);
12098 err |= BACKREF_MISMATCH;
12101 /* Check data backref inside that extent item */
12102 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12103 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12104 ptr = (unsigned long)iref;
12105 end = (unsigned long)ei + item_size;
12106 strict = should_check_extent_strictly(root, nrefs, -1);
12108 while (ptr < end) {
12112 bool match = false;
12114 iref = (struct btrfs_extent_inline_ref *)ptr;
12115 type = btrfs_extent_inline_ref_type(leaf, iref);
12116 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12118 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12123 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12124 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12125 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12126 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
12128 if (ref_objectid == fi_key.objectid &&
12129 ref_offset == fi_key.offset - offset)
12131 if (ref_root == root->objectid && match)
12132 found_dbackref = 1;
12133 else if (!strict && owner == ref_root && match)
12134 found_dbackref = 1;
12135 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12136 found_dbackref = !check_tree_block_ref(root, NULL,
12137 btrfs_extent_inline_ref_offset(leaf, iref),
12141 if (found_dbackref)
12143 ptr += btrfs_extent_inline_ref_size(type);
12146 if (!found_dbackref) {
12147 btrfs_release_path(&path);
12149 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12150 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12151 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12152 dbref_key.offset = hash_extent_data_ref(root->objectid,
12153 fi_key.objectid, fi_key.offset - offset);
12155 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12156 &dbref_key, &path, 0, 0);
12158 found_dbackref = 1;
12162 btrfs_release_path(&path);
12165 * Neither inlined nor EXTENT_DATA_REF found, try
12166 * SHARED_DATA_REF as last chance.
12168 dbref_key.objectid = disk_bytenr;
12169 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12170 dbref_key.offset = eb->start;
12172 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12173 &dbref_key, &path, 0, 0);
12175 found_dbackref = 1;
12181 if (!found_dbackref)
12182 err |= BACKREF_MISSING;
12183 btrfs_release_path(&path);
12184 if (err & BACKREF_MISSING) {
12185 error("data extent[%llu %llu] backref lost",
12186 disk_bytenr, disk_num_bytes);
12192 * Get real tree block level for the case like shared block
12193 * Return >= 0 as tree level
12194 * Return <0 for error
12196 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12198 struct extent_buffer *eb;
12199 struct btrfs_path path;
12200 struct btrfs_key key;
12201 struct btrfs_extent_item *ei;
12208 /* Search extent tree for extent generation and level */
12209 key.objectid = bytenr;
12210 key.type = BTRFS_METADATA_ITEM_KEY;
12211 key.offset = (u64)-1;
12213 btrfs_init_path(&path);
12214 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12217 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12225 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12226 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12227 struct btrfs_extent_item);
12228 flags = btrfs_extent_flags(path.nodes[0], ei);
12229 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12234 /* Get transid for later read_tree_block() check */
12235 transid = btrfs_extent_generation(path.nodes[0], ei);
12237 /* Get backref level as one source */
12238 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12239 backref_level = key.offset;
12241 struct btrfs_tree_block_info *info;
12243 info = (struct btrfs_tree_block_info *)(ei + 1);
12244 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12246 btrfs_release_path(&path);
12248 /* Get level from tree block as an alternative source */
12249 eb = read_tree_block(fs_info, bytenr, transid);
12250 if (!extent_buffer_uptodate(eb)) {
12251 free_extent_buffer(eb);
12254 header_level = btrfs_header_level(eb);
12255 free_extent_buffer(eb);
12257 if (header_level != backref_level)
12259 return header_level;
12262 btrfs_release_path(&path);
12267 * Check if a tree block backref is valid (points to a valid tree block)
12268 * if level == -1, level will be resolved
12269 * Return >0 for any error found and print error message
/*
 * Verify that the tree block at @bytenr can be reached from root @root_id.
 *
 * @fs_info: filesystem handle; nodesize is taken from its super_copy
 * @root_id: objectid of the root that is expected to reference the block
 * @bytenr:  logical address of the tree block being checked
 * @level:   expected level of the block; it may be resolved through
 *           query_tree_block_level() (see the inline comment below)
 *
 * The first key of the block is read and then searched for in the root
 * with path.lowest_level set to @level.  The node the search lands on at
 * that level must have the same bytenr and the same header level as the
 * block.  Problems are accumulated in err as REFERENCER_MISSING or
 * REFERENCER_MISMATCH bits and reported through the messages below.
 *
 * NOTE(review): this listing omits some lines (braces, a few declarations,
 * branch/goto/return statements), so the exact control flow and the final
 * return are not visible here - confirm against the full file.
 */
12271 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12272 u64 bytenr, int level)
12274 struct btrfs_root *root;
12275 struct btrfs_key key;
12276 struct btrfs_path path;
12277 struct extent_buffer *eb;
12278 struct extent_buffer *node;
12279 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12283 /* Query level for level == -1 special case */
12285 level = query_tree_block_level(fs_info, bytenr);
12287 err |= REFERENCER_MISSING;
/* Look the referencing root up by objectid; offset (u64)-1 is the lookup key used for root items here */
12291 key.objectid = root_id;
12292 key.type = BTRFS_ROOT_ITEM_KEY;
12293 key.offset = (u64)-1;
12295 root = btrfs_read_fs_root(fs_info, &key);
12296 if (IS_ERR(root)) {
12297 err |= REFERENCER_MISSING;
12301 /* Read out the tree block to get item/node key */
12302 eb = read_tree_block(fs_info, bytenr, 0);
12303 if (!extent_buffer_uptodate(eb)) {
12304 err |= REFERENCER_MISSING;
12305 free_extent_buffer(eb);
12309 /* Empty tree, no need to check key */
12310 if (!btrfs_header_nritems(eb) && !level) {
12311 free_extent_buffer(eb);
/* Take the first key of the block: node key or item key (the selecting condition is not visible in this listing) */
12316 btrfs_node_key_to_cpu(eb, &key, 0);
12318 btrfs_item_key_to_cpu(eb, &key, 0);
12320 free_extent_buffer(eb);
/* Stop the descent at @level so path.nodes[level] is the candidate referencer */
12322 btrfs_init_path(&path);
12323 path.lowest_level = level;
12324 /* Search with the first key, to ensure we can reach it */
12325 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12327 err |= REFERENCER_MISSING;
12331 node = path.nodes[level];
12332 if (btrfs_header_bytenr(node) != bytenr) {
12334 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12335 bytenr, nodesize, bytenr,
12336 btrfs_header_bytenr(node));
12337 err |= REFERENCER_MISMATCH;
12339 if (btrfs_header_level(node) != level) {
12341 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12342 bytenr, nodesize, level,
12343 btrfs_header_level(node));
12344 err |= REFERENCER_MISMATCH;
12348 btrfs_release_path(&path);
/* Report a missing referencer; two message variants exist, with and without the level */
12350 if (err & REFERENCER_MISSING) {
12352 error("extent [%llu %d] lost referencer (owner: %llu)",
12353 bytenr, nodesize, root_id);
12356 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12357 bytenr, nodesize, root_id, level);
12364 * Check if tree block @eb is tree reloc root.
12365 * Return 0 if it's not or any problem happens
12366 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of a tree reloc root.
 *
 * The reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner of @eb) using the
 * non-cached reader, and its root node bytenr is compared with the bytenr
 * of @eb.  The root obtained this way is released again with
 * btrfs_free_fs_root().
 *
 * NOTE(review): the return statements are among the lines omitted from
 * this listing; the preceding header comment documents 0 / 1 results.
 */
12368 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12369 struct extent_buffer *eb)
12371 struct btrfs_root *tree_reloc_root;
12372 struct btrfs_key key;
12373 u64 bytenr = btrfs_header_bytenr(eb);
12374 u64 owner = btrfs_header_owner(eb);
/* The reloc root item is keyed by the owner recorded in the block header */
12377 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12378 key.offset = owner;
12379 key.type = BTRFS_ROOT_ITEM_KEY;
12381 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12382 if (IS_ERR(tree_reloc_root))
/* Same bytenr as the reloc root node means @eb is that root node */
12385 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12387 btrfs_free_fs_root(tree_reloc_root);
12392 * Check referencer for shared block backref
12393 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must contain a
 * block pointer to @bytenr.
 *
 * @fs_info: filesystem handle
 * @parent:  logical address of the parent tree block named by the backref
 * @bytenr:  logical address of the referenced tree block
 * @level:   level of the referenced block; may be resolved with
 *           query_tree_block_level()
 *
 * The parent block is read, its header level is compared against
 * @level + 1, and its block pointers are scanned for @bytenr.  A block
 * whose parent is itself is checked with is_tree_reloc_root().  When no
 * parent pointer is found an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): this listing omits several lines (declarations, gotos,
 * the statement that sets found_parent, the success return) - confirm
 * against the full file.
 */
12395 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12396 u64 parent, u64 bytenr, int level)
12398 struct extent_buffer *eb;
12400 int found_parent = 0;
12403 eb = read_tree_block(fs_info, parent, 0);
12404 if (!extent_buffer_uptodate(eb))
12408 level = query_tree_block_level(fs_info, bytenr);
12412 /* It's possible it's a tree reloc root */
12413 if (parent == bytenr) {
12414 if (is_tree_reloc_root(fs_info, eb))
/* A genuine parent sits exactly one level above the child */
12419 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr */
12422 nr = btrfs_header_nritems(eb);
12423 for (i = 0; i < nr; i++) {
12424 if (bytenr == btrfs_node_blockptr(eb, i)) {
12430 free_extent_buffer(eb);
12431 if (!found_parent) {
12433 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12434 bytenr, fs_info->nodesize, parent, level);
12435 return REFERENCER_MISSING;
12441 * Check referencer for normal (inlined) data ref
12442 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed (non-shared) data backref by counting matching file
 * extent items.
 *
 * @fs_info:  filesystem handle
 * @root_id:  objectid of the root that should hold the file extents
 * @objectid: inode number owning the file extents
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   logical start of the data extent
 * @len:      length of the data extent (see the preceding header comment
 *            for the len == 0 case)
 * @count:    reference count recorded in the backref
 *
 * The function first looks @bytenr up in the extent tree, then reads root
 * @root_id and walks the EXTENT_DATA items of inode @objectid.  An item
 * matches when disk bytenr, disk num bytes, the computed backref offset
 * and the leaf owner all agree.  If found_count differs from @count an
 * error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the listing omits lines, among them the statement that
 * presumably increments found_count on a match, the loop construct and
 * the error checks after each search - confirm against the full file.
 */
12444 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12445 u64 root_id, u64 objectid, u64 offset,
12446 u64 bytenr, u64 len, u32 count)
12448 struct btrfs_root *root;
12449 struct btrfs_root *extent_root = fs_info->extent_root;
12450 struct btrfs_key key;
12451 struct btrfs_path path;
12452 struct extent_buffer *leaf;
12453 struct btrfs_file_extent_item *fi;
12454 u32 found_count = 0;
/* Locate the extent item for @bytenr: search past it, then step back */
12459 key.objectid = bytenr;
12460 key.type = BTRFS_EXTENT_ITEM_KEY;
12461 key.offset = (u64)-1;
12463 btrfs_init_path(&path);
12464 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12467 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12470 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12471 if (key.objectid != bytenr ||
12472 key.type != BTRFS_EXTENT_ITEM_KEY)
12475 btrfs_release_path(&path);
/* Read the root that is expected to contain the referencing file extents */
12477 key.objectid = root_id;
12478 key.type = BTRFS_ROOT_ITEM_KEY;
12479 key.offset = (u64)-1;
12480 btrfs_init_path(&path);
12482 root = btrfs_read_fs_root(fs_info, &key);
12486 key.objectid = objectid;
12487 key.type = BTRFS_EXTENT_DATA_KEY;
12489 * It can be nasty as data backref offset is
12490 * file offset - file extent offset, which is smaller or
12491 * equal to original backref offset. The only special case is
12492 * overflow. So we need to special check and do further search.
/* '&' binds tighter than '?:': start from 0 when bit 63 of @offset is set */
12494 key.offset = offset & (1ULL << 63) ? 0 : offset;
12496 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12501 * Search afterwards to get correct one
12502 * NOTE: As we must do a comprehensive check on the data backref to
12503 * make sure the dref count also matches, we must iterate all file
12504 * extents for that inode.
12507 leaf = path.nodes[0];
12508 slot = path.slots[0];
12510 if (slot >= btrfs_header_nritems(leaf) ||
12511 btrfs_header_owner(leaf) != root_id)
12513 btrfs_item_key_to_cpu(leaf, &key, slot);
12514 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12516 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12518 * Except normal disk bytenr and disk num bytes, we still
12519 * need to do extra check on dbackref offset as
12520 * dbackref offset = file_offset - file_extent_offset
12522 * Also, we must check the leaf owner.
12523 * In case of shared tree blocks (snapshots) we can inherit
12524 * leaves from source snapshot.
12525 * In that case, reference from source snapshot should not
12528 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12529 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12530 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12531 offset && btrfs_header_owner(leaf) == root_id)
12535 ret = btrfs_next_item(root, &path);
12540 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded ref count */
12541 if (found_count != count) {
12543 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12544 bytenr, len, root_id, objectid, offset, count, found_count);
12545 return REFERENCER_MISSING;
12551 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must hold a file
 * extent item whose disk bytenr equals @bytenr.
 *
 * @fs_info: filesystem handle
 * @parent:  logical address of the leaf named by the backref
 * @bytenr:  logical start of the data extent
 *
 * Every item of the leaf is visited; only EXTENT_DATA items are examined
 * and inline file extents are tested for separately before the disk
 * bytenr comparison.  When nothing matches, an error is printed and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): the listing omits lines (loop "continue" statements, the
 * assignment to found_parent, the error() arguments, the success return) -
 * confirm against the full file.
 */
12553 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12554 u64 parent, u64 bytenr)
12556 struct extent_buffer *eb;
12557 struct btrfs_key key;
12558 struct btrfs_file_extent_item *fi;
12560 int found_parent = 0;
12563 eb = read_tree_block(fs_info, parent, 0);
12564 if (!extent_buffer_uptodate(eb))
12567 nr = btrfs_header_nritems(eb);
12568 for (i = 0; i < nr; i++) {
12569 btrfs_item_key_to_cpu(eb, &key, i);
12570 if (key.type != BTRFS_EXTENT_DATA_KEY)
12573 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are identified here, before the disk bytenr is read */
12574 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12577 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12584 free_extent_buffer(eb);
12585 if (!found_parent) {
12586 error("shared extent %llu referencer lost (parent: %llu)",
12588 return REFERENCER_MISSING;
12594 * Only delete backref if REFERENCER_MISSING now
12596 * Returns <0 the extent was deleted
12597 * Returns >0 the backref was deleted but extent still exists, returned value
12598 * means error after repair
12599 * Returns 0 nothing happened
/*
 * Repair one backref of an extent item by deleting it.
 *
 * @trans:         running transaction
 * @root:          tree that @path points into (used for the re-search)
 * @path:          path positioned at the extent item; re-searched afterwards
 * @bytenr:        logical start of the extent
 * @num_bytes:     length of the extent
 * @parent:        parent bytenr of the backref, passed to btrfs_free_extent()
 * @root_objectid: root objectid of the backref
 * @owner:         owner of the backref
 * @offset:        offset of the backref
 * @err:           error bits found by the caller for this backref
 *
 * When @err has REFERENCER_MISSING or REFERENCER_MISMATCH set, the backref
 * is dropped through btrfs_free_extent().  Only the REFERENCER_MISSING bit
 * is cleared from @err in the visible code.  Because freeing the last
 * reference may remove the extent item itself, the path is released and
 * the remembered key is searched for again.
 *
 * NOTE(review): the listing omits lines (declarations, the success/failure
 * branch around the two messages, return statements) - the return
 * convention is the one described in the preceding header comment.
 */
12601 static int repair_extent_item(struct btrfs_trans_handle *trans,
12602 struct btrfs_root *root, struct btrfs_path *path,
12603 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12604 u64 owner, u64 offset, int err)
12606 struct btrfs_key old_key;
/* Remember the current item key so the path can be re-established later */
12610 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12612 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12613 /* delete the backref */
12614 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12615 num_bytes, parent, root_objectid, owner, offset);
12618 err &= ~REFERENCER_MISSING;
12619 printf("Delete backref in extent [%llu %llu]\n",
12620 bytenr, num_bytes);
12622 error("fail to delete backref in extent [%llu %llu]",
12623 bytenr, num_bytes);
12627 /* btrfs_free_extent may delete the extent */
12628 btrfs_release_path(path);
12629 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12639 * This function will check a given extent item, including its backref and
12640 * itself (like crossing stripe boundary and type)
12642 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item (EXTENT_ITEM or METADATA_ITEM) that @path points
 * at, including every inline backref stored in it.
 *
 * @trans:   transaction handle, forwarded to repair_extent_item()
 * @fs_info: filesystem handle
 * @path:    path whose nodes[0] / slots[0] identify the item
 *
 * Visible steps:
 *  - add the extent size to the global bytes_used counter (key.offset for
 *    EXTENT_ITEM keys, nodesize otherwise);
 *  - reject items smaller than struct btrfs_extent_item as the unsupported
 *    COMPAT_EXTENT_TREE_V0 format;
 *  - for tree blocks, run check_crossing_stripes() and record
 *    CROSSING_STRIPE_BOUNDARY;
 *  - determine the tree block level, either from the btrfs_tree_block_info
 *    that follows an old-style EXTENT_ITEM or from key.offset of a
 *    METADATA_ITEM;
 *  - walk the inline refs up to the end of the item and dispatch on the
 *    ref type to check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); unknown
 *    types yield UNKNOWN_TYPE;
 *  - an inline ref running past the item end is reported as
 *    ITEM_SIZE_MISMATCH;
 *  - with repair enabled and an error pending, repair_extent_item() is
 *    called for the current backref.
 *
 * NOTE(review): the listing omits many lines (declarations, braces, the
 * switch statement itself, labels/gotos, how ret is merged into err, the
 * final return) - confirm the control flow against the full file.
 */
12644 static int check_extent_item(struct btrfs_trans_handle *trans,
12645 struct btrfs_fs_info *fs_info,
12646 struct btrfs_path *path)
12648 struct btrfs_extent_item *ei;
12649 struct btrfs_extent_inline_ref *iref;
12650 struct btrfs_extent_data_ref *dref;
12651 struct extent_buffer *eb = path->nodes[0];
12654 int slot = path->slots[0];
12656 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12657 u32 item_size = btrfs_item_size_nr(eb, slot);
12667 struct btrfs_key key;
/* Account the extent size: key.offset is the length only for EXTENT_ITEM keys */
12671 btrfs_item_key_to_cpu(eb, &key, slot);
12672 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12673 bytes_used += key.offset;
12674 num_bytes = key.offset;
12676 bytes_used += nodesize;
12677 num_bytes = nodesize;
12680 if (item_size < sizeof(*ei)) {
12682 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12683 * old thing when on disk format is still un-determined.
12684 * No need to care about it anymore
12686 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12690 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12691 flags = btrfs_extent_flags(eb, ei);
12693 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12695 if (metadata && check_crossing_stripes(global_info, key.objectid,
12697 error("bad metadata [%llu, %llu) crossing stripe boundary",
12698 key.objectid, key.objectid + nodesize);
12699 err |= CROSSING_STRIPE_BOUNDARY;
12702 ptr = (unsigned long)(ei + 1);
12704 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12705 /* Old EXTENT_ITEM metadata */
12706 struct btrfs_tree_block_info *info;
12708 info = (struct btrfs_tree_block_info *)ptr;
12709 level = btrfs_tree_block_level(eb, info);
12710 ptr += sizeof(struct btrfs_tree_block_info);
12712 /* New METADATA_ITEM */
12713 level = key.offset;
12715 end = (unsigned long)ei + item_size;
12718 /* Reached extent item end normally */
12722 /* Beyond extent item end, wrong item size */
12724 err |= ITEM_SIZE_MISMATCH;
12725 error("extent item at bytenr %llu slot %d has wrong size",
12734 /* Now check every backref in this extent item */
12735 iref = (struct btrfs_extent_inline_ref *)ptr;
12736 type = btrfs_extent_inline_ref_type(eb, iref);
12737 offset = btrfs_extent_inline_ref_offset(eb, iref);
12739 case BTRFS_TREE_BLOCK_REF_KEY:
12740 root_objectid = offset;
12742 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12746 case BTRFS_SHARED_BLOCK_REF_KEY:
12748 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12752 case BTRFS_EXTENT_DATA_REF_KEY:
12753 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12754 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12755 owner = btrfs_extent_data_ref_objectid(eb, dref);
12756 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12757 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12758 owner_offset, key.objectid, key.offset,
12759 btrfs_extent_data_ref_count(eb, dref));
12762 case BTRFS_SHARED_DATA_REF_KEY:
12764 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12768 error("extent[%llu %d %llu] has unknown ref type: %d",
12769 key.objectid, key.type, key.offset, type);
12770 ret = UNKNOWN_TYPE;
12775 if (err && repair) {
12776 ret = repair_extent_item(trans, fs_info->extent_root, path,
12777 key.objectid, num_bytes, parent, root_objectid,
12778 owner, owner_offset, ret);
12787 ptr += btrfs_extent_inline_ref_size(type);
12795 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @eb / @slot is referenced by its chunk.
 *
 * @fs_info: filesystem handle; the chunk tree is taken from it
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item in @eb
 *
 * The chunk named by the dev extent (chunk objectid / chunk offset) is
 * searched in the chunk tree, validated with btrfs_check_chunk_valid(),
 * and its per-device stripe length is compared with the dev extent
 * length.  One stripe of the chunk must carry the same devid and physical
 * offset as the dev extent key.  If no such chunk is found an error is
 * printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the listing omits lines (declarations, the assignment of
 * l, gotos, the statement setting found_chunk, the success return) -
 * confirm against the full file.
 */
12797 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12798 struct extent_buffer *eb, int slot)
12800 struct btrfs_root *chunk_root = fs_info->chunk_root;
12801 struct btrfs_dev_extent *ptr;
12802 struct btrfs_path path;
12803 struct btrfs_key chunk_key;
12804 struct btrfs_key devext_key;
12805 struct btrfs_chunk *chunk;
12806 struct extent_buffer *l;
12810 int found_chunk = 0;
12813 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12814 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12815 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
12817 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12818 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12819 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12821 btrfs_init_path(&path);
12822 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12827 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12828 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12833 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe located on this device at this physical offset */
12836 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12837 for (i = 0; i < num_stripes; i++) {
12838 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12839 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12841 if (devid == devext_key.objectid &&
12842 offset == devext_key.offset) {
12848 btrfs_release_path(&path);
12849 if (!found_chunk) {
12851 "device extent[%llu, %llu, %llu] did not find the related chunk",
12852 devext_key.objectid, devext_key.offset, length);
12853 return REFERENCER_MISSING;
12859 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used field of a dev item equals the summed length
 * of all dev extents that belong to the device.
 *
 * @fs_info: filesystem handle; the device tree is taken from it
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * The device tree is searched from (dev_id, BTRFS_DEV_EXTENT_KEY) and
 * every dev extent of that device is added to total.  If the search
 * fails, REFERENCER_MISSING is returned; if the sum differs from
 * bytes_used, ACCOUNTING_MISMATCH is returned.  Finally the device size
 * is passed to check_dev_size_alignment().
 *
 * Fix: the accounting-mismatch message reloads the dev item key into key
 * but then printed unrelated constants (BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY) as the "dev[...]" triple.  It now prints the key
 * that was just read, like the "cannot find any related dev extent"
 * message earlier in this function.
 *
 * NOTE(review): this listing omits lines (declarations, the loop
 * construct, break statements, the final return) - confirm against the
 * full file.
 */
12861 static int check_dev_item(struct btrfs_fs_info *fs_info,
12862 struct extent_buffer *eb, int slot)
12864 struct btrfs_root *dev_root = fs_info->dev_root;
12865 struct btrfs_dev_item *dev_item;
12866 struct btrfs_path path;
12867 struct btrfs_key key;
12868 struct btrfs_dev_extent *ptr;
12875 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12876 dev_id = btrfs_device_id(eb, dev_item);
12877 used = btrfs_device_bytes_used(eb, dev_item);
12878 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Dev extents are keyed by devid, so start the scan at this device */
12880 key.objectid = dev_id;
12881 key.type = BTRFS_DEV_EXTENT_KEY;
12884 btrfs_init_path(&path);
12885 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key was reused for the search; reload the dev item key for the message */
12887 btrfs_item_key_to_cpu(eb, &key, slot);
12888 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12889 key.objectid, key.type, key.offset);
12890 btrfs_release_path(&path);
12891 return REFERENCER_MISSING;
12894 /* Iterate dev_extents to calculate the used space of a device */
12896 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12899 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12900 if (key.objectid > dev_id)
12902 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12905 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12906 struct btrfs_dev_extent);
12907 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12909 ret = btrfs_next_item(dev_root, &path);
12913 btrfs_release_path(&path);
12915 if (used != total) {
/* Reload the dev item key and report it, not hard-coded constants */
12916 btrfs_item_key_to_cpu(eb, &key, slot);
12918 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12919 total, used, key.objectid, key.type,
12920 key.offset);
12921 return ACCOUNTING_MISMATCH;
12923 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12929 * Check a block group item with its referencer (chunk) and its used space
12930 * with extent/metadata item
/*
 * Check a block group item against its chunk and against the extents
 * allocated inside it.
 *
 * @fs_info: filesystem handle; extent and chunk trees are taken from it
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item in @eb
 *
 * Part 1: the chunk keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 * BTRFS_CHUNK_ITEM_KEY, block group start) is searched in the chunk tree.
 * A failed lookup records REFERENCER_MISSING, a chunk length that does
 * not match records REFERENCER_MISMATCH.
 *
 * Part 2: the extent tree is walked from the block group start up to
 * start + length.  EXTENT_ITEM / METADATA_ITEM sizes are summed into
 * total, and each extent type (data vs. tree block) is compared with the
 * block group flags; a disagreement records CHUNK_TYPE_MISMATCH.
 *
 * Part 3: total is compared with the used value stored in the block group
 * item; a difference records BG_ACCOUNTING_ERROR.
 *
 * NOTE(review): the listing omits lines (declarations, the loop
 * construct, break/goto statements, the METADATA_ITEM branch body, the
 * final return) - confirm against the full file.
 */
12932 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12933 struct extent_buffer *eb, int slot)
12935 struct btrfs_root *extent_root = fs_info->extent_root;
12936 struct btrfs_root *chunk_root = fs_info->chunk_root;
12937 struct btrfs_block_group_item *bi;
12938 struct btrfs_block_group_item bg_item;
12939 struct btrfs_path path;
12940 struct btrfs_key bg_key;
12941 struct btrfs_key chunk_key;
12942 struct btrfs_key extent_key;
12943 struct btrfs_chunk *chunk;
12944 struct extent_buffer *leaf;
12945 struct btrfs_extent_item *ei;
12946 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into a local struct before reading its fields */
12954 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12955 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12956 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12957 used = btrfs_block_group_used(&bg_item);
12958 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset equals the block group start (bg_key.objectid) */
12960 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12961 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12962 chunk_key.offset = bg_key.objectid;
12964 btrfs_init_path(&path);
12965 /* Search for the referencer chunk */
12966 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12969 "block group[%llu %llu] did not find the related chunk item",
12970 bg_key.objectid, bg_key.offset);
12971 err |= REFERENCER_MISSING;
12973 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12974 struct btrfs_chunk);
12975 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12978 "block group[%llu %llu] related chunk item length does not match",
12979 bg_key.objectid, bg_key.offset);
12980 err |= REFERENCER_MISMATCH;
12983 btrfs_release_path(&path);
12985 /* Search from the block group bytenr */
12986 extent_key.objectid = bg_key.objectid;
12987 extent_key.type = 0;
12988 extent_key.offset = 0;
12990 btrfs_init_path(&path);
12991 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12995 /* Iterate extent tree to account used space */
12997 leaf = path.nodes[0];
12999 /* Search slot can point to the last item beyond leaf nritems */
13000 if (path.slots[0] >= btrfs_header_nritems(leaf))
13003 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Extents at or beyond the end of the block group are tested for here */
13004 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
13007 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13008 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13010 if (extent_key.objectid < bg_key.objectid)
13013 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13016 total += extent_key.offset;
13018 ei = btrfs_item_ptr(leaf, path.slots[0],
13019 struct btrfs_extent_item);
13020 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA */
13021 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13022 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13024 "bad extent[%llu, %llu) type mismatch with chunk",
13025 extent_key.objectid,
13026 extent_key.objectid + extent_key.offset);
13027 err |= CHUNK_TYPE_MISMATCH;
13029 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13030 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13031 BTRFS_BLOCK_GROUP_METADATA))) {
13033 "bad extent[%llu, %llu) type mismatch with chunk",
13034 extent_key.objectid,
13035 extent_key.objectid + nodesize);
13036 err |= CHUNK_TYPE_MISMATCH;
13040 ret = btrfs_next_item(extent_root, &path);
13046 btrfs_release_path(&path);
13048 if (total != used) {
13050 "block group[%llu %llu] used %llu but extent items used %llu",
13051 bg_key.objectid, bg_key.offset, used, total);
13052 err |= BG_ACCOUNTING_ERROR;
13058 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13059 * FIXME: We still need to repair error of dev_item.
13061 * Returns error after repair.
/*
 * Repair a chunk item whose block group item is missing.
 *
 * @trans:      running transaction
 * @chunk_root: root used to reach fs_info for btrfs_make_block_group()
 * @path:       path positioned at the chunk item
 * @err:        error bits produced by the chunk check
 *
 * The item at @path is tested for BTRFS_CHUNK_ITEM_KEY.  When @err
 * contains REFERENCER_MISSING, a block group with the chunk type, offset
 * and length is created through btrfs_make_block_group(); on success the
 * bit is cleared from @err and a message is printed, on failure an error
 * is printed.
 *
 * NOTE(review): the listing omits lines (declarations, the branch on the
 * btrfs_make_block_group() result, the return statements) - confirm
 * against the full file.
 */
13063 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13064 struct btrfs_root *chunk_root,
13065 struct btrfs_path *path, int err)
13067 struct btrfs_chunk *chunk;
13068 struct btrfs_key chunk_key;
13069 struct extent_buffer *eb = path->nodes[0];
13071 int slot = path->slots[0];
13075 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13076 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13078 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
/* path->nodes[0] is the same buffer as eb (see the initializer above) */
13079 type = btrfs_chunk_type(path->nodes[0], chunk);
13080 length = btrfs_chunk_length(eb, chunk);
13082 if (err & REFERENCER_MISSING) {
/* Recreate the block group; the third argument (bytes used) is passed as 0 */
13083 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13084 type, chunk_key.objectid, chunk_key.offset, length);
13086 error("fail to add block group item[%llu %llu]",
13087 chunk_key.offset, length);
13090 err &= ~REFERENCER_MISSING;
13091 printf("Added block group item[%llu %llu]\n",
13092 chunk_key.offset, length);
13101 * Check a chunk item.
13102 * Including checking all referred dev_extents and block group
/*
 * Check a chunk item together with the block group item and the dev
 * extents it refers to.
 *
 * @fs_info: filesystem handle; extent and device trees are taken from it
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item in @eb
 *
 * Visible steps:
 *  - validate the chunk with btrfs_check_chunk_valid(); an invalid chunk
 *    records BYTES_UNALIGNED | UNKNOWN_TYPE;
 *  - search the extent tree for the block group item keyed
 *    (chunk offset, BTRFS_BLOCK_GROUP_ITEM_KEY, chunk length); a failed
 *    lookup or a flags mismatch records REFERENCER_MISSING;
 *  - for every stripe, search the device tree for the dev extent keyed
 *    (devid, BTRFS_DEV_EXTENT_KEY, stripe offset) and compare its chunk
 *    objectid, chunk offset and length with the chunk; a failure records
 *    BACKREF_MISSING.
 *
 * Fix: the stripe error message printed chunk_key.objectid as the start
 * of the "chunk[start end)" range.  Every other message in this function
 * uses chunk_key.offset for the start, and chunk_end is derived from
 * chunk_key.offset, so the message now prints chunk_key.offset as well.
 *
 * NOTE(review): this listing omits lines (declarations, braces, the
 * not_match_dev / out labels, "continue" statements, the final return) -
 * confirm against the full file.
 */
13104 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13105 struct extent_buffer *eb, int slot)
13107 struct btrfs_root *extent_root = fs_info->extent_root;
13108 struct btrfs_root *dev_root = fs_info->dev_root;
13109 struct btrfs_path path;
13110 struct btrfs_key chunk_key;
13111 struct btrfs_key bg_key;
13112 struct btrfs_key devext_key;
13113 struct btrfs_chunk *chunk;
13114 struct extent_buffer *leaf;
13115 struct btrfs_block_group_item *bi;
13116 struct btrfs_block_group_item bg_item;
13117 struct btrfs_dev_extent *ptr;
13129 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13130 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13131 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
13132 chunk_end = chunk_key.offset + length;
13133 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13136 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13138 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13141 type = btrfs_chunk_type(eb, chunk);
/* The matching block group item is keyed by chunk start and length */
13143 bg_key.objectid = chunk_key.offset;
13144 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13145 bg_key.offset = length;
13147 btrfs_init_path(&path);
13148 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13151 "chunk[%llu %llu) did not find the related block group item",
13152 chunk_key.offset, chunk_end);
13153 err |= REFERENCER_MISSING;
13155 leaf = path.nodes[0];
13156 bi = btrfs_item_ptr(leaf, path.slots[0],
13157 struct btrfs_block_group_item);
13158 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13160 if (btrfs_block_group_flags(&bg_item) != type) {
13162 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13163 chunk_key.offset, chunk_end, type,
13164 btrfs_block_group_flags(&bg_item));
13165 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent that points back at this chunk */
13169 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13170 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13171 for (i = 0; i < num_stripes; i++) {
13172 btrfs_release_path(&path);
13173 btrfs_init_path(&path);
13174 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13175 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13176 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13178 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13181 goto not_match_dev;
13183 leaf = path.nodes[0];
13184 ptr = btrfs_item_ptr(leaf, path.slots[0],
13185 struct btrfs_dev_extent);
13186 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13187 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13188 if (objectid != chunk_key.objectid ||
13189 offset != chunk_key.offset ||
13190 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13191 goto not_match_dev;
13194 err |= BACKREF_MISSING;
/* Print the chunk start (key offset), consistent with the messages above */
13196 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13197 chunk_key.offset, chunk_end, i);
13200 btrfs_release_path(&path);
/*
 * Delete the item that @path currently points at from @root.
 *
 * @trans: running transaction
 * @root:  tree the item lives in
 * @path:  path positioned at the item; it is released and re-searched
 *
 * The item key is saved, the path is released and the key is searched
 * again with the transaction, ins_len -1 and cow 1, which is the form of
 * btrfs_search_slot() used before btrfs_del_item().  When the path ends
 * up at slot 0, btrfs_prev_leaf() is called to step back.  The outcome is
 * reported with the "failed to delete" / "Deleted" messages.
 *
 * NOTE(review): the listing omits lines (declarations, the checks on
 * ret, labels, the condition choosing between the two messages, the
 * return) - confirm against the full file.
 */
13205 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13206 struct btrfs_root *root,
13207 struct btrfs_path *path)
13209 struct btrfs_key key;
13212 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13213 btrfs_release_path(path);
13214 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13220 ret = btrfs_del_item(trans, root, path);
13224 if (path->slots[0] == 0)
13225 btrfs_prev_leaf(root, path);
13230 error("failed to delete root %llu item[%llu, %u, %llu]",
13231 root->objectid, key.objectid, key.type, key.offset);
13233 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13234 root->objectid, key.objectid, key.type, key.offset);
13239 * Main entry function to check known items and update related accounting info
/*
 * Check the item at the current slot of the leaf in @path, dispatching on
 * the item key type.
 *
 * @trans:         transaction handle, forwarded to the repair helpers
 * @root:          tree the leaf belongs to
 * @path:          path whose nodes[0] / slots[0] select the item
 * @nrefs:         node reference bookkeeping, forwarded to the
 *                 EXTENT_DATA check and repair helpers
 * @account_bytes: forwarded to check_extent_data_item()
 *
 * Dispatch table (as visible in this listing):
 *   EXTENT_DATA             check_extent_data_item / repair_extent_data_item
 *   BLOCK_GROUP_ITEM        check_block_group_item, delete on REFERENCER_MISSING
 *   DEV_ITEM                check_dev_item
 *   CHUNK_ITEM              check_chunk_item / repair_chunk_item
 *   DEV_EXTENT              check_dev_extent_item
 *   EXTENT_ITEM, METADATA_ITEM   check_extent_item
 *   EXTENT_CSUM             adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                           the matching backref check, then
 *                           delete_extent_tree_item on
 *                           REFERENCER_MISMATCH | REFERENCER_MISSING
 *
 * A slot at or past the number of items is reported as an empty leaf.
 *
 * NOTE(review): the listing omits lines (declarations, the switch
 * statement, the repair conditions in front of each repair/delete call,
 * how ret is merged into the result, the loop over slots, the return) -
 * confirm against the full file.
 */
13241 static int check_leaf_items(struct btrfs_trans_handle *trans,
13242 struct btrfs_root *root, struct btrfs_path *path,
13243 struct node_refs *nrefs, int account_bytes)
13245 struct btrfs_fs_info *fs_info = root->fs_info;
13246 struct btrfs_key key;
13247 struct extent_buffer *eb;
13250 struct btrfs_extent_data_ref *dref;
13255 eb = path->nodes[0];
13256 slot = path->slots[0];
13257 if (slot >= btrfs_header_nritems(eb)) {
13259 error("empty leaf [%llu %u] root %llu", eb->start,
13260 root->fs_info->nodesize, root->objectid);
13266 btrfs_item_key_to_cpu(eb, &key, slot);
13270 case BTRFS_EXTENT_DATA_KEY:
13271 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13273 ret = repair_extent_data_item(trans, root, path, nrefs,
13277 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13278 ret = check_block_group_item(fs_info, eb, slot);
13280 ret & REFERENCER_MISSING)
13281 ret = delete_extent_tree_item(trans, root, path);
13284 case BTRFS_DEV_ITEM_KEY:
13285 ret = check_dev_item(fs_info, eb, slot);
13288 case BTRFS_CHUNK_ITEM_KEY:
13289 ret = check_chunk_item(fs_info, eb, slot);
13291 ret = repair_chunk_item(trans, root, path, ret);
13294 case BTRFS_DEV_EXTENT_KEY:
13295 ret = check_dev_extent_item(fs_info, eb, slot);
13298 case BTRFS_EXTENT_ITEM_KEY:
13299 case BTRFS_METADATA_ITEM_KEY:
13300 ret = check_extent_item(trans, fs_info, path);
13303 case BTRFS_EXTENT_CSUM_KEY:
13304 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The four cases below handle backrefs stored as separate (keyed) items */
13307 case BTRFS_TREE_BLOCK_REF_KEY:
13308 ret = check_tree_block_backref(fs_info, key.offset,
13311 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13312 ret = delete_extent_tree_item(trans, root, path);
13315 case BTRFS_EXTENT_DATA_REF_KEY:
13316 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13317 ret = check_extent_data_backref(fs_info,
13318 btrfs_extent_data_ref_root(eb, dref),
13319 btrfs_extent_data_ref_objectid(eb, dref),
13320 btrfs_extent_data_ref_offset(eb, dref),
13322 btrfs_extent_data_ref_count(eb, dref));
13324 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13325 ret = delete_extent_tree_item(trans, root, path);
13328 case BTRFS_SHARED_BLOCK_REF_KEY:
13329 ret = check_shared_block_backref(fs_info, key.offset,
13332 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13333 ret = delete_extent_tree_item(trans, root, path);
13336 case BTRFS_SHARED_DATA_REF_KEY:
13337 ret = check_shared_data_backref(fs_info, key.offset,
13340 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13341 ret = delete_extent_tree_item(trans, root, path);
13355 * Low memory usage version check_chunks_and_extents.
/*
 * Check chunks and extents by walking trees directly (used for
 * CHECK_MODE_LOWMEM, see do_check_chunks_and_extents()).
 *
 * @fs_info: filesystem handle
 *
 * Visible steps:
 *  - start a transaction on the extent root (the condition guarding this
 *    is not visible here; presumably repair mode - TODO confirm);
 *  - run check_btrfs_root() on the chunk root and on the tree root;
 *  - iterate the ROOT_ITEMs of the tree root starting at
 *    BTRFS_EXTENT_TREE_OBJECTID, read each root (non-cached for tree
 *    reloc roots, which are freed again afterwards) and run
 *    check_btrfs_root() on it;
 *  - after each root the path is released and re-searched with old_key
 *    before moving on with btrfs_next_item();
 *  - call btrfs_fix_block_accounting(), clear BG_ACCOUNTING_ERROR from
 *    err, and commit the transaction.
 *
 * NOTE(review): the listing omits lines (declarations, the loop, the
 * assignment of old_key, conditions around the repair-only statements,
 * error merging, the final return) - confirm against the full file.
 */
13357 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13359 struct btrfs_trans_handle *trans = NULL;
13360 struct btrfs_path path;
13361 struct btrfs_key old_key;
13362 struct btrfs_key key;
13363 struct btrfs_root *root1;
13364 struct btrfs_root *root;
13365 struct btrfs_root *cur_root;
13369 root = fs_info->fs_root;
13372 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13373 if (IS_ERR(trans)) {
13374 error("failed to start transaction before check");
13375 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked before any other tree */
13379 root1 = root->fs_info->chunk_root;
13380 ret = check_btrfs_root(trans, root1, 0, 1);
13383 root1 = root->fs_info->tree_root;
13384 ret = check_btrfs_root(trans, root1, 0, 1);
/* Walk the root items of the tree root, starting at the extent tree */
13387 btrfs_init_path(&path);
13388 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13390 key.type = BTRFS_ROOT_ITEM_KEY;
13392 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13394 error("cannot find extent tree in tree_root");
13399 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13400 if (key.type != BTRFS_ROOT_ITEM_KEY)
13403 key.offset = (u64)-1;
/* Tree reloc roots are read without the cache and freed again below */
13405 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13406 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13409 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13410 if (IS_ERR(cur_root) || !cur_root) {
13411 error("failed to read tree: %lld", key.objectid);
13415 ret = check_btrfs_root(trans, cur_root, 0, 1);
13418 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13419 btrfs_free_fs_root(cur_root);
/* Re-establish the position in the tree root before advancing */
13421 btrfs_release_path(&path);
13422 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13423 &old_key, &path, 0, 0);
13427 ret = btrfs_next_item(root1, &path);
13433 /* if repair, update block accounting */
13435 ret = btrfs_fix_block_accounting(trans, root);
13439 err &= ~BG_ACCOUNTING_ERROR;
13443 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13445 btrfs_release_path(&path);
/*
 * Entry point for the chunk and extent check.
 *
 * @fs_info: filesystem handle
 *
 * Prints "checking extents" to stderr when progress reporting is
 * disabled, then runs check_chunks_and_extents_v2() for
 * CHECK_MODE_LOWMEM or check_chunks_and_extents() otherwise.  In repair
 * mode, and only if the check returned 0, device and super block sizes
 * are fixed with btrfs_fix_device_and_super_size().
 *
 * NOTE(review): the listing omits lines (the declaration of ret, the
 * "else", the handling of the fix-up result, the return) - confirm
 * against the full file.
 */
13450 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13454 if (!ctx.progress_enabled)
13455 fprintf(stderr, "checking extents\n");
13456 if (check_mode == CHECK_MODE_LOWMEM)
13457 ret = check_chunks_and_extents_v2(fs_info);
13459 ret = check_chunks_and_extents(fs_info);
13461 /* Also repair device size related problems */
13462 if (repair && !ret) {
13463 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Re-initialise @root with an empty root node.
 *
 * @trans:     running transaction
 * @root:      root to reinitialise
 * @overwrite: presumably selects reuse of the existing root node instead
 *             of allocating a new block - the deciding condition is in a
 *             line this listing omits, TODO confirm
 *
 * A buffer c is obtained (either with an extra reference on an existing
 * buffer or from btrfs_alloc_free_block()), its header is zeroed and then
 * filled in: level, bytenr, generation of @trans, mixed backref revision,
 * owner, fsid and chunk tree uuid.  The buffer is marked dirty.  When the
 * new buffer has the same start as the old root node, the root item
 * generation and level are refreshed and written with
 * btrfs_update_root().  Finally the old node reference is dropped and the
 * root is added to the dirty list.
 *
 * NOTE(review): the listing omits lines (declarations such as level and
 * ret, the overwrite branch, error checks, the assignment of root->node,
 * the return) - confirm against the full file.
 */
13470 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13471 struct btrfs_root *root, int overwrite)
13473 struct extent_buffer *c;
13474 struct extent_buffer *old = root->node;
13477 struct btrfs_disk_key disk_key = {0,0,0};
13483 extent_buffer_get(c);
13486 c = btrfs_alloc_free_block(trans, root,
13487 root->fs_info->nodesize,
13488 root->root_key.objectid,
13489 &disk_key, level, 0, 0);
13492 extent_buffer_get(c);
/* Start from an all-zero header, then fill in the fields set below */
13496 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13497 btrfs_set_header_level(c, level);
13498 btrfs_set_header_bytenr(c, c->start);
13499 btrfs_set_header_generation(c, trans->transid);
13500 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13501 btrfs_set_header_owner(c, root->root_key.objectid);
13503 write_extent_buffer(c, root->fs_info->fsid,
13504 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13506 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13507 btrfs_header_chunk_tree_uuid(c),
13510 btrfs_mark_buffer_dirty(c);
13512 * this case can happen in the following case:
13514 * 1.overwrite previous root.
13516 * 2.reinit reloc data root, this is because we skip pin
13517 * down reloc data tree before which means we can allocate
13518 * same block bytenr here.
13520 if (old->start == c->start) {
13521 btrfs_set_root_generation(&root->root_item,
13523 root->root_item.level = btrfs_header_level(root->node);
13524 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13525 &root->root_key, &root->root_item);
13527 free_extent_buffer(c);
13531 free_extent_buffer(old);
13533 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * @fs_info:   filesystem handle
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root; the recursion
 *             into ROOT_ITEM targets passes 0, the recursion into child
 *             nodes passes the value on unchanged
 *
 * A block that is already marked EXTENT_DIRTY in fs_info->pinned_extents
 * is detected first; otherwise the block is pinned with
 * btrfs_pin_extent().  Then every item / pointer of the block is visited:
 *  - item keys of type ROOT_ITEM (except the extent tree, tree reloc and
 *    data reloc roots) have their root block read and pinned recursively;
 *  - block pointers are followed; at level 1 of a non-tree-root tree the
 *    child is pinned directly by bytenr and nodesize without being read.
 * Read failures are reported on stderr.
 *
 * NOTE(review): the listing omits lines (declarations, the leaf / node
 * branch on level, "continue" and return statements, the checks on the
 * recursive results) - confirm against the full file.
 */
13537 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13538 struct extent_buffer *eb, int tree_root)
13540 struct extent_buffer *tmp;
13541 struct btrfs_root_item *ri;
13542 struct btrfs_key key;
13544 int level = btrfs_header_level(eb);
13550 * If we have pinned this block before, don't pin it again.
13551 * This can not only avoid forever loop with broken filesystem
13552 * but also give us some speedups.
13554 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13555 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13558 btrfs_pin_extent(fs_info, eb->start, eb->len);
13560 nritems = btrfs_header_nritems(eb);
13561 for (i = 0; i < nritems; i++) {
13563 btrfs_item_key_to_cpu(eb, &key, i);
13564 if (key.type != BTRFS_ROOT_ITEM_KEY)
13566 /* Skip the extent root and reloc roots */
13567 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13568 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13569 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13571 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13572 bytenr = btrfs_disk_root_bytenr(eb, ri);
13575 * If at any point we start needing the real root we
13576 * will have to build a stump root for the root we are
13577 * in, but for now this doesn't actually use the root so
13578 * just pass in extent_root.
13580 tmp = read_tree_block(fs_info, bytenr, 0);
13581 if (!extent_buffer_uptodate(tmp)) {
13582 fprintf(stderr, "Error reading root block\n");
13585 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13586 free_extent_buffer(tmp);
13590 bytenr = btrfs_node_blockptr(eb, i);
13592 /* If we aren't the tree root don't read the block */
13593 if (level == 1 && !tree_root) {
13594 btrfs_pin_extent(fs_info, bytenr,
13595 fs_info->nodesize);
13599 tmp = read_tree_block(fs_info, bytenr, 0);
13600 if (!extent_buffer_uptodate(tmp)) {
13601 fprintf(stderr, "Error reading tree block\n");
13604 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13605 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the tree blocks of the chunk tree and of the
 * tree root.
 *
 * @fs_info: filesystem whose chunk_root and tree_root nodes are walked
 *
 * Calls pin_down_tree_blocks() on fs_info->chunk_root->node with tree_root
 * set to 0, then returns the result of pin_down_tree_blocks() on
 * fs_info->tree_root->node with tree_root set to 1.
 *
 * NOTE(review): how the result of the first call is handled is not visible
 * in this listing.
 */
13614 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13618 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13622 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - recreate the in-memory block groups from the chunk
 * tree.
 *
 * @fs_info: filesystem whose chunk_root is scanned
 *
 * Searches fs_info->chunk_root for BTRFS_CHUNK_ITEM_KEY items and zeroes the
 * three avail_*_alloc_bits masks. For every chunk item found it calls
 * btrfs_add_block_group() with the chunk type, key.objectid, key.offset and
 * the chunk length, and passes key.offset and key.offset + chunk length to
 * set_extent_dirty() on fs_info->free_space_cache. Afterwards the block groups
 * are stepped through with btrfs_lookup_first_block_group(), advancing start
 * past each one.
 *
 * NOTE(review): the error checks, loop headers, the remaining key fields and
 * the return statements are elided from this listing; what is done with each
 * block group in the second loop is not visible.
 */
13625 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13627 struct btrfs_block_group_cache *cache;
13628 struct btrfs_path path;
13629 struct extent_buffer *leaf;
13630 struct btrfs_chunk *chunk;
13631 struct btrfs_key key;
13635 btrfs_init_path(&path);
13637 key.type = BTRFS_CHUNK_ITEM_KEY;
13639 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13641 btrfs_release_path(&path);
13646 * We do this in case the block groups were screwed up and had alloc
13647 * bits that aren't actually set on the chunks. This happens with
13648 * restored images every time and could happen in real life I guess.
13650 fs_info->avail_data_alloc_bits = 0;
13651 fs_info->avail_metadata_alloc_bits = 0;
13652 fs_info->avail_system_alloc_bits = 0;
13654 /* First we need to create the in-memory block groups */
13656 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13657 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13659 btrfs_release_path(&path);
13667 leaf = path.nodes[0];
13668 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13669 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13674 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13675 btrfs_add_block_group(fs_info, 0,
13676 btrfs_chunk_type(leaf, chunk),
13677 key.objectid, key.offset,
13678 btrfs_chunk_length(leaf, chunk));
13679 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13680 key.offset + btrfs_chunk_length(leaf, chunk));
/* Second pass: step over the block groups in bytenr order. */
13685 cache = btrfs_lookup_first_block_group(fs_info, start);
13689 start = cache->key.objectid + cache->key.offset;
13692 btrfs_release_path(&path);
/*
 * reset_balance - delete the balance item and the tree-reloc root items from
 * the tree root, then reinitialize the data relocation tree.
 *
 * @trans:   transaction used for every modification
 * @fs_info: filesystem; fs_info->tree_root is the tree that is searched
 *
 * Visible steps:
 *  - search for the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item with
 *    ins_len -1 and cow 1 and remove it with btrfs_del_item(); one path jumps
 *    directly to the reinit_data_reloc label;
 *  - search for BTRFS_TREE_RELOC_OBJECTID root items and remove them with
 *    btrfs_del_items(), the final call using del_slot and del_nr;
 *  - read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *    transaction, reinitialize it with btrfs_fsck_reinit_root() and call
 *    btrfs_make_root_dir() with BTRFS_FIRST_FREE_OBJECTID.
 *
 * NOTE(review): the conditions guarding each step, the loop structure, the
 * labels and the return statements are elided from this listing.
 */
13696 static int reset_balance(struct btrfs_trans_handle *trans,
13697 struct btrfs_fs_info *fs_info)
13699 struct btrfs_root *root = fs_info->tree_root;
13700 struct btrfs_path path;
13701 struct extent_buffer *leaf;
13702 struct btrfs_key key;
13703 int del_slot, del_nr = 0;
13707 btrfs_init_path(&path);
13708 key.objectid = BTRFS_BALANCE_OBJECTID;
13709 key.type = BTRFS_BALANCE_ITEM_KEY;
13711 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13716 goto reinit_data_reloc;
13721 ret = btrfs_del_item(trans, root, &path);
13724 btrfs_release_path(&path);
13726 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13727 key.type = BTRFS_ROOT_ITEM_KEY;
13729 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13733 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13738 ret = btrfs_del_items(trans, root, &path,
13745 btrfs_release_path(&path);
13748 ret = btrfs_search_slot(trans, root, &key, &path,
13755 leaf = path.nodes[0];
13756 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13757 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13759 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13764 del_slot = path.slots[0];
13773 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13777 btrfs_release_path(&path);
/* From here on "root" refers to the data relocation tree. */
13780 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13781 key.type = BTRFS_ROOT_ITEM_KEY;
13782 key.offset = (u64)-1;
13783 root = btrfs_read_fs_root(fs_info, &key);
13784 if (IS_ERR(root)) {
13785 fprintf(stderr, "Error reading data reloc tree\n");
13786 ret = PTR_ERR(root);
13789 record_root_in_trans(trans, root);
13790 ret = btrfs_fsck_reinit_root(trans, root, 0);
13793 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13795 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild the extent tree of @fs_info inside @trans.
 *
 * @trans:   transaction used for the reinit and the item insertions
 * @fs_info: filesystem whose extent_root is recreated
 *
 * Visible steps, in order:
 *  1. print a message when the MIXED_GROUPS incompat flag is set;
 *  2. pin_metadata_blocks();
 *  3. btrfs_free_block_groups() followed by reset_block_groups();
 *  4. btrfs_fsck_reinit_root() on fs_info->extent_root;
 *  5. for each block group returned by btrfs_lookup_first_block_group(),
 *     insert cache->key / cache->item into the extent root and call
 *     btrfs_extent_post_op();
 *  6. reset_balance().
 * Each failing step visible here prints a message to stderr.
 *
 * NOTE(review): the error checks, loop header and return statements are
 * elided from this listing, so the exact return values are not described.
 */
13799 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13800 struct btrfs_fs_info *fs_info)
13806 * The only reason we don't do this is because right now we're just
13807 * walking the trees we find and pinning down their bytes, we don't look
13808 * at any of the leaves. In order to do mixed groups we'd have to check
13809 * the leaves of any fs roots and pin down the bytes for any file
13810 * extents we find. Not hard but why do it if we don't have to?
13812 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13813 fprintf(stderr, "We don't support re-initing the extent tree "
13814 "for mixed block groups yet, please notify a btrfs "
13815 "developer you want to do this so they can add this "
13816 "functionality.\n");
13821 * first we need to walk all of the trees except the extent tree and pin
13822 * down the bytes that are in use so we don't overwrite any existing
13825 ret = pin_metadata_blocks(fs_info);
13827 fprintf(stderr, "error pinning down used bytes\n");
13832 * Need to drop all the block groups since we're going to recreate all
13835 btrfs_free_block_groups(fs_info);
13836 ret = reset_block_groups(fs_info);
13838 fprintf(stderr, "error resetting the block groups\n");
13842 /* Ok we can allocate now, reinit the extent root */
13843 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13845 fprintf(stderr, "extent root initialization failed\n");
13847 * When the transaction code is updated we should end the
13848 * transaction, but for now progs only knows about commit so
13849 * just return an error.
13855 * Now we have all the in-memory block groups setup so we can make
13856 * allocations properly, and the metadata we care about is safe since we
13857 * pinned all of it above.
13860 struct btrfs_block_group_cache *cache;
13862 cache = btrfs_lookup_first_block_group(fs_info, start);
13865 start = cache->key.objectid + cache->key.offset;
13866 ret = btrfs_insert_item(trans, fs_info->extent_root,
13867 &cache->key, &cache->item,
13868 sizeof(cache->item));
13870 fprintf(stderr, "Error adding block group\n");
13873 btrfs_extent_post_op(trans, fs_info->extent_root);
13876 ret = reset_balance(trans, fs_info);
13878 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a COW of metadata block @eb through its owner
 * root.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * Looks up the root named by btrfs_header_owner(eb), starts a transaction on
 * it, sets path.lowest_level to the level of @eb, takes the first key of @eb
 * (node key or item key) and runs btrfs_search_slot() with cow set to 1.
 * The transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the failed lookup when the owner root cannot be read;
 * a PTR_ERR(trans) return is also visible for the transaction start.
 * NOTE(review): the final return statement is elided from this listing, and
 * the return value of btrfs_commit_transaction() is not stored here.
 */
13883 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13885 struct btrfs_path path;
13886 struct btrfs_trans_handle *trans;
13887 struct btrfs_key key;
13890 printf("Recowing metadata block %llu\n", eb->start);
13891 key.objectid = btrfs_header_owner(eb);
13892 key.type = BTRFS_ROOT_ITEM_KEY;
13893 key.offset = (u64)-1;
13895 root = btrfs_read_fs_root(root->fs_info, &key);
13896 if (IS_ERR(root)) {
13897 fprintf(stderr, "Couldn't find owner root %llu\n",
13899 return PTR_ERR(root);
13902 trans = btrfs_start_transaction(root, 1);
13904 return PTR_ERR(trans);
13906 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself is the COW target. */
13907 path.lowest_level = btrfs_header_level(eb);
13908 if (path.lowest_level)
13909 btrfs_node_key_to_cpu(eb, &key, 0);
13911 btrfs_item_key_to_cpu(eb, &key, 0);
13913 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13914 btrfs_commit_transaction(trans, root);
13915 btrfs_release_path(&path);
/*
 * delete_bad_item - remove the item described by @bad from its root.
 *
 * @root: any root of the filesystem; only root->fs_info is used before the
 *        variable is reassigned to the root named by bad->root_id
 * @bad:  record holding the root id and the key of the item to delete
 *
 * Prints the key being deleted, reads the root with object id bad->root_id,
 * starts a transaction on it, searches for bad->key with ins_len -1 and
 * cow 1, deletes the item with btrfs_del_item(), commits the transaction and
 * releases the path.
 *
 * Returns PTR_ERR() of the failed lookup when the root cannot be read; a
 * PTR_ERR(trans) return is also visible for the transaction start.
 * NOTE(review): the handling of the search result and the final return are
 * elided from this listing.
 */
13919 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13921 struct btrfs_path path;
13922 struct btrfs_trans_handle *trans;
13923 struct btrfs_key key;
13926 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13927 bad->key.type, bad->key.offset);
13928 key.objectid = bad->root_id;
13929 key.type = BTRFS_ROOT_ITEM_KEY;
13930 key.offset = (u64)-1;
13932 root = btrfs_read_fs_root(root->fs_info, &key);
13933 if (IS_ERR(root)) {
13934 fprintf(stderr, "Couldn't find owner root %llu\n",
13936 return PTR_ERR(root);
13939 trans = btrfs_start_transaction(root, 1);
13941 return PTR_ERR(trans);
13943 btrfs_init_path(&path);
13944 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13950 ret = btrfs_del_item(trans, root, &path);
13952 btrfs_commit_transaction(trans, root);
13953 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * @root: root used to start and commit the transaction; the super block copy
 *        is reached through root->fs_info->super_copy
 *
 * Starts a transaction, sets both the super block log_root and
 * log_root_level to 0 and commits. ret receives PTR_ERR(trans) when the
 * transaction cannot be started, otherwise the result of the commit.
 * NOTE(review): the return statement is elided from this listing.
 */
13957 static int zero_log_tree(struct btrfs_root *root)
13959 struct btrfs_trans_handle *trans;
13962 trans = btrfs_start_transaction(root, 1);
13963 if (IS_ERR(trans)) {
13964 ret = PTR_ERR(trans);
13967 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13968 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13969 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - read an extent one sector at a time and insert a checksum
 * for every sector.
 *
 * @trans:     transaction handed to btrfs_csum_file_block()
 * @csum_root: checksum tree that receives the checksums; also supplies fs_info
 * @buf:       scratch buffer that read_extent_data() fills for each sector
 * @start:     logical start of the extent
 *
 * The loop runs while offset < len, advancing offset by fs_info->sectorsize.
 * Each iteration reads at start + offset and passes start + len together with
 * start + offset, buf and sectorsize to btrfs_csum_file_block().
 *
 * NOTE(review): the declaration of len, the remaining arguments of
 * read_extent_data(), the error checks and the return statement are elided
 * from this listing.
 */
13973 static int populate_csum(struct btrfs_trans_handle *trans,
13974 struct btrfs_root *csum_root, char *buf, u64 start,
13977 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13982 while (offset < len) {
13983 sectorsize = fs_info->sectorsize;
13984 ret = read_extent_data(fs_info, buf, start + offset,
13988 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13989 start + offset, buf, sectorsize);
13992 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - add checksums for the regular file
 * extents referenced by one fs/subvolume tree.
 *
 * @trans:     transaction handed to populate_csum()
 * @csum_root: checksum tree to fill
 * @cur_root:  fs/subvolume tree that is scanned
 *
 * Allocates a buffer of cur_root->fs_info->sectorsize bytes, searches
 * @cur_root and walks its items with btrfs_next_item(). For items of type
 * BTRFS_EXTENT_DATA_KEY whose file extent type is BTRFS_FILE_EXTENT_REG it
 * calls populate_csum() with the disk bytenr and disk num bytes of the
 * extent. A -EEXIST result from populate_csum() is tested explicitly.
 *
 * NOTE(review): the search key setup, the jump targets of the checks, the
 * handling after the -EEXIST test, the release of buf and the return
 * statement are elided from this listing.
 */
13997 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13998 struct btrfs_root *csum_root,
13999 struct btrfs_root *cur_root)
14001 struct btrfs_path path;
14002 struct btrfs_key key;
14003 struct extent_buffer *node;
14004 struct btrfs_file_extent_item *fi;
14011 buf = malloc(cur_root->fs_info->sectorsize);
14015 btrfs_init_path(&path);
14019 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14022 /* Iterate all regular file extents and fill its csum */
14024 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14026 if (key.type != BTRFS_EXTENT_DATA_KEY)
14028 node = path.nodes[0];
14029 slot = path.slots[0];
14030 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14031 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14033 start = btrfs_file_extent_disk_bytenr(node, fi);
14034 len = btrfs_file_extent_disk_num_bytes(node, fi);
14036 ret = populate_csum(trans, csum_root, buf, start, len);
14037 if (ret == -EEXIST)
14043 * TODO: if next leaf is corrupted, jump to nearest next valid
14046 ret = btrfs_next_item(cur_root, &path);
14056 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - fill the checksum tree by scanning every
 * fs/subvolume tree listed in the tree root.
 *
 * @trans:     transaction handed on to the per-root helper
 * @csum_root: checksum tree to fill; also supplies fs_info
 *
 * Starts the search in fs_info->tree_root at BTRFS_FS_TREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY and walks the items with btrfs_next_item(). Keys with
 * objectid above BTRFS_LAST_FREE_OBJECTID, keys of another type and object
 * ids rejected by is_fstree() are tested for explicitly. For the remaining
 * keys the root is read with key.offset set to (u64)-1 and passed to
 * fill_csum_tree_from_one_fs_root(). A root that is an error pointer or NULL
 * is reported on stderr.
 *
 * NOTE(review): the jump targets of the checks, the error paths and the
 * return statement are elided from this listing.
 */
14061 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14062 struct btrfs_root *csum_root)
14064 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14065 struct btrfs_path path;
14066 struct btrfs_root *tree_root = fs_info->tree_root;
14067 struct btrfs_root *cur_root;
14068 struct extent_buffer *node;
14069 struct btrfs_key key;
14073 btrfs_init_path(&path);
14074 key.objectid = BTRFS_FS_TREE_OBJECTID;
14076 key.type = BTRFS_ROOT_ITEM_KEY;
14077 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14086 node = path.nodes[0];
14087 slot = path.slots[0];
14088 btrfs_item_key_to_cpu(node, &key, slot);
14089 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14091 if (key.type != BTRFS_ROOT_ITEM_KEY)
14093 if (!is_fstree(key.objectid))
14095 key.offset = (u64)-1;
14097 cur_root = btrfs_read_fs_root(fs_info, &key);
14098 if (IS_ERR(cur_root) || !cur_root) {
14099 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14103 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14108 ret = btrfs_next_item(tree_root, &path);
14118 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - fill the checksum tree by scanning the data
 * extents recorded in the extent tree.
 *
 * @trans:     transaction handed to populate_csum()
 * @csum_root: checksum tree to fill; csum_root->fs_info supplies the extent
 *             root and the sector size
 *
 * Searches the extent root for BTRFS_EXTENT_ITEM_KEY items, allocates a
 * buffer of one sector, and walks the leaves with btrfs_next_leaf(). Items
 * of another key type and extent items without BTRFS_EXTENT_FLAG_DATA are
 * tested for explicitly; for the remaining items populate_csum() is called
 * with key.objectid as the start of the extent.
 *
 * NOTE(review): the loop header, slot advancement, the last argument of
 * populate_csum(), the release of buf and the return statements are elided
 * from this listing.
 */
14122 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14123 struct btrfs_root *csum_root)
14125 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14126 struct btrfs_path path;
14127 struct btrfs_extent_item *ei;
14128 struct extent_buffer *leaf;
14130 struct btrfs_key key;
14133 btrfs_init_path(&path);
14135 key.type = BTRFS_EXTENT_ITEM_KEY;
14137 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14139 btrfs_release_path(&path);
14143 buf = malloc(csum_root->fs_info->sectorsize);
14145 btrfs_release_path(&path);
14150 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14151 ret = btrfs_next_leaf(extent_root, &path);
14159 leaf = path.nodes[0];
14161 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14162 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14167 ei = btrfs_item_ptr(leaf, path.slots[0],
14168 struct btrfs_extent_item);
14169 if (!(btrfs_extent_flags(leaf, ei) &
14170 BTRFS_EXTENT_FLAG_DATA)) {
14175 ret = populate_csum(trans, csum_root, buf, key.objectid,
14182 btrfs_release_path(&path);
14188 * Recalculate the csum and put it into the csum tree.
14190 * Extent tree init will wipe out all the extent info, so in that case, we
14191 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
14192 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree - dispatch to one of the two checksum tree fillers.
 *
 * @trans:          transaction passed through to the selected filler
 * @csum_root:      checksum tree to fill
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); the other
 *                  visible return uses fill_csum_tree_from_extent()
 *
 * Returns whatever the selected filler returns.
 */
14194 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14195 struct btrfs_root *csum_root,
14196 int search_fs_tree)
14198 if (search_fs_tree)
14199 return fill_csum_tree_from_fs(trans, csum_root);
14201 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the file-scope roots_info_cache.
 *
 * Tests roots_info_cache for NULL first. While the cache tree is not empty,
 * the first cache extent is removed from the tree and converted back to its
 * enclosing struct root_item_info with container_of(). Finally the tree
 * itself is freed and roots_info_cache is reset to NULL.
 *
 * NOTE(review): the statement that disposes of each root_item_info entry is
 * elided from this listing.
 */
14204 static void free_roots_info_cache(void)
14206 if (!roots_info_cache)
14209 while (!cache_tree_empty(roots_info_cache)) {
14210 struct cache_extent *entry;
14211 struct root_item_info *rii;
14213 entry = first_cache_extent(roots_info_cache);
14216 remove_cache_extent(roots_info_cache, entry);
14217 rii = container_of(entry, struct root_item_info, cache_extent);
14221 free(roots_info_cache);
14222 roots_info_cache = NULL;
/*
 * build_roots_info_cache - scan the extent tree and record, per root id, the
 * highest-level tree block that references that root.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * Allocates and initializes roots_info_cache when it is still NULL, then
 * walks info->extent_root. Only BTRFS_EXTENT_ITEM_KEY items flagged
 * BTRFS_EXTENT_FLAG_TREE_BLOCK and BTRFS_METADATA_ITEM_KEY items are
 * considered. The level comes from found_key.offset for metadata items and
 * from the btrfs_tree_block_info for extent items. The first inline ref must
 * be of type BTRFS_TREE_BLOCK_REF_KEY; its offset is used as the root id.
 *
 * Entries are keyed by root id with size 1. A new entry starts with level
 * (u8)-1. When the level of the current block is higher than the recorded
 * one, or nothing has been recorded yet, level, bytenr, generation and
 * node_count = 1 are stored.
 *
 * NOTE(review): the body of the "level == rii->level" branch, the slot
 * advancement, the error paths and the return statement are elided from this
 * listing.
 */
14225 static int build_roots_info_cache(struct btrfs_fs_info *info)
14228 struct btrfs_key key;
14229 struct extent_buffer *leaf;
14230 struct btrfs_path path;
14232 if (!roots_info_cache) {
14233 roots_info_cache = malloc(sizeof(*roots_info_cache));
14234 if (!roots_info_cache)
14236 cache_tree_init(roots_info_cache);
14239 btrfs_init_path(&path);
14241 key.type = BTRFS_EXTENT_ITEM_KEY;
14243 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14246 leaf = path.nodes[0];
14249 struct btrfs_key found_key;
14250 struct btrfs_extent_item *ei;
14251 struct btrfs_extent_inline_ref *iref;
14252 int slot = path.slots[0];
14257 struct cache_extent *entry;
14258 struct root_item_info *rii;
14260 if (slot >= btrfs_header_nritems(leaf)) {
14261 ret = btrfs_next_leaf(info->extent_root, &path);
14268 leaf = path.nodes[0];
14269 slot = path.slots[0];
14272 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14274 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14275 found_key.type != BTRFS_METADATA_ITEM_KEY)
14278 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14279 flags = btrfs_extent_flags(leaf, ei);
14281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14282 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* The inline ref follows the extent item directly for metadata items. */
14285 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14286 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14287 level = found_key.offset;
14289 struct btrfs_tree_block_info *binfo;
14291 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14292 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14293 level = btrfs_tree_block_level(leaf, binfo);
14297 * For a root extent, it must be of the following type and the
14298 * first (and only one) iref in the item.
14300 type = btrfs_extent_inline_ref_type(leaf, iref);
14301 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14304 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14305 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14307 rii = malloc(sizeof(struct root_item_info));
14312 rii->cache_extent.start = root_id;
14313 rii->cache_extent.size = 1;
14314 rii->level = (u8)-1;
14315 entry = &rii->cache_extent;
14316 ret = insert_cache_extent(roots_info_cache, entry);
14319 rii = container_of(entry, struct root_item_info,
14323 ASSERT(rii->cache_extent.start == root_id);
14324 ASSERT(rii->cache_extent.size == 1);
14326 if (level > rii->level || rii->level == (u8)-1) {
14327 rii->level = level;
14328 rii->bytenr = found_key.objectid;
14329 rii->gen = btrfs_extent_generation(leaf, ei);
14330 rii->node_count = 1;
14331 } else if (level == rii->level) {
14339 btrfs_release_path(&path);
/*
 * maybe_repair_root_item - compare one on-disk root item against the entry
 * recorded in roots_info_cache and optionally rewrite it.
 *
 * @path:           path positioned on the root item; path->nodes[0] and
 *                  path->slots[0] locate it
 * @root_key:       key of the root item; its objectid is the cache lookup key
 * @read_only_mode: when zero, the bytenr, level and generation taken from the
 *                  cache entry are written back into the leaf
 *
 * The cache entry must have node_count == 1, otherwise an error message is
 * emitted. The root item is copied out of the leaf and its bytenr, level and
 * generation are compared with the cached values. On mismatch a message is
 * printed unless both read_only_mode and the global repair flag are set. A
 * root item whose generation is newer than the cached one gets a dedicated
 * message.
 *
 * NOTE(review): the calls that emit the messages, the return values of each
 * branch and the final return are elided from this listing.
 */
14344 static int maybe_repair_root_item(struct btrfs_path *path,
14345 const struct btrfs_key *root_key,
14346 const int read_only_mode)
14348 const u64 root_id = root_key->objectid;
14349 struct cache_extent *entry;
14350 struct root_item_info *rii;
14351 struct btrfs_root_item ri;
14352 unsigned long offset;
14354 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14357 "Error: could not find extent items for root %llu\n",
14358 root_key->objectid);
14362 rii = container_of(entry, struct root_item_info, cache_extent);
14363 ASSERT(rii->cache_extent.start == root_id);
14364 ASSERT(rii->cache_extent.size == 1);
14366 if (rii->node_count != 1) {
14368 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read from the leaf. */
14373 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14374 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14376 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14377 btrfs_root_level(&ri) != rii->level ||
14378 btrfs_root_generation(&ri) != rii->gen) {
14381 * If we're in repair mode but our caller told us to not update
14382 * the root item, i.e. just check if it needs to be updated, don't
14383 * print this message, since the caller will call us again shortly
14384 * for the same root item without read only mode (the caller will
14385 * open a transaction first).
14387 if (!(read_only_mode && repair))
14389 "%sroot item for root %llu,"
14390 " current bytenr %llu, current gen %llu, current level %u,"
14391 " new bytenr %llu, new gen %llu, new level %u\n",
14392 (read_only_mode ? "" : "fixing "),
14394 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14395 btrfs_root_level(&ri),
14396 rii->bytenr, rii->gen, rii->level);
14398 if (btrfs_root_generation(&ri) > rii->gen) {
14400 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14401 root_id, btrfs_root_generation(&ri), rii->gen);
14405 if (!read_only_mode) {
14406 btrfs_set_root_bytenr(&ri, rii->bytenr);
14407 btrfs_set_root_level(&ri, rii->level);
14408 btrfs_set_root_generation(&ri, rii->gen);
14409 write_extent_buffer(path->nodes[0], &ri,
14410 offset, sizeof(ri));
14420 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14421 * caused read-only snapshots to be corrupted if they were created at a moment
14422 * when the source subvolume/snapshot had orphan items. The issue was that the
14423 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14424 * node instead of the post orphan cleanup root node.
14425 * So this function, and its callees, just detects and fixes those cases. Even
14426 * though the regression was for read-only snapshots, this function applies to
14427 * any snapshot/subvolume root.
14428 * This must be run before any other repair code - not doing so makes other
14429 * repair code delete or modify backrefs in the extent tree for example, which
14430 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - check every root item in the tree root against the
 * information collected by build_roots_info_cache().
 *
 * @info: filesystem whose tree_root is scanned
 *
 * Builds the roots info cache, then scans info->tree_root starting at
 * BTRFS_FIRST_FREE_OBJECTID for BTRFS_ROOT_ITEM_KEY items. Items with
 * objectid BTRFS_TREE_RELOC_OBJECTID are tested for explicitly. Each
 * remaining root item is handed to maybe_repair_root_item(), in read-only
 * mode while no transaction is open. A transaction is started on
 * info->tree_root and committed again at the end of a leaf; the "!trans &&
 * repair" test handles the case where no transaction is open yet in repair
 * mode. The cache is freed and the path released before returning.
 *
 * NOTE(review): the loop structure, the use of need_trans, the counters, the
 * labels and the return statement are elided from this listing.
 */
14432 static int repair_root_items(struct btrfs_fs_info *info)
14434 struct btrfs_path path;
14435 struct btrfs_key key;
14436 struct extent_buffer *leaf;
14437 struct btrfs_trans_handle *trans = NULL;
14440 int need_trans = 0;
14442 btrfs_init_path(&path);
14444 ret = build_roots_info_cache(info);
14448 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14449 key.type = BTRFS_ROOT_ITEM_KEY;
14454 * Avoid opening and committing transactions if a leaf doesn't have
14455 * any root items that need to be fixed, so that we avoid rotating
14456 * backup roots unnecessarily.
14459 trans = btrfs_start_transaction(info->tree_root, 1);
14460 if (IS_ERR(trans)) {
14461 ret = PTR_ERR(trans);
14466 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14470 leaf = path.nodes[0];
14473 struct btrfs_key found_key;
14475 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14476 int no_more_keys = find_next_key(&path, &key);
14478 btrfs_release_path(&path);
14480 ret = btrfs_commit_transaction(trans,
14492 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14494 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14496 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read-only mode. */
14499 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14503 if (!trans && repair) {
14506 btrfs_release_path(&path);
14516 free_roots_info_cache();
14517 btrfs_release_path(&path);
14519 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache - clear the v1 free space cache of every block
 * group and invalidate the cache generation in the super block.
 *
 * @fs_info: filesystem to operate on
 *
 * Iterates over the block groups with btrfs_lookup_first_block_group(),
 * calling btrfs_clear_free_space_cache() on each and advancing past it.
 * Then starts a transaction on fs_info->tree_root, sets the super block
 * cache generation to (u64)-1 and commits.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): the loop header, the error checks inside the loop and the
 * final return are elided from this listing; the return value of
 * btrfs_commit_transaction() is not stored on the visible line.
 */
14526 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14528 struct btrfs_trans_handle *trans;
14529 struct btrfs_block_group_cache *bg_cache;
14533 /* Clear all free space cache inodes and its extent data */
14535 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14538 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14541 current = bg_cache->key.objectid + bg_cache->key.offset;
14544 /* Don't forget to set cache_generation to -1 */
14545 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14546 if (IS_ERR(trans)) {
14547 error("failed to update super block cache generation");
14548 return PTR_ERR(trans);
14550 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14551 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * do_clear_free_space_cache - clear the free space cache of the requested
 * version and report progress to the user.
 *
 * @fs_info: filesystem to operate on
 *
 * The body switches on clear_version:
 *  - 1: a message is issued when the FREE_SPACE_TREE compat_ro flag is set,
 *       otherwise clear_free_space_cache() is run;
 *  - 2: prints "no free space cache v2 to clear" when the FREE_SPACE_TREE
 *       compat_ro flag is not set, otherwise runs
 *       btrfs_clear_free_space_tree().
 *
 * NOTE(review): the declaration of clear_version, the error checks and the
 * return statements are elided from this listing.
 */
14556 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14561 if (clear_version == 1) {
14562 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14564 "free space cache v2 detected, use --clear-space-cache v2");
14568 printf("Clearing free space cache\n");
14569 ret = clear_free_space_cache(fs_info);
14571 error("failed to clear free space cache");
14574 printf("Free space cache cleared\n");
14576 } else if (clear_version == 2) {
14577 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14578 printf("no free space cache v2 to clear\n");
14582 printf("Clear free space cache v2\n");
14583 ret = btrfs_clear_free_space_tree(fs_info);
14585 error("failed to clear free space cache v2: %d", ret);
14588 printf("free space cache v2 cleared\n");
/*
 * Usage text for "btrfs check", passed to usage() by cmd_check().
 * The first string is the synopsis and the second a one-line summary; the
 * following strings form the long description and the option list.
 * NOTE(review): some short entries and the array terminator are elided from
 * this listing.
 */
14595 const char * const cmd_check_usage[] = {
14596 "btrfs check [options] <device>",
14597 "Check structural integrity of a filesystem (unmounted).",
14598 "Check structural integrity of an unmounted filesystem. Verify internal",
14599 "trees' consistency and item connectivity. In the repair mode try to",
14600 "fix the problems found. ",
14601 "WARNING: the repair mode is considered dangerous",
14603 "-s|--super <superblock> use this superblock copy",
14604 "-b|--backup use the first valid backup root copy",
14605 "--force skip mount checks, repair is not possible",
14606 "--repair try to repair the filesystem",
14607 "--readonly run in read-only mode (default)",
14608 "--init-csum-tree create a new CRC tree",
14609 "--init-extent-tree create a new extent tree",
14610 "--mode <MODE> allows choice of memory/IO trade-offs",
14611 " where MODE is one of:",
14612 " original - read inodes and extents to memory (requires",
14613 " more memory, does less IO)",
14614 " lowmem - try to use less memory but read blocks again",
14616 "--check-data-csum verify checksums of data blocks",
14617 "-Q|--qgroup-report print a report on qgroup consistency",
14618 "-E|--subvol-extents <subvolid>",
14619 " print subvolume extents and sharing state",
14620 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14621 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14622 "-p|--progress indicate progress",
14623 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * cmd_check - entry point of the "btrfs check" command.
 *
 * @argc, @argv: command line; after option parsing exactly one positional
 *               argument (the device) is expected, otherwise the usage text
 *               is shown.
 *
 * Visible flow:
 *  - parse the options with getopt_long(), accumulating ctree_flags;
 *  - start the progress task when progress reporting is enabled;
 *  - report an error when both readonly and repair are set;
 *  - check the mount status of argv[optind] and open the filesystem with
 *    open_ctree_fs_info();
 *  - require the tree, dev and chunk root nodes to be uptodate;
 *  - handle --clear-space-cache, zeroing of the log tree in repair mode, the
 *    qgroup report and the subvolume extent report;
 *  - optionally reinitialize the extent and/or checksum tree inside one
 *    transaction;
 *  - run repair_root_items() (unless the extent tree was reinitialized), the
 *    chunk/extent check, the free space check, the fs roots check, the csum
 *    check and the root refs check;
 *  - in repair mode re-COW the blocks queued on recow_ebs, then delete the
 *    items queued on delete_items;
 *  - verify and repair quota groups when quota is enabled;
 *  - print the collected statistics and release the progress task.
 *
 * The short option string lists "E:" so that -E receives its <subvolid>
 * argument in optarg. --subvol-extents is declared required_argument and
 * maps to the same 'E' value, and the usage text documents -E <subvolid>.
 *
 * NOTE(review): this listing elides many short lines (case labels, error
 * checks, gotos, labels and the return statement), so exit codes and cleanup
 * order are not described here.
 */
14627 int cmd_check(int argc, char **argv)
14629 struct cache_tree root_cache;
14630 struct btrfs_root *root;
14631 struct btrfs_fs_info *info;
14634 u64 tree_root_bytenr = 0;
14635 u64 chunk_root_bytenr = 0;
14636 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14640 int init_csum_tree = 0;
14642 int clear_space_cache = 0;
14643 int qgroup_report = 0;
14644 int qgroups_repaired = 0;
14645 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14650 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14651 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14652 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14653 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14654 GETOPT_VAL_FORCE };
14655 static const struct option long_options[] = {
14656 { "super", required_argument, NULL, 's' },
14657 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14658 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14659 { "init-csum-tree", no_argument, NULL,
14660 GETOPT_VAL_INIT_CSUM },
14661 { "init-extent-tree", no_argument, NULL,
14662 GETOPT_VAL_INIT_EXTENT },
14663 { "check-data-csum", no_argument, NULL,
14664 GETOPT_VAL_CHECK_CSUM },
14665 { "backup", no_argument, NULL, 'b' },
14666 { "subvol-extents", required_argument, NULL, 'E' },
14667 { "qgroup-report", no_argument, NULL, 'Q' },
14668 { "tree-root", required_argument, NULL, 'r' },
14669 { "chunk-root", required_argument, NULL,
14670 GETOPT_VAL_CHUNK_TREE },
14671 { "progress", no_argument, NULL, 'p' },
14672 { "mode", required_argument, NULL,
14674 { "clear-space-cache", required_argument, NULL,
14675 GETOPT_VAL_CLEAR_SPACE_CACHE},
14676 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14677 { NULL, 0, NULL, 0}
/*
 * 'E' must carry a ':' here: the long option --subvol-extents takes a
 * required argument and returns 'E', so the short form has to set optarg
 * as well.
 */
14680 c = getopt_long(argc, argv, "as:br:pE:Q", long_options, NULL);
14684 case 'a': /* ignored */ break;
14686 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14689 num = arg_strtou64(optarg);
14690 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14692 "super mirror should be less than %d",
14693 BTRFS_SUPER_MIRROR_MAX);
14696 bytenr = btrfs_sb_offset(((int)num));
14697 printf("using SB copy %llu, bytenr %llu\n", num,
14698 (unsigned long long)bytenr);
14704 subvolid = arg_strtou64(optarg);
14707 tree_root_bytenr = arg_strtou64(optarg);
14709 case GETOPT_VAL_CHUNK_TREE:
14710 chunk_root_bytenr = arg_strtou64(optarg);
14713 ctx.progress_enabled = true;
14717 usage(cmd_check_usage);
14718 case GETOPT_VAL_REPAIR:
14719 printf("enabling repair mode\n");
14721 ctree_flags |= OPEN_CTREE_WRITES;
14723 case GETOPT_VAL_READONLY:
14726 case GETOPT_VAL_INIT_CSUM:
14727 printf("Creating a new CRC tree\n");
14728 init_csum_tree = 1;
14730 ctree_flags |= OPEN_CTREE_WRITES;
14732 case GETOPT_VAL_INIT_EXTENT:
14733 init_extent_tree = 1;
14734 ctree_flags |= (OPEN_CTREE_WRITES |
14735 OPEN_CTREE_NO_BLOCK_GROUPS);
14738 case GETOPT_VAL_CHECK_CSUM:
14739 check_data_csum = 1;
14741 case GETOPT_VAL_MODE:
14742 check_mode = parse_check_mode(optarg);
14743 if (check_mode == CHECK_MODE_UNKNOWN) {
14744 error("unknown mode: %s", optarg);
14748 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14749 if (strcmp(optarg, "v1") == 0) {
14750 clear_space_cache = 1;
14751 } else if (strcmp(optarg, "v2") == 0) {
14752 clear_space_cache = 2;
14753 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14756 "invalid argument to --clear-space-cache, must be v1 or v2");
14759 ctree_flags |= OPEN_CTREE_WRITES;
14761 case GETOPT_VAL_FORCE:
14767 if (check_argc_exact(argc - optind, 1))
14768 usage(cmd_check_usage);
14770 if (ctx.progress_enabled) {
14771 ctx.tp = TASK_NOTHING;
14772 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14775 /* This check is the only reason for --readonly to exist */
14776 if (readonly && repair) {
14777 error("repair options are not compatible with --readonly");
14782 * experimental and dangerous
14784 if (repair && check_mode == CHECK_MODE_LOWMEM)
14785 warning("low-memory mode repair support is only partial");
14788 cache_tree_init(&root_cache);
14790 ret = check_mounted(argv[optind]);
14793 error("could not check mount status: %s",
14799 "%s is currently mounted, use --force if you really intend to check the filesystem",
14807 error("repair and --force is not yet supported");
14814 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14818 "filesystem mounted, continuing because of --force");
14820 /* A block device is mounted in exclusive mode by kernel */
14821 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14824 /* only allow partial opening under repair mode */
14826 ctree_flags |= OPEN_CTREE_PARTIAL;
14828 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14829 chunk_root_bytenr, ctree_flags);
14831 error("cannot open file system");
14837 global_info = info;
14838 root = info->fs_root;
14839 uuid_unparse(info->super_copy->fsid, uuidbuf);
14841 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14844 * Check the bare minimum before starting anything else that could rely
14845 * on it, namely the tree roots, any local consistency checks
14847 if (!extent_buffer_uptodate(info->tree_root->node) ||
14848 !extent_buffer_uptodate(info->dev_root->node) ||
14849 !extent_buffer_uptodate(info->chunk_root->node)) {
14850 error("critical roots corrupted, unable to check the filesystem");
14856 if (clear_space_cache) {
14857 ret = do_clear_free_space_cache(info, clear_space_cache);
14863 * repair mode will force us to commit transaction which
14864 * will make us fail to load log tree when mounting.
14866 if (repair && btrfs_super_log_root(info->super_copy)) {
14867 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14873 ret = zero_log_tree(root);
14876 error("failed to zero log tree: %d", ret);
14881 if (qgroup_report) {
14882 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14884 ret = qgroup_verify_all(info);
14891 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14892 subvolid, argv[optind], uuidbuf);
14893 ret = print_extent_state(info, subvolid);
14898 if (init_extent_tree || init_csum_tree) {
14899 struct btrfs_trans_handle *trans;
14901 trans = btrfs_start_transaction(info->extent_root, 0);
14902 if (IS_ERR(trans)) {
14903 error("error starting transaction");
14904 ret = PTR_ERR(trans);
14909 if (init_extent_tree) {
14910 printf("Creating a new extent tree\n");
14911 ret = reinit_extent_tree(trans, info);
14917 if (init_csum_tree) {
14918 printf("Reinitialize checksum tree\n");
14919 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14921 error("checksum tree initialization failed: %d",
14928 ret = fill_csum_tree(trans, info->csum_root,
14932 error("checksum tree refilling failed: %d", ret);
14937 * Ok now we commit and run the normal fsck, which will add
14938 * extent entries for all of the items it finds.
14940 ret = btrfs_commit_transaction(trans, info->extent_root);
14945 if (!extent_buffer_uptodate(info->extent_root->node)) {
14946 error("critical: extent_root, unable to check the filesystem");
14951 if (!extent_buffer_uptodate(info->csum_root->node)) {
14952 error("critical: csum_root, unable to check the filesystem");
14958 if (!init_extent_tree) {
14959 ret = repair_root_items(info);
14962 error("failed to repair root items: %s", strerror(-ret));
14966 fprintf(stderr, "Fixed %d roots.\n", ret);
14968 } else if (ret > 0) {
14970 "Found %d roots with an outdated root item.\n",
14973 "Please run a filesystem check with the option --repair to fix them.\n");
14980 ret = do_check_chunks_and_extents(info);
14984 "errors found in extent allocation tree or chunk allocation");
14986 /* Only re-check super size after we checked and repaired the fs */
14987 err |= !is_super_size_valid(info);
14989 if (!ctx.progress_enabled) {
14990 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14991 fprintf(stderr, "checking free space tree\n");
14993 fprintf(stderr, "checking free space cache\n");
14995 ret = check_space_cache(root);
14998 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14999 error("errors found in free space tree");
15001 error("errors found in free space cache");
15006 * We used to have to have these hole extents in between our real
15007 * extents so if we don't have this flag set we need to make sure there
15008 * are no gaps in the file extents for inodes, otherwise we can just
15009 * ignore it when this happens.
15011 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15012 ret = do_check_fs_roots(info, &root_cache);
15015 error("errors found in fs roots");
15019 fprintf(stderr, "checking csums\n");
15020 ret = check_csums(root);
15023 error("errors found in csum tree");
15027 fprintf(stderr, "checking root refs\n");
15028 /* For low memory mode, check_fs_roots_v2 handles root refs */
15029 if (check_mode != CHECK_MODE_LOWMEM) {
15030 ret = check_root_refs(root, &root_cache);
15033 error("errors found in root refs");
15038 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15039 struct extent_buffer *eb;
15041 eb = list_first_entry(&root->fs_info->recow_ebs,
15042 struct extent_buffer, recow);
15043 list_del_init(&eb->recow);
15044 ret = recow_extent_buffer(root, eb);
15047 error("fails to fix transid errors");
15052 while (!list_empty(&delete_items)) {
15053 struct bad_item *bad;
15055 bad = list_first_entry(&delete_items, struct bad_item, list);
15056 list_del_init(&bad->list);
15058 ret = delete_bad_item(root, bad);
15064 if (info->quota_enabled) {
15065 fprintf(stderr, "checking quota groups\n");
15066 ret = qgroup_verify_all(info);
15069 error("failed to check quota groups");
15073 ret = repair_qgroups(info, &qgroups_repaired);
15076 error("failed to repair quota groups");
15082 if (!list_empty(&root->fs_info->recow_ebs)) {
15083 error("transid errors in file system");
15088 printf("found %llu bytes used, ",
15089 (unsigned long long)bytes_used);
15091 printf("error(s) found\n");
15093 printf("no error found\n");
15094 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15095 printf("total tree bytes: %llu\n",
15096 (unsigned long long)total_btree_bytes);
15097 printf("total fs tree bytes: %llu\n",
15098 (unsigned long long)total_fs_tree_bytes);
15099 printf("total extent tree bytes: %llu\n",
15100 (unsigned long long)total_extent_tree_bytes);
15101 printf("btree space waste bytes: %llu\n",
15102 (unsigned long long)btree_space_waste);
15103 printf("file data blocks allocated: %llu\n referenced %llu\n",
15104 (unsigned long long)data_bytes_allocated,
15105 (unsigned long long)data_bytes_referenced);
15107 free_qgroup_counts();
15108 free_root_recs_tree(&root_cache);
15112 if (ctx.progress_enabled)
15113 task_deinit(ctx.info);