2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error flag bit masks.  Every name below owns one distinct bit
 * (bits 1 through 21).
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM has no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but does not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
136 #define BG_ACCOUNTING_ERROR (1<<21) /* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and changed to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
258 enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Error bits accumulated in a backref's ->errors field.
 * print_ref_error() prints one message per bit that is set.
 */
322 #define REF_ERR_NO_DIR_ITEM (1 << 0)
323 #define REF_ERR_NO_DIR_INDEX (1 << 1)
324 #define REF_ERR_NO_INODE_REF (1 << 2)
325 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
326 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
327 #define REF_ERR_DUP_INODE_REF (1 << 5)
328 #define REF_ERR_INDEX_UNMATCH (1 << 6)
329 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
330 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
331 #define REF_ERR_NO_ROOT_REF (1 << 9)
332 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
333 #define REF_ERR_DUP_ROOT_REF (1 << 11)
334 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Error bits accumulated in inode_record ->errors.
 * print_inode_error() prints one message per bit that is set.
 */
370 #define I_ERR_NO_INODE_ITEM (1 << 0)
371 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
372 #define I_ERR_DUP_INODE_ITEM (1 << 2)
373 #define I_ERR_DUP_DIR_INDEX (1 << 3)
374 #define I_ERR_ODD_DIR_ITEM (1 << 4)
375 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
376 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
377 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
378 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
379 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
380 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
381 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
382 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
383 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
384 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
/*
 * The values below are bit masks, so every name must own a distinct bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Previously (1 << 4), which collided with REFERENCER_MISMATCH and made the
 * two conditions indistinguishable; moved to the first unused bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatibility helper that allows older code to be reused */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will split the hole if needed and is much stricter than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exist.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
1323 if (--src_node->refs == 0)
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
1568 return has_parent ? 0 : 2;
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
1975 u64 bytenr[BTRFS_MAX_LEVEL];
1976 u64 refs[BTRFS_MAX_LEVEL];
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
2102 * We would like to check the leaf owner too, but since subvolumes may share
2103 * leaves, which makes the leaf owner check weak, the key check should be
2104 * sufficient for that case.
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_header_generation(child),
2137 btrfs_node_ptr_generation(parent, slot));
2143 * For a tree node or leaf that is shared, we don't need to iterate it in
2144 * every fs or file tree check. Here we find all of its root ids, and only
2145 * check it in the fs or file tree which has the smallest root id.
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
2153 * @roots can be empty if it belongs to tree reloc tree
2154 * In that case, we should always check the leaf, as we can't use
2155 * the tree owner to ensure some other root will check it.
2157 if (roots->nnodes == 1 || roots->nnodes == 0)
2160 node = rb_first(&roots->root);
2161 u = rb_entry(node, struct ulist_node, rb_node);
2163 * The current root id is not the smallest, so we skip it and let it be
2164 * checked in the fs or file tree which has the smallest root id.
2166 if (root->objectid != u->val)
2172 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2175 struct btrfs_root *extent_root = root->fs_info->extent_root;
2176 struct btrfs_root_item *ri = &root->root_item;
2177 struct btrfs_extent_inline_ref *iref;
2178 struct btrfs_extent_item *ei;
2179 struct btrfs_key key;
2180 struct btrfs_path *path = NULL;
2191 * Except file/reloc tree, we can not have FULL BACKREF MODE
2193 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2197 if (eb->start == btrfs_root_bytenr(ri))
2200 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2203 owner = btrfs_header_owner(eb);
2204 if (owner == root->objectid)
2207 path = btrfs_alloc_path();
2211 key.objectid = btrfs_header_bytenr(eb);
2213 key.offset = (u64)-1;
2215 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2222 ret = btrfs_previous_extent_item(extent_root, path,
2228 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2230 eb = path->nodes[0];
2231 slot = path->slots[0];
2232 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2234 flags = btrfs_extent_flags(eb, ei);
2235 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2238 ptr = (unsigned long)(ei + 1);
2239 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2241 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2242 ptr += sizeof(struct btrfs_tree_block_info);
2245 /* Reached extent item ends normally */
2249 /* Beyond extent item end, wrong item size */
2251 error("extent item at bytenr %llu slot %d has wrong size",
2256 iref = (struct btrfs_extent_inline_ref *)ptr;
2257 offset = btrfs_extent_inline_ref_offset(eb, iref);
2258 type = btrfs_extent_inline_ref_type(eb, iref);
2260 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2262 ptr += btrfs_extent_inline_ref_size(type);
2266 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2270 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2272 btrfs_free_path(path);
2277 * For a tree node or leaf, we record its reference count, so if we process
2278 * this node or leaf again later, we don't need to compute the count again.
2280 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2282 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2283 struct extent_buffer *eb, struct node_refs *nrefs,
2284 u64 level, int check_all)
2286 struct ulist *roots;
2289 int root_level = btrfs_header_level(root->node);
2293 if (nrefs->bytenr[level] == bytenr)
2296 if (bytenr != (u64)-1) {
2297 /* the return value of this function seems a mistake */
2298 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2299 level, 1, &refs, &flags);
2301 if (ret < 0 && !check_all)
2304 nrefs->bytenr[level] = bytenr;
2305 nrefs->refs[level] = refs;
2306 nrefs->full_backref[level] = 0;
2307 nrefs->checked[level] = 0;
2310 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2315 check = need_check(root, roots);
2317 nrefs->need_check[level] = check;
2320 nrefs->need_check[level] = 1;
2322 if (level == root_level) {
2323 nrefs->need_check[level] = 1;
2326 * The node refs may have not been
2327 * updated if upper needs checking (the
2328 * lowest root_objectid) the node can
2331 nrefs->need_check[level] =
2332 nrefs->need_check[level + 1];
2338 if (check_all && eb) {
2339 calc_extent_flag_v2(root, eb, &flags);
2340 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2341 nrefs->full_backref[level] = 1;
2348 * @level if @level == -1 it means an extent data item,
2349 * otherwise a normal tree block.
2351 static int should_check_extent_strictly(struct btrfs_root *root,
2352 struct node_refs *nrefs, int level)
2354 int root_level = btrfs_header_level(root->node);
2356 if (level > root_level || level < -1)
2358 if (level == root_level)
2361 * if the upper node is marked full backref, it should contain shared
2362 * backref of the parent (except owner == root->objectid).
2364 while (++level <= root_level)
2365 if (nrefs->refs[level] > 1)
2371 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2372 struct walk_control *wc, int *level,
2373 struct node_refs *nrefs)
2375 enum btrfs_tree_block_status status;
2378 struct btrfs_fs_info *fs_info = root->fs_info;
2379 struct extent_buffer *next;
2380 struct extent_buffer *cur;
2384 WARN_ON(*level < 0);
2385 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2387 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2388 refs = nrefs->refs[*level];
2391 ret = btrfs_lookup_extent_info(NULL, root,
2392 path->nodes[*level]->start,
2393 *level, 1, &refs, NULL);
2398 nrefs->bytenr[*level] = path->nodes[*level]->start;
2399 nrefs->refs[*level] = refs;
2403 ret = enter_shared_node(root, path->nodes[*level]->start,
2411 while (*level >= 0) {
2412 WARN_ON(*level < 0);
2413 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2414 cur = path->nodes[*level];
2416 if (btrfs_header_level(cur) != *level)
2419 if (path->slots[*level] >= btrfs_header_nritems(cur))
2422 ret = process_one_leaf(root, cur, wc);
2427 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2428 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2430 if (bytenr == nrefs->bytenr[*level - 1]) {
2431 refs = nrefs->refs[*level - 1];
2433 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2434 *level - 1, 1, &refs, NULL);
2438 nrefs->bytenr[*level - 1] = bytenr;
2439 nrefs->refs[*level - 1] = refs;
2444 ret = enter_shared_node(root, bytenr, refs,
2447 path->slots[*level]++;
2452 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2453 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2454 free_extent_buffer(next);
2455 reada_walk_down(root, cur, path->slots[*level]);
2456 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2457 if (!extent_buffer_uptodate(next)) {
2458 struct btrfs_key node_key;
2460 btrfs_node_key_to_cpu(path->nodes[*level],
2462 path->slots[*level]);
2463 btrfs_add_corrupt_extent_record(root->fs_info,
2465 path->nodes[*level]->start,
2466 root->fs_info->nodesize,
2473 ret = check_child_node(cur, path->slots[*level], next);
2475 free_extent_buffer(next);
2480 if (btrfs_is_leaf(next))
2481 status = btrfs_check_leaf(root, NULL, next);
2483 status = btrfs_check_node(root, NULL, next);
2484 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2485 free_extent_buffer(next);
2490 *level = *level - 1;
2491 free_extent_buffer(path->nodes[*level]);
2492 path->nodes[*level] = next;
2493 path->slots[*level] = 0;
2496 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2500 static int fs_root_objectid(u64 objectid);
2503 * Update global fs information.
2505 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2509 struct extent_buffer *eb = path->nodes[level];
2511 total_btree_bytes += eb->len;
2512 if (fs_root_objectid(root->objectid))
2513 total_fs_tree_bytes += eb->len;
2514 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2515 total_extent_tree_bytes += eb->len;
2518 btree_space_waste += btrfs_leaf_free_space(root, eb);
2520 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2521 btrfs_header_nritems(eb));
2522 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2527 * This function only handles BACKREF_MISSING,
2528 * If corresponding extent item exists, increase the ref, else insert an extent
2531 * Returns error bits after repair.
2533 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2534 struct btrfs_root *root,
2535 struct extent_buffer *node,
2536 struct node_refs *nrefs, int level, int err)
2538 struct btrfs_fs_info *fs_info = root->fs_info;
2539 struct btrfs_root *extent_root = fs_info->extent_root;
2540 struct btrfs_path path;
2541 struct btrfs_extent_item *ei;
2542 struct btrfs_tree_block_info *bi;
2543 struct btrfs_key key;
2544 struct extent_buffer *eb;
2545 u32 size = sizeof(*ei);
2546 u32 node_size = root->fs_info->nodesize;
2547 int insert_extent = 0;
2548 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2549 int root_level = btrfs_header_level(root->node);
2554 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2557 if ((err & BACKREF_MISSING) == 0)
2560 WARN_ON(level > BTRFS_MAX_LEVEL);
2563 btrfs_init_path(&path);
2564 bytenr = btrfs_header_bytenr(node);
2565 owner = btrfs_header_owner(node);
2566 generation = btrfs_header_generation(node);
2568 key.objectid = bytenr;
2570 key.offset = (u64)-1;
2572 /* Search for the extent item */
2573 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2579 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2583 /* calculate if the extent item flag is full backref or not */
2584 if (nrefs->full_backref[level] != 0)
2585 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2587 /* insert an extent item */
2588 if (insert_extent) {
2589 struct btrfs_disk_key copy_key;
2591 generation = btrfs_header_generation(node);
2593 if (level < root_level && nrefs->full_backref[level + 1] &&
2594 owner != root->objectid) {
2595 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2598 key.objectid = bytenr;
2599 if (!skinny_metadata) {
2600 key.type = BTRFS_EXTENT_ITEM_KEY;
2601 key.offset = node_size;
2602 size += sizeof(*bi);
2604 key.type = BTRFS_METADATA_ITEM_KEY;
2608 btrfs_release_path(&path);
2609 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2615 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2617 btrfs_set_extent_refs(eb, ei, 0);
2618 btrfs_set_extent_generation(eb, ei, generation);
2619 btrfs_set_extent_flags(eb, ei, flags);
2621 if (!skinny_metadata) {
2622 bi = (struct btrfs_tree_block_info *)(ei + 1);
2623 memset_extent_buffer(eb, 0, (unsigned long)bi,
2625 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2626 btrfs_set_disk_key_type(&copy_key, 0);
2627 btrfs_set_disk_key_offset(&copy_key, 0);
2629 btrfs_set_tree_block_level(eb, bi, level);
2630 btrfs_set_tree_block_key(eb, bi, &copy_key);
2632 btrfs_mark_buffer_dirty(eb);
2633 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2634 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2637 nrefs->refs[level] = 0;
2638 nrefs->full_backref[level] =
2639 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640 btrfs_release_path(&path);
2643 if (level < root_level && nrefs->full_backref[level + 1] &&
2644 owner != root->objectid)
2645 parent = nrefs->bytenr[level + 1];
2647 /* increase the ref */
2648 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649 parent, root->objectid, level, 0);
2651 nrefs->refs[level]++;
2653 btrfs_release_path(&path);
2656 "failed to repair tree block ref start %llu root %llu due to %s",
2657 bytenr, root->objectid, strerror(-ret));
2659 printf("Added one tree block ref start %llu %s %llu\n",
2660 bytenr, parent ? "parent" : "root",
2661 parent ? parent : root->objectid);
2662 err &= ~BACKREF_MISSING;
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669 unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671 struct extent_buffer *eb, u64 bytenr,
2672 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674 struct btrfs_root *root, struct btrfs_path *path,
2675 struct node_refs *nrefs, int account_bytes);
2678 * @trans just for lowmem repair mode
2679 * @check_all if not 0 then check all tree block backrefs and items
2680 * 0 then just check relationship of items in fs tree(s)
2682 * Returns >0 Found error, should continue
2683 * Returns <0 Fatal error, must exit the whole check
2684 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, checking every tree
 * block on the way and lowering *level as the walk goes down.
 *
 * For each block visited the walk calls check_tree_block_ref(), with
 * repair_tree_block_ref() as the follow-up, runs btrfs_check_leaf() or
 * btrfs_check_node() on it, and hands leaves to process_one_leaf_v2() and
 * check_leaf_items().  Child blocks are validated with check_child_node()
 * before the walk steps into them.
 *
 * NOTE(review): a number of short lines of this function are not visible in
 * this chunk, so the exact conditions guarding the calls below and the
 * return statements should be confirmed against the complete source.
 */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687 struct btrfs_root *root, struct btrfs_path *path,
2688 int *level, struct node_refs *nrefs, int ext_ref,
2692 enum btrfs_tree_block_status status;
2695 struct btrfs_fs_info *fs_info = root->fs_info;
2696 struct extent_buffer *next;
2697 struct extent_buffer *cur;
2701 int account_file_data = 0;
2703 WARN_ON(*level < 0);
2704 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2706 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707 path->nodes[*level], nrefs, *level, check_all);
2711 while (*level >= 0) {
2712 WARN_ON(*level < 0);
2713 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714 cur = path->nodes[*level];
2715 bytenr = btrfs_header_bytenr(cur);
2716 check = nrefs->need_check[*level];
2718 if (btrfs_header_level(cur) != *level)
2721 * Update bytes accounting and check tree block ref
2722 * NOTE: Doing accounting and check before checking nritems
2723 * is necessary because of empty node/leaf.
2725 if ((check_all && !nrefs->checked[*level]) ||
2726 (!check_all && nrefs->need_check[*level])) {
2727 ret = check_tree_block_ref(root, cur,
2728 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729 btrfs_header_owner(cur), nrefs);
2732 ret = repair_tree_block_ref(trans, root,
2733 path->nodes[*level], nrefs, *level, ret);
2736 if (check_all && nrefs->need_check[*level] &&
2737 nrefs->refs[*level]) {
2738 account_bytes(root, path, *level);
2739 account_file_data = 1;
2741 nrefs->checked[*level] = 1;
2744 if (path->slots[*level] >= btrfs_header_nritems(cur))
2747 /* Don't forget to check leaf/node validation */
2749 /* skip duplicate check */
2750 if (check || !check_all) {
2751 ret = btrfs_check_leaf(root, NULL, cur);
2752 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2760 ret = process_one_leaf_v2(root, path, nrefs,
2763 ret = check_leaf_items(trans, root, path,
2764 nrefs, account_file_data);
2768 if (check || !check_all) {
2769 ret = btrfs_check_node(root, NULL, cur);
2770 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Location and generation of the child referenced by the current slot. */
2777 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2780 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2785 * check all trees in check_chunks_and_extent_v2
2786 * check shared node once in check_fs_roots
2788 if (!check_all && !nrefs->need_check[*level - 1]) {
2789 path->slots[*level]++;
/*
 * Look the child block up first and only fall back to read_tree_block()
 * when the lookup fails or the buffer is not up to date for ptr_gen.
 */
2793 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795 free_extent_buffer(next);
2796 reada_walk_down(root, cur, path->slots[*level]);
2797 next = read_tree_block(fs_info, bytenr, ptr_gen);
2798 if (!extent_buffer_uptodate(next)) {
/* Unreadable child: record it as corrupt under the node key of this slot. */
2799 struct btrfs_key node_key;
2801 btrfs_node_key_to_cpu(path->nodes[*level],
2803 path->slots[*level]);
2804 btrfs_add_corrupt_extent_record(fs_info,
2805 &node_key, path->nodes[*level]->start,
2806 fs_info->nodesize, *level);
2812 ret = check_child_node(cur, path->slots[*level], next);
2817 if (btrfs_is_leaf(next))
2818 status = btrfs_check_leaf(root, NULL, next);
2820 status = btrfs_check_node(root, NULL, next);
2821 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822 free_extent_buffer(next);
/*
 * Step down one level: the child replaces whatever buffer the path held at
 * the lower level and iteration restarts at slot 0.
 */
2827 *level = *level - 1;
2828 free_extent_buffer(path->nodes[*level]);
2829 path->nodes[*level] = next;
2830 path->slots[*level] = 0;
2831 account_file_data = 0;
2833 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Walk back up the path starting at *level.
 *
 * Each populated level below BTRFS_MAX_LEVEL - 1 is examined in turn.  A
 * level that still has a slot after the current one (slots[i] + 1 < nritems)
 * is presumably where the walk resumes.  For exhausted levels the extent
 * buffer is released, the path entry is cleared, and leave_shared_node() is
 * called when the level being left equals wc->active_node.
 *
 * NOTE(review): the lines that advance the slot, update *level and return
 * are not visible in this chunk; confirm against the full source.
 */
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839 struct walk_control *wc, int *level)
2842 struct extent_buffer *leaf;
2844 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2845 leaf = path->nodes[i];
2846 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2851 free_extent_buffer(path->nodes[*level]);
2852 path->nodes[*level] = NULL;
/* A level above the active shared node must never be left first. */
2853 BUG_ON(*level > wc->active_node);
2854 if (*level == wc->active_node)
2855 leave_shared_node(root, wc, *level);
/*
 * Upward walk with the same loop shape as walk_up_tree(): levels are
 * scanned from *level upwards, a level with a remaining slot ends the scan,
 * and exhausted levels have their extent buffer freed and their path entry
 * cleared.  No walk_control bookkeeping is visible in this variant.
 *
 * NOTE(review): the slot increment, the *level update and the return
 * statements are not visible in this chunk.
 */
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2866 struct extent_buffer *leaf;
2868 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2869 leaf = path->nodes[i];
2870 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2875 free_extent_buffer(path->nodes[*level]);
2876 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests look for the following problems, in order:
 *   - the inode item was not found, or errors are already recorded
 *   - nlink is not 1, or found_link is not 0
 *   - the backref list is empty
 *   - the first backref has no inode ref
 *   - the first backref is not index 0 with the two byte name ..
 *   - the first backref was seen through a dir index or dir item
 *
 * NOTE(review): the statements executed when a test matches and the return
 * values are not visible in this chunk.
 */
2883 static int check_root_dir(struct inode_record *rec)
2885 struct inode_backref *backref;
2888 if (!rec->found_inode_item || rec->errors)
2890 if (rec->nlink != 1 || rec->found_link != 0)
2892 if (list_empty(&rec->backrefs))
2894 backref = to_inode_backref(rec->backrefs.next);
2895 if (!backref->found_inode_ref)
2897 if (backref->index != 0 || backref->namelen != 2 ||
2898 memcmp(backref->name, "..", 2))
2900 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of an inode item with rec->found_size.
 *
 * @trans: transaction the modification is made in
 * @root:  tree that holds the inode item
 * @path:  caller supplied path, released before returning
 * @rec:   inode record carrying the inode number and the expected size
 *
 * The item is located with a search for (rec->ino, BTRFS_INODE_ITEM_KEY, -1)
 * and the key found at path->slots[0] is compared against rec->ino before
 * the item is touched.  After the update I_ERR_DIR_ISIZE_WRONG is cleared
 * and a message is printed.
 *
 * NOTE(review): the error branches and the return statement are not visible
 * in this chunk.
 */
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root, struct btrfs_path *path,
2909 struct inode_record *rec)
2911 struct btrfs_inode_item *ei;
2912 struct btrfs_key key;
2915 key.objectid = rec->ino;
2916 key.type = BTRFS_INODE_ITEM_KEY;
/*
 * The largest possible offset is used for the search key.
 * NOTE(review): presumably the wanted item is then the one just before the
 * returned slot; the slot adjustment is not visible here.
 */
2917 key.offset = (u64)-1;
2919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2923 if (!path->slots[0]) {
2930 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931 if (key.objectid != rec->ino) {
2936 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937 struct btrfs_inode_item);
2938 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939 btrfs_mark_buffer_dirty(path->nodes[0]);
2940 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942 root->root_key.objectid);
2944 btrfs_release_path(path);
/*
 * Add the orphan item that is missing for rec->ino.
 *
 * The item is created with btrfs_add_orphan_item(), the path is released
 * and I_ERR_NO_ORPHAN_ITEM is cleared from the record.
 *
 * NOTE(review): whether the flag is cleared only on success, and what is
 * returned, is not visible in this chunk.
 */
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949 struct btrfs_root *root,
2950 struct btrfs_path *path,
2951 struct inode_record *rec)
2955 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956 btrfs_release_path(path);
2958 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino.
 *
 * @trans: transaction the modification is made in
 * @root:  tree that holds the inode item
 * @path:  caller supplied path, released before returning
 * @rec:   inode record; rec->found_size is the value written
 *
 * After the update I_ERR_FILE_NBYTES_WRONG is cleared and a message is
 * printed.
 *
 * NOTE(review): the key offset assignment, the handling of a failed search
 * and the return statement are not visible in this chunk.
 */
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963 struct btrfs_root *root,
2964 struct btrfs_path *path,
2965 struct inode_record *rec)
2967 struct btrfs_inode_item *ei;
2968 struct btrfs_key key;
2971 key.objectid = rec->ino;
2972 key.type = BTRFS_INODE_ITEM_KEY;
2975 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2982 /* Since ret == 0, no need to check anything */
2983 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984 struct btrfs_inode_item);
2985 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986 btrfs_mark_buffer_dirty(path->nodes[0]);
2987 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988 printf("reset nbytes for ino %llu root %llu\n",
2989 rec->ino, root->root_key.objectid);
2991 btrfs_release_path(path);
/*
 * Recreate the dir index item that is missing for @backref.
 *
 * @root:        tree being repaired
 * @inode_cache: cache holding the inode records of @root
 * @rec:         record of the inode the directory entry points to
 * @backref:     backref describing parent directory, index and name
 *
 * A new item keyed (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index) is
 * inserted inside its own transaction and filled so that it points at the
 * inode item of rec->ino and carries the name stored in @backref.
 * Afterwards the in-memory state is brought in line: the backref is flagged
 * as having a dir index, the record of the parent directory gets the name
 * length added to found_size, and I_ERR_DIR_ISIZE_WRONG is recomputed from
 * the new value.
 *
 * NOTE(review): PTR_ERR(trans) is presumably returned when the transaction
 * cannot be started; the guarding test, the check of the insertion result
 * and the final return are not visible in this chunk.
 */
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996 struct cache_tree *inode_cache,
2997 struct inode_record *rec,
2998 struct inode_backref *backref)
3000 struct btrfs_path path;
3001 struct btrfs_trans_handle *trans;
3002 struct btrfs_dir_item *dir_item;
3003 struct extent_buffer *leaf;
3004 struct btrfs_key key;
3005 struct btrfs_disk_key disk_key;
3006 struct inode_record *dir_rec;
3007 unsigned long name_ptr;
/* Item payload is the fixed dir item header followed by the name bytes. */
3008 u32 data_size = sizeof(*dir_item) + backref->namelen;
3011 trans = btrfs_start_transaction(root, 1);
3013 return PTR_ERR(trans);
3015 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016 (unsigned long long)rec->ino);
3018 btrfs_init_path(&path);
3019 key.objectid = backref->dir;
3020 key.type = BTRFS_DIR_INDEX_KEY;
3021 key.offset = backref->index;
3022 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3025 leaf = path.nodes[0];
3026 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The location key of the new entry is the inode item of rec->ino. */
3028 disk_key.objectid = cpu_to_le64(rec->ino);
3029 disk_key.type = BTRFS_INODE_ITEM_KEY;
3030 disk_key.offset = 0;
3032 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034 btrfs_set_dir_data_len(leaf, dir_item, 0);
3035 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly behind the dir item header. */
3036 name_ptr = (unsigned long)(dir_item + 1);
3037 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038 btrfs_mark_buffer_dirty(leaf);
3039 btrfs_release_path(&path);
3040 btrfs_commit_transaction(trans, root);
/* Mirror the on-disk change in the cached records. */
3042 backref->found_dir_index = 1;
3043 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044 BUG_ON(IS_ERR(dir_rec));
3047 dir_rec->found_size += backref->namelen;
3048 if (dir_rec->found_size == dir_rec->isize &&
3049 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051 if (dir_rec->found_size != dir_rec->isize)
3052 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the dir index entry described by @backref, in its own transaction.
 *
 * @root:    tree being repaired
 * @backref: backref giving parent directory, index and name of the entry
 *
 * The entry is looked up with btrfs_lookup_dir_index().  Two removal calls
 * are visible: btrfs_del_item() for the whole item and
 * btrfs_delete_one_dir_name() for a single name inside the item.  Both
 * visible exits release the path and commit the transaction.
 *
 * NOTE(review): PTR_ERR(trans) is presumably returned when the transaction
 * cannot be started.  The test on the lookup result, the condition choosing
 * between the two removal calls and the return values are not visible in
 * this chunk.
 */
3057 static int delete_dir_index(struct btrfs_root *root,
3058 struct inode_backref *backref)
3060 struct btrfs_trans_handle *trans;
3061 struct btrfs_dir_item *di;
3062 struct btrfs_path path;
3065 trans = btrfs_start_transaction(root, 1);
3067 return PTR_ERR(trans);
3069 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070 (unsigned long long)backref->dir,
3071 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072 (unsigned long long)root->objectid);
3074 btrfs_init_path(&path);
3075 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076 backref->name, backref->namelen,
3077 backref->index, -1);
3080 btrfs_release_path(&path);
3081 btrfs_commit_transaction(trans, root);
3088 ret = btrfs_del_item(trans, root, &path);
3090 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3092 btrfs_release_path(&path);
3093 btrfs_commit_transaction(trans, root);
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098 struct btrfs_root *root, u64 ino, u64 size,
3099 u64 nbytes, u64 nlink, u32 mode)
3101 struct btrfs_inode_item ii;
3102 time_t now = time(NULL);
3105 btrfs_set_stack_inode_size(&ii, size);
3106 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107 btrfs_set_stack_inode_nlink(&ii, nlink);
3108 btrfs_set_stack_inode_mode(&ii, mode);
3109 btrfs_set_stack_inode_generation(&ii, trans->transid);
3110 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3118 ret = btrfs_insert_inode(trans, root, ino, &ii);
3121 warning("root %llu inode %llu recreating inode item, this may "
3122 "be incomplete, please check permissions and content after "
3123 "the fsck completes.\n", (unsigned long long)root->objectid,
3124 (unsigned long long)ino);
/*
 * Lowmem-mode wrapper around __create_inode_item().
 *
 * The new inode is created with size, nbytes and nlink all zero.  Its mode
 * is S_IFDIR when filetype is BTRFS_FT_DIR and S_IFREG for every other
 * value, combined with permission bits 0755.
 */
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130 struct btrfs_root *root, u64 ino,
3133 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3135 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item of @rec inside its own transaction.
 *
 * @root:     tree being repaired
 * @rec:      record of the inode whose item is missing
 * @root_dir: non-zero when the inode is the root directory of @root
 *
 * The link count is 1 for the root directory and rec->found_link otherwise.
 * A record that has a dir item is recreated as a directory (S_IFDIR | 0755,
 * size rec->found_size), with a message on stderr when it also has file
 * extents.  Every other record is recreated as a regular file
 * (S_IFREG | 0755, size rec->extent_end).
 *
 * NOTE(review): the local declarations, the tail of the call to
 * __create_inode_item() and the final return are not visible in this chunk.
 */
3138 static int create_inode_item(struct btrfs_root *root,
3139 struct inode_record *rec, int root_dir)
3141 struct btrfs_trans_handle *trans;
3147 trans = btrfs_start_transaction(root, 1);
3148 if (IS_ERR(trans)) {
3149 ret = PTR_ERR(trans);
3153 nlink = root_dir ? 1 : rec->found_link;
3154 if (rec->found_dir_item) {
3155 if (rec->found_file_extent)
3156 fprintf(stderr, "root %llu inode %llu has both a dir "
3157 "item and extents, unsure if it is a dir or a "
3158 "regular file so setting it as a directory\n",
3159 (unsigned long long)root->objectid,
3160 (unsigned long long)rec->ino);
3161 mode = S_IFDIR | 0755;
3162 size = rec->found_size;
3163 } else if (!rec->found_dir_item) {
3164 size = rec->extent_end;
3165 mode = S_IFREG | 0755;
3168 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3170 btrfs_commit_transaction(trans, root);
/*
 * Repair the backrefs of one inode record.
 *
 * Every backref of @rec is visited with the deletion-safe list iterator.
 * The visible cases are:
 *   - root directory without an inode item: the item is recreated through
 *     create_inode_item() when delete is zero
 *   - index 0 of the root directory is tested separately (see the original
 *     comment below)
 *   - a dir index without an inode ref, or one flagged
 *     REF_ERR_INDEX_UNMATCH: delete_dir_index() is called and the backref is
 *     unlinked from the list
 *   - dir item and inode ref present but dir index missing, delete zero:
 *     add_missing_dir_index() is called
 *   - dir item, dir index and inode ref all present without errors: the
 *     backref is unlinked from the list
 *   - only the inode ref present, delete zero: after check_dir_conflict()
 *     a dir item is inserted with btrfs_insert_dir_item() in its own
 *     transaction
 *   - all three references present and consistent but no inode item, delete
 *     zero: create_inode_item() is called
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 *
 * NOTE(review): the last parameter, the leading part of the condition for
 * the deletion case, the reaction to the return values of the helpers and
 * the updates of repaired are not visible in this chunk.
 */
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175 struct inode_record *rec,
3176 struct cache_tree *inode_cache,
3179 struct inode_backref *tmp, *backref;
3180 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3184 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185 if (!delete && rec->ino == root_dirid) {
3186 if (!rec->found_inode_item) {
3187 ret = create_inode_item(root, rec, 1);
3194 /* Index 0 for root dir's are special, don't mess with it */
3195 if (rec->ino == root_dirid && backref->index == 0)
3199 ((backref->found_dir_index && !backref->found_inode_ref) ||
3200 (backref->found_dir_index && backref->found_inode_ref &&
3201 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202 ret = delete_dir_index(root, backref);
3206 list_del(&backref->list);
/* Dir item and inode ref agree, only the dir index has to be added. */
3211 if (!delete && !backref->found_dir_index &&
3212 backref->found_dir_item && backref->found_inode_ref) {
3213 ret = add_missing_dir_index(root, inode_cache, rec,
3218 if (backref->found_dir_item &&
3219 backref->found_dir_index) {
3220 if (!backref->errors &&
3221 backref->found_inode_ref) {
3222 list_del(&backref->list);
/* Only the inode ref exists: recreate the directory entry for it. */
3229 if (!delete && (!backref->found_dir_index &&
3230 !backref->found_dir_item &&
3231 backref->found_inode_ref)) {
3232 struct btrfs_trans_handle *trans;
3233 struct btrfs_key location;
3235 ret = check_dir_conflict(root, backref->name,
3241 * let nlink fixing routine to handle it,
3242 * which can do it better.
3247 location.objectid = rec->ino;
3248 location.type = BTRFS_INODE_ITEM_KEY;
3249 location.offset = 0;
3251 trans = btrfs_start_transaction(root, 1);
3252 if (IS_ERR(trans)) {
3253 ret = PTR_ERR(trans);
3256 fprintf(stderr, "adding missing dir index/item pair "
3258 (unsigned long long)rec->ino);
3259 ret = btrfs_insert_dir_item(trans, root, backref->name,
3261 backref->dir, &location,
3262 imode_to_type(rec->imode),
3265 btrfs_commit_transaction(trans, root);
/* All references are consistent, so only the inode item is missing. */
3269 if (!delete && (backref->found_inode_ref &&
3270 backref->found_dir_index &&
3271 backref->found_dir_item &&
3272 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273 !rec->found_inode_item)) {
3274 ret = create_inode_item(root, rec, 0);
3281 return ret ? ret : repaired;
3285 * To determine the file type for nlink/inode_item repair
3287 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288 * Return -ENOENT if file type is not found.
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3292 struct inode_backref *backref;
3294 /* For inode item recovered case */
3295 if (rec->found_inode_item) {
3296 *type = imode_to_type(rec->imode);
3300 list_for_each_entry(backref, &rec->backrefs, list) {
3301 if (backref->found_dir_index || backref->found_dir_item) {
3302 *type = backref->filetype;
3310 * To determine the file name for nlink repair
3312 * Return 0 if file name is found, set name and namelen.
3313 * Return -ENOENT if file name is not found.
3315 static int find_file_name(struct inode_record *rec,
3316 char *name, int *namelen)
3318 struct inode_backref *backref;
3320 list_for_each_entry(backref, &rec->backrefs, list) {
3321 if (backref->found_dir_index || backref->found_dir_item ||
3322 backref->found_inode_ref) {
3323 memcpy(name, backref->name, backref->namelen);
3324 *namelen = backref->namelen;
3331 /*
 * Reset the nlink of the inode to the correct one.
 *
 * @trans: transaction all modifications are made in
 * @root:  tree that holds the inode
 * @path:  caller supplied path, released before returning
 * @rec:   inode record whose backrefs drive the rebuild
 *
 * Works in three passes: every backref is unlinked with btrfs_unlink(),
 * the link count in the inode item is set to 0, and the backrefs still on
 * the list are added back with btrfs_add_link().
 *
 * NOTE(review): error handling after each call, the key offset assignment
 * and the return statement are not visible in this chunk.
 */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root,
3334 struct btrfs_path *path,
3335 struct inode_record *rec)
3337 struct inode_backref *backref;
3338 struct inode_backref *tmp;
3339 struct btrfs_key key;
3340 struct btrfs_inode_item *inode_item;
3343 /* We don't believe this either, reset it and iterate backref */
3344 rec->found_link = 0;
3346 /* Remove all backref including the valid ones */
3347 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349 backref->index, backref->name,
3350 backref->namelen, 0);
3354 /* remove invalid backref, so it won't be added back */
3355 if (!(backref->found_dir_index &&
3356 backref->found_dir_item &&
3357 backref->found_inode_ref)) {
3358 list_del(&backref->list);
3365 /* Set nlink to 0 */
3366 key.objectid = rec->ino;
3367 key.type = BTRFS_INODE_ITEM_KEY;
3369 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3376 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377 struct btrfs_inode_item);
3378 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379 btrfs_mark_buffer_dirty(path->nodes[0]);
3380 btrfs_release_path(path);
3383 * Add back valid inode_ref/dir_item/dir_index,
3384 * add_link() will handle the nlink inc, so new nlink must be correct
3386 list_for_each_entry(backref, &rec->backrefs, list) {
3387 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388 backref->name, backref->namelen,
3389 backref->filetype, &backref->index, 1, 0);
3394 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root.
 *
 * The tree is searched for (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY)
 * and the objectid of the item in the slot before the search position is
 * stored through the output pointer.  A result at or above
 * BTRFS_LAST_FREE_OBJECTID is tested for separately.  The path is
 * initialised on entry and released before returning.
 *
 * NOTE(review): the output parameter declaration, the condition guarding
 * the key read (relevant because slots[0] - 1 is used as an index), the
 * statement run on overflow and the return are not visible in this chunk.
 */
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399 struct btrfs_root *root,
3400 struct btrfs_path *path,
3403 struct btrfs_key key, found_key;
3406 btrfs_init_path(path);
3407 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3409 key.type = BTRFS_INODE_ITEM_KEY;
3410 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3412 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413 path->slots[0] - 1);
3414 *highest_ino = found_key.objectid;
3417 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3419 btrfs_release_path(path);
3424 * Link inode to dir 'lost+found'. Increase @ref_count.
3426 * Returns 0 means success.
3427 * Returns <0 means failure.
/*
 * Link inode @ino into the lost+found directory of @root.
 *
 * @trans:     transaction all modifications are made in
 * @root:      tree that holds the inode
 * @path:      caller supplied path, released on entry and before returning
 * @ino:       inode to link
 * @namebuf:   name to use; may be extended in place with a suffix, so the
 *             buffer has to offer BTRFS_NAME_LEN bytes
 * @name_len:  length of the name in @namebuf
 * @filetype:  BTRFS_FT_* type recorded in the new directory entry
 * @ref_count: link counter of the caller
 *
 * The directory is created with btrfs_mkdir() under
 * BTRFS_FIRST_FREE_OBJECTID, using an inode number derived from
 * get_highest_inode().  While btrfs_add_link() reports -EEXIST the name is
 * extended by a dot and the inode number and the link is retried, as long
 * as the result still fits into BTRFS_NAME_LEN.
 *
 * NOTE(review): how the lost+found inode number is derived from the highest
 * one, where @ref_count is updated and the exact error paths are not
 * visible in this chunk.
 */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430 struct btrfs_root *root,
3431 struct btrfs_path *path,
3432 u64 ino, char *namebuf, u32 name_len,
3433 u8 filetype, u64 *ref_count)
3435 char *dir_name = "lost+found";
3440 btrfs_release_path(path);
3441 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3446 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3450 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3453 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454 namebuf, name_len, filetype, NULL, 1, 0);
3456 * Add ".INO" suffix several times to handle case where
3457 * "FILENAME.INO" is already taken by another file.
3459 while (ret == -EEXIST) {
3461 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3463 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3467 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3469 name_len += count_digits(ino) + 1;
3470 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471 name_len, filetype, NULL, 1, 0);
3474 error("failed to link the inode %llu to %s dir: %s",
3475 ino, dir_name, strerror(-ret));
3480 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481 name_len, namebuf, dir_name);
3483 btrfs_release_path(path);
3485 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair the link count of the inode described by @rec.
 *
 * @trans: transaction all modifications are made in
 * @root:  tree that holds the inode
 * @path:  caller supplied path, released before returning
 * @rec:   inode record to repair
 *
 * A file name and a file type are collected from the backrefs first, with
 * the decimal inode number and BTRFS_FT_REG_FILE as fallbacks.  The link
 * count is then rebuilt by reset_nlink().  An inode that ends up with no
 * link at all is linked into lost+found.  I_ERR_LINK_COUNT_WRONG is cleared
 * afterwards (see the original comment near the end of the function).
 *
 * NOTE(review): rec->found_link is handed to link_inode_to_lostfound()
 * through a cast to a u64 pointer; confirm that the field really is 64 bits
 * wide.  The error branches and the return statement are not visible in
 * this chunk.
 */
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491 struct btrfs_root *root,
3492 struct btrfs_path *path,
3493 struct inode_record *rec)
3495 char namebuf[BTRFS_NAME_LEN] = {0};
3498 int name_recovered = 0;
3499 int type_recovered = 0;
3503 * Get file name and type first before these invalid inode ref
3504 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
3506 name_recovered = !find_file_name(rec, namebuf, &namelen);
3507 type_recovered = !find_file_type(rec, &type);
3509 if (!name_recovered) {
3510 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511 rec->ino, rec->ino);
3512 namelen = count_digits(rec->ino);
3513 sprintf(namebuf, "%llu", rec->ino);
3516 if (!type_recovered) {
3517 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3519 type = BTRFS_FT_REG_FILE;
3523 ret = reset_nlink(trans, root, path, rec);
3526 "Failed to reset nlink for inode %llu: %s\n",
3527 rec->ino, strerror(-ret));
/* No valid link survived the reset, so park the inode in lost+found. */
3531 if (rec->found_link == 0) {
3532 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533 namebuf, namelen, type,
3534 (u64 *)&rec->found_link);
3538 printf("Fixed the nlink of inode %llu\n", rec->ino);
3541 * Clear the flag anyway, or we will loop forever for the same inode
3542 * as it will not be removed from the bad inode list and the dead loop
3545 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546 btrfs_release_path(path);
3551 * Check if there is any normal(reg or prealloc) file extent for given
3553 * This is used to determine the file type when neither its dir_index/item or
3554 * inode_item exists.
3556 * This will *NOT* report error, if any error happens, just consider it does
3557 * not have any normal file extent.
/*
 * Look for a file extent of inode @ino in @root that is not inline.
 *
 * The search is read-only (no transaction, no COW) and starts at the
 * BTRFS_EXTENT_DATA_KEY items.  When the search position is past the last
 * item of a leaf, btrfs_next_leaf() moves to the following leaf.  The item
 * found has to belong to @ino and be of type BTRFS_EXTENT_DATA_KEY; its
 * extent type is then compared against BTRFS_FILE_EXTENT_INLINE.
 *
 * NOTE(review): the remaining key fields, any loop around the item test and
 * the value returned for each outcome are not visible in this chunk.
 */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3561 struct btrfs_path path;
3562 struct btrfs_key key;
3563 struct btrfs_key found_key;
3564 struct btrfs_file_extent_item *fi;
3568 btrfs_init_path(&path);
3570 key.type = BTRFS_EXTENT_DATA_KEY;
3573 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3578 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579 ret = btrfs_next_leaf(root, &path);
3586 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3588 if (found_key.objectid != ino ||
3589 found_key.type != BTRFS_EXTENT_DATA_KEY)
3591 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592 struct btrfs_file_extent_item);
3593 type = btrfs_file_extent_type(path.nodes[0], fi);
3594 if (type != BTRFS_FILE_EXTENT_INLINE) {
3600 btrfs_release_path(&path);
3604 static u32 btrfs_type_to_imode(u8 type)
3606 static u32 imode_by_btrfs_type[] = {
3607 [BTRFS_FT_REG_FILE] = S_IFREG,
3608 [BTRFS_FT_DIR] = S_IFDIR,
3609 [BTRFS_FT_CHRDEV] = S_IFCHR,
3610 [BTRFS_FT_BLKDEV] = S_IFBLK,
3611 [BTRFS_FT_FIFO] = S_IFIFO,
3612 [BTRFS_FT_SOCK] = S_IFSOCK,
3613 [BTRFS_FT_SYMLINK] = S_IFLNK,
3616 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item that is missing for @rec.
 *
 * @trans: transaction the new item is created in
 * @root:  tree that holds the inode
 * @path:  caller supplied path
 * @rec:   record of the inode without an inode item
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed, in this order: regular file when a non-inline file extent is
 * found, directory when the record has a dir item, regular file when orphan
 * extents are attached, and regular file as the last resort.  The item is
 * created with btrfs_new_inode().  Afterwards the record is updated:
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the link count repair runs for this inode as well.
 *
 * NOTE(review): the initial value of mode, the messages printed for the
 * guessed types, the error check after btrfs_new_inode() and the return
 * statement are not visible in this chunk.
 */
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620 struct btrfs_root *root,
3621 struct btrfs_path *path,
3622 struct inode_record *rec)
3626 int type_recovered = 0;
3629 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
3631 type_recovered = !find_file_type(rec, &filetype);
3634 * Try to determine inode type if type not found.
3636 * For found regular file extent, it must be FILE.
3637 * For found dir_item/index, it must be DIR.
3639 * For undetermined one, use FILE as fallback.
3642 * 1. If found backref(inode_index/item is already handled) to it,
3644 * Need new inode-inode ref structure to allow search for that.
3646 if (!type_recovered) {
3647 if (rec->found_file_extent &&
3648 find_normal_file_extent(root, rec->ino)) {
3650 filetype = BTRFS_FT_REG_FILE;
3651 } else if (rec->found_dir_item) {
3653 filetype = BTRFS_FT_DIR;
3654 } else if (!list_empty(&rec->orphan_extents)) {
3656 filetype = BTRFS_FT_REG_FILE;
3658 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3661 filetype = BTRFS_FT_REG_FILE;
3665 ret = btrfs_new_inode(trans, root, rec->ino,
3666 mode | btrfs_type_to_imode(filetype));
3671 * Here inode rebuild is done, we only rebuild the inode item,
3672 * don't repair the nlink(like move to lost+found).
3673 * That is the job of nlink repair.
3675 * We just fill the record and return
3677 rec->found_dir_item = 1;
3678 rec->imode = mode | btrfs_type_to_imode(filetype);
3680 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681 /* Ensure the inode_nlinks repair function will be called */
3682 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded for @rec.
 *
 * @trans: transaction all modifications are made in
 * @root:  tree that holds the inode
 * @path:  caller supplied path, used for the conflict lookup
 * @rec:   inode record owning the orphan extent list
 *
 * Each orphan extent is first checked against the existing file extents
 * with btrfs_get_extent().  On a conflict the orphan is dropped with
 * btrfs_free_extent(); otherwise a file extent item is inserted for it with
 * btrfs_insert_file_extent(), using disk_len for both length arguments.
 * For inserted extents the record is updated: found_size grows by disk_len,
 * I_ERR_FILE_NBYTES_WRONG is cleared once found_size equals nbytes, and the
 * covered range is removed from the hole tree, which clears
 * I_ERR_FILE_EXTENT_DISCOUNT when the tree becomes empty.  Processed
 * entries are unlinked from the list and I_ERR_FILE_EXTENT_ORPHAN is
 * cleared at the end.
 *
 * NOTE(review): the tests on the return values, the freeing of processed
 * entries and the return statement are not visible in this chunk.
 */
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688 struct btrfs_root *root,
3689 struct btrfs_path *path,
3690 struct inode_record *rec)
3692 struct orphan_data_extent *orphan;
3693 struct orphan_data_extent *tmp;
3696 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3698 * Check for conflicting file extents
3700 * Here we don't know whether the extents is compressed or not,
3701 * so we can only assume it not compressed nor data offset,
3702 * and use its disk_len as extent length.
3704 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705 orphan->offset, orphan->disk_len, 0);
3706 btrfs_release_path(path);
3711 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712 orphan->disk_bytenr, orphan->disk_len);
3713 ret = btrfs_free_extent(trans,
3714 root->fs_info->extent_root,
3715 orphan->disk_bytenr, orphan->disk_len,
3716 0, root->objectid, orphan->objectid,
3721 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722 orphan->offset, orphan->disk_bytenr,
3723 orphan->disk_len, orphan->disk_len);
3727 /* Update file size info */
3728 rec->found_size += orphan->disk_len;
3729 if (rec->found_size == rec->nbytes)
3730 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3732 /* Update the file extent hole info too */
3733 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3737 if (RB_EMPTY_ROOT(&rec->holes))
3738 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3740 list_del(&orphan->list);
3743 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of @rec with hole extents.
 *
 * @trans: transaction all modifications are made in
 * @root:  tree that holds the inode
 * @path:  caller supplied path
 * @rec:   inode record owning the hole tree
 *
 * The hole tree is drained from its first node: each hole is punched with
 * btrfs_punch_hole() and then removed from the tree.
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A second
 * btrfs_punch_hole() call covers the range from 0 to the inode size rounded
 * up to the sector size, for a file that lost all of its extents.
 *
 * NOTE(review): the loop header, the condition selecting the special case,
 * the error checks and the return statement are not visible in this chunk.
 */
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749 struct btrfs_root *root,
3750 struct btrfs_path *path,
3751 struct inode_record *rec)
3753 struct rb_node *node;
3754 struct file_extent_hole *hole;
3758 node = rb_first(&rec->holes);
3762 hole = rb_entry(node, struct file_extent_hole, node);
3763 ret = btrfs_punch_hole(trans, root, rec->ino,
3764 hole->start, hole->len);
3767 ret = del_file_extent_hole(&rec->holes, hole->start,
3771 if (RB_EMPTY_ROOT(&rec->holes))
3772 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed, so restart from whatever is first now. */
3773 node = rb_first(&rec->holes);
3775 /* special case for a file losing all its file extent */
3777 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778 round_up(rec->isize,
3779 root->fs_info->sectorsize));
3783 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for one inode record.
 *
 * @root: tree that holds the inode
 * @rec:  inode record with the error flags to act on
 *
 * Only records carrying at least one of the flags listed in the first test
 * are of interest.  A single transaction with 7 reserved items is started
 * (the original comment below breaks that number down) and the repair
 * routines run in a fixed order: missing inode item, orphan extents, extent
 * gaps, directory size, orphan item, link count, nbytes.  Every routine
 * after the first one is skipped once an earlier routine has left a
 * non-zero ret.  The transaction is committed and the path released at the
 * end.
 *
 * NOTE(review): the value returned when no flag matches, the test guarding
 * the return of PTR_ERR(trans), the initial value of ret and the final
 * return are not visible in this chunk.
 */
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3791 struct btrfs_trans_handle *trans;
3792 struct btrfs_path path;
3795 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796 I_ERR_NO_ORPHAN_ITEM |
3797 I_ERR_LINK_COUNT_WRONG |
3798 I_ERR_NO_INODE_ITEM |
3799 I_ERR_FILE_EXTENT_ORPHAN |
3800 I_ERR_FILE_EXTENT_DISCOUNT|
3801 I_ERR_FILE_NBYTES_WRONG)))
3805 * For nlink repair, it may create a dir and add link, so
3806 * 2 for parent(256)'s dir_index and dir_item
3807 * 2 for lost+found dir's inode_item and inode_ref
3808 * 1 for the new inode_ref of the file
3809 * 2 for lost+found dir's dir_index and dir_item for the file
3811 trans = btrfs_start_transaction(root, 7);
3813 return PTR_ERR(trans);
3815 btrfs_init_path(&path);
3816 if (rec->errors & I_ERR_NO_INODE_ITEM)
3817 ret = repair_inode_no_item(trans, root, &path, rec);
3818 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823 ret = repair_inode_isize(trans, root, &path, rec);
3824 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827 ret = repair_inode_nlinks(trans, root, &path, rec);
3828 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829 ret = repair_inode_nbytes(trans, root, &path, rec);
3830 btrfs_commit_transaction(trans, root);
3831 btrfs_release_path(&path);
/*
 * Validate, and in repair mode fix, all inode records gathered for one root.
 *
 * @root:        tree whose records are checked
 * @inode_cache: cache holding the inode records; it is drained by this
 *               function
 *
 * Visible steps:
 *   1. A root with zero refs only triggers a message on stderr when its
 *      cache is not empty.
 *   2. In repair mode the records are walked and repair_inode_backrefs() is
 *      called for those that have backrefs.  The original comment below
 *      explains why backrefs are handled first and in stages.
 *   3. The record of the root directory is looked up and tested with
 *      check_root_dir().  A missing root directory is recreated with
 *      btrfs_make_root_dir() in its own transaction.
 *   4. The cache is drained record by record.  Records of the root
 *      directory and of BTRFS_ORPHAN_OBJECTID are simply freed.  For the
 *      others the orphan item is re-checked, I_ERR_NO_INODE_ITEM and
 *      I_ERR_LINK_COUNT_WRONG are derived from the record, try_repair_inode()
 *      is called, and whatever remains is printed together with its
 *      unresolved backrefs.
 *
 * Returns -1 when the local error counter is positive and 0 otherwise.
 *
 * NOTE(review): many short lines are not visible in this chunk, including
 * the stage handling, most conditions guarding the calls listed above and
 * the places where the error counter is incremented.
 */
3835 static int check_inode_recs(struct btrfs_root *root,
3836 struct cache_tree *inode_cache)
3838 struct cache_extent *cache;
3839 struct ptr_node *node;
3840 struct inode_record *rec;
3841 struct inode_backref *backref;
3846 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3848 if (btrfs_root_refs(&root->root_item) == 0) {
3849 if (!cache_tree_empty(inode_cache))
3850 fprintf(stderr, "warning line %d\n", __LINE__);
3855 * We need to repair backrefs first because we could change some of the
3856 * errors in the inode recs.
3858 * We also need to go through and delete invalid backrefs first and then
3859 * add the correct ones second. We do this because we may get EEXIST
3860 * when adding back the correct index because we hadn't yet deleted the
3863 * For example, if we were missing a dir index then the directories
3864 * isize would be wrong, so if we fixed the isize to what we thought it
3865 * would be and then fixed the backref we'd still have a invalid fs, so
3866 * we need to add back the dir index and then check to see if the isize
3871 if (stage == 3 && !err)
3874 cache = search_cache_extent(inode_cache, 0);
3875 while (repair && cache) {
3876 node = container_of(cache, struct ptr_node, cache);
3878 cache = next_cache_extent(cache);
3880 /* Need to free everything up and rescan */
3882 remove_cache_extent(inode_cache, &node->cache);
3884 free_inode_rec(rec);
3888 if (list_empty(&rec->backrefs))
3891 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory handling: look the record up without creating it. */
3905 rec = get_inode_rec(inode_cache, root_dirid, 0);
3906 BUG_ON(IS_ERR(rec));
3908 ret = check_root_dir(rec);
3910 fprintf(stderr, "root %llu root dir %llu error\n",
3911 (unsigned long long)root->root_key.objectid,
3912 (unsigned long long)root_dirid);
3913 print_inode_error(root, rec);
3918 struct btrfs_trans_handle *trans;
3920 trans = btrfs_start_transaction(root, 1);
3921 if (IS_ERR(trans)) {
3922 err = PTR_ERR(trans);
3927 "root %llu missing its root dir, recreating\n",
3928 (unsigned long long)root->objectid);
3930 ret = btrfs_make_root_dir(trans, root, root_dirid);
3933 btrfs_commit_transaction(trans, root);
3937 fprintf(stderr, "root %llu root dir %llu not found\n",
3938 (unsigned long long)root->root_key.objectid,
3939 (unsigned long long)root_dirid);
/* Final pass: take the records out of the cache one after another. */
3943 cache = search_cache_extent(inode_cache, 0);
3946 node = container_of(cache, struct ptr_node, cache);
3948 remove_cache_extent(inode_cache, &node->cache);
3950 if (rec->ino == root_dirid ||
3951 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952 free_inode_rec(rec);
3956 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957 ret = check_orphan_item(root, rec->ino);
3959 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960 if (can_free_inode_rec(rec)) {
3961 free_inode_rec(rec);
3966 if (!rec->found_inode_item)
3967 rec->errors |= I_ERR_NO_INODE_ITEM;
3968 if (rec->found_link != rec->nlink)
3969 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3971 ret = try_repair_inode(root, rec);
3972 if (ret == 0 && can_free_inode_rec(rec)) {
3973 free_inode_rec(rec);
3979 if (!(repair && ret == 0))
3981 print_inode_error(root, rec);
/* Derive the missing-reference flags before printing each backref. */
3982 list_for_each_entry(backref, &rec->backrefs, list) {
3983 if (!backref->found_dir_item)
3984 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985 if (!backref->found_dir_index)
3986 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987 if (!backref->found_inode_ref)
3988 backref->errors |= REF_ERR_NO_INODE_REF;
3989 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990 " namelen %u name %s filetype %d errors %x",
3991 (unsigned long long)backref->dir,
3992 (unsigned long long)backref->index,
3993 backref->namelen, backref->name,
3994 backref->filetype, backref->errors);
3995 print_ref_error(backref->errors);
3997 free_inode_rec(rec);
3999 return (error > 0) ? -1 : 0;
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4005 struct cache_extent *cache;
4006 struct root_record *rec = NULL;
4009 cache = lookup_cache_extent(root_cache, objectid, 1);
4011 rec = container_of(cache, struct root_record, cache);
4013 rec = calloc(1, sizeof(*rec));
4015 return ERR_PTR(-ENOMEM);
4016 rec->objectid = objectid;
4017 INIT_LIST_HEAD(&rec->backrefs);
4018 rec->cache.start = objectid;
4019 rec->cache.size = 1;
4021 ret = insert_cache_extent(root_cache, &rec->cache);
4023 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec that matches the given reference.
 *
 * @rec:      root record owning the backref list
 * @ref_root: objectid of the referencing root
 * @dir:      directory inode inside @ref_root
 * @index:    dir index, stored in a newly created backref
 * @name:     entry name, not necessarily NUL terminated
 * @namelen:  length of @name in bytes
 *
 * Existing backrefs are compared on ref_root, dir, name length and name
 * bytes; index is not part of the comparison.  A new backref is allocated
 * with room for the name plus a terminating NUL and appended to the tail of
 * the list.
 *
 * NOTE(review): the action taken on a match, the check of the allocation
 * result, the assignment of dir and the return statement are not visible in
 * this chunk.
 */
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029 u64 ref_root, u64 dir, u64 index,
4030 const char *name, int namelen)
4032 struct root_backref *backref;
4034 list_for_each_entry(backref, &rec->backrefs, list) {
4035 if (backref->ref_root != ref_root || backref->dir != dir ||
4036 backref->namelen != namelen)
4038 if (memcmp(name, backref->name, namelen))
/* One extra byte so the stored name can be NUL terminated. */
4043 backref = calloc(1, sizeof(*backref) + namelen + 1);
4046 backref->ref_root = ref_root;
4048 backref->index = index;
4049 backref->namelen = namelen;
4050 memcpy(backref->name, name, namelen);
4051 backref->name[namelen] = '\0';
4052 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root record stored in a cache tree.
 *
 * The record is recovered from its embedded cache extent and its backref
 * list is emptied entry by entry.
 *
 * NOTE(review): the statements that free each backref and the record itself
 * are not visible in this chunk.
 */
4056 static void free_root_record(struct cache_extent *cache)
4058 struct root_record *rec;
4059 struct root_backref *backref;
4061 rec = container_of(cache, struct root_record, cache);
4062 while (!list_empty(&rec->backrefs)) {
4063 backref = to_root_backref(rec->backrefs.next);
4064 list_del(&backref->list);
/*
 * Instantiate the cache tree teardown helper for root records through the
 * FREE_EXTENT_CACHE_BASED_TREE() macro, with free_root_record() as the
 * per-entry destructor.
 * NOTE(review): the macro definition is not part of this chunk; the name of
 * the generated function is presumably derived from the root_recs prefix.
 */
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to a subvolume root in the root cache.
 *
 * @root_cache: cache of root records
 * @root_id:    objectid of the root being referenced
 * @ref_root:   objectid of the root that holds the reference
 * @dir:        directory inode inside @ref_root
 * @index:      dir index of the entry
 * @name:       entry name
 * @namelen:    length of @name
 * @item_type:  key type of the item the reference was found in
 * @errors:     REF_ERR_* bits to merge into the backref
 *
 * The matching backref is fetched or created and @errors is merged in.  For
 * every item type except BTRFS_DIR_ITEM_KEY the index is compared with the
 * one already stored when another indexed reference was seen earlier, and
 * REF_ERR_INDEX_UNMATCH is raised on a difference.  The found_* flag that
 * belongs to @item_type is then set, with duplicate root refs and root
 * backrefs flagged as REF_ERR_DUP_ROOT_REF and REF_ERR_DUP_ROOT_BACKREF.  A
 * backref that has both a forward ref and a dir item is marked reachable.
 *
 * NOTE(review): the check of the backref allocation, the statements behind
 * two of the visible tests and the return value are not visible in this
 * chunk.
 */
4073 static int add_root_backref(struct cache_tree *root_cache,
4074 u64 root_id, u64 ref_root, u64 dir, u64 index,
4075 const char *name, int namelen,
4076 int item_type, int errors)
4078 struct root_record *rec;
4079 struct root_backref *backref;
4081 rec = get_root_rec(root_cache, root_id);
4082 BUG_ON(IS_ERR(rec));
4083 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4086 backref->errors |= errors;
/* Dir items are the only item type here that is not checked for index. */
4088 if (item_type != BTRFS_DIR_ITEM_KEY) {
4089 if (backref->found_dir_index || backref->found_back_ref ||
4090 backref->found_forward_ref) {
4091 if (backref->index != index)
4092 backref->errors |= REF_ERR_INDEX_UNMATCH;
4094 backref->index = index;
4098 if (item_type == BTRFS_DIR_ITEM_KEY) {
4099 if (backref->found_forward_ref)
4101 backref->found_dir_item = 1;
4102 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103 backref->found_dir_index = 1;
4104 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105 if (backref->found_forward_ref)
4106 backref->errors |= REF_ERR_DUP_ROOT_REF;
4107 else if (backref->found_dir_item)
4109 backref->found_forward_ref = 1;
4110 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111 if (backref->found_back_ref)
4112 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113 backref->found_back_ref = 1;
4118 if (backref->found_forward_ref && backref->found_dir_item)
4119 backref->reachable = 1;
/*
 * Drain the per-root records collected in @src_cache into the global
 * @dst_cache in the form of root backrefs.
 *
 * For the tree reloc root the source cache is simply freed. Otherwise each
 * entry is removed from @src_cache, tested with is_child_root(), and every
 * backref that has a dir item and/or a dir index is forwarded to
 * add_root_backref() with @root's objectid as the referencing root. The
 * inode record is freed afterwards.
 *
 * NOTE(review): how the is_child_root() result is acted upon, and the
 * return value, are on lines not visible in this excerpt.
 */
4123 static int merge_root_recs(struct btrfs_root *root,
4124 struct cache_tree *src_cache,
4125 struct cache_tree *dst_cache)
4127 struct cache_extent *cache;
4128 struct ptr_node *node;
4129 struct inode_record *rec;
4130 struct inode_backref *backref;
4133 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134 free_inode_recs_tree(src_cache);
/* always restart from the lowest key because entries are removed as we go */
4139 cache = search_cache_extent(src_cache, 0);
4142 node = container_of(cache, struct ptr_node, cache);
4144 remove_cache_extent(src_cache, &node->cache);
4147 ret = is_child_root(root, root->objectid, rec->ino);
4153 list_for_each_entry(backref, &rec->backrefs, list) {
4154 BUG_ON(backref->found_inode_ref);
4155 if (backref->found_dir_item)
4156 add_root_backref(dst_cache, rec->ino,
4157 root->root_key.objectid, backref->dir,
4158 backref->index, backref->name,
4159 backref->namelen, BTRFS_DIR_ITEM_KEY,
4161 if (backref->found_dir_index)
4162 add_root_backref(dst_cache, rec->ino,
4163 root->root_key.objectid, backref->dir,
4164 backref->index, backref->name,
4165 backref->namelen, BTRFS_DIR_INDEX_KEY,
4169 free_inode_rec(rec);
/*
 * Cross-check the root records in @root_cache and report fs trees that are
 * unreferenced or whose references are inconsistent.
 *
 * The first walk over the cache clears the reachable flag of backrefs whose
 * referencing root does not have found_ref > 0 (circular references are not
 * detected, see the fixme). The second walk checks unreferenced roots in
 * the [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] range with
 * check_orphan_item(), flags backrefs that lack a dir item, dir index, root
 * backref or root ref, and prints the affected roots and backrefs to stderr.
 *
 * Returns 1 when at least one error was counted, 0 otherwise.
 *
 * NOTE(review): loop headers, the error counter updates and several
 * continue statements are on lines not visible in this excerpt.
 */
4176 static int check_root_refs(struct btrfs_root *root,
4177 struct cache_tree *root_cache)
4179 struct root_record *rec;
4180 struct root_record *ref_root;
4181 struct root_backref *backref;
4182 struct cache_extent *cache;
4188 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189 BUG_ON(IS_ERR(rec));
4192 /* fixme: this can not detect circular references */
4195 cache = search_cache_extent(root_cache, 0);
4199 rec = container_of(cache, struct root_record, cache);
4200 cache = next_cache_extent(cache);
4202 if (rec->found_ref == 0)
4205 list_for_each_entry(backref, &rec->backrefs, list) {
4206 if (!backref->reachable)
4209 ref_root = get_root_rec(root_cache,
4211 BUG_ON(IS_ERR(ref_root));
4212 if (ref_root->found_ref > 0)
4215 backref->reachable = 0;
4217 if (rec->found_ref == 0)
/* second walk: report what is still unreferenced or inconsistent */
4223 cache = search_cache_extent(root_cache, 0);
4227 rec = container_of(cache, struct root_record, cache);
4228 cache = next_cache_extent(cache);
4230 if (rec->found_ref == 0 &&
4231 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233 ret = check_orphan_item(root->fs_info->tree_root,
4239 * If we don't have a root item then we likely just have
4240 * a dir item in a snapshot for this root but no actual
4241 * ref key or anything so it's meaningless.
4243 if (!rec->found_root_item)
4246 fprintf(stderr, "fs tree %llu not referenced\n",
4247 (unsigned long long)rec->objectid);
4251 if (rec->found_ref > 0 && !rec->found_root_item)
/* a complete reference needs all four items; flag whatever is missing */
4253 list_for_each_entry(backref, &rec->backrefs, list) {
4254 if (!backref->found_dir_item)
4255 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256 if (!backref->found_dir_index)
4257 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258 if (!backref->found_back_ref)
4259 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260 if (!backref->found_forward_ref)
4261 backref->errors |= REF_ERR_NO_ROOT_REF;
4262 if (backref->reachable && backref->errors)
4269 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270 (unsigned long long)rec->objectid, rec->found_ref,
4271 rec->found_root_item ? "" : "not found");
4273 list_for_each_entry(backref, &rec->backrefs, list) {
4274 if (!backref->reachable)
4276 if (!backref->errors && rec->found_root_item)
4278 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279 " index %llu namelen %u name %s errors %x\n",
4280 (unsigned long long)backref->ref_root,
4281 (unsigned long long)backref->dir,
4282 (unsigned long long)backref->index,
4283 backref->namelen, backref->name,
4285 print_ref_error(backref->errors);
4288 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF / ROOT_BACKREF item stored at @slot of @eb and pass it
 * to add_root_backref().
 *
 * The name is copied into a BTRFS_NAME_LEN byte buffer; an on-disk name_len
 * larger than that is clamped and reported as REF_ERR_NAME_TOO_LONG. For a
 * ROOT_REF key the referenced root is key->offset and the referencing root
 * is key->objectid; the other branch passes the two the other way round.
 */
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292 struct btrfs_key *key,
4293 struct cache_tree *root_cache)
4299 struct btrfs_root_ref *ref;
4300 char namebuf[BTRFS_NAME_LEN];
4303 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4305 dirid = btrfs_root_ref_dirid(eb, ref);
4306 index = btrfs_root_ref_sequence(eb, ref);
4307 name_len = btrfs_root_ref_name_len(eb, ref);
4309 if (name_len <= BTRFS_NAME_LEN) {
4313 len = BTRFS_NAME_LEN;
4314 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow the fixed-size btrfs_root_ref header */
4316 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4318 if (key->type == BTRFS_ROOT_REF_KEY) {
4319 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320 index, namebuf, len, key->type, error);
4322 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323 index, namebuf, len, key->type, error);
/*
 * Per-entry release callback for a cache tree of btrfs_corrupt_block.
 *
 * NOTE(review): the statement that frees the entry is not visible in this
 * excerpt.
 */
4328 static void free_corrupt_block(struct cache_extent *cache)
4330 struct btrfs_corrupt_block *corrupt;
4332 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* presumably expands to free_corrupt_blocks_tree(), used by check_fs_root() */
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4339 * Repair the btree of the given root.
4341 * The fix is to remove the node keys recorded in the corrupt_blocks
4342 * cache_tree and then rebalance the tree.
4343 * After the fix, the btree should be writeable.
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of struct btrfs_corrupt_block entries to drop
 *
 * Works in two passes inside one transaction: pass 1 removes the parent
 * pointer of every recorded corrupt block and frees its extent, pass 2
 * searches each recorded key again with ins_len -1 so btrfs_search_slot()
 * rebalances the tree.
 *
 * NOTE(review): loop headers, error branches and the return statement are on
 * lines not visible in this excerpt.
 */
4345 static int repair_btree(struct btrfs_root *root,
4346 struct cache_tree *corrupt_blocks)
4348 struct btrfs_trans_handle *trans;
4349 struct btrfs_path path;
4350 struct btrfs_corrupt_block *corrupt;
4351 struct cache_extent *cache;
4352 struct btrfs_key key;
4357 if (cache_tree_empty(corrupt_blocks))
4360 trans = btrfs_start_transaction(root, 1);
4361 if (IS_ERR(trans)) {
4362 ret = PTR_ERR(trans);
4363 fprintf(stderr, "Error starting transaction: %s\n",
4367 btrfs_init_path(&path);
4368 cache = first_cache_extent(corrupt_blocks);
4370 corrupt = container_of(cache, struct btrfs_corrupt_block,
4372 level = corrupt->level;
/* stop the search at the level that holds the pointer to the bad block */
4373 path.lowest_level = level;
4374 key.objectid = corrupt->key.objectid;
4375 key.type = corrupt->key.type;
4376 key.offset = corrupt->key.offset;
4379 * Here we don't want to do any tree balance, since it may
4380 * cause a balance with corrupted brother leaf/node,
4381 * so ins_len set to 0 here.
4382 * Balance will be done after all corrupt node/leaf is deleted.
4384 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* remember the block's bytenr before the pointer to it is deleted */
4387 offset = btrfs_node_blockptr(path.nodes[level],
4390 /* Remove the ptr */
4391 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4395 * Remove the corresponding extent
4396 * return value is not concerned.
4398 btrfs_release_path(&path);
4399 ret = btrfs_free_extent(trans, root, offset,
4400 root->fs_info->nodesize, 0,
4401 root->root_key.objectid, level - 1, 0);
4402 cache = next_cache_extent(cache);
4405 /* Balance the btree using btrfs_search_slot() */
4406 cache = first_cache_extent(corrupt_blocks);
4408 corrupt = container_of(cache, struct btrfs_corrupt_block,
4410 memcpy(&key, &corrupt->key, sizeof(key));
4411 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4414 /* return will always >0 since it won't find the item */
4416 btrfs_release_path(&path);
4417 cache = next_cache_extent(cache);
4420 btrfs_commit_transaction(trans, root);
4421 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * Steps visible here: install a per-root corrupt_blocks cache on fs_info;
 * for non-reloc roots mark the root record as having a root item when the
 * root item's refs are > 0; move orphan data extents onto their inode
 * records; validate the root block; walk the tree with walk_down_tree() /
 * walk_up_tree(), starting either at the root node or, when refs == 0 and
 * drop_progress is set, at the drop_progress key and drop_level; list any
 * corrupted tree blocks and call repair_btree(); merge the collected root
 * records into @root_cache; check the inode records; tear down the
 * corrupt block cache and the orphan extent list.
 *
 * NOTE(review): conditions guarding the repair, the walk loop and the
 * return value are on lines not visible in this excerpt. check_fs_roots()
 * treats a -EAGAIN result specially.
 */
4425 static int check_fs_root(struct btrfs_root *root,
4426 struct cache_tree *root_cache,
4427 struct walk_control *wc)
4433 struct btrfs_path path;
4434 struct shared_node root_node;
4435 struct root_record *rec;
4436 struct btrfs_root_item *root_item = &root->root_item;
4437 struct cache_tree corrupt_blocks;
4438 struct orphan_data_extent *orphan;
4439 struct orphan_data_extent *tmp;
4440 enum btrfs_tree_block_status status;
4441 struct node_refs nrefs;
4444 * Reuse the corrupt_block cache tree to record corrupted tree block
4446 * Unlike the usage in extent tree check, here we do it in a per
4447 * fs/subvol tree base.
4449 cache_tree_init(&corrupt_blocks);
4450 root->fs_info->corrupt_blocks = &corrupt_blocks;
4452 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453 rec = get_root_rec(root_cache, root->root_key.objectid);
4454 BUG_ON(IS_ERR(rec));
4455 if (btrfs_root_refs(root_item) > 0)
4456 rec->found_root_item = 1;
4459 btrfs_init_path(&path);
4460 memset(&root_node, 0, sizeof(root_node));
4461 cache_tree_init(&root_node.root_cache);
4462 cache_tree_init(&root_node.inode_cache);
4463 memset(&nrefs, 0, sizeof(nrefs));
4465 /* Move the orphan extent record to corresponding inode_record */
4466 list_for_each_entry_safe(orphan, tmp,
4467 &root->orphan_data_extents, list) {
4468 struct inode_record *inode;
4470 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4472 BUG_ON(IS_ERR(inode));
4473 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474 list_move(&orphan->list, &inode->orphan_extents);
/* the walk starts with root_node installed at the root block's level */
4477 level = btrfs_header_level(root->node);
4478 memset(wc->nodes, 0, sizeof(wc->nodes));
4479 wc->nodes[level] = &root_node;
4480 wc->active_node = level;
4481 wc->root_level = level;
4483 /* We may not have checked the root block, lets do that now */
4484 if (btrfs_is_leaf(root->node))
4485 status = btrfs_check_leaf(root, NULL, root->node);
4487 status = btrfs_check_node(root, NULL, root->node);
4488 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* live root, or no drop in progress: start the walk at slot 0 of the root node */
4491 if (btrfs_root_refs(root_item) > 0 ||
4492 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493 path.nodes[level] = root->node;
4494 extent_buffer_get(root->node);
4495 path.slots[level] = 0;
4497 struct btrfs_key key;
4498 struct btrfs_disk_key found_key;
4500 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501 level = root_item->drop_level;
4502 path.lowest_level = level;
/* a drop level outside the tree's height cannot be trusted */
4503 if (level > btrfs_header_level(root->node) ||
4504 level >= BTRFS_MAX_LEVEL) {
4505 error("ignoring invalid drop level: %u", level);
4508 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4511 btrfs_node_key(path.nodes[level], &found_key,
4513 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514 sizeof(found_key)));
4518 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4524 wret = walk_up_tree(root, &path, wc, &level);
4531 btrfs_release_path(&path);
4533 if (!cache_tree_empty(&corrupt_blocks)) {
4534 struct cache_extent *cache;
4535 struct btrfs_corrupt_block *corrupt;
4537 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538 root->root_key.objectid);
4539 cache = first_cache_extent(&corrupt_blocks);
4541 corrupt = container_of(cache,
4542 struct btrfs_corrupt_block,
4544 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545 cache->start, corrupt->level,
4546 corrupt->key.objectid, corrupt->key.type,
4547 corrupt->key.offset);
4548 cache = next_cache_extent(cache);
4551 printf("Try to repair the btree for root %llu\n",
4552 root->root_key.objectid);
4553 ret = repair_btree(root, &corrupt_blocks);
4555 fprintf(stderr, "Failed to repair btree: %s\n",
4558 printf("Btree for root %llu is fixed\n",
4559 root->root_key.objectid);
4563 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4567 if (root_node.current) {
4568 root_node.current->checked = 1;
4569 maybe_free_inode_rec(&root_node.inode_cache,
4573 err = check_inode_recs(root, &root_node.inode_cache);
/* drop the per-root state installed at the top of the function */
4577 free_corrupt_blocks_tree(&corrupt_blocks);
4578 root->fs_info->corrupt_blocks = NULL;
4579 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether the root with @objectid is handled by check_fs_roots().
 * The tree reloc and data reloc trees are tested explicitly (the value
 * returned for them is on a line not visible here); every other objectid is
 * decided by is_fstree().
 */
4583 static int fs_root_objectid(u64 objectid)
4585 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4588 return is_fstree(objectid);
/*
 * Walk the ROOT_ITEMs of the tree root and run check_fs_root() on every
 * root accepted by fs_root_objectid(); ROOT_REF / ROOT_BACKREF items met on
 * the way are passed to process_root_ref().
 *
 * The tree reloc root is read with btrfs_read_fs_root_no_cache() and
 * released again with btrfs_free_fs_root(); other roots go through
 * btrfs_read_fs_root() with key.offset set to (u64)-1. When the tree root
 * node changed during the walk, or check_fs_root() returned -EAGAIN, the
 * collected root records are dropped and the path is released (presumably
 * to restart; the jump itself is not visible in this excerpt).
 *
 * Progress reporting is started when ctx.progress_enabled is set and
 * stopped with task_stop() at the end.
 */
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592 struct cache_tree *root_cache)
4594 struct btrfs_path path;
4595 struct btrfs_key key;
4596 struct walk_control wc;
4597 struct extent_buffer *leaf, *tree_node;
4598 struct btrfs_root *tmp_root;
4599 struct btrfs_root *tree_root = fs_info->tree_root;
4603 if (ctx.progress_enabled) {
4604 ctx.tp = TASK_FS_ROOTS;
4605 task_start(ctx.info);
4609 * Just in case we made any changes to the extent tree that weren't
4610 * reflected into the free space cache yet.
4613 reset_cached_block_groups(fs_info);
4614 memset(&wc, 0, sizeof(wc));
4615 cache_tree_init(&wc.shared);
4616 btrfs_init_path(&path);
4621 key.type = BTRFS_ROOT_ITEM_KEY;
4622 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* remember the tree root node so a change underneath the walk is noticed */
4627 tree_node = tree_root->node;
4629 if (tree_node != tree_root->node) {
4630 free_root_recs_tree(root_cache);
4631 btrfs_release_path(&path);
4634 leaf = path.nodes[0];
4635 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636 ret = btrfs_next_leaf(tree_root, &path);
4642 leaf = path.nodes[0];
4644 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646 fs_root_objectid(key.objectid)) {
4647 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648 tmp_root = btrfs_read_fs_root_no_cache(
4651 key.offset = (u64)-1;
4652 tmp_root = btrfs_read_fs_root(
4655 if (IS_ERR(tmp_root)) {
4659 ret = check_fs_root(tmp_root, root_cache, &wc);
4660 if (ret == -EAGAIN) {
4661 free_root_recs_tree(root_cache);
4662 btrfs_release_path(&path);
/* only the uncached reloc root is released by this function */
4667 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668 btrfs_free_fs_root(tmp_root);
4669 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670 key.type == BTRFS_ROOT_BACKREF_KEY) {
4671 process_root_ref(leaf, path.slots[0], &key,
4678 btrfs_release_path(&path);
4680 free_extent_cache_tree(&wc.shared);
4681 if (!cache_tree_empty(&wc.shared))
4682 fprintf(stderr, "warning line %d\n", __LINE__);
4684 task_stop(ctx.info);
4690 * Find the DIR_INDEX offset of the entry in @dirid that matches @location_id, the name and @file_type.
4691 * Notice:time efficiency is O(N)
4693 * @root: the root of the fs/file tree
4694 * @index_ret: the index as return value
4695 * @namebuf: the name to match
4696 * @name_len: the length of name to match
4697 * @file_type: the file_type of INODE_ITEM to match
4699 * Returns 0 if found; *@index_ret is set to the matching index.
4700 * Returns <0 if not found and *@index_ret will be (u64)-1
/*
 * Scan the DIR_INDEX items of directory @dirid backwards, starting from the
 * highest possible offset, for an entry whose location is the INODE_ITEM of
 * @location_id and whose file type and name match. On a match the item's
 * key offset is stored in *@index_ret.
 *
 * Fix: the not-found path stored "(64)-1", i.e. 63, instead of the
 * documented (u64)-1 sentinel.
 */
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703 u64 *index_ret, char *namebuf, u32 name_len,
4706 struct btrfs_path path;
4707 struct extent_buffer *node;
4708 struct btrfs_dir_item *di;
4709 struct btrfs_key key;
4710 struct btrfs_key location;
4711 char name[BTRFS_NAME_LEN] = {0};
4723 /* search from the last index */
4724 key.objectid = dirid;
4725 key.offset = (u64)-1;
4726 key.type = BTRFS_DIR_INDEX_KEY;
4728 btrfs_init_path(&path);
4729 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4734 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
/* no more DIR_INDEX items: report the "unknown index" sentinel */
4737 *index_ret = (u64)-1;
4740 /* Check whether inode_id/filetype/name match */
4741 node = path.nodes[0];
4742 slot = path.slots[0];
4743 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744 total = btrfs_item_size_nr(node, slot);
4745 while (cur < total) {
4747 len = btrfs_dir_name_len(node, di);
4748 data_len = btrfs_dir_data_len(node, di);
4750 btrfs_dir_item_key_to_cpu(node, di, &location);
4751 if (location.objectid != location_id ||
4752 location.type != BTRFS_INODE_ITEM_KEY ||
4753 location.offset != 0)
4756 filetype = btrfs_dir_type(node, di);
4757 if (file_type != filetype)
/* never copy more than the local buffer can hold */
4760 if (len > BTRFS_NAME_LEN)
4761 len = BTRFS_NAME_LEN;
4763 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764 if (len != name_len || strncmp(namebuf, name, len))
/* match: the DIR_INDEX key offset is the index */
4767 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768 *index_ret = key.offset;
/* advance to the next btrfs_dir_item packed into the same item */
4772 len += sizeof(*di) + data_len;
4773 di = (struct btrfs_dir_item *)((char *)di + len);
4779 btrfs_release_path(&path);
4784 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785 * INODE_REF/INODE_EXTREF match.
4787 * @root: the root of the fs/file tree
4788 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789 * value while find index
4790 * @location_key: location key of the struct btrfs_dir_item to match
4791 * @name: the name to match
4792 * @namelen: the length of name
4793 * @file_type: the type of file to match
4795 * Return 0 if no error occurred.
4796 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797 * DIR_ITEM/DIR_INDEX
4798 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Each btrfs_dir_item packed into the found item is compared in turn: the
 * location key and the file type are tested first, then the name. On-disk
 * names longer than BTRFS_NAME_LEN are trimmed, with a warning, before the
 * comparison. When called for a DIR_INDEX with key->offset == (u64)-1 the
 * index is first resolved through find_dir_index(), which writes it into
 * key->offset.
 */
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802 struct btrfs_key *location_key, char *name,
4803 u32 namelen, u8 file_type)
4805 struct btrfs_path path;
4806 struct extent_buffer *node;
4807 struct btrfs_dir_item *di;
4808 struct btrfs_key location;
4809 char namebuf[BTRFS_NAME_LEN] = {0};
4818 /* get the index by traversing all index */
4819 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820 ret = find_dir_index(root, key->objectid,
4821 location_key->objectid, &key->offset,
4822 name, namelen, file_type);
4824 ret = DIR_INDEX_MISSING;
4828 btrfs_init_path(&path);
4829 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4831 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4836 /* Check whether inode_id/filetype/name match */
4837 node = path.nodes[0];
4838 slot = path.slots[0];
4839 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840 total = btrfs_item_size_nr(node, slot);
4841 while (cur < total) {
/* assume a mismatch until an entry passes every test below */
4842 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4845 len = btrfs_dir_name_len(node, di);
4846 data_len = btrfs_dir_data_len(node, di);
4848 btrfs_dir_item_key_to_cpu(node, di, &location);
4849 if (location.objectid != location_key->objectid ||
4850 location.type != location_key->type ||
4851 location.offset != location_key->offset)
4854 filetype = btrfs_dir_type(node, di);
4855 if (file_type != filetype)
4858 if (len > BTRFS_NAME_LEN) {
4859 len = BTRFS_NAME_LEN;
4860 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4862 key->type == BTRFS_DIR_ITEM_KEY ?
4863 "DIR_ITEM" : "DIR_INDEX",
4864 key->objectid, key->offset, len);
4866 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4868 if (len != namelen || strncmp(namebuf, name, len))
/* step over this entry's header, name and data to the next packed entry */
4874 len += sizeof(*di) + data_len;
4875 di = (struct btrfs_dir_item *)((char *)di + len);
4880 btrfs_release_path(&path);
4885 * Prints inode ref error message
/*
 * @root:     tree the INODE_REF lives in
 * @key:      key of the INODE_REF (objectid = inode, offset = parent dir)
 * @index:    dir index recorded in the ref
 * @namebuf:  name recorded in the ref, @name_len bytes are hashed
 * @filetype: file type derived from the inode mode
 * @err:      bitmap of DIR_ITEM_* / DIR_INDEX_* error bits to report
 *
 * A ref on the root directory itself is reported as such. Otherwise one
 * line is printed per affected item kind.
 *
 * Fix: the DIR INDEX message chose between "mismatch" and "missing" by
 * testing DIR_ITEM_MISMATCH, so an index mismatch was printed as "missing"
 * (or the reverse) whenever the two item kinds disagreed.
 */
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888 u64 index, const char *namebuf, int name_len,
4889 u8 filetype, int err)
4894 /* root dir error */
4895 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4897 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898 root->objectid, key->objectid, key->offset, namebuf);
4903 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905 root->objectid, key->offset,
4906 btrfs_name_hash(namebuf, name_len),
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4909 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911 root->objectid, key->offset, index,
4912 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
4917 * Insert the missing inode item.
4919 * Returns 0 means success.
4920 * Returns <0 means error.
/*
 * @root: tree that should contain the inode item
 * @ino:  inode number whose INODE_ITEM is missing
 *
 * The file type (its parameter line is not visible here) is forwarded to
 * create_inode_item_lowmem(). The insertion runs in its own transaction.
 *
 * NOTE(review): on the visible line the return value of
 * create_inode_item_lowmem() is not captured - confirm whether a failure
 * there can go unnoticed.
 */
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4925 struct btrfs_key key;
4926 struct btrfs_trans_handle *trans;
4927 struct btrfs_path path;
4931 key.type = BTRFS_INODE_ITEM_KEY;
4934 btrfs_init_path(&path);
4935 trans = btrfs_start_transaction(root, 1);
4936 if (IS_ERR(trans)) {
4941 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* stop on a search error, or when the search found the item (ret == 0) */
4942 if (ret < 0 || !ret)
4945 /* insert inode item */
4946 create_inode_item_lowmem(trans, root, ino, filetype);
4949 btrfs_commit_transaction(trans, root);
4952 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953 root->objectid, ino);
4954 btrfs_release_path(&path);
4959 * The ternary means dir item, dir index and relative inode ref.
4960 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4963 * If two of three is missing or mismatched, delete the existing one.
4964 * If one of three is missing or mismatched, add the missing one.
4966 * Returns 0 on success.
4967 * Returns non-zero on error.
/*
 * @dir_ino, @ino, @index, @name, @name_len and @filetype describe the link
 * being repaired; the error bitmap (its parameter line is not visible here)
 * is tested against the DIR_INDEX_*, DIR_ITEM_* and INODE_REF_MISSING bits.
 * The repair itself is either btrfs_unlink() or btrfs_add_link() inside one
 * transaction, and the outcome is printed.
 *
 * NOTE(review): the statements that compute "stage" and choose between the
 * two calls are on lines not visible in this excerpt, as is any check of
 * the btrfs_start_transaction() result.
 */
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970 u64 index, char *name, int name_len, u8 filetype,
4973 struct btrfs_trans_handle *trans;
4978 * stage shall be one of following valild values:
4979 * 0: Fine, nothing to do.
4980 * 1: One of three is wrong, so add missing one.
4981 * 2: Two of three is wrong, so delete existed one.
4983 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4985 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4987 if (err & (INODE_REF_MISSING))
4990 /* stage must be smaller than 3 */
4993 trans = btrfs_start_transaction(root, 1);
4995 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
5000 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001 filetype, &index, 1, 1);
5005 btrfs_commit_transaction(trans, root);
5008 error("fail to repair inode %llu name %s filetype %u",
5009 ino, name, filetype);
5011 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012 stage == 2 ? "Delete" : "Add",
5013 ino, name, filetype);
5019 * Traverse the given INODE_REF and call find_dir_item() to find related
5020 * DIR_ITEM/DIR_INDEX.
5022 * @root: the root of the fs/file tree
5023 * @ref_key: the key of the INODE_REF
5024 * @path the path provides node and slot
5025 * @refs_ret: the count of INODE_REF
5026 * @mode: the st_mode of INODE_ITEM
5027 * @name_ret: returns with the first ref's name
5028 * @namelen_ret: len of the name_ret
5030 * Return 0 if no error occurred.
/*
 * Every btrfs_inode_ref packed into the item at @path is decoded and its
 * DIR_INDEX and DIR_ITEM counterparts are looked up with find_dir_item(),
 * using the parent directory (ref_key->offset) and a location key that
 * names the inode's INODE_ITEM. A ref on the root directory is expected to
 * be ".." with index 0 and itself as parent; otherwise both "missing" bits
 * are set so that repair deletes it.
 *
 * With repair enabled, mismatches are passed to repair_ternary_lowmem() and
 * the item is searched again because the path may have changed.
 *
 * NOTE(review): namebuf holds at most BTRFS_NAME_LEN bytes (len), yet the
 * unclamped on-disk name_len is passed to repair_ternary_lowmem() and
 * print_inode_ref_err() - confirm this cannot read past the buffer for a
 * corrupted name_len.
 */
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033 struct btrfs_path *path, char *name_ret,
5034 u32 *namelen_ret, u64 *refs_ret, int mode)
5036 struct btrfs_key key;
5037 struct btrfs_key location;
5038 struct btrfs_inode_ref *ref;
5039 struct extent_buffer *node;
5040 char namebuf[BTRFS_NAME_LEN] = {0};
5050 int need_research = 0;
5058 /* since after repair, path and the dir item may be changed */
5059 if (need_research) {
5061 btrfs_release_path(path);
5062 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063 /* the item was deleted, let path point to the last checked item */
5065 if (path->slots[0] == 0)
5066 btrfs_prev_leaf(root, path);
/* the dir entries must point at this inode's INODE_ITEM */
5074 location.objectid = ref_key->objectid;
5075 location.type = BTRFS_INODE_ITEM_KEY;
5076 location.offset = 0;
5077 node = path->nodes[0];
5078 slot = path->slots[0];
5080 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082 total = btrfs_item_size_nr(node, slot);
5085 /* Update inode ref count */
5088 index = btrfs_inode_ref_index(node, ref);
5089 name_len = btrfs_inode_ref_name_len(node, ref);
5091 if (name_len <= BTRFS_NAME_LEN) {
5094 len = BTRFS_NAME_LEN;
5095 warning("root %llu INODE_REF[%llu %llu] name too long",
5096 root->objectid, ref_key->objectid, ref_key->offset);
5099 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5101 /* copy the first name found to name_ret */
5102 if (refs == 1 && name_ret) {
5103 memcpy(name_ret, namebuf, len);
5107 /* Check root dir ref */
5108 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109 if (index != 0 || len != strlen("..") ||
5110 strncmp("..", namebuf, len) ||
5111 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112 /* set err bits then repair will delete the ref */
5113 err |= DIR_INDEX_MISSING;
5114 err |= DIR_ITEM_MISSING;
5119 /* Find related DIR_INDEX */
5120 key.objectid = ref_key->offset;
5121 key.type = BTRFS_DIR_INDEX_KEY;
5123 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124 imode_to_type(mode));
5126 /* Find related dir_item */
5127 key.objectid = ref_key->offset;
5128 key.type = BTRFS_DIR_ITEM_KEY;
5129 key.offset = btrfs_name_hash(namebuf, len);
5130 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131 imode_to_type(mode));
5133 if (tmp_err && repair) {
5134 ret = repair_ternary_lowmem(root, ref_key->offset,
5135 ref_key->objectid, index, namebuf,
5136 name_len, imode_to_type(mode),
5143 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144 imode_to_type(mode), tmp_err);
/* advance by the on-disk length, not the clamped one, to reach the next ref */
5146 len = sizeof(*ref) + name_len;
5147 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5158 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159 * DIR_ITEM/DIR_INDEX.
5161 * @root: the root of the fs/file tree
5162 * @ref_key: the key of the INODE_EXTREF
5163 * @refs: the count of INODE_EXTREF
5164 * @mode: the st_mode of INODE_ITEM
5166 * Return 0 if no error occurred.
/*
 * Every btrfs_inode_extref packed into the item at @slot of @node is decoded
 * and its DIR_INDEX and DIR_ITEM counterparts are looked up with
 * find_dir_item(), using the parent recorded in the extref and a location
 * key that names the inode's INODE_ITEM. Names longer than BTRFS_NAME_LEN
 * are clamped with a warning.
 *
 * Fix: find_dir_item() compares its last argument against the u8 file type
 * stored in the dir item, but the raw mode (documented above as st_mode)
 * was passed straight through. It is now converted with imode_to_type(),
 * as check_inode_ref() already does.
 */
5168 static int check_inode_extref(struct btrfs_root *root,
5169 struct btrfs_key *ref_key,
5170 struct extent_buffer *node, int slot, u64 *refs,
5173 struct btrfs_key key;
5174 struct btrfs_key location;
5175 struct btrfs_inode_extref *extref;
5176 char namebuf[BTRFS_NAME_LEN] = {0};
/* the dir entries must point at this inode's INODE_ITEM */
5186 location.objectid = ref_key->objectid;
5187 location.type = BTRFS_INODE_ITEM_KEY;
5188 location.offset = 0;
5190 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191 total = btrfs_item_size_nr(node, slot);
5194 /* update inode ref count */
5196 name_len = btrfs_inode_extref_name_len(node, extref);
5197 index = btrfs_inode_extref_index(node, extref);
5198 parent = btrfs_inode_extref_parent(node, extref);
5199 if (name_len <= BTRFS_NAME_LEN) {
5202 len = BTRFS_NAME_LEN;
5203 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204 root->objectid, ref_key->objectid, ref_key->offset);
5206 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5208 /* Check root dir ref name */
5209 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211 root->objectid, ref_key->objectid, ref_key->offset,
5213 err |= ROOT_DIR_ERROR;
5216 /* find related dir_index */
5217 key.objectid = parent;
5218 key.type = BTRFS_DIR_INDEX_KEY;
5220 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
5223 /* find related dir_item */
5224 key.objectid = parent;
5225 key.type = BTRFS_DIR_ITEM_KEY;
5226 key.offset = btrfs_name_hash(namebuf, len);
5227 ret = find_dir_item(root, &key, &location, namebuf, len, imode_to_type(mode));
/* advance by the on-disk length, not the clamped one, to reach the next extref */
5230 len = sizeof(*extref) + name_len;
5231 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5241 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242 * DIR_ITEM/DIR_INDEX match.
5243 * Return with @index_ret.
5245 * @root: the root of the fs/file tree
5246 * @key: the key of the INODE_REF/INODE_EXTREF
5247 * @name: the name in the INODE_REF/INODE_EXTREF
5248 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5249 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5250 * value (u64)-1 means do not check index
5251 * @ext_ref: the EXTENDED_IREF feature
5253 * Return 0 if no error occurred.
5254 * Return >0 for error bitmap
/*
 * The INODE_REF item for @key is searched first and every packed ref is
 * compared by index (unless *@index_ret is (u64)-1) and by name. After
 * that (the code between the two searches is not fully visible here) @key
 * is rewritten into the INODE_EXTREF key, with the offset computed by
 * btrfs_extref_hash() from the parent dir and the name, and the packed
 * extrefs are compared by index, parent and name. On a match the ref's
 * index is written to *@index_ret.
 *
 * Note that @key is modified in place when the extref search runs.
 */
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257 char *name, int namelen, u64 *index_ret,
5258 unsigned int ext_ref)
5260 struct btrfs_path path;
5261 struct btrfs_inode_ref *ref;
5262 struct btrfs_inode_extref *extref;
5263 struct extent_buffer *node;
5264 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5277 btrfs_init_path(&path);
5278 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5280 ret = INODE_REF_MISSING;
5284 node = path.nodes[0];
5285 slot = path.slots[0];
5287 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288 total = btrfs_item_size_nr(node, slot);
5290 /* Iterate all entry of INODE_REF */
5291 while (cur < total) {
/* assume the ref is missing until an entry passes every test below */
5292 ret = INODE_REF_MISSING;
5294 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295 ref_index = btrfs_inode_ref_index(node, ref);
5296 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* the name must fit both inside the item and inside the local buffer */
5299 if (cur + sizeof(*ref) + ref_namelen > total ||
5300 ref_namelen > BTRFS_NAME_LEN) {
5301 warning("root %llu INODE %s[%llu %llu] name too long",
5303 key->type == BTRFS_INODE_REF_KEY ?
5305 key->objectid, key->offset);
5307 if (cur + sizeof(*ref) > total)
5309 len = min_t(u32, total - cur - sizeof(*ref),
5315 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5318 if (len != namelen || strncmp(ref_namebuf, name, len))
5321 *index_ret = ref_index;
5325 len = sizeof(*ref) + ref_namelen;
5326 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5331 /* Skip if not support EXTENDED_IREF feature */
5335 btrfs_release_path(&path);
5336 btrfs_init_path(&path);
/* the INODE_REF key offset was the parent dir; the extref key uses a hash instead */
5338 dir_id = key->offset;
5339 key->type = BTRFS_INODE_EXTREF_KEY;
5340 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5342 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5344 ret = INODE_REF_MISSING;
5348 node = path.nodes[0];
5349 slot = path.slots[0];
5351 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5353 total = btrfs_item_size_nr(node, slot);
5355 /* Iterate all entry of INODE_EXTREF */
5356 while (cur < total) {
5357 ret = INODE_REF_MISSING;
5359 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360 ref_index = btrfs_inode_extref_index(node, extref);
5361 parent = btrfs_inode_extref_parent(node, extref);
5362 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* extrefs of different parents can share one item, so the parent is checked too */
5365 if (parent != dir_id)
5368 if (ref_namelen <= BTRFS_NAME_LEN) {
5371 len = BTRFS_NAME_LEN;
5372 warning("root %llu INODE %s[%llu %llu] name too long",
5374 key->type == BTRFS_INODE_REF_KEY ?
5376 key->objectid, key->offset);
5378 read_extent_buffer(node, ref_namebuf,
5379 (unsigned long)(extref + 1), len);
5381 if (len != namelen || strncmp(ref_namebuf, name, len))
5384 *index_ret = ref_index;
5389 len = sizeof(*extref) + ref_namelen;
5390 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5395 btrfs_release_path(&path);
/*
 * Print one error line per item kind flagged in @err for a directory entry.
 *
 * @root:     tree the entry lives in
 * @key:      key of the DIR_ITEM/DIR_INDEX being checked
 * @ino:      inode the entry points to
 * @index:    dir index of the entry
 * @namebuf:  entry name
 * @name_len: length of @namebuf (not used on the visible lines)
 * @filetype: file type recorded in the entry
 * @err:      bitmap of DIR_ITEM_*, DIR_INDEX_*, INODE_ITEM_* and
 *            INODE_REF_MISSING bits
 *
 * Fixes: the DIR INDEX message tested DIR_ITEM_MISMATCH instead of
 * DIR_INDEX_MISMATCH when choosing its wording, and the wording itself was
 * misspelled "mismath" (print_inode_ref_err() prints "mismatch").
 */
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400 u64 ino, u64 index, const char *namebuf,
5401 int name_len, u8 filetype, int err)
5403 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405 root->objectid, key->objectid, key->offset, namebuf,
5407 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
5410 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412 root->objectid, key->objectid, index, namebuf, filetype,
5413 err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
5416 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5418 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419 root->objectid, ino, index, namebuf, filetype,
5420 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5423 if (err & INODE_REF_MISSING)
5425 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426 root->objectid, ino, key->objectid, namebuf, filetype);
5431 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5433 * Returns error after repair
/*
 * A missing inode item is handled first with repair_inode_item_missing().
 * Any bit left outside the INODE_ITEM_* pair is then passed to
 * repair_ternary_lowmem(). The bits belonging to each step are cleared from
 * the error bitmap.
 *
 * NOTE(review): the conditions under which the bits are cleared (presumably
 * only on success) and the return statement are on lines not visible here.
 */
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436 u64 index, u8 filetype, char *namebuf, u32 name_len,
5441 if (err & INODE_ITEM_MISSING) {
5442 ret = repair_inode_item_missing(root, ino, filetype);
5444 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* anything other than the inode item bits concerns the dir item/index/ref triple */
5447 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449 name_len, filetype, err);
5451 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type (DIR_ITEM or DIR_INDEX, see
 * count_dir_isize()) that belong to directory @ino, from the highest offset
 * backwards, and visit every btrfs_dir_item packed into them. Name lengths
 * are clamped to BTRFS_NAME_LEN.
 *
 * NOTE(review): the output parameter, the accumulation into it and the
 * return value are on lines not visible in this excerpt.
 */
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5462 struct btrfs_key key;
5463 struct btrfs_path path;
5465 struct btrfs_dir_item *di;
5475 key.offset = (u64)-1;
5477 btrfs_init_path(&path);
5478 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5483 /* if found, go to special case */
5488 ret = btrfs_previous_item(root, &path, ino, type);
5496 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5498 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5500 while (cur < total) {
5501 len = btrfs_dir_name_len(path.nodes[0], di);
5502 if (len > BTRFS_NAME_LEN)
5503 len = BTRFS_NAME_LEN;
5506 len += btrfs_dir_data_len(path.nodes[0], di);
5508 di = (struct btrfs_dir_item *)((char *)di + len);
5514 btrfs_release_path(&path);
/*
 * Compute the expected size of directory inode @ino in @root.
 *
 * __count_dir_isize() is run once for DIR_ITEM and once for DIR_INDEX items;
 * the two results are added and stored in *size.  An error message is
 * printed on the failure path (the jump to it is not visible in this
 * listing).
 */
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5525 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5529 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5533 *size = item_size + index_size;
5537 error("failed to count root %llu INODE[%llu] root size",
5538 root->objectid, ino);
5543 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5546 * @root: the root of the fs/file tree
5547 * @di_key: the key of the DIR_ITEM/DIR_INDEX being checked
5549 * @size: the st_size of the INODE_ITEM
5550 * @ext_ref: the EXTENDED_IREF feature
5552 * Return 0 if no error occurred.
5553 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every btrfs_dir_item entry stored in the DIR_ITEM/DIR_INDEX item that
 * @path points to (key @di_key).
 *
 * For each entry the name is read and, for a DIR_ITEM, compared against the
 * name hash in the key offset.  The entry is then cross-checked against the
 * INODE_ITEM it points to, the INODE_REF/INODE_EXTREF (find_inode_ref()) and
 * the related DIR_INDEX/DIR_ITEM (find_dir_item()).  Name lengths are added
 * to *size.  When repair is enabled, problems go to repair_dir_item();
 * remaining problems are printed by print_dir_item_err().
 *
 * Before returning, @path is searched back to @di_key.
 */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556 struct btrfs_path *path, u64 *size,
5557 unsigned int ext_ref)
5559 struct btrfs_dir_item *di;
5560 struct btrfs_inode_item *ii;
5561 struct btrfs_key key;
5562 struct btrfs_key location;
5563 struct extent_buffer *node;
5565 char namebuf[BTRFS_NAME_LEN] = {0};
5577 int need_research = 0;
5580 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581 * ignore index check.
5583 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584 index = di_key->offset;
5591 /* since after repair, path and the dir item may be changed */
5592 if (need_research) {
5594 err |= DIR_COUNT_AGAIN;
5595 btrfs_release_path(path);
5596 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597 /* the item was deleted, let path point the last checked item */
5599 if (path->slots[0] == 0)
5600 btrfs_prev_leaf(root, path);
5608 node = path->nodes[0];
5609 slot = path->slots[0];
5611 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612 total = btrfs_item_size_nr(node, slot);
/*
 * sizeof(namebuf) / sizeof(*namebuf) is the element count; it equals the
 * byte size only because namebuf is a char array.
 */
5613 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
/* One item may pack several entries; walk them all. */
5615 while (cur < total) {
5616 data_len = btrfs_dir_data_len(node, di);
5619 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5621 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622 di_key->objectid, di_key->offset, data_len);
5624 name_len = btrfs_dir_name_len(node, di);
5625 if (name_len <= BTRFS_NAME_LEN) {
5628 len = BTRFS_NAME_LEN;
5629 warning("root %llu %s[%llu %llu] name too long",
5631 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632 di_key->objectid, di_key->offset);
/*
 * NOTE(review): the on-disk name_len is added here, not the capped len,
 * whereas __count_dir_isize() caps at BTRFS_NAME_LEN - confirm intended.
 */
5634 (*size) += name_len;
5635 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5637 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the name it stores. */
5639 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640 di_key->offset != btrfs_name_hash(namebuf, len)) {
5642 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643 root->objectid, di_key->objectid, di_key->offset,
5644 namebuf, len, filetype, di_key->offset,
5645 btrfs_name_hash(namebuf, len));
5648 btrfs_dir_item_key_to_cpu(node, di, &location);
5649 /* Ignore related ROOT_ITEM check */
5650 if (location.type == BTRFS_ROOT_ITEM_KEY)
5653 btrfs_release_path(path);
5654 /* Check relative INODE_ITEM(existence/filetype) */
5655 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5657 tmp_err |= INODE_ITEM_MISSING;
5661 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662 struct btrfs_inode_item);
5663 mode = btrfs_inode_mode(path->nodes[0], ii);
5664 if (imode_to_type(mode) != filetype) {
5665 tmp_err |= INODE_ITEM_MISMATCH;
5669 /* Check relative INODE_REF/INODE_EXTREF */
5670 key.objectid = location.objectid;
5671 key.type = BTRFS_INODE_REF_KEY;
5672 key.offset = di_key->objectid;
5673 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5676 /* check relative INDEX/ITEM */
5677 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY a few lines above,
 * so unless find_inode_ref() rewrites it this comparison looks always
 * false and the DIR_INDEX lookup is never taken.  di_key->type was
 * probably intended - confirm.
 */
5678 if (key.type == BTRFS_DIR_ITEM_KEY) {
5679 key.type = BTRFS_DIR_INDEX_KEY;
5682 key.type = BTRFS_DIR_ITEM_KEY;
5683 key.offset = btrfs_name_hash(namebuf, name_len);
5686 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687 name_len, filetype);
5688 /* find_dir_item may find index */
5689 if (key.type == BTRFS_DIR_INDEX_KEY)
/*
 * repair_dir_item() returns the error bits left after repair; a value
 * different from tmp_err means something was changed (presumably this
 * triggers the re-search at the top - the branch body is not visible).
 */
5693 if (tmp_err && repair) {
5694 ret = repair_dir_item(root, di_key->objectid,
5695 location.objectid, index,
5696 imode_to_type(mode), namebuf,
5698 if (ret != tmp_err) {
5703 btrfs_release_path(path);
5704 print_dir_item_err(root, di_key, location.objectid, index,
5705 namebuf, name_len, filetype, tmp_err);
/* Step over header, name and data to reach the next entry. */
5707 len = sizeof(*di) + name_len + data_len;
5708 di = (struct btrfs_dir_item *)((char *)di + len);
5711 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713 root->objectid, di_key->objectid,
/* Point @path back at the item the caller passed in. */
5720 btrfs_release_path(path);
5721 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5723 err |= ret > 0 ? -ENOENT : ret;
5728 * Wrapper function of btrfs_punch_hole.
5730 * Returns 0 means success.
5731 * Returns not 0 means error.
/*
 * Call btrfs_punch_hole() for inode @ino at offset @start inside a freshly
 * started transaction on @root, print the outcome, then commit the
 * transaction.  A failure to start the transaction is returned directly as
 * PTR_ERR(trans).
 */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5736 struct btrfs_trans_handle *trans;
5739 trans = btrfs_start_transaction(root, 1);
5741 return PTR_ERR(trans);
5743 ret = btrfs_punch_hole(trans, root, ino, start, len);
5745 error("failed to add hole [%llu, %llu] in inode [%llu]",
5748 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the return value of the commit is not checked here. */
5751 btrfs_commit_transaction(trans, root);
5756 * Check file extent datasum/hole, update the size of the file extents,
5757 * check and update the last offset of the file extent.
5759 * @root: the root of fs/file tree.
5760 * @fkey: the key of the file extent.
5761 * @nodatasum: INODE_NODATASUM feature.
5762 * @size: the sum of all EXTENT_DATA items size for this inode.
5763 * @end: the offset of the last extent.
5765 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (@fkey, found at @slot of @node).
 *
 * Inline extents are checked for a non-zero length and, when uncompressed,
 * for a length equal to the inline item length.  Other extents must be REG
 * or PREALLOC; their checksum coverage is counted with count_csum_range()
 * and compared with what @nodatasum and the extent type allow.  A gap
 * between *end and the key offset is reported, or punched when repairing,
 * unless no_holes is set.  *size and *end are advanced by the extent length.
 */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768 struct extent_buffer *node, int slot,
5769 unsigned int nodatasum, u64 *size, u64 *end)
5771 struct btrfs_file_extent_item *fi;
5774 u64 extent_num_bytes;
5776 u64 csum_found; /* In byte size, sectorsize aligned */
5777 u64 search_start; /* Logical range start we search for csum */
5778 u64 search_len; /* Logical range len we search for csum */
5779 unsigned int extent_type;
5780 unsigned int is_hole;
5785 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5787 /* Check inline extent */
5788 extent_type = btrfs_file_extent_type(node, fi);
5789 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790 struct btrfs_item *e = btrfs_item_nr(slot);
5791 u32 item_inline_len;
5793 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795 compressed = btrfs_file_extent_compression(node, fi);
5796 if (extent_num_bytes == 0) {
5798 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799 root->objectid, fkey->objectid, fkey->offset);
5800 err |= FILE_EXTENT_ERROR;
/* The two lengths are only compared for uncompressed inline data. */
5802 if (!compressed && extent_num_bytes != item_inline_len) {
5804 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805 root->objectid, fkey->objectid, fkey->offset,
5806 extent_num_bytes, item_inline_len);
5807 err |= FILE_EXTENT_ERROR;
5809 *end += extent_num_bytes;
5810 *size += extent_num_bytes;
5814 /* Check extent type */
5815 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817 err |= FILE_EXTENT_ERROR;
5818 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819 root->objectid, fkey->objectid, fkey->offset);
5823 /* Check REG_EXTENT/PREALLOC_EXTENT */
5824 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827 extent_offset = btrfs_file_extent_offset(node, fi);
5828 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
5829 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5832 * Check EXTENT_DATA csum
5834 * For plain (uncompressed) extent, we should only check the range
5835 * we're referring to, as it's possible that part of prealloc extent
5836 * has been written, and has csum:
5838 * |<--- Original large preallocated extent A ---->|
5839 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5842 * For compressed extent, we should check the whole range.
5845 search_start = disk_bytenr + extent_offset;
5846 search_len = extent_num_bytes;
5848 search_start = disk_bytenr;
5849 search_len = disk_num_bytes;
5851 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three outcomes are treated as errors: checksums on a nodatasum inode,
 * a regular non-hole extent with less checksum coverage than searched
 * (or a failed lookup), and checksums on a preallocated extent.
 */
5852 if (csum_found > 0 && nodatasum) {
5853 err |= ODD_CSUM_ITEM;
5854 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855 root->objectid, fkey->objectid, fkey->offset);
5856 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857 !is_hole && (ret < 0 || csum_found < search_len)) {
5858 err |= CSUM_ITEM_MISSING;
5859 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860 root->objectid, fkey->objectid, fkey->offset,
5861 csum_found, search_len);
5862 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863 err |= ODD_CSUM_ITEM;
5864 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865 root->objectid, fkey->objectid, fkey->offset, csum_found);
5868 /* Check EXTENT_DATA hole */
/* *end is where the previous extent stopped; any other start is a gap. */
5869 if (!no_holes && *end != fkey->offset) {
5871 ret = punch_extent_hole(root, fkey->objectid,
5872 *end, fkey->offset - *end);
5873 if (!repair || ret) {
5874 err |= FILE_EXTENT_ERROR;
5876 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5877 root->objectid, fkey->objectid, fkey->offset, *end);
5881 *end += extent_num_bytes;
5883 *size += extent_num_bytes;
5889 * Set inode item nbytes to @nbytes
5891 * Returns 0 on success
5892 * Returns != 0 on error
5894 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5895 struct btrfs_path *path,
5896 u64 ino, u64 nbytes)
5898 struct btrfs_trans_handle *trans;
5899 struct btrfs_inode_item *ii;
5900 struct btrfs_key key;
5901 struct btrfs_key research_key;
5905 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5908 key.type = BTRFS_INODE_ITEM_KEY;
5911 trans = btrfs_start_transaction(root, 1);
5912 if (IS_ERR(trans)) {
5913 ret = PTR_ERR(trans);
5918 btrfs_release_path(path);
5919 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5927 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5928 struct btrfs_inode_item);
5929 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5930 btrfs_mark_buffer_dirty(path->nodes[0]);
5932 btrfs_commit_transaction(trans, root);
5935 error("failed to set nbytes in inode %llu root %llu",
5936 ino, root->root_key.objectid);
5938 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5939 root->root_key.objectid, nbytes);
5942 btrfs_release_path(path);
5943 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5950 * Set directory inode isize to @isize.
5952 * Returns 0 on success.
5953 * Returns != 0 on error.
/*
 * Write @isize into the size field of the INODE_ITEM of directory inode @ino.
 *
 * The key @path points to on entry is saved first; @path is then released
 * and reused to look up the INODE_ITEM inside a new transaction.  After the
 * commit the outcome is printed and @path is searched back to the saved key.
 */
5955 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5956 struct btrfs_path *path,
5959 struct btrfs_trans_handle *trans;
5960 struct btrfs_inode_item *ii;
5961 struct btrfs_key key;
5962 struct btrfs_key research_key;
/* Remember the caller's position before the path is reused. */
5966 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5969 key.type = BTRFS_INODE_ITEM_KEY;
5972 trans = btrfs_start_transaction(root, 1);
5973 if (IS_ERR(trans)) {
5974 ret = PTR_ERR(trans);
5979 btrfs_release_path(path);
/* Last argument 1 (cow): the leaf is going to be modified. */
5980 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5988 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5989 struct btrfs_inode_item);
5990 btrfs_set_inode_size(path->nodes[0], ii, isize);
5991 btrfs_mark_buffer_dirty(path->nodes[0]);
5993 btrfs_commit_transaction(trans, root);
5996 error("failed to set isize in inode %llu root %llu",
5997 ino, root->root_key.objectid);
5999 printf("Set isize in inode %llu root %llu to %llu\n",
6000 ino, root->root_key.objectid, isize);
/* Restore the caller's position. */
6002 btrfs_release_path(path);
6003 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6010 * Wrapper function for btrfs_add_orphan_item().
6012 * Returns 0 on success.
6013 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino with btrfs_add_orphan_item(), inside a
 * new transaction on @root.
 *
 * The key @path points to on entry is saved, @path is released and passed to
 * btrfs_add_orphan_item(), and after the commit @path is searched back to
 * the saved key.  The outcome is printed.
 */
6015 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6016 struct btrfs_path *path, u64 ino)
6018 struct btrfs_trans_handle *trans;
6019 struct btrfs_key research_key;
/* Remember the caller's position before the path is reused. */
6023 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6025 trans = btrfs_start_transaction(root, 1);
6026 if (IS_ERR(trans)) {
6027 ret = PTR_ERR(trans);
6032 btrfs_release_path(path);
6033 ret = btrfs_add_orphan_item(trans, root, path, ino);
6035 btrfs_commit_transaction(trans, root);
6038 error("failed to add inode %llu as orphan item root %llu",
6039 ino, root->root_key.objectid);
6041 printf("Added inode %llu as orphan item root %llu\n",
6042 ino, root->root_key.objectid);
/* Restore the caller's position. */
6044 btrfs_release_path(path);
6045 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6051 /* Set inode_item nlink to @ref_count.
6052 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6054 * Returns 0 on success
6056 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6057 struct btrfs_path *path, u64 ino,
6058 const char *name, u32 namelen,
6059 u64 ref_count, u8 filetype, u64 *nlink)
6061 struct btrfs_trans_handle *trans;
6062 struct btrfs_inode_item *ii;
6063 struct btrfs_key key;
6064 struct btrfs_key old_key;
6065 char namebuf[BTRFS_NAME_LEN] = {0};
6071 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6073 if (name && namelen) {
6074 ASSERT(namelen <= BTRFS_NAME_LEN);
6075 memcpy(namebuf, name, namelen);
6078 sprintf(namebuf, "%llu", ino);
6079 name_len = count_digits(ino);
6080 printf("Can't find file name for inode %llu, use %s instead\n",
6084 trans = btrfs_start_transaction(root, 1);
6085 if (IS_ERR(trans)) {
6086 ret = PTR_ERR(trans);
6090 btrfs_release_path(path);
6091 /* if refs is 0, put it into lostfound */
6092 if (ref_count == 0) {
6093 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6094 name_len, filetype, &ref_count);
6099 /* reset inode_item's nlink to ref_count */
6101 key.type = BTRFS_INODE_ITEM_KEY;
6104 btrfs_release_path(path);
6105 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6111 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6112 struct btrfs_inode_item);
6113 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6114 btrfs_mark_buffer_dirty(path->nodes[0]);
6119 btrfs_commit_transaction(trans, root);
6123 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6124 root->objectid, ino, namebuf, filetype);
6126 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6127 root->objectid, ino, namebuf, filetype);
6130 btrfs_release_path(path);
6131 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6138 * Check INODE_ITEM and related ITEMs (the same inode number)
6139 * 1. check link count
6140 * 2. check inode ref/extref
6141 * 3. check dir item/index
6143 * @ext_ref: the EXTENDED_IREF feature
6145 * Return 0 if no error occurred.
6146 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check the INODE_ITEM that @path points to, plus every following item that
 * carries the same objectid.
 *
 * The inode's size, nbytes, mode, nlink and NODATASUM flag are read first.
 * The following items are then dispatched by key type to check_inode_ref(),
 * check_inode_extref(), check_dir_item() or check_file_extent().  Afterwards
 * the collected totals (refs, size, extent_size, extent_end) are compared
 * with the values stored in the inode; with repair enabled the matching
 * repair_*_lowmem() helper or punch_extent_hole() is tried before an error
 * is recorded.
 */
6148 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6149 unsigned int ext_ref)
6151 struct extent_buffer *node;
6152 struct btrfs_inode_item *ii;
6153 struct btrfs_key key;
6154 struct btrfs_key last_key;
6163 u64 extent_size = 0;
6165 unsigned int nodatasum;
6169 char namebuf[BTRFS_NAME_LEN] = {0};
6172 node = path->nodes[0];
6173 slot = path->slots[0];
6175 btrfs_item_key_to_cpu(node, &key, slot);
6176 inode_id = key.objectid;
/* Items of the orphan objectid are stepped over, not checked as inodes. */
6178 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6179 ret = btrfs_next_item(root, path);
6185 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6186 isize = btrfs_inode_size(node, ii);
6187 nbytes = btrfs_inode_nbytes(node, ii);
6188 mode = btrfs_inode_mode(node, ii);
6189 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6190 nlink = btrfs_inode_nlink(node, ii);
6191 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the current key so the path can be restored at the tree end. */
6194 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6195 ret = btrfs_next_item(root, path);
6197 /* out will fill 'err' using current statistics */
6199 } else if (ret > 0) {
6204 node = path->nodes[0];
6205 slot = path->slots[0];
6206 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
6207 if (key.objectid != inode_id)
6211 case BTRFS_INODE_REF_KEY:
6212 ret = check_inode_ref(root, &key, path, namebuf,
6213 &name_len, &refs, mode);
6216 case BTRFS_INODE_EXTREF_KEY:
6217 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6218 warning("root %llu EXTREF[%llu %llu] isn't supported",
6219 root->objectid, key.objectid,
6221 ret = check_inode_extref(root, &key, node, slot, &refs,
6225 case BTRFS_DIR_ITEM_KEY:
6226 case BTRFS_DIR_INDEX_KEY:
6228 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6229 root->objectid, inode_id,
6230 imode_to_type(mode), key.objectid,
6233 ret = check_dir_item(root, &key, path, &size, ext_ref);
6236 case BTRFS_EXTENT_DATA_KEY:
6238 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6239 root->objectid, inode_id, key.objectid,
6242 ret = check_file_extent(root, &key, node, slot,
6243 nodatasum, &extent_size,
6247 case BTRFS_XATTR_ITEM_KEY:
6250 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6251 key.objectid, key.type, key.offset);
/* The walk hit the end of the tree: go back to the last key seen. */
6256 if (err & LAST_ITEM) {
6257 btrfs_release_path(path);
6258 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6263 /* verify INODE_ITEM nlink/isize/nbytes */
/* A repair in check_dir_item() invalidated size: count it again. */
6265 if (repair && (err & DIR_COUNT_AGAIN)) {
6266 err &= ~DIR_COUNT_AGAIN;
6267 count_dir_isize(root, inode_id, &size);
6270 if ((nlink != 1 || refs != 1) && repair) {
6271 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6272 namebuf, name_len, refs, imode_to_type(mode),
6277 err |= LINK_COUNT_ERROR;
6278 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6279 root->objectid, inode_id, nlink);
6283 * Just a warning, as dir inode nbytes is just an
6284 * instructive value.
6286 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6287 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6288 root->objectid, inode_id,
6289 root->fs_info->nodesize);
6292 if (isize != size) {
6294 ret = repair_dir_isize_lowmem(root, path,
6296 if (!repair || ret) {
6299 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6300 root->objectid, inode_id, isize, size);
6304 if (nlink != refs) {
6306 ret = repair_inode_nlinks_lowmem(root, path,
6307 inode_id, namebuf, name_len, refs,
6308 imode_to_type(mode), &nlink);
6309 if (!repair || ret) {
6310 err |= LINK_COUNT_ERROR;
6312 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6313 root->objectid, inode_id, nlink, refs);
6315 } else if (!nlink) {
6317 ret = repair_inode_orphan_item_lowmem(root,
6319 if (!repair || ret) {
6321 error("root %llu INODE[%llu] is orphan item",
6322 root->objectid, inode_id);
/* No bytes accounted but extents end before isize: a hole is expected. */
6326 if (!nbytes && !no_holes && extent_end < isize) {
6328 ret = punch_extent_hole(root, inode_id,
6329 extent_end, isize - extent_end);
6330 if (!repair || ret) {
6331 err |= NBYTES_ERROR;
6333 "root %llu INODE[%llu] size %llu should have a file extent hole",
6334 root->objectid, inode_id, isize);
6338 if (nbytes != extent_size) {
6340 ret = repair_inode_nbytes_lowmem(root, path,
6341 inode_id, extent_size);
6342 if (!repair || ret) {
6343 err |= NBYTES_ERROR;
6345 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6346 root->objectid, inode_id, nbytes,
6352 if (err & LAST_ITEM)
6353 btrfs_next_item(root, path);
6358 * Insert the missing inode item and inode ref.
6360 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref dir.
6361 * Root dir should be handled specially because root dir is the root of fs.
6363 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root according to
 * the flags in @err.
 *
 * INODE_REF_MISSING: an INODE_REF named ".." that points from the inode to
 * itself is inserted inside a new transaction.
 * INODE_ITEM_MISSING: repair_inode_item_missing() is called with file type
 * BTRFS_FT_DIR.
 * Each flag is cleared from err once its repair step is done.
 */
6365 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6367 struct btrfs_trans_handle *trans;
6368 struct btrfs_key key;
6369 struct btrfs_path path;
6370 int filetype = BTRFS_FT_DIR;
6373 btrfs_init_path(&path);
6375 if (err & INODE_REF_MISSING) {
/* objectid and offset are equal: the first inode references itself. */
6376 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6377 key.type = BTRFS_INODE_REF_KEY;
6378 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6380 trans = btrfs_start_transaction(root, 1);
6381 if (IS_ERR(trans)) {
6382 ret = PTR_ERR(trans);
6386 btrfs_release_path(&path);
6387 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6391 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6392 BTRFS_FIRST_FREE_OBJECTID,
6393 BTRFS_FIRST_FREE_OBJECTID, 0);
6397 printf("Add INODE_REF[%llu %llu] name %s\n",
6398 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6400 err &= ~INODE_REF_MISSING;
6403 error("fail to insert first inode's ref");
6404 btrfs_commit_transaction(trans, root);
6407 if (err & INODE_ITEM_MISSING) {
6408 ret = repair_inode_item_missing(root,
6409 BTRFS_FIRST_FREE_OBJECTID, filetype);
6412 err &= ~INODE_ITEM_MISSING;
6416 error("fail to repair first inode");
6417 btrfs_release_path(&path);
6422 * check first root dir's inode_item and inode_ref
6424 * returns 0 means no error
6425 * returns >0 means error
6426 * returns <0 means fatal error
/*
 * Check that the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root has an
 * INODE_ITEM of directory type and an INODE_REF named "..".
 *
 * A root with zero refs whose drop_progress objectid is already at or past
 * BTRFS_FIRST_FREE_OBJECTID gets special handling before the lookup.
 * Problems are passed to repair_fs_first_inode() (the condition guarding
 * that call is not visible in this listing); whatever remains is printed.
 * A negative ret takes precedence over the error bitmap in the return value.
 */
6428 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6430 struct btrfs_path path;
6431 struct btrfs_key key;
6432 struct btrfs_inode_item *ii;
6438 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6439 key.type = BTRFS_INODE_ITEM_KEY;
6442 /* For root being dropped, we don't need to check first inode */
6443 if (btrfs_root_refs(&root->root_item) == 0 &&
6444 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6445 BTRFS_FIRST_FREE_OBJECTID)
6448 btrfs_init_path(&path);
6449 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6454 err |= INODE_ITEM_MISSING;
6456 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6457 struct btrfs_inode_item);
6458 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The first inode of a tree must be a directory. */
6459 if (imode_to_type(mode) != BTRFS_FT_DIR)
6460 err |= INODE_ITEM_MISMATCH;
6463 /* lookup first inode ref */
6464 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6465 key.type = BTRFS_INODE_REF_KEY;
6466 /* special index value */
6469 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6475 btrfs_release_path(&path);
6478 err = repair_fs_first_inode(root, err);
6480 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6481 error("root dir INODE_ITEM is %s",
6482 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6483 if (err & INODE_REF_MISSING)
6484 error("root dir INODE_REF is missing");
6486 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in its backref rbtree.
 *
 * A temporary tree_backref is filled in as the search key and compared with
 * compare_extent_backref().  The visible lines set the parent and mark the
 * key as a full backref (the condition that selects parent versus root is
 * not visible in this listing).  back starts out NULL and is set from the
 * node that rb_search() returns.
 */
6489 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6490 u64 parent, u64 root)
6492 struct rb_node *node;
6493 struct tree_backref *back = NULL;
6494 struct tree_backref match = {
6501 match.parent = parent;
6502 match.node.full_backref = 1;
6507 node = rb_search(&rec->backref_tree, &match.node.node,
6508 (rb_compare_keys)compare_extent_backref, NULL);
6510 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in its backref rbtree.
 *
 * A temporary data_backref is filled in as the search key (found_ref and
 * disk_bytenr are among the visible fields) and compared with
 * compare_extent_backref().  The visible lines set the parent and mark the
 * key as a full backref (the selecting condition is not visible in this
 * listing).  back starts out NULL and is set from the node that rb_search()
 * returns.
 */
6515 static struct data_backref *find_data_backref(struct extent_record *rec,
6516 u64 parent, u64 root,
6517 u64 owner, u64 offset,
6519 u64 disk_bytenr, u64 bytes)
6521 struct rb_node *node;
6522 struct data_backref *back = NULL;
6523 struct data_backref match = {
6530 .found_ref = found_ref,
6531 .disk_bytenr = disk_bytenr,
6535 match.parent = parent;
6536 match.node.full_backref = 1;
6541 node = rb_search(&rec->backref_tree, &match.node.node,
6542 (rb_compare_keys)compare_extent_backref, NULL);
6544 back = to_data_backref(rb_node_to_extent_backref(node));
6549 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6550 * blocks and integrity of fs tree items.
6552 * @root: the root of the tree to be checked.
6553 * @ext_ref feature EXTENDED_IREF is enabled or not.
6554 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6555 * otherwise means check fs tree(s) items relationship and
6556 * @root MUST be a fs tree root.
6557 * Returns 0 represents OK.
6558 * Returns not 0 represents error.
/*
 * Walk the tree of @root with walk_down_tree_v2()/walk_up_tree_v2().
 *
 * check_fs_first_inode() is called before the walk.  The walk starts at the
 * root node when the root still has refs or its drop_progress objectid is 0;
 * otherwise it resumes from the drop_progress key at drop_level.
 */
6560 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6561 struct btrfs_root *root, unsigned int ext_ref,
6565 struct btrfs_path path;
6566 struct node_refs nrefs;
6567 struct btrfs_root_item *root_item = &root->root_item;
6572 memset(&nrefs, 0, sizeof(nrefs));
6575 * We need to manually check the first inode item (256)
6576 * As the following traversal function will only start from
6577 * the first inode item in the leaf, if inode item (256) is
6578 * missing we will skip it forever.
6580 ret = check_fs_first_inode(root, ext_ref);
6586 level = btrfs_header_level(root->node);
6587 btrfs_init_path(&path);
6589 if (btrfs_root_refs(root_item) > 0 ||
6590 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start from the root node; the path holds its own buffer reference. */
6591 path.nodes[level] = root->node;
6592 path.slots[level] = 0;
6593 extent_buffer_get(root->node);
6595 struct btrfs_key key;
/* Partially dropped root: resume where the drop stopped. */
6597 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6598 level = root_item->drop_level;
6599 path.lowest_level = level;
6600 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6607 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6608 ext_ref, check_all);
6612 /* if ret is negative, walk shall stop */
6618 ret = walk_up_tree_v2(root, &path, &level);
6620 /* Normal exit, reset ret to err */
6627 btrfs_release_path(&path);
6632 * Iterate all items in the tree and call check_inode_item() to check.
6634 * @root: the root of the tree to be checked.
6635 * @ext_ref: the EXTENDED_IREF feature
6637 * Return 0 if no error found.
6638 * Return <0 for error.
/*
 * Check one fs/file tree: reset the cached block groups of the filesystem,
 * then run check_btrfs_root() without a transaction and with 0 as its last
 * argument.
 */
6640 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6642 reset_cached_block_groups(root->fs_info);
6643 return check_btrfs_root(NULL, root, ext_ref, 0);
6647 * Find the relative ref for root_ref and root_backref.
6649 * @root: the root of the root tree.
6650 * @ref_key: the key of the root ref.
6652 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF at @slot of @node has a
 * counterpart of the opposite type with the same dirid, sequence, name
 * length and name.
 *
 * Sets ROOT_REF_MISSING when the counterpart is not found and
 * ROOT_REF_MISMATCH when its content differs.
 */
6654 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6655 struct extent_buffer *node, int slot)
6657 struct btrfs_path path;
6658 struct btrfs_key key;
6659 struct btrfs_root_ref *ref;
6660 struct btrfs_root_ref *backref;
6661 char ref_name[BTRFS_NAME_LEN] = {0};
6662 char backref_name[BTRFS_NAME_LEN] = {0};
6668 u32 backref_namelen;
6673 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6674 ref_dirid = btrfs_root_ref_dirid(node, ref);
6675 ref_seq = btrfs_root_ref_sequence(node, ref);
6676 ref_namelen = btrfs_root_ref_name_len(node, ref);
6678 if (ref_namelen <= BTRFS_NAME_LEN) {
6681 len = BTRFS_NAME_LEN;
6682 warning("%s[%llu %llu] ref_name too long",
6683 ref_key->type == BTRFS_ROOT_REF_KEY ?
6684 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name is stored right after the btrfs_root_ref structure. */
6687 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6689 /* Find relative root_ref */
6690 key.objectid = ref_key->offset;
/* REF + BACKREF - type yields the opposite one of the two key types. */
6691 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6692 key.offset = ref_key->objectid;
6694 btrfs_init_path(&path);
6695 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6697 err |= ROOT_REF_MISSING;
6698 error("%s[%llu %llu] couldn't find relative ref",
6699 ref_key->type == BTRFS_ROOT_REF_KEY ?
6700 "ROOT_REF" : "ROOT_BACKREF",
6701 ref_key->objectid, ref_key->offset);
6705 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6706 struct btrfs_root_ref);
6707 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6708 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6709 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6711 if (backref_namelen <= BTRFS_NAME_LEN) {
6712 len = backref_namelen;
6714 len = BTRFS_NAME_LEN;
6715 warning("%s[%llu %llu] ref_name too long",
6716 key.type == BTRFS_ROOT_REF_KEY ?
6717 "ROOT_REF" : "ROOT_BACKREF",
6718 key.objectid, key.offset);
6720 read_extent_buffer(path.nodes[0], backref_name,
6721 (unsigned long)(backref + 1), len);
/*
 * len is the (capped) backref name length at this point; the names are
 * only compared when the two stored name lengths are equal.
 */
6723 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6724 ref_namelen != backref_namelen ||
6725 strncmp(ref_name, backref_name, len)) {
6726 err |= ROOT_REF_MISMATCH;
6727 error("%s[%llu %llu] mismatch relative ref",
6728 ref_key->type == BTRFS_ROOT_REF_KEY ?
6729 "ROOT_REF" : "ROOT_BACKREF",
6730 ref_key->objectid, ref_key->offset);
6733 btrfs_release_path(&path);
6738 * Check all fs/file tree in low_memory mode.
6740 * 1. for fs tree root item, call check_fs_root_v2()
6741 * 2. for fs tree root ref/backref, call check_root_ref()
6743 * Return 0 if no error occurred.
/*
 * Iterate the root tree starting at BTRFS_FS_TREE_OBJECTID and check every
 * fs/subvolume tree found there.
 *
 * ROOT_ITEMs whose objectid passes fs_root_objectid() are read and checked
 * with check_fs_root_v2(); ROOT_REF and ROOT_BACKREF items are checked with
 * check_root_ref().  The iteration stops once an objectid above
 * BTRFS_LAST_FREE_OBJECTID is seen.
 */
6745 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6747 struct btrfs_root *tree_root = fs_info->tree_root;
6748 struct btrfs_root *cur_root = NULL;
6749 struct btrfs_path path;
6750 struct btrfs_key key;
6751 struct extent_buffer *node;
6752 unsigned int ext_ref;
6757 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6759 btrfs_init_path(&path);
6760 key.objectid = BTRFS_FS_TREE_OBJECTID;
6762 key.type = BTRFS_ROOT_ITEM_KEY;
6764 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6768 } else if (ret > 0) {
6774 node = path.nodes[0];
6775 slot = path.slots[0];
6776 btrfs_item_key_to_cpu(node, &key, slot);
6777 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6779 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6780 fs_root_objectid(key.objectid)) {
/*
 * Reloc trees are read without the root cache and freed again below;
 * all other roots go through btrfs_read_fs_root().
 */
6781 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6782 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6785 key.offset = (u64)-1;
6786 cur_root = btrfs_read_fs_root(fs_info, &key);
6789 if (IS_ERR(cur_root)) {
6790 error("Fail to read fs/subvol tree: %lld",
6796 ret = check_fs_root_v2(cur_root, ext_ref);
6799 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6800 btrfs_free_fs_root(cur_root);
6801 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6802 key.type == BTRFS_ROOT_BACKREF_KEY) {
6803 ret = check_root_ref(tree_root, &key, node, slot);
6807 ret = btrfs_next_item(tree_root, &path);
6817 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check: prints a progress line to stderr when
 * the progress indicator is disabled, then dispatches on check_mode to
 * check_fs_roots_v2() (CHECK_MODE_LOWMEM) or check_fs_roots().
 */
6821 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6822 struct cache_tree *root_cache)
6826 if (!ctx.progress_enabled)
6827 fprintf(stderr, "checking fs roots\n");
6828 if (check_mode == CHECK_MODE_LOWMEM)
6829 ret = check_fs_roots_v2(fs_info);
6831 ret = check_fs_roots(fs_info, root_cache);
/*
 * Verify the backrefs collected for extent record @rec.
 *
 * For every backref in rec->backref_tree the visible checks are: it was
 * found in the extent tree, a tree backref was referenced back, a data
 * backref's found_ref equals num_refs, and its disk_bytenr/bytes match
 * rec->start/rec->nr.  Finally the accumulated count is compared with
 * rec->refs.  Diagnostics go to stderr.
 * NOTE(review): how @print_errs gates the messages and what is returned are
 * not visible in this listing.
 */
6836 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6838 struct extent_backref *back, *tmp;
6839 struct tree_backref *tback;
6840 struct data_backref *dback;
6844 rbtree_postorder_for_each_entry_safe(back, tmp,
6845 &rec->backref_tree, node) {
/* Check 1: the backref must also exist in the extent tree. */
6846 if (!back->found_extent_tree) {
6850 if (back->is_data) {
6851 dback = to_data_backref(back);
6852 fprintf(stderr, "Data backref %llu %s %llu"
6853 " owner %llu offset %llu num_refs %lu"
6854 " not found in extent tree\n",
6855 (unsigned long long)rec->start,
6856 back->full_backref ?
6858 back->full_backref ?
6859 (unsigned long long)dback->parent:
6860 (unsigned long long)dback->root,
6861 (unsigned long long)dback->owner,
6862 (unsigned long long)dback->offset,
6863 (unsigned long)dback->num_refs);
6865 tback = to_tree_backref(back);
6866 fprintf(stderr, "Tree backref %llu parent %llu"
6867 " root %llu not found in extent tree\n",
6868 (unsigned long long)rec->start,
6869 (unsigned long long)tback->parent,
6870 (unsigned long long)tback->root);
/* Check 2: a tree backref must have been seen from the referencing side. */
6873 if (!back->is_data && !back->found_ref) {
6877 tback = to_tree_backref(back);
6878 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6879 (unsigned long long)rec->start,
6880 back->full_backref ? "parent" : "root",
6881 back->full_backref ?
6882 (unsigned long long)tback->parent :
6883 (unsigned long long)tback->root, back);
/* Check 3: per-backref consistency of data backrefs. */
6885 if (back->is_data) {
6886 dback = to_data_backref(back);
6887 if (dback->found_ref != dback->num_refs) {
6891 fprintf(stderr, "Incorrect local backref count"
6892 " on %llu %s %llu owner %llu"
6893 " offset %llu found %u wanted %u back %p\n",
6894 (unsigned long long)rec->start,
6895 back->full_backref ?
6897 back->full_backref ?
6898 (unsigned long long)dback->parent:
6899 (unsigned long long)dback->root,
6900 (unsigned long long)dback->owner,
6901 (unsigned long long)dback->offset,
6902 dback->found_ref, dback->num_refs, back);
6904 if (dback->disk_bytenr != rec->start) {
6908 fprintf(stderr, "Backref disk bytenr does not"
6909 " match extent record, bytenr=%llu, "
6910 "ref bytenr=%llu\n",
6911 (unsigned long long)rec->start,
6912 (unsigned long long)dback->disk_bytenr);
6915 if (dback->bytes != rec->nr) {
6919 fprintf(stderr, "Backref bytes do not match "
6920 "extent backref, bytenr=%llu, ref "
6921 "bytes=%llu, backref bytes=%llu\n",
6922 (unsigned long long)rec->start,
6923 (unsigned long long)rec->nr,
6924 (unsigned long long)dback->bytes);
/* Accumulate the number of references found across all backrefs. */
6927 if (!back->is_data) {
6930 dback = to_data_backref(back);
6931 found += dback->found_ref;
/* Check 4: the total must match the ref count of the extent record. */
6934 if (found != rec->refs) {
6938 fprintf(stderr, "Incorrect global backref count "
6939 "on %llu found %llu wanted %llu\n",
6940 (unsigned long long)rec->start,
6941 (unsigned long long)found,
6942 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Maps the rb_node back to its containing extent_backref.
 * NOTE(review): the release of @back itself is presumably the next statement
 * - confirm.
 */
6948 static void __free_one_backref(struct rb_node *node)
6950 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Dispose of every backref attached to @rec by walking rec->backref_tree
 * with rb_free_nodes(), using __free_one_backref() as the per-node callback.
 */
6955 static void free_all_extent_backrefs(struct extent_record *rec)
6957 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Tear down @extent_cache: take the first cached extent, map it to its
 * extent_record, unlink it from the cache tree and release its backrefs.
 * NOTE(review): this is presumably repeated until the tree is empty and the
 * record itself freed afterwards - confirm against the full body.
 */
6960 static void free_extent_record_cache(struct cache_tree *extent_cache)
6962 struct cache_extent *cache;
6963 struct extent_record *rec;
6966 cache = first_cache_extent(extent_cache);
6969 rec = container_of(cache, struct extent_record, cache);
6970 remove_cache_extent(extent_cache, cache);
6971 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when all of the following hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs and refs is non-zero,
 *  - there are no duplicates,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 *
 * On release the record is removed from the cache tree, its backrefs are
 * freed and it is unlinked from whatever list rec->list is on.
 */
6976 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6977 struct extent_record *rec)
6979 if (rec->content_checked && rec->owner_ref_checked &&
6980 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6981 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6982 !rec->bad_full_backref && !rec->crossing_stripes &&
6983 !rec->wrong_chunk_type) {
6984 remove_cache_extent(extent_cache, &rec->cache);
6985 free_all_extent_backrefs(rec);
6986 list_del_init(&rec->list);
/*
 * Check that tree block @buf is really reachable from the root named in its
 * header (btrfs_header_owner()).
 *
 * Two strategies are visible:
 *  1. Walk rec->backref_tree, looking at found_ref / full_backref and
 *     comparing the tree backref's root with the header owner.
 *  2. Otherwise read the owner root with btrfs_read_fs_root(), search it for
 *     the block's first key with path.lowest_level = level + 1, and compare
 *     the block pointer in the parent slot with buf->start.
 *
 * The final return yields 0 when `found` is set and 1 otherwise.
 * NOTE(review): earlier exits (matching backref, unreadable root, failed
 * search) are not visible here - confirm their return values.
 */
6992 static int check_owner_ref(struct btrfs_root *root,
6993 struct extent_record *rec,
6994 struct extent_buffer *buf)
6996 struct extent_backref *node, *tmp;
6997 struct tree_backref *back;
6998 struct btrfs_root *ref_root;
6999 struct btrfs_key key;
7000 struct btrfs_path path;
7001 struct extent_buffer *parent;
7006 rbtree_postorder_for_each_entry_safe(node, tmp,
7007 &rec->backref_tree, node) {
7010 if (!node->found_ref)
7012 if (node->full_backref)
7014 back = to_tree_backref(node);
7015 if (btrfs_header_owner(buf) == back->root)
7018 BUG_ON(rec->is_root);
7020 /* try to find the block by search corresponding fs tree */
7021 key.objectid = btrfs_header_owner(buf);
7022 key.type = BTRFS_ROOT_ITEM_KEY;
7023 key.offset = (u64)-1;
7025 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7026 if (IS_ERR(ref_root))
7029 level = btrfs_header_level(buf);
7031 btrfs_item_key_to_cpu(buf, &key, 0);
7033 btrfs_node_key_to_cpu(buf, &key, 0);
7035 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent's slot can be read. */
7036 path.lowest_level = level + 1;
7037 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7041 parent = path.nodes[level + 1];
7042 if (parent && buf->start == btrfs_node_blockptr(parent,
7043 path.slots[level + 1]))
7046 btrfs_release_path(&path);
7047 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full_backref entries are tested separately
 * before the root comparison.
 * NOTE(review): presumably returns non-zero when such a backref exists -
 * confirm against the full body.
 */
7050 static int is_extent_tree_record(struct extent_record *rec)
7052 struct extent_backref *node, *tmp;
7053 struct tree_backref *back;
7056 rbtree_postorder_for_each_entry_safe(node, tmp,
7057 &rec->backref_tree, node) {
7060 back = to_tree_backref(node);
7061 if (node->full_backref)
7063 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block with the corrupt-extent bookkeeping.
 *
 * Looks up the extent record covering [start, len) in @extent_cache, tests
 * it with is_extent_tree_record(), and finally calls
 * btrfs_add_corrupt_extent_record() using the record's parent_key converted
 * to CPU byte order. The result of that call is returned.
 * NOTE(review): the early exits for a failed lookup and for records rejected
 * by is_extent_tree_record() are not visible here - confirm.
 */
7070 static int record_bad_block_io(struct btrfs_fs_info *info,
7071 struct cache_tree *extent_cache,
7074 struct extent_record *rec;
7075 struct cache_extent *cache;
7076 struct btrfs_key key;
7078 cache = lookup_cache_extent(extent_cache, start, len);
7082 rec = container_of(cache, struct extent_record, cache);
7083 if (!is_extent_tree_record(rec))
7086 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7087 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node case (non-zero header level): the two btrfs_key_ptr structures are
 * read out and written back crosswise; btrfs_fixup_low_keys() is called with
 * the key of slot 0.
 *
 * Leaf case: the data of both items is copied into temporary malloc()ed
 * buffers and written back crosswise, then the item offsets, sizes and keys
 * are exchanged (keys via btrfs_set_item_key_unsafe() with path->slots[0]
 * pointed at each slot in turn).
 */
7090 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7091 struct extent_buffer *buf, int slot)
7093 if (btrfs_header_level(buf)) {
7094 struct btrfs_key_ptr ptr1, ptr2;
7096 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7097 sizeof(struct btrfs_key_ptr));
7098 read_extent_buffer(buf, &ptr2,
7099 btrfs_node_key_ptr_offset(slot + 1),
7100 sizeof(struct btrfs_key_ptr));
7101 write_extent_buffer(buf, &ptr1,
7102 btrfs_node_key_ptr_offset(slot + 1),
7103 sizeof(struct btrfs_key_ptr));
7104 write_extent_buffer(buf, &ptr2,
7105 btrfs_node_key_ptr_offset(slot),
7106 sizeof(struct btrfs_key_ptr));
7108 struct btrfs_disk_key key;
7109 btrfs_node_key(buf, &key, 0);
7110 btrfs_fixup_low_keys(root, path, &key,
7111 btrfs_header_level(buf) + 1);
7114 struct btrfs_item *item1, *item2;
7115 struct btrfs_key k1, k2;
7116 char *item1_data, *item2_data;
7117 u32 item1_offset, item2_offset, item1_size, item2_size;
7119 item1 = btrfs_item_nr(slot);
7120 item2 = btrfs_item_nr(slot + 1);
7121 btrfs_item_key_to_cpu(buf, &k1, slot);
7122 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7123 item1_offset = btrfs_item_offset(buf, item1);
7124 item2_offset = btrfs_item_offset(buf, item2);
7125 item1_size = btrfs_item_size(buf, item1);
7126 item2_size = btrfs_item_size(buf, item2);
7128 item1_data = malloc(item1_size);
7131 item2_data = malloc(item2_size);
7137 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7138 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write pairs one item's buffer with the OTHER item's
 * size (item1_data with item2_size and vice versa). If the two sizes differ
 * this reads past the smaller buffer - confirm whether equal sizes are
 * guaranteed by the callers.
 */
7140 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7141 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7145 btrfs_set_item_offset(buf, item1, item2_offset);
7146 btrfs_set_item_offset(buf, item2, item1_offset);
7147 btrfs_set_item_size(buf, item1, item2_size);
7148 btrfs_set_item_size(buf, item2, item1_size);
7150 path->slots[0] = slot;
7151 btrfs_set_item_key_unsafe(root, path, &k2);
7152 path->slots[0] = slot + 1;
7153 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (i, i + 1) are read as node keys or item keys and compared
 * with btrfs_comp_cpu_keys(); pairs that do not compare as "less than" are
 * handed to swap_values(), and the buffer is marked dirty.
 */
7158 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7160 struct extent_buffer *buf;
7161 struct btrfs_key k1, k2;
7163 int level = path->lowest_level;
7166 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an empty
 * block makes `nritems - 1` wrap around - confirm callers never get here
 * with nritems == 0.
 */
7167 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7169 btrfs_node_key_to_cpu(buf, &k1, i);
7170 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7172 btrfs_item_key_to_cpu(buf, &k1, i);
7173 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7175 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7177 ret = swap_values(root, path, buf, i);
7180 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only key types in the allow-list below (DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF) are eligible. For those,
 * the deletion is announced on stdout, the following item headers are moved
 * down by one with memmove_extent_buffer(), nritems is decremented and the
 * buffer is marked dirty. btrfs_fixup_low_keys() is called with the key of
 * item 0.
 * NOTE(review): the low-key fixup is presumably limited to slot 0, and other
 * key types presumably yield an error return - confirm.
 */
7186 static int delete_bogus_item(struct btrfs_root *root,
7187 struct btrfs_path *path,
7188 struct extent_buffer *buf, int slot)
7190 struct btrfs_key key;
7191 int nritems = btrfs_header_nritems(buf);
7193 btrfs_item_key_to_cpu(buf, &key, slot);
7195 /* These are all the keys we can deal with missing. */
7196 if (key.type != BTRFS_DIR_INDEX_KEY &&
7197 key.type != BTRFS_EXTENT_ITEM_KEY &&
7198 key.type != BTRFS_METADATA_ITEM_KEY &&
7199 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7200 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7203 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7204 (unsigned long long)key.objectid, key.type,
7205 (unsigned long long)key.offset, slot, buf->start);
7206 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7207 btrfs_item_nr_offset(slot + 1),
7208 sizeof(struct btrfs_item) *
7209 (nritems - slot - 1));
7210 btrfs_set_header_nritems(buf, nritems - 1);
7212 struct btrfs_disk_key disk_key;
7214 btrfs_item_key(buf, &disk_key, 0);
7215 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7217 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout, as tested below: the data of item 0 ends at
 * BTRFS_LEAF_DATA_SIZE(root), and the data of every later item ends where
 * the previous item's data begins. An item ending beyond that limit is
 * handed to delete_bogus_item(); an item ending short of it is moved up by
 * `shift` bytes with memmove_extent_buffer() and the buffer is marked dirty.
 */
7221 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7223 struct extent_buffer *buf;
7227 /* We should only get this for leaves */
7228 BUG_ON(path->lowest_level);
7229 buf = path->nodes[0];
7231 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7232 unsigned int shift = 0, offset;
7234 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7235 BTRFS_LEAF_DATA_SIZE(root)) {
7236 if (btrfs_item_end_nr(buf, i) >
7237 BTRFS_LEAF_DATA_SIZE(root)) {
7238 ret = delete_bogus_item(root, path, buf, i);
7241 fprintf(stderr, "item is off the end of the "
7242 "leaf, can't fix\n");
/* Gap between the end of item 0 and the end of the leaf data area. */
7246 shift = BTRFS_LEAF_DATA_SIZE(root) -
7247 btrfs_item_end_nr(buf, i);
7248 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7249 btrfs_item_offset_nr(buf, i - 1)) {
7250 if (btrfs_item_end_nr(buf, i) >
7251 btrfs_item_offset_nr(buf, i - 1)) {
7252 ret = delete_bogus_item(root, path, buf, i);
7255 fprintf(stderr, "items overlap, can't fix\n");
/* Gap between this item's end and the previous item's start. */
7259 shift = btrfs_item_offset_nr(buf, i - 1) -
7260 btrfs_item_end_nr(buf, i);
7265 printf("Shifting item nr %d by %u bytes in block %llu\n",
7266 i, shift, (unsigned long long)buf->start);
7267 offset = btrfs_item_offset_nr(buf, i);
7268 memmove_extent_buffer(buf,
7269 btrfs_leaf_data(buf) + offset + shift,
7270 btrfs_leaf_data(buf) + offset,
7271 btrfs_item_size_nr(buf, i));
7272 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7274 btrfs_mark_buffer_dirty(buf);
7278 * We may have moved things, in which case we want to exit so we don't
7279 * write those changes out. Once we have proper abort functionality in
7280 * progs this can be changed to something nicer.
7287 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7288 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. For each root id returned by btrfs_find_all_roots() for
 * buf->start, the root is read, a transaction is started, the block is
 * located with btrfs_search_slot() (cow = 1, skip_check_block set so the
 * known-bad block is not rejected), and then fix_key_order() or
 * fix_item_offset() is applied according to @status. Transactions are
 * committed with btrfs_commit_transaction() on the visible paths.
 */
7290 static int try_to_fix_bad_block(struct btrfs_root *root,
7291 struct extent_buffer *buf,
7292 enum btrfs_tree_block_status status)
7294 struct btrfs_trans_handle *trans;
7295 struct ulist *roots;
7296 struct ulist_node *node;
7297 struct btrfs_root *search_root;
7298 struct btrfs_path path;
7299 struct ulist_iterator iter;
7300 struct btrfs_key root_key, key;
7303 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7304 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7307 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7311 btrfs_init_path(&path);
7312 ULIST_ITER_INIT(&iter);
7313 while ((node = ulist_next(roots, &iter))) {
7314 root_key.objectid = node->val;
7315 root_key.type = BTRFS_ROOT_ITEM_KEY;
7316 root_key.offset = (u64)-1;
7318 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7325 trans = btrfs_start_transaction(search_root, 0);
7326 if (IS_ERR(trans)) {
7327 ret = PTR_ERR(trans);
7331 path.lowest_level = btrfs_header_level(buf);
7332 path.skip_check_block = 1;
7333 if (path.lowest_level)
7334 btrfs_node_key_to_cpu(buf, &key, 0);
7336 btrfs_item_key_to_cpu(buf, &key, 0);
7337 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7340 btrfs_commit_transaction(trans, search_root);
7343 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7344 ret = fix_key_order(search_root, &path);
7345 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7346 ret = fix_item_offset(search_root, &path);
7348 btrfs_commit_transaction(trans, search_root);
7351 btrfs_release_path(&path);
7352 btrfs_commit_transaction(trans, search_root);
7355 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * Steps visible here:
 *  - look up the record for [buf->start, buf->len) and store the header
 *    generation, the objectid of the first key and the level in it,
 *  - run btrfs_check_leaf() or btrfs_check_node() against rec->parent_key,
 *  - on a non-clean status, try try_to_fix_bad_block() and report
 *    "bad block" if that does not yield BTRFS_TREE_BLOCK_CLEAN,
 *  - otherwise set content_checked; owner_ref_checked is set directly when
 *    @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, or in connection with
 *    check_owner_ref(),
 *  - let maybe_free_extent_rec() drop the record if it is fully verified.
 */
7359 static int check_block(struct btrfs_root *root,
7360 struct cache_tree *extent_cache,
7361 struct extent_buffer *buf, u64 flags)
7363 struct extent_record *rec;
7364 struct cache_extent *cache;
7365 struct btrfs_key key;
7366 enum btrfs_tree_block_status status;
7370 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7373 rec = container_of(cache, struct extent_record, cache);
7374 rec->generation = btrfs_header_generation(buf);
7376 level = btrfs_header_level(buf);
7377 if (btrfs_header_nritems(buf) > 0) {
7380 btrfs_item_key_to_cpu(buf, &key, 0);
7382 btrfs_node_key_to_cpu(buf, &key, 0);
7384 rec->info_objectid = key.objectid;
7386 rec->info_level = level;
7388 if (btrfs_is_leaf(buf))
7389 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7391 status = btrfs_check_node(root, &rec->parent_key, buf);
7393 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7395 status = try_to_fix_bad_block(root, buf, status);
7396 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7398 fprintf(stderr, "bad block %llu\n",
7399 (unsigned long long)buf->start);
7402 * Signal to callers we need to start the scan over
7403 * again since we'll have cowed blocks.
7408 rec->content_checked = 1;
7409 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7410 rec->owner_ref_checked = 1;
7412 ret = check_owner_ref(root, rec, buf);
7414 rec->owner_ref_checked = 1;
7418 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of the rec->backrefs list for a tree backref.
 *
 * Two match rules are visible: full backrefs are compared by @parent,
 * non-full backrefs are compared by @root.
 * NOTE(review): which rule applies presumably depends on whether @parent is
 * non-zero, data backrefs are presumably skipped, and NULL is presumably
 * returned when nothing matches - confirm against the full body.
 */
7423 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7424 u64 parent, u64 root)
7426 struct list_head *cur = rec->backrefs.next;
7427 struct extent_backref *node;
7428 struct tree_backref *back;
7430 while(cur != &rec->backrefs) {
7431 node = to_extent_backref(cur);
7435 back = to_tree_backref(node);
7437 if (!node->full_backref)
7439 if (parent == back->parent)
7442 if (node->full_backref)
7444 if (back->root == root)
/*
 * Allocate a tree_backref with malloc() and zero its embedded node.
 * One visible branch stores @parent and sets full_backref = 1; the other
 * clears full_backref.
 * NOTE(review): the second branch presumably stores @root, and the branch
 * condition is presumably a non-zero @parent - confirm.
 */
7452 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7453 u64 parent, u64 root)
7455 struct tree_backref *ref = malloc(sizeof(*ref));
7459 memset(&ref->node, 0, sizeof(ref->node));
7461 ref->parent = parent;
7462 ref->node.full_backref = 1;
7465 ref->node.full_backref = 0;
/*
 * Linear search of the rec->backrefs list for a data backref.
 *
 * Full backrefs are compared by @parent; non-full backrefs are compared by
 * the (@root, @owner, @offset) triple. For a triple match there is an extra
 * test: when @found_ref is set and the candidate already has found_ref, a
 * difference in bytes or disk_bytenr is treated specially.
 * NOTE(review): that special case presumably skips the candidate so a
 * separate backref gets created for the mismatching reference - confirm.
 */
7472 static struct data_backref *find_data_backref(struct extent_record *rec,
7473 u64 parent, u64 root,
7474 u64 owner, u64 offset,
7476 u64 disk_bytenr, u64 bytes)
7478 struct list_head *cur = rec->backrefs.next;
7479 struct extent_backref *node;
7480 struct data_backref *back;
7482 while(cur != &rec->backrefs) {
7483 node = to_extent_backref(cur);
7487 back = to_data_backref(node);
7489 if (!node->full_backref)
7491 if (parent == back->parent)
7494 if (node->full_backref)
7496 if (back->root == root && back->owner == owner &&
7497 back->offset == offset) {
7498 if (found_ref && node->found_ref &&
7499 (back->bytes != bytes ||
7500 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc(), zero its embedded node and mark it
 * as a data backref (is_data = 1).
 *
 * One visible branch stores @parent and sets full_backref = 1; the other
 * stores @offset and clears full_backref. The backref's byte count is set to
 * max_size, and rec->max_size is raised if max_size exceeds it.
 */
7510 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7511 u64 parent, u64 root,
7512 u64 owner, u64 offset,
7515 struct data_backref *ref = malloc(sizeof(*ref));
7519 memset(&ref->node, 0, sizeof(ref->node));
7520 ref->node.is_data = 1;
7523 ref->parent = parent;
7526 ref->node.full_backref = 1;
7530 ref->offset = offset;
7531 ref->node.full_backref = 0;
7533 ref->bytes = max_size;
7536 if (max_size > rec->max_size)
7537 rec->max_size = max_size;
7541 /* Check if the type of extent matches with its chunk */
/*
 * Looks up the block group containing rec->start through the file-scope
 * global_info and sets rec->wrong_chunk_type when:
 *  - a data extent lives in a block group without BTRFS_BLOCK_GROUP_DATA,
 *  - a metadata extent lives in a block group that is neither SYSTEM nor
 *    METADATA,
 *  - the first backref of a metadata extent is a data backref,
 *  - the block group lacks the type implied by the first tree backref's
 *    root (SYSTEM for the chunk tree, METADATA otherwise).
 */
7542 static void check_extent_type(struct extent_record *rec)
7544 struct btrfs_block_group_cache *bg_cache;
7546 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7550 /* data extent, check chunk directly*/
7551 if (!rec->metadata) {
7552 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7553 rec->wrong_chunk_type = 1;
7557 /* metadata extent, check the obvious case first */
7558 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7559 BTRFS_BLOCK_GROUP_METADATA))) {
7560 rec->wrong_chunk_type = 1;
7565 * Check SYSTEM extent, as it's also marked as metadata, we can only
7566 * make sure it's a SYSTEM extent by its backref
7568 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7569 struct extent_backref *node;
7570 struct tree_backref *tback;
/* Only the first backref in the tree is consulted. */
7573 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7574 if (node->is_data) {
7575 /* tree block shouldn't have data backref */
7576 rec->wrong_chunk_type = 1;
7579 tback = container_of(node, struct tree_backref, node);
7581 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7582 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7584 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7585 if (!(bg_cache->flags & bg_type))
7586 rec->wrong_chunk_type = 1;
7591 * Allocate a new extent record, fill default values from @tmpl and insert into
7592 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7593 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Fields copied from the template:
 * start, max_size, found_rec, content_checked, owner_ref_checked, metadata,
 * is_root, refs, extent_item_refs, parent_generation and parent_key. rec->nr
 * is the larger of tmpl->nr and tmpl->max_size, while the cache entry spans
 * [tmpl->start, tmpl->nr). The error flags and num_duplicates start at 0 and
 * flag_block_full_backref at FLAG_UNSET.
 *
 * After insertion the file-scope bytes_used counter grows by rec->nr, the
 * crossing-stripes test is evaluated with global_info->nodesize, and
 * check_extent_type() is run.
 * NOTE(review): the crossing-stripes test is presumably limited to metadata
 * records - confirm.
 */
7595 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7596 struct extent_record *tmpl)
7598 struct extent_record *rec;
7601 BUG_ON(tmpl->max_size == 0);
7602 rec = malloc(sizeof(*rec));
7605 rec->start = tmpl->start;
7606 rec->max_size = tmpl->max_size;
7607 rec->nr = max(tmpl->nr, tmpl->max_size);
7608 rec->found_rec = tmpl->found_rec;
7609 rec->content_checked = tmpl->content_checked;
7610 rec->owner_ref_checked = tmpl->owner_ref_checked;
7611 rec->num_duplicates = 0;
7612 rec->metadata = tmpl->metadata;
7613 rec->flag_block_full_backref = FLAG_UNSET;
7614 rec->bad_full_backref = 0;
7615 rec->crossing_stripes = 0;
7616 rec->wrong_chunk_type = 0;
7617 rec->is_root = tmpl->is_root;
7618 rec->refs = tmpl->refs;
7619 rec->extent_item_refs = tmpl->extent_item_refs;
7620 rec->parent_generation = tmpl->parent_generation;
7621 INIT_LIST_HEAD(&rec->backrefs);
7622 INIT_LIST_HEAD(&rec->dups);
7623 INIT_LIST_HEAD(&rec->list);
7624 rec->backref_tree = RB_ROOT;
7625 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7626 rec->cache.start = tmpl->start;
7627 rec->cache.size = tmpl->nr;
7628 ret = insert_cache_extent(extent_cache, &rec->cache);
7633 bytes_used += rec->nr;
7636 rec->crossing_stripes = check_crossing_stripes(global_info,
7637 rec->start, global_info->nodesize);
7638 check_extent_type(rec);
7643 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7645 * - refs - if found, increase refs
7646 * - is_root - if found, set
7647 * - content_checked - if found, set
7648 * - owner_ref_checked - if found, set
7650 * If not found, create a new one, initialize and insert.
/*
 * Found path, as visible below: when the template comes from an extent item
 * (tmpl->found_rec) and either starts elsewhere or the record already had
 * one, the record is queued on the file-scope duplicate_extents list and a
 * small copy of the template is appended to rec->dups. Otherwise the
 * remaining template fields are merged into the record, the crossing-stripes
 * and chunk-type checks are re-run and maybe_free_extent_rec() is given a
 * chance to drop it.
 *
 * Not-found path: delegates to add_extent_rec_nolookup().
 */
7652 static int add_extent_rec(struct cache_tree *extent_cache,
7653 struct extent_record *tmpl)
7655 struct extent_record *rec;
7656 struct cache_extent *cache;
7660 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7662 rec = container_of(cache, struct extent_record, cache);
7666 rec->nr = max(tmpl->nr, tmpl->max_size);
7669 * We need to make sure to reset nr to whatever the extent
7670 * record says was the real size, this way we can compare it to
7673 if (tmpl->found_rec) {
7674 if (tmpl->start != rec->start || rec->found_rec) {
7675 struct extent_record *tmp;
7678 if (list_empty(&rec->list))
7679 list_add_tail(&rec->list,
7680 &duplicate_extents);
7683 * We have to do this song and dance in case we
7684 * find an extent record that falls inside of
7685 * our current extent record but does not have
7686 * the same objectid.
7688 tmp = malloc(sizeof(*tmp));
7691 tmp->start = tmpl->start;
7692 tmp->max_size = tmpl->max_size;
7695 tmp->metadata = tmpl->metadata;
7696 tmp->extent_item_refs = tmpl->extent_item_refs;
7697 INIT_LIST_HEAD(&tmp->list);
7698 list_add_tail(&tmp->list, &rec->dups);
7699 rec->num_duplicates++;
7706 if (tmpl->extent_item_refs && !dup) {
7707 if (rec->extent_item_refs) {
7708 fprintf(stderr, "block %llu rec "
7709 "extent_item_refs %llu, passed %llu\n",
7710 (unsigned long long)tmpl->start,
7711 (unsigned long long)
7712 rec->extent_item_refs,
7713 (unsigned long long)tmpl->extent_item_refs);
7715 rec->extent_item_refs = tmpl->extent_item_refs;
7719 if (tmpl->content_checked)
7720 rec->content_checked = 1;
7721 if (tmpl->owner_ref_checked)
7722 rec->owner_ref_checked = 1;
7723 memcpy(&rec->parent_key, &tmpl->parent_key,
7724 sizeof(tmpl->parent_key));
7725 if (tmpl->parent_generation)
7726 rec->parent_generation = tmpl->parent_generation;
7727 if (rec->max_size < tmpl->max_size)
7728 rec->max_size = tmpl->max_size;
7731 * A metadata extent can't cross stripe_len boundary, otherwise
7732 * kernel scrub won't be able to handle it.
7733 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7737 rec->crossing_stripes = check_crossing_stripes(
7738 global_info, rec->start,
7739 global_info->nodesize);
7740 check_extent_type(rec);
7741 maybe_free_extent_rec(extent_cache, rec);
7745 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record is inserted with
 * add_extent_rec_nolookup() and looked up again. The backref is then found
 * with find_tree_backref() or created with alloc_tree_backref(). One of two
 * flags is set on it, found_ref or found_extent_tree, and a message is
 * printed to stderr when that flag was already set. New backrefs are
 * inserted into rec->backref_tree, after which check_extent_type() and
 * maybe_free_extent_rec() are run.
 * NOTE(review): @found_ref presumably selects which of the two flags is set
 * (the selecting condition is not visible here) - confirm.
 */
7750 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7751 u64 parent, u64 root, int found_ref)
7753 struct extent_record *rec;
7754 struct tree_backref *back;
7755 struct cache_extent *cache;
7757 bool insert = false;
7759 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7761 struct extent_record tmpl;
7763 memset(&tmpl, 0, sizeof(tmpl));
7764 tmpl.start = bytenr;
7769 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7773 /* really a bug in cache_extent implement now */
7774 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7779 rec = container_of(cache, struct extent_record, cache);
7780 if (rec->start != bytenr) {
7782 * Several cause, from unaligned bytenr to over lapping extents
7787 back = find_tree_backref(rec, parent, root);
7789 back = alloc_tree_backref(rec, parent, root);
7796 if (back->node.found_ref) {
7797 fprintf(stderr, "Extent back ref already exists "
7798 "for %llu parent %llu root %llu \n",
7799 (unsigned long long)bytenr,
7800 (unsigned long long)parent,
7801 (unsigned long long)root);
7803 back->node.found_ref = 1;
7805 if (back->node.found_extent_tree) {
7806 fprintf(stderr, "Extent back ref already exists "
7807 "for %llu parent %llu root %llu \n",
7808 (unsigned long long)bytenr,
7809 (unsigned long long)parent,
7810 (unsigned long long)root);
7812 back->node.found_extent_tree = 1;
7815 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7816 compare_extent_backref));
7817 check_extent_type(rec);
7818 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record carrying @max_size
 * is inserted first. rec->max_size is raised to @max_size if needed. The
 * backref is found with find_data_backref() or created with
 * alloc_data_backref().
 *
 * Two update paths are visible:
 *  - one requires num_refs == 1 (BUG_ON), sets node.found_ref, increments
 *    back->found_ref, refreshes bytes/disk_bytenr (erasing the node from
 *    rec->backref_tree so it can be re-inserted) and marks the record's
 *    content and owner ref as checked;
 *  - the other stores @num_refs and sets found_extent_tree, after
 *    complaining on stderr if that flag was already set.
 * NOTE(review): @found_ref presumably selects between these two paths -
 * confirm.
 */
7822 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7823 u64 parent, u64 root, u64 owner, u64 offset,
7824 u32 num_refs, int found_ref, u64 max_size)
7826 struct extent_record *rec;
7827 struct data_backref *back;
7828 struct cache_extent *cache;
7830 bool insert = false;
7832 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7834 struct extent_record tmpl;
7836 memset(&tmpl, 0, sizeof(tmpl));
7837 tmpl.start = bytenr;
7839 tmpl.max_size = max_size;
7841 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7845 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7850 rec = container_of(cache, struct extent_record, cache);
7851 if (rec->max_size < max_size)
7852 rec->max_size = max_size;
7855 * If found_ref is set then max_size is the real size and must match the
7856 * existing refs. So if we have already found a ref then we need to
7857 * make sure that this ref matches the existing one, otherwise we need
7858 * to add a new backref so we can notice that the backrefs don't match
7859 * and we need to figure out who is telling the truth. This is to
7860 * account for that awful fsync bug I introduced where we'd end up with
7861 * a btrfs_file_extent_item that would have its length include multiple
7862 * prealloc extents or point inside of a prealloc extent.
7864 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7867 back = alloc_data_backref(rec, parent, root, owner, offset,
7874 BUG_ON(num_refs != 1);
7875 if (back->node.found_ref)
7876 BUG_ON(back->bytes != max_size);
7877 back->node.found_ref = 1;
7878 back->found_ref += 1;
7879 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7880 back->bytes = max_size;
7881 back->disk_bytenr = bytenr;
7883 /* Need to reinsert if not already in the tree */
7885 rb_erase(&back->node.node, &rec->backref_tree);
7890 rec->content_checked = 1;
7891 rec->owner_ref_checked = 1;
7893 if (back->node.found_extent_tree) {
7894 fprintf(stderr, "Extent back ref already exists "
7895 "for %llu parent %llu root %llu "
7896 "owner %llu offset %llu num_refs %lu\n",
7897 (unsigned long long)bytenr,
7898 (unsigned long long)parent,
7899 (unsigned long long)root,
7900 (unsigned long long)owner,
7901 (unsigned long long)offset,
7902 (unsigned long)num_refs);
7904 back->num_refs = num_refs;
7905 back->node.found_extent_tree = 1;
7908 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7909 compare_extent_backref));
7911 maybe_free_extent_rec(extent_cache, rec);
/*
 * Note [bytenr, size) in the @seen tree and queue the same range in
 * @pending.
 * NOTE(review): presumably returns early when the range was already in
 * @seen (non-zero ret from add_cache_extent()), so each block is queued only
 * once - confirm.
 */
7915 static int add_pending(struct cache_tree *pending,
7916 struct cache_tree *seen, u64 bytenr, u32 size)
7919 ret = add_cache_extent(seen, bytenr, size);
7922 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store their start/size
 * pairs in bits[]; the filling loops stop at bits_nr entries.
 *
 * Sources consulted, in the order they appear below:
 *  - the first extent of @reada,
 *  - extents of @nodes starting from @last backed up by 32768 bytes (or from
 *    0),
 *  - extents of @pending.
 * When more than 8 slots remain, further @pending extents that start within
 * 32768 bytes of the end of the previous one are considered as well.
 * NOTE(review): the return value is presumably the number of entries filled
 * - confirm against the full body.
 */
7926 static int pick_next_pending(struct cache_tree *pending,
7927 struct cache_tree *reada,
7928 struct cache_tree *nodes,
7929 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but node_start is unsigned long, which narrows
 * the value where unsigned long is 32 bits wide - confirm this is intended.
 */
7932 unsigned long node_start = last;
7933 struct cache_extent *cache;
7936 cache = search_cache_extent(reada, 0);
7938 bits[0].start = cache->start;
7939 bits[0].size = cache->size;
7944 if (node_start > 32768)
7945 node_start -= 32768;
7947 cache = search_cache_extent(nodes, node_start);
7949 cache = search_cache_extent(nodes, 0);
7952 cache = search_cache_extent(pending, 0);
7957 bits[ret].start = cache->start;
7958 bits[ret].size = cache->size;
7959 cache = next_cache_extent(cache);
7961 } while (cache && ret < bits_nr);
7967 bits[ret].start = cache->start;
7968 bits[ret].size = cache->size;
7969 cache = next_cache_extent(cache);
7971 } while (cache && ret < bits_nr);
7973 if (bits_nr - ret > 8) {
7974 u64 lookup = bits[0].start + bits[0].size;
7975 struct cache_extent *next;
7976 next = search_cache_extent(pending, lookup);
7978 if (next->start - lookup > 32768)
7980 bits[ret].start = next->start;
7981 bits[ret].size = next->size;
7982 lookup = next->start + next->size;
7986 next = next_cache_extent(next);
/*
 * Per-extent destructor used by free_chunk_cache_tree(). Maps @cache to its
 * chunk_record and unlinks the record's `list` and `dextents` list heads.
 * NOTE(review): the record is presumably freed right after - confirm.
 */
7994 static void free_chunk_record(struct cache_extent *cache)
7996 struct chunk_record *rec;
7998 rec = container_of(cache, struct chunk_record, cache);
7999 list_del_init(&rec->list);
8000 list_del_init(&rec->dextents);
/*
 * Release every chunk record in @chunk_cache by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the destructor.
 */
8004 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8006 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device cache; passed to FREE_RB_BASED_TREE
 * below. Maps the rb_node to its containing device_record.
 * NOTE(review): the record is presumably freed right after - confirm.
 */
8009 static void free_device_record(struct rb_node *node)
8011 struct device_record *rec;
8013 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation pairing the name `device_cache` with
 * free_device_record().
 * NOTE(review): presumably expands to the function that frees a whole
 * device-record rb-tree - confirm against the macro definition.
 */
8017 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block group cache tree and append it to
 * tree->block_groups.
 * NOTE(review): the list append is presumably skipped, and the error
 * returned, when insert_cache_extent() fails - confirm.
 */
8019 int insert_block_group_record(struct block_group_tree *tree,
8020 struct block_group_record *bg_rec)
8024 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8028 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent destructor used by free_block_group_tree(). Maps @cache to its
 * block_group_record and unlinks it from its list.
 * NOTE(review): the record is presumably freed right after - confirm.
 */
8032 static void free_block_group_record(struct cache_extent *cache)
8034 struct block_group_record *rec;
8036 rec = container_of(cache, struct block_group_record, cache);
8037 list_del_init(&rec->list);
/*
 * Release every block group record in @tree by handing its cache tree to
 * cache_tree_free_extents() with free_block_group_record() as destructor.
 */
8041 void free_block_group_tree(struct block_group_tree *tree)
8043 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree using insert_cache_extent2()
 * (see the note below on why the plain insert is not used), then queue it on
 * both orphan lists: no_chunk_orphans and no_device_orphans.
 * NOTE(review): the list appends are presumably skipped when the insert
 * fails - confirm.
 */
8046 int insert_device_extent_record(struct device_extent_tree *tree,
8047 struct device_extent_record *de_rec)
8052 * Device extent is a bit different from the other extents, because
8053 * the extents which belong to the different devices may have the
8054 * same start and size, so we need use the special extent cache
8055 * search/insert functions.
8057 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8061 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8062 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent destructor used by free_device_extent_tree(). Maps @cache to
 * its device_extent_record and unlinks chunk_list and device_list if they
 * are still linked.
 * NOTE(review): the record is presumably freed right after - confirm.
 */
8066 static void free_device_extent_record(struct cache_extent *cache)
8068 struct device_extent_record *rec;
8070 rec = container_of(cache, struct device_extent_record, cache);
8071 if (!list_empty(&rec->chunk_list))
8072 list_del_init(&rec->chunk_list);
8073 if (!list_empty(&rec->device_list))
8074 list_del_init(&rec->device_list);
/*
 * Release every device extent record in @tree by handing its cache tree to
 * cache_tree_free_extents() with free_device_extent_record() as destructor.
 */
8078 void free_device_extent_tree(struct device_extent_tree *tree)
8080 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turns a v0 extent ref item at @slot of @leaf into a backref: reference
 * objectids below BTRFS_FIRST_FREE_OBJECTID go through add_tree_backref(),
 * while add_data_backref() receives the v0 ref count as num_refs. In both
 * calls key.objectid is the extent bytenr and key.offset the parent.
 */
8083 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8084 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8085 struct extent_buffer *leaf, int slot)
8087 struct btrfs_extent_ref_v0 *ref0;
8088 struct btrfs_key key;
8091 btrfs_item_key_to_cpu(leaf, &key, slot);
8092 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8093 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8094 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8097 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8098 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with a size derived from the stripe count
 * (btrfs_chunk_record_size()). The cache entry spans
 * [key->offset, chunk length). The key fields, header generation and the
 * chunk's owner, stripe_len, type, io_width, io_align, sector_size and
 * sub_stripes are copied, followed by devid, offset and dev_uuid of every
 * stripe. An allocation failure is reported on stderr.
 */
8104 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8105 struct btrfs_key *key,
8108 struct btrfs_chunk *ptr;
8109 struct chunk_record *rec;
8112 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8113 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8115 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8117 fprintf(stderr, "memory allocation failed\n");
8121 INIT_LIST_HEAD(&rec->list);
8122 INIT_LIST_HEAD(&rec->dextents);
8125 rec->cache.start = key->offset;
8126 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8128 rec->generation = btrfs_header_generation(leaf);
8130 rec->objectid = key->objectid;
8131 rec->type = key->type;
8132 rec->offset = key->offset;
8134 rec->length = rec->cache.size;
8135 rec->owner = btrfs_chunk_owner(leaf, ptr);
8136 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8137 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8138 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8139 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8140 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8141 rec->num_stripes = num_stripes;
8142 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8144 for (i = 0; i < rec->num_stripes; ++i) {
8145 rec->stripes[i].devid =
8146 btrfs_stripe_devid_nr(leaf, ptr, i);
8147 rec->stripes[i].offset =
8148 btrfs_stripe_offset_nr(leaf, ptr, i);
8149 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8150 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first vetted with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with error() and ignored. A valid one is converted with
 * btrfs_new_chunk_record() and inserted; a failed insert is reported as
 * "Chunk[...] existed." on stderr.
 */
8157 static int process_chunk_item(struct cache_tree *chunk_cache,
8158 struct btrfs_key *key, struct extent_buffer *eb,
8161 struct chunk_record *rec;
8162 struct btrfs_chunk *chunk;
8165 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8167 * Do extra check for this chunk item,
8169 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8170 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8171 * and owner<->key_type check.
8173 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8176 error("chunk(%llu, %llu) is not valid, ignore it",
8177 key->offset, btrfs_chunk_length(eb, chunk));
8180 rec = btrfs_new_chunk_record(eb, key, slot);
8181 ret = insert_cache_extent(chunk_cache, &rec->cache);
8183 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8184 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree (ordered by device_record_compare).
 *
 * The key fields, header generation, device id, total bytes and bytes used
 * are copied. An allocation failure and a failed insert
 * ("Device[...] existed.") are both reported on stderr.
 */
8191 static int process_device_item(struct rb_root *dev_cache,
8192 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8194 struct btrfs_dev_item *ptr;
8195 struct device_record *rec;
8198 ptr = btrfs_item_ptr(eb,
8199 slot, struct btrfs_dev_item);
/*
 * NOTE(review): allocated with malloc(), not calloc(), so any field not
 * assigned below stays uninitialized - confirm all fields are covered.
 */
8201 rec = malloc(sizeof(*rec));
8203 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is set from key->offset here and overwritten with
 * btrfs_device_id() further down - the first assignment looks redundant.
 */
8207 rec->devid = key->offset;
8208 rec->generation = btrfs_header_generation(eb);
8210 rec->objectid = key->objectid;
8211 rec->type = key->type;
8212 rec->offset = key->offset;
8214 rec->devid = btrfs_device_id(eb, ptr);
8215 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8216 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8218 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8220 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialized block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache entry spans [key->objectid, key->offset). The key fields, header
 * generation and the on-disk block group flags are copied, and the list head
 * is initialized. An allocation failure is reported on stderr.
 */
8227 struct block_group_record *
8228 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8231 struct btrfs_block_group_item *ptr;
8232 struct block_group_record *rec;
8234 rec = calloc(1, sizeof(*rec));
8236 fprintf(stderr, "memory allocation failed\n");
8240 rec->cache.start = key->objectid;
8241 rec->cache.size = key->offset;
8243 rec->generation = btrfs_header_generation(leaf);
8245 rec->objectid = key->objectid;
8246 rec->type = key->type;
8247 rec->offset = key->offset;
8249 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8250 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8252 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block group record for the item at @slot of @eb and insert it into
 * @block_group_cache.  A failed insert is reported on stderr as an already
 * existing block group.
 *
 * NOTE(review): cleanup of a rejected record and the return value are not
 * visible in this view -- confirm.
 */
8257 static int process_block_group_item(struct block_group_tree *block_group_cache,
8258 struct btrfs_key *key,
8259 struct extent_buffer *eb, int slot)
8261 struct block_group_record *rec;
8264 rec = btrfs_new_block_group_record(eb, key, slot);
8265 ret = insert_block_group_record(block_group_cache, rec);
8267 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8268 rec->objectid, rec->offset);
/*
 * Allocate a zero-initialized device_extent_record describing the DEV_EXTENT
 * item at @slot of @leaf.
 *
 * The embedded cache entry uses key->objectid as its objectid and key->offset
 * as its start; its size is the extent length read from the item.  The key
 * fields, the leaf generation and the owning chunk objectid are copied, and
 * both list heads are initialized.
 *
 * NOTE(review): what happens after the allocation-failure message, the
 * left-hand side of the chunk offset assignment, and the final return are
 * elided from this view -- confirm.
 */
8275 struct device_extent_record *
8276 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8277 struct btrfs_key *key, int slot)
8279 struct device_extent_record *rec;
8280 struct btrfs_dev_extent *ptr;
8282 rec = calloc(1, sizeof(*rec));
8284 fprintf(stderr, "memory allocation failed\n");
8288 rec->cache.objectid = key->objectid;
8289 rec->cache.start = key->offset;
8291 rec->generation = btrfs_header_generation(leaf);
8293 rec->objectid = key->objectid;
8294 rec->type = key->type;
8295 rec->offset = key->offset;
8297 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8298 rec->chunk_objecteid =
8299 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8301 btrfs_dev_extent_chunk_offset(leaf, ptr);
8302 rec->length = btrfs_dev_extent_length(leaf, ptr);
8303 rec->cache.size = rec->length;
8305 INIT_LIST_HEAD(&rec->chunk_list);
8306 INIT_LIST_HEAD(&rec->device_list);
/*
 * Create a device extent record for the item at @slot of @eb and insert it
 * into @dev_extent_cache.  A failed insert is reported on stderr as an
 * already existing device extent.
 *
 * NOTE(review): the return type line, cleanup of a rejected record and the
 * return value are not visible in this view -- confirm.
 */
8312 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8313 struct btrfs_key *key, struct extent_buffer *eb,
8316 struct device_extent_record *rec;
8319 rec = btrfs_new_device_extent_record(eb, key, slot);
8320 ret = insert_device_extent_record(dev_extent_cache, rec);
8323 "Device extent[%llu, %llu, %llu] existed.\n",
8324 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM or METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with every inline backref stored in the item.
 *
 * For METADATA_ITEM keys the length is the filesystem nodesize, otherwise it
 * is key.offset.  Unaligned start addresses, metadata extents whose length is
 * not nodesize, and data extents whose length is not sector aligned are
 * reported with error().  Items smaller than struct btrfs_extent_item are
 * handled as the v0 format when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * NOTE(review): several conditions and return statements are elided from
 * this view, and the result of the add_extent_rec() call made before the
 * inline ref walk is not checked -- confirm whether that is intentional.
 */
8331 static int process_extent_item(struct btrfs_root *root,
8332 struct cache_tree *extent_cache,
8333 struct extent_buffer *eb, int slot)
8335 struct btrfs_extent_item *ei;
8336 struct btrfs_extent_inline_ref *iref;
8337 struct btrfs_extent_data_ref *dref;
8338 struct btrfs_shared_data_ref *sref;
8339 struct btrfs_key key;
8340 struct extent_record tmpl;
8345 u32 item_size = btrfs_item_size_nr(eb, slot);
8351 btrfs_item_key_to_cpu(eb, &key, slot);
8353 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8355 num_bytes = root->fs_info->nodesize;
8357 num_bytes = key.offset;
8360 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8361 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8362 key.objectid, root->fs_info->sectorsize);
8365 if (item_size < sizeof(*ei)) {
8366 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8367 struct btrfs_extent_item_v0 *ei0;
8368 BUG_ON(item_size != sizeof(*ei0));
8369 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8370 refs = btrfs_extent_refs_v0(eb, ei0);
8374 memset(&tmpl, 0, sizeof(tmpl));
8375 tmpl.start = key.objectid;
8376 tmpl.nr = num_bytes;
8377 tmpl.extent_item_refs = refs;
8378 tmpl.metadata = metadata;
8380 tmpl.max_size = num_bytes;
8382 return add_extent_rec(extent_cache, &tmpl);
8385 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8386 refs = btrfs_extent_refs(eb, ei);
8387 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8391 if (metadata && num_bytes != root->fs_info->nodesize) {
8392 error("ignore invalid metadata extent, length %llu does not equal to %u",
8393 num_bytes, root->fs_info->nodesize);
8396 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8397 error("ignore invalid data extent, length %llu is not aligned to %u",
8398 num_bytes, root->fs_info->sectorsize);
8402 memset(&tmpl, 0, sizeof(tmpl));
8403 tmpl.start = key.objectid;
8404 tmpl.nr = num_bytes;
8405 tmpl.extent_item_refs = refs;
8406 tmpl.metadata = metadata;
8408 tmpl.max_size = num_bytes;
8409 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally carries a btrfs_tree_block_info that is
 * skipped first.  The walk is bounded by the end of the item.
 */
8411 ptr = (unsigned long)(ei + 1);
8412 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8413 key.type == BTRFS_EXTENT_ITEM_KEY)
8414 ptr += sizeof(struct btrfs_tree_block_info);
8416 end = (unsigned long)ei + item_size;
8418 iref = (struct btrfs_extent_inline_ref *)ptr;
8419 type = btrfs_extent_inline_ref_type(eb, iref);
8420 offset = btrfs_extent_inline_ref_offset(eb, iref);
8422 case BTRFS_TREE_BLOCK_REF_KEY:
8423 ret = add_tree_backref(extent_cache, key.objectid,
8427 "add_tree_backref failed (extent items tree block): %s",
8430 case BTRFS_SHARED_BLOCK_REF_KEY:
8431 ret = add_tree_backref(extent_cache, key.objectid,
8435 "add_tree_backref failed (extent items shared block): %s",
8438 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref is overlaid on the inline ref, starting at its offset field. */
8439 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8440 add_data_backref(extent_cache, key.objectid, 0,
8441 btrfs_extent_data_ref_root(eb, dref),
8442 btrfs_extent_data_ref_objectid(eb,
8444 btrfs_extent_data_ref_offset(eb, dref),
8445 btrfs_extent_data_ref_count(eb, dref),
8448 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref is located right after the inline ref structure. */
8449 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8450 add_data_backref(extent_cache, key.objectid, offset,
8452 btrfs_shared_data_ref_count(eb, sref),
8456 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8457 key.objectid, key.type, num_bytes);
8460 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by one matching free space entry, and unlink that entry.
 *
 * Every super block mirror location is mapped into the block group with
 * btrfs_rmap_block() and any overlapping stripe is trimmed out of the range
 * first.  When a stripe falls in the middle of the range, the left part is
 * checked by a recursive call and the loop continues with the right part.
 * The remaining range is then looked up with btrfs_find_free_space(); a
 * missing entry, or an entry whose offset or length differs, is reported on
 * stderr.
 *
 * NOTE(review): the return statements and several loop/branch lines are
 * elided from this view -- confirm the error codes.
 */
8467 static int check_cache_range(struct btrfs_root *root,
8468 struct btrfs_block_group_cache *cache,
8469 u64 offset, u64 bytes)
8471 struct btrfs_free_space *entry;
8477 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8478 bytenr = btrfs_sb_offset(i);
8479 ret = btrfs_rmap_block(root->fs_info,
8480 cache->key.objectid, bytenr, 0,
8481 &logical, &nr, &stripe_len);
8486 if (logical[nr] + stripe_len <= offset)
8488 if (offset + bytes <= logical[nr])
8490 if (logical[nr] == offset) {
8491 if (stripe_len >= bytes) {
8495 bytes -= stripe_len;
8496 offset += stripe_len;
8497 } else if (logical[nr] < offset) {
8498 if (logical[nr] + stripe_len >=
8503 bytes = (offset + bytes) -
8504 (logical[nr] + stripe_len);
8505 offset = logical[nr] + stripe_len;
8508 * Could be tricky, the super may land in the
8509 * middle of the area we're checking. First
8510 * check the easiest case, it's at the end.
8512 if (logical[nr] + stripe_len >=
8514 bytes = logical[nr] - offset;
8518 /* Check the left side */
8519 ret = check_cache_range(root, cache,
8521 logical[nr] - offset);
8527 /* Now we continue with the right side */
8528 bytes = (offset + bytes) -
8529 (logical[nr] + stripe_len);
8530 offset = logical[nr] + stripe_len;
8537 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8539 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8540 offset, offset+bytes);
8544 if (entry->offset != offset) {
8545 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8550 if (entry->bytes != bytes) {
8551 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8552 bytes, entry->bytes, offset);
8556 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from the larger of the block group start and
 * BTRFS_SUPER_INFO_OFFSET up to the end of the block group.  Only
 * EXTENT_ITEM and METADATA_ITEM keys are considered; the gap between the end
 * of the previous extent and the start of the next one is handed to
 * check_cache_range(), as is the tail after the last extent.  An EXTENT_ITEM
 * ends at objectid + offset, a METADATA_ITEM at objectid + nodesize.  If free
 * space entries are still left afterwards, a message is printed on stderr.
 *
 * NOTE(review): loop control, error handling and return statements are
 * elided from this view -- confirm.
 */
8561 static int verify_space_cache(struct btrfs_root *root,
8562 struct btrfs_block_group_cache *cache)
8564 struct btrfs_path path;
8565 struct extent_buffer *leaf;
8566 struct btrfs_key key;
8570 root = root->fs_info->extent_root;
8572 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8574 btrfs_init_path(&path);
8575 key.objectid = last;
8577 key.type = BTRFS_EXTENT_ITEM_KEY;
8578 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8583 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8584 ret = btrfs_next_leaf(root, &path);
8592 leaf = path.nodes[0];
8593 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8594 if (key.objectid >= cache->key.offset + cache->key.objectid)
8596 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8597 key.type != BTRFS_METADATA_ITEM_KEY) {
8602 if (last == key.objectid) {
8603 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8604 last = key.objectid + key.offset;
8606 last = key.objectid + root->fs_info->nodesize;
8611 ret = check_cache_range(root, cache, last,
8612 key.objectid - last);
8615 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8616 last = key.objectid + key.offset;
8618 last = key.objectid + root->fs_info->nodesize;
/* Check the tail of the block group after the last extent that was seen. */
8622 if (last < cache->key.objectid + cache->key.offset)
8623 ret = check_cache_range(root, cache, last,
8624 cache->key.objectid +
8625 cache->key.offset - last);
8628 btrfs_release_path(&path);
8631 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8632 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups.
 *
 * When the super block cache generation is set (not -1ULL) and differs from
 * the super block generation, a message says that the space cache will be
 * invalidated (presumably returning early -- the return is elided).
 * Block groups are looked up starting right after the primary super block.
 * Each one gets a free space ctl if it has none, its free space is loaded
 * from the free space tree when the FREE_SPACE_TREE compat_ro bit is set
 * (with super stripes excluded around the load) or from the free space cache
 * otherwise, and the result is checked with verify_space_cache().
 *
 * Returns -EINVAL if the local error indicator is nonzero, 0 otherwise.
 */
8640 static int check_space_cache(struct btrfs_root *root)
8642 struct btrfs_block_group_cache *cache;
8643 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8647 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8648 btrfs_super_generation(root->fs_info->super_copy) !=
8649 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8650 printf("cache and super generation don't match, space cache "
8651 "will be invalidated\n");
8655 if (ctx.progress_enabled) {
8656 ctx.tp = TASK_FREE_SPACE;
8657 task_start(ctx.info);
8661 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8665 start = cache->key.objectid + cache->key.offset;
8666 if (!cache->free_space_ctl) {
8667 if (btrfs_init_free_space_ctl(cache,
8668 root->fs_info->sectorsize)) {
8673 btrfs_remove_free_space_cache(cache);
8676 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8677 ret = exclude_super_stripes(root, cache);
8679 fprintf(stderr, "could not exclude super stripes: %s\n",
8684 ret = load_free_space_tree(root->fs_info, cache);
8685 free_excluded_extents(root, cache);
8687 fprintf(stderr, "could not load free space tree: %s\n",
8694 ret = load_free_space_cache(root->fs_info, cache);
8699 ret = verify_space_cache(root, cache);
8701 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8702 cache->key.objectid);
8707 task_stop(ctx.info);
8709 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at logical address @bytenr and compare
 * the checksum of every sectorsize block with the value stored in @eb.
 *
 * The expected checksums are read from @eb starting at @leaf_offset, one
 * csum_size entry per sector.  Data is read with read_extent_data() from the
 * current mirror; on a mismatch the mirror, address and both checksum values
 * are printed on stderr and the number of copies is queried so that another
 * mirror can be tried.
 *
 * NOTE(review): a length that is not a multiple of sectorsize is rejected
 * up front, but the returned code, the mirror retry control flow, buffer
 * release and the final return are elided from this view -- confirm.
 */
8712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8713 u64 num_bytes, unsigned long leaf_offset,
8714 struct extent_buffer *eb) {
8716 struct btrfs_fs_info *fs_info = root->fs_info;
8718 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8720 unsigned long csum_offset;
8724 u64 data_checked = 0;
8730 if (num_bytes % fs_info->sectorsize)
8733 data = malloc(num_bytes);
8737 while (offset < num_bytes) {
8740 read_len = num_bytes - offset;
8741 /* read as much data as possible in one call */
8742 ret = read_extent_data(fs_info, data + offset,
8743 bytenr + offset, &read_len, mirror);
8747 /* verify the checksum of each sectorsize block that was read */
8748 while (data_checked < read_len) {
8750 tmp = offset + data_checked;
8752 csum = btrfs_csum_data((char *)data + tmp,
8753 csum, fs_info->sectorsize);
8754 btrfs_csum_final(csum, (u8 *)&csum);
8756 csum_offset = leaf_offset +
8757 tmp / fs_info->sectorsize * csum_size;
8758 read_extent_buffer(eb, (char *)&csum_expected,
8759 csum_offset, csum_size);
8760 /* try another mirror */
8761 if (csum != csum_expected) {
8762 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8763 mirror, bytenr + tmp,
8764 csum, csum_expected);
8765 num_copies = btrfs_num_copies(root->fs_info,
8767 if (mirror < num_copies - 1) {
8772 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr (length taken from the
 * second parameter, used below as num_bytes) is covered by EXTENT_ITEMs in
 * the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * preceding item, skipping backwards over keys whose type sorts after
 * EXTENT_ITEM.  Walking forward, every EXTENT_ITEM that overlaps the range
 * trims the part it covers; an extent in the middle of the range makes the
 * function recurse for the right-hand part and continue with the left-hand
 * part.  If bytes remain uncovered at the end and no error occurred, a
 * message naming the csum range is printed on stderr.
 *
 * NOTE(review): loop control and return statements are elided from this
 * view -- confirm the return convention.
 */
8781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8784 struct btrfs_path path;
8785 struct extent_buffer *leaf;
8786 struct btrfs_key key;
8789 btrfs_init_path(&path);
8790 key.objectid = bytenr;
8791 key.type = BTRFS_EXTENT_ITEM_KEY;
8792 key.offset = (u64)-1;
8795 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8798 fprintf(stderr, "Error looking up extent record %d\n", ret);
8799 btrfs_release_path(&path);
8802 if (path.slots[0] > 0) {
8805 ret = btrfs_prev_leaf(root, &path);
8808 } else if (ret > 0) {
8815 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8818 * Block group items come before extent items if they have the same
8819 * bytenr, so walk back one more just in case. Dear future traveller,
8820 * first congrats on mastering time travel. Now if it's not too much
8821 * trouble could you go back to 2006 and tell Chris to make the
8822 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8823 * EXTENT_ITEM_KEY please?
8825 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8826 if (path.slots[0] > 0) {
8829 ret = btrfs_prev_leaf(root, &path);
8832 } else if (ret > 0) {
8837 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8841 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8842 ret = btrfs_next_leaf(root, &path);
8844 fprintf(stderr, "Error going to next leaf "
8846 btrfs_release_path(&path);
8852 leaf = path.nodes[0];
8853 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8854 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8858 if (key.objectid + key.offset < bytenr) {
8862 if (key.objectid > bytenr + num_bytes)
8865 if (key.objectid == bytenr) {
8866 if (key.offset >= num_bytes) {
8870 num_bytes -= key.offset;
8871 bytenr += key.offset;
8872 } else if (key.objectid < bytenr) {
8873 if (key.objectid + key.offset >= bytenr + num_bytes) {
8877 num_bytes = (bytenr + num_bytes) -
8878 (key.objectid + key.offset);
8879 bytenr = key.objectid + key.offset;
8881 if (key.objectid + key.offset < bytenr + num_bytes) {
8882 u64 new_start = key.objectid + key.offset;
8883 u64 new_bytes = bytenr + num_bytes - new_start;
8886 * Weird case, the extent is in the middle of
8887 * our range, we'll have to search one side
8888 * and then the other. Not sure if this happens
8889 * in real life, but no harm in coding it up
8890 * anyway just in case.
8892 btrfs_release_path(&path);
8893 ret = check_extent_exists(root, new_start,
8896 fprintf(stderr, "Right section didn't "
8900 num_bytes = key.objectid - bytenr;
8903 num_bytes = key.objectid - bytenr;
8910 if (num_bytes && !ret) {
8911 fprintf(stderr, "There are no extents for csum range "
8912 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8916 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every checksummed range is backed
 * by extent records, optionally verifying the data checksums as well.
 *
 * The walk starts at the first EXTENT_CSUM key of the csum root; the
 * function complains on stderr when that root node is not up to date.
 * Consecutive csum items are merged into one run (offset, num_bytes); when
 * an item does not start where the current run ends, the run is checked
 * with check_extent_exists() and a new run begins at that item.  When the
 * global check_data_csum is set, each item is also passed to
 * check_extent_csums().
 *
 * NOTE(review): loop control, error accounting and return statements are
 * elided from this view -- confirm.
 */
8920 static int check_csums(struct btrfs_root *root)
8922 struct btrfs_path path;
8923 struct extent_buffer *leaf;
8924 struct btrfs_key key;
8925 u64 offset = 0, num_bytes = 0;
8926 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8930 unsigned long leaf_offset;
8932 root = root->fs_info->csum_root;
8933 if (!extent_buffer_uptodate(root->node)) {
8934 fprintf(stderr, "No valid csum tree found\n");
8938 btrfs_init_path(&path);
8939 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8940 key.type = BTRFS_EXTENT_CSUM_KEY;
8942 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8944 fprintf(stderr, "Error searching csum tree %d\n", ret);
8945 btrfs_release_path(&path);
8949 if (ret > 0 && path.slots[0])
8954 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8955 ret = btrfs_next_leaf(root, &path);
8957 fprintf(stderr, "Error going to next leaf "
8964 leaf = path.nodes[0];
8966 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8967 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size-byte entry in the item covers one sector of data. */
8972 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8973 csum_size) * root->fs_info->sectorsize;
8974 if (!check_data_csum)
8975 goto skip_csum_check;
8976 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8977 ret = check_extent_csums(root, key.offset, data_len,
8983 offset = key.offset;
8984 } else if (key.offset != offset + num_bytes) {
8985 ret = check_extent_exists(root, offset, num_bytes);
8987 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8988 "there is no extent record\n",
8989 offset, offset+num_bytes);
8992 offset = key.offset;
8995 num_bytes += data_len;
8999 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in (objectid, type, offset)
 * order, checking at each level whether @key is smaller.
 *
 * NOTE(review): the return statements are elided from this view; presumably
 * a nonzero value means @key sorts before @drop_key -- confirm.
 */
9003 static int is_dropped_key(struct btrfs_key *key,
9004 struct btrfs_key *drop_key) {
9005 if (key->objectid < drop_key->objectid)
9007 else if (key->objectid == drop_key->objectid) {
9008 if (key->type < drop_key->type)
9010 else if (key->type == drop_key->type) {
9011 if (key->offset < drop_key->offset)
9019 * Here are the rules for FULL_BACKREF.
9021 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9022 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9024 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9025 * if it happened after the relocation occurred since we'll have dropped the
9026 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9027 * have no real way to know for sure.
9029 * We process the blocks one root at a time, and we start from the lowest root
9030 * objectid and go to the highest. So we can just lookup the owner backref for
9031 * the record and if we don't find it then we know it doesn't exist and we have
9034 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9035 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9036 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry FULL_BACKREF and record the
 * answer in the flags output and in the extent record of the block.
 *
 * The extent record is looked up in @extent_cache by buf->start.  The
 * decision looks at: whether @ri is below BTRFS_FIRST_FREE_OBJECTID, whether
 * @buf is the root node of @ri, whether the RELOC header flag is set,
 * whether the header owner equals ri->objectid, and whether a tree backref
 * for the header owner exists.  If the record already holds a decision that
 * disagrees with the one reached here, bad_full_backref is set on it.
 *
 * NOTE(review): the goto targets and return statements are elided from this
 * view, so which test leads to which outcome cannot be read off here --
 * confirm against the FULL_BACKREF rules documented before this function.
 */
9038 static int calc_extent_flag(struct cache_tree *extent_cache,
9039 struct extent_buffer *buf,
9040 struct root_item_record *ri,
9043 struct extent_record *rec;
9044 struct cache_extent *cache;
9045 struct tree_backref *tback;
9048 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9049 /* we have added this extent before */
9053 rec = container_of(cache, struct extent_record, cache);
9056 * Except file/reloc tree, we can not have
9059 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9064 if (buf->start == ri->bytenr)
9067 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9070 owner = btrfs_header_owner(buf);
9071 if (owner == ri->objectid)
9074 tback = find_tree_backref(rec, 0, owner);
9079 if (rec->flag_block_full_backref != FLAG_UNSET &&
9080 rec->flag_block_full_backref != 0)
9081 rec->bad_full_backref = 1;
9084 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9085 if (rec->flag_block_full_backref != FLAG_UNSET &&
9086 rec->flag_block_full_backref != 1)
9087 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr naming a key type that was found in
 * a root where it is not expected.  The key type is rendered with
 * print_key_type() and the root with print_objectid().
 */
9091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9093 fprintf(stderr, "Invalid key type(");
9094 print_key_type(stderr, 0, key_type);
9095 fprintf(stderr, ") found in root(");
9096 print_objectid(stderr, rootid, 0);
9097 fprintf(stderr, ")\n");
9101 * Check if the key is valid with its extent buffer.
9103 * This is an early check in case an invalid key exists in an extent buffer
9104 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that @key_type is a key type that may appear in the tree @rootid.
 *
 * Visible rules: DEV_ITEM and CHUNK_ITEM only in the chunk tree; EXTENT_ITEM,
 * METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent tree; ROOT_ITEM only
 * in the root tree; DEV_EXTENT only in the device tree.  A mismatch is
 * reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the jumps to the error path and the
 * return statements are elided from this view -- confirm the return values.
 */
9107 static int check_type_with_root(u64 rootid, u8 key_type)
9110 /* Only valid in chunk tree */
9111 case BTRFS_DEV_ITEM_KEY:
9112 case BTRFS_CHUNK_ITEM_KEY:
9113 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9116 /* valid in csum and log tree */
/*
 * NOTE(review): every other case label here is a key type, while
 * BTRFS_CSUM_TREE_OBJECTID is a tree objectid; this looks like it was meant
 * to be BTRFS_EXTENT_CSUM_KEY -- confirm.
 */
9117 case BTRFS_CSUM_TREE_OBJECTID:
9118 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9122 case BTRFS_EXTENT_ITEM_KEY:
9123 case BTRFS_METADATA_ITEM_KEY:
9124 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9125 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9128 case BTRFS_ROOT_ITEM_KEY:
9129 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9132 case BTRFS_DEV_EXTENT_KEY:
9133 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9139 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next batch of tree blocks to scan, read the first one and feed
 * its contents into the check caches.
 *
 * pick_next_pending() fills @bits from @pending, @reada and @nodes;
 * readahead is issued for the batch and bits[0] is the block that is
 * actually processed.  The block is read with read_tree_block(), its
 * FULL_BACKREF state is derived (btrfs_lookup_extent_info() and/or
 * calc_extent_flag()) and cross-checked against @ri, and check_block() is
 * run on it.  Leaf items are dispatched by key type to the extent, chunk,
 * device, block group, device extent and backref collectors; for nodes,
 * every child pointer gets an extent record and a tree backref and is queued
 * in @nodes or @pending.  Global btree statistics are updated and the buffer
 * is released at the end.
 *
 * NOTE(review): many declarations, conditions and return statements are
 * elided from this view -- confirm the error paths before relying on this
 * summary.
 */
9143 static int run_next_block(struct btrfs_root *root,
9144 struct block_info *bits,
9147 struct cache_tree *pending,
9148 struct cache_tree *seen,
9149 struct cache_tree *reada,
9150 struct cache_tree *nodes,
9151 struct cache_tree *extent_cache,
9152 struct cache_tree *chunk_cache,
9153 struct rb_root *dev_cache,
9154 struct block_group_tree *block_group_cache,
9155 struct device_extent_tree *dev_extent_cache,
9156 struct root_item_record *ri)
9158 struct btrfs_fs_info *fs_info = root->fs_info;
9159 struct extent_buffer *buf;
9160 struct extent_record *rec = NULL;
9171 struct btrfs_key key;
9172 struct cache_extent *cache;
9175 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9176 bits_nr, &reada_bits);
9181 for(i = 0; i < nritems; i++) {
9182 ret = add_cache_extent(reada, bits[i].start,
9187 /* fixme, get the parent transid */
9188 readahead_tree_block(fs_info, bits[i].start, 0);
9191 *last = bits[0].start;
9192 bytenr = bits[0].start;
9193 size = bits[0].size;
/* Drop the block being processed from the pending, reada and nodes trees. */
9195 cache = lookup_cache_extent(pending, bytenr, size);
9197 remove_cache_extent(pending, cache);
9200 cache = lookup_cache_extent(reada, bytenr, size);
9202 remove_cache_extent(reada, cache);
9205 cache = lookup_cache_extent(nodes, bytenr, size);
9207 remove_cache_extent(nodes, cache);
9210 cache = lookup_cache_extent(extent_cache, bytenr, size);
9212 rec = container_of(cache, struct extent_record, cache);
9213 gen = rec->parent_generation;
9216 /* fixme, get the real parent transid */
9217 buf = read_tree_block(root->fs_info, bytenr, gen);
9218 if (!extent_buffer_uptodate(buf)) {
9219 record_bad_block_io(root->fs_info,
9220 extent_cache, bytenr, size);
9224 nritems = btrfs_header_nritems(buf);
9227 if (!init_extent_tree) {
9228 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9229 btrfs_header_level(buf), 1, NULL,
9232 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9234 fprintf(stderr, "Couldn't calc extent flags\n");
9235 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9240 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9242 fprintf(stderr, "Couldn't calc extent flags\n");
9243 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9247 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9249 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9250 ri->objectid == btrfs_header_owner(buf)) {
9252 * Ok we got to this block from it's original owner and
9253 * we have FULL_BACKREF set. Relocation can leave
9254 * converted blocks over so this is altogether possible,
9255 * however it's not possible if the generation > the
9256 * last snapshot, so check for this case.
9258 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9259 btrfs_header_generation(buf) > ri->last_snapshot) {
9260 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9261 rec->bad_full_backref = 1;
9266 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9267 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9268 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9269 rec->bad_full_backref = 1;
9273 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9274 rec->flag_block_full_backref = 1;
9278 rec->flag_block_full_backref = 0;
9280 owner = btrfs_header_owner(buf);
9283 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: each item is dispatched by key type to the matching collector. */
9287 if (btrfs_is_leaf(buf)) {
9288 btree_space_waste += btrfs_leaf_free_space(root, buf);
9289 for (i = 0; i < nritems; i++) {
9290 struct btrfs_file_extent_item *fi;
9291 btrfs_item_key_to_cpu(buf, &key, i);
9293 * Check key type against the leaf owner.
9294 * Could filter quite a lot of early error if
9297 if (check_type_with_root(btrfs_header_owner(buf),
9299 fprintf(stderr, "ignoring invalid key\n");
9302 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9303 process_extent_item(root, extent_cache, buf,
9307 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9308 process_extent_item(root, extent_cache, buf,
9312 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9314 btrfs_item_size_nr(buf, i);
9317 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9318 process_chunk_item(chunk_cache, &key, buf, i);
9321 if (key.type == BTRFS_DEV_ITEM_KEY) {
9322 process_device_item(dev_cache, &key, buf, i);
9325 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9326 process_block_group_item(block_group_cache,
9330 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9331 process_device_extent_item(dev_extent_cache,
9336 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9338 process_extent_ref_v0(extent_cache, buf, i);
9345 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9346 ret = add_tree_backref(extent_cache,
9347 key.objectid, 0, key.offset, 0);
9350 "add_tree_backref failed (leaf tree block): %s",
9354 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9355 ret = add_tree_backref(extent_cache,
9356 key.objectid, key.offset, 0, 0);
9359 "add_tree_backref failed (leaf shared block): %s",
9363 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9364 struct btrfs_extent_data_ref *ref;
9365 ref = btrfs_item_ptr(buf, i,
9366 struct btrfs_extent_data_ref);
9367 add_data_backref(extent_cache,
9369 btrfs_extent_data_ref_root(buf, ref),
9370 btrfs_extent_data_ref_objectid(buf,
9372 btrfs_extent_data_ref_offset(buf, ref),
9373 btrfs_extent_data_ref_count(buf, ref),
9374 0, root->fs_info->sectorsize);
9377 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9378 struct btrfs_shared_data_ref *ref;
9379 ref = btrfs_item_ptr(buf, i,
9380 struct btrfs_shared_data_ref);
9381 add_data_backref(extent_cache,
9382 key.objectid, key.offset, 0, 0, 0,
9383 btrfs_shared_data_ref_count(buf, ref),
9384 0, root->fs_info->sectorsize);
9387 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9388 struct bad_item *bad;
9390 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9394 bad = malloc(sizeof(struct bad_item));
9397 INIT_LIST_HEAD(&bad->list);
9398 memcpy(&bad->key, &key,
9399 sizeof(struct btrfs_key));
9400 bad->root_id = owner;
9401 list_add_tail(&bad->list, &delete_items);
9404 if (key.type != BTRFS_EXTENT_DATA_KEY)
9406 fi = btrfs_item_ptr(buf, i,
9407 struct btrfs_file_extent_item);
9408 if (btrfs_file_extent_type(buf, fi) ==
9409 BTRFS_FILE_EXTENT_INLINE)
9411 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9414 data_bytes_allocated +=
9415 btrfs_file_extent_disk_num_bytes(buf, fi);
9416 if (data_bytes_allocated < root->fs_info->sectorsize) {
9419 data_bytes_referenced +=
9420 btrfs_file_extent_num_bytes(buf, fi);
9421 add_data_backref(extent_cache,
9422 btrfs_file_extent_disk_bytenr(buf, fi),
9423 parent, owner, key.objectid, key.offset -
9424 btrfs_file_extent_offset(buf, fi), 1, 1,
9425 btrfs_file_extent_disk_num_bytes(buf, fi));
9429 struct btrfs_key first_key;
9431 first_key.objectid = 0;
9434 btrfs_item_key_to_cpu(buf, &first_key, 0);
9435 level = btrfs_header_level(buf);
9436 for (i = 0; i < nritems; i++) {
9437 struct extent_record tmpl;
9439 ptr = btrfs_node_blockptr(buf, i);
9440 size = root->fs_info->nodesize;
9441 btrfs_node_key_to_cpu(buf, &key, i);
9443 if ((level == ri->drop_level)
9444 && is_dropped_key(&key, &ri->drop_key)) {
9449 memset(&tmpl, 0, sizeof(tmpl));
9450 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9451 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9456 tmpl.max_size = size;
9457 ret = add_extent_rec(extent_cache, &tmpl);
9461 ret = add_tree_backref(extent_cache, ptr, parent,
9465 "add_tree_backref failed (non-leaf block): %s",
9471 add_pending(nodes, seen, ptr, size);
9473 add_pending(pending, seen, ptr, size);
9476 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9477 nritems) * sizeof(struct btrfs_key_ptr);
/* Account the block in the global btree statistics. */
9479 total_btree_bytes += buf->len;
9480 if (fs_root_objectid(btrfs_header_owner(buf)))
9481 total_fs_tree_bytes += buf->len;
9482 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9483 total_extent_tree_bytes += buf->len;
9485 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for scanning and register it in
 * @extent_cache.
 *
 * A block with level > 0 is queued in @nodes, a leaf in @pending.  An extent
 * record covering the block is added, followed by a tree backref: for the
 * reloc tree, or when the header backref revision is older than
 * BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as parent; otherwise
 * it is keyed by the objectid.
 *
 * NOTE(review): the last parameter line, some template fields and the return
 * statement are elided from this view; the result of add_extent_rec() is not
 * checked in the visible lines -- confirm.
 */
9489 static int add_root_to_pending(struct extent_buffer *buf,
9490 struct cache_tree *extent_cache,
9491 struct cache_tree *pending,
9492 struct cache_tree *seen,
9493 struct cache_tree *nodes,
9496 struct extent_record tmpl;
9499 if (btrfs_header_level(buf) > 0)
9500 add_pending(nodes, seen, buf->start, buf->len);
9502 add_pending(pending, seen, buf->start, buf->len);
9504 memset(&tmpl, 0, sizeof(tmpl));
9505 tmpl.start = buf->start;
9510 tmpl.max_size = buf->len;
9511 add_extent_rec(extent_cache, &tmpl);
9513 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9514 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9515 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9518 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9523 /* as we fix the tree, we might be deleting blocks that
9524 * we're tracking for repair. This hook makes sure we
9525 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook that keeps the in-memory extent records in step when an extent
 * reference is dropped.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up in
 * fs_info->fsck_extent_cache.  An @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID is treated as data.  For data, the matching data
 * backref has its found_ref/num_refs counters and the record counters
 * reduced by refs_to_drop; for tree blocks, the matching tree backref has
 * its found_ref/found_extent_tree bits cleared.  Finally
 * maybe_free_extent_rec() is called on the record.
 *
 * NOTE(review): the last parameter line, the not-found paths and the return
 * statement are elided from this view -- confirm.
 */
9527 static int free_extent_hook(struct btrfs_trans_handle *trans,
9528 struct btrfs_root *root,
9529 u64 bytenr, u64 num_bytes, u64 parent,
9530 u64 root_objectid, u64 owner, u64 offset,
9533 struct extent_record *rec;
9534 struct cache_extent *cache;
9536 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9538 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9539 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9543 rec = container_of(cache, struct extent_record, cache);
9545 struct data_backref *back;
9546 back = find_data_backref(rec, parent, root_objectid, owner,
9547 offset, 1, bytenr, num_bytes);
9550 if (back->node.found_ref) {
9551 back->found_ref -= refs_to_drop;
9553 rec->refs -= refs_to_drop;
9555 if (back->node.found_extent_tree) {
9556 back->num_refs -= refs_to_drop;
9557 if (rec->extent_item_refs)
9558 rec->extent_item_refs -= refs_to_drop;
9560 if (back->found_ref == 0)
9561 back->node.found_ref = 0;
9562 if (back->num_refs == 0)
9563 back->node.found_extent_tree = 0;
9565 if (!back->node.found_extent_tree && back->node.found_ref) {
9566 rb_erase(&back->node.node, &rec->backref_tree);
9570 struct tree_backref *back;
9571 back = find_tree_backref(rec, parent, root_objectid);
9574 if (back->node.found_ref) {
9577 back->node.found_ref = 0;
9579 if (back->node.found_extent_tree) {
9580 if (rec->extent_item_refs)
9581 rec->extent_item_refs--;
9582 back->node.found_extent_tree = 0;
9584 if (!back->node.found_extent_tree && back->node.found_ref) {
/*
 * NOTE(review): in this tree backref branch found_ref appears to be cleared
 * above whenever it was set, so the condition guarding this erase looks like
 * it can never hold; possibly !found_ref was intended -- confirm.
 */
9585 rb_erase(&back->node.node, &rec->backref_tree);
9589 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that belongs to the extent at bytenr.
 *
 * The extent tree is searched from (bytenr, ..., -1) and walked backwards.
 * Items whose type is not one of EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 * SHARED_DATA_REF are skipped by moving the search key just below them.
 * Matching items are announced on stderr and removed with btrfs_del_item();
 * when an EXTENT_ITEM or METADATA_ITEM is removed, the block group
 * accounting is updated for its length (key offset, or nodesize for
 * metadata items).
 *
 * NOTE(review): the last parameter line, loop control, error handling and
 * return statements are elided from this view -- confirm.
 */
9594 static int delete_extent_records(struct btrfs_trans_handle *trans,
9595 struct btrfs_root *root,
9596 struct btrfs_path *path,
9599 struct btrfs_key key;
9600 struct btrfs_key found_key;
9601 struct extent_buffer *leaf;
9606 key.objectid = bytenr;
9608 key.offset = (u64)-1;
9611 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9618 if (path->slots[0] == 0)
9624 leaf = path->nodes[0];
9625 slot = path->slots[0];
9627 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9628 if (found_key.objectid != bytenr)
9631 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9632 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9633 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9634 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9635 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9636 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9637 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9638 btrfs_release_path(path);
9639 if (found_key.type == 0) {
9640 if (found_key.offset == 0)
9642 key.offset = found_key.offset - 1;
9643 key.type = found_key.type;
9645 key.type = found_key.type - 1;
9646 key.offset = (u64)-1;
9650 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9651 found_key.objectid, found_key.type, found_key.offset);
9653 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9656 btrfs_release_path(path);
9658 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9659 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9660 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9661 found_key.offset : root->fs_info->nodesize;
9663 ret = btrfs_update_block_group(trans, root, bytenr,
9670 btrfs_release_path(path);
9675 * for a single backref, this will allocate a new extent
9676 * and add the backref to it.
9678 static int record_extent(struct btrfs_trans_handle *trans,
9679 struct btrfs_fs_info *info,
9680 struct btrfs_path *path,
9681 struct extent_record *rec,
9682 struct extent_backref *back,
9683 int allocated, u64 flags)
9686 struct btrfs_root *extent_root = info->extent_root;
9687 struct extent_buffer *leaf;
9688 struct btrfs_key ins_key;
9689 struct btrfs_extent_item *ei;
9690 struct data_backref *dback;
9691 struct btrfs_tree_block_info *bi;
9694 rec->max_size = max_t(u64, rec->max_size,
9698 u32 item_size = sizeof(*ei);
9701 item_size += sizeof(*bi);
9703 ins_key.objectid = rec->start;
9704 ins_key.offset = rec->max_size;
9705 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9707 ret = btrfs_insert_empty_item(trans, extent_root, path,
9708 &ins_key, item_size);
9712 leaf = path->nodes[0];
9713 ei = btrfs_item_ptr(leaf, path->slots[0],
9714 struct btrfs_extent_item);
9716 btrfs_set_extent_refs(leaf, ei, 0);
9717 btrfs_set_extent_generation(leaf, ei, rec->generation);
9719 if (back->is_data) {
9720 btrfs_set_extent_flags(leaf, ei,
9721 BTRFS_EXTENT_FLAG_DATA);
9723 struct btrfs_disk_key copy_key;;
9725 bi = (struct btrfs_tree_block_info *)(ei + 1);
9726 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9729 btrfs_set_disk_key_objectid(©_key,
9730 rec->info_objectid);
9731 btrfs_set_disk_key_type(©_key, 0);
9732 btrfs_set_disk_key_offset(©_key, 0);
9734 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9735 btrfs_set_tree_block_key(leaf, bi, ©_key);
9737 btrfs_set_extent_flags(leaf, ei,
9738 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9741 btrfs_mark_buffer_dirty(leaf);
9742 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9743 rec->max_size, 1, 0);
9746 btrfs_release_path(path);
9749 if (back->is_data) {
9753 dback = to_data_backref(back);
9754 if (back->full_backref)
9755 parent = dback->parent;
9759 for (i = 0; i < dback->found_ref; i++) {
9760 /* if parent != 0, we're doing a full backref
9761 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9762 * just makes the backref allocator create a data
9765 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9766 rec->start, rec->max_size,
9770 BTRFS_FIRST_FREE_OBJECTID :
9776 fprintf(stderr, "adding new data backref"
9777 " on %llu %s %llu owner %llu"
9778 " offset %llu found %d\n",
9779 (unsigned long long)rec->start,
9780 back->full_backref ?
9782 back->full_backref ?
9783 (unsigned long long)parent :
9784 (unsigned long long)dback->root,
9785 (unsigned long long)dback->owner,
9786 (unsigned long long)dback->offset,
9790 struct tree_backref *tback;
9792 tback = to_tree_backref(back);
9793 if (back->full_backref)
9794 parent = tback->parent;
9798 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9799 rec->start, rec->max_size,
9800 parent, tback->root, 0, 0);
9801 fprintf(stderr, "adding new tree backref on "
9802 "start %llu len %llu parent %llu root %llu\n",
9803 rec->start, rec->max_size, parent, tback->root);
9806 btrfs_release_path(path);
9810 static struct extent_entry *find_entry(struct list_head *entries,
9811 u64 bytenr, u64 bytes)
9813 struct extent_entry *entry = NULL;
9815 list_for_each_entry(entry, entries, list) {
9816 if (entry->bytenr == bytenr && entry->bytes == bytes)
9823 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9825 struct extent_entry *entry, *best = NULL, *prev = NULL;
9827 list_for_each_entry(entry, entries, list) {
9829 * If there are as many broken entries as entries then we know
9830 * not to trust this particular entry.
9832 if (entry->broken == entry->count)
9836 * Special case, when there are only two entries and 'best' is
9846 * If our current entry == best then we can't be sure our best
9847 * is really the best, so we need to keep searching.
9849 if (best && best->count == entry->count) {
9855 /* Prev == entry, not good enough, have to keep searching */
9856 if (!prev->broken && prev->count == entry->count)
9860 best = (prev->count > entry->count) ? prev : entry;
9861 else if (best->count < entry->count)
9869 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9870 struct data_backref *dback, struct extent_entry *entry)
9872 struct btrfs_trans_handle *trans;
9873 struct btrfs_root *root;
9874 struct btrfs_file_extent_item *fi;
9875 struct extent_buffer *leaf;
9876 struct btrfs_key key;
9880 key.objectid = dback->root;
9881 key.type = BTRFS_ROOT_ITEM_KEY;
9882 key.offset = (u64)-1;
9883 root = btrfs_read_fs_root(info, &key);
9885 fprintf(stderr, "Couldn't find root for our ref\n");
9890 * The backref points to the original offset of the extent if it was
9891 * split, so we need to search down to the offset we have and then walk
9892 * forward until we find the backref we're looking for.
9894 key.objectid = dback->owner;
9895 key.type = BTRFS_EXTENT_DATA_KEY;
9896 key.offset = dback->offset;
9897 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9899 fprintf(stderr, "Error looking up ref %d\n", ret);
9904 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9905 ret = btrfs_next_leaf(root, path);
9907 fprintf(stderr, "Couldn't find our ref, next\n");
9911 leaf = path->nodes[0];
9912 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9913 if (key.objectid != dback->owner ||
9914 key.type != BTRFS_EXTENT_DATA_KEY) {
9915 fprintf(stderr, "Couldn't find our ref, search\n");
9918 fi = btrfs_item_ptr(leaf, path->slots[0],
9919 struct btrfs_file_extent_item);
9920 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9921 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9923 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9928 btrfs_release_path(path);
9930 trans = btrfs_start_transaction(root, 1);
9932 return PTR_ERR(trans);
9935 * Ok we have the key of the file extent we want to fix, now we can cow
9936 * down to the thing and fix it.
9938 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9940 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9941 key.objectid, key.type, key.offset, ret);
9945 fprintf(stderr, "Well that's odd, we just found this key "
9946 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9951 leaf = path->nodes[0];
9952 fi = btrfs_item_ptr(leaf, path->slots[0],
9953 struct btrfs_file_extent_item);
9955 if (btrfs_file_extent_compression(leaf, fi) &&
9956 dback->disk_bytenr != entry->bytenr) {
9957 fprintf(stderr, "Ref doesn't match the record start and is "
9958 "compressed, please take a btrfs-image of this file "
9959 "system and send it to a btrfs developer so they can "
9960 "complete this functionality for bytenr %Lu\n",
9961 dback->disk_bytenr);
9966 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9967 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9968 } else if (dback->disk_bytenr > entry->bytenr) {
9969 u64 off_diff, offset;
9971 off_diff = dback->disk_bytenr - entry->bytenr;
9972 offset = btrfs_file_extent_offset(leaf, fi);
9973 if (dback->disk_bytenr + offset +
9974 btrfs_file_extent_num_bytes(leaf, fi) >
9975 entry->bytenr + entry->bytes) {
9976 fprintf(stderr, "Ref is past the entry end, please "
9977 "take a btrfs-image of this file system and "
9978 "send it to a btrfs developer, ref %Lu\n",
9979 dback->disk_bytenr);
9984 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9985 btrfs_set_file_extent_offset(leaf, fi, offset);
9986 } else if (dback->disk_bytenr < entry->bytenr) {
9989 offset = btrfs_file_extent_offset(leaf, fi);
9990 if (dback->disk_bytenr + offset < entry->bytenr) {
9991 fprintf(stderr, "Ref is before the entry start, please"
9992 " take a btrfs-image of this file system and "
9993 "send it to a btrfs developer, ref %Lu\n",
9994 dback->disk_bytenr);
9999 offset += dback->disk_bytenr;
10000 offset -= entry->bytenr;
10001 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10002 btrfs_set_file_extent_offset(leaf, fi, offset);
10005 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10008 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10009 * only do this if we aren't using compression, otherwise it's a
10012 if (!btrfs_file_extent_compression(leaf, fi))
10013 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10015 printf("ram bytes may be wrong?\n");
10016 btrfs_mark_buffer_dirty(leaf);
10018 err = btrfs_commit_transaction(trans, root);
10019 btrfs_release_path(path);
10020 return ret ? ret : err;
10023 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10024 struct extent_record *rec)
10026 struct extent_backref *back, *tmp;
10027 struct data_backref *dback;
10028 struct extent_entry *entry, *best = NULL;
10029 LIST_HEAD(entries);
10030 int nr_entries = 0;
10031 int broken_entries = 0;
10033 short mismatch = 0;
10036 * Metadata is easy and the backrefs should always agree on bytenr and
10037 * size, if not we've got bigger issues.
10042 rbtree_postorder_for_each_entry_safe(back, tmp,
10043 &rec->backref_tree, node) {
10044 if (back->full_backref || !back->is_data)
10047 dback = to_data_backref(back);
10050 * We only pay attention to backrefs that we found a real
10053 if (dback->found_ref == 0)
10057 * For now we only catch when the bytes don't match, not the
10058 * bytenr. We can easily do this at the same time, but I want
10059 * to have a fs image to test on before we just add repair
10060 * functionality willy-nilly so we know we won't screw up the
10064 entry = find_entry(&entries, dback->disk_bytenr,
10067 entry = malloc(sizeof(struct extent_entry));
10072 memset(entry, 0, sizeof(*entry));
10073 entry->bytenr = dback->disk_bytenr;
10074 entry->bytes = dback->bytes;
10075 list_add_tail(&entry->list, &entries);
10080 * If we only have one entry we may think the entries agree when
10081 * in reality they don't so we have to do some extra checking.
10083 if (dback->disk_bytenr != rec->start ||
10084 dback->bytes != rec->nr || back->broken)
10087 if (back->broken) {
10095 /* Yay all the backrefs agree, carry on good sir */
10096 if (nr_entries <= 1 && !mismatch)
10099 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10100 "%Lu\n", rec->start);
10103 * First we want to see if the backrefs can agree amongst themselves who
10104 * is right, so figure out which one of the entries has the highest
10107 best = find_most_right_entry(&entries);
10110 * Ok so we may have an even split between what the backrefs think, so
10111 * this is where we use the extent ref to see what it thinks.
10114 entry = find_entry(&entries, rec->start, rec->nr);
10115 if (!entry && (!broken_entries || !rec->found_rec)) {
10116 fprintf(stderr, "Backrefs don't agree with each other "
10117 "and extent record doesn't agree with anybody,"
10118 " so we can't fix bytenr %Lu bytes %Lu\n",
10119 rec->start, rec->nr);
10122 } else if (!entry) {
10124 * Ok our backrefs were broken, we'll assume this is the
10125 * correct value and add an entry for this range.
10127 entry = malloc(sizeof(struct extent_entry));
10132 memset(entry, 0, sizeof(*entry));
10133 entry->bytenr = rec->start;
10134 entry->bytes = rec->nr;
10135 list_add_tail(&entry->list, &entries);
10139 best = find_most_right_entry(&entries);
10141 fprintf(stderr, "Backrefs and extent record evenly "
10142 "split on who is right, this is going to "
10143 "require user input to fix bytenr %Lu bytes "
10144 "%Lu\n", rec->start, rec->nr);
10151 * I don't think this can happen currently as we'll abort() if we catch
10152 * this case higher up, but in case somebody removes that we still can't
10153 * deal with it properly here yet, so just bail out if that's the case.
10155 if (best->bytenr != rec->start) {
10156 fprintf(stderr, "Extent start and backref starts don't match, "
10157 "please use btrfs-image on this file system and send "
10158 "it to a btrfs developer so they can make fsck fix "
10159 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10160 rec->start, rec->nr);
10166 * Ok great we all agreed on an extent record, let's go find the real
10167 * references and fix up the ones that don't match.
10169 rbtree_postorder_for_each_entry_safe(back, tmp,
10170 &rec->backref_tree, node) {
10171 if (back->full_backref || !back->is_data)
10174 dback = to_data_backref(back);
10177 * Still ignoring backrefs that don't have a real ref attached
10180 if (dback->found_ref == 0)
10183 if (dback->bytes == best->bytes &&
10184 dback->disk_bytenr == best->bytenr)
10187 ret = repair_ref(info, path, dback, best);
10193 * Ok we messed with the actual refs, which means we need to drop our
10194 * entire cache and go back and rescan. I know this is a huge pain and
10195 * adds a lot of extra work, but it's the only way to be safe. Once all
10196 * the backrefs agree we may not need to do anything to the extent
10201 while (!list_empty(&entries)) {
10202 entry = list_entry(entries.next, struct extent_entry, list);
10203 list_del_init(&entry->list);
10209 static int process_duplicates(struct cache_tree *extent_cache,
10210 struct extent_record *rec)
10212 struct extent_record *good, *tmp;
10213 struct cache_extent *cache;
10217 * If we found an extent record for this extent then return, or if we
10218 * have more than one duplicate we are likely going to need to delete
10221 if (rec->found_rec || rec->num_duplicates > 1)
10224 /* Shouldn't happen but just in case */
10225 BUG_ON(!rec->num_duplicates);
10228 * So this happens if we end up with a backref that doesn't match the
10229 * actual extent entry. So either the backref is bad or the extent
10230 * entry is bad. Either way we want to have the extent_record actually
10231 * reflect what we found in the extent_tree, so we need to take the
10232 * duplicate out and use that as the extent_record since the only way we
10233 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10235 remove_cache_extent(extent_cache, &rec->cache);
10237 good = to_extent_record(rec->dups.next);
10238 list_del_init(&good->list);
10239 INIT_LIST_HEAD(&good->backrefs);
10240 INIT_LIST_HEAD(&good->dups);
10241 good->cache.start = good->start;
10242 good->cache.size = good->nr;
10243 good->content_checked = 0;
10244 good->owner_ref_checked = 0;
10245 good->num_duplicates = 0;
10246 good->refs = rec->refs;
10247 list_splice_init(&rec->backrefs, &good->backrefs);
10249 cache = lookup_cache_extent(extent_cache, good->start,
10253 tmp = container_of(cache, struct extent_record, cache);
10256 * If we find another overlapping extent and its found_rec is
10257 * set then it's a duplicate and we need to try and delete
10260 if (tmp->found_rec || tmp->num_duplicates > 0) {
10261 if (list_empty(&good->list))
10262 list_add_tail(&good->list,
10263 &duplicate_extents);
10264 good->num_duplicates += tmp->num_duplicates + 1;
10265 list_splice_init(&tmp->dups, &good->dups);
10266 list_del_init(&tmp->list);
10267 list_add_tail(&tmp->list, &good->dups);
10268 remove_cache_extent(extent_cache, &tmp->cache);
10273 * Ok we have another non extent item backed extent rec, so let's
10274 * just add it to this extent and carry on like we did above.
10276 good->refs += tmp->refs;
10277 list_splice_init(&tmp->backrefs, &good->backrefs);
10278 remove_cache_extent(extent_cache, &tmp->cache);
10281 ret = insert_cache_extent(extent_cache, &good->cache);
10284 return good->num_duplicates ? 0 : 1;
10287 static int delete_duplicate_records(struct btrfs_root *root,
10288 struct extent_record *rec)
10290 struct btrfs_trans_handle *trans;
10291 LIST_HEAD(delete_list);
10292 struct btrfs_path path;
10293 struct extent_record *tmp, *good, *n;
10296 struct btrfs_key key;
10298 btrfs_init_path(&path);
10301 /* Find the record that covers all of the duplicates. */
10302 list_for_each_entry(tmp, &rec->dups, list) {
10303 if (good->start < tmp->start)
10305 if (good->nr > tmp->nr)
10308 if (tmp->start + tmp->nr < good->start + good->nr) {
10309 fprintf(stderr, "Ok we have overlapping extents that "
10310 "aren't completely covered by each other, this "
10311 "is going to require more careful thought. "
10312 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10313 tmp->start, tmp->nr, good->start, good->nr);
10320 list_add_tail(&rec->list, &delete_list);
10322 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10325 list_move_tail(&tmp->list, &delete_list);
10328 root = root->fs_info->extent_root;
10329 trans = btrfs_start_transaction(root, 1);
10330 if (IS_ERR(trans)) {
10331 ret = PTR_ERR(trans);
10335 list_for_each_entry(tmp, &delete_list, list) {
10336 if (tmp->found_rec == 0)
10338 key.objectid = tmp->start;
10339 key.type = BTRFS_EXTENT_ITEM_KEY;
10340 key.offset = tmp->nr;
10342 /* Shouldn't happen but just in case */
10343 if (tmp->metadata) {
10344 fprintf(stderr, "Well this shouldn't happen, extent "
10345 "record overlaps but is metadata? "
10346 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10350 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10356 ret = btrfs_del_item(trans, root, &path);
10359 btrfs_release_path(&path);
10362 err = btrfs_commit_transaction(trans, root);
10366 while (!list_empty(&delete_list)) {
10367 tmp = to_extent_record(delete_list.next);
10368 list_del_init(&tmp->list);
10374 while (!list_empty(&rec->dups)) {
10375 tmp = to_extent_record(rec->dups.next);
10376 list_del_init(&tmp->list);
10380 btrfs_release_path(&path);
10382 if (!ret && !nr_del)
10383 rec->num_duplicates = 0;
10385 return ret ? ret : nr_del;
10388 static int find_possible_backrefs(struct btrfs_fs_info *info,
10389 struct btrfs_path *path,
10390 struct cache_tree *extent_cache,
10391 struct extent_record *rec)
10393 struct btrfs_root *root;
10394 struct extent_backref *back, *tmp;
10395 struct data_backref *dback;
10396 struct cache_extent *cache;
10397 struct btrfs_file_extent_item *fi;
10398 struct btrfs_key key;
10402 rbtree_postorder_for_each_entry_safe(back, tmp,
10403 &rec->backref_tree, node) {
10404 /* Don't care about full backrefs (poor unloved backrefs) */
10405 if (back->full_backref || !back->is_data)
10408 dback = to_data_backref(back);
10410 /* We found this one, we don't need to do a lookup */
10411 if (dback->found_ref)
10414 key.objectid = dback->root;
10415 key.type = BTRFS_ROOT_ITEM_KEY;
10416 key.offset = (u64)-1;
10418 root = btrfs_read_fs_root(info, &key);
10420 /* No root, definitely a bad ref, skip */
10421 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10423 /* Other err, exit */
10425 return PTR_ERR(root);
10427 key.objectid = dback->owner;
10428 key.type = BTRFS_EXTENT_DATA_KEY;
10429 key.offset = dback->offset;
10430 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10432 btrfs_release_path(path);
10435 /* Didn't find it, we can carry on */
10440 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10441 struct btrfs_file_extent_item);
10442 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10443 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10444 btrfs_release_path(path);
10445 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10447 struct extent_record *tmp;
10448 tmp = container_of(cache, struct extent_record, cache);
10451 * If we found an extent record for the bytenr for this
10452 * particular backref then we can't add it to our
10453 * current extent record. We only want to add backrefs
10454 * that don't have a corresponding extent item in the
10455 * extent tree since they likely belong to this record
10456 * and we need to fix it if it doesn't match bytenrs.
10458 if (tmp->found_rec)
10462 dback->found_ref += 1;
10463 dback->disk_bytenr = bytenr;
10464 dback->bytes = bytes;
10467 * Set this so the verify backref code knows not to trust the
10468 * values in this backref.
10477 * Record orphan data ref into corresponding root.
10479 * Return 0 if the extent item contains data ref and recorded.
10480 * Return 1 if the extent item contains no useful data ref
10481 * In that case, it may contain only shared_dataref or metadata backref
10482 * or the file extent exists (this should be handled by the extent bytenr
10483 * recovery routine)
10484 * Return <0 if something goes wrong.
10486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10487 struct extent_record *rec)
10489 struct btrfs_key key;
10490 struct btrfs_root *dest_root;
10491 struct extent_backref *back, *tmp;
10492 struct data_backref *dback;
10493 struct orphan_data_extent *orphan;
10494 struct btrfs_path path;
10495 int recorded_data_ref = 0;
10500 btrfs_init_path(&path);
10501 rbtree_postorder_for_each_entry_safe(back, tmp,
10502 &rec->backref_tree, node) {
10503 if (back->full_backref || !back->is_data ||
10504 !back->found_extent_tree)
10506 dback = to_data_backref(back);
10507 if (dback->found_ref)
10509 key.objectid = dback->root;
10510 key.type = BTRFS_ROOT_ITEM_KEY;
10511 key.offset = (u64)-1;
10513 dest_root = btrfs_read_fs_root(fs_info, &key);
10515 /* For a non-existent root we just skip it */
10516 if (IS_ERR(dest_root) || !dest_root)
10519 key.objectid = dback->owner;
10520 key.type = BTRFS_EXTENT_DATA_KEY;
10521 key.offset = dback->offset;
10523 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10524 btrfs_release_path(&path);
10526 * For ret < 0, it's OK since the fs-tree may be corrupted,
10527 * we need to record it for inode/file extent rebuild.
10528 * For ret > 0, we record it only for file extent rebuild.
10529 * For ret == 0, the file extent exists and only the bytenr
10530 * mismatches, let the original bytenr fix routine handle it,
10536 orphan = malloc(sizeof(*orphan));
10541 INIT_LIST_HEAD(&orphan->list);
10542 orphan->root = dback->root;
10543 orphan->objectid = dback->owner;
10544 orphan->offset = dback->offset;
10545 orphan->disk_bytenr = rec->cache.start;
10546 orphan->disk_len = rec->cache.size;
10547 list_add(&dest_root->orphan_data_extents, &orphan->list);
10548 recorded_data_ref = 1;
10551 btrfs_release_path(&path);
10553 return !recorded_data_ref;
10559 * when an incorrect extent item is found, this will delete
10560 * all of the existing entries for it and recreate them
10561 * based on what the tree scan found.
10563 static int fixup_extent_refs(struct btrfs_fs_info *info,
10564 struct cache_tree *extent_cache,
10565 struct extent_record *rec)
10567 struct btrfs_trans_handle *trans = NULL;
10569 struct btrfs_path path;
10570 struct cache_extent *cache;
10571 struct extent_backref *back, *tmp;
10575 if (rec->flag_block_full_backref)
10576 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10578 btrfs_init_path(&path);
10579 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10581 * Sometimes the backrefs themselves are so broken they don't
10582 * get attached to any meaningful rec, so first go back and
10583 * check any of our backrefs that we couldn't find and throw
10584 * them into the list if we find the backref so that
10585 * verify_backrefs can figure out what to do.
10587 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10592 /* step one, make sure all of the backrefs agree */
10593 ret = verify_backrefs(info, &path, rec);
10597 trans = btrfs_start_transaction(info->extent_root, 1);
10598 if (IS_ERR(trans)) {
10599 ret = PTR_ERR(trans);
10603 /* step two, delete all the existing records */
10604 ret = delete_extent_records(trans, info->extent_root, &path,
10610 /* was this block corrupt? If so, don't add references to it */
10611 cache = lookup_cache_extent(info->corrupt_blocks,
10612 rec->start, rec->max_size);
10618 /* step three, recreate all the refs we did find */
10619 rbtree_postorder_for_each_entry_safe(back, tmp,
10620 &rec->backref_tree, node) {
10622 * if we didn't find any references, don't create a
10623 * new extent record
10625 if (!back->found_ref)
10628 rec->bad_full_backref = 0;
10629 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10637 int err = btrfs_commit_transaction(trans, info->extent_root);
10643 fprintf(stderr, "Repaired extent references for %llu\n",
10644 (unsigned long long)rec->start);
10646 btrfs_release_path(&path);
10650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10651 struct extent_record *rec)
10653 struct btrfs_trans_handle *trans;
10654 struct btrfs_root *root = fs_info->extent_root;
10655 struct btrfs_path path;
10656 struct btrfs_extent_item *ei;
10657 struct btrfs_key key;
10661 key.objectid = rec->start;
10662 if (rec->metadata) {
10663 key.type = BTRFS_METADATA_ITEM_KEY;
10664 key.offset = rec->info_level;
10666 key.type = BTRFS_EXTENT_ITEM_KEY;
10667 key.offset = rec->max_size;
10670 trans = btrfs_start_transaction(root, 0);
10672 return PTR_ERR(trans);
10674 btrfs_init_path(&path);
10675 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10677 btrfs_release_path(&path);
10678 btrfs_commit_transaction(trans, root);
10681 fprintf(stderr, "Didn't find extent for %llu\n",
10682 (unsigned long long)rec->start);
10683 btrfs_release_path(&path);
10684 btrfs_commit_transaction(trans, root);
10688 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10689 struct btrfs_extent_item);
10690 flags = btrfs_extent_flags(path.nodes[0], ei);
10691 if (rec->flag_block_full_backref) {
10692 fprintf(stderr, "setting full backref on %llu\n",
10693 (unsigned long long)key.objectid);
10694 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696 fprintf(stderr, "clearing full backref on %llu\n",
10697 (unsigned long long)key.objectid);
10698 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10700 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10701 btrfs_mark_buffer_dirty(path.nodes[0]);
10702 btrfs_release_path(&path);
10703 ret = btrfs_commit_transaction(trans, root);
10705 fprintf(stderr, "Repaired extent flags for %llu\n",
10706 (unsigned long long)rec->start);
10711 /* right now we only prune from the extent allocation tree */
10712 static int prune_one_block(struct btrfs_trans_handle *trans,
10713 struct btrfs_fs_info *info,
10714 struct btrfs_corrupt_block *corrupt)
10717 struct btrfs_path path;
10718 struct extent_buffer *eb;
10722 int level = corrupt->level + 1;
10724 btrfs_init_path(&path);
10726 /* we want to stop at the parent to our busted block */
10727 path.lowest_level = level;
10729 ret = btrfs_search_slot(trans, info->extent_root,
10730 &corrupt->key, &path, -1, 1);
10735 eb = path.nodes[level];
10742 * hopefully the search gave us the block we want to prune,
10743 * lets try that first
10745 slot = path.slots[level];
10746 found = btrfs_node_blockptr(eb, slot);
10747 if (found == corrupt->cache.start)
10750 nritems = btrfs_header_nritems(eb);
10752 /* the search failed, lets scan this node and hope we find it */
10753 for (slot = 0; slot < nritems; slot++) {
10754 found = btrfs_node_blockptr(eb, slot);
10755 if (found == corrupt->cache.start)
10759 * we couldn't find the bad block. TODO, search all the nodes for pointers
10762 if (eb == info->extent_root->node) {
10767 btrfs_release_path(&path);
10772 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10773 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10776 btrfs_release_path(&path);
10780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10782 struct btrfs_trans_handle *trans = NULL;
10783 struct cache_extent *cache;
10784 struct btrfs_corrupt_block *corrupt;
10787 cache = search_cache_extent(info->corrupt_blocks, 0);
10791 trans = btrfs_start_transaction(info->extent_root, 1);
10793 return PTR_ERR(trans);
10795 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10796 prune_one_block(trans, info, corrupt);
10797 remove_cache_extent(info->corrupt_blocks, cache);
10800 return btrfs_commit_transaction(trans, info->extent_root);
10804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10806 struct btrfs_block_group_cache *cache;
10811 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10812 &start, &end, EXTENT_DIRTY);
10815 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10820 cache = btrfs_lookup_first_block_group(fs_info, start);
10825 start = cache->key.objectid + cache->key.offset;
10829 static int check_extent_refs(struct btrfs_root *root,
10830 struct cache_tree *extent_cache)
10832 struct extent_record *rec;
10833 struct cache_extent *cache;
10840 * if we're doing a repair, we have to make sure
10841 * we don't allocate from the problem extents.
10842 * In the worst case, this will be all the
10843 * extents in the FS
10845 cache = search_cache_extent(extent_cache, 0);
10847 rec = container_of(cache, struct extent_record, cache);
10848 set_extent_dirty(root->fs_info->excluded_extents,
10850 rec->start + rec->max_size - 1);
10851 cache = next_cache_extent(cache);
10854 /* pin down all the corrupted blocks too */
10855 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10857 set_extent_dirty(root->fs_info->excluded_extents,
10859 cache->start + cache->size - 1);
10860 cache = next_cache_extent(cache);
10862 prune_corrupt_blocks(root->fs_info);
10863 reset_cached_block_groups(root->fs_info);
10866 reset_cached_block_groups(root->fs_info);
10869 * We need to delete any duplicate entries we find first otherwise we
10870 * could mess up the extent tree when we have backrefs that actually
10871 * belong to a different extent item and not the weird duplicate one.
10873 while (repair && !list_empty(&duplicate_extents)) {
10874 rec = to_extent_record(duplicate_extents.next);
10875 list_del_init(&rec->list);
10877 /* Sometimes we can find a backref before we find an actual
10878 * extent, so we need to process it a little bit to see if there
10879 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10880 * if this is a backref screwup. If we need to delete stuff
10881 * process_duplicates() will return 0, otherwise it will return
10884 if (process_duplicates(extent_cache, rec))
10886 ret = delete_duplicate_records(root, rec);
10890 * delete_duplicate_records will return the number of entries
10891 * deleted, so if it's greater than 0 then we know we actually
10892 * did something and we need to remove.
10905 cache = search_cache_extent(extent_cache, 0);
10908 rec = container_of(cache, struct extent_record, cache);
10909 if (rec->num_duplicates) {
10910 fprintf(stderr, "extent item %llu has multiple extent "
10911 "items\n", (unsigned long long)rec->start);
10915 if (rec->refs != rec->extent_item_refs) {
10916 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10917 (unsigned long long)rec->start,
10918 (unsigned long long)rec->nr);
10919 fprintf(stderr, "extent item %llu, found %llu\n",
10920 (unsigned long long)rec->extent_item_refs,
10921 (unsigned long long)rec->refs);
10922 ret = record_orphan_data_extents(root->fs_info, rec);
10928 if (all_backpointers_checked(rec, 1)) {
10929 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10930 (unsigned long long)rec->start,
10931 (unsigned long long)rec->nr);
10935 if (!rec->owner_ref_checked) {
10936 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10937 (unsigned long long)rec->start,
10938 (unsigned long long)rec->nr);
10943 if (repair && fix) {
10944 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10950 if (rec->bad_full_backref) {
10951 fprintf(stderr, "bad full backref, on [%llu]\n",
10952 (unsigned long long)rec->start);
10954 ret = fixup_extent_flags(root->fs_info, rec);
10962 * Although it's not an extent ref's problem, we reuse this
10963 * routine for error reporting.
10964 * No repair function yet.
10966 if (rec->crossing_stripes) {
10968 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10969 rec->start, rec->start + rec->max_size);
10973 if (rec->wrong_chunk_type) {
10975 "bad extent [%llu, %llu), type mismatch with chunk\n",
10976 rec->start, rec->start + rec->max_size);
10981 remove_cache_extent(extent_cache, cache);
10982 free_all_extent_backrefs(rec);
10983 if (!init_extent_tree && repair && (!cur_err || fix))
10984 clear_extent_dirty(root->fs_info->excluded_extents,
10986 rec->start + rec->max_size - 1);
10991 if (ret && ret != -EAGAIN) {
10992 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10995 struct btrfs_trans_handle *trans;
10997 root = root->fs_info->extent_root;
10998 trans = btrfs_start_transaction(root, 1);
10999 if (IS_ERR(trans)) {
11000 ret = PTR_ERR(trans);
11004 ret = btrfs_fix_block_accounting(trans, root);
11007 ret = btrfs_commit_transaction(trans, root);
11019 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11023 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11024 stripe_size = length;
11025 stripe_size /= num_stripes;
11026 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11027 stripe_size = length * 2;
11028 stripe_size /= num_stripes;
11029 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11030 stripe_size = length;
11031 stripe_size /= (num_stripes - 1);
11032 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11033 stripe_size = length;
11034 stripe_size /= (num_stripes - 2);
11036 stripe_size = length;
11038 return stripe_size;
11042 * Check the chunk with its block group/dev list ref:
11043 * Return 0 if all refs seem valid.
11044 * Return 1 if part of refs seem valid, need later check for rebuild ref
11045 * like missing block group and needs to search extent tree to rebuild them.
11046 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the
 * device extent cache.
 *
 * First a block group is looked up in block_group_cache->tree and its
 * offset/objectid/flags are compared with the chunk's length/offset/
 * type_flags; a message is formatted for a mismatch and for a missing
 * block group.  Then, for every stripe of the chunk, a device extent is
 * looked up by (devid, offset, stripe length) and compared with the stripe.
 *
 * NOTE(review): this listing omits several lines of the function (the last
 * parameter, some declarations, the else/return lines), so the branch
 * structure and the exact return values are not visible here.
 */
11048 static int check_chunk_refs(struct chunk_record *chunk_rec,
11049 struct block_group_tree *block_group_cache,
11050 struct device_extent_tree *dev_extent_cache,
11053 struct cache_extent *block_group_item;
11054 struct block_group_record *block_group_rec;
11055 struct cache_extent *dev_extent_item;
11056 struct device_extent_record *dev_extent_rec;
11060 int metadump_v2 = 0;
11064 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11066 chunk_rec->length);
11067 if (block_group_item) {
11068 block_group_rec = container_of(block_group_item,
11069 struct block_group_record,
/*
 * NOTE(review): the extra closing parenthesis on the type_flags line
 * suggests that comparison is part of a parenthesised sub-condition whose
 * first line is not visible in this listing.
 */
11071 if (chunk_rec->length != block_group_rec->offset ||
11072 chunk_rec->offset != block_group_rec->objectid ||
11074 chunk_rec->type_flags != block_group_rec->flags)) {
11077 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11078 chunk_rec->objectid,
11083 chunk_rec->type_flags,
11084 block_group_rec->objectid,
11085 block_group_rec->type,
11086 block_group_rec->offset,
11087 block_group_rec->offset,
11088 block_group_rec->objectid,
11089 block_group_rec->flags);
/*
 * Unlink the block group record from the list it is on and remember it in
 * the chunk record (presumably only on the non-mismatch path -- the
 * surrounding else is not visible).
 */
11092 list_del_init(&block_group_rec->list);
11093 chunk_rec->bg_rec = block_group_rec;
11098 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11099 chunk_rec->objectid,
11104 chunk_rec->type_flags);
/* Length each stripe's device extent is expected to have. */
11111 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11112 chunk_rec->num_stripes);
11113 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11114 devid = chunk_rec->stripes[i].devid;
11115 offset = chunk_rec->stripes[i].offset;
11116 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11117 devid, offset, length);
11118 if (dev_extent_item) {
11119 dev_extent_rec = container_of(dev_extent_item,
11120 struct device_extent_record,
/* The device extent must agree with the stripe in every field. */
11122 if (dev_extent_rec->objectid != devid ||
11123 dev_extent_rec->offset != offset ||
11124 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11125 dev_extent_rec->length != length) {
11128 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11129 chunk_rec->objectid,
11132 chunk_rec->stripes[i].devid,
11133 chunk_rec->stripes[i].offset,
11134 dev_extent_rec->objectid,
11135 dev_extent_rec->offset,
11136 dev_extent_rec->length);
/* Move the device extent onto this chunk's dextents list. */
11139 list_move(&dev_extent_rec->chunk_list,
11140 &chunk_rec->dextents);
11145 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11146 chunk_rec->objectid,
11149 chunk_rec->stripes[i].devid,
11150 chunk_rec->stripes[i].offset);
11157 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort the
 * records by result: result 0 goes to @good, a positive result to @rebuild
 * and a negative result to @bad.  Each list is optional; a NULL list
 * pointer means the record is not queued for that outcome.  @silent is
 * handed through to check_chunk_refs().
 *
 * Afterwards the block groups still on block_group_cache->block_groups and
 * the device extents on dev_extent_cache->no_chunk_orphans are iterated,
 * with messages stating that they did not find their chunk.
 *
 * NOTE(review): the return statement and some declarations are not visible
 * in this listing.
 */
11158 int check_chunks(struct cache_tree *chunk_cache,
11159 struct block_group_tree *block_group_cache,
11160 struct device_extent_tree *dev_extent_cache,
11161 struct list_head *good, struct list_head *bad,
11162 struct list_head *rebuild, int silent)
11164 struct cache_extent *chunk_item;
11165 struct chunk_record *chunk_rec;
11166 struct block_group_record *bg_rec;
11167 struct device_extent_record *dext_rec;
11171 chunk_item = first_cache_extent(chunk_cache);
11172 while (chunk_item) {
11173 chunk_rec = container_of(chunk_item, struct chunk_record,
11175 err = check_chunk_refs(chunk_rec, block_group_cache,
11176 dev_extent_cache, silent);
/* Classify the chunk by the sign of the check result. */
11179 if (err == 0 && good)
11180 list_add_tail(&chunk_rec->list, good);
11181 if (err > 0 && rebuild)
11182 list_add_tail(&chunk_rec->list, rebuild);
11183 if (err < 0 && bad)
11184 list_add_tail(&chunk_rec->list, bad);
11185 chunk_item = next_cache_extent(chunk_item);
/* Block group records left on this list were not claimed by any chunk. */
11188 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11191 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11199 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11203 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11204 dext_rec->objectid,
/*
 * Compare a device's recorded byte_used with the sum of the lengths of its
 * device extents.
 *
 * The walk starts at the first cache entry found for (dev_rec->devid, 0)
 * and advances with next_cache_extent(); every visited extent is unlinked
 * from its device_list and its length is added to the total.  A message is
 * formatted when the total differs from dev_rec->byte_used.
 *
 * NOTE(review): the loop header, the action taken when an extent belongs
 * to another device, and the return statements are not visible in this
 * listing.
 */
11214 static int check_device_used(struct device_record *dev_rec,
11215 struct device_extent_tree *dext_cache)
11217 struct cache_extent *cache;
11218 struct device_extent_record *dev_extent_rec;
11219 u64 total_byte = 0;
11221 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11223 dev_extent_rec = container_of(cache,
11224 struct device_extent_record,
/* Extents of a different device mark the end of this device's range
 * (presumably the loop stops here -- the statement is not visible). */
11226 if (dev_extent_rec->objectid != dev_rec->devid)
11229 list_del_init(&dev_extent_rec->device_list);
11230 total_byte += dev_extent_rec->length;
11231 cache = next_cache_extent(cache);
11234 if (total_byte != dev_rec->byte_used) {
11236 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11237 total_byte, dev_rec->byte_used, dev_rec->objectid,
11238 dev_rec->type, dev_rec->offset);
11246 * Extra (optional) check for dev_item size to report possible problem on a new
/*
 * Report a device whose total_bytes is not aligned to the sector size.
 *
 * @devid:       device id used in the message
 * @total_bytes: device size to test
 * @sectorsize:  required alignment
 *
 * Nothing is returned; the function only emits messages pointing at
 * 'btrfs rescue fix-device-size'.
 */
11249 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11251 if (!IS_ALIGNED(total_bytes, sectorsize)) {
11253 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11254 devid, total_bytes, sectorsize);
11256 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11257 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11262 * Unlike device size alignment check above, some super total_bytes check
11263 * failure can lead to mount failure for newer kernel.
11265 * So this function will return the error for a fatal super total_bytes problem.
/*
 * Validate the superblock total_bytes against the devices of the fs.
 *
 * The total_bytes of every device on fs_info->fs_devices->devices are
 * summed.  A superblock value smaller than that sum is reported with
 * error().  After that, unless the superblock carries a METADUMP or
 * METADUMP_V2 flag, an unaligned super/device total or a super value that
 * differs from the sum is reported with warning().
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * which of the two findings makes the function return false cannot be
 * confirmed from here.
 */
11267 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11269 struct btrfs_device *dev;
11270 struct list_head *dev_list = &fs_info->fs_devices->devices;
11271 u64 total_bytes = 0;
11272 u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11274 list_for_each_entry(dev, dev_list, dev_list)
11275 total_bytes += dev->total_bytes;
11277 /* Important check, which can cause unmountable fs */
11278 if (super_bytes < total_bytes) {
11279 error("super total bytes %llu smaller than real device(s) size %llu",
11280 super_bytes, total_bytes);
11281 error("mounting this fs may fail for newer kernels");
11282 error("this can be fixed by 'btrfs rescue fix-device-size'");
11287 * Optional check, just to make everything aligned and match with each
11290 * For a btrfs-image restored fs, we don't need to check it anyway.
11292 if (btrfs_super_flags(fs_info->super_copy) &
11293 (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11295 if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11296 !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11297 super_bytes != total_bytes) {
11298 warning("minor unaligned/mismatch device size detected");
11300 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11305 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Check every device record in @dev_cache against @dev_extent_cache.
 *
 * The rb-tree is walked from rb_first() with rb_next(); each device is
 * passed to check_device_used() and its total_byte is tested with
 * check_dev_size_alignment() using global_info->sectorsize.  Finally the
 * device extents on dev_extent_cache->no_device_orphans are iterated with
 * a message saying they did not find their device.
 *
 * NOTE(review): the loop header, the handling of err and the return
 * statement are not visible in this listing.
 */
11306 static int check_devices(struct rb_root *dev_cache,
11307 struct device_extent_tree *dev_extent_cache)
11309 struct rb_node *dev_node;
11310 struct device_record *dev_rec;
11311 struct device_extent_record *dext_rec;
11315 dev_node = rb_first(dev_cache);
11317 dev_rec = container_of(dev_node, struct device_record, node);
11318 err = check_device_used(dev_rec, dev_extent_cache);
11322 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11323 global_info->sectorsize);
11324 dev_node = rb_next(dev_node);
11326 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11329 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11330 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level and @drop_level are stored
 * as-is; @drop_key is copied into the record's drop_key.
 *
 * NOTE(review): callers in this file pass a NULL @drop_key, so the memcpy
 * is presumably guarded by a check that is not visible in this listing;
 * the malloc failure handling and return values are not visible either.
 */
11337 static int add_root_item_to_list(struct list_head *head,
11338 u64 objectid, u64 bytenr, u64 last_snapshot,
11339 u8 level, u8 drop_level,
11340 struct btrfs_key *drop_key)
11343 struct root_item_record *ri_rec;
11344 ri_rec = malloc(sizeof(*ri_rec));
11347 ri_rec->bytenr = bytenr;
11348 ri_rec->objectid = objectid;
11349 ri_rec->level = level;
11350 ri_rec->drop_level = drop_level;
11351 ri_rec->last_snapshot = last_snapshot;
11353 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11354 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries: while the list is not empty
 * the first entry is taken and unlinked with list_del_init().
 *
 * NOTE(review): the statement releasing the record's memory is not visible
 * in this listing.
 */
11359 static void free_root_item_list(struct list_head *list)
11361 struct root_item_record *ri_rec;
11363 while (!list_empty(list)) {
11364 ri_rec = list_first_entry(list, struct root_item_record,
11366 list_del_init(&ri_rec->list);
/*
 * Process the queued root_item_record entries of @list.
 *
 * For each record the root's tree block is read with read_tree_block(),
 * registered through add_root_to_pending() and the record is removed from
 * the list; run_next_block() is called with the record inside the loop
 * and with a NULL record afterwards.  All cache arguments are forwarded
 * unchanged to those helpers.
 *
 * NOTE(review): this listing omits the bits_nr parameter line, the local
 * declarations, the conditions around the run_next_block() calls and all
 * return/break statements, so the exact control flow is not visible here.
 */
11371 static int deal_root_from_list(struct list_head *list,
11372 struct btrfs_root *root,
11373 struct block_info *bits,
11375 struct cache_tree *pending,
11376 struct cache_tree *seen,
11377 struct cache_tree *reada,
11378 struct cache_tree *nodes,
11379 struct cache_tree *extent_cache,
11380 struct cache_tree *chunk_cache,
11381 struct rb_root *dev_cache,
11382 struct block_group_tree *block_group_cache,
11383 struct device_extent_tree *dev_extent_cache)
11388 while (!list_empty(list)) {
11389 struct root_item_record *rec;
11390 struct extent_buffer *buf;
11391 rec = list_entry(list->next,
11392 struct root_item_record, list);
/* Read the root node of this tree; an unreadable block is released. */
11394 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11395 if (!extent_buffer_uptodate(buf)) {
11396 free_extent_buffer(buf);
11400 ret = add_root_to_pending(buf, extent_cache, pending,
11401 seen, nodes, rec->objectid);
11405 * To rebuild extent tree, we need deal with snapshot
11406 * one by one, otherwise we deal with node firstly which
11407 * can maximize readahead.
11410 ret = run_next_block(root, bits, bits_nr, &last,
11411 pending, seen, reada, nodes,
11412 extent_cache, chunk_cache,
11413 dev_cache, block_group_cache,
11414 dev_extent_cache, rec);
11418 free_extent_buffer(buf);
11419 list_del(&rec->list);
/* Second run_next_block() user: no root_item_record is passed here. */
11425 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11426 reada, nodes, extent_cache, chunk_cache,
11427 dev_cache, block_group_cache,
11428 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk/extent check.
 *
 * Visible steps:
 *  - initialise the local caches and lists and publish excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks in @fs_info;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - walk the ROOT_ITEMs of the tree root: roots whose drop_progress
 *    objectid is 0 go to normal_trees, the others go to dropping_trees
 *    together with their drop key and drop_level;
 *  - process both lists with deal_root_from_list(), then run
 *    check_chunks(), check_extent_refs() and check_devices();
 *  - clear the fs_info hooks again and free all caches and lists.
 *
 * NOTE(review): labels, goto/return statements, loop headers and several
 * declarations are not visible in this listing.  Two cleanup sequences
 * are visible at the end; which paths reach which one cannot be confirmed
 * from here.
 */
11438 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11440 struct rb_root dev_cache;
11441 struct cache_tree chunk_cache;
11442 struct block_group_tree block_group_cache;
11443 struct device_extent_tree dev_extent_cache;
11444 struct cache_tree extent_cache;
11445 struct cache_tree seen;
11446 struct cache_tree pending;
11447 struct cache_tree reada;
11448 struct cache_tree nodes;
11449 struct extent_io_tree excluded_extents;
11450 struct cache_tree corrupt_blocks;
11451 struct btrfs_path path;
11452 struct btrfs_key key;
11453 struct btrfs_key found_key;
11455 struct block_info *bits;
11457 struct extent_buffer *leaf;
11459 struct btrfs_root_item ri;
11460 struct list_head dropping_trees;
11461 struct list_head normal_trees;
11462 struct btrfs_root *root1;
11463 struct btrfs_root *root;
11467 root = fs_info->fs_root;
11468 dev_cache = RB_ROOT;
11469 cache_tree_init(&chunk_cache);
11470 block_group_tree_init(&block_group_cache);
11471 device_extent_tree_init(&dev_extent_cache);
11473 cache_tree_init(&extent_cache);
11474 cache_tree_init(&seen);
11475 cache_tree_init(&pending);
11476 cache_tree_init(&nodes);
11477 cache_tree_init(&reada);
11478 cache_tree_init(&corrupt_blocks);
11479 extent_io_tree_init(&excluded_extents);
11480 INIT_LIST_HEAD(&dropping_trees);
11481 INIT_LIST_HEAD(&normal_trees);
/* Publish the local trees and the free-extent hook through fs_info. */
11484 fs_info->excluded_extents = &excluded_extents;
11485 fs_info->fsck_extent_cache = &extent_cache;
11486 fs_info->free_extent_hook = free_extent_hook;
11487 fs_info->corrupt_blocks = &corrupt_blocks;
11491 bits = malloc(bits_nr * sizeof(struct block_info));
11497 if (ctx.progress_enabled) {
11498 ctx.tp = TASK_EXTENTS;
11499 task_start(ctx.info);
/* The tree root and the chunk root are always queued as normal trees. */
11503 root1 = fs_info->tree_root;
11504 level = btrfs_header_level(root1->node);
11505 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11506 root1->node->start, 0, level, 0, NULL);
11509 root1 = fs_info->chunk_root;
11510 level = btrfs_header_level(root1->node);
11511 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11512 root1->node->start, 0, level, 0, NULL);
11515 btrfs_init_path(&path);
11518 key.type = BTRFS_ROOT_ITEM_KEY;
11519 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11523 leaf = path.nodes[0];
11524 slot = path.slots[0];
11525 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/*
 * NOTE(review): the path was searched in fs_info->tree_root, but
 * btrfs_next_leaf() is given `root`, which was set to fs_info->fs_root
 * above -- confirm this is intended.
 */
11526 ret = btrfs_next_leaf(root, &path);
11529 leaf = path.nodes[0];
11530 slot = path.slots[0];
11532 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11533 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11534 unsigned long offset;
11537 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11538 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11539 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal_trees list. */
11540 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11541 level = btrfs_root_level(&ri);
11542 ret = add_root_item_to_list(&normal_trees,
11543 found_key.objectid,
11544 btrfs_root_bytenr(&ri),
11545 last_snapshot, level,
11550 level = btrfs_root_level(&ri);
11551 objectid = found_key.objectid;
/* found_key is reused to carry the drop progress key (objectid was
 * saved just above). */
11552 btrfs_disk_key_to_cpu(&found_key,
11553 &ri.drop_progress);
11554 ret = add_root_item_to_list(&dropping_trees,
11556 btrfs_root_bytenr(&ri),
11557 last_snapshot, level,
11558 ri.drop_level, &found_key);
11565 btrfs_release_path(&path);
11568 * check_block can return -EAGAIN if it fixes something, please keep
11569 * this in mind when dealing with return values from these functions, if
11570 * we get -EAGAIN we want to fall through and restart the loop.
11572 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11573 &seen, &reada, &nodes, &extent_cache,
11574 &chunk_cache, &dev_cache, &block_group_cache,
11575 &dev_extent_cache);
11577 if (ret == -EAGAIN)
11581 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11582 &pending, &seen, &reada, &nodes,
11583 &extent_cache, &chunk_cache, &dev_cache,
11584 &block_group_cache, &dev_extent_cache);
11586 if (ret == -EAGAIN)
/* No good/bad/rebuild lists are requested here; silent is 0. */
11591 ret = check_chunks(&chunk_cache, &block_group_cache,
11592 &dev_extent_cache, NULL, NULL, NULL, 0);
11594 if (ret == -EAGAIN)
11599 ret = check_extent_refs(root, &extent_cache);
11601 if (ret == -EAGAIN)
11606 ret = check_devices(&dev_cache, &dev_extent_cache);
11611 task_stop(ctx.info);
/* First visible cleanup sequence: unhook fs_info and free the caches. */
11613 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11614 extent_io_tree_cleanup(&excluded_extents);
11615 fs_info->fsck_extent_cache = NULL;
11616 fs_info->free_extent_hook = NULL;
11617 fs_info->corrupt_blocks = NULL;
11618 fs_info->excluded_extents = NULL;
11621 free_chunk_cache_tree(&chunk_cache);
11622 free_device_cache_tree(&dev_cache);
11623 free_block_group_tree(&block_group_cache);
11624 free_device_extent_tree(&dev_extent_cache);
11625 free_extent_cache_tree(&seen);
11626 free_extent_cache_tree(&pending);
11627 free_extent_cache_tree(&reada);
11628 free_extent_cache_tree(&nodes);
11629 free_root_item_list(&normal_trees);
11630 free_root_item_list(&dropping_trees);
/* Second visible cleanup sequence; unlike the first it also frees the
 * extent record cache and leaves the fs_info hooks in place. */
11633 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11634 free_extent_cache_tree(&seen);
11635 free_extent_cache_tree(&pending);
11636 free_extent_cache_tree(&reada);
11637 free_extent_cache_tree(&nodes);
11638 free_chunk_cache_tree(&chunk_cache);
11639 free_block_group_tree(&block_group_cache);
11640 free_device_cache_tree(&dev_cache);
11641 free_device_extent_tree(&dev_extent_cache);
11642 free_extent_record_cache(&extent_cache);
11643 free_root_item_list(&normal_trees);
11644 free_root_item_list(&dropping_trees);
11645 extent_io_tree_cleanup(&excluded_extents);
/*
 * Validate the type of an inline extent ref.
 *
 * @eb:   extent buffer holding the ref
 * @key:  key of the extent item, used only for the error message
 * @iref: inline ref inside @eb
 *
 * TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF and SHARED_DATA_REF
 * are the listed cases; for the remaining branch an error is printed and
 * UNKNOWN_TYPE is stored in ret.
 *
 * NOTE(review): the switch header, the value set for the known types and
 * the return statement are not visible in this listing.
 */
11649 static int check_extent_inline_ref(struct extent_buffer *eb,
11650 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11653 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11656 case BTRFS_TREE_BLOCK_REF_KEY:
11657 case BTRFS_EXTENT_DATA_REF_KEY:
11658 case BTRFS_SHARED_BLOCK_REF_KEY:
11659 case BTRFS_SHARED_DATA_REF_KEY:
11663 error("extent[%llu %u %llu] has unknown ref type: %d",
11664 key->objectid, key->type, key->offset, type);
11665 ret = UNKNOWN_TYPE;
11673 * Check backrefs of a tree block given by @bytenr or @eb.
11675 * @root: the root containing the @bytenr or @eb
11676 * @eb: tree block extent buffer, can be NULL
11677 * @bytenr: bytenr of the tree block to search
11678 * @level: tree level of the tree block
11679 * @owner: owner of the tree block
11681 * Return >0 for any error found and output error message
11682 * Return 0 for no error found
/*
 * Look up the extent/metadata item of the tree block at @bytenr in the
 * extent tree, compare its flags, generation, level and ref count with
 * the expected values, then search its inline refs (and afterwards a
 * keyed TREE_BLOCK_REF item) for a backref matching this root.
 *
 * NOTE(review): many short lines (declarations, else/goto/return, some
 * conditions) are not visible in this listing; the comments below only
 * describe what the visible lines do.
 */
11684 static int check_tree_block_ref(struct btrfs_root *root,
11685 struct extent_buffer *eb, u64 bytenr,
11686 int level, u64 owner, struct node_refs *nrefs)
11688 struct btrfs_key key;
11689 struct btrfs_root *extent_root = root->fs_info->extent_root;
11690 struct btrfs_path path;
11691 struct btrfs_extent_item *ei;
11692 struct btrfs_extent_inline_ref *iref;
11693 struct extent_buffer *leaf;
11698 int root_level = btrfs_header_level(root->node);
11700 u32 nodesize = root->fs_info->nodesize;
11703 int tree_reloc_root = 0;
/* The block is the root node of a tree reloc root. */
11710 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11711 btrfs_header_bytenr(root->node) == bytenr)
11712 tree_reloc_root = 1;
11713 btrfs_init_path(&path);
/* Key type depends on the SKINNY_METADATA incompat feature; the maximal
 * offset is used for the search, followed by a step back to the
 * previous extent item. */
11714 key.objectid = bytenr;
11715 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11716 key.type = BTRFS_METADATA_ITEM_KEY;
11718 key.type = BTRFS_EXTENT_ITEM_KEY;
11719 key.offset = (u64)-1;
11721 /* Search for the backref in extent tree */
11722 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11724 err |= BACKREF_MISSING;
11727 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11729 err |= BACKREF_MISSING;
11733 leaf = path.nodes[0];
11734 slot = path.slots[0];
11735 btrfs_item_key_to_cpu(leaf, &key, slot);
11737 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: the level is the key offset and the inline refs follow
 * the extent item directly.  Otherwise a btrfs_tree_block_info sits in
 * between and carries the level. */
11739 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11740 skinny_level = (int)key.offset;
11741 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11743 struct btrfs_tree_block_info *info;
11745 info = (struct btrfs_tree_block_info *)(ei + 1);
11746 skinny_level = btrfs_tree_block_level(leaf, info);
11747 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11756 * Due to the feature of shared tree blocks, if the upper node
11757 * is a fs root or shared node, the extent of checked node may
11758 * not be updated until the next CoW.
11761 strict = should_check_extent_strictly(root, nrefs,
11763 if (!(btrfs_extent_flags(leaf, ei) &
11764 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11766 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11767 key.objectid, nodesize,
11768 BTRFS_EXTENT_FLAG_TREE_BLOCK);
/* NOTE(review): plain assignment here and in the checks below, whereas
 * BACKREF_MISSING is OR-ed into err -- confirm no earlier bits need to
 * be preserved at this point. */
11769 err = BACKREF_MISMATCH;
11771 header_gen = btrfs_header_generation(eb);
11772 extent_gen = btrfs_extent_generation(leaf, ei);
11773 if (header_gen != extent_gen) {
11775 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11776 key.objectid, nodesize, header_gen,
11778 err = BACKREF_MISMATCH;
11780 if (level != skinny_level) {
11782 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11783 key.objectid, nodesize, level, skinny_level);
11784 err = BACKREF_MISMATCH;
11786 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11788 "extent[%llu %u] is referred by other roots than %llu",
11789 key.objectid, nodesize, root->objectid);
11790 err = BACKREF_MISMATCH;
11795 * Iterate the extent/metadata item to find the exact backref
/* ptr/end delimit the inline ref area of the item. */
11797 item_size = btrfs_item_size_nr(leaf, slot);
11798 ptr = (unsigned long)iref;
11799 end = (unsigned long)ei + item_size;
11801 while (ptr < end) {
11802 iref = (struct btrfs_extent_inline_ref *)ptr;
11803 type = btrfs_extent_inline_ref_type(leaf, iref);
11804 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11806 ret = check_extent_inline_ref(leaf, &key, iref);
/* For TREE_BLOCK_REF the ref offset is compared with the root objectid,
 * or with @owner when the check is not strict. */
11811 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11812 if (offset == root->objectid)
11814 if (!strict && owner == offset)
11816 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11818 * Backref of tree reloc root points to itself, no need
11819 * to check backref any more.
11821 if (tree_reloc_root) {
11825 * Check if the backref points to valid
/* Recurse on the block named by the shared ref offset, one level up;
 * a zero result counts as a found ref. */
11828 found_ref = !check_tree_block_ref( root, NULL,
11829 offset, level + 1, owner,
11836 ptr += btrfs_extent_inline_ref_size(type);
11840 * Inlined extent item doesn't have what we need, check
11841 * TREE_BLOCK_REF_KEY
11844 btrfs_release_path(&path);
11845 key.objectid = bytenr;
11846 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11847 key.offset = root->objectid;
11849 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11854 err |= BACKREF_MISSING;
11856 btrfs_release_path(&path);
11857 if (nrefs && strict &&
11858 level < root_level && nrefs->full_backref[level + 1])
11859 parent = nrefs->bytenr[level + 1];
11860 if (eb && (err & BACKREF_MISSING))
11862 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11863 bytenr, nodesize, owner, level,
11864 parent ? "parent" : "root",
11865 parent ? parent : root->objectid);
11870 * If @err contains BACKREF_MISSING then add extent of the
11871 * file_extent_data_item.
11873 * Returns error bits after repair.
/*
 * Repair a missing data backref for the file extent item at @pathp.
 *
 * Inline extents and extents with disk_bytenr 0 are tested first.  When
 * the BACKREF_MISSING bit is being handled, the EXTENT_ITEM
 * (disk_bytenr, EXTENT_ITEM_KEY, disk_num_bytes) is searched in the
 * extent tree; an insert sequence creates an empty item with 0 refs, the
 * file extent's generation and the DATA flag, and updates the block
 * group.  Then btrfs_inc_extent_ref() adds the backref and
 * BACKREF_MISSING is cleared from err.
 *
 * NOTE(review): the err parameter line, local declarations, the
 * conditions guarding the insert and all goto/return lines are not
 * visible in this listing.
 */
11875 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11876 struct btrfs_root *root,
11877 struct btrfs_path *pathp,
11878 struct node_refs *nrefs,
11881 struct btrfs_file_extent_item *fi;
11882 struct btrfs_key fi_key;
11883 struct btrfs_key key;
11884 struct btrfs_extent_item *ei;
11885 struct btrfs_path path;
11886 struct btrfs_root *extent_root = root->fs_info->extent_root;
11887 struct extent_buffer *eb;
11899 eb = pathp->nodes[0];
11900 slot = pathp->slots[0];
11901 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11902 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11904 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11905 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11908 file_offset = fi_key.offset;
11909 generation = btrfs_file_extent_generation(eb, fi);
11910 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11911 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11912 extent_offset = btrfs_file_extent_offset(eb, fi);
/* File offset the extent would have if referenced from its start. */
11913 offset = file_offset - extent_offset;
11915 /* now repair only adds backref */
11916 if ((err & BACKREF_MISSING) == 0)
11919 /* search extent item */
11920 key.objectid = disk_bytenr;
11921 key.type = BTRFS_EXTENT_ITEM_KEY;
11922 key.offset = num_bytes;
11924 btrfs_init_path(&path);
11925 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11931 /* insert an extent item */
11933 key.objectid = disk_bytenr;
11934 key.type = BTRFS_EXTENT_ITEM_KEY;
11935 key.offset = num_bytes;
11936 size = sizeof(*ei);
11938 btrfs_release_path(&path);
11939 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11943 eb = path.nodes[0];
11944 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero refs; the ref is added further down. */
11946 btrfs_set_extent_refs(eb, ei, 0);
11947 btrfs_set_extent_generation(eb, ei, generation);
11948 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11950 btrfs_mark_buffer_dirty(eb);
11951 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11953 btrfs_release_path(&path);
/*
 * NOTE(review): eb was re-pointed at path.nodes[0] (the extent tree leaf)
 * in the insert sequence above and that path has been released; confirm
 * that the bytenr of the file extent's own leaf (pathp->nodes[0]) is not
 * what is intended here.
 */
11956 if (nrefs->full_backref[0])
11957 parent = btrfs_header_bytenr(eb);
11961 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11963 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11967 "failed to increase extent data backref[%llu %llu] root %llu",
11968 disk_bytenr, num_bytes, root->objectid);
11971 printf("Add one extent data backref [%llu %llu]\n",
11972 disk_bytenr, num_bytes);
11975 err &= ~BACKREF_MISSING;
11978 error("can't repair root %llu extent data item[%llu %llu]",
11979 root->objectid, disk_bytenr, num_bytes);
11984 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11986 * Return >0 for any error found and output error message
11987 * Return 0 for no error found
/*
 * Check the EXTENT_DATA item at @pathp: alignment of its byte counts and
 * the presence of a matching data backref in the extent tree.
 *
 * Unaligned disk_num_bytes / num_bytes set BYTES_UNALIGNED; aligned values
 * are added to data_bytes_allocated / data_bytes_referenced when
 * @account_bytes is set.  The extent item is then searched and its inline
 * refs are scanned; if no backref is found there, keyed EXTENT_DATA_REF
 * and SHARED_DATA_REF items are searched.  A missing backref sets
 * BACKREF_MISSING and prints an error.
 *
 * NOTE(review): declarations, goto/return lines and several conditions
 * are not visible in this listing.
 */
11989 static int check_extent_data_item(struct btrfs_root *root,
11990 struct btrfs_path *pathp,
11991 struct node_refs *nrefs, int account_bytes)
11993 struct btrfs_file_extent_item *fi;
11994 struct extent_buffer *eb = pathp->nodes[0];
11995 struct btrfs_path path;
11996 struct btrfs_root *extent_root = root->fs_info->extent_root;
11997 struct btrfs_key fi_key;
11998 struct btrfs_key dbref_key;
11999 struct extent_buffer *leaf;
12000 struct btrfs_extent_item *ei;
12001 struct btrfs_extent_inline_ref *iref;
12002 struct btrfs_extent_data_ref *dref;
12005 u64 disk_num_bytes;
12006 u64 extent_num_bytes;
12013 int found_dbackref = 0;
12014 int slot = pathp->slots[0];
12019 btrfs_item_key_to_cpu(eb, &fi_key, slot);
12020 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12022 /* Nothing to check for hole and inline data extents */
12023 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12024 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12027 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12028 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12029 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12030 offset = btrfs_file_extent_offset(eb, fi);
12032 /* Check unaligned disk_num_bytes and num_bytes */
12033 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12035 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12036 fi_key.objectid, fi_key.offset, disk_num_bytes,
12037 root->fs_info->sectorsize);
12038 err |= BYTES_UNALIGNED;
12039 } else if (account_bytes) {
12040 data_bytes_allocated += disk_num_bytes;
12042 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12044 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12045 fi_key.objectid, fi_key.offset, extent_num_bytes,
12046 root->fs_info->sectorsize);
12047 err |= BYTES_UNALIGNED;
12048 } else if (account_bytes) {
12049 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the header of the leaf holding the file extent. */
12051 owner = btrfs_header_owner(eb);
12053 /* Check the extent item of the file extent in extent tree */
12054 btrfs_init_path(&path);
12055 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12056 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12057 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12059 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12063 leaf = path.nodes[0];
12064 slot = path.slots[0];
12065 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12067 extent_flags = btrfs_extent_flags(leaf, ei);
12069 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12071 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12072 disk_bytenr, disk_num_bytes,
12073 BTRFS_EXTENT_FLAG_DATA);
12074 err |= BACKREF_MISMATCH;
12077 /* Check data backref inside that extent item */
12078 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12079 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12080 ptr = (unsigned long)iref;
12081 end = (unsigned long)ei + item_size;
12082 strict = should_check_extent_strictly(root, nrefs, -1);
12084 while (ptr < end) {
12088 bool match = false;
12090 iref = (struct btrfs_extent_inline_ref *)ptr;
12091 type = btrfs_extent_inline_ref_type(leaf, iref);
12092 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12094 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12099 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12100 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12101 ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12102 ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
/* The ref must name this inode and the file offset minus the offset
 * into the extent; the root must be this root, or the leaf owner when
 * the check is not strict. */
12104 if (ref_objectid == fi_key.objectid &&
12105 ref_offset == fi_key.offset - offset)
12107 if (ref_root == root->objectid && match)
12108 found_dbackref = 1;
12109 else if (!strict && owner == ref_root && match)
12110 found_dbackref = 1;
12111 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12112 found_dbackref = !check_tree_block_ref(root, NULL,
12113 btrfs_extent_inline_ref_offset(leaf, iref),
12117 if (found_dbackref)
12119 ptr += btrfs_extent_inline_ref_size(type);
12122 if (!found_dbackref) {
12123 btrfs_release_path(&path);
12125 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12126 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12127 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): the inline ref comparison above matches against
 * fi_key.offset - offset, while this hash is computed from fi_key.offset
 * alone (and always from root->objectid) -- confirm the keyed lookup is
 * meant to differ from the inline one.
 */
12128 dbref_key.offset = hash_extent_data_ref(root->objectid,
12129 fi_key.objectid, fi_key.offset);
12131 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12132 &dbref_key, &path, 0, 0);
12134 found_dbackref = 1;
12138 btrfs_release_path(&path);
12141 * Neither inlined nor EXTENT_DATA_REF found, try
12142 * SHARED_DATA_REF as last chance.
/* The shared data ref is keyed by the start of the referencing leaf. */
12144 dbref_key.objectid = disk_bytenr;
12145 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12146 dbref_key.offset = eb->start;
12148 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12149 &dbref_key, &path, 0, 0);
12151 found_dbackref = 1;
12157 if (!found_dbackref)
12158 err |= BACKREF_MISSING;
12159 btrfs_release_path(&path);
12160 if (err & BACKREF_MISSING) {
12161 error("data extent[%llu %llu] backref lost",
12162 disk_bytenr, disk_num_bytes);
12168 * Get real tree block level for the case like shared block
12169 * Return >= 0 as tree level
12170 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources: the
 * extent tree item (key offset for METADATA_ITEM, btrfs_tree_block_info
 * otherwise) and the header of the block itself, read with the generation
 * taken from the extent item.  The header level is returned when the two
 * sources are compared and the function gets past that check.
 *
 * NOTE(review): declarations, the error assignments and the goto/return
 * lines of the failure paths are not visible in this listing.
 */
12172 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12174 struct extent_buffer *eb;
12175 struct btrfs_path path;
12176 struct btrfs_key key;
12177 struct btrfs_extent_item *ei;
12184 /* Search extent tree for extent generation and level */
12185 key.objectid = bytenr;
12186 key.type = BTRFS_METADATA_ITEM_KEY;
12187 key.offset = (u64)-1;
12189 btrfs_init_path(&path);
12190 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12193 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12201 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12202 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12203 struct btrfs_extent_item);
12204 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extents flagged as tree blocks are of interest here. */
12205 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12210 /* Get transid for later read_tree_block() check */
12211 transid = btrfs_extent_generation(path.nodes[0], ei);
12213 /* Get backref level as one source */
12214 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12215 backref_level = key.offset;
12217 struct btrfs_tree_block_info *info;
12219 info = (struct btrfs_tree_block_info *)(ei + 1);
12220 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12222 btrfs_release_path(&path);
12224 /* Get level from tree block as an alternative source */
12225 eb = read_tree_block(fs_info, bytenr, transid);
12226 if (!extent_buffer_uptodate(eb)) {
12227 free_extent_buffer(eb);
12230 header_level = btrfs_header_level(eb);
12231 free_extent_buffer(eb);
/* Both sources have to agree before the level is returned. */
12233 if (header_level != backref_level)
12235 return header_level;
12238 btrfs_release_path(&path);
12243 * Check if a tree block backref is valid (points to a valid tree block)
12244 * if level == -1, level will be resolved
12245 * Return >0 for any error found and print error message
/*
 * Verify that the tree block at @bytenr is reachable from root @root_id.
 *
 * The level may be resolved through query_tree_block_level().  The root
 * is read with btrfs_read_fs_root(), the first key of the block is taken
 * (node key or item key) and searched in that root with
 * path.lowest_level set to @level.  The node found at that level must
 * have the same bytenr and level, otherwise REFERENCER_MISMATCH is set;
 * lookup failures set REFERENCER_MISSING.
 *
 * NOTE(review): declarations, several conditions and the goto/return
 * lines are not visible in this listing.
 */
12247 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12248 u64 bytenr, int level)
12250 struct btrfs_root *root;
12251 struct btrfs_key key;
12252 struct btrfs_path path;
12253 struct extent_buffer *eb;
12254 struct extent_buffer *node;
12255 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12259 /* Query level for level == -1 special case */
12261 level = query_tree_block_level(fs_info, bytenr);
12263 err |= REFERENCER_MISSING;
12267 key.objectid = root_id;
12268 key.type = BTRFS_ROOT_ITEM_KEY;
12269 key.offset = (u64)-1;
12271 root = btrfs_read_fs_root(fs_info, &key);
12272 if (IS_ERR(root)) {
12273 err |= REFERENCER_MISSING;
12277 /* Read out the tree block to get item/node key */
12278 eb = read_tree_block(fs_info, bytenr, 0);
12279 if (!extent_buffer_uptodate(eb)) {
12280 err |= REFERENCER_MISSING;
12281 free_extent_buffer(eb);
12285 /* Empty tree, no need to check key */
12286 if (!btrfs_header_nritems(eb) && !level) {
12287 free_extent_buffer(eb);
/* key is reused from here on for the first key of the block. */
12292 btrfs_node_key_to_cpu(eb, &key, 0);
12294 btrfs_item_key_to_cpu(eb, &key, 0);
12296 free_extent_buffer(eb);
12298 btrfs_init_path(&path);
12299 path.lowest_level = level;
12300 /* Search with the first key, to ensure we can reach it */
12301 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12303 err |= REFERENCER_MISSING;
12307 node = path.nodes[level];
12308 if (btrfs_header_bytenr(node) != bytenr) {
12310 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12311 bytenr, nodesize, bytenr,
12312 btrfs_header_bytenr(node));
12313 err |= REFERENCER_MISMATCH;
12315 if (btrfs_header_level(node) != level) {
12317 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12318 bytenr, nodesize, level,
12319 btrfs_header_level(node));
12320 err |= REFERENCER_MISMATCH;
12324 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level. */
12326 if (err & REFERENCER_MISSING) {
12328 error("extent [%llu %d] lost referencer (owner: %llu)",
12329 bytenr, nodesize, root_id);
12332 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12333 bytenr, nodesize, root_id, level);
12340 * Check if tree block @eb is tree reloc root.
12341 * Return 0 if it's not or any problem happens
12342 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of the tree reloc root that belongs to
 * the owner recorded in the header of @eb.
 *
 * The reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner), read without the
 * root cache, compared by bytenr against @eb and then freed again.
 */
12344 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12345 struct extent_buffer *eb)
12347 struct btrfs_root *tree_reloc_root;
12348 struct btrfs_key key;
12349 u64 bytenr = btrfs_header_bytenr(eb);
12350 u64 owner = btrfs_header_owner(eb);
12353 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12354 key.offset = owner;
12355 key.type = BTRFS_ROOT_ITEM_KEY;
/* Uncached read: the root obtained here is released below by this function. */
12357 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12358 if (IS_ERR(tree_reloc_root))
/* A match means @eb sits exactly where the reloc root node is. */
12361 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12363 btrfs_free_fs_root(tree_reloc_root);
12368 * Check referencer for shared block backref
12369 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must contain a
 * block pointer to @bytenr and must sit exactly one level above it.
 *
 * When the parent and the child are the same block, the reference is only
 * accepted if the block is a tree reloc root. If no parent is found an error
 * is printed and REFERENCER_MISSING is returned.
 */
12371 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12372 u64 parent, u64 bytenr, int level)
12374 struct extent_buffer *eb;
12376 int found_parent = 0;
12379 eb = read_tree_block(fs_info, parent, 0);
12380 if (!extent_buffer_uptodate(eb))
/* Level resolution; presumably only done for level == -1 - confirm. */
12384 level = query_tree_block_level(fs_info, bytenr);
12388 /* It's possible it's a tree reloc root */
12389 if (parent == bytenr) {
12390 if (is_tree_reloc_root(fs_info, eb))
/* A real parent is one level higher than the block it points to. */
12395 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for the child bytenr. */
12398 nr = btrfs_header_nritems(eb);
12399 for (i = 0; i < nr; i++) {
12400 if (bytenr == btrfs_node_blockptr(eb, i)) {
12406 free_extent_buffer(eb);
12407 if (!found_parent) {
12409 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12410 bytenr, fs_info->nodesize, parent, level);
12411 return REFERENCER_MISSING;
12417 * Check referencer for normal (inlined) data ref
12418 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref (root, objectid, offset) for the data extent
 * at @bytenr with length @len and expected reference count @count.
 *
 * Visible flow: an extent tree lookup for @bytenr (used to resolve the
 * length, see the description above the function), then a walk over the
 * EXTENT_DATA items of inode @objectid in root @root_id. The number of
 * matching file extents is compared with @count; a difference is reported
 * and REFERENCER_MISSING is returned.
 */
12420 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12421 u64 root_id, u64 objectid, u64 offset,
12422 u64 bytenr, u64 len, u32 count)
12424 struct btrfs_root *root;
12425 struct btrfs_root *extent_root = fs_info->extent_root;
12426 struct btrfs_key key;
12427 struct btrfs_path path;
12428 struct extent_buffer *leaf;
12429 struct btrfs_file_extent_item *fi;
12430 u32 found_count = 0;
/*
 * Extent tree lookup: search past the last possible key for @bytenr, then
 * step back to the previous extent item and check that it is the
 * EXTENT_ITEM of @bytenr.
 */
12435 key.objectid = bytenr;
12436 key.type = BTRFS_EXTENT_ITEM_KEY;
12437 key.offset = (u64)-1;
12439 btrfs_init_path(&path);
12440 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12443 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12446 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12447 if (key.objectid != bytenr ||
12448 key.type != BTRFS_EXTENT_ITEM_KEY)
12451 btrfs_release_path(&path);
/* Read the root that is supposed to own the referencing file extent. */
12453 key.objectid = root_id;
12454 key.type = BTRFS_ROOT_ITEM_KEY;
12455 key.offset = (u64)-1;
12456 btrfs_init_path(&path);
12458 root = btrfs_read_fs_root(fs_info, &key);
12462 key.objectid = objectid;
12463 key.type = BTRFS_EXTENT_DATA_KEY;
12465 * It can be nasty as data backref offset is
12466 * file offset - file extent offset, which is smaller or
12467 * equal to original backref offset. The only special case is
12468 * overflow. So we need to special check and do further search.
/* A backref offset with the top bit set restarts the search at offset 0. */
12470 key.offset = offset & (1ULL << 63) ? 0 : offset;
12472 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12477 * Search afterwards to get correct one
12478 * NOTE: As we must do a comprehensive check on the data backref to
12479 * make sure the dref count also matches, we must iterate all file
12480 * extents for that inode.
12483 leaf = path.nodes[0];
12484 slot = path.slots[0];
/* Items outside the leaf or in a leaf owned by another root are not counted. */
12486 if (slot >= btrfs_header_nritems(leaf) ||
12487 btrfs_header_owner(leaf) != root_id)
12489 btrfs_item_key_to_cpu(leaf, &key, slot);
12490 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12492 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12494 * Except normal disk bytenr and disk num bytes, we still
12495 * need to do extra check on dbackref offset as
12496 * dbackref offset = file_offset - file_extent_offset
12498 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12499 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12500 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12505 ret = btrfs_next_item(root, &path);
12510 btrfs_release_path(&path);
/* The number of matches found must equal the count stored in the backref. */
12511 if (found_count != count) {
12513 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12514 bytenr, len, root_id, objectid, offset, count, found_count);
12515 return REFERENCER_MISSING;
12521 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must hold a non-inline
 * file extent item whose disk bytenr equals @bytenr.
 *
 * If no such item is found an error is printed and REFERENCER_MISSING is
 * returned.
 */
12523 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12524 u64 parent, u64 bytenr)
12526 struct extent_buffer *eb;
12527 struct btrfs_key key;
12528 struct btrfs_file_extent_item *fi;
12530 int found_parent = 0;
12533 eb = read_tree_block(fs_info, parent, 0);
12534 if (!extent_buffer_uptodate(eb))
/* Walk every item of the parent leaf; only EXTENT_DATA items are examined. */
12537 nr = btrfs_header_nritems(eb);
12538 for (i = 0; i < nr; i++) {
12539 btrfs_item_key_to_cpu(eb, &key, i);
12540 if (key.type != BTRFS_EXTENT_DATA_KEY)
12543 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents get separate handling (not visible); presumably skipped. */
12544 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12547 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12554 free_extent_buffer(eb);
12555 if (!found_parent) {
12556 error("shared extent %llu referencer lost (parent: %llu)",
12558 return REFERENCER_MISSING;
12564 * Only delete backref if REFERENCER_MISSING now
12566 * Returns <0 the extent was deleted
12567 * Returns >0 the backref was deleted but extent still exists, returned value
12568 * means error after repair
12569 * Returns 0 nothing happened
/*
 * Repair helper for check_extent_item(): drop the backref described by
 * (@parent, @root_objectid, @owner, @offset) of extent [@bytenr, @num_bytes)
 * through btrfs_free_extent() when @err carries a referencer error.
 *
 * The key at the current path position is saved first and the path is
 * re-searched at the end, because freeing the extent may remove the item the
 * path pointed to.
 *
 * NOTE(review): the condition accepts REFERENCER_MISSING as well as
 * REFERENCER_MISMATCH, but only REFERENCER_MISSING is cleared afterwards and
 * the description above the function mentions REFERENCER_MISSING only -
 * confirm which of the two is intended.
 */
12571 static int repair_extent_item(struct btrfs_trans_handle *trans,
12572 struct btrfs_root *root, struct btrfs_path *path,
12573 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12574 u64 owner, u64 offset, int err)
12576 struct btrfs_key old_key;
/* Remember where we are so the path can be restored after the deletion. */
12580 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12582 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12583 /* delete the backref */
12584 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12585 num_bytes, parent, root_objectid, owner, offset);
12588 err &= ~REFERENCER_MISSING;
12589 printf("Delete backref in extent [%llu %llu]\n",
12590 bytenr, num_bytes);
12592 error("fail to delete backref in extent [%llu %llu]",
12593 bytenr, num_bytes);
12597 /* btrfs_free_extent may delete the extent */
12598 btrfs_release_path(path);
12599 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12609 * This function will check a given extent item, including its backref and
12610 * itself (like crossing stripe boundary and type)
12612 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the EXTENT_ITEM or METADATA_ITEM at the current position of @path.
 *
 * Visible flow: account the extent size in the global bytes_used, reject
 * items smaller than struct btrfs_extent_item, check metadata extents for
 * crossing a stripe boundary, then walk the inline references stored after
 * the extent item and verify each one with the matching check_*_backref()
 * helper. In repair mode repair_extent_item() is invoked.
 *
 * NOTE(review): the repair step is gated on "err" while the value handed to
 * repair_extent_item() is "ret" - confirm the condition is not meant to test
 * "ret".
 */
12614 static int check_extent_item(struct btrfs_trans_handle *trans,
12615 struct btrfs_fs_info *fs_info,
12616 struct btrfs_path *path)
12618 struct btrfs_extent_item *ei;
12619 struct btrfs_extent_inline_ref *iref;
12620 struct btrfs_extent_data_ref *dref;
12621 struct extent_buffer *eb = path->nodes[0];
12624 int slot = path->slots[0];
12626 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12627 u32 item_size = btrfs_item_size_nr(eb, slot);
12637 struct btrfs_key key;
12641 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * For EXTENT_ITEM keys the key offset is the extent length; the other
 * visible branch uses nodesize as the length instead.
 */
12642 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12643 bytes_used += key.offset;
12644 num_bytes = key.offset;
12646 bytes_used += nodesize;
12647 num_bytes = nodesize;
12650 if (item_size < sizeof(*ei)) {
12652 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12653 * old thing when on disk format is still un-determined.
12654 * No need to care about it anymore
12656 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12660 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12661 flags = btrfs_extent_flags(eb, ei);
12663 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12665 if (metadata && check_crossing_stripes(global_info, key.objectid,
12667 error("bad metadata [%llu, %llu) crossing stripe boundary",
12668 key.objectid, key.objectid + nodesize);
12669 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline data starts directly behind the fixed-size extent item. */
12672 ptr = (unsigned long)(ei + 1);
12674 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12675 /* Old EXTENT_ITEM metadata */
12676 struct btrfs_tree_block_info *info;
12678 info = (struct btrfs_tree_block_info *)ptr;
12679 level = btrfs_tree_block_level(eb, info);
12680 ptr += sizeof(struct btrfs_tree_block_info);
12682 /* New METADATA_ITEM */
12683 level = key.offset;
/* End offset of the item; the inline ref cursor must land exactly on it. */
12685 end = (unsigned long)ei + item_size;
12688 /* Reached extent item end normally */
12692 /* Beyond extent item end, wrong item size */
12694 err |= ITEM_SIZE_MISMATCH;
12695 error("extent item at bytenr %llu slot %d has wrong size",
12704 /* Now check every backref in this extent item */
12705 iref = (struct btrfs_extent_inline_ref *)ptr;
12706 type = btrfs_extent_inline_ref_type(eb, iref);
12707 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; "offset" is a root id or a parent bytenr. */
12709 case BTRFS_TREE_BLOCK_REF_KEY:
12710 root_objectid = offset;
12712 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12716 case BTRFS_SHARED_BLOCK_REF_KEY:
12718 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12722 case BTRFS_EXTENT_DATA_REF_KEY:
/* For a data ref the payload overlays the offset field of the inline ref. */
12723 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12724 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12725 owner = btrfs_extent_data_ref_objectid(eb, dref);
12726 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12727 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12728 owner_offset, key.objectid, key.offset,
12729 btrfs_extent_data_ref_count(eb, dref));
12732 case BTRFS_SHARED_DATA_REF_KEY:
12734 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12738 error("extent[%llu %d %llu] has unknown ref type: %d",
12739 key.objectid, key.type, key.offset, type);
12740 ret = UNKNOWN_TYPE;
12745 if (err && repair) {
12746 ret = repair_extent_item(trans, fs_info->extent_root, path,
12747 key.objectid, num_bytes, parent, root_objectid,
12748 owner, owner_offset, ret);
/* Advance the cursor by the on-disk size of the ref type just handled. */
12757 ptr += btrfs_extent_inline_ref_size(type);
12765 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the DEV_EXTENT item at @slot of @eb is referenced by its
 * chunk.
 *
 * The chunk is looked up in the chunk tree with the chunk objectid and
 * offset stored in the dev extent. The chunk is validated, its stripe length
 * is compared with the dev extent length, and its stripes are searched for
 * one whose (devid, offset) equals the dev extent key. If none matches an
 * error is printed and REFERENCER_MISSING is returned.
 */
12767 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12768 struct extent_buffer *eb, int slot)
12770 struct btrfs_root *chunk_root = fs_info->chunk_root;
12771 struct btrfs_dev_extent *ptr;
12772 struct btrfs_path path;
12773 struct btrfs_key chunk_key;
12774 struct btrfs_key devext_key;
12775 struct btrfs_chunk *chunk;
12776 struct extent_buffer *l;
12780 int found_chunk = 0;
12783 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12784 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12785 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to; build that chunk key. */
12787 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12788 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12789 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12791 btrfs_init_path(&path);
12792 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12797 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12798 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must equal the dev extent length. */
12803 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that lives on this device at this physical offset. */
12806 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12807 for (i = 0; i < num_stripes; i++) {
12808 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12809 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12811 if (devid == devext_key.objectid &&
12812 offset == devext_key.offset) {
12818 btrfs_release_path(&path);
12819 if (!found_chunk) {
12821 "device extent[%llu, %llu, %llu] did not find the related chunk",
12822 devext_key.objectid, devext_key.offset, length);
12823 return REFERENCER_MISSING;
12829 * Check if the used space is correct with the dev item
/*
 * Verify the bytes_used field of the DEV_ITEM at @slot of @eb.
 *
 * All DEV_EXTENT items of the device are iterated in the device tree and
 * their lengths summed up. REFERENCER_MISSING is returned when the initial
 * search reports a failure, ACCOUNTING_MISMATCH when the sum differs from
 * bytes_used. On the remaining path check_dev_size_alignment() is called for
 * the total size of the device.
 *
 * NOTE(review): the accounting error message reloads the dev item key but
 * then prints the constants BTRFS_ROOT_TREE_OBJECTID and
 * BTRFS_DEV_EXTENT_KEY instead of the key fields - confirm whether
 * key.objectid, key.type and key.offset were intended.
 */
12831 static int check_dev_item(struct btrfs_fs_info *fs_info,
12832 struct extent_buffer *eb, int slot)
12834 struct btrfs_root *dev_root = fs_info->dev_root;
12835 struct btrfs_dev_item *dev_item;
12836 struct btrfs_path path;
12837 struct btrfs_key key;
12838 struct btrfs_dev_extent *ptr;
12845 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12846 dev_id = btrfs_device_id(eb, dev_item);
12847 used = btrfs_device_bytes_used(eb, dev_item);
12848 total_bytes = btrfs_device_total_bytes(eb, dev_item);
/* Position on the dev extents of this device in the device tree. */
12850 key.objectid = dev_id;
12851 key.type = BTRFS_DEV_EXTENT_KEY;
12854 btrfs_init_path(&path);
12855 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* On failure the key of the dev item itself is reloaded for the message. */
12857 btrfs_item_key_to_cpu(eb, &key, slot);
12858 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12859 key.objectid, key.type, key.offset);
12860 btrfs_release_path(&path);
12861 return REFERENCER_MISSING;
12864 /* Iterate dev_extents to calculate the used space of a device */
12866 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12869 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys are sorted, so a larger objectid means this device is done. */
12870 if (key.objectid > dev_id)
12872 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12875 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12876 struct btrfs_dev_extent);
12877 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12879 ret = btrfs_next_item(dev_root, &path);
12883 btrfs_release_path(&path);
12885 if (used != total) {
12886 btrfs_item_key_to_cpu(eb, &key, slot);
12888 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12889 total, used, BTRFS_ROOT_TREE_OBJECTID,
12890 BTRFS_DEV_EXTENT_KEY, dev_id);
12891 return ACCOUNTING_MISMATCH;
12893 check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12899 * Check a block group item with its referencer (chunk) and its used space
12900 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at @slot of @eb in two steps.
 *
 * 1) Look up the chunk item keyed by the block group start in the chunk
 *    tree and compare its length; problems set REFERENCER_MISSING or
 *    REFERENCER_MISMATCH.
 * 2) Iterate the extent tree over the block group range, sum the space of
 *    EXTENT_ITEM / METADATA_ITEM entries and verify that each extent type
 *    (data or tree block) is compatible with the block group flags
 *    (CHUNK_TYPE_MISMATCH). A sum different from the used value stored in
 *    the item sets BG_ACCOUNTING_ERROR.
 */
12902 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12903 struct extent_buffer *eb, int slot)
12905 struct btrfs_root *extent_root = fs_info->extent_root;
12906 struct btrfs_root *chunk_root = fs_info->chunk_root;
12907 struct btrfs_block_group_item *bi;
12908 struct btrfs_block_group_item bg_item;
12909 struct btrfs_path path;
12910 struct btrfs_key bg_key;
12911 struct btrfs_key chunk_key;
12912 struct btrfs_key extent_key;
12913 struct btrfs_chunk *chunk;
12914 struct extent_buffer *leaf;
12915 struct btrfs_extent_item *ei;
12916 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12924 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12925 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so it can be read as a struct. */
12926 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12927 used = btrfs_block_group_used(&bg_item);
12928 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the logical start address of the block group. */
12930 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12931 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12932 chunk_key.offset = bg_key.objectid;
12934 btrfs_init_path(&path);
12935 /* Search for the referencer chunk */
12936 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12939 "block group[%llu %llu] did not find the related chunk item",
12940 bg_key.objectid, bg_key.offset);
12941 err |= REFERENCER_MISSING;
12943 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12944 struct btrfs_chunk);
12945 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12948 "block group[%llu %llu] related chunk item length does not match",
12949 bg_key.objectid, bg_key.offset);
12950 err |= REFERENCER_MISMATCH;
12953 btrfs_release_path(&path);
12955 /* Search from the block group bytenr */
12956 extent_key.objectid = bg_key.objectid;
12957 extent_key.type = 0;
12958 extent_key.offset = 0;
12960 btrfs_init_path(&path);
12961 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12965 /* Iterate extent tree to account used space */
12967 leaf = path.nodes[0];
12969 /* Search slot can point to the last item beyond leaf nritems */
12970 if (path.slots[0] >= btrfs_header_nritems(leaf))
12973 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop at the first key at or beyond the end of the block group. */
12974 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12977 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12978 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12980 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM keys take one branch, the other adds the key offset.
 * Presumably the metadata branch adds nodesize - confirm.
 */
12983 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12986 total += extent_key.offset;
12988 ei = btrfs_item_ptr(leaf, path.slots[0],
12989 struct btrfs_extent_item);
12990 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA. */
12991 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12992 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12994 "bad extent[%llu, %llu) type mismatch with chunk",
12995 extent_key.objectid,
12996 extent_key.objectid + extent_key.offset);
12997 err |= CHUNK_TYPE_MISMATCH;
12999 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13000 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13001 BTRFS_BLOCK_GROUP_METADATA))) {
13003 "bad extent[%llu, %llu) type mismatch with chunk",
13004 extent_key.objectid,
13005 extent_key.objectid + nodesize);
13006 err |= CHUNK_TYPE_MISMATCH;
13010 ret = btrfs_next_item(extent_root, &path);
13016 btrfs_release_path(&path);
13018 if (total != used) {
13020 "block group[%llu %llu] used %llu but extent items used %llu",
13021 bg_key.objectid, bg_key.offset, used, total);
13022 err |= BG_ACCOUNTING_ERROR;
13028 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13029 * FIXME: We still need to repair error of dev_item.
13031 * Returns error after repair.
/*
 * Repair helper for chunk items: when @err contains REFERENCER_MISSING,
 * create the block group for the chunk at the current position of @path
 * with btrfs_make_block_group(), using the type and length read from the
 * chunk item.
 *
 * One visible path clears REFERENCER_MISSING from @err and prints a
 * confirmation; the other prints an error message.
 */
13033 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13034 struct btrfs_root *chunk_root,
13035 struct btrfs_path *path, int err)
13037 struct btrfs_chunk *chunk;
13038 struct btrfs_key chunk_key;
13039 struct extent_buffer *eb = path->nodes[0];
13041 int slot = path->slots[0];
13045 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
/* Only CHUNK_ITEM keys are handled by this helper. */
13046 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13048 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13049 type = btrfs_chunk_type(path->nodes[0], chunk);
13050 length = btrfs_chunk_length(eb, chunk);
13052 if (err & REFERENCER_MISSING) {
13053 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13054 type, chunk_key.objectid, chunk_key.offset, length);
13056 error("fail to add block group item[%llu %llu]",
13057 chunk_key.offset, length);
13060 err &= ~REFERENCER_MISSING;
13061 printf("Added block group item[%llu %llu]\n",
13062 chunk_key.offset, length);
13071 * Check a chunk item.
13072 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at @slot of @eb against the items that must refer
 * back to it.
 *
 * Visible flow: validate the chunk with btrfs_check_chunk_valid(), look up
 * the block group item (chunk offset, BLOCK_GROUP_ITEM, length) in the
 * extent tree and compare its flags with the chunk type, then look up one
 * DEV_EXTENT per stripe in the device tree and compare chunk objectid, chunk
 * offset and length.
 *
 * NOTE(review): a block group flags mismatch is recorded as
 * REFERENCER_MISSING rather than REFERENCER_MISMATCH - confirm intent.
 * NOTE(review): the stripe error message prints chunk_key.objectid as the
 * start of the range while the other messages print chunk_key.offset -
 * confirm.
 */
13074 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13075 struct extent_buffer *eb, int slot)
13077 struct btrfs_root *extent_root = fs_info->extent_root;
13078 struct btrfs_root *dev_root = fs_info->dev_root;
13079 struct btrfs_path path;
13080 struct btrfs_key chunk_key;
13081 struct btrfs_key bg_key;
13082 struct btrfs_key devext_key;
13083 struct btrfs_chunk *chunk;
13084 struct extent_buffer *leaf;
13085 struct btrfs_block_group_item *bi;
13086 struct btrfs_block_group_item bg_item;
13087 struct btrfs_dev_extent *ptr;
13099 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13100 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13101 length = btrfs_chunk_length(eb, chunk);
13102 chunk_end = chunk_key.offset + length;
13103 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13106 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13108 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13111 type = btrfs_chunk_type(eb, chunk);
/* The block group of a chunk is keyed by (chunk start, BLOCK_GROUP_ITEM, length). */
13113 bg_key.objectid = chunk_key.offset;
13114 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13115 bg_key.offset = length;
13117 btrfs_init_path(&path);
13118 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13121 "chunk[%llu %llu) did not find the related block group item",
13122 chunk_key.offset, chunk_end);
13123 err |= REFERENCER_MISSING;
13125 leaf = path.nodes[0];
13126 bi = btrfs_item_ptr(leaf, path.slots[0],
13127 struct btrfs_block_group_item);
13128 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13130 if (btrfs_block_group_flags(&bg_item) != type) {
13132 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13133 chunk_key.offset, chunk_end, type,
13134 btrfs_block_group_flags(&bg_item));
13135 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing back at this chunk. */
13139 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13140 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13141 for (i = 0; i < num_stripes; i++) {
/* Drop the path left over from the previous lookup before reusing it. */
13142 btrfs_release_path(&path);
13143 btrfs_init_path(&path);
13144 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13145 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13146 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13148 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13151 goto not_match_dev;
13153 leaf = path.nodes[0];
13154 ptr = btrfs_item_ptr(leaf, path.slots[0],
13155 struct btrfs_dev_extent);
13156 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13157 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13158 if (objectid != chunk_key.objectid ||
13159 offset != chunk_key.offset ||
13160 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13161 goto not_match_dev;
13164 err |= BACKREF_MISSING;
13166 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13167 chunk_key.objectid, chunk_end, i);
13170 btrfs_release_path(&path);
/*
 * Delete the item at the current position of @path from @root.
 *
 * The key is saved, the path released and the item searched again with a
 * transaction handle (ins_len -1, cow 1) so that btrfs_del_item() can remove
 * it. If the path then sits on slot 0, it is moved to the previous leaf with
 * btrfs_prev_leaf(). A message reports failure or success.
 */
13175 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13176 struct btrfs_root *root,
13177 struct btrfs_path *path)
13179 struct btrfs_key key;
13182 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13183 btrfs_release_path(path);
/* Search again, this time as part of the transaction, before deleting. */
13184 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13190 ret = btrfs_del_item(trans, root, path);
/* Presumably repositions the path so the caller can continue iterating - confirm. */
13194 if (path->slots[0] == 0)
13195 btrfs_prev_leaf(root, path);
13200 error("failed to delete root %llu item[%llu, %u, %llu]",
13201 root->objectid, key.objectid, key.type, key.offset);
13203 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13204 root->objectid, key.objectid, key.type, key.offset);
13209 * Main entry function to check known items and update related accounting info
/*
 * Check the items of the leaf at the current position of @path and dispatch
 * each one, by key type, to the matching check_*() helper.
 *
 * For several key types a failing check is followed by a repair or delete
 * helper (repair_extent_data_item(), repair_chunk_item(),
 * delete_extent_tree_item()). The guards are only partly visible; presumably
 * they apply in repair mode only - confirm. EXTENT_CSUM items only add their
 * size to the global total_csum_bytes.
 */
13211 static int check_leaf_items(struct btrfs_trans_handle *trans,
13212 struct btrfs_root *root, struct btrfs_path *path,
13213 struct node_refs *nrefs, int account_bytes)
13215 struct btrfs_fs_info *fs_info = root->fs_info;
13216 struct btrfs_key key;
13217 struct extent_buffer *eb;
13220 struct btrfs_extent_data_ref *dref;
13225 eb = path->nodes[0];
13226 slot = path->slots[0];
/* A slot at or beyond nritems is reported with the empty leaf message. */
13227 if (slot >= btrfs_header_nritems(eb)) {
13229 error("empty leaf [%llu %u] root %llu", eb->start,
13230 root->fs_info->nodesize, root->objectid);
13236 btrfs_item_key_to_cpu(eb, &key, slot);
/* Per key type handling follows. */
13240 case BTRFS_EXTENT_DATA_KEY:
13241 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13243 ret = repair_extent_data_item(trans, root, path, nrefs,
13247 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13248 ret = check_block_group_item(fs_info, eb, slot);
/* A block group whose chunk is missing is deleted from the tree. */
13250 ret & REFERENCER_MISSING)
13251 ret = delete_extent_tree_item(trans, root, path);
13254 case BTRFS_DEV_ITEM_KEY:
13255 ret = check_dev_item(fs_info, eb, slot);
13258 case BTRFS_CHUNK_ITEM_KEY:
13259 ret = check_chunk_item(fs_info, eb, slot);
13261 ret = repair_chunk_item(trans, root, path, ret);
13264 case BTRFS_DEV_EXTENT_KEY:
13265 ret = check_dev_extent_item(fs_info, eb, slot);
13268 case BTRFS_EXTENT_ITEM_KEY:
13269 case BTRFS_METADATA_ITEM_KEY:
13270 ret = check_extent_item(trans, fs_info, path);
13273 case BTRFS_EXTENT_CSUM_KEY:
13274 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The four cases below handle backrefs stored as separate (keyed) items. */
13277 case BTRFS_TREE_BLOCK_REF_KEY:
13278 ret = check_tree_block_backref(fs_info, key.offset,
13281 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13282 ret = delete_extent_tree_item(trans, root, path);
13285 case BTRFS_EXTENT_DATA_REF_KEY:
13286 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13287 ret = check_extent_data_backref(fs_info,
13288 btrfs_extent_data_ref_root(eb, dref),
13289 btrfs_extent_data_ref_objectid(eb, dref),
13290 btrfs_extent_data_ref_offset(eb, dref),
13292 btrfs_extent_data_ref_count(eb, dref));
13294 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13295 ret = delete_extent_tree_item(trans, root, path);
13298 case BTRFS_SHARED_BLOCK_REF_KEY:
13299 ret = check_shared_block_backref(fs_info, key.offset,
13302 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13303 ret = delete_extent_tree_item(trans, root, path);
13306 case BTRFS_SHARED_DATA_REF_KEY:
13307 ret = check_shared_data_backref(fs_info, key.offset,
13310 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13311 ret = delete_extent_tree_item(trans, root, path);
13325 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk and extent check.
 *
 * Visible flow: start a transaction on the extent root (presumably only in
 * repair mode - confirm), run check_btrfs_root() on the chunk root and on
 * the tree root, then iterate the ROOT_ITEMs of the tree root starting at
 * BTRFS_EXTENT_TREE_OBJECTID and run check_btrfs_root() on every tree read
 * from them. Afterwards block accounting is fixed with
 * btrfs_fix_block_accounting() and the transaction is committed.
 */
13327 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13329 struct btrfs_trans_handle *trans = NULL;
13330 struct btrfs_path path;
13331 struct btrfs_key old_key;
13332 struct btrfs_key key;
13333 struct btrfs_root *root1;
13334 struct btrfs_root *root;
13335 struct btrfs_root *cur_root;
13339 root = fs_info->fs_root;
13342 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13343 if (IS_ERR(trans)) {
13344 error("failed to start transaction before check");
13345 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked before any other tree. */
13349 root1 = root->fs_info->chunk_root;
13350 ret = check_btrfs_root(trans, root1, 0, 1);
13353 root1 = root->fs_info->tree_root;
13354 ret = check_btrfs_root(trans, root1, 0, 1);
13357 btrfs_init_path(&path);
13358 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13360 key.type = BTRFS_ROOT_ITEM_KEY;
13362 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13364 error("cannot find extent tree in tree_root");
13369 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13370 if (key.type != BTRFS_ROOT_ITEM_KEY)
13373 key.offset = (u64)-1;
/* Reloc roots are read without the cache and freed again after the check. */
13375 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13376 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13379 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13380 if (IS_ERR(cur_root) || !cur_root) {
13381 error("failed to read tree: %lld", key.objectid);
13385 ret = check_btrfs_root(trans, cur_root, 0, 1);
13388 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13389 btrfs_free_fs_root(cur_root);
/*
 * Re-search the tree root from old_key before moving to the next item.
 * Presumably needed because the check may have modified the tree - confirm.
 */
13391 btrfs_release_path(&path);
13392 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13393 &old_key, &path, 0, 0);
13397 ret = btrfs_next_item(root1, &path);
13403 /* if repair, update block accounting */
13405 ret = btrfs_fix_block_accounting(trans, root);
13409 err &= ~BG_ACCOUNTING_ERROR;
13413 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13415 btrfs_release_path(&path);
/*
 * Entry point for the chunk and extent check: prints a progress line unless
 * progress reporting is enabled, runs the low memory implementation when
 * check_mode is CHECK_MODE_LOWMEM and the original implementation otherwise,
 * and in repair mode follows a successful check with
 * btrfs_fix_device_and_super_size().
 */
13420 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13424 if (!ctx.progress_enabled)
13425 fprintf(stderr, "checking extents\n");
13426 if (check_mode == CHECK_MODE_LOWMEM)
13427 ret = check_chunks_and_extents_v2(fs_info);
13429 ret = check_chunks_and_extents(fs_info);
13431 /* Also repair device size related problems */
13432 if (repair && !ret) {
13433 ret = btrfs_fix_device_and_super_size(fs_info);
/*
 * Re-initialise the root node of @root as an empty block.
 *
 * Visible flow: obtain a block "c" (an extra reference is taken on one path
 * and btrfs_alloc_free_block() is used on another; presumably @overwrite
 * selects reuse of the existing node - confirm), wipe and rewrite its header
 * (level, bytenr, generation, backref revision, owner, fsid and chunk tree
 * uuid), mark it dirty, update the root item when the old and the new block
 * share the same start, release the old node and add the root to the dirty
 * list.
 */
13440 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13441 struct btrfs_root *root, int overwrite)
13443 struct extent_buffer *c;
13444 struct extent_buffer *old = root->node;
13447 struct btrfs_disk_key disk_key = {0,0,0};
13453 extent_buffer_get(c);
13456 c = btrfs_alloc_free_block(trans, root,
13457 root->fs_info->nodesize,
13458 root->root_key.objectid,
13459 &disk_key, level, 0, 0);
13462 extent_buffer_get(c);
/* Start from a zeroed header, then fill in the fields of an empty block. */
13466 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13467 btrfs_set_header_level(c, level);
13468 btrfs_set_header_bytenr(c, c->start);
13469 btrfs_set_header_generation(c, trans->transid);
13470 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13471 btrfs_set_header_owner(c, root->root_key.objectid);
13473 write_extent_buffer(c, root->fs_info->fsid,
13474 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13476 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13477 btrfs_header_chunk_tree_uuid(c),
13480 btrfs_mark_buffer_dirty(c);
13482 * this case can happen in the following case:
13484 * 1.overwrite previous root.
13486 * 2.reinit reloc data root, this is because we skip pin
13487 * down reloc data tree before which means we can allocate
13488 * same block bytenr here.
/* Same start: refresh generation and level in the root item and write it back. */
13490 if (old->start == c->start) {
13491 btrfs_set_root_generation(&root->root_item,
13493 root->root_item.level = btrfs_header_level(root->node);
13494 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13495 &root->root_key, &root->root_item);
13497 free_extent_buffer(c);
13501 free_extent_buffer(old);
13503 add_root_to_dirty_list(root);
/*
 * Recursively pin the tree block @eb and the blocks reachable from it.
 *
 * Blocks whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents are detected first. For ROOT_ITEMs found in the
 * block, the referenced root block is read and pinned recursively with
 * @tree_root = 0; the extent root and the reloc roots are excluded. For
 * block pointers, the child is either pinned directly without being read
 * (level 1 when not in the tree root) or read and pinned recursively.
 */
13507 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13508 struct extent_buffer *eb, int tree_root)
13510 struct extent_buffer *tmp;
13511 struct btrfs_root_item *ri;
13512 struct btrfs_key key;
13514 int level = btrfs_header_level(eb);
13520 * If we have pinned this block before, don't pin it again.
13521 * This can not only avoid forever loop with broken filesystem
13522 * but also give us some speedups.
13524 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13525 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13528 btrfs_pin_extent(fs_info, eb->start, eb->len);
13530 nritems = btrfs_header_nritems(eb);
13531 for (i = 0; i < nritems; i++) {
/* Item handling: only ROOT_ITEM keys lead to further blocks. */
13533 btrfs_item_key_to_cpu(eb, &key, i);
13534 if (key.type != BTRFS_ROOT_ITEM_KEY)
13536 /* Skip the extent root and reloc roots */
13537 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13538 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13539 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13541 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13542 bytenr = btrfs_disk_root_bytenr(eb, ri);
13545 * If at any point we start needing the real root we
13546 * will have to build a stump root for the root we are
13547 * in, but for now this doesn't actually use the root so
13548 * just pass in extent_root.
13550 tmp = read_tree_block(fs_info, bytenr, 0);
13551 if (!extent_buffer_uptodate(tmp)) {
13552 fprintf(stderr, "Error reading root block\n");
13555 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13556 free_extent_buffer(tmp);
/* Block pointer handling: follow the child pointer stored at index i. */
13560 bytenr = btrfs_node_blockptr(eb, i);
13562 /* If we aren't the tree root don't read the block */
13563 if (level == 1 && !tree_root) {
13564 btrfs_pin_extent(fs_info, bytenr,
13565 fs_info->nodesize);
13569 tmp = read_tree_block(fs_info, bytenr, 0);
13570 if (!extent_buffer_uptodate(tmp)) {
13571 fprintf(stderr, "Error reading tree block\n");
13574 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13575 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root argument 0) and then those
 * reachable from the tree root (tree_root argument 1), returning the result
 * of the second call on the visible return path.
 */
13584 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13588 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13592 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * Visible flow: search the chunk tree for CHUNK_ITEM keys, reset the
 * avail_*_alloc_bits of fs_info, add one block group per chunk item with
 * btrfs_add_block_group() and mark the chunk range dirty in
 * fs_info->free_space_cache, then walk the resulting block groups with
 * btrfs_lookup_first_block_group().
 */
13595 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13597 struct btrfs_block_group_cache *cache;
13598 struct btrfs_path path;
13599 struct extent_buffer *leaf;
13600 struct btrfs_chunk *chunk;
13601 struct btrfs_key key;
13605 btrfs_init_path(&path);
13607 key.type = BTRFS_CHUNK_ITEM_KEY;
13609 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13611 btrfs_release_path(&path);
13616 * We do this in case the block groups were screwed up and had alloc
13617 * bits that aren't actually set on the chunks. This happens with
13618 * restored images every time and could happen in real life I guess.
13620 fs_info->avail_data_alloc_bits = 0;
13621 fs_info->avail_metadata_alloc_bits = 0;
13622 fs_info->avail_system_alloc_bits = 0;
13624 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one has been fully consumed. */
13626 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13627 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13629 btrfs_release_path(&path);
13637 leaf = path.nodes[0];
13638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13639 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13644 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* One block group per chunk: same type, start (key.offset) and length. */
13645 btrfs_add_block_group(fs_info, 0,
13646 btrfs_chunk_type(leaf, chunk),
13647 key.objectid, key.offset,
13648 btrfs_chunk_length(leaf, chunk));
13649 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13650 key.offset + btrfs_chunk_length(leaf, chunk));
/* Walk the block groups in address order, advancing past each one found. */
13655 cache = btrfs_lookup_first_block_group(fs_info, start);
13659 start = cache->key.objectid + cache->key.offset;
13662 btrfs_release_path(&path);
/*
 * Remove pending-balance state from the tree root and reinitialize the data
 * relocation tree.
 *
 * Steps visible in this function:
 *  1. Search the tree root for the (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) item with deletion intent (ins_len -1, cow 1)
 *     and delete it with btrfs_del_item().  One path jumps straight to the
 *     reinit_data_reloc label instead.
 *  2. Search for ROOT_ITEM keys with objectid BTRFS_TREE_RELOC_OBJECTID and
 *     delete them in runs of del_nr items starting at del_slot via
 *     btrfs_del_items(), re-searching between runs.  Scanning stops once an
 *     objectid above BTRFS_TREE_RELOC_OBJECTID is seen.
 *  3. Read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *     transaction, reinitialize it with btrfs_fsck_reinit_root() and create
 *     its root directory with inode number BTRFS_FIRST_FREE_OBJECTID.
 *
 * NOTE(review): the conditions guarding each goto and the final return are
 * not shown in this listing; presumably a negative errno is returned on
 * failure - confirm.
 */
13666 static int reset_balance(struct btrfs_trans_handle *trans,
13667 struct btrfs_fs_info *fs_info)
13669 struct btrfs_root *root = fs_info->tree_root;
13670 struct btrfs_path path;
13671 struct extent_buffer *leaf;
13672 struct btrfs_key key;
13673 int del_slot, del_nr = 0;
13677 btrfs_init_path(&path);
13678 key.objectid = BTRFS_BALANCE_OBJECTID;
13679 key.type = BTRFS_BALANCE_ITEM_KEY;
13681 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13686 goto reinit_data_reloc;
13691 ret = btrfs_del_item(trans, root, &path);
13694 btrfs_release_path(&path);
13696 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13697 key.type = BTRFS_ROOT_ITEM_KEY;
13699 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13703 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13708 ret = btrfs_del_items(trans, root, &path,
13715 btrfs_release_path(&path);
13718 ret = btrfs_search_slot(trans, root, &key, &path,
13725 leaf = path.nodes[0];
13726 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13727 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13729 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13734 del_slot = path.slots[0];
13743 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13747 btrfs_release_path(&path);
13750 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13751 key.type = BTRFS_ROOT_ITEM_KEY;
13752 key.offset = (u64)-1;
13753 root = btrfs_read_fs_root(fs_info, &key);
13754 if (IS_ERR(root)) {
13755 fprintf(stderr, "Error reading data reloc tree\n");
13756 ret = PTR_ERR(root);
13759 record_root_in_trans(trans, root);
13760 ret = btrfs_fsck_reinit_root(trans, root, 0);
13763 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13765 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one inside the transaction
 * trans.
 *
 * Order of operations visible here:
 *  - refuse filesystems with the MIXED_GROUPS incompat flag;
 *  - pin_metadata_blocks() so that in-use metadata is not overwritten;
 *  - btrfs_free_block_groups() then reset_block_groups() to recreate the
 *    in-memory block groups;
 *  - btrfs_fsck_reinit_root() on fs_info->extent_root;
 *  - insert one item per block group (cache->key / cache->item) into the new
 *    extent root, calling btrfs_extent_post_op() along the way;
 *  - reset_balance() to drop any pending balance.
 *
 * Each failing step prints a message to stderr.
 * NOTE(review): the return statements are not shown in this listing;
 * presumably the failing step's error code is returned - confirm.
 */
13769 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13770 struct btrfs_fs_info *fs_info)
13776 * The only reason we don't do this is because right now we're just
13777 * walking the trees we find and pinning down their bytes, we don't look
13778 * at any of the leaves. In order to do mixed groups we'd have to check
13779 * the leaves of any fs roots and pin down the bytes for any file
13780 * extents we find. Not hard but why do it if we don't have to?
13782 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13783 fprintf(stderr, "We don't support re-initing the extent tree "
13784 "for mixed block groups yet, please notify a btrfs "
13785 "developer you want to do this so they can add this "
13786 "functionality.\n");
13791 * first we need to walk all of the trees except the extent tree and pin
13792 * down the bytes that are in use so we don't overwrite any existing
13795 ret = pin_metadata_blocks(fs_info);
13797 fprintf(stderr, "error pinning down used bytes\n");
13802 * Need to drop all the block groups since we're going to recreate all
13805 btrfs_free_block_groups(fs_info);
13806 ret = reset_block_groups(fs_info);
13808 fprintf(stderr, "error resetting the block groups\n");
13812 /* Ok we can allocate now, reinit the extent root */
13813 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13815 fprintf(stderr, "extent root initialization failed\n");
13817 * When the transaction code is updated we should end the
13818 * transaction, but for now progs only knows about commit so
13819 * just return an error.
13825 * Now we have all the in-memory block groups setup so we can make
13826 * allocations properly, and the metadata we care about is safe since we
13827 * pinned all of it above.
13830 struct btrfs_block_group_cache *cache;
13832 cache = btrfs_lookup_first_block_group(fs_info, start);
13835 start = cache->key.objectid + cache->key.offset;
13836 ret = btrfs_insert_item(trans, fs_info->extent_root,
13837 &cache->key, &cache->item,
13838 sizeof(cache->item));
13840 fprintf(stderr, "Error adding block group\n");
13843 btrfs_extent_post_op(trans, fs_info->extent_root);
13846 ret = reset_balance(trans, fs_info);
13848 fprintf(stderr, "error resetting the pending balance\n");
13853 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13855 struct btrfs_path path;
13856 struct btrfs_trans_handle *trans;
13857 struct btrfs_key key;
13860 printf("Recowing metadata block %llu\n", eb->start);
13861 key.objectid = btrfs_header_owner(eb);
13862 key.type = BTRFS_ROOT_ITEM_KEY;
13863 key.offset = (u64)-1;
13865 root = btrfs_read_fs_root(root->fs_info, &key);
13866 if (IS_ERR(root)) {
13867 fprintf(stderr, "Couldn't find owner root %llu\n",
13869 return PTR_ERR(root);
13872 trans = btrfs_start_transaction(root, 1);
13874 return PTR_ERR(trans);
13876 btrfs_init_path(&path);
13877 path.lowest_level = btrfs_header_level(eb);
13878 if (path.lowest_level)
13879 btrfs_node_key_to_cpu(eb, &key, 0);
13881 btrfs_item_key_to_cpu(eb, &key, 0);
13883 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13884 btrfs_commit_transaction(trans, root);
13885 btrfs_release_path(&path);
13889 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13891 struct btrfs_path path;
13892 struct btrfs_trans_handle *trans;
13893 struct btrfs_key key;
13896 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13897 bad->key.type, bad->key.offset);
13898 key.objectid = bad->root_id;
13899 key.type = BTRFS_ROOT_ITEM_KEY;
13900 key.offset = (u64)-1;
13902 root = btrfs_read_fs_root(root->fs_info, &key);
13903 if (IS_ERR(root)) {
13904 fprintf(stderr, "Couldn't find owner root %llu\n",
13906 return PTR_ERR(root);
13909 trans = btrfs_start_transaction(root, 1);
13911 return PTR_ERR(trans);
13913 btrfs_init_path(&path);
13914 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13920 ret = btrfs_del_item(trans, root, &path);
13922 btrfs_commit_transaction(trans, root);
13923 btrfs_release_path(&path);
13927 static int zero_log_tree(struct btrfs_root *root)
13929 struct btrfs_trans_handle *trans;
13932 trans = btrfs_start_transaction(root, 1);
13933 if (IS_ERR(trans)) {
13934 ret = PTR_ERR(trans);
13937 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13938 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13939 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range, one sector at a time.
 *
 * For each offset below len, reads data at start + offset into buf with
 * read_extent_data() and passes it to btrfs_csum_file_block() together with
 * start + len and start + offset.  The offset then advances by sectorsize,
 * which is reloaded from fs_info->sectorsize on every iteration.
 *
 * NOTE(review): buf presumably must hold at least fs_info->sectorsize bytes
 * (both callers allocate exactly that) - confirm.  The error checks and the
 * return statement are not shown in this listing.
 */
13943 static int populate_csum(struct btrfs_trans_handle *trans,
13944 struct btrfs_root *csum_root, char *buf, u64 start,
13947 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13952 while (offset < len) {
13953 sectorsize = fs_info->sectorsize;
13954 ret = read_extent_data(fs_info, buf, start + offset,
13958 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13959 start + offset, buf, sectorsize);
13962 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the regular file extents referenced
 * by one fs/subvolume tree.
 *
 * Allocates a sectorsize scratch buffer, searches cur_root read-only (no
 * transaction, no cow) and steps through its items with btrfs_next_item().
 * For EXTENT_DATA items of type BTRFS_FILE_EXTENT_REG, the on-disk bytenr
 * and on-disk length of the extent are handed to populate_csum().
 *
 * A result of -EEXIST from populate_csum() is tested separately from other
 * results.
 * NOTE(review): presumably -EEXIST is tolerated because the same extent can
 * be referenced more than once - the handling itself is not shown in this
 * listing; confirm.
 */
13967 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13968 struct btrfs_root *csum_root,
13969 struct btrfs_root *cur_root)
13971 struct btrfs_path path;
13972 struct btrfs_key key;
13973 struct extent_buffer *node;
13974 struct btrfs_file_extent_item *fi;
13981 buf = malloc(cur_root->fs_info->sectorsize);
13985 btrfs_init_path(&path);
13989 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13992 /* Iterate all regular file extents and fill its csum */
13994 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13996 if (key.type != BTRFS_EXTENT_DATA_KEY)
13998 node = path.nodes[0];
13999 slot = path.slots[0];
14000 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14001 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14003 start = btrfs_file_extent_disk_bytenr(node, fi);
14004 len = btrfs_file_extent_disk_num_bytes(node, fi);
14006 ret = populate_csum(trans, csum_root, buf, start, len);
14007 if (ret == -EEXIST)
14013 * TODO: if next leaf is corrupted, jump to nearest next valid
14016 ret = btrfs_next_item(cur_root, &path);
14026 btrfs_release_path(&path);
14031 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14032 struct btrfs_root *csum_root)
14034 struct btrfs_fs_info *fs_info = csum_root->fs_info;
14035 struct btrfs_path path;
14036 struct btrfs_root *tree_root = fs_info->tree_root;
14037 struct btrfs_root *cur_root;
14038 struct extent_buffer *node;
14039 struct btrfs_key key;
14043 btrfs_init_path(&path);
14044 key.objectid = BTRFS_FS_TREE_OBJECTID;
14046 key.type = BTRFS_ROOT_ITEM_KEY;
14047 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14056 node = path.nodes[0];
14057 slot = path.slots[0];
14058 btrfs_item_key_to_cpu(node, &key, slot);
14059 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14061 if (key.type != BTRFS_ROOT_ITEM_KEY)
14063 if (!is_fstree(key.objectid))
14065 key.offset = (u64)-1;
14067 cur_root = btrfs_read_fs_root(fs_info, &key);
14068 if (IS_ERR(cur_root) || !cur_root) {
14069 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14073 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14078 ret = btrfs_next_item(tree_root, &path);
14088 btrfs_release_path(&path);
/*
 * Fill the csum tree using the extent tree as the list of data extents.
 *
 * Searches the extent root read-only for EXTENT_ITEM keys, allocates a
 * sectorsize scratch buffer, and walks the leaves with btrfs_next_leaf().
 * Items are tested for key type BTRFS_EXTENT_ITEM_KEY and for the
 * BTRFS_EXTENT_FLAG_DATA flag; populate_csum() is called with key.objectid
 * as the start of the range.
 *
 * NOTE(review): the second line of the populate_csum() call, the skip and
 * error handling, the release of buf and the return statement are not shown
 * in this listing; confirm them against the full file.
 */
14092 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14093 struct btrfs_root *csum_root)
14095 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14096 struct btrfs_path path;
14097 struct btrfs_extent_item *ei;
14098 struct extent_buffer *leaf;
14100 struct btrfs_key key;
14103 btrfs_init_path(&path);
14105 key.type = BTRFS_EXTENT_ITEM_KEY;
14107 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14109 btrfs_release_path(&path);
14113 buf = malloc(csum_root->fs_info->sectorsize);
14115 btrfs_release_path(&path);
14120 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14121 ret = btrfs_next_leaf(extent_root, &path);
14129 leaf = path.nodes[0];
14131 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14132 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14137 ei = btrfs_item_ptr(leaf, path.slots[0],
14138 struct btrfs_extent_item);
14139 if (!(btrfs_extent_flags(leaf, ei) &
14140 BTRFS_EXTENT_FLAG_DATA)) {
14145 ret = populate_csum(trans, csum_root, buf, key.objectid,
14152 btrfs_release_path(&path);
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Two sources can drive the rebuild.  The extent tree is used when
 * search_fs_tree is zero.  The fs/subvolume trees are used when
 * search_fs_tree is non-zero; this is the choice after an extent tree
 * re-init, which wipes the extent info the first source depends on.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
/*
 * Tear down the global roots_info_cache.
 *
 * While the cache tree is not empty, takes the first cache_extent, removes
 * it from the tree and converts it back to its enclosing struct
 * root_item_info.  Finally frees the cache tree itself and resets the global
 * pointer to NULL, so a later build_roots_info_cache() allocates a new one.
 *
 * NOTE(review): the statement that frees each rii is not shown in this
 * listing; confirm it exists.
 */
14174 static void free_roots_info_cache(void)
14176 if (!roots_info_cache)
14179 while (!cache_tree_empty(roots_info_cache)) {
14180 struct cache_extent *entry;
14181 struct root_item_info *rii;
14183 entry = first_cache_extent(roots_info_cache);
14186 remove_cache_extent(roots_info_cache, entry);
14187 rii = container_of(entry, struct root_item_info, cache_extent);
14191 free(roots_info_cache);
14192 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * that belongs to that root.
 *
 * The global roots_info_cache is allocated on first use.  For every
 * EXTENT_ITEM carrying BTRFS_EXTENT_FLAG_TREE_BLOCK and every METADATA_ITEM:
 *  - the level comes from the key offset (METADATA_ITEM) or from the
 *    btrfs_tree_block_info that follows the extent item (EXTENT_ITEM);
 *  - the first inline ref is inspected and compared against
 *    BTRFS_TREE_BLOCK_REF_KEY; its offset is used as the root id;
 *  - the root_item_info for that root id is looked up, or created with level
 *    (u8)-1 as a "not yet set" marker;
 *  - a block with a higher level than the recorded one replaces bytenr, gen
 *    and level and sets node_count to 1; a block at the same level takes a
 *    separate branch.
 *
 * NOTE(review): the body of the same-level branch (presumably incrementing
 * node_count, which maybe_repair_root_item() later requires to be 1), the
 * allocation-failure handling and the return statement are not shown in this
 * listing; confirm.
 */
14195 static int build_roots_info_cache(struct btrfs_fs_info *info)
14198 struct btrfs_key key;
14199 struct extent_buffer *leaf;
14200 struct btrfs_path path;
14202 if (!roots_info_cache) {
14203 roots_info_cache = malloc(sizeof(*roots_info_cache));
14204 if (!roots_info_cache)
14206 cache_tree_init(roots_info_cache);
14209 btrfs_init_path(&path);
14211 key.type = BTRFS_EXTENT_ITEM_KEY;
14213 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14216 leaf = path.nodes[0];
14219 struct btrfs_key found_key;
14220 struct btrfs_extent_item *ei;
14221 struct btrfs_extent_inline_ref *iref;
14222 int slot = path.slots[0];
14227 struct cache_extent *entry;
14228 struct root_item_info *rii;
14230 if (slot >= btrfs_header_nritems(leaf)) {
14231 ret = btrfs_next_leaf(info->extent_root, &path);
14238 leaf = path.nodes[0];
14239 slot = path.slots[0];
14242 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14244 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14245 found_key.type != BTRFS_METADATA_ITEM_KEY)
14248 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14249 flags = btrfs_extent_flags(leaf, ei);
14251 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14252 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14255 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14256 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14257 level = found_key.offset;
14259 struct btrfs_tree_block_info *binfo;
14261 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14262 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14263 level = btrfs_tree_block_level(leaf, binfo);
14267 * For a root extent, it must be of the following type and the
14268 * first (and only one) iref in the item.
14270 type = btrfs_extent_inline_ref_type(leaf, iref);
14271 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14274 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14275 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14277 rii = malloc(sizeof(struct root_item_info));
14282 rii->cache_extent.start = root_id;
14283 rii->cache_extent.size = 1;
14284 rii->level = (u8)-1;
14285 entry = &rii->cache_extent;
14286 ret = insert_cache_extent(roots_info_cache, entry);
14289 rii = container_of(entry, struct root_item_info,
14293 ASSERT(rii->cache_extent.start == root_id);
14294 ASSERT(rii->cache_extent.size == 1);
14296 if (level > rii->level || rii->level == (u8)-1) {
14297 rii->level = level;
14298 rii->bytenr = found_key.objectid;
14299 rii->gen = btrfs_extent_generation(leaf, ei);
14300 rii->node_count = 1;
14301 } else if (level == rii->level) {
14309 btrfs_release_path(&path);
/*
 * Compare one root item against the information gathered in
 * roots_info_cache and optionally rewrite it.
 *
 * @path:           points at the root item in the tree root leaf
 *                  (path->nodes[0], path->slots[0]).
 * @root_key:       key of that root item; its objectid is the root id.
 * @read_only_mode: when non-zero only detect a mismatch; when zero also
 *                  write the corrected bytenr, level and generation back
 *                  into the leaf with write_extent_buffer().
 *
 * An error is printed when the cache has no entry for the root, or when the
 * entry does not have exactly one candidate root node (node_count != 1).
 * A mismatch is reported unless both read_only_mode and the global repair
 * flag are set.  A root item whose generation is newer than the cached root
 * node gets an additional message.
 *
 * NOTE(review): the return statements are not shown in this listing.
 * Judging by the caller in cmd_check(), positive values count outdated or
 * fixed items and negative values are errors - confirm.
 */
14314 static int maybe_repair_root_item(struct btrfs_path *path,
14315 const struct btrfs_key *root_key,
14316 const int read_only_mode)
14318 const u64 root_id = root_key->objectid;
14319 struct cache_extent *entry;
14320 struct root_item_info *rii;
14321 struct btrfs_root_item ri;
14322 unsigned long offset;
14324 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14327 "Error: could not find extent items for root %llu\n",
14328 root_key->objectid);
14332 rii = container_of(entry, struct root_item_info, cache_extent);
14333 ASSERT(rii->cache_extent.start == root_id);
14334 ASSERT(rii->cache_extent.size == 1);
14336 if (rii->node_count != 1) {
14338 "Error: could not find btree root extent for root %llu\n",
14343 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14344 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14346 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14347 btrfs_root_level(&ri) != rii->level ||
14348 btrfs_root_generation(&ri) != rii->gen) {
14351 * If we're in repair mode but our caller told us to not update
14352 * the root item, i.e. just check if it needs to be updated, don't
14353 * print this message, since the caller will call us again shortly
14354 * for the same root item without read only mode (the caller will
14355 * open a transaction first).
14357 if (!(read_only_mode && repair))
14359 "%sroot item for root %llu,"
14360 " current bytenr %llu, current gen %llu, current level %u,"
14361 " new bytenr %llu, new gen %llu, new level %u\n",
14362 (read_only_mode ? "" : "fixing "),
14364 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14365 btrfs_root_level(&ri),
14366 rii->bytenr, rii->gen, rii->level);
14368 if (btrfs_root_generation(&ri) > rii->gen) {
14370 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14371 root_id, btrfs_root_generation(&ri), rii->gen);
14375 if (!read_only_mode) {
14376 btrfs_set_root_bytenr(&ri, rii->bytenr);
14377 btrfs_set_root_level(&ri, rii->level);
14378 btrfs_set_root_generation(&ri, rii->gen);
14379 write_extent_buffer(path->nodes[0], &ri,
14380 offset, sizeof(ri));
14390 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14391 * caused read-only snapshots to be corrupted if they were created at a moment
14392 * when the source subvolume/snapshot had orphan items. The issue was that the
14393 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14394 * node instead of the post orphan cleanup root node.
14395 * So this function, and its callees, just detects and fixes those cases. Even
14396 * though the regression was for read-only snapshots, this function applies to
14397 * any snapshot/subvolume root.
14398 * This must be run before any other repair code - otherwise other repair
14399 * code deletes or modifies backrefs in the extent tree, for example, which
14400 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk all root items in the tree root and check, or in repair mode fix,
 * those that disagree with the extent tree.
 *
 * Builds roots_info_cache first, then iterates ROOT_ITEM keys starting at
 * objectid BTRFS_FIRST_FREE_OBJECTID, skipping BTRFS_TREE_RELOC_OBJECTID.
 * Each item is passed to maybe_repair_root_item(); read-only mode is
 * selected whenever no transaction is open (trans == NULL).  When a leaf is
 * exhausted, find_next_key() supplies the key to continue from and an open
 * transaction is committed.  In repair mode without an open transaction the
 * path is released after an item has been checked.  The cache is freed on
 * the way out.
 *
 * NOTE(review): presumably need_trans requests a re-scan of the leaf under a
 * transaction - that logic and the return value are not shown in this
 * listing; confirm.
 * NOTE(review): the result of the final btrfs_commit_transaction() call is
 * not assigned on its own line, so a commit failure there looks to be
 * ignored.
 */
14402 static int repair_root_items(struct btrfs_fs_info *info)
14404 struct btrfs_path path;
14405 struct btrfs_key key;
14406 struct extent_buffer *leaf;
14407 struct btrfs_trans_handle *trans = NULL;
14410 int need_trans = 0;
14412 btrfs_init_path(&path);
14414 ret = build_roots_info_cache(info);
14418 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14419 key.type = BTRFS_ROOT_ITEM_KEY;
14424 * Avoid opening and committing transactions if a leaf doesn't have
14425 * any root items that need to be fixed, so that we avoid rotating
14426 * backup roots unnecessarily.
14429 trans = btrfs_start_transaction(info->tree_root, 1);
14430 if (IS_ERR(trans)) {
14431 ret = PTR_ERR(trans);
14436 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14440 leaf = path.nodes[0];
14443 struct btrfs_key found_key;
14445 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14446 int no_more_keys = find_next_key(&path, &key);
14448 btrfs_release_path(&path);
14450 ret = btrfs_commit_transaction(trans,
14462 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14464 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14466 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14469 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14473 if (!trans && repair) {
14476 btrfs_release_path(&path);
14486 free_roots_info_cache();
14487 btrfs_release_path(&path);
14489 btrfs_commit_transaction(trans, info->tree_root);
14496 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14498 struct btrfs_trans_handle *trans;
14499 struct btrfs_block_group_cache *bg_cache;
14503 /* Clear all free space cache inodes and its extent data */
14505 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14508 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14511 current = bg_cache->key.objectid + bg_cache->key.offset;
14514 /* Don't forget to set cache_generation to -1 */
14515 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14516 if (IS_ERR(trans)) {
14517 error("failed to update super block cache generation");
14518 return PTR_ERR(trans);
14520 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14521 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache selected by clear_version and report progress
 * on stdout.
 *
 * clear_version == 1: refuses to run when the FREE_SPACE_TREE compat_ro
 *   flag is set (the message points at --clear-space-cache v2), otherwise
 *   calls clear_free_space_cache().
 * clear_version == 2: prints a notice when the FREE_SPACE_TREE flag is not
 *   set, otherwise calls btrfs_clear_free_space_tree().
 *
 * Failures of either helper are reported with error().
 * NOTE(review): the return statements are not shown in this listing;
 * presumably the helper result is returned - confirm.
 */
14526 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14531 if (clear_version == 1) {
14532 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14534 "free space cache v2 detected, use --clear-space-cache v2");
14538 printf("Clearing free space cache\n");
14539 ret = clear_free_space_cache(fs_info);
14541 error("failed to clear free space cache");
14544 printf("Free space cache cleared\n");
14546 } else if (clear_version == 2) {
14547 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14548 printf("no free space cache v2 to clear\n");
14552 printf("Clear free space cache v2\n");
14553 ret = btrfs_clear_free_space_tree(fs_info);
14555 error("failed to clear free space cache v2: %d", ret);
14558 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check", one string per output line.  cmd_check()
 * passes this table to usage() on invalid options or a wrong argument
 * count.  Keep the option list in sync with long_options[] in cmd_check().
 */
14565 const char * const cmd_check_usage[] = {
14566 "btrfs check [options] <device>",
14567 "Check structural integrity of a filesystem (unmounted).",
14568 "Check structural integrity of an unmounted filesystem. Verify internal",
14569 "trees' consistency and item connectivity. In the repair mode try to",
14570 "fix the problems found. ",
14571 "WARNING: the repair mode is considered dangerous",
14573 "-s|--super <superblock> use this superblock copy",
14574 "-b|--backup use the first valid backup root copy",
14575 "--force skip mount checks, repair is not possible",
14576 "--repair try to repair the filesystem",
14577 "--readonly run in read-only mode (default)",
14578 "--init-csum-tree create a new CRC tree",
14579 "--init-extent-tree create a new extent tree",
14580 "--mode <MODE> allows choice of memory/IO trade-offs",
14581 " where MODE is one of:",
14582 " original - read inodes and extents to memory (requires",
14583 " more memory, does less IO)",
14584 " lowmem - try to use less memory but read blocks again",
14586 "--check-data-csum verify checksums of data blocks",
14587 "-Q|--qgroup-report print a report on qgroup consistency",
14588 "-E|--subvol-extents <subvolid>",
14589 " print subvolume extents and sharing state",
14590 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14591 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14592 "-p|--progress indicate progress",
14593 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
14597 int cmd_check(int argc, char **argv)
14599 struct cache_tree root_cache;
14600 struct btrfs_root *root;
14601 struct btrfs_fs_info *info;
14604 u64 tree_root_bytenr = 0;
14605 u64 chunk_root_bytenr = 0;
14606 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14610 int init_csum_tree = 0;
14612 int clear_space_cache = 0;
14613 int qgroup_report = 0;
14614 int qgroups_repaired = 0;
14615 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14620 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14621 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14622 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14623 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14624 GETOPT_VAL_FORCE };
14625 static const struct option long_options[] = {
14626 { "super", required_argument, NULL, 's' },
14627 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14628 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14629 { "init-csum-tree", no_argument, NULL,
14630 GETOPT_VAL_INIT_CSUM },
14631 { "init-extent-tree", no_argument, NULL,
14632 GETOPT_VAL_INIT_EXTENT },
14633 { "check-data-csum", no_argument, NULL,
14634 GETOPT_VAL_CHECK_CSUM },
14635 { "backup", no_argument, NULL, 'b' },
14636 { "subvol-extents", required_argument, NULL, 'E' },
14637 { "qgroup-report", no_argument, NULL, 'Q' },
14638 { "tree-root", required_argument, NULL, 'r' },
14639 { "chunk-root", required_argument, NULL,
14640 GETOPT_VAL_CHUNK_TREE },
14641 { "progress", no_argument, NULL, 'p' },
14642 { "mode", required_argument, NULL,
14644 { "clear-space-cache", required_argument, NULL,
14645 GETOPT_VAL_CLEAR_SPACE_CACHE},
14646 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14647 { NULL, 0, NULL, 0}
14650 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14654 case 'a': /* ignored */ break;
14656 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14659 num = arg_strtou64(optarg);
14660 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14662 "super mirror should be less than %d",
14663 BTRFS_SUPER_MIRROR_MAX);
14666 bytenr = btrfs_sb_offset(((int)num));
14667 printf("using SB copy %llu, bytenr %llu\n", num,
14668 (unsigned long long)bytenr);
14674 subvolid = arg_strtou64(optarg);
14677 tree_root_bytenr = arg_strtou64(optarg);
14679 case GETOPT_VAL_CHUNK_TREE:
14680 chunk_root_bytenr = arg_strtou64(optarg);
14683 ctx.progress_enabled = true;
14687 usage(cmd_check_usage);
14688 case GETOPT_VAL_REPAIR:
14689 printf("enabling repair mode\n");
14691 ctree_flags |= OPEN_CTREE_WRITES;
14693 case GETOPT_VAL_READONLY:
14696 case GETOPT_VAL_INIT_CSUM:
14697 printf("Creating a new CRC tree\n");
14698 init_csum_tree = 1;
14700 ctree_flags |= OPEN_CTREE_WRITES;
14702 case GETOPT_VAL_INIT_EXTENT:
14703 init_extent_tree = 1;
14704 ctree_flags |= (OPEN_CTREE_WRITES |
14705 OPEN_CTREE_NO_BLOCK_GROUPS);
14708 case GETOPT_VAL_CHECK_CSUM:
14709 check_data_csum = 1;
14711 case GETOPT_VAL_MODE:
14712 check_mode = parse_check_mode(optarg);
14713 if (check_mode == CHECK_MODE_UNKNOWN) {
14714 error("unknown mode: %s", optarg);
14718 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14719 if (strcmp(optarg, "v1") == 0) {
14720 clear_space_cache = 1;
14721 } else if (strcmp(optarg, "v2") == 0) {
14722 clear_space_cache = 2;
14723 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14726 "invalid argument to --clear-space-cache, must be v1 or v2");
14729 ctree_flags |= OPEN_CTREE_WRITES;
14731 case GETOPT_VAL_FORCE:
14737 if (check_argc_exact(argc - optind, 1))
14738 usage(cmd_check_usage);
14740 if (ctx.progress_enabled) {
14741 ctx.tp = TASK_NOTHING;
14742 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14745 /* This check is the only reason for --readonly to exist */
14746 if (readonly && repair) {
14747 error("repair options are not compatible with --readonly");
14752 * experimental and dangerous
14754 if (repair && check_mode == CHECK_MODE_LOWMEM)
14755 warning("low-memory mode repair support is only partial");
14758 cache_tree_init(&root_cache);
14760 ret = check_mounted(argv[optind]);
14763 error("could not check mount status: %s",
14769 "%s is currently mounted, use --force if you really intend to check the filesystem",
14777 error("repair and --force is not yet supported");
14784 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14788 "filesystem mounted, continuing because of --force");
14790 /* A block device is mounted in exclusive mode by kernel */
14791 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14794 /* only allow partial opening under repair mode */
14796 ctree_flags |= OPEN_CTREE_PARTIAL;
14798 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14799 chunk_root_bytenr, ctree_flags);
14801 error("cannot open file system");
14807 global_info = info;
14808 root = info->fs_root;
14809 uuid_unparse(info->super_copy->fsid, uuidbuf);
14811 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14814 * Check the bare minimum before starting anything else that could rely
14815 * on it, namely the tree roots, any local consistency checks
14817 if (!extent_buffer_uptodate(info->tree_root->node) ||
14818 !extent_buffer_uptodate(info->dev_root->node) ||
14819 !extent_buffer_uptodate(info->chunk_root->node)) {
14820 error("critical roots corrupted, unable to check the filesystem");
14826 if (clear_space_cache) {
14827 ret = do_clear_free_space_cache(info, clear_space_cache);
14833 * repair mode will force us to commit transaction which
14834 * will make us fail to load log tree when mounting.
14836 if (repair && btrfs_super_log_root(info->super_copy)) {
14837 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14843 ret = zero_log_tree(root);
14846 error("failed to zero log tree: %d", ret);
14851 if (qgroup_report) {
14852 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14854 ret = qgroup_verify_all(info);
14861 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14862 subvolid, argv[optind], uuidbuf);
14863 ret = print_extent_state(info, subvolid);
14868 if (init_extent_tree || init_csum_tree) {
14869 struct btrfs_trans_handle *trans;
14871 trans = btrfs_start_transaction(info->extent_root, 0);
14872 if (IS_ERR(trans)) {
14873 error("error starting transaction");
14874 ret = PTR_ERR(trans);
14879 if (init_extent_tree) {
14880 printf("Creating a new extent tree\n");
14881 ret = reinit_extent_tree(trans, info);
14887 if (init_csum_tree) {
14888 printf("Reinitialize checksum tree\n");
14889 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14891 error("checksum tree initialization failed: %d",
14898 ret = fill_csum_tree(trans, info->csum_root,
14902 error("checksum tree refilling failed: %d", ret);
14907 * Ok now we commit and run the normal fsck, which will add
14908 * extent entries for all of the items it finds.
14910 ret = btrfs_commit_transaction(trans, info->extent_root);
14915 if (!extent_buffer_uptodate(info->extent_root->node)) {
14916 error("critical: extent_root, unable to check the filesystem");
14921 if (!extent_buffer_uptodate(info->csum_root->node)) {
14922 error("critical: csum_root, unable to check the filesystem");
14928 if (!init_extent_tree) {
14929 ret = repair_root_items(info);
14932 error("failed to repair root items: %s", strerror(-ret));
14936 fprintf(stderr, "Fixed %d roots.\n", ret);
14938 } else if (ret > 0) {
14940 "Found %d roots with an outdated root item.\n",
14943 "Please run a filesystem check with the option --repair to fix them.\n");
14950 ret = do_check_chunks_and_extents(info);
14954 "errors found in extent allocation tree or chunk allocation");
14956 /* Only re-check super size after we checked and repaired the fs */
14957 err |= !is_super_size_valid(info);
14959 if (!ctx.progress_enabled) {
14960 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14961 fprintf(stderr, "checking free space tree\n");
14963 fprintf(stderr, "checking free space cache\n");
14965 ret = check_space_cache(root);
14968 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14969 error("errors found in free space tree");
14971 error("errors found in free space cache");
14976 * We used to have to have these hole extents in between our real
14977 * extents so if we don't have this flag set we need to make sure there
14978 * are no gaps in the file extents for inodes, otherwise we can just
14979 * ignore it when this happens.
14981 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14982 ret = do_check_fs_roots(info, &root_cache);
14985 error("errors found in fs roots");
14989 fprintf(stderr, "checking csums\n");
14990 ret = check_csums(root);
14993 error("errors found in csum tree");
14997 fprintf(stderr, "checking root refs\n");
14998 /* For low memory mode, check_fs_roots_v2 handles root refs */
14999 if (check_mode != CHECK_MODE_LOWMEM) {
15000 ret = check_root_refs(root, &root_cache);
15003 error("errors found in root refs");
15008 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15009 struct extent_buffer *eb;
15011 eb = list_first_entry(&root->fs_info->recow_ebs,
15012 struct extent_buffer, recow);
15013 list_del_init(&eb->recow);
15014 ret = recow_extent_buffer(root, eb);
15017 error("fails to fix transid errors");
15022 while (!list_empty(&delete_items)) {
15023 struct bad_item *bad;
15025 bad = list_first_entry(&delete_items, struct bad_item, list);
15026 list_del_init(&bad->list);
15028 ret = delete_bad_item(root, bad);
15034 if (info->quota_enabled) {
15035 fprintf(stderr, "checking quota groups\n");
15036 ret = qgroup_verify_all(info);
15039 error("failed to check quota groups");
15043 ret = repair_qgroups(info, &qgroups_repaired);
15046 error("failed to repair quota groups");
15052 if (!list_empty(&root->fs_info->recow_ebs)) {
15053 error("transid errors in file system");
15058 printf("found %llu bytes used, ",
15059 (unsigned long long)bytes_used);
15061 printf("error(s) found\n");
15063 printf("no error found\n");
15064 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15065 printf("total tree bytes: %llu\n",
15066 (unsigned long long)total_btree_bytes);
15067 printf("total fs tree bytes: %llu\n",
15068 (unsigned long long)total_fs_tree_bytes);
15069 printf("total extent tree bytes: %llu\n",
15070 (unsigned long long)total_extent_tree_bytes);
15071 printf("btree space waste bytes: %llu\n",
15072 (unsigned long long)btree_space_waste);
15073 printf("file data blocks allocated: %llu\n referenced %llu\n",
15074 (unsigned long long)data_bytes_allocated,
15075 (unsigned long long)data_bytes_referenced);
15077 free_qgroup_counts();
15078 free_root_recs_tree(&root_cache);
15082 if (ctx.progress_enabled)
15083 task_deinit(ctx.info);