2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits for item checks.  Every flag is a single distinct bit so that
 * several conditions can be OR-ed into one mask.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* Bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* Bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* No inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1 << 21)	/* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and changed to use list_head.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
194 struct orphan_data_extent {
195 struct list_head list;
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
258 enum { FLAG_UNSET = 2 };
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Error bits OR-ed into the 'errors' mask of a backref.
 * print_ref_error() turns a mask of these bits into readable text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
336 struct file_extent_hole {
342 struct inode_record {
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
364 struct rb_root holes;
365 struct list_head orphan_extents;
/*
 * Error bits OR-ed into the 'errors' mask of an inode_record.
 * print_inode_error() turns a mask of these bits into readable text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
445 struct extent_entry {
450 struct list_head list;
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally to tell error conditions apart.  Each flag must own a
 * distinct bit, otherwise two conditions become indistinguishable once
 * they are OR-ed into one mask.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
/*
 * Used to be (1 << 4), which aliased REFERENCER_MISMATCH.  Moved to the
 * first free bit so the two conditions no longer collide.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatibility function to allow reuse of old code */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will do the hole split and is much more restrictive than add.
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exists.
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the cached inode records of @src_node into @dst_node.
 *
 * One reference is dropped from @src_node.  The entries of
 * src_node->root_cache are then moved/copied into dst_node->root_cache and,
 * in a second pass, those of src_node->inode_cache into
 * dst_node->inode_cache.  If the destination already holds an entry for the
 * same range (-EEXIST), the incoming record is merged into the existing one
 * and then freed.  Finally dst_node->current is moved forward to the inode
 * that was current on @src_node when that inode number is larger.
 *
 * NOTE(review): loop headers, error handling and the return statement are not
 * visible in this listing; only the visible statements are described.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
/* Drop the caller's reference on the source node. */
1323 if (--src_node->refs == 0)
/* Remember the inode number of the source node's current record, if any. */
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
/* The first pass works on the root caches. */
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* Fetch the successor before the current entry may be unlinked below. */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
/* New pointer node for the destination tree covering the same range. */
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
/* The destination already tracks this range: merge instead of inserting. */
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
/* Do not leave dst_node->current pointing at a record that may be freed. */
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* Root caches are done; switch both cursors to the inode caches. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
/*
 * Advance the destination's current inode when the source had progressed
 * further; the previous current record is marked checked first.
 */
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor for one entry of an inode record cache tree: recovers the
 * ptr_node that embeds @cache and frees the inode record associated with it.
 * NOTE(review): the statement that loads 'rec' from the node is not visible
 * in this listing.
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Instantiate the tree-freeing helper for inode record caches with
 * free_inode_ptr() as the per-entry destructor.  Presumably this generates
 * free_inode_recs_tree(), which is called elsewhere in this file - confirm
 * against the macro definition.
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered in the @shared cache tree for the given
 * bytenr (searched as a range of length 1) and return the containing
 * shared_node.
 * NOTE(review): the second parameter, the not-found handling and the return
 * statement are not visible in this listing.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-initialised shared_node for tree block @bytenr, initialise
 * its two cache trees and insert it into the @shared cache tree.
 * NOTE(review): the allocation-failure check, the use of @refs and the return
 * statement are not visible in this listing.
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
/* The node is keyed by the block start with a length of one. */
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called by the tree walker when it reaches a shared tree block at @bytenr on
 * @level.
 *
 * If no shared_node exists for the block yet, one is created and becomes the
 * walk control's active node for this level.  Otherwise the records already
 * collected for the block are either discarded (the walk is at the root level
 * of a root whose root item has zero refs) or spliced into the currently
 * active node.
 *
 * NOTE(review): the branch conditions around the lookups and all return
 * statements are not visible in this listing.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* A shared block must sit below the currently active level. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
/* Register the block and make it the active node for this level. */
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/*
 * Active node is the root level and the root item has no refs: drop one
 * reference and release the node's caches when it was the last one.
 */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise fold the already collected records into the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; unlink the node if unused. */
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walker leaves @level.
 *
 * The active node is detached from wc->nodes[], the active level is moved to
 * a higher level 'i' found by the loop below, and the detached node's records
 * are spliced into the new active node unless the walk is at the root level
 * of a root whose root item has zero refs.
 *
 * NOTE(review): the loop body, the else branch body and the return statements
 * are not visible in this listing.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Search the levels above @level (loop body not visible here). */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and activate level i instead. */
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
/* The node must still be referenced by someone besides us. */
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Implementation outline (visible statements only): first search the tree
 * root for the ROOT_REF item (parent_root_id, child_root_id); then iterate
 * over the ROOT_BACKREF items of child_root_id, looking for one whose offset
 * equals parent_root_id.
 * NOTE(review): the remaining parameters, result checks of the searches and
 * most return statements are not visible in this listing.
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Forward reference: ROOT_REF keyed by parent, offset = child. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Back references: ROOT_BACKREF items keyed by the child root id. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: continue in the next leaf. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the range of backref items belonging to the child root. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* Found a backref naming the expected parent. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
/* No matching parent: 0 if some other parent exists, 2 if none at all. */
1568 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM/DIR_INDEX item at @slot of leaf @eb.
 *
 * Every directory entry packed into the item is decoded, its name is copied
 * into a local buffer and a backref is recorded: in the inode cache for
 * entries pointing at an INODE_ITEM, in the root cache for entries pointing
 * at a ROOT_ITEM, and under BTRFS_MULTIPLE_OBJECTIDS in the inode cache for
 * any other location type.  For DIR_ITEM keys the key offset is verified
 * against the hash of the name.
 *
 * The name buffer holds exactly 'len' raw bytes and is NOT NUL-terminated,
 * so it must only be printed with an explicit length ("%.*s").
 *
 * NOTE(review): local declarations, some branch bodies and the return
 * statement are not visible in this listing.
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
/* One item may contain several directory entries back to back. */
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
/* Name crosses the item boundary or exceeds the maximum name length. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
/* Not even the fixed-size header fits into what is left of the item. */
1609 if (cur + sizeof(*di) > total)
/* Clamp the name to the bytes that are actually available. */
1611 len = min_t(u32, total - cur - sizeof(*di),
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
/* namebuf is not NUL-terminated: bound the print by its length. */
1623 error("DIR_ITEM[%llu %llu] name %.*s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, (int)len, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is expected to hold a single entry. */
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at @slot of leaf @eb.
 *
 * Each reference packed into the item is decoded and recorded as a backref
 * in the active node's inode cache, with key->objectid as the inode,
 * key->offset as the directory and the stored index/name.  Names that would
 * cross the item boundary or exceed BTRFS_NAME_LEN are truncated and flagged
 * with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): local declarations, the non-error length assignment and the
 * return statement are not visible in this listing.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits into the remaining bytes. */
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the fixed-size ref header. */
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed reference. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at @slot of leaf @eb.
 *
 * Each extended reference packed into the item is decoded and recorded as a
 * backref in the active node's inode cache, using the parent directory and
 * index stored in the extref itself.  Names longer than BTRFS_NAME_LEN are
 * truncated to BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check that the name stays
 * within the item is visible here - confirm against the complete source.
 * Local declarations and the return statement are not visible either.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
1726 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy to the buffer size and flag it. */
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the fixed-size extref header. */
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extended reference. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree that may overlap the byte
 * range [@start, @start + @len) and report the covered amount through @found.
 *
 * Each csum item covers (item size / csum size) sectors starting at its key
 * offset.
 *
 * NOTE(review): the statements that update @start, @len and *@found, the
 * loop construct and the return statement are not visible in this listing.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which is a candidate if it is
 * a csum item as well.
 */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: continue in the next leaf. */
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Ran out of csum items. */
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This csum item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
/* End of the byte range covered by the checksums in this item. */
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at @slot of leaf @eb for the active node's
 * current inode record.
 *
 * Visible steps: track the extent range seen so far (recording overlaps as
 * errors and gaps as holes), validate the file extent item depending on its
 * type (inline vs. regular/prealloc), account the size found, and - for
 * extents with a disk bytenr outside the data reloc tree - count the
 * checksums covering the extent to set csum-related flags on the record.
 *
 * NOTE(review): several declarations, conditions and the return statement
 * are not visible in this listing.
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned length. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking at its file offset. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Extent begins before the previous one ended: overlap. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* Extent begins after the previous one ended: record the gap as a hole. */
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a full sector. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry compression/encryption/encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk bytenr count towards the size found. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is used for the csum lookup. */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
/* Fewer checksum bytes than data bytes: part of the extent is uncovered. */
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * For each item the active node's current inode record is switched to the
 * item's objectid when it moves past the previous inode (marking the old
 * record as checked), and the item is then dispatched by key type to the
 * matching process_*() helper.
 *
 * NOTE(review): the bodies of the early-exit/skip conditions, the switch
 * statement itself and the return statement are not visible in this listing.
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: finish the old record, open a new one. */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping used by the tree walkers; every array has one slot
 * per tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): the enclosing struct header is not visible in this listing.
 */
/* start (bytenr) of the tree block last recorded for each level */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count looked up for that tree block */
1976 u64 refs[BTRFS_MAX_LEVEL];
/* non-zero if the block at this level has to be checked for the current root */
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers that are used before their definitions. */
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
/*
 * Leaf processing for the v2 walker: positions the path on the first inode
 * item (or first inode number change) of the leaf, then repeatedly calls
 * check_inode_item() while the path stays on the same leaf.  When the path
 * has moved to another leaf, the node refs of the path are refreshed from
 * the root level downwards.
 * NOTE(review): loop constructs, several branch bodies and the return
 * statement are not visible in this listing.
 */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
/* Level unchanged since the refs were last recorded. */
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the path's buffers below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * @slot and continuing to the node's last pointer.
 * NOTE(review): the level check following btrfs_header_level() is not
 * visible in this listing.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
/*
 * @parent: node holding the pointer being verified
 * @slot:   index of that pointer inside @parent
 * @child:  tree block that was read through the pointer
 *
 * In every diagnostic below "wanted" is the value stored in @parent and
 * "have" is the value found in @child.
 * NOTE(review): the 'ret' declaration/assignments and the return statement
 * are not visible in this listing.
 */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* The first key of a leaf is an item key, that of a node a node key. */
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
/* wanted = generation stored in the parent pointer, have = child header. */
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_node_ptr_generation(parent, slot),
2137 btrfs_header_generation(child));
2143 * For a tree node or leaf that is shared, we don't need to iterate it
2144 * in every fs or file tree check. Here we find all its root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root should check a tree block whose referencing roots are
 * listed in @roots, by comparing root->objectid with the first (lowest)
 * entry of the list.
 * NOTE(review): the return statements are not visible in this listing.
 */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
/* Only one root references the block. */
2152 if (roots->nnodes == 1)
/* First entry of the rb-tree backing the ulist. */
2155 node = rb_first(&roots->root);
2156 u = rb_entry(node, struct ulist_node, rb_node);
2158 * current root id is not smallest, we skip it and let it be checked
2159 * in the fs or file tree who hash the smallest root id.
2161 if (root->objectid != u->val)
/*
 * Work out whether the extent backing tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *flags_ret.
 *
 * Visible steps: a series of quick checks (root objectid, root block, RELOC
 * header flag, header owner), then a lookup of the block's extent item in
 * the extent tree followed by a scan of its inline references for a
 * TREE_BLOCK_REF whose offset equals the header owner.
 *
 * NOTE(review): the third parameter, the goto targets of the individual
 * checks and the return statement are not visible in this listing.
 */
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2170 struct btrfs_root *extent_root = root->fs_info->extent_root;
2171 struct btrfs_root_item *ri = &root->root_item;
2172 struct btrfs_extent_inline_ref *iref;
2173 struct btrfs_extent_item *ei;
2174 struct btrfs_key key;
2175 struct btrfs_path *path = NULL;
2186 * Except file/reloc tree, we can not have FULL BACKREF MODE
2188 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @eb is the root block of this tree. */
2192 if (eb->start == btrfs_root_bytenr(ri))
2195 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2198 owner = btrfs_header_owner(eb);
2199 if (owner == root->objectid)
2202 path = btrfs_alloc_path();
/* Search past any item for this bytenr, then step back to the extent item. */
2206 key.objectid = btrfs_header_bytenr(eb);
2208 key.offset = (u64)-1;
2210 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2217 ret = btrfs_previous_extent_item(extent_root, path,
2223 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on 'eb' refers to the extent tree leaf, not the input block. */
2225 eb = path->nodes[0];
2226 slot = path->slots[0];
2227 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2229 flags = btrfs_extent_flags(eb, ei);
2230 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs start after the extent item and end with the item. */
2233 ptr = (unsigned long)(ei + 1);
2234 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* EXTENT_ITEM keys carry a tree_block_info before the inline refs. */
2236 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237 ptr += sizeof(struct btrfs_tree_block_info);
2240 /* Reached extent item ends normally */
2244 /* Beyond extent item end, wrong item size */
2246 error("extent item at bytenr %llu slot %d has wrong size",
2251 iref = (struct btrfs_extent_inline_ref *)ptr;
2252 offset = btrfs_extent_inline_ref_offset(eb, iref);
2253 type = btrfs_extent_inline_ref_type(eb, iref);
/* A keyed tree block ref for the owner root. */
2255 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2257 ptr += btrfs_extent_inline_ref_size(type);
2261 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267 btrfs_free_path(path);
2272 * For a tree node or leaf, we record its reference count, so that if we
2273 * process this node or leaf again later, we don't need to compute it again.
2275 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the @nrefs bookkeeping for the tree block at @bytenr on @level:
 * its extent reference count, whether this root needs to check it, and -
 * when @check_all is set and @eb is given - whether it is a full backref
 * block.
 * NOTE(review): several branch conditions and the return statements are not
 * visible in this listing.
 */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278 struct extent_buffer *eb, struct node_refs *nrefs,
2279 u64 level, int check_all)
2281 struct ulist *roots;
2284 int root_level = btrfs_header_level(root->node);
/* The block recorded for this level is already the requested one. */
2288 if (nrefs->bytenr[level] == bytenr)
2291 if (bytenr != (u64)-1) {
2292 /* the return value of this function seems a mistake */
2293 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294 level, 1, &refs, &flags);
2296 if (ret < 0 && !check_all)
/* Start a fresh record for this level. */
2299 nrefs->bytenr[level] = bytenr;
2300 nrefs->refs[level] = refs;
2301 nrefs->full_backref[level] = 0;
2302 nrefs->checked[level] = 0;
/* Find all roots referencing the block to decide who checks it. */
2305 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2310 check = need_check(root, roots);
2312 nrefs->need_check[level] = check;
2315 nrefs->need_check[level] = 1;
2317 if (level == root_level) {
2318 nrefs->need_check[level] = 1;
2321 * The node refs may have not been
2322 * updated if upper needs checking (the
2323 * lowest root_objectid) the node can
/* Inherit the decision made for the parent level. */
2326 nrefs->need_check[level] =
2327 nrefs->need_check[level + 1];
2333 if (check_all && eb) {
2334 calc_extent_flag_v2(root, eb, &flags);
2335 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336 nrefs->full_backref[level] = 1;
2343 * @level if @level == -1 it means an extent data item,
2344 * otherwise a normal tree block.
/*
 * Inspect the reference counts recorded in @nrefs for the levels above
 * @level (up to the root level); a count greater than one on any of them is
 * the deciding condition.
 * NOTE(review): the return statements are not visible in this listing, so
 * the exact result for each case cannot be stated here.
 */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347 struct node_refs *nrefs, int level)
2349 int root_level = btrfs_header_level(root->node);
/* Reject levels outside [-1, root_level]. */
2351 if (level > root_level || level < -1)
2353 if (level == root_level)
2356 * if the upper node is marked full backref, it should contain shared
2357 * backref of the parent (except owner == root->objectid).
2359 while (++level <= root_level)
2360 if (nrefs->refs[level] > 1)
/*
 * Original-mode tree walker: descend from path->nodes[*level] towards the
 * leaves.
 *
 * For each block visited the extent reference count is looked up (or taken
 * from @nrefs when cached); enter_shared_node() is called for blocks on the
 * way (the guarding condition is not visible here).  Leaves are handed to
 * process_one_leaf(); child blocks are read, validated against their parent
 * pointer and with the generic leaf/node checks, and then installed in the
 * path one level down.
 *
 * NOTE(review): many conditions, error paths and the return statement are
 * not visible in this listing.
 */
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367 struct walk_control *wc, int *level,
2368 struct node_refs *nrefs)
2370 enum btrfs_tree_block_status status;
2373 struct btrfs_fs_info *fs_info = root->fs_info;
2374 struct extent_buffer *next;
2375 struct extent_buffer *cur;
2379 WARN_ON(*level < 0);
2380 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when @nrefs already describes this block. */
2382 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383 refs = nrefs->refs[*level];
2386 ret = btrfs_lookup_extent_info(NULL, root,
2387 path->nodes[*level]->start,
2388 *level, 1, &refs, NULL);
2393 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394 nrefs->refs[*level] = refs;
2398 ret = enter_shared_node(root, path->nodes[*level]->start,
2406 while (*level >= 0) {
2407 WARN_ON(*level < 0);
2408 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409 cur = path->nodes[*level];
/* The header level must agree with the level the path claims. */
2411 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2417 ret = process_one_leaf(root, cur, wc);
/* Child pointer to follow next. */
2422 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2425 if (bytenr == nrefs->bytenr[*level - 1]) {
2426 refs = nrefs->refs[*level - 1];
2428 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429 *level - 1, 1, &refs, NULL);
2433 nrefs->bytenr[*level - 1] = bytenr;
2434 nrefs->refs[*level - 1] = refs;
2439 ret = enter_shared_node(root, bytenr, refs,
2442 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2447 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449 free_extent_buffer(next);
2450 reada_walk_down(root, cur, path->slots[*level]);
2451 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2452 if (!extent_buffer_uptodate(next)) {
2453 struct btrfs_key node_key;
2455 btrfs_node_key_to_cpu(path->nodes[*level],
2457 path->slots[*level]);
2458 btrfs_add_corrupt_extent_record(root->fs_info,
2460 path->nodes[*level]->start,
2461 root->fs_info->nodesize,
/* Verify key, bytenr and generation against the parent pointer. */
2468 ret = check_child_node(cur, path->slots[*level], next);
2470 free_extent_buffer(next);
2475 if (btrfs_is_leaf(next))
2476 status = btrfs_check_leaf(root, NULL, next);
2478 status = btrfs_check_node(root, NULL, next);
2479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2485 *level = *level - 1;
2486 free_extent_buffer(path->nodes[*level]);
2487 path->nodes[*level] = next;
2488 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2491 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; used by account_bytes() below. */
2495 static int fs_root_objectid(u64 objectid);
2498 * Update global fs information.
/*
 * Adds the size of the tree block at path->nodes[level] to the global byte
 * counters and accounts its unused space as btree waste: free leaf space for
 * one kind of block, unused key pointers for the other.
 * NOTE(review): the third parameter and the leaf/node branch condition are
 * not visible in this listing.
 */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2504 struct extent_buffer *eb = path->nodes[level];
2506 total_btree_bytes += eb->len;
2507 if (fs_root_objectid(root->objectid))
2508 total_fs_tree_bytes += eb->len;
2509 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510 total_extent_tree_bytes += eb->len;
2513 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* Unused key pointer slots, each sizeof(struct btrfs_key_ptr) bytes. */
2515 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516 btrfs_header_nritems(eb));
2517 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2522 * This function only handles BACKREF_MISSING,
2523 * If corresponding extent item exists, increase the ref, else insert an extent
2526 * Returns error bits after repair.
/*
 * @trans: transaction used for the extent tree modifications
 * @root:  tree that references @node
 * @node:  tree block whose backref is missing
 * @nrefs: per-level ref bookkeeping, updated to reflect the repair
 * @level: level of @node, must be below BTRFS_MAX_LEVEL
 * @err:   error bits found by the check; BACKREF_MISSING is cleared on
 *         success
 *
 * NOTE(review): several declarations, result checks and the return
 * statements are not visible in this listing.
 */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct extent_buffer *node,
2531 struct node_refs *nrefs, int level, int err)
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 struct btrfs_root *extent_root = fs_info->extent_root;
2535 struct btrfs_path path;
2536 struct btrfs_extent_item *ei;
2537 struct btrfs_tree_block_info *bi;
2538 struct btrfs_key key;
2539 struct extent_buffer *eb;
2540 u32 size = sizeof(*ei);
2541 u32 node_size = root->fs_info->nodesize;
2542 int insert_extent = 0;
2543 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544 int root_level = btrfs_header_level(root->node);
2549 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
/* Nothing to do unless the backref is reported missing. */
2552 if ((err & BACKREF_MISSING) == 0)
/* The nrefs arrays have BTRFS_MAX_LEVEL entries, so that value is invalid too. */
2555 WARN_ON(level >= BTRFS_MAX_LEVEL);
2558 btrfs_init_path(&path);
2559 bytenr = btrfs_header_bytenr(node);
2560 owner = btrfs_header_owner(node);
2561 generation = btrfs_header_generation(node);
2563 key.objectid = bytenr;
2565 key.offset = (u64)-1;
2567 /* Search for the extent item */
2568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2578 /* calculate if the extent item flag is full backref or not */
2579 if (nrefs->full_backref[level] != 0)
2580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2582 /* insert an extent item */
2583 if (insert_extent) {
2584 struct btrfs_disk_key copy_key;
2586 generation = btrfs_header_generation(node);
/* A parent in full backref mode makes a foreign-owned child full backref too. */
2588 if (level < root_level && nrefs->full_backref[level + 1] &&
2589 owner != root->objectid) {
2590 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2593 key.objectid = bytenr;
/* Without skinny metadata the item also carries a tree_block_info. */
2594 if (!skinny_metadata) {
2595 key.type = BTRFS_EXTENT_ITEM_KEY;
2596 key.offset = node_size;
2597 size += sizeof(*bi);
2599 key.type = BTRFS_METADATA_ITEM_KEY;
2603 btrfs_release_path(&path);
2604 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The new item starts with zero refs; the ref is added further below. */
2612 btrfs_set_extent_refs(eb, ei, 0);
2613 btrfs_set_extent_generation(eb, ei, generation);
2614 btrfs_set_extent_flags(eb, ei, flags);
2616 if (!skinny_metadata) {
2617 bi = (struct btrfs_tree_block_info *)(ei + 1);
2618 memset_extent_buffer(eb, 0, (unsigned long)bi,
2620 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621 btrfs_set_disk_key_type(&copy_key, 0);
2622 btrfs_set_disk_key_offset(&copy_key, 0);
2624 btrfs_set_tree_block_level(eb, bi, level);
2625 btrfs_set_tree_block_key(eb, bi, &copy_key);
2627 btrfs_mark_buffer_dirty(eb);
2628 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2632 nrefs->refs[level] = 0;
2633 nrefs->full_backref[level] =
2634 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635 btrfs_release_path(&path);
/* Use a shared (parent based) backref when the parent is full backref. */
2638 if (level < root_level && nrefs->full_backref[level + 1] &&
2639 owner != root->objectid)
2640 parent = nrefs->bytenr[level + 1];
2642 /* increase the ref */
2643 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644 parent, root->objectid, level, 0);
2646 nrefs->refs[level]++;
2648 btrfs_release_path(&path);
2651 "failed to repair tree block ref start %llu root %llu due to %s",
2652 bytenr, root->objectid, strerror(-ret));
2654 printf("Added one tree block ref start %llu %s %llu\n",
2655 bytenr, parent ? "parent" : "root",
2656 parent ? parent : root->objectid);
2657 err &= ~BACKREF_MISSING;
/* Forward declarations of checkers used by walk_down_tree_v2(). */
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664 unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666 struct extent_buffer *eb, u64 bytenr,
2667 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669 struct btrfs_root *root, struct btrfs_path *path,
2670 struct node_refs *nrefs, int account_bytes);
2673 * @trans just for lowmem repair mode
2674 * @check all if not 0 then check all tree block backrefs and items
2675 * 0 then just check relationship of items in fs tree(s)
2677 * Returns >0 Found error, should continue
2678 * Returns <0 Fatal error, must exit the whole check
2679 * Returns 0 No errors found
/*
 * Outline of the visible statements: for every block on the way down the
 * node refs are refreshed, the tree block backref is checked (and repaired
 * through repair_tree_block_ref()), bytes are accounted when checking all
 * trees, and the block itself is validated.  Leaves are handed to
 * process_one_leaf_v2() or check_leaf_items(); for nodes the next child is
 * read, validated and installed in the path one level down.
 * NOTE(review): many conditions, error paths and the return statement are
 * not visible in this listing.
 */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *root, struct btrfs_path *path,
2683 int *level, struct node_refs *nrefs, int ext_ref,
2687 enum btrfs_tree_block_status status;
2690 struct btrfs_fs_info *fs_info = root->fs_info;
2691 struct extent_buffer *next;
2692 struct extent_buffer *cur;
2696 int account_file_data = 0;
2698 WARN_ON(*level < 0);
2699 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure @nrefs describes the block the walk starts from. */
2701 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702 path->nodes[*level], nrefs, *level, check_all);
2706 while (*level >= 0) {
2707 WARN_ON(*level < 0);
2708 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709 cur = path->nodes[*level];
2710 bytenr = btrfs_header_bytenr(cur);
2711 check = nrefs->need_check[*level];
/* The header level must agree with the level the path claims. */
2713 if (btrfs_header_level(cur) != *level)
2716 * Update bytes accounting and check tree block ref
2717 * NOTE: Doing accounting and check before checking nritems
2718 * is necessary because of empty node/leaf.
2720 if ((check_all && !nrefs->checked[*level]) ||
2721 (!check_all && nrefs->need_check[*level])) {
2722 ret = check_tree_block_ref(root, cur,
2723 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724 btrfs_header_owner(cur), nrefs);
2727 ret = repair_tree_block_ref(trans, root,
2728 path->nodes[*level], nrefs, *level, ret);
/* Account only blocks this root is responsible for and that have refs. */
2731 if (check_all && nrefs->need_check[*level] &&
2732 nrefs->refs[*level]) {
2733 account_bytes(root, path, *level);
2734 account_file_data = 1;
2736 nrefs->checked[*level] = 1;
/* All slots of this block have been consumed. */
2739 if (path->slots[*level] >= btrfs_header_nritems(cur))
2742 /* Don't forget to check leaf/node validation */
2744 /* skip duplicate check */
2745 if (check || !check_all) {
2746 ret = btrfs_check_leaf(root, NULL, cur);
2747 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2755 ret = process_one_leaf_v2(root, path, nrefs,
2758 ret = check_leaf_items(trans, root, path,
2759 nrefs, account_file_data);
2763 if (check || !check_all) {
2764 ret = btrfs_check_node(root, NULL, cur);
2765 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer to follow next. */
2772 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2775 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2780 * check all trees in check_chunks_and_extent_v2
2781 * check shared node once in check_fs_roots
2783 if (!check_all && !nrefs->need_check[*level - 1]) {
2784 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read the block. */
2788 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790 free_extent_buffer(next);
2791 reada_walk_down(root, cur, path->slots[*level]);
2792 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Unreadable child: record it as a corrupt extent. */
2793 if (!extent_buffer_uptodate(next)) {
2794 struct btrfs_key node_key;
2796 btrfs_node_key_to_cpu(path->nodes[*level],
2798 path->slots[*level]);
2799 btrfs_add_corrupt_extent_record(fs_info,
2800 &node_key, path->nodes[*level]->start,
2801 fs_info->nodesize, *level);
/* Verify key, bytenr and generation against the parent pointer. */
2807 ret = check_child_node(cur, path->slots[*level], next);
2812 if (btrfs_is_leaf(next))
2813 status = btrfs_check_leaf(root, NULL, next);
2815 status = btrfs_check_node(root, NULL, next);
2816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2822 *level = *level - 1;
2823 free_extent_buffer(path->nodes[*level]);
2824 path->nodes[*level] = next;
2825 path->slots[*level] = 0;
2826 account_file_data = 0;
/* bytenr == (u64)-1: only refresh the full_backref state for the new level. */
2828 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
/*
 * Walk up the path starting at *level, looking for a block that still has a
 * slot after the current one (slots[i] + 1 < nritems).
 *
 * The block at *level is released with free_extent_buffer() and its path
 * entry is cleared; presumably this happens for each level that has no slot
 * left (TODO confirm). When the released level equals wc->active_node,
 * leave_shared_node() is called for it. *level must never be above
 * wc->active_node at that point (BUG_ON).
 */
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834 struct walk_control *wc, int *level)
2837 struct extent_buffer *leaf;
2839 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840 leaf = path->nodes[i];
2841 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2846 free_extent_buffer(path->nodes[*level]);
2847 path->nodes[*level] = NULL;
2848 BUG_ON(*level > wc->active_node);
2849 if (*level == wc->active_node)
2850 leave_shared_node(root, wc, *level);
/*
 * Counterpart of walk_up_tree() that takes no walk_control.
 *
 * Scans upward from *level for a block that still has a slot after the
 * current one; the block at *level is released with free_extent_buffer() and
 * its path entry is cleared (presumably once per exhausted level, TODO
 * confirm).
 */
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2861 struct extent_buffer *leaf;
2863 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864 leaf = path->nodes[i];
2865 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2870 free_extent_buffer(path->nodes[*level]);
2871 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The conditions tested are: the inode item was found and the record has no
 * errors, nlink is 1 and found_link is 0, the backref list is not empty, and
 * its first backref has an inode ref with index 0 and the two-byte name ..
 * but neither a dir index nor a dir item.
 *
 * Presumably any failing condition makes the function report an error to the
 * caller (TODO confirm the exact return values).
 */
2878 static int check_root_dir(struct inode_record *rec)
2880 struct inode_backref *backref;
2883 if (!rec->found_inode_item || rec->errors)
2885 if (rec->nlink != 1 || rec->found_link != 0)
2887 if (list_empty(&rec->backrefs))
2889 backref = to_inode_backref(rec->backrefs.next);
2890 if (!backref->found_inode_ref)
2892 if (backref->index != 0 || backref->namelen != 2 ||
2893 memcmp(backref->name, "..", 2))
2895 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size.
 *
 * The item is located by searching for (rec->ino, BTRFS_INODE_ITEM_KEY,
 * (u64)-1) and then verifying that the key read back from the leaf still
 * belongs to rec->ino. After the update the leaf is marked dirty,
 * I_ERR_DIR_ISIZE_WRONG is cleared from rec->errors and a message is printed.
 * The path is released at the end.
 */
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct btrfs_inode_item *ei;
2907 struct btrfs_key key;
/* The largest offset makes the search land after every item of rec->ino. */
2910 key.objectid = rec->ino;
2911 key.type = BTRFS_INODE_ITEM_KEY;
2912 key.offset = (u64)-1;
2914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2918 if (!path->slots[0]) {
2925 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926 if (key.objectid != rec->ino) {
2931 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932 struct btrfs_inode_item);
2933 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934 btrfs_mark_buffer_dirty(path->nodes[0]);
2935 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937 root->root_key.objectid);
2939 btrfs_release_path(path);
/*
 * Add the orphan item that is missing for rec->ino via
 * btrfs_add_orphan_item(), release the path, and clear I_ERR_NO_ORPHAN_ITEM
 * from rec->errors (presumably only when the insert succeeded, TODO confirm).
 */
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2950 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951 btrfs_release_path(path);
2953 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size.
 *
 * The inode item is looked up with btrfs_search_slot(); on the success path
 * the leaf is marked dirty, I_ERR_FILE_NBYTES_WRONG is cleared from
 * rec->errors and a message is printed. The path is released at the end.
 */
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958 struct btrfs_root *root,
2959 struct btrfs_path *path,
2960 struct inode_record *rec)
2962 struct btrfs_inode_item *ei;
2963 struct btrfs_key key;
2966 key.objectid = rec->ino;
2967 key.type = BTRFS_INODE_ITEM_KEY;
2970 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2977 /* Since ret == 0, no need to check anything */
2978 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979 struct btrfs_inode_item);
2980 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981 btrfs_mark_buffer_dirty(path->nodes[0]);
2982 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983 printf("reset nbytes for ino %llu root %llu\n",
2984 rec->ino, root->root_key.objectid);
2986 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that is missing for @backref of inode @rec.
 *
 * Runs in its own transaction: an empty item keyed (backref->dir,
 * BTRFS_DIR_INDEX_KEY, backref->index) is inserted and filled with a dir item
 * that points at (rec->ino, BTRFS_INODE_ITEM_KEY, 0) and carries the backref
 * name. Afterwards backref->found_dir_index is set and the found_size and
 * I_ERR_DIR_ISIZE_WRONG state of the parent directory record are updated.
 */
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991 struct cache_tree *inode_cache,
2992 struct inode_record *rec,
2993 struct inode_backref *backref)
2995 struct btrfs_path path;
2996 struct btrfs_trans_handle *trans;
2997 struct btrfs_dir_item *dir_item;
2998 struct extent_buffer *leaf;
2999 struct btrfs_key key;
3000 struct btrfs_disk_key disk_key;
3001 struct inode_record *dir_rec;
3002 unsigned long name_ptr;
/* Item payload: the fixed dir item header followed by the name bytes. */
3003 u32 data_size = sizeof(*dir_item) + backref->namelen;
3006 trans = btrfs_start_transaction(root, 1);
3008 return PTR_ERR(trans);
3010 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011 (unsigned long long)rec->ino);
3013 btrfs_init_path(&path);
3014 key.objectid = backref->dir;
3015 key.type = BTRFS_DIR_INDEX_KEY;
3016 key.offset = backref->index;
3017 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3020 leaf = path.nodes[0];
3021 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The dir item points at the inode item of rec->ino. */
3023 disk_key.objectid = cpu_to_le64(rec->ino);
3024 disk_key.type = BTRFS_INODE_ITEM_KEY;
3025 disk_key.offset = 0;
3027 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029 btrfs_set_dir_data_len(leaf, dir_item, 0);
3030 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly after the dir item header. */
3031 name_ptr = (unsigned long)(dir_item + 1);
3032 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033 btrfs_mark_buffer_dirty(leaf);
3034 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not checked here. */
3035 btrfs_commit_transaction(trans, root);
3037 backref->found_dir_index = 1;
3038 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039 BUG_ON(IS_ERR(dir_rec));
/*
 * The new index adds namelen bytes to the size found for the directory;
 * re-evaluate the isize error bit against the recorded isize.
 */
3042 dir_rec->found_size += backref->namelen;
3043 if (dir_rec->found_size == dir_rec->isize &&
3044 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046 if (dir_rec->found_size != dir_rec->isize)
3047 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * Runs in its own transaction. The entry is looked up with
 * btrfs_lookup_dir_index() using the directory, name and index of the
 * backref, then removed with btrfs_del_item() or
 * btrfs_delete_one_dir_name(); presumably the choice depends on whether the
 * lookup returned a dir item (TODO confirm). The path is released and the
 * transaction committed on both visible exit paths.
 */
3052 static int delete_dir_index(struct btrfs_root *root,
3053 struct inode_backref *backref)
3055 struct btrfs_trans_handle *trans;
3056 struct btrfs_dir_item *di;
3057 struct btrfs_path path;
3060 trans = btrfs_start_transaction(root, 1);
3062 return PTR_ERR(trans);
3064 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065 (unsigned long long)backref->dir,
3066 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067 (unsigned long long)root->objectid);
3069 btrfs_init_path(&path);
3070 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071 backref->name, backref->namelen,
3072 backref->index, -1);
3075 btrfs_release_path(&path);
3076 btrfs_commit_transaction(trans, root);
3083 ret = btrfs_del_item(trans, root, &path);
3085 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3087 btrfs_release_path(&path);
3088 btrfs_commit_transaction(trans, root);
/*
 * Build a btrfs_inode_item on the stack and insert it for inode @ino.
 *
 * @size, @nbytes, @nlink and @mode are stored as given, the generation is
 * trans->transid, ctime and mtime are set to the current time and otime to
 * zero. A warning asks the user to verify permissions and content because the
 * recreated item may be incomplete.
 *
 * NOTE(review): only the nsec part of atime is set below and nothing visible
 * here zeroes the rest of ii before it is handed to btrfs_insert_inode() -
 * confirm whether the remaining fields are initialised.
 */
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root, u64 ino, u64 size,
3094 u64 nbytes, u64 nlink, u32 mode)
3096 struct btrfs_inode_item ii;
3097 time_t now = time(NULL);
3100 btrfs_set_stack_inode_size(&ii, size);
3101 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102 btrfs_set_stack_inode_nlink(&ii, nlink);
3103 btrfs_set_stack_inode_mode(&ii, mode);
3104 btrfs_set_stack_inode_generation(&ii, trans->transid);
3105 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3113 ret = btrfs_insert_inode(trans, root, ino, &ii);
3116 warning("root %llu inode %llu recreating inode item, this may "
3117 "be incomplete, please check permissions and content after "
3118 "the fsck completes.\n", (unsigned long long)root->objectid,
3119 (unsigned long long)ino);
/*
 * Create an empty inode item for @ino through __create_inode_item(): size,
 * nbytes and nlink are all 0, and the mode is S_IFDIR when filetype is
 * BTRFS_FT_DIR and S_IFREG otherwise, with permission bits 0755.
 */
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125 struct btrfs_root *root, u64 ino,
3128 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3130 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the inode item of @rec in its own transaction.
 *
 * nlink is 1 when @root_dir is set and rec->found_link otherwise. A record
 * with a dir item is recreated as a directory of size rec->found_size (a
 * message is printed when it also has file extents); any other record is
 * recreated as a regular file of size rec->extent_end. Both get permission
 * bits 0755. The item itself is written by __create_inode_item() and the
 * transaction is committed afterwards.
 */
3133 static int create_inode_item(struct btrfs_root *root,
3134 struct inode_record *rec, int root_dir)
3136 struct btrfs_trans_handle *trans;
3142 trans = btrfs_start_transaction(root, 1);
3143 if (IS_ERR(trans)) {
3144 ret = PTR_ERR(trans);
3148 nlink = root_dir ? 1 : rec->found_link;
3149 if (rec->found_dir_item) {
3150 if (rec->found_file_extent)
3151 fprintf(stderr, "root %llu inode %llu has both a dir "
3152 "item and extents, unsure if it is a dir or a "
3153 "regular file so setting it as a directory\n",
3154 (unsigned long long)root->objectid,
3155 (unsigned long long)rec->ino);
3156 mode = S_IFDIR | 0755;
3157 size = rec->found_size;
3158 } else if (!rec->found_dir_item) {
3159 size = rec->extent_end;
3160 mode = S_IFREG | 0755;
3163 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3165 btrfs_commit_transaction(trans, root);
/*
 * Try to repair every backref attached to @rec.
 *
 * The delete flag selects the kind of work done: several branches only run
 * when it is clear (recreating a missing root dir inode item, adding a
 * missing dir index, adding a missing dir index/item pair, recreating a
 * missing inode item), while bad dir indexes are removed through
 * delete_dir_index(). Backrefs that are fully consistent, or whose bad dir
 * index was deleted, are unlinked from rec->backrefs.
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170 struct inode_record *rec,
3171 struct cache_tree *inode_cache,
3174 struct inode_backref *tmp, *backref;
3175 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3179 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3180 if (!delete && rec->ino == root_dirid) {
3181 if (!rec->found_inode_item) {
3182 ret = create_inode_item(root, rec, 1);
3189 /* Index 0 for root dir's are special, don't mess with it */
3190 if (rec->ino == root_dirid && backref->index == 0)
3194 ((backref->found_dir_index && !backref->found_inode_ref) ||
3195 (backref->found_dir_index && backref->found_inode_ref &&
3196 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197 ret = delete_dir_index(root, backref);
3201 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
3206 if (!delete && !backref->found_dir_index &&
3207 backref->found_dir_item && backref->found_inode_ref) {
3208 ret = add_missing_dir_index(root, inode_cache, rec,
/* A complete, error-free backref needs no more work: drop it. */
3213 if (backref->found_dir_item &&
3214 backref->found_dir_index) {
3215 if (!backref->errors &&
3216 backref->found_inode_ref) {
3217 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair. */
3224 if (!delete && (!backref->found_dir_index &&
3225 !backref->found_dir_item &&
3226 backref->found_inode_ref)) {
3227 struct btrfs_trans_handle *trans;
3228 struct btrfs_key location;
3230 ret = check_dir_conflict(root, backref->name,
3236 * let nlink fixing routine to handle it,
3237 * which can do it better.
3242 location.objectid = rec->ino;
3243 location.type = BTRFS_INODE_ITEM_KEY;
3244 location.offset = 0;
3246 trans = btrfs_start_transaction(root, 1);
3247 if (IS_ERR(trans)) {
3248 ret = PTR_ERR(trans);
3251 fprintf(stderr, "adding missing dir index/item pair "
3253 (unsigned long long)rec->ino);
3254 ret = btrfs_insert_dir_item(trans, root, backref->name,
3256 backref->dir, &location,
3257 imode_to_type(rec->imode),
3260 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing. */
3264 if (!delete && (backref->found_inode_ref &&
3265 backref->found_dir_index &&
3266 backref->found_dir_item &&
3267 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268 !rec->found_inode_item)) {
3269 ret = create_inode_item(root, rec, 0);
3276 return ret ? ret : repaired;
3280 * To determine the file type for nlink/inode_item repair
3282 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283 * Return -ENOENT if file type is not found.
/*
 * Store the BTRFS_FT_* type of @rec in *type.
 *
 * When the inode item was found the type is derived from rec->imode with
 * imode_to_type(); otherwise it is taken from the filetype of a backref that
 * has a dir index or a dir item.
 */
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3287 struct inode_backref *backref;
3289 /* For inode item recovered case */
3290 if (rec->found_inode_item) {
3291 *type = imode_to_type(rec->imode);
3295 list_for_each_entry(backref, &rec->backrefs, list) {
3296 if (backref->found_dir_index || backref->found_dir_item) {
3297 *type = backref->filetype;
3305 * To determine the file name for nlink repair
3307 * Return 0 if file name is found, set name and namelen.
3308 * Return -ENOENT if file name is not found.
/*
 * Copy the name of a backref of @rec into @name and store its length in
 * *namelen. A backref qualifies when it has a dir index, a dir item or an
 * inode ref.
 *
 * The name is copied with memcpy() for exactly backref->namelen bytes, so no
 * terminator is written here; @name is assumed to be large enough for the
 * longest backref name (TODO confirm against callers).
 */
3310 static int find_file_name(struct inode_record *rec,
3311 char *name, int *namelen)
3313 struct inode_backref *backref;
3315 list_for_each_entry(backref, &rec->backrefs, list) {
3316 if (backref->found_dir_index || backref->found_dir_item ||
3317 backref->found_inode_ref) {
3318 memcpy(name, backref->name, backref->namelen);
3319 *namelen = backref->namelen;
3326 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from its backrefs.
 *
 * Steps visible below:
 *  1. rec->found_link is reset to 0.
 *  2. Every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have all of dir index, dir item and inode ref are also removed from
 *     rec->backrefs.
 *  3. nlink in the inode item is set to 0.
 *  4. The backrefs still on the list are added back with btrfs_add_link().
 */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root,
3329 struct btrfs_path *path,
3330 struct inode_record *rec)
3332 struct inode_backref *backref;
3333 struct inode_backref *tmp;
3334 struct btrfs_key key;
3335 struct btrfs_inode_item *inode_item;
3338 /* We don't believe this either, reset it and iterate backref */
3339 rec->found_link = 0;
3341 /* Remove all backref including the valid ones */
3342 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344 backref->index, backref->name,
3345 backref->namelen, 0);
3349 /* remove invalid backref, so it won't be added back */
3350 if (!(backref->found_dir_index &&
3351 backref->found_dir_item &&
3352 backref->found_inode_ref)) {
3353 list_del(&backref->list);
3360 /* Set nlink to 0 */
3361 key.objectid = rec->ino;
3362 key.type = BTRFS_INODE_ITEM_KEY;
3364 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3371 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372 struct btrfs_inode_item);
3373 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374 btrfs_mark_buffer_dirty(path->nodes[0]);
3375 btrfs_release_path(path);
3378 * Add back valid inode_ref/dir_item/dir_index,
3379 * add_link() will handle the nlink inc, so new nlink must be correct
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383 backref->name, backref->namelen,
3384 backref->filetype, &backref->index, 1, 0);
3389 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root and store it in
 * *highest_ino.
 *
 * The search targets objectid BTRFS_LAST_FREE_OBJECTID and the key of the
 * item just before the returned slot is read back; its objectid is the
 * result. A result at or above BTRFS_LAST_FREE_OBJECTID is checked for
 * afterwards (presumably rejected, TODO confirm). The path is initialised
 * here and released before the end.
 */
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_path *path,
3398 struct btrfs_key key, found_key;
3401 btrfs_init_path(path);
3402 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3404 key.type = BTRFS_INODE_ITEM_KEY;
3405 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3407 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408 path->slots[0] - 1);
3409 *highest_ino = found_key.objectid;
3412 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3414 btrfs_release_path(path);
3419 * Link inode to dir 'lost+found'. Increase @ref_count.
3421 * Returns 0 means success.
3422 * Returns <0 means failure.
/*
 * Link inode @ino into the lost+found directory of @root under the name in
 * @namebuf.
 *
 * The directory is created with btrfs_mkdir() below
 * BTRFS_FIRST_FREE_OBJECTID, using an inode number obtained through
 * get_highest_inode(). When the name already exists (-EEXIST) a suffix built
 * from @ino is appended to @namebuf and the link is retried, as long as the
 * result still fits in BTRFS_NAME_LEN. @namebuf is therefore modified in
 * place and must be able to hold BTRFS_NAME_LEN bytes.
 */
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *root,
3426 struct btrfs_path *path,
3427 u64 ino, char *namebuf, u32 name_len,
3428 u8 filetype, u64 *ref_count)
3430 char *dir_name = "lost+found";
3435 btrfs_release_path(path);
3436 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3441 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3445 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3448 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449 namebuf, name_len, filetype, NULL, 1, 0);
3451 * Add ".INO" suffix several times to handle case where
3452 * "FILENAME.INO" is already taken by another file.
3454 while (ret == -EEXIST) {
3456 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3458 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3462 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3464 name_len += count_digits(ino) + 1;
3465 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466 name_len, filetype, NULL, 1, 0);
3469 error("failed to link the inode %llu to %s dir: %s",
3470 ino, dir_name, strerror(-ret));
3475 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476 name_len, namebuf, dir_name);
3478 btrfs_release_path(path);
3480 error("failed to move file '%.*s' to '%s' dir", name_len,
/*
 * Repair the link count of rec->ino.
 *
 * The file name and type are collected first (falling back to the inode
 * number as name and to BTRFS_FT_REG_FILE as type), then reset_nlink()
 * rebuilds the links from the backrefs. An inode left with found_link == 0
 * is linked into lost+found. I_ERR_LINK_COUNT_WRONG is cleared from
 * rec->errors at the end and the path is released.
 */
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486 struct btrfs_root *root,
3487 struct btrfs_path *path,
3488 struct inode_record *rec)
3490 char namebuf[BTRFS_NAME_LEN] = {0};
3493 int name_recovered = 0;
3494 int type_recovered = 0;
3498 * Get file name and type first before these invalid inode ref
3499 * are deleted by remove_all_invalid_backref()
3501 name_recovered = !find_file_name(rec, namebuf, &namelen);
3502 type_recovered = !find_file_type(rec, &type);
3504 if (!name_recovered) {
3505 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506 rec->ino, rec->ino);
3507 namelen = count_digits(rec->ino);
3508 sprintf(namebuf, "%llu", rec->ino);
3511 if (!type_recovered) {
3512 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3514 type = BTRFS_FT_REG_FILE;
3518 ret = reset_nlink(trans, root, path, rec);
3521 "Failed to reset nlink for inode %llu: %s\n",
3522 rec->ino, strerror(-ret));
/* No valid link survived the reset: move the inode to lost+found. */
3526 if (rec->found_link == 0) {
3527 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528 namebuf, namelen, type,
3529 (u64 *)&rec->found_link);
3533 printf("Fixed the nlink of inode %llu\n", rec->ino);
3536 * Clear the flag anyway, or we will loop forever for the same inode
3537 * as it will not be removed from the bad inode list and the dead loop
3540 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541 btrfs_release_path(path);
3546 * Check if there is any normal(reg or prealloc) file extent for given
3548 * This is used to determine the file type when neither its dir_index/item nor
3549 * inode_item exists.
3551 * This will *NOT* report error, if any error happens, just consider it does
3552 * not have any normal file extent.
/*
 * Look for a file extent item of inode @ino in @root whose type is not
 * BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is read-only (no transaction, no COW). If it lands past the
 * last item of a leaf, the next leaf is used. Only the item found at that
 * position is examined: it must belong to @ino and be of type
 * BTRFS_EXTENT_DATA_KEY. The path is released before the end.
 */
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3556 struct btrfs_path path;
3557 struct btrfs_key key;
3558 struct btrfs_key found_key;
3559 struct btrfs_file_extent_item *fi;
3563 btrfs_init_path(&path);
3565 key.type = BTRFS_EXTENT_DATA_KEY;
3568 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3573 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574 ret = btrfs_next_leaf(root, &path);
3581 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3583 if (found_key.objectid != ino ||
3584 found_key.type != BTRFS_EXTENT_DATA_KEY)
3586 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587 struct btrfs_file_extent_item);
3588 type = btrfs_file_extent_type(path.nodes[0], fi);
3589 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits using a static lookup table.
 *
 * NOTE(review): @type indexes the table directly, with no range check
 * visible here - confirm callers only pass the types listed in the table.
 */
3599 static u32 btrfs_type_to_imode(u8 type)
3601 static u32 imode_by_btrfs_type[] = {
3602 [BTRFS_FT_REG_FILE] = S_IFREG,
3603 [BTRFS_FT_DIR] = S_IFDIR,
3604 [BTRFS_FT_CHRDEV] = S_IFCHR,
3605 [BTRFS_FT_BLKDEV] = S_IFBLK,
3606 [BTRFS_FT_FIFO] = S_IFIFO,
3607 [BTRFS_FT_SOCK] = S_IFSOCK,
3608 [BTRFS_FT_SYMLINK] = S_IFLNK,
3611 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item of rec->ino.
 *
 * The file type comes from find_file_type() when possible. Otherwise it is
 * guessed: a regular file when a non-inline file extent or an orphan extent
 * exists, a directory when a dir item was found, and a regular file as the
 * final fallback. The item is created with btrfs_new_inode().
 *
 * On the visible success path the record is updated to match the new item,
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 */
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path,
3617 struct inode_record *rec)
3621 int type_recovered = 0;
3624 printf("Trying to rebuild inode:%llu\n", rec->ino);
3626 type_recovered = !find_file_type(rec, &filetype);
3629 * Try to determine inode type if type not found.
3631 * For found regular file extent, it must be FILE.
3632 * For found dir_item/index, it must be DIR.
3634 * For undetermined one, use FILE as fallback.
3637 * 1. If found backref(inode_index/item is already handled) to it,
3639 * Need new inode-inode ref structure to allow search for that.
3641 if (!type_recovered) {
3642 if (rec->found_file_extent &&
3643 find_normal_file_extent(root, rec->ino)) {
3645 filetype = BTRFS_FT_REG_FILE;
3646 } else if (rec->found_dir_item) {
3648 filetype = BTRFS_FT_DIR;
3649 } else if (!list_empty(&rec->orphan_extents)) {
3651 filetype = BTRFS_FT_REG_FILE;
3653 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3656 filetype = BTRFS_FT_REG_FILE;
3660 ret = btrfs_new_inode(trans, root, rec->ino,
3661 mode | btrfs_type_to_imode(filetype));
3666 * Here inode rebuild is done, we only rebuild the inode item,
3667 * don't repair the nlink(like move to lost+found).
3668 * That is the job of nlink repair.
3670 * We just fill the record and return
3672 rec->found_dir_item = 1;
3673 rec->imode = mode | btrfs_type_to_imode(filetype);
3675 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676 /* Ensure the inode_nlinks repair function will be called */
3677 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in rec->orphan_extents.
 *
 * For each orphan the file range (offset, disk_len) is first probed with
 * btrfs_get_extent(). A conflicting orphan is freed with
 * btrfs_free_extent(); otherwise a file extent item is inserted for it with
 * disk_len used as both lengths, rec->found_size is increased and the range
 * is removed from the hole tree of the record. The nbytes and
 * extent-discount error bits are cleared when the record becomes consistent,
 * each handled orphan is unlinked from the list, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683 struct btrfs_root *root,
3684 struct btrfs_path *path,
3685 struct inode_record *rec)
3687 struct orphan_data_extent *orphan;
3688 struct orphan_data_extent *tmp;
3691 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3693 * Check for conflicting file extents
3695 * Here we don't know whether the extents is compressed or not,
3696 * so we can only assume it not compressed nor data offset,
3697 * and use its disk_len as extent length.
3699 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700 orphan->offset, orphan->disk_len, 0);
3701 btrfs_release_path(path);
3706 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707 orphan->disk_bytenr, orphan->disk_len);
3708 ret = btrfs_free_extent(trans,
3709 root->fs_info->extent_root,
3710 orphan->disk_bytenr, orphan->disk_len,
3711 0, root->objectid, orphan->objectid,
3716 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717 orphan->offset, orphan->disk_bytenr,
3718 orphan->disk_len, orphan->disk_len);
3722 /* Update file size info */
3723 rec->found_size += orphan->disk_len;
3724 if (rec->found_size == rec->nbytes)
3725 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3727 /* Update the file extent hole info too */
3728 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3732 if (RB_EMPTY_ROOT(&rec->holes))
3733 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3735 list_del(&orphan->list);
3738 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes with hole extents.
 *
 * Each entry of the hole tree is punched with btrfs_punch_hole() and then
 * removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree
 * is empty. A separate call punches a single hole from offset 0 up to isize
 * rounded up to the sector size, for a file that lost all its extents. A
 * message is printed at the end.
 */
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744 struct btrfs_root *root,
3745 struct btrfs_path *path,
3746 struct inode_record *rec)
3748 struct rb_node *node;
3749 struct file_extent_hole *hole;
3753 node = rb_first(&rec->holes);
3757 hole = rb_entry(node, struct file_extent_hole, node);
3758 ret = btrfs_punch_hole(trans, root, rec->ino,
3759 hole->start, hole->len);
3762 ret = del_file_extent_hole(&rec->holes, hole->start,
3766 if (RB_EMPTY_ROOT(&rec->holes))
3767 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The entry was removed above, so restart from the new first node. */
3768 node = rb_first(&rec->holes);
3770 /* special case for a file losing all its file extent */
3772 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773 round_up(rec->isize,
3774 root->fs_info->sectorsize));
3778 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside one transaction.
 *
 * Only records carrying at least one of the error bits tested first are
 * handled. The repairs run in a fixed order (missing inode item, orphan
 * extents, extent holes, directory isize, orphan item, link count, nbytes)
 * and each later step is skipped once an earlier one has failed. The
 * transaction is committed and the path released afterwards.
 */
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
/* Filter on the set of error bits this function knows how to repair. */
3790 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791 I_ERR_NO_ORPHAN_ITEM |
3792 I_ERR_LINK_COUNT_WRONG |
3793 I_ERR_NO_INODE_ITEM |
3794 I_ERR_FILE_EXTENT_ORPHAN |
3795 I_ERR_FILE_EXTENT_DISCOUNT|
3796 I_ERR_FILE_NBYTES_WRONG)))
3800 * For nlink repair, it may create a dir and add link, so
3801 * 2 for parent(256)'s dir_index and dir_item
3802 * 2 for lost+found dir's inode_item and inode_ref
3803 * 1 for the new inode_ref of the file
3804 * 2 for lost+found dir's dir_index and dir_item for the file
3806 trans = btrfs_start_transaction(root, 7);
3808 return PTR_ERR(trans);
3810 btrfs_init_path(&path);
3811 if (rec->errors & I_ERR_NO_INODE_ITEM)
3812 ret = repair_inode_no_item(trans, root, &path, rec);
3813 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818 ret = repair_inode_isize(trans, root, &path, rec);
3819 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822 ret = repair_inode_nlinks(trans, root, &path, rec);
3823 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824 ret = repair_inode_nbytes(trans, root, &path, rec);
3825 btrfs_commit_transaction(trans, root);
3826 btrfs_release_path(&path);
/*
 * Check (and in repair mode try to fix) all inode records collected for
 * @root in @inode_cache.
 *
 * Visible phases:
 *  - a root with zero refs is handled up front (a warning is printed when
 *    its cache is not empty);
 *  - in repair mode the backrefs of every record are repaired in stages
 *    through repair_inode_backrefs();
 *  - the record of the root directory is validated with check_root_dir(),
 *    and a missing root directory is recreated in repair mode;
 *  - every remaining record is removed from the cache, completed with the
 *    missing-inode-item and link-count error bits, optionally repaired with
 *    try_repair_inode(), and reported together with its unresolved backrefs
 *    when errors remain.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
3830 static int check_inode_recs(struct btrfs_root *root,
3831 struct cache_tree *inode_cache)
3833 struct cache_extent *cache;
3834 struct ptr_node *node;
3835 struct inode_record *rec;
3836 struct inode_backref *backref;
3841 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3843 if (btrfs_root_refs(&root->root_item) == 0) {
3844 if (!cache_tree_empty(inode_cache))
3845 fprintf(stderr, "warning line %d\n", __LINE__);
3850 * We need to repair backrefs first because we could change some of the
3851 * errors in the inode recs.
3853 * We also need to go through and delete invalid backrefs first and then
3854 * add the correct ones second. We do this because we may get EEXIST
3855 * when adding back the correct index because we hadn't yet deleted the
3858 * For example, if we were missing a dir index then the directories
3859 * isize would be wrong, so if we fixed the isize to what we thought it
3860 * would be and then fixed the backref we'd still have a invalid fs, so
3861 * we need to add back the dir index and then check to see if the isize
3866 if (stage == 3 && !err)
3869 cache = search_cache_extent(inode_cache, 0);
3870 while (repair && cache) {
3871 node = container_of(cache, struct ptr_node, cache);
3873 cache = next_cache_extent(cache);
3875 /* Need to free everything up and rescan */
3877 remove_cache_extent(inode_cache, &node->cache);
3879 free_inode_rec(rec);
3883 if (list_empty(&rec->backrefs))
3886 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the record of the root directory itself. */
3900 rec = get_inode_rec(inode_cache, root_dirid, 0);
3901 BUG_ON(IS_ERR(rec));
3903 ret = check_root_dir(rec);
3905 fprintf(stderr, "root %llu root dir %llu error\n",
3906 (unsigned long long)root->root_key.objectid,
3907 (unsigned long long)root_dirid);
3908 print_inode_error(root, rec);
3913 struct btrfs_trans_handle *trans;
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 err = PTR_ERR(trans);
3922 "root %llu missing its root dir, recreating\n",
3923 (unsigned long long)root->objectid);
3925 ret = btrfs_make_root_dir(trans, root, root_dirid);
3928 btrfs_commit_transaction(trans, root);
3932 fprintf(stderr, "root %llu root dir %llu not found\n",
3933 (unsigned long long)root->root_key.objectid,
3934 (unsigned long long)root_dirid);
/* Drain the cache: each record is removed from it before being examined. */
3938 cache = search_cache_extent(inode_cache, 0);
3941 node = container_of(cache, struct ptr_node, cache);
3943 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are not checked further. */
3945 if (rec->ino == root_dirid ||
3946 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947 free_inode_rec(rec);
3951 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952 ret = check_orphan_item(root, rec->ino);
3954 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955 if (can_free_inode_rec(rec)) {
3956 free_inode_rec(rec);
3961 if (!rec->found_inode_item)
3962 rec->errors |= I_ERR_NO_INODE_ITEM;
3963 if (rec->found_link != rec->nlink)
3964 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3966 ret = try_repair_inode(root, rec);
3967 if (ret == 0 && can_free_inode_rec(rec)) {
3968 free_inode_rec(rec);
3974 if (!(repair && ret == 0))
3976 print_inode_error(root, rec);
/* Report every backref still attached, with its missing pieces flagged. */
3977 list_for_each_entry(backref, &rec->backrefs, list) {
3978 if (!backref->found_dir_item)
3979 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980 if (!backref->found_dir_index)
3981 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982 if (!backref->found_inode_ref)
3983 backref->errors |= REF_ERR_NO_INODE_REF;
3984 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985 " namelen %u name %s filetype %d errors %x",
3986 (unsigned long long)backref->dir,
3987 (unsigned long long)backref->index,
3988 backref->namelen, backref->name,
3989 backref->filetype, backref->errors);
3990 print_ref_error(backref->errors);
3992 free_inode_rec(rec);
3994 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for objectid from @root_cache, creating it when it
 * does not exist yet.
 *
 * A new record is zero-allocated, gets an empty backref list and is inserted
 * into the cache as a one-unit extent starting at objectid. Failures are
 * reported as ERR_PTR(-ENOMEM) and ERR_PTR(-EEXIST).
 *
 * NOTE(review): rec does not appear to be freed when the cache insert fails
 * - confirm.
 */
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4000 struct cache_extent *cache;
4001 struct root_record *rec = NULL;
4004 cache = lookup_cache_extent(root_cache, objectid, 1);
4006 rec = container_of(cache, struct root_record, cache);
4008 rec = calloc(1, sizeof(*rec));
4010 return ERR_PTR(-ENOMEM);
4011 rec->objectid = objectid;
4012 INIT_LIST_HEAD(&rec->backrefs);
4013 rec->cache.start = objectid;
4014 rec->cache.size = 1;
4016 ret = insert_cache_extent(root_cache, &rec->cache);
4018 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (ref_root, dir, name), or create it.
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index is not part of the comparison. A new entry is zero-allocated
 * with room for the name plus a terminating NUL, filled in and appended to
 * rec->backrefs.
 */
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024 u64 ref_root, u64 dir, u64 index,
4025 const char *name, int namelen)
4027 struct root_backref *backref;
4029 list_for_each_entry(backref, &rec->backrefs, list) {
4030 if (backref->ref_root != ref_root || backref->dir != dir ||
4031 backref->namelen != namelen)
4033 if (memcmp(name, backref->name, namelen))
/* The name is stored inline after the struct, hence the extra bytes. */
4038 backref = calloc(1, sizeof(*backref) + namelen + 1);
4041 backref->ref_root = ref_root;
4043 backref->index = index;
4044 backref->namelen = namelen;
4045 memcpy(backref->name, name, namelen);
4046 backref->name[namelen] = '\0';
4047 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release the root_record that embeds @cache: every backref is unlinked from
 * rec->backrefs in turn (and presumably freed together with the record
 * itself, TODO confirm).
 */
4051 static void free_root_record(struct cache_extent *cache)
4053 struct root_record *rec;
4054 struct root_backref *backref;
4056 rec = container_of(cache, struct root_record, cache);
4057 while (!list_empty(&rec->backrefs)) {
4058 backref = to_root_backref(rec->backrefs.next);
4059 list_del(&backref->list);
/*
 * Instantiate FREE_EXTENT_CACHE_BASED_TREE for root_recs with
 * free_root_record() as the per-entry callback; presumably this defines the
 * helper that frees a whole root record cache tree (confirm against the
 * macro definition).
 */
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id in @root_cache.
 *
 * The matching backref is looked up (or created) and updated according to
 * @item_type:
 *  - every type except BTRFS_DIR_ITEM_KEY carries an index; it is compared
 *    with the stored one (REF_ERR_INDEX_UNMATCH on mismatch) once a dir
 *    index, back ref or forward ref has been seen;
 *  - BTRFS_DIR_ITEM_KEY sets found_dir_item, BTRFS_DIR_INDEX_KEY sets
 *    found_dir_index, BTRFS_ROOT_REF_KEY sets found_forward_ref and
 *    BTRFS_ROOT_BACKREF_KEY sets found_back_ref, with duplicate root refs
 *    and root backrefs flagged in backref->errors.
 *
 * A backref with both a forward ref and a dir item is marked reachable.
 * @errors is OR-ed into backref->errors.
 */
4068 static int add_root_backref(struct cache_tree *root_cache,
4069 u64 root_id, u64 ref_root, u64 dir, u64 index,
4070 const char *name, int namelen,
4071 int item_type, int errors)
4073 struct root_record *rec;
4074 struct root_backref *backref;
4076 rec = get_root_rec(root_cache, root_id);
4077 BUG_ON(IS_ERR(rec));
4078 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4081 backref->errors |= errors;
4083 if (item_type != BTRFS_DIR_ITEM_KEY) {
4084 if (backref->found_dir_index || backref->found_back_ref ||
4085 backref->found_forward_ref) {
4086 if (backref->index != index)
4087 backref->errors |= REF_ERR_INDEX_UNMATCH;
4089 backref->index = index;
4093 if (item_type == BTRFS_DIR_ITEM_KEY) {
4094 if (backref->found_forward_ref)
4096 backref->found_dir_item = 1;
4097 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098 backref->found_dir_index = 1;
4099 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100 if (backref->found_forward_ref)
4101 backref->errors |= REF_ERR_DUP_ROOT_REF;
4102 else if (backref->found_dir_item)
4104 backref->found_forward_ref = 1;
4105 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106 if (backref->found_back_ref)
4107 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108 backref->found_back_ref = 1;
4113 if (backref->found_forward_ref && backref->found_dir_item)
4114 backref->reachable = 1;
/*
 * Drain @src_cache (inode records collected while walking @root) and turn
 * the directory entries found there into root backrefs in @dst_cache via
 * add_root_backref(). Each inode record is freed after it is processed.
 *
 * For the tree-reloc root the source records are simply freed.
 */
4118 static int merge_root_recs(struct btrfs_root *root,
4119 struct cache_tree *src_cache,
4120 struct cache_tree *dst_cache)
4122 struct cache_extent *cache;
4123 struct ptr_node *node;
4124 struct inode_record *rec;
4125 struct inode_backref *backref;
4128 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129 free_inode_recs_tree(src_cache);
4134 cache = search_cache_extent(src_cache, 0);
4137 node = container_of(cache, struct ptr_node, cache);
4139 remove_cache_extent(src_cache, &node->cache);
/* NOTE(review): handling of the is_child_root() result is not visible here. */
4142 ret = is_child_root(root, root->objectid, rec->ino);
4148 list_for_each_entry(backref, &rec->backrefs, list) {
4149 BUG_ON(backref->found_inode_ref);
/* Forward DIR_ITEM and DIR_INDEX evidence separately; rec->ino is the referenced root id. */
4150 if (backref->found_dir_item)
4151 add_root_backref(dst_cache, rec->ino,
4152 root->root_key.objectid, backref->dir,
4153 backref->index, backref->name,
4154 backref->namelen, BTRFS_DIR_ITEM_KEY,
4156 if (backref->found_dir_index)
4157 add_root_backref(dst_cache, rec->ino,
4158 root->root_key.objectid, backref->dir,
4159 backref->index, backref->name,
4160 backref->namelen, BTRFS_DIR_INDEX_KEY,
4164 free_inode_rec(rec);
/*
 * Validate the root records gathered in @root_cache.
 *
 * The first pass over the cache clears backref->reachable on backrefs whose
 * referencing root is not itself referenced (found_ref == 0). The second
 * pass reports unreferenced fs trees, marks missing DIR_ITEM / DIR_INDEX /
 * ROOT_REF / ROOT_BACKREF pieces on each backref and prints the unresolved
 * ones to stderr.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
4171 static int check_root_refs(struct btrfs_root *root,
4172 struct cache_tree *root_cache)
4174 struct root_record *rec;
4175 struct root_record *ref_root;
4176 struct root_backref *backref;
4177 struct cache_extent *cache;
4183 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184 BUG_ON(IS_ERR(rec));
4187 /* fixme: this can not detect circular references */
/* Pass 1: walk every record and drop reachability that comes from unreferenced roots. */
4190 cache = search_cache_extent(root_cache, 0);
4194 rec = container_of(cache, struct root_record, cache);
4195 cache = next_cache_extent(cache);
4197 if (rec->found_ref == 0)
4200 list_for_each_entry(backref, &rec->backrefs, list) {
4201 if (!backref->reachable)
4204 ref_root = get_root_rec(root_cache,
4206 BUG_ON(IS_ERR(ref_root));
4207 if (ref_root->found_ref > 0)
4210 backref->reachable = 0;
4212 if (rec->found_ref == 0)
/* Pass 2: report problems per root record. */
4218 cache = search_cache_extent(root_cache, 0);
4222 rec = container_of(cache, struct root_record, cache);
4223 cache = next_cache_extent(cache);
/* Only object ids in the FIRST_FREE..LAST_FREE range are checked for an orphan item. */
4225 if (rec->found_ref == 0 &&
4226 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228 ret = check_orphan_item(root->fs_info->tree_root,
4234 * If we don't have a root item then we likely just have
4235 * a dir item in a snapshot for this root but no actual
4236 * ref key or anything so it's meaningless.
4238 if (!rec->found_root_item)
4241 fprintf(stderr, "fs tree %llu not referenced\n",
4242 (unsigned long long)rec->objectid);
4246 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag every piece of the four-way link that was never seen. */
4248 list_for_each_entry(backref, &rec->backrefs, list) {
4249 if (!backref->found_dir_item)
4250 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251 if (!backref->found_dir_index)
4252 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253 if (!backref->found_back_ref)
4254 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255 if (!backref->found_forward_ref)
4256 backref->errors |= REF_ERR_NO_ROOT_REF;
4257 if (backref->reachable && backref->errors)
4264 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265 (unsigned long long)rec->objectid, rec->found_ref,
4266 rec->found_root_item ? "" : "not found");
4268 list_for_each_entry(backref, &rec->backrefs, list) {
4269 if (!backref->reachable)
4271 if (!backref->errors && rec->found_root_item)
4273 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274 " index %llu namelen %u name %s errors %x\n",
4275 (unsigned long long)backref->ref_root,
4276 (unsigned long long)backref->dir,
4277 (unsigned long long)backref->index,
4278 backref->namelen, backref->name,
4280 print_ref_error(backref->errors);
4283 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref stored at @slot of @eb and register it in
 * @root_cache through add_root_backref().
 *
 * The name is copied into a BTRFS_NAME_LEN sized stack buffer; a longer
 * on-disk name is clamped to BTRFS_NAME_LEN and REF_ERR_NAME_TOO_LONG is
 * passed along. For a ROOT_REF key the child root is key->offset and the
 * parent is key->objectid; the other branch passes them the other way round.
 */
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287 struct btrfs_key *key,
4288 struct cache_tree *root_cache)
4294 struct btrfs_root_ref *ref;
4295 char namebuf[BTRFS_NAME_LEN];
4298 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4300 dirid = btrfs_root_ref_dirid(eb, ref);
4301 index = btrfs_root_ref_sequence(eb, ref);
4302 name_len = btrfs_root_ref_name_len(eb, ref);
4304 if (name_len <= BTRFS_NAME_LEN) {
4308 len = BTRFS_NAME_LEN;
4309 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size btrfs_root_ref header. */
4311 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4313 if (key->type == BTRFS_ROOT_REF_KEY) {
4314 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315 index, namebuf, len, key->type, error);
4317 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt_blocks cache tree: recovers the
 * btrfs_corrupt_block that embeds @cache.
 * NOTE(review): the statement releasing it is not visible in this excerpt.
 */
4323 static void free_corrupt_block(struct cache_extent *cache)
4325 struct btrfs_corrupt_block *corrupt;
4327 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Presumably expands to free_corrupt_blocks_tree(), called from check_fs_root() -- confirm. */
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4334 * Repair the btree of the given root.
4336 * The fix is to remove the node key in corrupt_blocks cache_tree.
4337 * and rebalance the tree.
4338 * After the fix, the btree should be writeable.
/*
 * Two passes over @corrupt_blocks inside one transaction:
 *  1. for each recorded block, search down to its level with balancing
 *     disabled (ins_len 0), delete the parent pointer and free the extent;
 *  2. search each recorded key again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 * The transaction is committed at the end.
 */
4340 static int repair_btree(struct btrfs_root *root,
4341 struct cache_tree *corrupt_blocks)
4343 struct btrfs_trans_handle *trans;
4344 struct btrfs_path path;
4345 struct btrfs_corrupt_block *corrupt;
4346 struct cache_extent *cache;
4347 struct btrfs_key key;
/* Nothing recorded for this tree. */
4352 if (cache_tree_empty(corrupt_blocks))
4355 trans = btrfs_start_transaction(root, 1);
4356 if (IS_ERR(trans)) {
4357 ret = PTR_ERR(trans);
4358 fprintf(stderr, "Error starting transaction: %s\n",
4362 btrfs_init_path(&path);
4363 cache = first_cache_extent(corrupt_blocks);
4365 corrupt = container_of(cache, struct btrfs_corrupt_block,
4367 level = corrupt->level;
/* Stop the search at the level of the corrupted block. */
4368 path.lowest_level = level;
4369 key.objectid = corrupt->key.objectid;
4370 key.type = corrupt->key.type;
4371 key.offset = corrupt->key.offset;
4374 * Here we don't want to do any tree balance, since it may
4375 * cause a balance with corrupted brother leaf/node,
4376 * so ins_len set to 0 here.
4377 * Balance will be done after all corrupt node/leaf is deleted.
4379 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before the pointer is removed. */
4382 offset = btrfs_node_blockptr(path.nodes[level],
4385 /* Remove the ptr */
4386 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4390 * Remove the corresponding extent
4391 * return value is not concerned.
4393 btrfs_release_path(&path);
4394 ret = btrfs_free_extent(trans, root, offset,
4395 root->fs_info->nodesize, 0,
4396 root->root_key.objectid, level - 1, 0);
4397 cache = next_cache_extent(cache);
4400 /* Balance the btree using btrfs_search_slot() */
4401 cache = first_cache_extent(corrupt_blocks);
4403 corrupt = container_of(cache, struct btrfs_corrupt_block,
4405 memcpy(&key, &corrupt->key, sizeof(key));
4406 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4409 /* return will always >0 since it won't find the item */
4411 btrfs_release_path(&path);
4412 cache = next_cache_extent(cache);
/* NOTE(review): the return value of btrfs_commit_transaction() is not used on this line. */
4415 btrfs_commit_transaction(trans, root);
4416 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * Visible steps: install a per-root corrupt_blocks cache in fs_info, note in
 * @root_cache whether the root item has refs, move orphan data extents onto
 * their inode records, validate the root block, walk the tree with
 * walk_down_tree()/walk_up_tree() (resuming from drop_progress for a
 * partially dropped root), list and try to repair corrupted blocks, merge
 * the per-root root records into @root_cache, check the inode records and
 * finally release the per-root caches.
 */
4420 static int check_fs_root(struct btrfs_root *root,
4421 struct cache_tree *root_cache,
4422 struct walk_control *wc)
4428 struct btrfs_path path;
4429 struct shared_node root_node;
4430 struct root_record *rec;
4431 struct btrfs_root_item *root_item = &root->root_item;
4432 struct cache_tree corrupt_blocks;
4433 struct orphan_data_extent *orphan;
4434 struct orphan_data_extent *tmp;
4435 enum btrfs_tree_block_status status;
4436 struct node_refs nrefs;
4439 * Reuse the corrupt_block cache tree to record corrupted tree block
4441 * Unlike the usage in extent tree check, here we do it in a per
4442 * fs/subvol tree base.
4444 cache_tree_init(&corrupt_blocks);
4445 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* The reloc tree gets no root record. */
4447 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448 rec = get_root_rec(root_cache, root->root_key.objectid);
4449 BUG_ON(IS_ERR(rec));
4450 if (btrfs_root_refs(root_item) > 0)
4451 rec->found_root_item = 1;
4454 btrfs_init_path(&path);
4455 memset(&root_node, 0, sizeof(root_node));
4456 cache_tree_init(&root_node.root_cache);
4457 cache_tree_init(&root_node.inode_cache);
4458 memset(&nrefs, 0, sizeof(nrefs));
4460 /* Move the orphan extent record to corresponding inode_record */
4461 list_for_each_entry_safe(orphan, tmp,
4462 &root->orphan_data_extents, list) {
4463 struct inode_record *inode;
4465 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4467 BUG_ON(IS_ERR(inode));
4468 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk at the root block's level with root_node as the active shared node. */
4472 level = btrfs_header_level(root->node);
4473 memset(wc->nodes, 0, sizeof(wc->nodes));
4474 wc->nodes[level] = &root_node;
4475 wc->active_node = level;
4476 wc->root_level = level;
4478 /* We may not have checked the root block, lets do that now */
4479 if (btrfs_is_leaf(root->node))
4480 status = btrfs_check_leaf(root, NULL, root->node);
4482 status = btrfs_check_node(root, NULL, root->node);
4483 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * A root with refs, or without recorded drop progress, is walked from its
 * root block; otherwise the walk resumes at drop_progress/drop_level.
 */
4486 if (btrfs_root_refs(root_item) > 0 ||
4487 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488 path.nodes[level] = root->node;
4489 extent_buffer_get(root->node);
4490 path.slots[level] = 0;
4492 struct btrfs_key key;
4493 struct btrfs_disk_key found_key;
4495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496 level = root_item->drop_level;
4497 path.lowest_level = level;
/* Reject a drop level that is outside the tree or the path array. */
4498 if (level > btrfs_header_level(root->node) ||
4499 level >= BTRFS_MAX_LEVEL) {
4500 error("ignoring invalid drop level: %u", level);
4503 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4506 btrfs_node_key(path.nodes[level], &found_key,
4508 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509 sizeof(found_key)));
4513 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519 wret = walk_up_tree(root, &path, wc, &level);
4526 btrfs_release_path(&path);
/* Report every corrupted block recorded during the walk, then attempt a repair. */
4528 if (!cache_tree_empty(&corrupt_blocks)) {
4529 struct cache_extent *cache;
4530 struct btrfs_corrupt_block *corrupt;
4532 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533 root->root_key.objectid);
4534 cache = first_cache_extent(&corrupt_blocks);
4536 corrupt = container_of(cache,
4537 struct btrfs_corrupt_block,
4539 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540 cache->start, corrupt->level,
4541 corrupt->key.objectid, corrupt->key.type,
4542 corrupt->key.offset);
4543 cache = next_cache_extent(cache);
4546 printf("Try to repair the btree for root %llu\n",
4547 root->root_key.objectid);
4548 ret = repair_btree(root, &corrupt_blocks);
4550 fprintf(stderr, "Failed to repair btree: %s\n",
4553 printf("Btree for root %llu is fixed\n",
4554 root->root_key.objectid);
4558 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still in progress as checked before the final inode pass. */
4562 if (root_node.current) {
4563 root_node.current->checked = 1;
4564 maybe_free_inode_rec(&root_node.inode_cache,
4568 err = check_inode_recs(root, &root_node.inode_cache);
/* Undo the per-root corrupt_blocks hook installed at the top. */
4572 free_corrupt_blocks_tree(&corrupt_blocks);
4573 root->fs_info->corrupt_blocks = NULL;
4574 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that check_fs_roots() should walk:
 * the two relocation trees are tested explicitly, everything else is left
 * to is_fstree().
 * NOTE(review): the value returned for the relocation trees is not visible
 * in this excerpt.
 */
4578 static int fs_root_objectid(u64 objectid)
4580 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4583 return is_fstree(objectid);
/*
 * Iterate the tree root: every ROOT_ITEM accepted by fs_root_objectid() is
 * read and handed to check_fs_root(); ROOT_REF / ROOT_BACKREF items are
 * passed to process_root_ref(). Results accumulate in @root_cache.
 *
 * If the tree root node changed since the iteration started, or
 * check_fs_root() returns -EAGAIN, the root records are dropped and the
 * path is released.
 * NOTE(review): the restart jump itself is not visible in this excerpt.
 */
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587 struct cache_tree *root_cache)
4589 struct btrfs_path path;
4590 struct btrfs_key key;
4591 struct walk_control wc;
4592 struct extent_buffer *leaf, *tree_node;
4593 struct btrfs_root *tmp_root;
4594 struct btrfs_root *tree_root = fs_info->tree_root;
4598 if (ctx.progress_enabled) {
4599 ctx.tp = TASK_FS_ROOTS;
4600 task_start(ctx.info);
4604 * Just in case we made any changes to the extent tree that weren't
4605 * reflected into the free space cache yet.
4608 reset_cached_block_groups(fs_info);
4609 memset(&wc, 0, sizeof(wc));
4610 cache_tree_init(&wc.shared);
4611 btrfs_init_path(&path);
4616 key.type = BTRFS_ROOT_ITEM_KEY;
4617 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the tree root node so a later change can be detected. */
4622 tree_node = tree_root->node;
4624 if (tree_node != tree_root->node) {
4625 free_root_recs_tree(root_cache);
4626 btrfs_release_path(&path);
4629 leaf = path.nodes[0];
4630 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631 ret = btrfs_next_leaf(tree_root, &path);
4637 leaf = path.nodes[0];
4639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641 fs_root_objectid(key.objectid)) {
/* The reloc tree is read uncached and freed again below; other roots go through btrfs_read_fs_root(). */
4642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643 tmp_root = btrfs_read_fs_root_no_cache(
4646 key.offset = (u64)-1;
4647 tmp_root = btrfs_read_fs_root(
4650 if (IS_ERR(tmp_root)) {
4654 ret = check_fs_root(tmp_root, root_cache, &wc);
4655 if (ret == -EAGAIN) {
4656 free_root_recs_tree(root_cache);
4657 btrfs_release_path(&path);
4662 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663 btrfs_free_fs_root(tmp_root);
4664 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665 key.type == BTRFS_ROOT_BACKREF_KEY) {
4666 process_root_ref(leaf, path.slots[0], &key,
4673 btrfs_release_path(&path);
/* wc.shared is expected to be empty after the free; a leftover only triggers a warning. */
4675 free_extent_cache_tree(&wc.shared);
4676 if (!cache_tree_empty(&wc.shared))
4677 fprintf(stderr, "warning line %d\n", __LINE__);
4679 task_stop(ctx.info);
4685 * Find the @index according by @ino and name.
4686 * Notice:time efficiency is O(N)
4688 * @root: the root of the fs/file tree
4689 * @index_ret: the index as return value
4690 * @namebuf: the name to match
4691 * @name_len: the length of name to match
4692 * @file_type: the file_type of INODE_ITEM to match
4694 * Returns 0 if found and *@index_ret will be modified with right value
4695 * Returns< 0 not found and *@index_ret will be (u64)-1
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698 u64 *index_ret, char *namebuf, u32 name_len,
4701 struct btrfs_path path;
4702 struct extent_buffer *node;
4703 struct btrfs_dir_item *di;
4704 struct btrfs_key key;
4705 struct btrfs_key location;
4706 char name[BTRFS_NAME_LEN] = {0};
4718 /* search from the last index */
4719 key.objectid = dirid;
4720 key.offset = (u64)-1;
4721 key.type = BTRFS_DIR_INDEX_KEY;
4723 btrfs_init_path(&path);
4724 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4729 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4732 *index_ret = (64)-1;
4735 /* Check whether inode_id/filetype/name match */
4736 node = path.nodes[0];
4737 slot = path.slots[0];
4738 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4742 len = btrfs_dir_name_len(node, di);
4743 data_len = btrfs_dir_data_len(node, di);
4745 btrfs_dir_item_key_to_cpu(node, di, &location);
4746 if (location.objectid != location_id ||
4747 location.type != BTRFS_INODE_ITEM_KEY ||
4748 location.offset != 0)
4751 filetype = btrfs_dir_type(node, di);
4752 if (file_type != filetype)
4755 if (len > BTRFS_NAME_LEN)
4756 len = BTRFS_NAME_LEN;
4758 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759 if (len != name_len || strncmp(namebuf, name, len))
4762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763 *index_ret = key.offset;
4767 len += sizeof(*di) + data_len;
4768 di = (struct btrfs_dir_item *)((char *)di + len);
4774 btrfs_release_path(&path);
4779 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780 * INODE_REF/INODE_EXTREF match.
4782 * @root: the root of the fs/file tree
4783 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784 * value while find index
4785 * @location_key: location key of the struct btrfs_dir_item to match
4786 * @name: the name to match
4787 * @namelen: the length of name
4788 * @file_type: the type of file to match
4790 * Return 0 if no error occurred.
4791 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792 * DIR_ITEM/DIR_INDEX
4793 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794 * and DIR_ITEM/DIR_INDEX mismatch
/*
 * Look up the DIR_ITEM or DIR_INDEX item at @key and scan its entries for
 * one whose location equals @location_key, whose type equals @file_type and
 * whose name equals @name/@namelen.
 *
 * For a DIR_INDEX key whose offset is (u64)-1 the index is first resolved
 * with find_dir_index(), which writes the result into key->offset.
 * On-disk names longer than BTRFS_NAME_LEN are trimmed with a warning.
 */
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797 struct btrfs_key *location_key, char *name,
4798 u32 namelen, u8 file_type)
4800 struct btrfs_path path;
4801 struct extent_buffer *node;
4802 struct btrfs_dir_item *di;
4803 struct btrfs_key location;
4804 char namebuf[BTRFS_NAME_LEN] = {0};
4813 /* get the index by traversing all index */
4814 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815 ret = find_dir_index(root, key->objectid,
4816 location_key->objectid, &key->offset,
4817 name, namelen, file_type);
4819 ret = DIR_INDEX_MISSING;
4823 btrfs_init_path(&path);
4824 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
/* The *_MISSING code depends on which kind of key was searched. */
4826 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4831 /* Check whether inode_id/filetype/name match */
4832 node = path.nodes[0];
4833 slot = path.slots[0];
4834 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835 total = btrfs_item_size_nr(node, slot);
4836 while (cur < total) {
/* Assume a mismatch until an entry passes all three comparisons. */
4837 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4840 len = btrfs_dir_name_len(node, di);
4841 data_len = btrfs_dir_data_len(node, di);
4843 btrfs_dir_item_key_to_cpu(node, di, &location);
4844 if (location.objectid != location_key->objectid ||
4845 location.type != location_key->type ||
4846 location.offset != location_key->offset)
4849 filetype = btrfs_dir_type(node, di);
4850 if (file_type != filetype)
4853 if (len > BTRFS_NAME_LEN) {
4854 len = BTRFS_NAME_LEN;
4855 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4857 key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX",
4859 key->objectid, key->offset, len);
4861 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4863 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to the next entry packed in the same item. */
4869 len += sizeof(*di) + data_len;
4870 di = (struct btrfs_dir_item *)((char *)di + len);
4875 btrfs_release_path(&path);
4880 * Prints inode ref error message
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883 u64 index, const char *namebuf, int name_len,
4884 u8 filetype, int err)
4889 /* root dir error */
4890 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4892 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893 root->objectid, key->objectid, key->offset, namebuf);
4898 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900 root->objectid, key->offset,
4901 btrfs_name_hash(namebuf, name_len),
4902 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4904 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906 root->objectid, key->offset, index,
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4912 * Insert the missing inode item.
4914 * Returns 0 means success.
4915 * Returns <0 means error.
/*
 * Visible flow: start a transaction, search for the INODE_ITEM key with
 * insertion space reserved, call create_inode_item_lowmem() for @ino, then
 * commit. An error message is printed for a failed repair.
 */
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4920 struct btrfs_key key;
4921 struct btrfs_trans_handle *trans;
4922 struct btrfs_path path;
4926 key.type = BTRFS_INODE_ITEM_KEY;
4929 btrfs_init_path(&path);
4930 trans = btrfs_start_transaction(root, 1);
4931 if (IS_ERR(trans)) {
4936 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/* Both a search error and an already existing item (ret == 0) skip the insertion. */
4937 if (ret < 0 || !ret)
4940 /* insert inode item */
/* NOTE(review): the return value of create_inode_item_lowmem() is not used on this line. */
4941 create_inode_item_lowmem(trans, root, ino, filetype);
4944 btrfs_commit_transaction(trans, root);
4947 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948 root->objectid, ino);
4949 btrfs_release_path(&path);
4954 * The ternary means dir item, dir index and relative inode ref.
4955 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4958 * If two of three is missing or mismatched, delete the existing one.
4959 * If one of three is missing or mismatched, add the missing one.
4961 * returns 0 means success.
4962 * returns not 0 means on error;
/*
 * Visible flow: three groups of @err bits (DIR_INDEX_*, DIR_ITEM_*,
 * INODE_REF_MISSING) are tested to derive a "stage"; inside one transaction
 * the link is then either removed with btrfs_unlink() or (re)created with
 * btrfs_add_link(), and the outcome is printed.
 * NOTE(review): how each test changes the stage, and any error check on the
 * transaction handle, are not visible in this excerpt.
 */
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965 u64 index, char *name, int name_len, u8 filetype,
4968 struct btrfs_trans_handle *trans;
4973 * stage shall be one of following valild values:
4974 * 0: Fine, nothing to do.
4975 * 1: One of three is wrong, so add missing one.
4976 * 2: Two of three is wrong, so delete existed one.
4978 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4982 if (err & (INODE_REF_MISSING))
4985 /* stage must be smaller than 3 */
4988 trans = btrfs_start_transaction(root, 1);
4990 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4995 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996 filetype, &index, 1, 1);
5000 btrfs_commit_transaction(trans, root);
5003 error("fail to repair inode %llu name %s filetype %u",
5004 ino, name, filetype);
/* The success message names the action: stage 2 deleted the link, otherwise it was added. */
5006 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007 stage == 2 ? "Delete" : "Add",
5008 ino, name, filetype);
5014 * Traverse the given INODE_REF and call find_dir_item() to find related
5015 * DIR_ITEM/DIR_INDEX.
5017 * @root: the root of the fs/file tree
5018 * @ref_key: the key of the INODE_REF
5019 * @path the path provides node and slot
5020 * @refs: the count of INODE_REF
5021 * @mode: the st_mode of INODE_ITEM
5022 * @name_ret: returns with the first ref's name
5023 * @name_len_ret: len of the name_ret
5025 * Return 0 if no error occurred.
/*
 * For every name packed in the INODE_REF item the matching DIR_INDEX and
 * DIR_ITEM are looked up through find_dir_item(); in repair mode problems
 * are handed to repair_ternary_lowmem(), and errors are reported through
 * print_inode_ref_err(). After a repair the item is searched again because
 * the path may have changed.
 */
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028 struct btrfs_path *path, char *name_ret,
5029 u32 *namelen_ret, u64 *refs_ret, int mode)
5031 struct btrfs_key key;
5032 struct btrfs_key location;
5033 struct btrfs_inode_ref *ref;
5034 struct extent_buffer *node;
5035 char namebuf[BTRFS_NAME_LEN] = {0};
5045 int need_research = 0;
5053 /* since after repair, path and the dir item may be changed */
5054 if (need_research) {
5056 btrfs_release_path(path);
5057 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058 /* the item was deleted, let path point to the last checked item */
5060 if (path->slots[0] == 0)
5061 btrfs_prev_leaf(root, path);
/* Directory entries of this inode point at (ino, INODE_ITEM, 0). */
5069 location.objectid = ref_key->objectid;
5070 location.type = BTRFS_INODE_ITEM_KEY;
5071 location.offset = 0;
5072 node = path->nodes[0];
5073 slot = path->slots[0];
/* namebuf is a char array, so the element count used here equals its size in bytes. */
5075 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077 total = btrfs_item_size_nr(node, slot);
5080 /* Update inode ref count */
5083 index = btrfs_inode_ref_index(node, ref);
5084 name_len = btrfs_inode_ref_name_len(node, ref);
5086 if (name_len <= BTRFS_NAME_LEN) {
5089 len = BTRFS_NAME_LEN;
5090 warning("root %llu INODE_REF[%llu %llu] name too long",
5091 root->objectid, ref_key->objectid, ref_key->offset);
5094 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5096 /* copy the first name found to name_ret */
5097 if (refs == 1 && name_ret) {
5098 memcpy(name_ret, namebuf, len);
5102 /* Check root dir ref */
/* The root directory may only carry the ".." self reference with index 0. */
5103 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104 if (index != 0 || len != strlen("..") ||
5105 strncmp("..", namebuf, len) ||
5106 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107 /* set err bits then repair will delete the ref */
5108 err |= DIR_INDEX_MISSING;
5109 err |= DIR_ITEM_MISSING;
5114 /* Find related DIR_INDEX */
5115 key.objectid = ref_key->offset;
5116 key.type = BTRFS_DIR_INDEX_KEY;
5118 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119 imode_to_type(mode));
5121 /* Find related dir_item */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_DIR_ITEM_KEY;
5124 key.offset = btrfs_name_hash(namebuf, len);
5125 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126 imode_to_type(mode));
5128 if (tmp_err && repair) {
5129 ret = repair_ternary_lowmem(root, ref_key->offset,
5130 ref_key->objectid, index, namebuf,
5131 name_len, imode_to_type(mode),
5138 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139 imode_to_type(mode), tmp_err);
/* Advance by the on-disk name length, not the clamped copy length. */
5141 len = sizeof(*ref) + name_len;
5142 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5153 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154 * DIR_ITEM/DIR_INDEX.
5156 * @root: the root of the fs/file tree
5157 * @ref_key: the key of the INODE_EXTREF
5158 * @refs: the count of INODE_EXTREF
5159 * @mode: the st_mode of INODE_ITEM
5161 * Return 0 if no error occurred.
5163 static int check_inode_extref(struct btrfs_root *root,
5164 struct btrfs_key *ref_key,
5165 struct extent_buffer *node, int slot, u64 *refs,
5168 struct btrfs_key key;
5169 struct btrfs_key location;
5170 struct btrfs_inode_extref *extref;
5171 char namebuf[BTRFS_NAME_LEN] = {0};
5181 location.objectid = ref_key->objectid;
5182 location.type = BTRFS_INODE_ITEM_KEY;
5183 location.offset = 0;
5185 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186 total = btrfs_item_size_nr(node, slot);
5189 /* update inode ref count */
5191 name_len = btrfs_inode_extref_name_len(node, extref);
5192 index = btrfs_inode_extref_index(node, extref);
5193 parent = btrfs_inode_extref_parent(node, extref);
5194 if (name_len <= BTRFS_NAME_LEN) {
5197 len = BTRFS_NAME_LEN;
5198 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199 root->objectid, ref_key->objectid, ref_key->offset);
5201 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5203 /* Check root dir ref name */
5204 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206 root->objectid, ref_key->objectid, ref_key->offset,
5208 err |= ROOT_DIR_ERROR;
5211 /* find related dir_index */
5212 key.objectid = parent;
5213 key.type = BTRFS_DIR_INDEX_KEY;
5215 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5218 /* find related dir_item */
5219 key.objectid = parent;
5220 key.type = BTRFS_DIR_ITEM_KEY;
5221 key.offset = btrfs_name_hash(namebuf, len);
5222 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5225 len = sizeof(*extref) + name_len;
5226 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5236 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237 * DIR_ITEM/DIR_INDEX match.
5238 * Return with @index_ret.
5240 * @root: the root of the fs/file tree
5241 * @key: the key of the INODE_REF/INODE_EXTREF
5242 * @name: the name in the INODE_REF/INODE_EXTREF
5243 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5244 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5245 * value (u64)-1 means do not check index
5246 * @ext_ref: the EXTENDED_IREF feature
5248 * Return 0 if no error occurred.
5249 * Return >0 for error bitmap
/*
 * Search the INODE_REF item at @key for an entry named @name/@namelen whose
 * index matches *@index_ret; an *@index_ret of (u64)-1 skips the index test.
 * On a match the found index is stored in *@index_ret.
 *
 * A second stage rewrites @key into the matching INODE_EXTREF key (hash of
 * parent dir id and name) and scans that item the same way, additionally
 * requiring the recorded parent to equal the directory id.
 * NOTE(review): the condition guarding the second stage (see the
 * EXTENDED_IREF comment) is not visible in this excerpt.
 */
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252 char *name, int namelen, u64 *index_ret,
5253 unsigned int ext_ref)
5255 struct btrfs_path path;
5256 struct btrfs_inode_ref *ref;
5257 struct btrfs_inode_extref *extref;
5258 struct extent_buffer *node;
5259 char ref_namebuf[BTRFS_NAME_LEN] = {0};
5272 btrfs_init_path(&path);
5273 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5275 ret = INODE_REF_MISSING;
5279 node = path.nodes[0];
5280 slot = path.slots[0];
5282 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283 total = btrfs_item_size_nr(node, slot);
5285 /* Iterate all entry of INODE_REF */
5286 while (cur < total) {
5287 ret = INODE_REF_MISSING;
5289 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290 ref_index = btrfs_inode_ref_index(node, ref);
5291 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Guard against a name that runs past the item or exceeds BTRFS_NAME_LEN. */
5294 if (cur + sizeof(*ref) + ref_namelen > total ||
5295 ref_namelen > BTRFS_NAME_LEN) {
5296 warning("root %llu INODE %s[%llu %llu] name too long",
5298 key->type == BTRFS_INODE_REF_KEY ?
5300 key->objectid, key->offset);
5302 if (cur + sizeof(*ref) > total)
5304 len = min_t(u32, total - cur - sizeof(*ref),
5310 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5313 if (len != namelen || strncmp(ref_namebuf, name, len))
5316 *index_ret = ref_index;
5320 len = sizeof(*ref) + ref_namelen;
5321 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5326 /* Skip if not support EXTENDED_IREF feature */
5330 btrfs_release_path(&path);
5331 btrfs_init_path(&path);
/* @key is modified in place: it now addresses the INODE_EXTREF item. */
5333 dir_id = key->offset;
5334 key->type = BTRFS_INODE_EXTREF_KEY;
5335 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5337 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5339 ret = INODE_REF_MISSING;
5343 node = path.nodes[0];
5344 slot = path.slots[0];
5346 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5348 total = btrfs_item_size_nr(node, slot);
5350 /* Iterate all entry of INODE_EXTREF */
5351 while (cur < total) {
5352 ret = INODE_REF_MISSING;
5354 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355 ref_index = btrfs_inode_extref_index(node, extref);
5356 parent = btrfs_inode_extref_parent(node, extref);
5357 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* Entries of other directories can share the same hash. */
5360 if (parent != dir_id)
5363 if (ref_namelen <= BTRFS_NAME_LEN) {
5366 len = BTRFS_NAME_LEN;
5367 warning("root %llu INODE %s[%llu %llu] name too long",
5369 key->type == BTRFS_INODE_REF_KEY ?
5371 key->objectid, key->offset);
5373 read_extent_buffer(node, ref_namebuf,
5374 (unsigned long)(extref + 1), len);
5376 if (len != namelen || strncmp(ref_namebuf, name, len))
5379 *index_ret = ref_index;
5384 len = sizeof(*extref) + ref_namelen;
5385 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5390 btrfs_release_path(&path);
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395 u64 ino, u64 index, const char *namebuf,
5396 int name_len, u8 filetype, int err)
5398 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400 root->objectid, key->objectid, key->offset, namebuf,
5402 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5405 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407 root->objectid, key->objectid, index, namebuf, filetype,
5408 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5411 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5413 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414 root->objectid, ino, index, namebuf, filetype,
5415 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5418 if (err & INODE_REF_MISSING)
5420 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421 root->objectid, ino, key->objectid, namebuf, filetype);
5426 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5428 * Returns error after repair
/*
 * Two repair steps, each clearing its own error bits from @err:
 * a missing INODE_ITEM goes to repair_inode_item_missing(); any remaining
 * non-INODE_ITEM bit goes to repair_ternary_lowmem().
 * NOTE(review): the conditions under which the bits are cleared are not
 * visible in this excerpt; presumably only after a successful repair.
 */
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431 u64 index, u8 filetype, char *namebuf, u32 name_len,
5436 if (err & INODE_ITEM_MISSING) {
5437 ret = repair_inode_item_missing(root, ino, filetype);
5439 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* Anything besides the INODE_ITEM bits concerns the dir item / dir index / inode ref triple. */
5442 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444 name_len, filetype, err);
5446 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448 err &= ~(INODE_REF_MISSING);
/*
 * Walk the items of key type @type (DIR_ITEM or DIR_INDEX, see
 * count_dir_isize()) that belong to directory @ino, starting from the
 * highest offset and moving backwards with btrfs_previous_item(), and
 * iterate over the entries packed in each item. Name lengths are clamped to
 * BTRFS_NAME_LEN.
 * NOTE(review): the accumulation into the output size is not visible in
 * this excerpt.
 */
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5457 struct btrfs_key key;
5458 struct btrfs_path path;
5460 struct btrfs_dir_item *di;
/* Largest possible offset, so the search lands after the last item of @ino. */
5470 key.offset = (u64)-1;
5472 btrfs_init_path(&path);
5473 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5478 /* if found, go to special case */
5483 ret = btrfs_previous_item(root, &path, ino, type);
5491 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5493 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5495 while (cur < total) {
5496 len = btrfs_dir_name_len(path.nodes[0], di);
5497 if (len > BTRFS_NAME_LEN)
5498 len = BTRFS_NAME_LEN;
5501 len += btrfs_dir_data_len(path.nodes[0], di);
5503 di = (struct btrfs_dir_item *)((char *)di + len);
5509 btrfs_release_path(&path);
/*
 * Compute the size value for directory inode @ino of @root.
 *
 * Runs __count_dir_isize() once for DIR_ITEM keys and once for DIR_INDEX
 * keys and stores the sum of both results in *@size. An error message is
 * printed on the failure path.
 * NOTE(review): the return-value checks between the calls and the return
 * statement are not visible in this excerpt.
 */
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5520 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5524 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
/* Both key types contribute to the total. */
5528 *size = item_size + index_size;
5532 error("failed to count root %llu INODE[%llu] root size",
5533 root->objectid, ino);
5538 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5541 * @root: the root of the fs/file tree
5542 * @di_key: the key of the DIR_ITEM/DIR_INDEX
5544 * @size: the st_size of the INODE_ITEM
5545 * @ext_ref: the EXTENDED_IREF feature
5547 * Return 0 if no error occurred.
5548 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry stored in one DIR_ITEM/DIR_INDEX item (@di_key).
 *
 * For each entry the code reads data_len, name_len, the name and the file
 * type, verifies the name hash for DIR_ITEM keys, then cross-checks the
 * INODE_ITEM at the entry's location, the INODE_REF/INODE_EXTREF (through
 * find_inode_ref()) and the sibling DIR_INDEX/DIR_ITEM (through
 * find_dir_item()). In repair mode repair_dir_item() is called for entries
 * with problems and the item is looked up again afterwards.
 *
 * *@size is increased by name_len of each entry.
 * NOTE(review): several declarations, branches, gotos and the return
 * statement are not visible in this excerpt.
 */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551 struct btrfs_path *path, u64 *size,
5552 unsigned int ext_ref)
5554 struct btrfs_dir_item *di;
5555 struct btrfs_inode_item *ii;
5556 struct btrfs_key key;
5557 struct btrfs_key location;
5558 struct extent_buffer *node;
5560 char namebuf[BTRFS_NAME_LEN] = {0};
5572 int need_research = 0;
5575 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576 * ignore index check.
5578 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579 index = di_key->offset;
5586 /* since after repair, path and the dir item may be changed */
5587 if (need_research) {
/* Tell the caller that the directory size has to be counted again. */
5589 err |= DIR_COUNT_AGAIN;
5590 btrfs_release_path(path);
5591 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592 /* the item was deleted, let path point the last checked item */
5594 if (path->slots[0] == 0)
5595 btrfs_prev_leaf(root, path);
5603 node = path->nodes[0];
5604 slot = path->slots[0];
5606 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so the division yields sizeof(namebuf). */
5608 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5610 while (cur < total) {
5611 data_len = btrfs_dir_data_len(node, di);
5614 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5616 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617 di_key->objectid, di_key->offset, data_len);
5619 name_len = btrfs_dir_name_len(node, di);
/* len is the number of name bytes copied into namebuf, capped at BTRFS_NAME_LEN. */
5620 if (name_len <= BTRFS_NAME_LEN) {
5623 len = BTRFS_NAME_LEN;
5624 warning("root %llu %s[%llu %llu] name too long",
5626 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627 di_key->objectid, di_key->offset);
5629 (*size) += name_len;
5630 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5632 filetype = btrfs_dir_type(node, di);
/* A DIR_ITEM key offset must equal the hash of the entry name. */
5634 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635 di_key->offset != btrfs_name_hash(namebuf, len)) {
5637 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638 root->objectid, di_key->objectid, di_key->offset,
5639 namebuf, len, filetype, di_key->offset,
5640 btrfs_name_hash(namebuf, len));
5643 btrfs_dir_item_key_to_cpu(node, di, &location);
5644 /* Ignore related ROOT_ITEM check */
5645 if (location.type == BTRFS_ROOT_ITEM_KEY)
5648 btrfs_release_path(path);
5649 /* Check relative INODE_ITEM(existence/filetype) */
5650 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5652 tmp_err |= INODE_ITEM_MISSING;
5656 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657 struct btrfs_inode_item);
5658 mode = btrfs_inode_mode(path->nodes[0], ii);
/* The type derived from the inode mode must match the entry's file type. */
5659 if (imode_to_type(mode) != filetype) {
5660 tmp_err |= INODE_ITEM_MISMATCH;
5664 /* Check relative INODE_REF/INODE_EXTREF */
5665 key.objectid = location.objectid;
5666 key.type = BTRFS_INODE_REF_KEY;
5667 key.offset = di_key->objectid;
5668 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5671 /* check relative INDEX/ITEM */
5672 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was assigned BTRFS_INODE_REF_KEY a few lines above,
 * so unless find_inode_ref() rewrites it to BTRFS_DIR_ITEM_KEY this test is
 * never true; di_key->type may have been intended - confirm.
 */
5673 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674 key.type = BTRFS_DIR_INDEX_KEY;
5677 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): name_len (not the capped len) is used for the hash and for
 * find_dir_item(); for an over-long name this exceeds the size of namebuf -
 * confirm.
 */
5678 key.offset = btrfs_name_hash(namebuf, name_len);
5681 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682 name_len, filetype);
5683 /* find_dir_item may find index */
5684 if (key.type == BTRFS_DIR_INDEX_KEY)
5688 if (tmp_err && repair) {
/* The file type handed to the repair is derived from the inode mode. */
5689 ret = repair_dir_item(root, di_key->objectid,
5690 location.objectid, index,
5691 imode_to_type(mode), namebuf,
/* A result different from tmp_err means the repair changed something. */
5693 if (ret != tmp_err) {
5698 btrfs_release_path(path);
5699 print_dir_item_err(root, di_key, location.objectid, index,
5700 namebuf, name_len, filetype, tmp_err);
/* Step over the header, the name and the data of the current entry. */
5702 len = sizeof(*di) + name_len + data_len;
5703 di = (struct btrfs_dir_item *)((char *)di + len);
5706 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708 root->objectid, di_key->objectid,
/* Point the path back at the item that was being checked. */
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5718 err |= ret > 0 ? -ENOENT : ret;
5723 * Wrapper function of btrfs_punch_hole.
5725 * Returns 0 means success.
5726 * Returns not 0 means error.
/*
 * Punch a hole at @start (length len) in inode @ino of @root.
 *
 * The work is done inside its own transaction: it is started here, handed to
 * btrfs_punch_hole() and committed before returning. Either an error or a
 * confirmation message is printed.
 * NOTE(review): the remaining parameter line, the branch conditions and the
 * final return are not visible in this excerpt; the return value of
 * btrfs_commit_transaction() is not used on the visible line.
 */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5731 struct btrfs_trans_handle *trans;
5734 trans = btrfs_start_transaction(root, 1);
5736 return PTR_ERR(trans);
5738 ret = btrfs_punch_hole(trans, root, ino, start, len);
5740 error("failed to add hole [%llu, %llu] in inode [%llu]",
5743 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5746 btrfs_commit_transaction(trans, root);
5751 * Check file extent datasum/hole, update the size of the file extents,
5752 * check and update the last offset of the file extent.
5754 * @root: the root of fs/file tree.
5755 * @fkey: the key of the file extent.
5756 * @nodatasum: INODE_NODATASUM feature.
5757 * @size: the sum of all EXTENT_DATA items size for this inode.
5758 * @end: the offset of the last extent.
5760 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item (@fkey, stored at @node/@slot).
 *
 * Inline extents: an empty extent is an error, and for uncompressed data the
 * recorded length must equal the inline item length.
 * Other extents: the type must be REG or PREALLOC; the csum bytes covering
 * the extent are counted with count_csum_range() and compared against
 * @nodatasum and the extent type; finally a gap between *@end and the key
 * offset is reported (punch_extent_hole() is called on that path).
 *
 * *@end and *@size are advanced by the extent's number of bytes.
 * Problems are accumulated in err as FILE_EXTENT_ERROR, ODD_CSUM_ITEM and
 * CSUM_ITEM_MISSING bits.
 * NOTE(review): several declarations, branch conditions and the return
 * statements are not visible in this excerpt.
 */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763 struct extent_buffer *node, int slot,
5764 unsigned int nodatasum, u64 *size, u64 *end)
5766 struct btrfs_file_extent_item *fi;
5769 u64 extent_num_bytes;
5771 u64 csum_found; /* In byte size, sectorsize aligned */
5772 u64 search_start; /* Logical range start we search for csum */
5773 u64 search_len; /* Logical range len we search for csum */
5774 unsigned int extent_type;
5775 unsigned int is_hole;
5780 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5782 /* Check inline extent */
5783 extent_type = btrfs_file_extent_type(node, fi);
5784 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785 struct btrfs_item *e = btrfs_item_nr(slot);
5786 u32 item_inline_len;
5788 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790 compressed = btrfs_file_extent_compression(node, fi);
5791 if (extent_num_bytes == 0) {
5793 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794 root->objectid, fkey->objectid, fkey->offset);
5795 err |= FILE_EXTENT_ERROR;
/* The length comparison is only made for uncompressed inline data. */
5797 if (!compressed && extent_num_bytes != item_inline_len) {
5799 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800 root->objectid, fkey->objectid, fkey->offset,
5801 extent_num_bytes, item_inline_len);
5802 err |= FILE_EXTENT_ERROR;
5804 *end += extent_num_bytes;
5805 *size += extent_num_bytes;
5809 /* Check extent type */
5810 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812 err |= FILE_EXTENT_ERROR;
5813 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814 root->objectid, fkey->objectid, fkey->offset);
5818 /* Check REG_EXTENT/PREALLOC_EXTENT */
5819 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822 extent_offset = btrfs_file_extent_offset(node, fi);
5823 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk_bytenr and disk_num_bytes are both 0. */
5824 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5827 * Check EXTENT_DATA csum
5829 * For plain (uncompressed) extent, we should only check the range
5830 * we're referring to, as it's possible that part of prealloc extent
5831 * has been written, and has csum:
5833 * |<--- Original large preallocated extent A ---->|
5834 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5837 * For compressed extent, we should check the whole range.
5840 search_start = disk_bytenr + extent_offset;
5841 search_len = extent_num_bytes;
5843 search_start = disk_bytenr;
5844 search_len = disk_num_bytes;
5846 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Three mutually exclusive csum problems are reported below. */
5847 if (csum_found > 0 && nodatasum) {
5848 err |= ODD_CSUM_ITEM;
5849 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850 root->objectid, fkey->objectid, fkey->offset);
5851 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852 !is_hole && (ret < 0 || csum_found < search_len)) {
5853 err |= CSUM_ITEM_MISSING;
5854 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855 root->objectid, fkey->objectid, fkey->offset,
5856 csum_found, search_len);
5857 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858 err |= ODD_CSUM_ITEM;
5859 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860 root->objectid, fkey->objectid, fkey->offset, csum_found);
5863 /* Check EXTENT_DATA hole */
5864 if (!no_holes && *end != fkey->offset) {
/* The gap covers [*end, fkey->offset). */
5866 ret = punch_extent_hole(root, fkey->objectid,
5867 *end, fkey->offset - *end);
5868 if (!repair || ret) {
5869 err |= FILE_EXTENT_ERROR;
5870 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5871 root->objectid, fkey->objectid, fkey->offset);
5875 *end += extent_num_bytes;
5877 *size += extent_num_bytes;
5883 * Set inode item nbytes to @nbytes
5885 * Returns 0 on success
5886 * Returns != 0 on error
/*
 * Rewrite the nbytes field of inode @ino's INODE_ITEM to @nbytes.
 *
 * The key under @path is remembered in research_key on entry. The inode item
 * is then looked up inside a transaction, updated and the transaction is
 * committed. Before returning, @path is released and searched again with the
 * remembered key.
 * NOTE(review): error-handling branches, labels and the return statement are
 * not visible in this excerpt.
 */
5888 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5889 struct btrfs_path *path,
5890 u64 ino, u64 nbytes)
5892 struct btrfs_trans_handle *trans;
5893 struct btrfs_inode_item *ii;
5894 struct btrfs_key key;
5895 struct btrfs_key research_key;
/* Remember where the caller's path pointed so it can be restored later. */
5899 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5902 key.type = BTRFS_INODE_ITEM_KEY;
5905 trans = btrfs_start_transaction(root, 1);
5906 if (IS_ERR(trans)) {
5907 ret = PTR_ERR(trans);
5912 btrfs_release_path(path);
/* Search with the transaction so the leaf may be modified. */
5913 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5921 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5922 struct btrfs_inode_item);
5923 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5924 btrfs_mark_buffer_dirty(path->nodes[0]);
5926 btrfs_commit_transaction(trans, root);
5929 error("failed to set nbytes in inode %llu root %llu",
5930 ino, root->root_key.objectid);
/* The newline terminates the message, as in repair_dir_isize_lowmem(). */
5932 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5933 root->root_key.objectid, nbytes);
/* Restore the caller's position. */
5936 btrfs_release_path(path);
5937 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5944 * Set directory inode isize to @isize.
5946 * Returns 0 on success.
5947 * Returns != 0 on error.
/*
 * Rewrite the size field of directory inode ino's INODE_ITEM to isize.
 *
 * The key under @path is remembered in research_key on entry. The inode item
 * is looked up inside a transaction, updated and the transaction is
 * committed. Before returning, @path is released and searched again with the
 * remembered key.
 * NOTE(review): the last parameter line, error-handling branches, labels and
 * the return statement are not visible in this excerpt.
 */
5949 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5950 struct btrfs_path *path,
5953 struct btrfs_trans_handle *trans;
5954 struct btrfs_inode_item *ii;
5955 struct btrfs_key key;
5956 struct btrfs_key research_key;
/* Remember where the caller's path pointed so it can be restored later. */
5960 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5963 key.type = BTRFS_INODE_ITEM_KEY;
5966 trans = btrfs_start_transaction(root, 1);
5967 if (IS_ERR(trans)) {
5968 ret = PTR_ERR(trans);
5973 btrfs_release_path(path);
/* Search with the transaction so the leaf may be modified. */
5974 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5982 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5983 struct btrfs_inode_item);
5984 btrfs_set_inode_size(path->nodes[0], ii, isize);
5985 btrfs_mark_buffer_dirty(path->nodes[0]);
5987 btrfs_commit_transaction(trans, root);
5990 error("failed to set isize in inode %llu root %llu",
5991 ino, root->root_key.objectid);
5993 printf("Set isize in inode %llu root %llu to %llu\n",
5994 ino, root->root_key.objectid, isize);
/* Restore the caller's position. */
5996 btrfs_release_path(path);
5997 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6004 * Wrapper function for btrfs_add_orphan_item().
6006 * Returns 0 on success.
6007 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino through btrfs_add_orphan_item().
 *
 * The key under @path is remembered on entry; the orphan item is added inside
 * a transaction which is then committed. Before returning, @path is released
 * and searched again with the remembered key.
 * NOTE(review): error-handling branches, labels and the return statement are
 * not visible in this excerpt.
 */
6009 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6010 struct btrfs_path *path, u64 ino)
6012 struct btrfs_trans_handle *trans;
6013 struct btrfs_key research_key;
/* Remember where the caller's path pointed so it can be restored later. */
6017 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6019 trans = btrfs_start_transaction(root, 1);
6020 if (IS_ERR(trans)) {
6021 ret = PTR_ERR(trans);
/* The path is released first because it is handed to the helper. */
6026 btrfs_release_path(path);
6027 ret = btrfs_add_orphan_item(trans, root, path, ino);
6029 btrfs_commit_transaction(trans, root);
6032 error("failed to add inode %llu as orphan item root %llu",
6033 ino, root->root_key.objectid);
6035 printf("Added inode %llu as orphan item root %llu\n",
6036 ino, root->root_key.objectid);
/* Restore the caller's position. */
6038 btrfs_release_path(path);
6039 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6045 /* Set inode_item nlink to @ref_count.
6046 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6048 * Returns 0 on success
/*
 * Set the nlink field of inode @ino's INODE_ITEM to @ref_count.
 *
 * When @ref_count is 0 the inode is first linked into lost+found through
 * link_inode_to_lostfound(), which receives &ref_count. @name/@namelen give
 * the name to use; when they are absent the decimal inode number is used.
 * The key under @path is saved in old_key on entry and searched again before
 * returning.
 * NOTE(review): the use of @nlink, error-handling branches, labels and the
 * return statements are not visible in this excerpt.
 */
6050 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6051 struct btrfs_path *path, u64 ino,
6052 const char *name, u32 namelen,
6053 u64 ref_count, u8 filetype, u64 *nlink)
6055 struct btrfs_trans_handle *trans;
6056 struct btrfs_inode_item *ii;
6057 struct btrfs_key key;
6058 struct btrfs_key old_key;
6059 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember where the caller's path pointed so it can be restored later. */
6065 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6067 if (name && namelen) {
/* namebuf holds BTRFS_NAME_LEN bytes, so the copy must not exceed that. */
6068 ASSERT(namelen <= BTRFS_NAME_LEN);
6069 memcpy(namebuf, name, namelen);
6072 sprintf(namebuf, "%llu", ino);
6073 name_len = count_digits(ino);
6074 printf("Can't find file name for inode %llu, use %s instead\n",
6078 trans = btrfs_start_transaction(root, 1);
6079 if (IS_ERR(trans)) {
6080 ret = PTR_ERR(trans);
6084 btrfs_release_path(path);
6085 /* if refs is 0, put it into lostfound */
6086 if (ref_count == 0) {
6087 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6088 name_len, filetype, &ref_count);
6093 /* reset inode_item's nlink to ref_count */
6095 key.type = BTRFS_INODE_ITEM_KEY;
6098 btrfs_release_path(path);
6099 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6105 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6106 struct btrfs_inode_item);
6107 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6108 btrfs_mark_buffer_dirty(path->nodes[0]);
6113 btrfs_commit_transaction(trans, root);
/* Argument order follows the format: inode number first, then root id. */
6117 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6118 ino, root->objectid, namebuf, filetype);
6120 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6121 ino, root->objectid, namebuf, filetype);
/* Restore the caller's position. */
6124 btrfs_release_path(path);
6125 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6132 * Check INODE_ITEM and related ITEMs (the same inode number)
6133 * 1. check link count
6134 * 2. check inode ref/extref
6135 * 3. check dir item/index
6137 * @ext_ref: the EXTENDED_IREF feature
6139 * Return 0 if no error occurred.
6140 * Return >0 on error or when the traversal is done (as an error bitmap)
/*
 * Check the INODE_ITEM under @path together with the following items that
 * share its inode number.
 *
 * The inode's isize, nbytes, mode, nlink and NODATASUM flag are read first.
 * Each following item with the same objectid is dispatched by key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() or
 * check_file_extent(). Afterwards nlink, isize and nbytes are compared with
 * the values gathered during the walk; the repair_*_lowmem() helpers and
 * punch_extent_hole() are called on those paths and the remaining problems
 * are recorded in err (e.g. LINK_COUNT_ERROR, NBYTES_ERROR).
 * NOTE(review): many declarations, branch conditions, labels and the return
 * statement are not visible in this excerpt.
 */
6142 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6143 unsigned int ext_ref)
6145 struct extent_buffer *node;
6146 struct btrfs_inode_item *ii;
6147 struct btrfs_key key;
6148 struct btrfs_key last_key;
6157 u64 extent_size = 0;
6159 unsigned int nodatasum;
6163 char namebuf[BTRFS_NAME_LEN] = {0};
6166 node = path->nodes[0];
6167 slot = path->slots[0];
6169 btrfs_item_key_to_cpu(node, &key, slot);
6170 inode_id = key.objectid;
/* Items of the orphan objectid are stepped over. */
6172 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6173 ret = btrfs_next_item(root, path);
6179 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6180 isize = btrfs_inode_size(node, ii);
6181 nbytes = btrfs_inode_nbytes(node, ii);
6182 mode = btrfs_inode_mode(node, ii);
6183 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6184 nlink = btrfs_inode_nlink(node, ii);
6185 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the current key before moving on to the next item. */
6188 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6189 ret = btrfs_next_item(root, path);
6191 /* out will fill 'err' using current statistics */
6193 } else if (ret > 0) {
6198 node = path->nodes[0];
6199 slot = path->slots[0];
6200 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
6201 if (key.objectid != inode_id)
6205 case BTRFS_INODE_REF_KEY:
6206 ret = check_inode_ref(root, &key, path, namebuf,
6207 &name_len, &refs, mode);
6210 case BTRFS_INODE_EXTREF_KEY:
6211 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6212 warning("root %llu EXTREF[%llu %llu] isn't supported",
6213 root->objectid, key.objectid,
6215 ret = check_inode_extref(root, &key, node, slot, &refs,
6219 case BTRFS_DIR_ITEM_KEY:
6220 case BTRFS_DIR_INDEX_KEY:
6222 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6223 root->objectid, inode_id,
6224 imode_to_type(mode), key.objectid,
6227 ret = check_dir_item(root, &key, path, &size, ext_ref);
6230 case BTRFS_EXTENT_DATA_KEY:
6232 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6233 root->objectid, inode_id, key.objectid,
6236 ret = check_file_extent(root, &key, node, slot,
6237 nodatasum, &extent_size,
6241 case BTRFS_XATTR_ITEM_KEY:
6244 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6245 key.objectid, key.type, key.offset);
/* With LAST_ITEM set, the path is pointed back at the last key seen. */
6250 if (err & LAST_ITEM) {
6251 btrfs_release_path(path);
6252 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6257 /* verify INODE_ITEM nlink/isize/nbytes */
/* check_dir_item() asked for a recount of the directory size. */
6259 if (repair && (err & DIR_COUNT_AGAIN)) {
6260 err &= ~DIR_COUNT_AGAIN;
6261 count_dir_isize(root, inode_id, &size);
6264 if ((nlink != 1 || refs != 1) && repair) {
6265 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6266 namebuf, name_len, refs, imode_to_type(mode),
6271 err |= LINK_COUNT_ERROR;
6272 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6273 root->objectid, inode_id, nlink);
6277 * Just a warning, as dir inode nbytes is just an
6278 * instructive value.
6280 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6281 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6282 root->objectid, inode_id,
6283 root->fs_info->nodesize);
/* size is the value accumulated by check_dir_item()/count_dir_isize(). */
6286 if (isize != size) {
6288 ret = repair_dir_isize_lowmem(root, path,
6290 if (!repair || ret) {
6293 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6294 root->objectid, inode_id, isize, size);
/* refs is the count gathered by check_inode_ref()/check_inode_extref(). */
6298 if (nlink != refs) {
6300 ret = repair_inode_nlinks_lowmem(root, path,
6301 inode_id, namebuf, name_len, refs,
6302 imode_to_type(mode), &nlink);
6303 if (!repair || ret) {
6304 err |= LINK_COUNT_ERROR;
6306 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6307 root->objectid, inode_id, nlink, refs);
6309 } else if (!nlink) {
6311 ret = repair_inode_orphan_item_lowmem(root,
6313 if (!repair || ret) {
6315 error("root %llu INODE[%llu] is orphan item",
6316 root->objectid, inode_id);
/* No bytes accounted, yet the extents end before isize. */
6320 if (!nbytes && !no_holes && extent_end < isize) {
6322 ret = punch_extent_hole(root, inode_id,
6323 extent_end, isize - extent_end);
6324 if (!repair || ret) {
6325 err |= NBYTES_ERROR;
6327 "root %llu INODE[%llu] size %llu should have a file extent hole",
6328 root->objectid, inode_id, isize);
/* extent_size is the sum accumulated by check_file_extent(). */
6332 if (nbytes != extent_size) {
6334 ret = repair_inode_nbytes_lowmem(root, path,
6335 inode_id, extent_size);
6336 if (!repair || ret) {
6337 err |= NBYTES_ERROR;
6339 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6340 root->objectid, inode_id, nbytes,
/* NOTE(review): the result of btrfs_next_item() is not used here. */
6346 if (err & LAST_ITEM)
6347 btrfs_next_item(root, path);
6352 * Insert the missing inode item and inode ref.
6354 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in the backref dir.
6355 * Root dir should be handled specially because root dir is the root of fs.
6357 * returns err (>0 or 0) after repair
/*
 * Repair the first inode (BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * For INODE_REF_MISSING an INODE_REF named ".." that points back at the
 * inode itself is inserted inside a transaction. For INODE_ITEM_MISSING
 * repair_inode_item_missing() is called with file type BTRFS_FT_DIR.
 * The corresponding bits are cleared from @err.
 * NOTE(review): the return-value checks around each step, the labels and the
 * return statement are not visible in this excerpt.
 */
6359 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6361 struct btrfs_trans_handle *trans;
6362 struct btrfs_key key;
6363 struct btrfs_path path;
6364 int filetype = BTRFS_FT_DIR;
6367 btrfs_init_path(&path);
6369 if (err & INODE_REF_MISSING) {
/* Objectid and offset are identical: the ref points at the inode itself. */
6370 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6371 key.type = BTRFS_INODE_REF_KEY;
6372 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6374 trans = btrfs_start_transaction(root, 1);
6375 if (IS_ERR(trans)) {
6376 ret = PTR_ERR(trans);
6380 btrfs_release_path(&path);
6381 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6385 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6386 BTRFS_FIRST_FREE_OBJECTID,
6387 BTRFS_FIRST_FREE_OBJECTID, 0);
6391 printf("Add INODE_REF[%llu %llu] name %s\n",
6392 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6394 err &= ~INODE_REF_MISSING;
6397 error("fail to insert first inode's ref");
6398 btrfs_commit_transaction(trans, root);
6401 if (err & INODE_ITEM_MISSING) {
6402 ret = repair_inode_item_missing(root,
6403 BTRFS_FIRST_FREE_OBJECTID, filetype);
6406 err &= ~INODE_ITEM_MISSING;
6410 error("fail to repair first inode");
6411 btrfs_release_path(&path);
6416 * check first root dir's inode_item and inode_ref
6418 * returns 0 means no error
6419 * returns >0 means error
6420 * returns <0 means fatal error
/*
 * Check the INODE_ITEM and the ".." INODE_REF of the first inode
 * (BTRFS_FIRST_FREE_OBJECTID) of @root.
 *
 * The check is not needed for a root that is being dropped (root refs == 0
 * and drop_progress objectid >= BTRFS_FIRST_FREE_OBJECTID). A missing item
 * sets INODE_ITEM_MISSING, a non-directory mode sets INODE_ITEM_MISMATCH.
 * repair_fs_first_inode() is called on the repair path and the remaining
 * problems are printed.
 *
 * Returns ret when it is negative, otherwise the error bitmap err.
 * NOTE(review): several branch conditions are not visible in this excerpt.
 */
6422 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6424 struct btrfs_path path;
6425 struct btrfs_key key;
6426 struct btrfs_inode_item *ii;
6432 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6433 key.type = BTRFS_INODE_ITEM_KEY;
6436 /* For root being dropped, we don't need to check first inode */
6437 if (btrfs_root_refs(&root->root_item) == 0 &&
6438 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6439 BTRFS_FIRST_FREE_OBJECTID)
6442 btrfs_init_path(&path);
6443 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6448 err |= INODE_ITEM_MISSING;
6450 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6451 struct btrfs_inode_item);
6452 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The first inode is expected to be a directory. */
6453 if (imode_to_type(mode) != BTRFS_FT_DIR)
6454 err |= INODE_ITEM_MISMATCH;
6457 /* lookup first inode ref */
6458 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6459 key.type = BTRFS_INODE_REF_KEY;
6460 /* special index value */
6463 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6469 btrfs_release_path(&path);
6472 err = repair_fs_first_inode(root, err);
6474 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6475 error("root dir INODE_ITEM is %s",
6476 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6477 if (err & INODE_REF_MISSING)
6478 error("root dir INODE_REF is missing");
/* A negative ret takes precedence over the bitmap. */
6480 return ret < 0 ? ret : err;
/*
 * Look up a tree backref of @rec in rec->backref_tree.
 *
 * A temporary tree_backref (match) is filled in as the search key and
 * compared with compare_extent_backref(). On the branch that uses @parent,
 * the key is marked as a full backref.
 * NOTE(review): the initializer of match, the branch condition and the
 * NULL check / return are not visible in this excerpt.
 */
6483 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6484 u64 parent, u64 root)
6486 struct rb_node *node;
6487 struct tree_backref *back = NULL;
6488 struct tree_backref match = {
6495 match.parent = parent;
6496 match.node.full_backref = 1;
6501 node = rb_search(&rec->backref_tree, &match.node.node,
6502 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb_node back into the containing tree_backref. */
6504 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref of @rec in rec->backref_tree.
 *
 * A temporary data_backref (match) is filled in as the search key, including
 * found_ref and @disk_bytenr, and compared with compare_extent_backref(). On
 * the branch that uses @parent, the key is marked as a full backref.
 * NOTE(review): one parameter line, the remaining initializer fields, the
 * branch condition and the NULL check / return are not visible in this
 * excerpt.
 */
6509 static struct data_backref *find_data_backref(struct extent_record *rec,
6510 u64 parent, u64 root,
6511 u64 owner, u64 offset,
6513 u64 disk_bytenr, u64 bytes)
6515 struct rb_node *node;
6516 struct data_backref *back = NULL;
6517 struct data_backref match = {
6524 .found_ref = found_ref,
6525 .disk_bytenr = disk_bytenr,
6529 match.parent = parent;
6530 match.node.full_backref = 1;
6535 node = rb_search(&rec->backref_tree, &match.node.node,
6536 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb_node back into the containing data_backref. */
6538 back = to_data_backref(rb_node_to_extent_backref(node));
6543 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6544 * blocks and integrity of fs tree items.
6546 * @root: the root of the tree to be checked.
6547 * @ext_ref feature EXTENDED_IREF is enabled or not.
6548 * @check_all if NOT 0 means check the tree (including tree)'s treeblocks.
6549 * otherwise means check fs tree(s) items relationship and
6550 * @root MUST be a fs tree root.
6551 * Returns 0 represents OK.
6552 * Returns not 0 represents error.
/*
 * Check one tree by walking it with walk_down_tree_v2()/walk_up_tree_v2().
 *
 * check_fs_first_inode() is called first. The walk then starts either at the
 * root node (root refs > 0 or drop_progress objectid == 0) or, for a root
 * that is being dropped, at the position recorded in drop_progress /
 * drop_level.
 * NOTE(review): the last parameter line, the conditions guarding each step,
 * the loop around the walk and the return statement are not visible in this
 * excerpt.
 */
6554 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6555 struct btrfs_root *root, unsigned int ext_ref,
6559 struct btrfs_path path;
6560 struct node_refs nrefs;
6561 struct btrfs_root_item *root_item = &root->root_item;
6566 memset(&nrefs, 0, sizeof(nrefs));
6569 * We need to manually check the first inode item (256)
6570 * As the following traversal function will only start from
6571 * the first inode item in the leaf, if inode item (256) is
6572 * missing we will skip it forever.
6574 ret = check_fs_first_inode(root, ext_ref);
6580 level = btrfs_header_level(root->node);
6581 btrfs_init_path(&path);
6583 if (btrfs_root_refs(root_item) > 0 ||
6584 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start at the root node; the path takes its own reference on it. */
6585 path.nodes[level] = root->node;
6586 path.slots[level] = 0;
6587 extent_buffer_get(root->node);
6589 struct btrfs_key key;
/* Resume from the position recorded in the root item. */
6591 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6592 level = root_item->drop_level;
6593 path.lowest_level = level;
6594 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6601 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6602 ext_ref, check_all);
6606 /* if ret is negative, walk shall stop */
6612 ret = walk_up_tree_v2(root, &path, &level);
6614 /* Normal exit, reset ret to err */
6621 btrfs_release_path(&path);
6625 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6628 * Iterate all items in the tree and call check_inode_item() to check.
6630 * @root: the root of the tree to be checked.
6631 * @ext_ref: the EXTENDED_IREF feature
6633 * Return 0 if no error found.
6634 * Return <0 for error.
/*
 * Check one fs tree: reset the cached block groups of the filesystem, then
 * run check_btrfs_root() without a transaction (NULL) and with 0 as the last
 * argument. Returns whatever check_btrfs_root() returns.
 */
6636 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6638 reset_cached_block_groups(root->fs_info);
6639 return check_btrfs_root(NULL, root, ext_ref, 0);
6643 * Find the relative ref for root_ref and root_backref.
6645 * @root: the root of the root tree.
6646 * @ref_key: the key of the root ref.
6648 * Return 0 if no error occurred.
/*
 * Verify that a ROOT_REF/ROOT_BACKREF item (@ref_key at @node/@slot) has a
 * matching counterpart of the opposite type.
 *
 * The counterpart key swaps objectid and offset and flips the key type. If
 * it cannot be found ROOT_REF_MISSING is recorded; if dirid, sequence, name
 * length or name differ ROOT_REF_MISMATCH is recorded.
 * NOTE(review): several declarations, branch conditions, labels and the
 * return statement are not visible in this excerpt.
 */
6650 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6651 struct extent_buffer *node, int slot)
6653 struct btrfs_path path;
6654 struct btrfs_key key;
6655 struct btrfs_root_ref *ref;
6656 struct btrfs_root_ref *backref;
6657 char ref_name[BTRFS_NAME_LEN] = {0};
6658 char backref_name[BTRFS_NAME_LEN] = {0};
6664 u32 backref_namelen;
6669 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6670 ref_dirid = btrfs_root_ref_dirid(node, ref);
6671 ref_seq = btrfs_root_ref_sequence(node, ref);
6672 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* At most BTRFS_NAME_LEN bytes of the name are copied into ref_name. */
6674 if (ref_namelen <= BTRFS_NAME_LEN) {
6677 len = BTRFS_NAME_LEN;
6678 warning("%s[%llu %llu] ref_name too long",
6679 ref_key->type == BTRFS_ROOT_REF_KEY ?
6680 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6683 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6685 /* Find relative root_ref */
6686 key.objectid = ref_key->offset;
/* Sum of both types minus the current one yields the opposite type. */
6687 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6688 key.offset = ref_key->objectid;
6690 btrfs_init_path(&path);
6691 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6693 err |= ROOT_REF_MISSING;
6694 error("%s[%llu %llu] couldn't find relative ref",
6695 ref_key->type == BTRFS_ROOT_REF_KEY ?
6696 "ROOT_REF" : "ROOT_BACKREF",
6697 ref_key->objectid, ref_key->offset);
6701 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6702 struct btrfs_root_ref);
6703 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6704 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6705 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same cap for the counterpart's name. */
6707 if (backref_namelen <= BTRFS_NAME_LEN) {
6708 len = backref_namelen;
6710 len = BTRFS_NAME_LEN;
6711 warning("%s[%llu %llu] ref_name too long",
6712 key.type == BTRFS_ROOT_REF_KEY ?
6713 "ROOT_REF" : "ROOT_BACKREF",
6714 key.objectid, key.offset);
6716 read_extent_buffer(path.nodes[0], backref_name,
6717 (unsigned long)(backref + 1), len);
/* Both items must agree on dirid, sequence, name length and name. */
6719 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6720 ref_namelen != backref_namelen ||
6721 strncmp(ref_name, backref_name, len)) {
6722 err |= ROOT_REF_MISMATCH;
6723 error("%s[%llu %llu] mismatch relative ref",
6724 ref_key->type == BTRFS_ROOT_REF_KEY ?
6725 "ROOT_REF" : "ROOT_BACKREF",
6726 ref_key->objectid, ref_key->offset);
6729 btrfs_release_path(&path);
6734 * Check all fs/file tree in low_memory mode.
6736 * 1. for fs tree root item, call check_fs_root_v2()
6737 * 2. for fs tree root ref/backref, call check_root_ref()
6739 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at BTRFS_FS_TREE_OBJECTID and check every fs
 * tree and root reference found there.
 *
 * ROOT_ITEM keys whose objectid passes fs_root_objectid() are read and handed
 * to check_fs_root_v2(); ROOT_REF/ROOT_BACKREF keys go to check_root_ref().
 * The walk stops at objectids above BTRFS_LAST_FREE_OBJECTID.
 * NOTE(review): several declarations, the loop construct, error accumulation
 * and the return statement are not visible in this excerpt.
 */
6741 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6743 struct btrfs_root *tree_root = fs_info->tree_root;
6744 struct btrfs_root *cur_root = NULL;
6745 struct btrfs_path path;
6746 struct btrfs_key key;
6747 struct extent_buffer *node;
6748 unsigned int ext_ref;
6753 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6755 btrfs_init_path(&path);
6756 key.objectid = BTRFS_FS_TREE_OBJECTID;
6758 key.type = BTRFS_ROOT_ITEM_KEY;
6760 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6764 } else if (ret > 0) {
6770 node = path.nodes[0];
6771 slot = path.slots[0];
6772 btrfs_item_key_to_cpu(node, &key, slot);
6773 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6775 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6776 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without the root cache and freed again below. */
6777 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6778 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6781 key.offset = (u64)-1;
6782 cur_root = btrfs_read_fs_root(fs_info, &key);
6785 if (IS_ERR(cur_root)) {
6786 error("Fail to read fs/subvol tree: %lld",
6792 ret = check_fs_root_v2(cur_root, ext_ref);
6795 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6796 btrfs_free_fs_root(cur_root);
6797 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6798 key.type == BTRFS_ROOT_BACKREF_KEY) {
6799 ret = check_root_ref(tree_root, &key, node, slot);
6803 ret = btrfs_next_item(tree_root, &path);
6813 btrfs_release_path(&path);
/*
 * Entry point for the fs roots check: prints a banner unless progress
 * reporting is enabled, then dispatches on check_mode - check_fs_roots_v2()
 * for CHECK_MODE_LOWMEM, check_fs_roots() otherwise.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
6817 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6818 struct cache_tree *root_cache)
6822 if (!ctx.progress_enabled)
6823 fprintf(stderr, "checking fs roots\n");
6824 if (check_mode == CHECK_MODE_LOWMEM)
6825 ret = check_fs_roots_v2(fs_info);
6827 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref recorded for @rec and report inconsistencies:
 *  - backrefs not found in the extent tree,
 *  - tree backrefs that were never referenced back,
 *  - data backrefs whose found_ref differs from num_refs,
 *  - data backrefs whose disk_bytenr differs from rec->start,
 *  - data backrefs whose bytes differ from rec->nr,
 *  - a total count (found) that differs from rec->refs.
 * NOTE(review): how @print_errs gates the messages, how tree backrefs are
 * counted into found and what is returned are not visible in this excerpt.
 */
6832 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6834 struct extent_backref *back, *tmp;
6835 struct tree_backref *tback;
6836 struct data_backref *dback;
6840 rbtree_postorder_for_each_entry_safe(back, tmp,
6841 &rec->backref_tree, node) {
6842 if (!back->found_extent_tree) {
6846 if (back->is_data) {
6847 dback = to_data_backref(back);
6848 fprintf(stderr, "Data backref %llu %s %llu"
6849 " owner %llu offset %llu num_refs %lu"
6850 " not found in extent tree\n",
6851 (unsigned long long)rec->start,
6852 back->full_backref ?
6854 back->full_backref ?
6855 (unsigned long long)dback->parent:
6856 (unsigned long long)dback->root,
6857 (unsigned long long)dback->owner,
6858 (unsigned long long)dback->offset,
6859 (unsigned long)dback->num_refs);
6861 tback = to_tree_backref(back);
6862 fprintf(stderr, "Tree backref %llu parent %llu"
6863 " root %llu not found in extent tree\n",
6864 (unsigned long long)rec->start,
6865 (unsigned long long)tback->parent,
6866 (unsigned long long)tback->root);
/* Tree backref that nothing referenced back. */
6869 if (!back->is_data && !back->found_ref) {
6873 tback = to_tree_backref(back);
6874 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6875 (unsigned long long)rec->start,
6876 back->full_backref ? "parent" : "root",
6877 back->full_backref ?
6878 (unsigned long long)tback->parent :
6879 (unsigned long long)tback->root, back);
6881 if (back->is_data) {
6882 dback = to_data_backref(back);
/* Per-backref count check. */
6883 if (dback->found_ref != dback->num_refs) {
6887 fprintf(stderr, "Incorrect local backref count"
6888 " on %llu %s %llu owner %llu"
6889 " offset %llu found %u wanted %u back %p\n",
6890 (unsigned long long)rec->start,
6891 back->full_backref ?
6893 back->full_backref ?
6894 (unsigned long long)dback->parent:
6895 (unsigned long long)dback->root,
6896 (unsigned long long)dback->owner,
6897 (unsigned long long)dback->offset,
6898 dback->found_ref, dback->num_refs, back);
6900 if (dback->disk_bytenr != rec->start) {
6904 fprintf(stderr, "Backref disk bytenr does not"
6905 " match extent record, bytenr=%llu, "
6906 "ref bytenr=%llu\n",
6907 (unsigned long long)rec->start,
6908 (unsigned long long)dback->disk_bytenr);
6911 if (dback->bytes != rec->nr) {
6915 fprintf(stderr, "Backref bytes do not match "
6916 "extent backref, bytenr=%llu, ref "
6917 "bytes=%llu, backref bytes=%llu\n",
6918 (unsigned long long)rec->start,
6919 (unsigned long long)rec->nr,
6920 (unsigned long long)dback->bytes);
6923 if (!back->is_data) {
/* Data backrefs contribute their found_ref to the total. */
6926 dback = to_data_backref(back);
6927 found += dback->found_ref;
/* Total count check against the extent record. */
6930 if (found != rec->refs) {
6934 fprintf(stderr, "Incorrect global backref count "
6935 "on %llu found %llu wanted %llu\n",
6936 (unsigned long long)rec->start,
6937 (unsigned long long)found,
6938 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by
 * free_all_extent_backrefs().  Converts the rb_node back into its
 * containing extent_backref; the statement that releases the object is not
 * visible in this excerpt.
 */
6944 static void __free_one_backref(struct rb_node *node)
6946 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Tear down rec->backref_tree, passing every node to __free_one_backref()
 * through rb_free_nodes().
 */
6951 static void free_all_extent_backrefs(struct extent_record *rec)
6953 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, map it to its
 * extent_record, unlink it from the cache and free its backrefs.
 * The enclosing loop and the release of the record itself are not visible
 * in this excerpt - presumably this repeats until the cache is empty.
 */
6956 static void free_extent_record_cache(struct cache_tree *extent_cache)
6958 struct cache_extent *cache;
6959 struct extent_record *rec;
6962 cache = first_cache_extent(extent_cache);
6965 rec = container_of(cache, struct extent_record, cache);
6966 remove_cache_extent(extent_cache, cache);
6967 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * A record qualifies only when all of the following hold:
 *   - content_checked and owner_ref_checked are set,
 *   - extent_item_refs equals refs and refs is non-zero,
 *   - there are no duplicates,
 *   - all_backpointers_checked(rec, 0) yields zero,
 *   - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * A qualifying record is removed from the cache, its backrefs are freed and
 * it is unlinked from whatever list rec->list is on.
 */
6972 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6973 struct extent_record *rec)
6975 if (rec->content_checked && rec->owner_ref_checked &&
6976 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6977 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6978 !rec->bad_full_backref && !rec->crossing_stripes &&
6979 !rec->wrong_chunk_type) {
6980 remove_cache_extent(extent_cache, &rec->cache);
6981 free_all_extent_backrefs(rec);
6982 list_del_init(&rec->list);
/*
 * Check that the owner stored in the header of @buf really references the
 * block.
 *
 * Step 1: scan the backrefs of @rec for a tree backref whose root equals
 * btrfs_header_owner(buf).  Entries are filtered on found_ref and
 * full_backref first; the actions taken by those tests are not visible in
 * this excerpt.
 *
 * Step 2: read the fs root named by the header owner and search it for the
 * first key of @buf with path.lowest_level set to level + 1.  The block
 * counts as found when the parent node at that level holds a block pointer
 * equal to buf->start.
 *
 * Returns 0 when the owner reference was found and 1 otherwise.
 */
6988 static int check_owner_ref(struct btrfs_root *root,
6989 struct extent_record *rec,
6990 struct extent_buffer *buf)
6992 struct extent_backref *node, *tmp;
6993 struct tree_backref *back;
6994 struct btrfs_root *ref_root;
6995 struct btrfs_key key;
6996 struct btrfs_path path;
6997 struct extent_buffer *parent;
7002 rbtree_postorder_for_each_entry_safe(node, tmp,
7003 &rec->backref_tree, node) {
7006 if (!node->found_ref)
7008 if (node->full_backref)
7010 back = to_tree_backref(node);
7011 if (btrfs_header_owner(buf) == back->root)
7014 BUG_ON(rec->is_root);
7016 /* try to find the block by search corresponding fs tree */
7017 key.objectid = btrfs_header_owner(buf);
7018 key.type = BTRFS_ROOT_ITEM_KEY;
7019 key.offset = (u64)-1;
7021 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7022 if (IS_ERR(ref_root))
7025 level = btrfs_header_level(buf);
/* The search key is the first key stored in the block (leaf or node form). */
7027 btrfs_item_key_to_cpu(buf, &key, 0);
7029 btrfs_node_key_to_cpu(buf, &key, 0);
7031 btrfs_init_path(&path);
/* Stop one level above the block so the parent pointer can be inspected. */
7032 path.lowest_level = level + 1;
7033 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7037 parent = path.nodes[level + 1];
7038 if (parent && buf->start == btrfs_node_blockptr(parent,
7039 path.slots[level + 1]))
7042 btrfs_release_path(&path);
7043 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  Full backrefs are tested separately; what is
 * done for them, and the values returned, are not visible in this excerpt.
 * Presumably the result is non-zero when the record belongs to the extent
 * tree - TODO confirm.
 */
7046 static int is_extent_tree_record(struct extent_record *rec)
7048 struct extent_backref *node, *tmp;
7049 struct tree_backref *back;
7052 rbtree_postorder_for_each_entry_safe(node, tmp,
7053 &rec->backref_tree, node) {
7056 back = to_tree_backref(node);
7057 if (node->full_backref)
7059 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * Looks up the extent record covering the given start/len range in
 * @extent_cache and tests it with is_extent_tree_record().  On the path
 * that reaches the end of the function, rec->parent_key is converted to CPU
 * byte order and the block is registered through
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 * The early exits (missing cache entry, record that fails the extent-tree
 * test) are not visible in this excerpt.
 */
7066 static int record_bad_block_io(struct btrfs_fs_info *info,
7067 struct cache_tree *extent_cache,
7070 struct extent_record *rec;
7071 struct cache_extent *cache;
7072 struct btrfs_key key;
7074 cache = lookup_cache_extent(extent_cache, start, len);
7078 rec = container_of(cache, struct extent_record, cache);
7079 if (!is_extent_tree_record(rec))
7082 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7083 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (header level non-zero): the two btrfs_key_ptr entries are read out
 * and written back in swapped positions.  The first key of the node is then
 * pushed to the parents with btrfs_fixup_low_keys(); the condition guarding
 * that step is not visible here (presumably slot == 0).
 *
 * Leaf: the data of both items is copied into temporary heap buffers and
 * written back at the other item offset, the offset and size fields of the
 * two item headers are exchanged, and the keys are rewritten through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot in
 * turn.
 *
 * NOTE(review): item1_data is allocated with item1_size bytes but is
 * written out with length item2_size, and item2_data (item2_size bytes) is
 * written with length item1_size.  If the two sizes differ, the write from
 * the smaller buffer reads past its allocation - confirm and fix.
 */
7086 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7087 struct extent_buffer *buf, int slot)
7089 if (btrfs_header_level(buf)) {
7090 struct btrfs_key_ptr ptr1, ptr2;
7092 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7093 sizeof(struct btrfs_key_ptr));
7094 read_extent_buffer(buf, &ptr2,
7095 btrfs_node_key_ptr_offset(slot + 1),
7096 sizeof(struct btrfs_key_ptr));
7097 write_extent_buffer(buf, &ptr1,
7098 btrfs_node_key_ptr_offset(slot + 1),
7099 sizeof(struct btrfs_key_ptr));
7100 write_extent_buffer(buf, &ptr2,
7101 btrfs_node_key_ptr_offset(slot),
7102 sizeof(struct btrfs_key_ptr));
7104 struct btrfs_disk_key key;
7105 btrfs_node_key(buf, &key, 0);
7106 btrfs_fixup_low_keys(root, path, &key,
7107 btrfs_header_level(buf) + 1);
7110 struct btrfs_item *item1, *item2;
7111 struct btrfs_key k1, k2;
7112 char *item1_data, *item2_data;
7113 u32 item1_offset, item2_offset, item1_size, item2_size;
7115 item1 = btrfs_item_nr(slot);
7116 item2 = btrfs_item_nr(slot + 1);
7117 btrfs_item_key_to_cpu(buf, &k1, slot);
7118 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7119 item1_offset = btrfs_item_offset(buf, item1);
7120 item2_offset = btrfs_item_offset(buf, item2);
7121 item1_size = btrfs_item_size(buf, item1);
7122 item2_size = btrfs_item_size(buf, item2);
7124 item1_data = malloc(item1_size);
7127 item2_data = malloc(item2_size);
7133 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7134 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7136 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7137 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7141 btrfs_set_item_offset(buf, item1, item2_offset);
7142 btrfs_set_item_offset(buf, item2, item1_offset);
7143 btrfs_set_item_size(buf, item1, item2_size);
7144 btrfs_set_item_size(buf, item2, item1_size);
7146 path->slots[0] = slot;
7147 btrfs_set_item_key_unsafe(root, path, &k2);
7148 path->slots[0] = slot + 1;
7149 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->nodes[path->lowest_level].
 *
 * Walks adjacent key pairs (node keys or item keys depending on the block
 * kind), tests each pair with btrfs_comp_cpu_keys(), hands offending pairs
 * to swap_values() and marks the buffer dirty.  The handling of pairs that
 * are already ordered and of a swap_values() failure is not visible in this
 * excerpt.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1.  If that
 * helper returns an unsigned type and the block is empty, the subtraction
 * wraps - confirm callers never pass an empty block.
 */
7154 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7156 struct extent_buffer *buf;
7157 struct btrfs_key k1, k2;
7159 int level = path->lowest_level;
7162 buf = path->nodes[level];
7163 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7165 btrfs_node_key_to_cpu(buf, &k1, i);
7166 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7168 btrfs_item_key_to_cpu(buf, &k1, i);
7169 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7171 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7173 ret = swap_values(root, path, buf, i);
7176 btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted; the action for other key types is not
 * visible in this excerpt.  The deletion is announced on stdout, the item
 * headers after @slot are moved down by one with memmove_extent_buffer(),
 * nritems is decremented, and the buffer is marked dirty.  Only the item
 * header array is moved here; the item data area is not touched.
 * The first key is propagated with btrfs_fixup_low_keys(); the condition
 * guarding that step is not visible (presumably slot == 0).
 *
 * NOTE(review): buf->start is passed to a %llu conversion without the
 * (unsigned long long) cast used for the other arguments.
 */
7182 static int delete_bogus_item(struct btrfs_root *root,
7183 struct btrfs_path *path,
7184 struct extent_buffer *buf, int slot)
7186 struct btrfs_key key;
7187 int nritems = btrfs_header_nritems(buf);
7189 btrfs_item_key_to_cpu(buf, &key, slot);
7191 /* These are all the keys we can deal with missing. */
7192 if (key.type != BTRFS_DIR_INDEX_KEY &&
7193 key.type != BTRFS_EXTENT_ITEM_KEY &&
7194 key.type != BTRFS_METADATA_ITEM_KEY &&
7195 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7196 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7199 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7200 (unsigned long long)key.objectid, key.type,
7201 (unsigned long long)key.offset, slot, buf->start);
7202 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7203 btrfs_item_nr_offset(slot + 1),
7204 sizeof(struct btrfs_item) *
7205 (nritems - slot - 1));
7206 btrfs_set_header_nritems(buf, nritems - 1);
7208 struct btrfs_disk_key disk_key;
7210 btrfs_item_key(buf, &disk_key, 0);
7211 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7213 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item is expected to end where the previous item starts.  For each
 * item that violates this:
 *   - if it ends beyond the expected position, delete_bogus_item() is
 *     tried, and a message is printed when the problem cannot be fixed;
 *   - otherwise the gap is computed as a shift, the item data is moved up
 *     by that many bytes and the item offset is updated.
 * The buffer is marked dirty after a move.  The control flow after
 * delete_bogus_item() and the final return are not visible in this excerpt.
 */
7217 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7219 struct extent_buffer *buf;
7223 /* We should only get this for leaves */
7224 BUG_ON(path->lowest_level);
7225 buf = path->nodes[0];
7227 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7228 unsigned int shift = 0, offset;
7230 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7231 BTRFS_LEAF_DATA_SIZE(root)) {
7232 if (btrfs_item_end_nr(buf, i) >
7233 BTRFS_LEAF_DATA_SIZE(root)) {
7234 ret = delete_bogus_item(root, path, buf, i);
7237 fprintf(stderr, "item is off the end of the "
7238 "leaf, can't fix\n");
7242 shift = BTRFS_LEAF_DATA_SIZE(root) -
7243 btrfs_item_end_nr(buf, i);
7244 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7245 btrfs_item_offset_nr(buf, i - 1)) {
7246 if (btrfs_item_end_nr(buf, i) >
7247 btrfs_item_offset_nr(buf, i - 1)) {
7248 ret = delete_bogus_item(root, path, buf, i);
7251 fprintf(stderr, "items overlap, can't fix\n");
7255 shift = btrfs_item_offset_nr(buf, i - 1) -
7256 btrfs_item_end_nr(buf, i);
7261 printf("Shifting item nr %d by %u bytes in block %llu\n",
7262 i, shift, (unsigned long long)buf->start);
7263 offset = btrfs_item_offset_nr(buf, i);
7264 memmove_extent_buffer(buf,
7265 btrfs_leaf_data(buf) + offset + shift,
7266 btrfs_leaf_data(buf) + offset,
7267 btrfs_item_size_nr(buf, i));
7268 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7270 btrfs_mark_buffer_dirty(buf);
7274 * We may have moved things, in which case we want to exit so we don't
7275 * write those changes out. Once we have proper abort functionality in
7276 * progs this can be changed to something nicer.
7283 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7284 * then just return -EIO.
/*
 * Repair driver for a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  All roots referencing buf->start are collected with
 * btrfs_find_all_roots(); for each of them the fs root is read, a
 * transaction is started, and the block is located again through
 * btrfs_search_slot() with lowest_level set to the level of @buf and
 * skip_check_block set (the final argument 1 presumably requests COW).
 * The matching fixer, fix_key_order() or fix_item_offset(), is then run on
 * the path and the transaction is committed.
 *
 * NOTE(review): the error tests between these steps, the release of the
 * roots ulist and the return statements are not visible in this excerpt -
 * confirm that roots is freed on every path.
 */
7286 static int try_to_fix_bad_block(struct btrfs_root *root,
7287 struct extent_buffer *buf,
7288 enum btrfs_tree_block_status status)
7290 struct btrfs_trans_handle *trans;
7291 struct ulist *roots;
7292 struct ulist_node *node;
7293 struct btrfs_root *search_root;
7294 struct btrfs_path path;
7295 struct ulist_iterator iter;
7296 struct btrfs_key root_key, key;
7299 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7300 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7303 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7307 btrfs_init_path(&path);
7308 ULIST_ITER_INIT(&iter);
7309 while ((node = ulist_next(roots, &iter))) {
7310 root_key.objectid = node->val;
7311 root_key.type = BTRFS_ROOT_ITEM_KEY;
7312 root_key.offset = (u64)-1;
7314 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7321 trans = btrfs_start_transaction(search_root, 0);
7322 if (IS_ERR(trans)) {
7323 ret = PTR_ERR(trans);
/* Search down to the level of the bad block, using its first key. */
7327 path.lowest_level = btrfs_header_level(buf);
7328 path.skip_check_block = 1;
7329 if (path.lowest_level)
7330 btrfs_node_key_to_cpu(buf, &key, 0);
7332 btrfs_item_key_to_cpu(buf, &key, 0);
7333 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7336 btrfs_commit_transaction(trans, search_root);
7339 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7340 ret = fix_key_order(search_root, &path);
7341 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7342 ret = fix_item_offset(search_root, &path);
7344 btrfs_commit_transaction(trans, search_root);
7347 btrfs_release_path(&path);
7348 btrfs_commit_transaction(trans, search_root);
7351 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * Looks up the record covering @buf in @extent_cache and stores in it the
 * header generation, the objectid of the first key (when the block has
 * items) and the level.  The block is then checked with btrfs_check_leaf()
 * or btrfs_check_node() against rec->parent_key.  A block that is not
 * BTRFS_TREE_BLOCK_CLEAN is handed to try_to_fix_bad_block(); if the result
 * is still not clean, a bad block message is printed.  Further down,
 * content_checked is set, and owner_ref_checked is set either directly for
 * blocks flagged BTRFS_BLOCK_FLAG_FULL_BACKREF or following
 * check_owner_ref().  Finally the record is offered to
 * maybe_free_extent_rec().
 *
 * NOTE(review): the int result of try_to_fix_bad_block() is stored in the
 * enum status variable and compared with BTRFS_TREE_BLOCK_CLEAN, which
 * presumably relies on that constant being zero - confirm.  The conditions
 * around the repair attempt, the test of the check_owner_ref() result and
 * the return statements are not visible in this excerpt.
 */
7355 static int check_block(struct btrfs_root *root,
7356 struct cache_tree *extent_cache,
7357 struct extent_buffer *buf, u64 flags)
7359 struct extent_record *rec;
7360 struct cache_extent *cache;
7361 struct btrfs_key key;
7362 enum btrfs_tree_block_status status;
7366 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7369 rec = container_of(cache, struct extent_record, cache);
7370 rec->generation = btrfs_header_generation(buf);
7372 level = btrfs_header_level(buf);
7373 if (btrfs_header_nritems(buf) > 0) {
7376 btrfs_item_key_to_cpu(buf, &key, 0);
7378 btrfs_node_key_to_cpu(buf, &key, 0);
7380 rec->info_objectid = key.objectid;
7382 rec->info_level = level;
7384 if (btrfs_is_leaf(buf))
7385 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7387 status = btrfs_check_node(root, &rec->parent_key, buf);
7389 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7391 status = try_to_fix_bad_block(root, buf, status);
7392 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7394 fprintf(stderr, "bad block %llu\n",
7395 (unsigned long long)buf->start);
7398 * Signal to callers we need to start the scan over
7399 * again since we'll have cowed blocks.
7404 rec->content_checked = 1;
7405 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7406 rec->owner_ref_checked = 1;
7408 ret = check_owner_ref(root, rec, buf);
7410 rec->owner_ref_checked = 1;
7414 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look for an existing tree backref of @rec by walking the rec->backrefs
 * list.  Entries are matched on back->parent against @parent or on
 * back->root against @root, with node->full_backref tested on each side;
 * the statements that skip non-matching entries, advance the cursor and
 * return are not visible in this excerpt.  Presumably a non-zero @parent
 * selects the full-backref comparison - TODO confirm.
 *
 * NOTE(review): this walks the rec->backrefs list, while add_tree_backref()
 * inserts new backrefs into rec->backref_tree.  Confirm that the list is
 * still populated, otherwise this lookup can never match.
 */
7419 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7420 u64 parent, u64 root)
7422 struct list_head *cur = rec->backrefs.next;
7423 struct extent_backref *node;
7424 struct tree_backref *back;
7426 while(cur != &rec->backrefs) {
7427 node = to_extent_backref(cur);
7431 back = to_tree_backref(node);
7433 if (!node->full_backref)
7435 if (parent == back->parent)
7438 if (node->full_backref)
7440 if (back->root == root)
/*
 * Allocate a tree backref for @rec and clear its embedded node.
 * One visible path stores @parent and sets full_backref to 1, the other
 * sets full_backref to 0 (presumably storing @root, on a line not visible
 * here).  The allocation failure test, the selecting condition and the
 * return are not visible in this excerpt.
 */
7448 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7449 u64 parent, u64 root)
7451 struct tree_backref *ref = malloc(sizeof(*ref));
7455 memset(&ref->node, 0, sizeof(ref->node));
7457 ref->parent = parent;
7458 ref->node.full_backref = 1;
7461 ref->node.full_backref = 0;
/*
 * Look for an existing data backref of @rec by walking the rec->backrefs
 * list.  Entries are matched on back->parent against @parent, or on the
 * root/owner/offset triple.  For a triple match, when found_ref is set and
 * the candidate has already been found, a candidate whose bytes or
 * disk_bytenr differ from the requested values is treated specially (the
 * action is not visible here; presumably it is skipped so the caller
 * allocates a new backref).
 *
 * NOTE(review): this walks the rec->backrefs list, while add_data_backref()
 * inserts new backrefs into rec->backref_tree.  Confirm that the list is
 * still populated.
 */
7468 static struct data_backref *find_data_backref(struct extent_record *rec,
7469 u64 parent, u64 root,
7470 u64 owner, u64 offset,
7472 u64 disk_bytenr, u64 bytes)
7474 struct list_head *cur = rec->backrefs.next;
7475 struct extent_backref *node;
7476 struct data_backref *back;
7478 while(cur != &rec->backrefs) {
7479 node = to_extent_backref(cur);
7483 back = to_data_backref(node);
7485 if (!node->full_backref)
7487 if (parent == back->parent)
7490 if (node->full_backref)
7492 if (back->root == root && back->owner == owner &&
7493 back->offset == offset) {
7494 if (found_ref && node->found_ref &&
7495 (back->bytes != bytes ||
7496 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref for @rec, clear its embedded node and mark it as
 * data.  One visible path stores @parent and sets full_backref to 1, the
 * other stores @offset and sets full_backref to 0.  The backref byte count
 * is initialised from max_size, and rec->max_size is raised when max_size
 * is larger.  The allocation failure test, the remaining field assignments
 * and the return are not visible in this excerpt.
 */
7506 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7507 u64 parent, u64 root,
7508 u64 owner, u64 offset,
7511 struct data_backref *ref = malloc(sizeof(*ref));
7515 memset(&ref->node, 0, sizeof(ref->node));
7516 ref->node.is_data = 1;
7519 ref->parent = parent;
7522 ref->node.full_backref = 1;
7526 ref->offset = offset;
7527 ref->node.full_backref = 0;
7529 ref->bytes = max_size;
7532 if (max_size > rec->max_size)
7533 rec->max_size = max_size;
7537 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group that contains rec->start
 * has flags that do not fit the kind of extent:
 *   - a data extent (rec->metadata unset) needs BTRFS_BLOCK_GROUP_DATA;
 *   - a metadata extent needs SYSTEM or METADATA;
 *   - when backrefs exist, the first one in rec->backref_tree decides the
 *     exact type: a data backref on a tree block is always wrong, a tree
 *     backref rooted in BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, any
 *     other root requires METADATA.
 * The handling of a failed block group lookup and the early returns are not
 * visible in this excerpt.
 */
7538 static void check_extent_type(struct extent_record *rec)
7540 struct btrfs_block_group_cache *bg_cache;
7542 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7546 /* data extent, check chunk directly*/
7547 if (!rec->metadata) {
7548 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7549 rec->wrong_chunk_type = 1;
7553 /* metadata extent, check the obvious case first */
7554 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7555 BTRFS_BLOCK_GROUP_METADATA))) {
7556 rec->wrong_chunk_type = 1;
7561 * Check SYSTEM extent, as it's also marked as metadata, we can only
7562 * make sure it's a SYSTEM extent by its backref
7564 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7565 struct extent_backref *node;
7566 struct tree_backref *tback;
7569 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7570 if (node->is_data) {
7571 /* tree block shouldn't have data backref */
7572 rec->wrong_chunk_type = 1;
7575 tback = container_of(node, struct tree_backref, node);
7577 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7578 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7580 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7581 if (!(bg_cache->flags & bg_type))
7582 rec->wrong_chunk_type = 1;
7587 * Allocate a new extent record, fill default values from @tmpl and insert into
7588 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7589 * the cache, otherwise it fails.
/*
 * Create a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  Fields copied from the
 * template: start, max_size, found_rec, content_checked, owner_ref_checked,
 * metadata, is_root, refs, extent_item_refs, parent_generation and
 * parent_key.  Fields reset to defaults: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes
 * and wrong_chunk_type, plus the list heads and the backref rb-tree.
 * After insertion rec->nr is added to the global bytes_used counter,
 * crossing_stripes is computed with check_crossing_stripes() (presumably
 * for metadata only; the guarding condition is not visible) and the chunk
 * type is validated with check_extent_type().
 *
 * NOTE(review): rec->nr is max(tmpl->nr, tmpl->max_size) while the cache
 * entry size is tmpl->nr, so the two can differ - confirm this is intended.
 * The allocation failure test and the insert failure path are not visible
 * in this excerpt.
 */
7591 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7592 struct extent_record *tmpl)
7594 struct extent_record *rec;
7597 BUG_ON(tmpl->max_size == 0);
7598 rec = malloc(sizeof(*rec));
7601 rec->start = tmpl->start;
7602 rec->max_size = tmpl->max_size;
7603 rec->nr = max(tmpl->nr, tmpl->max_size);
7604 rec->found_rec = tmpl->found_rec;
7605 rec->content_checked = tmpl->content_checked;
7606 rec->owner_ref_checked = tmpl->owner_ref_checked;
7607 rec->num_duplicates = 0;
7608 rec->metadata = tmpl->metadata;
7609 rec->flag_block_full_backref = FLAG_UNSET;
7610 rec->bad_full_backref = 0;
7611 rec->crossing_stripes = 0;
7612 rec->wrong_chunk_type = 0;
7613 rec->is_root = tmpl->is_root;
7614 rec->refs = tmpl->refs;
7615 rec->extent_item_refs = tmpl->extent_item_refs;
7616 rec->parent_generation = tmpl->parent_generation;
7617 INIT_LIST_HEAD(&rec->backrefs);
7618 INIT_LIST_HEAD(&rec->dups);
7619 INIT_LIST_HEAD(&rec->list);
7620 rec->backref_tree = RB_ROOT;
7621 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7622 rec->cache.start = tmpl->start;
7623 rec->cache.size = tmpl->nr;
7624 ret = insert_cache_extent(extent_cache, &rec->cache);
7629 bytes_used += rec->nr;
7632 rec->crossing_stripes = check_crossing_stripes(global_info,
7633 rec->start, global_info->nodesize);
7634 check_extent_type(rec);
7639 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7641 * - refs - if found, increase refs
7642 * - is_root - if found, set
7643 * - content_checked - if found, set
7644 * - owner_ref_checked - if found, set
7646 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering tmpl->start / tmpl->nr, or
 * create a new record through add_extent_rec_nolookup().
 *
 * When @tmpl carries found_rec and either its start differs from the
 * existing record or that record was already found, a duplicate entry is
 * allocated: the existing record is queued on the global duplicate_extents
 * list (if not queued yet), and the new entry is appended to rec->dups with
 * num_duplicates incremented.
 * Otherwise the existing record is updated in place: extent_item_refs (a
 * message is printed when one was already recorded), content_checked,
 * owner_ref_checked, parent_key, parent_generation and max_size.
 * The record is then re-evaluated with check_crossing_stripes() (presumably
 * for metadata only), check_extent_type() and maybe_free_extent_rec().
 *
 * NOTE(review): several branch conditions, the allocation failure test and
 * the return statements are not visible in this excerpt.
 */
7648 static int add_extent_rec(struct cache_tree *extent_cache,
7649 struct extent_record *tmpl)
7651 struct extent_record *rec;
7652 struct cache_extent *cache;
7656 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7658 rec = container_of(cache, struct extent_record, cache);
7662 rec->nr = max(tmpl->nr, tmpl->max_size);
7665 * We need to make sure to reset nr to whatever the extent
7666 * record says was the real size, this way we can compare it to
7669 if (tmpl->found_rec) {
7670 if (tmpl->start != rec->start || rec->found_rec) {
7671 struct extent_record *tmp;
7674 if (list_empty(&rec->list))
7675 list_add_tail(&rec->list,
7676 &duplicate_extents);
7679 * We have to do this song and dance in case we
7680 * find an extent record that falls inside of
7681 * our current extent record but does not have
7682 * the same objectid.
7684 tmp = malloc(sizeof(*tmp));
7687 tmp->start = tmpl->start;
7688 tmp->max_size = tmpl->max_size;
7691 tmp->metadata = tmpl->metadata;
7692 tmp->extent_item_refs = tmpl->extent_item_refs;
7693 INIT_LIST_HEAD(&tmp->list);
7694 list_add_tail(&tmp->list, &rec->dups);
7695 rec->num_duplicates++;
7702 if (tmpl->extent_item_refs && !dup) {
7703 if (rec->extent_item_refs) {
7704 fprintf(stderr, "block %llu rec "
7705 "extent_item_refs %llu, passed %llu\n",
7706 (unsigned long long)tmpl->start,
7707 (unsigned long long)
7708 rec->extent_item_refs,
7709 (unsigned long long)tmpl->extent_item_refs);
7711 rec->extent_item_refs = tmpl->extent_item_refs;
7715 if (tmpl->content_checked)
7716 rec->content_checked = 1;
7717 if (tmpl->owner_ref_checked)
7718 rec->owner_ref_checked = 1;
7719 memcpy(&rec->parent_key, &tmpl->parent_key,
7720 sizeof(tmpl->parent_key));
7721 if (tmpl->parent_generation)
7722 rec->parent_generation = tmpl->parent_generation;
7723 if (rec->max_size < tmpl->max_size)
7724 rec->max_size = tmpl->max_size;
7727 * A metadata extent can't cross stripe_len boundary, otherwise
7728 * kernel scrub won't be able to handle it.
7729 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7733 rec->crossing_stripes = check_crossing_stripes(
7734 global_info, rec->start,
7735 global_info->nodesize);
7736 check_extent_type(rec);
7737 maybe_free_extent_rec(extent_cache, rec);
7741 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, a template record is created
 * through add_extent_rec_nolookup() and looked up again.  A record whose
 * start differs from @bytenr is treated as an error case (handling not
 * visible here).  The backref is fetched with find_tree_backref() or newly
 * allocated with alloc_tree_backref().
 *
 * Depending on @found_ref, either node.found_ref or node.found_extent_tree
 * is set on the backref; a message is printed to stderr if the flag was
 * already set.  The backref is inserted into rec->backref_tree (the insert
 * flag presumably restricts this to newly allocated backrefs), then
 * check_extent_type() and maybe_free_extent_rec() are run on the record.
 */
7746 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7747 u64 parent, u64 root, int found_ref)
7749 struct extent_record *rec;
7750 struct tree_backref *back;
7751 struct cache_extent *cache;
7753 bool insert = false;
7755 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7757 struct extent_record tmpl;
7759 memset(&tmpl, 0, sizeof(tmpl));
7760 tmpl.start = bytenr;
7765 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7769 /* really a bug in cache_extent implement now */
7770 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7775 rec = container_of(cache, struct extent_record, cache);
7776 if (rec->start != bytenr) {
7778 * Several cause, from unaligned bytenr to over lapping extents
7783 back = find_tree_backref(rec, parent, root);
7785 back = alloc_tree_backref(rec, parent, root);
7792 if (back->node.found_ref) {
7793 fprintf(stderr, "Extent back ref already exists "
7794 "for %llu parent %llu root %llu \n",
7795 (unsigned long long)bytenr,
7796 (unsigned long long)parent,
7797 (unsigned long long)root);
7799 back->node.found_ref = 1;
7801 if (back->node.found_extent_tree) {
7802 fprintf(stderr, "Extent back ref already exists "
7803 "for %llu parent %llu root %llu \n",
7804 (unsigned long long)bytenr,
7805 (unsigned long long)parent,
7806 (unsigned long long)root);
7808 back->node.found_extent_tree = 1;
7811 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7812 compare_extent_backref));
7813 check_extent_type(rec);
7814 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr yet, a template record with the given
 * max_size is created through add_extent_rec_nolookup() and looked up
 * again.  rec->max_size is raised to @max_size when smaller.  The backref
 * is fetched with find_data_backref() or newly allocated with
 * alloc_data_backref().
 *
 * On the found_ref side, @num_refs must be 1 (BUG_ON), a backref that was
 * already found must have bytes equal to @max_size (BUG_ON), node.found_ref
 * is set and the found_ref counter is incremented.  If bytes or disk_bytenr
 * differ from the new values they are overwritten, and the node is erased
 * from rec->backref_tree so that it can be reinserted.  The record is also
 * marked content_checked and owner_ref_checked.
 * On the extent tree side, a message is printed when found_extent_tree was
 * already set; then num_refs is stored and found_extent_tree is set.
 * Finally the backref is inserted into rec->backref_tree and the record is
 * offered to maybe_free_extent_rec().
 *
 * NOTE(review): the conditions selecting between these paths, the erase
 * guard and the return statements are not visible in this excerpt.
 */
7818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7819 u64 parent, u64 root, u64 owner, u64 offset,
7820 u32 num_refs, int found_ref, u64 max_size)
7822 struct extent_record *rec;
7823 struct data_backref *back;
7824 struct cache_extent *cache;
7826 bool insert = false;
7828 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7830 struct extent_record tmpl;
7832 memset(&tmpl, 0, sizeof(tmpl));
7833 tmpl.start = bytenr;
7835 tmpl.max_size = max_size;
7837 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7846 rec = container_of(cache, struct extent_record, cache);
7847 if (rec->max_size < max_size)
7848 rec->max_size = max_size;
7851 * If found_ref is set then max_size is the real size and must match the
7852 * existing refs. So if we have already found a ref then we need to
7853 * make sure that this ref matches the existing one, otherwise we need
7854 * to add a new backref so we can notice that the backrefs don't match
7855 * and we need to figure out who is telling the truth. This is to
7856 * account for that awful fsync bug I introduced where we'd end up with
7857 * a btrfs_file_extent_item that would have its length include multiple
7858 * prealloc extents or point inside of a prealloc extent.
7860 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7863 back = alloc_data_backref(rec, parent, root, owner, offset,
7870 BUG_ON(num_refs != 1);
7871 if (back->node.found_ref)
7872 BUG_ON(back->bytes != max_size);
7873 back->node.found_ref = 1;
7874 back->found_ref += 1;
7875 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7876 back->bytes = max_size;
7877 back->disk_bytenr = bytenr;
7879 /* Need to reinsert if not already in the tree */
7881 rb_erase(&back->node.node, &rec->backref_tree);
7886 rec->content_checked = 1;
7887 rec->owner_ref_checked = 1;
7889 if (back->node.found_extent_tree) {
7890 fprintf(stderr, "Extent back ref already exists "
7891 "for %llu parent %llu root %llu "
7892 "owner %llu offset %llu num_refs %lu\n",
7893 (unsigned long long)bytenr,
7894 (unsigned long long)parent,
7895 (unsigned long long)root,
7896 (unsigned long long)owner,
7897 (unsigned long long)offset,
7898 (unsigned long)num_refs);
7900 back->num_refs = num_refs;
7901 back->node.found_extent_tree = 1;
7904 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7905 compare_extent_backref));
7907 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range @bytenr / @size for processing: it is first added to the
 * @seen tree and then to the @pending tree.  The test on the first result
 * is not visible in this excerpt; presumably a range that is already in
 * @seen is not queued again - TODO confirm.
 *
 * NOTE(review): the result of the second add_cache_extent() call is
 * ignored.
 */
7911 static int add_pending(struct cache_tree *pending,
7912 struct cache_tree *seen, u64 bytenr, u32 size)
7915 ret = add_cache_extent(seen, bytenr, size);
7918 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources are consulted in this order:
 *   1. the first extent in @reada, returned alone in bits[0];
 *   2. extents in @nodes starting from a position up to 32768 bytes before
 *      @last, with a fallback to the beginning of @nodes;
 *   3. extents in @pending from the beginning.
 * Consecutive extents are copied into @bits until the tree ends or
 * @bits_nr is reached.  When more than 8 slots remain, extents from
 * @pending that start within 32768 bytes of the end of the previous pick
 * are appended as well.
 *
 * NOTE(review): node_start is declared unsigned long but initialised from
 * the u64 @last, which truncates on platforms where unsigned long is 32
 * bits wide.  The null tests on the search results and the return
 * statements are not visible in this excerpt.
 */
7922 static int pick_next_pending(struct cache_tree *pending,
7923 struct cache_tree *reada,
7924 struct cache_tree *nodes,
7925 u64 last, struct block_info *bits, int bits_nr,
7928 unsigned long node_start = last;
7929 struct cache_extent *cache;
7932 cache = search_cache_extent(reada, 0);
7934 bits[0].start = cache->start;
7935 bits[0].size = cache->size;
7940 if (node_start > 32768)
7941 node_start -= 32768;
7943 cache = search_cache_extent(nodes, node_start);
7945 cache = search_cache_extent(nodes, 0);
7948 cache = search_cache_extent(pending, 0);
7953 bits[ret].start = cache->start;
7954 bits[ret].size = cache->size;
7955 cache = next_cache_extent(cache);
7957 } while (cache && ret < bits_nr);
7963 bits[ret].start = cache->start;
7964 bits[ret].size = cache->size;
7965 cache = next_cache_extent(cache);
7967 } while (cache && ret < bits_nr);
7969 if (bits_nr - ret > 8) {
7970 u64 lookup = bits[0].start + bits[0].size;
7971 struct cache_extent *next;
7972 next = search_cache_extent(pending, lookup);
7974 if (next->start - lookup > 32768)
7976 bits[ret].start = next->start;
7977 bits[ret].size = next->size;
7978 lookup = next->start + next->size;
7982 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps the cache
 * extent to its chunk_record and unlinks it from both its list and its
 * dextents list.  The release of the record itself is not visible in this
 * excerpt.
 */
7990 static void free_chunk_record(struct cache_extent *cache)
7992 struct chunk_record *rec;
7994 rec = container_of(cache, struct chunk_record, cache);
7995 list_del_init(&rec->list);
7996 list_del_init(&rec->dextents);
/* Release every record in @chunk_cache through free_chunk_record(). */
8000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8002 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device record rb-tree: maps @node back to its
 * device_record.  The release of the record is not visible in this excerpt.
 */
8005 static void free_device_record(struct rb_node *node)
8007 struct device_record *rec;
8009 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation; presumably it defines the tree-freeing function for
 * device_cache, applying free_device_record() to every node - see the
 * definition of FREE_RB_BASED_TREE to confirm the generated name.
 */
8013 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.  The test on the insert result is not visible in
 * this excerpt; presumably the list is only touched after a successful
 * insert.
 */
8015 int insert_block_group_record(struct block_group_tree *tree,
8016 struct block_group_record *bg_rec)
8020 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8024 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps the
 * cache extent to its block_group_record and unlinks it from its list.
 * The release of the record itself is not visible in this excerpt.
 */
8028 static void free_block_group_record(struct cache_extent *cache)
8030 struct block_group_record *rec;
8032 rec = container_of(cache, struct block_group_record, cache);
8033 list_del_init(&rec->list);
/* Release every record in @tree through free_block_group_record(). */
8037 void free_block_group_tree(struct block_group_tree *tree)
8039 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree using insert_cache_extent2(),
 * then queue it on both orphan lists (no_chunk_orphans and
 * no_device_orphans) of @tree.  The test on the insert result is not
 * visible in this excerpt.
 */
8042 int insert_device_extent_record(struct device_extent_tree *tree,
8043 struct device_extent_record *de_rec)
8048 * Device extent is a bit different from the other extents, because
8049 * the extents which belong to the different devices may have the
8050 * same start and size, so we need use the special extent cache
8051 * search/insert functions.
8053 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8057 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8058 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: unlinks the
 * record from its chunk list and its device list when it is still linked.
 * The release of the record itself is not visible in this excerpt.
 */
8062 static void free_device_extent_record(struct cache_extent *cache)
8064 struct device_extent_record *rec;
8066 rec = container_of(cache, struct device_extent_record, cache);
8067 if (!list_empty(&rec->chunk_list))
8068 list_del_init(&rec->chunk_list);
8069 if (!list_empty(&rec->device_list))
8070 list_del_init(&rec->device_list);
/* Release every record in @tree through free_device_extent_record(). */
8074 void free_device_extent_tree(struct device_extent_tree *tree)
8076 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8079 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf (only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).  A ref whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID is recorded as a tree backref, with key.offset
 * passed as the parent.  The add_data_backref() call, which carries the v0
 * ref count, is presumably the alternative branch (the else line is not
 * visible in this excerpt).
 */
8080 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8081 struct extent_buffer *leaf, int slot)
8083 struct btrfs_extent_ref_v0 *ref0;
8084 struct btrfs_key key;
8087 btrfs_item_key_to_cpu(leaf, &key, slot);
8088 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8089 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8090 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8093 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8094 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at the given slot of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes
 * (btrfs_chunk_record_size()).  The cache extent covers key->offset for the
 * chunk length.  The key triple, the leaf generation and the chunk fields
 * (owner, stripe_len, type flags, io width/align, sector size, stripe
 * counts) are copied, followed by devid, offset and dev_uuid of every
 * stripe.  An allocation failure is reported on stderr; what happens next
 * is not visible in this excerpt.
 */
8100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8101 struct btrfs_key *key,
8104 struct btrfs_chunk *ptr;
8105 struct chunk_record *rec;
8108 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8109 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8111 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8113 fprintf(stderr, "memory allocation failed\n");
8117 INIT_LIST_HEAD(&rec->list);
8118 INIT_LIST_HEAD(&rec->dextents);
8121 rec->cache.start = key->offset;
8122 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8124 rec->generation = btrfs_header_generation(leaf);
8126 rec->objectid = key->objectid;
8127 rec->type = key->type;
8128 rec->offset = key->offset;
8130 rec->length = rec->cache.size;
8131 rec->owner = btrfs_chunk_owner(leaf, ptr);
8132 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8133 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8134 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8135 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8136 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8137 rec->num_stripes = num_stripes;
8138 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8140 for (i = 0; i < rec->num_stripes; ++i) {
8141 rec->stripes[i].devid =
8142 btrfs_stripe_devid_nr(leaf, ptr, i);
8143 rec->stripes[i].offset =
8144 btrfs_stripe_offset_nr(leaf, ptr, i);
8145 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8146 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at the given slot of @eb and add it to
 * @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an error is
 * printed for an invalid chunk.  A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; a message is
 * printed to stderr when the insert reports that the chunk already exists.
 *
 * NOTE(review): the tests on both results, the cleanup of a rejected
 * record and the return statements are not visible in this excerpt.
 */
8153 static int process_chunk_item(struct cache_tree *chunk_cache,
8154 struct btrfs_key *key, struct extent_buffer *eb,
8157 struct chunk_record *rec;
8158 struct btrfs_chunk *chunk;
8161 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8163 * Do extra check for this chunk item,
8165 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8166 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8167 * and owner<->key_type check.
8169 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8172 error("chunk(%llu, %llu) is not valid, ignore it",
8173 key->offset, btrfs_chunk_length(eb, chunk));
8176 rec = btrfs_new_chunk_record(eb, key, slot);
8177 ret = insert_cache_extent(chunk_cache, &rec->cache);
8179 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8180 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare().  A
 * message is printed to stderr when the insert reports that the device
 * already exists.
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); only the second value survives.
 * NOTE(review): the record comes from malloc(), not calloc() as in the
 * sibling constructors, so any field not assigned below is indeterminate.
 * The allocation failure path, the cleanup of a rejected record and the
 * return statements are not visible in this excerpt.
 */
8187 static int process_device_item(struct rb_root *dev_cache,
8188 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8190 struct btrfs_dev_item *ptr;
8191 struct device_record *rec;
8194 ptr = btrfs_item_ptr(eb,
8195 slot, struct btrfs_dev_item);
8197 rec = malloc(sizeof(*rec));
8199 fprintf(stderr, "memory allocation failed\n");
8203 rec->devid = key->offset;
8204 rec->generation = btrfs_header_generation(eb);
8206 rec->objectid = key->objectid;
8207 rec->type = key->type;
8208 rec->offset = key->offset;
8210 rec->devid = btrfs_device_id(eb, ptr);
8211 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8212 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8214 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8216 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-allocated block_group_record from the block group item at
 * the given slot of @leaf.  The cache extent covers key->objectid for
 * key->offset bytes.  The leaf generation, the key triple and the block
 * group flags are copied and the list head is initialised.  An allocation
 * failure is reported on stderr; what happens next is not visible in this
 * excerpt.
 */
8223 struct block_group_record *
8224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8227 struct btrfs_block_group_item *ptr;
8228 struct block_group_record *rec;
8230 rec = calloc(1, sizeof(*rec));
8232 fprintf(stderr, "memory allocation failed\n");
8236 rec->cache.start = key->objectid;
8237 rec->cache.size = key->offset;
8239 rec->generation = btrfs_header_generation(leaf);
8241 rec->objectid = key->objectid;
8242 rec->type = key->type;
8243 rec->offset = key->offset;
8245 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8246 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8248 INIT_LIST_HEAD(&rec->list);
/*
 * Handle one BLOCK_GROUP_ITEM found at @slot of leaf @eb: build a record with
 * btrfs_new_block_group_record() and insert it into @block_group_cache with
 * insert_block_group_record().
 *
 * @block_group_cache: tree that receives the new record
 * @key:               key of the item
 * @eb:                leaf that contains the item
 * @slot:              slot of the item inside @eb
 */
8253 static int process_block_group_item(struct block_group_tree *block_group_cache,
8254 struct btrfs_key *key,
8255 struct extent_buffer *eb, int slot)
8257 struct block_group_record *rec;
8260 rec = btrfs_new_block_group_record(eb, key, slot);
8261 ret = insert_block_group_record(block_group_cache, rec);
/* Message used when the insert reports that the block group is already cached. */
8263 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8264 rec->objectid, rec->offset);
/*
 * Allocate and fill a device_extent_record for the DEV_EXTENT item at @slot
 * of @leaf.
 *
 * The record is zero-allocated with calloc().  The cache entry is keyed by
 * (key->objectid, key->offset) with the dev extent length as its size; the
 * key is copied verbatim and the generation comes from the leaf header.
 * Both embedded list heads are initialised empty.
 *
 * @leaf: leaf that contains the item
 * @key:  key of the item
 * @slot: slot of the item inside @leaf
 */
8271 struct device_extent_record *
8272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8273 struct btrfs_key *key, int slot)
8275 struct device_extent_record *rec;
8276 struct btrfs_dev_extent *ptr;
8278 rec = calloc(1, sizeof(*rec));
8280 fprintf(stderr, "memory allocation failed\n");
/* Cache lookup key: objectid and start offset taken from the item key. */
8284 rec->cache.objectid = key->objectid;
8285 rec->cache.start = key->offset;
8287 rec->generation = btrfs_header_generation(leaf);
/* Keep a copy of the item key in the record. */
8289 rec->objectid = key->objectid;
8290 rec->type = key->type;
8291 rec->offset = key->offset;
/* Back-reference to the owning chunk, read from the on-disk item. */
8293 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8294 rec->chunk_objecteid =
8295 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8297 btrfs_dev_extent_chunk_offset(leaf, ptr);
8298 rec->length = btrfs_dev_extent_length(leaf, ptr);
8299 rec->cache.size = rec->length;
8301 INIT_LIST_HEAD(&rec->chunk_list);
8302 INIT_LIST_HEAD(&rec->device_list);
/*
 * Handle one DEV_EXTENT item found in leaf @eb: build a record with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache with
 * insert_device_extent_record().
 *
 * @dev_extent_cache: tree that receives the new record
 * @key:              key of the item
 * @eb:               leaf that contains the item
 */
8308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8309 struct btrfs_key *key, struct extent_buffer *eb,
8312 struct device_extent_record *rec;
8315 rec = btrfs_new_device_extent_record(eb, key, slot);
8316 ret = insert_device_extent_record(dev_extent_cache, rec);
8319 "Device extent[%llu, %llu, %llu] existed.\n",
8320 rec->objectid, rec->offset, rec->length);
/*
 * Handle one EXTENT_ITEM or METADATA_ITEM found at @slot of leaf @eb.
 *
 * The extent length is the node size for METADATA_ITEM keys and key.offset
 * otherwise.  Extents whose start is not sector aligned, metadata extents
 * whose length differs from the node size, and data extents whose length is
 * not sector aligned are reported through error() ("ignoring"/"ignore").
 *
 * For an accepted extent an extent_record template is filled in and passed
 * to add_extent_rec(); afterwards every inline reference stored behind the
 * extent item is walked and registered with add_tree_backref() or
 * add_data_backref() according to its type.
 *
 * Items smaller than struct btrfs_extent_item are handled as the v0 format
 * when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * @root:         any root of the filesystem; only root->fs_info is used here
 * @extent_cache: cache tree that collects extent records and backrefs
 * @eb:           leaf that contains the item
 * @slot:         slot of the item inside @eb
 */
8327 static int process_extent_item(struct btrfs_root *root,
8328 struct cache_tree *extent_cache,
8329 struct extent_buffer *eb, int slot)
8331 struct btrfs_extent_item *ei;
8332 struct btrfs_extent_inline_ref *iref;
8333 struct btrfs_extent_data_ref *dref;
8334 struct btrfs_shared_data_ref *sref;
8335 struct btrfs_key key;
8336 struct extent_record tmpl;
8341 u32 item_size = btrfs_item_size_nr(eb, slot);
8347 btrfs_item_key_to_cpu(eb, &key, slot);
8349 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8351 num_bytes = root->fs_info->nodesize;
8353 num_bytes = key.offset;
/* The extent start (key.objectid) must be sector aligned. */
8356 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8357 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8358 key.objectid, root->fs_info->sectorsize);
8361 if (item_size < sizeof(*ei)) {
8362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8363 struct btrfs_extent_item_v0 *ei0;
8364 BUG_ON(item_size != sizeof(*ei0));
8365 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8366 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 items carry no inline refs: record the extent and return directly. */
8370 memset(&tmpl, 0, sizeof(tmpl));
8371 tmpl.start = key.objectid;
8372 tmpl.nr = num_bytes;
8373 tmpl.extent_item_refs = refs;
8374 tmpl.metadata = metadata;
8376 tmpl.max_size = num_bytes;
8378 return add_extent_rec(extent_cache, &tmpl);
8381 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8382 refs = btrfs_extent_refs(eb, ei);
8383 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8387 if (metadata && num_bytes != root->fs_info->nodesize) {
8388 error("ignore invalid metadata extent, length %llu does not equal to %u",
8389 num_bytes, root->fs_info->nodesize);
8392 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8393 error("ignore invalid data extent, length %llu is not aligned to %u",
8394 num_bytes, root->fs_info->sectorsize);
8398 memset(&tmpl, 0, sizeof(tmpl));
8399 tmpl.start = key.objectid;
8400 tmpl.nr = num_bytes;
8401 tmpl.extent_item_refs = refs;
8402 tmpl.metadata = metadata;
8404 tmpl.max_size = num_bytes;
/* NOTE(review): unlike the v0 path above, this return value is not examined. */
8405 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for a tree block keyed by
 * EXTENT_ITEM_KEY a btrfs_tree_block_info sits in between and is skipped.
 */
8407 ptr = (unsigned long)(ei + 1);
8408 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8409 key.type == BTRFS_EXTENT_ITEM_KEY)
8410 ptr += sizeof(struct btrfs_tree_block_info);
8412 end = (unsigned long)ei + item_size;
8414 iref = (struct btrfs_extent_inline_ref *)ptr;
8415 type = btrfs_extent_inline_ref_type(eb, iref);
8416 offset = btrfs_extent_inline_ref_offset(eb, iref);
8418 case BTRFS_TREE_BLOCK_REF_KEY:
8419 ret = add_tree_backref(extent_cache, key.objectid,
8423 "add_tree_backref failed (extent items tree block): %s",
8426 case BTRFS_SHARED_BLOCK_REF_KEY:
8427 ret = add_tree_backref(extent_cache, key.objectid,
8431 "add_tree_backref failed (extent items shared block): %s",
8434 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref is read from the location of the inline ref's offset field. */
8435 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8436 add_data_backref(extent_cache, key.objectid, 0,
8437 btrfs_extent_data_ref_root(eb, dref),
8438 btrfs_extent_data_ref_objectid(eb,
8440 btrfs_extent_data_ref_offset(eb, dref),
8441 btrfs_extent_data_ref_count(eb, dref),
8444 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref (its count) is read from directly behind the inline ref. */
8445 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8446 add_data_backref(extent_cache, key.objectid, offset,
8448 btrfs_shared_data_ref_count(eb, sref),
8452 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8453 key.objectid, key.type, num_bytes);
8456 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * covered by exactly one matching entry of the loaded free space cache, and
 * remove that entry.
 *
 * Before the lookup, the range is trimmed around every super block mirror
 * that btrfs_rmap_block() maps into this block group: a stripe at the front
 * or at the back shortens the range, and a stripe in the middle makes the
 * function recurse on the left part and continue with the right part.
 *
 * The remaining range must match a free space entry in both offset and
 * length; mismatches and missing entries are reported on stderr.  A matching
 * entry is unlinked from cache->free_space_ctl, so entries left over after
 * all ranges were checked can be detected by the caller.
 *
 * @root:   any root of the filesystem; only root->fs_info is used here
 * @cache:  block group whose free space cache is being verified
 * @offset: start of the range expected to be free
 * @bytes:  length of the range expected to be free
 */
8463 static int check_cache_range(struct btrfs_root *root,
8464 struct btrfs_block_group_cache *cache,
8465 u64 offset, u64 bytes)
8467 struct btrfs_free_space *entry;
/* Look at every super block mirror location. */
8473 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8474 bytenr = btrfs_sb_offset(i);
8475 ret = btrfs_rmap_block(root->fs_info,
8476 cache->key.objectid, bytenr, 0,
8477 &logical, &nr, &stripe_len);
8482 if (logical[nr] + stripe_len <= offset)
8484 if (offset + bytes <= logical[nr])
8486 if (logical[nr] == offset) {
8487 if (stripe_len >= bytes) {
8491 bytes -= stripe_len;
8492 offset += stripe_len;
8493 } else if (logical[nr] < offset) {
8494 if (logical[nr] + stripe_len >=
8499 bytes = (offset + bytes) -
8500 (logical[nr] + stripe_len);
8501 offset = logical[nr] + stripe_len;
8504 * Could be tricky, the super may land in the
8505 * middle of the area we're checking. First
8506 * check the easiest case, it's at the end.
8508 if (logical[nr] + stripe_len >=
8510 bytes = logical[nr] - offset;
8514 /* Check the left side */
8515 ret = check_cache_range(root, cache,
8517 logical[nr] - offset);
8523 /* Now we continue with the right side */
8524 bytes = (offset + bytes) -
8525 (logical[nr] + stripe_len);
8526 offset = logical[nr] + stripe_len;
/* The (possibly trimmed) range must be present as one exact free space entry. */
8533 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8535 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8536 offset, offset+bytes);
8540 if (entry->offset != offset) {
8541 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8546 if (entry->bytes != bytes) {
8547 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8548 bytes, entry->bytes, offset);
/* Consume the verified entry from the free space cache. */
8552 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the loaded free space cache of block group @cache against the
 * extent tree.
 *
 * The extent tree is walked from the start of the block group (but not below
 * BTRFS_SUPER_INFO_OFFSET).  `last` tracks the end of the most recent
 * allocated extent; every gap between `last` and the next EXTENT_ITEM /
 * METADATA_ITEM, as well as the tail of the block group, is handed to
 * check_cache_range(), which expects a matching free space entry and removes
 * it.  Entries still present in the free space rb-tree afterwards are
 * reported on stderr.
 *
 * @root:  any root of the filesystem; it is replaced by the extent root
 * @cache: block group to verify
 */
8557 static int verify_space_cache(struct btrfs_root *root,
8558 struct btrfs_block_group_cache *cache)
8560 struct btrfs_path path;
8561 struct extent_buffer *leaf;
8562 struct btrfs_key key;
/* All tree operations below run on the extent tree. */
8566 root = root->fs_info->extent_root;
8568 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8570 btrfs_init_path(&path);
8571 key.objectid = last;
8573 key.type = BTRFS_EXTENT_ITEM_KEY;
8574 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8579 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8580 ret = btrfs_next_leaf(root, &path);
8588 leaf = path.nodes[0];
8589 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test whether the key lies at or past the end of this block group. */
8590 if (key.objectid >= cache->key.offset + cache->key.objectid)
8592 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8593 key.type != BTRFS_METADATA_ITEM_KEY) {
8598 if (last == key.objectid) {
8599 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8600 last = key.objectid + key.offset;
8602 last = key.objectid + root->fs_info->nodesize;
8607 ret = check_cache_range(root, cache, last,
8608 key.objectid - last);
8611 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8612 last = key.objectid + key.offset;
8614 last = key.objectid + root->fs_info->nodesize;
8618 if (last < cache->key.objectid + cache->key.offset)
8619 ret = check_cache_range(root, cache, last,
8620 cache->key.objectid +
8621 cache->key.offset - last);
8624 btrfs_release_path(&path);
8627 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8628 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * If the super block carries a cache generation other than -1 that differs
 * from the super generation, a notice is printed that the space cache will be
 * invalidated.  Otherwise block groups are visited in address order starting
 * behind the primary super block.  For each one the free space ctl is set up
 * (or emptied if it already exists), the free space is loaded from the free
 * space tree when the FREE_SPACE_TREE compat_ro bit is set and from the v1
 * space cache otherwise, and the result is checked with
 * verify_space_cache().
 *
 * Returns -EINVAL if the local error indicator is non-zero, 0 otherwise.
 *
 * @root: any root of the filesystem
 */
8636 static int check_space_cache(struct btrfs_root *root)
8638 struct btrfs_block_group_cache *cache;
8639 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8643 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8644 btrfs_super_generation(root->fs_info->super_copy) !=
8645 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8646 printf("cache and super generation don't match, space cache "
8647 "will be invalidated\n");
8651 if (ctx.progress_enabled) {
8652 ctx.tp = TASK_FREE_SPACE;
8653 task_start(ctx.info);
8657 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right behind this block group. */
8661 start = cache->key.objectid + cache->key.offset;
8662 if (!cache->free_space_ctl) {
8663 if (btrfs_init_free_space_ctl(cache,
8664 root->fs_info->sectorsize)) {
8669 btrfs_remove_free_space_cache(cache);
8672 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8673 ret = exclude_super_stripes(root, cache);
8675 fprintf(stderr, "could not exclude super stripes: %s\n",
8680 ret = load_free_space_tree(root->fs_info, cache);
8681 free_excluded_extents(root, cache);
8683 fprintf(stderr, "could not load free space tree: %s\n",
8690 ret = load_free_space_cache(root->fs_info, cache);
8695 ret = verify_space_cache(root, cache);
8697 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8698 cache->key.objectid);
8703 task_stop(ctx.info);
/* `error` is used as a plain value here, presumably a local declared in this function - confirm. */
8705 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sector against the checksums stored in a csum item.
 *
 * The data is read with read_extent_data() into a buffer of @num_bytes bytes.
 * For each sector a checksum is computed with btrfs_csum_data() /
 * btrfs_csum_final() and compared with the expected value read from @eb at
 * @leaf_offset + sector_index * csum_size.  A mismatch is reported on stderr
 * together with the mirror number, and the number of copies is queried so
 * another mirror can be considered.
 *
 * @root:        any root of the filesystem; only root->fs_info is used
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; tested against the sector size first
 * @leaf_offset: offset inside @eb of the first stored checksum
 * @eb:          leaf that holds the csum item
 */
8708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8709 u64 num_bytes, unsigned long leaf_offset,
8710 struct extent_buffer *eb) {
8712 struct btrfs_fs_info *fs_info = root->fs_info;
8714 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8716 unsigned long csum_offset;
8720 u64 data_checked = 0;
8726 if (num_bytes % fs_info->sectorsize)
8729 data = malloc(num_bytes);
8733 while (offset < num_bytes) {
8736 read_len = num_bytes - offset;
8737 /* read as much space once a time */
8738 ret = read_extent_data(fs_info, data + offset,
8739 bytenr + offset, &read_len, mirror);
8743 /* verify every 4k data's checksum */
8744 while (data_checked < read_len) {
8746 tmp = offset + data_checked;
8748 csum = btrfs_csum_data((char *)data + tmp,
8749 csum, fs_info->sectorsize);
8750 btrfs_csum_final(csum, (u8 *)&csum);
/* One stored checksum of csum_size bytes per sector, indexed by sector number. */
8752 csum_offset = leaf_offset +
8753 tmp / fs_info->sectorsize * csum_size;
/*
 * NOTE(review): csum_size bytes are copied into csum_expected, which is
 * later printed with %u; if it is a 32-bit variable this assumes
 * csum_size <= 4 - confirm.
 */
8754 read_extent_buffer(eb, (char *)&csum_expected,
8755 csum_offset, csum_size);
8756 /* try another mirror */
8757 if (csum != csum_expected) {
8758 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8759 mirror, bytenr + tmp,
8760 csum, csum_expected);
8761 num_copies = btrfs_num_copies(root->fs_info,
8763 if (mirror < num_copies - 1) {
/* The checksum granularity is the filesystem sector size. */
8768 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr is covered by EXTENT_ITEMs in
 * the extent tree.
 *
 * The extent tree is searched for (@bytenr, EXTENT_ITEM, -1) and the cursor
 * is stepped back so that an extent starting at or before @bytenr can be
 * found.  Each EXTENT_ITEM that overlaps the range shrinks it: an overlap at
 * the front advances @bytenr, an overlap at the back shortens the length, and
 * an extent in the middle makes the function recurse on the right-hand part
 * and keep the left-hand part.  If any part of the range is still uncovered
 * at the end, "There are no extents for csum range" is printed.
 *
 * @root:   any root of the filesystem
 * @bytenr: logical start of the range to check
 */
8777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8780 struct btrfs_path path;
8781 struct extent_buffer *leaf;
8782 struct btrfs_key key;
8785 btrfs_init_path(&path);
8786 key.objectid = bytenr;
8787 key.type = BTRFS_EXTENT_ITEM_KEY;
8788 key.offset = (u64)-1;
8791 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8794 fprintf(stderr, "Error looking up extent record %d\n", ret);
8795 btrfs_release_path(&path);
/*
 * NOTE(review): the search above runs on fs_info->extent_root, while the
 * leaf-stepping helpers below are passed `root` - confirm that this is
 * equivalent for every caller.
 */
8798 if (path.slots[0] > 0) {
8801 ret = btrfs_prev_leaf(root, &path);
8804 } else if (ret > 0) {
8811 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8814 * Block group items come before extent items if they have the same
8815 * bytenr, so walk back one more just in case. Dear future traveller,
8816 * first congrats on mastering time travel. Now if it's not too much
8817 * trouble could you go back to 2006 and tell Chris to make the
8818 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8819 * EXTENT_ITEM_KEY please?
8821 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8822 if (path.slots[0] > 0) {
8825 ret = btrfs_prev_leaf(root, &path);
8828 } else if (ret > 0) {
8833 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8837 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8838 ret = btrfs_next_leaf(root, &path);
8840 fprintf(stderr, "Error going to next leaf "
8842 btrfs_release_path(&path);
8848 leaf = path.nodes[0];
8849 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8850 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8854 if (key.objectid + key.offset < bytenr) {
8858 if (key.objectid > bytenr + num_bytes)
8861 if (key.objectid == bytenr) {
8862 if (key.offset >= num_bytes) {
8866 num_bytes -= key.offset;
8867 bytenr += key.offset;
8868 } else if (key.objectid < bytenr) {
8869 if (key.objectid + key.offset >= bytenr + num_bytes) {
8873 num_bytes = (bytenr + num_bytes) -
8874 (key.objectid + key.offset);
8875 bytenr = key.objectid + key.offset;
8877 if (key.objectid + key.offset < bytenr + num_bytes) {
8878 u64 new_start = key.objectid + key.offset;
8879 u64 new_bytes = bytenr + num_bytes - new_start;
8882 * Weird case, the extent is in the middle of
8883 * our range, we'll have to search one side
8884 * and then the other. Not sure if this happens
8885 * in real life, but no harm in coding it up
8886 * anyway just in case.
8888 btrfs_release_path(&path);
8889 ret = check_extent_exists(root, new_start,
8892 fprintf(stderr, "Right section didn't "
8896 num_bytes = key.objectid - bytenr;
8899 num_bytes = key.objectid - bytenr;
8906 if (num_bytes && !ret) {
8907 fprintf(stderr, "There are no extents for csum range "
8908 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8912 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed byte range is backed by
 * an extent record.
 *
 * EXTENT_CSUM items are visited in key order.  The number of data bytes an
 * item covers is (item size / csum size) * sector size.  Consecutive items
 * are merged into one range [offset, offset + num_bytes); when an item does
 * not start where the current range ends, the finished range is passed to
 * check_extent_exists() and a new range is started at the item's key.offset.
 *
 * When the global check_data_csum is set, the data of each item is
 * additionally read and verified with check_extent_csums(); otherwise that
 * step is skipped via the skip_csum_check label.
 *
 * @root: any root of the filesystem; it is replaced by the csum root
 */
8916 static int check_csums(struct btrfs_root *root)
8918 struct btrfs_path path;
8919 struct extent_buffer *leaf;
8920 struct btrfs_key key;
8921 u64 offset = 0, num_bytes = 0;
8922 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8926 unsigned long leaf_offset;
/* All tree operations below run on the csum tree. */
8928 root = root->fs_info->csum_root;
8929 if (!extent_buffer_uptodate(root->node)) {
8930 fprintf(stderr, "No valid csum tree found\n");
8934 btrfs_init_path(&path);
8935 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8936 key.type = BTRFS_EXTENT_CSUM_KEY;
8938 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8940 fprintf(stderr, "Error searching csum tree %d\n", ret);
8941 btrfs_release_path(&path);
8945 if (ret > 0 && path.slots[0])
8950 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8951 ret = btrfs_next_leaf(root, &path);
8953 fprintf(stderr, "Error going to next leaf "
8960 leaf = path.nodes[0];
8962 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8963 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one sector per stored checksum. */
8968 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8969 csum_size) * root->fs_info->sectorsize;
8970 if (!check_data_csum)
8971 goto skip_csum_check;
8972 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8973 ret = check_extent_csums(root, key.offset, data_len,
8979 offset = key.offset;
8980 } else if (key.offset != offset + num_bytes) {
8981 ret = check_extent_exists(root, offset, num_bytes);
8983 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8984 "there is no extent record\n",
8985 offset, offset+num_bytes);
8988 offset = key.offset;
/* Extend the current contiguous range by this item's data. */
8991 num_bytes += data_len;
8995 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset, testing at each level whether @key's field is smaller and
 * descending to the next field only when the current ones are equal.
 *
 * Presumably returns non-zero when @key sorts strictly before @drop_key and
 * zero otherwise - confirm against the return statements.
 */
8999 static int is_dropped_key(struct btrfs_key *key,
9000 struct btrfs_key *drop_key) {
9001 if (key->objectid < drop_key->objectid)
9003 else if (key->objectid == drop_key->objectid) {
9004 if (key->type < drop_key->type)
9006 else if (key->type == drop_key->type) {
9007 if (key->offset < drop_key->offset)
9015 * Here are the rules for FULL_BACKREF.
9017 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9018 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9020 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9021 * if it happened after the relocation occurred since we'll have dropped the
9022 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9023 * have no real way to know for sure.
9025 * We process the blocks one root at a time, and we start from the lowest root
9026 * objectid and go to the highest. So we can just lookup the owner backref for
9027 * the record and if we don't find it then we know it doesn't exist and we have
9030 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9031 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9032 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and record any contradiction with what was
 * decided for the same block earlier.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * decision then looks at, in order: whether the root objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node of @ri, whether
 * the RELOC header flag is set, whether the header owner equals the root
 * objectid, and finally whether a tree backref for the header owner exists
 * on the record.
 *
 * If the record already holds a decision (flag_block_full_backref is not
 * FLAG_UNSET) that differs from the new one, rec->bad_full_backref is set.
 *
 * @extent_cache: cache tree holding the extent records
 * @buf:          tree block being examined
 * @ri:           root item record of the tree currently being walked
 */
9034 static int calc_extent_flag(struct cache_tree *extent_cache,
9035 struct extent_buffer *buf,
9036 struct root_item_record *ri,
9039 struct extent_record *rec;
9040 struct cache_extent *cache;
9041 struct tree_backref *tback;
9044 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9045 /* we have added this extent before */
9049 rec = container_of(cache, struct extent_record, cache);
9052 * Except file/reloc tree, we can not have
9055 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9060 if (buf->start == ri->bytenr)
9063 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9066 owner = btrfs_header_owner(buf);
9067 if (owner == ri->objectid)
/* Look for a backref keyed by the header owner (parent 0). */
9070 tback = find_tree_backref(rec, 0, owner);
9075 if (rec->flag_block_full_backref != FLAG_UNSET &&
9076 rec->flag_block_full_backref != 0)
9077 rec->bad_full_backref = 1;
9080 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9081 if (rec->flag_block_full_backref != FLAG_UNSET &&
9082 rec->flag_block_full_backref != 1)
9083 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<objectid>)" to stderr,
 * using print_key_type() and print_objectid() to render the two values.
 *
 * @key_type: key type that was found
 * @rootid:   objectid of the tree it was found in
 */
9087 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9089 fprintf(stderr, "Invalid key type(");
9090 print_key_type(stderr, 0, key_type);
9091 fprintf(stderr, ") found in root(");
9092 print_objectid(stderr, rootid, 0);
9093 fprintf(stderr, ")\n");
9097 * Check if the key is valid with its extent buffer.
9099 * This is an early check in case an invalid key exists in an extent buffer
9100 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 *
 * Rules visible here:
 *   DEV_ITEM, CHUNK_ITEM                      - chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP   - extent tree only
 *   ROOT_ITEM                                 - root tree only
 *   DEV_EXTENT                                - device tree only
 * A violation is reported through report_mismatch_key_root().
 *
 * @rootid:   objectid of the tree that owns the leaf
 * @key_type: type of the key found in that leaf
 */
9103 static int check_type_with_root(u64 rootid, u8 key_type)
9106 /* Only valid in chunk tree */
9107 case BTRFS_DEV_ITEM_KEY:
9108 case BTRFS_CHUNK_ITEM_KEY:
9109 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9112 /* valid in csum and log tree */
/*
 * NOTE(review): every other case label is a *_KEY constant, but the next one
 * is a tree objectid; presumably BTRFS_EXTENT_CSUM_KEY was intended - confirm.
 */
9113 case BTRFS_CSUM_TREE_OBJECTID:
9114 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9118 case BTRFS_EXTENT_ITEM_KEY:
9119 case BTRFS_METADATA_ITEM_KEY:
9120 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9121 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9124 case BTRFS_ROOT_ITEM_KEY:
9125 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9128 case BTRFS_DEV_EXTENT_KEY:
9129 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9135 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the tree described by @ri.
 *
 * A batch of candidate blocks is chosen with pick_next_pending() and
 * read-ahead is issued for them; the first one is then removed from the
 * @pending, @reada and @nodes trees, read with read_tree_block() and checked
 * with check_block().  Its FULL_BACKREF state is determined (from the extent
 * tree via btrfs_lookup_extent_info() unless init_extent_tree is set, with
 * calc_extent_flag() as the alternative) and stored on its extent record.
 *
 * For a leaf, every item is dispatched by key type to the matching
 * process_*_item() / add_*_backref() helper, and EXTENT_DATA items add a
 * data backref plus byte accounting.  For an interior node, every child
 * pointer gets an extent record and a tree backref and is queued in @nodes
 * or @pending.  Global size statistics are updated at the end.
 *
 * @root:              any root of the filesystem
 * @bits:              scratch array filled by pick_next_pending()
 * @pending:           blocks waiting to be processed
 * @seen:              blocks already queued once (used by add_pending())
 * @reada:             blocks for which read-ahead was issued
 * @nodes:             queued interior nodes
 * @extent_cache:      extent records and backrefs collected so far
 * @chunk_cache:       chunk records
 * @dev_cache:         device records
 * @block_group_cache: block group records
 * @dev_extent_cache:  device extent records
 * @ri:                root item record of the tree being walked
 */
9139 static int run_next_block(struct btrfs_root *root,
9140 struct block_info *bits,
9143 struct cache_tree *pending,
9144 struct cache_tree *seen,
9145 struct cache_tree *reada,
9146 struct cache_tree *nodes,
9147 struct cache_tree *extent_cache,
9148 struct cache_tree *chunk_cache,
9149 struct rb_root *dev_cache,
9150 struct block_group_tree *block_group_cache,
9151 struct device_extent_tree *dev_extent_cache,
9152 struct root_item_record *ri)
9154 struct btrfs_fs_info *fs_info = root->fs_info;
9155 struct extent_buffer *buf;
9156 struct extent_record *rec = NULL;
9167 struct btrfs_key key;
9168 struct cache_extent *cache;
/* Choose the next batch of blocks into bits[]. */
9171 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9172 bits_nr, &reada_bits);
9177 for(i = 0; i < nritems; i++) {
9178 ret = add_cache_extent(reada, bits[i].start,
9183 /* fixme, get the parent transid */
9184 readahead_tree_block(fs_info, bits[i].start, 0);
/* Only the first block of the batch is processed by this call. */
9187 *last = bits[0].start;
9188 bytenr = bits[0].start;
9189 size = bits[0].size;
/* Take the block out of the pending, reada and nodes trees. */
9191 cache = lookup_cache_extent(pending, bytenr, size);
9193 remove_cache_extent(pending, cache);
9196 cache = lookup_cache_extent(reada, bytenr, size);
9198 remove_cache_extent(reada, cache);
9201 cache = lookup_cache_extent(nodes, bytenr, size);
9203 remove_cache_extent(nodes, cache);
/* Pick up the extent record, which supplies the expected parent generation. */
9206 cache = lookup_cache_extent(extent_cache, bytenr, size);
9208 rec = container_of(cache, struct extent_record, cache);
9209 gen = rec->parent_generation;
9212 /* fixme, get the real parent transid */
9213 buf = read_tree_block(root->fs_info, bytenr, gen);
9214 if (!extent_buffer_uptodate(buf)) {
9215 record_bad_block_io(root->fs_info,
9216 extent_cache, bytenr, size);
9220 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block flags: from the extent tree unless it is being
 * rebuilt (init_extent_tree), otherwise via calc_extent_flag().  If
 * calc_extent_flag() fails, FULL_BACKREF is assumed.
 */
9223 if (!init_extent_tree) {
9224 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9225 btrfs_header_level(buf), 1, NULL,
9228 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9230 fprintf(stderr, "Couldn't calc extent flags\n");
9231 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9236 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9238 fprintf(stderr, "Couldn't calc extent flags\n");
9239 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * NOTE(review): rec starts out NULL and is only assigned from the
 * extent_cache lookup above; the rec-> stores below assume that lookup
 * succeeded - confirm there is a guard.
 */
9243 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9245 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9246 ri->objectid == btrfs_header_owner(buf)) {
9248 * Ok we got to this block from it's original owner and
9249 * we have FULL_BACKREF set. Relocation can leave
9250 * converted blocks over so this is altogether possible,
9251 * however it's not possible if the generation > the
9252 * last snapshot, so check for this case.
9254 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9255 btrfs_header_generation(buf) > ri->last_snapshot) {
9256 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9257 rec->bad_full_backref = 1;
9262 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9263 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9265 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
9269 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9270 rec->flag_block_full_backref = 1;
9274 rec->flag_block_full_backref = 0;
9276 owner = btrfs_header_owner(buf);
9279 ret = check_block(root, extent_cache, buf, flags);
9283 if (btrfs_is_leaf(buf)) {
9284 btree_space_waste += btrfs_leaf_free_space(root, buf);
9285 for (i = 0; i < nritems; i++) {
9286 struct btrfs_file_extent_item *fi;
9287 btrfs_item_key_to_cpu(buf, &key, i);
9289 * Check key type against the leaf owner.
9290 * Could filter quite a lot of early error if
9293 if (check_type_with_root(btrfs_header_owner(buf),
9295 fprintf(stderr, "ignoring invalid key\n");
9298 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9299 process_extent_item(root, extent_cache, buf,
9303 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9304 process_extent_item(root, extent_cache, buf,
9308 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9310 btrfs_item_size_nr(buf, i);
9313 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9314 process_chunk_item(chunk_cache, &key, buf, i);
9317 if (key.type == BTRFS_DEV_ITEM_KEY) {
9318 process_device_item(dev_cache, &key, buf, i);
9321 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9322 process_block_group_item(block_group_cache,
9326 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9327 process_device_extent_item(dev_extent_cache,
9332 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9333 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9334 process_extent_ref_v0(extent_cache, buf, i);
9341 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9342 ret = add_tree_backref(extent_cache,
9343 key.objectid, 0, key.offset, 0);
9346 "add_tree_backref failed (leaf tree block): %s",
9350 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9351 ret = add_tree_backref(extent_cache,
9352 key.objectid, key.offset, 0, 0);
9355 "add_tree_backref failed (leaf shared block): %s",
9359 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9360 struct btrfs_extent_data_ref *ref;
9361 ref = btrfs_item_ptr(buf, i,
9362 struct btrfs_extent_data_ref);
9363 add_data_backref(extent_cache,
9365 btrfs_extent_data_ref_root(buf, ref),
9366 btrfs_extent_data_ref_objectid(buf,
9368 btrfs_extent_data_ref_offset(buf, ref),
9369 btrfs_extent_data_ref_count(buf, ref),
9370 0, root->fs_info->sectorsize);
9373 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9374 struct btrfs_shared_data_ref *ref;
9375 ref = btrfs_item_ptr(buf, i,
9376 struct btrfs_shared_data_ref);
9377 add_data_backref(extent_cache,
9378 key.objectid, key.offset, 0, 0, 0,
9379 btrfs_shared_data_ref_count(buf, ref),
9380 0, root->fs_info->sectorsize);
9383 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9384 struct bad_item *bad;
9386 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Queue this orphan item on the global delete_items list. */
9390 bad = malloc(sizeof(struct bad_item));
9393 INIT_LIST_HEAD(&bad->list);
9394 memcpy(&bad->key, &key,
9395 sizeof(struct btrfs_key));
9396 bad->root_id = owner;
9397 list_add_tail(&bad->list, &delete_items);
/* The remainder of the loop body deals with EXTENT_DATA items. */
9400 if (key.type != BTRFS_EXTENT_DATA_KEY)
9402 fi = btrfs_item_ptr(buf, i,
9403 struct btrfs_file_extent_item);
9404 if (btrfs_file_extent_type(buf, fi) ==
9405 BTRFS_FILE_EXTENT_INLINE)
9407 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9410 data_bytes_allocated +=
9411 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this compares the accumulated global counter, not this
 * extent's own disk_num_bytes, against the sector size - confirm intent.
 */
9412 if (data_bytes_allocated < root->fs_info->sectorsize) {
9415 data_bytes_referenced +=
9416 btrfs_file_extent_num_bytes(buf, fi);
9417 add_data_backref(extent_cache,
9418 btrfs_file_extent_disk_bytenr(buf, fi),
9419 parent, owner, key.objectid, key.offset -
9420 btrfs_file_extent_offset(buf, fi), 1, 1,
9421 btrfs_file_extent_disk_num_bytes(buf, fi));
9425 struct btrfs_key first_key;
9427 first_key.objectid = 0;
9430 btrfs_item_key_to_cpu(buf, &first_key, 0);
9431 level = btrfs_header_level(buf);
9432 for (i = 0; i < nritems; i++) {
9433 struct extent_record tmpl;
9435 ptr = btrfs_node_blockptr(buf, i);
9436 size = root->fs_info->nodesize;
9437 btrfs_node_key_to_cpu(buf, &key, i);
9439 if ((level == ri->drop_level)
9440 && is_dropped_key(&key, &ri->drop_key)) {
9445 memset(&tmpl, 0, sizeof(tmpl));
9446 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9447 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9452 tmpl.max_size = size;
9453 ret = add_extent_rec(extent_cache, &tmpl);
9457 ret = add_tree_backref(extent_cache, ptr, parent,
9461 "add_tree_backref failed (non-leaf block): %s",
9467 add_pending(nodes, seen, ptr, size);
9469 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of this node count as wasted btree space. */
9472 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9473 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size accounting for the block just processed. */
9475 total_btree_bytes += buf->len;
9476 if (fs_root_objectid(btrfs_header_owner(buf)))
9477 total_fs_tree_bytes += buf->len;
9478 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9479 total_extent_tree_bytes += buf->len;
9481 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and register its extent.
 *
 * @buf is added to @nodes when its level is above 0 and to @pending
 * otherwise.  An extent record covering buf->start is added to
 * @extent_cache, followed by a tree backref: keyed by parent (buf->start)
 * for the reloc tree or for blocks older than BTRFS_MIXED_BACKREF_REV, and
 * keyed by root objectid otherwise.
 *
 * @buf:          root node of the tree
 * @extent_cache: cache tree collecting extent records and backrefs
 * @pending:      queue for leaves
 * @seen:         blocks already queued once (used by add_pending())
 * @nodes:        queue for interior nodes
 */
9485 static int add_root_to_pending(struct extent_buffer *buf,
9486 struct cache_tree *extent_cache,
9487 struct cache_tree *pending,
9488 struct cache_tree *seen,
9489 struct cache_tree *nodes,
9492 struct extent_record tmpl;
9495 if (btrfs_header_level(buf) > 0)
9496 add_pending(nodes, seen, buf->start, buf->len);
9498 add_pending(pending, seen, buf->start, buf->len);
/* Register the block itself as an extent; the result is not examined here. */
9500 memset(&tmpl, 0, sizeof(tmpl));
9501 tmpl.start = buf->start;
9506 tmpl.max_size = buf->len;
9507 add_extent_rec(extent_cache, &tmpl);
9509 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9510 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9511 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9514 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9519 /* as we fix the tree, we might be deleting blocks that
9520 * we're tracking for repair. This hook makes sure we
9521 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped during repair; keeps the
 * in-memory extent cache (root->fs_info->fsck_extent_cache) in sync.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up.
 * @owner >= BTRFS_FIRST_FREE_OBJECTID selects the data path, which finds the
 * matching data backref and lowers its found_ref / num_refs counters and the
 * record's refs / extent_item_refs by refs_to_drop.  Otherwise the matching
 * tree backref is found and its found_ref / found_extent_tree bits are
 * cleared.  Finally maybe_free_extent_rec() is given the chance to drop the
 * record.
 *
 * @trans:         transaction handle (not used in the visible code)
 * @root:          any root of the filesystem
 * @bytenr:        start of the extent
 * @num_bytes:     length of the extent
 * @parent:        parent block for full backrefs
 * @root_objectid: root that held the reference
 * @owner:         owning objectid
 * @offset:        file offset for data references
 */
9523 static int free_extent_hook(struct btrfs_trans_handle *trans,
9524 struct btrfs_root *root,
9525 u64 bytenr, u64 num_bytes, u64 parent,
9526 u64 root_objectid, u64 owner, u64 offset,
9529 struct extent_record *rec;
9530 struct cache_extent *cache;
9532 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9534 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9535 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9539 rec = container_of(cache, struct extent_record, cache);
9541 struct data_backref *back;
9542 back = find_data_backref(rec, parent, root_objectid, owner,
9543 offset, 1, bytenr, num_bytes);
9546 if (back->node.found_ref) {
9547 back->found_ref -= refs_to_drop;
9549 rec->refs -= refs_to_drop;
9551 if (back->node.found_extent_tree) {
9552 back->num_refs -= refs_to_drop;
9553 if (rec->extent_item_refs)
9554 rec->extent_item_refs -= refs_to_drop;
9556 if (back->found_ref == 0)
9557 back->node.found_ref = 0;
9558 if (back->num_refs == 0)
9559 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is erased only while found_ref is still set;
 * "!back->node.found_ref" may have been intended, so that a backref with no
 * remaining evidence is dropped - confirm.
 */
9561 if (!back->node.found_extent_tree && back->node.found_ref) {
9562 rb_erase(&back->node.node, &rec->backref_tree);
9566 struct tree_backref *back;
9567 back = find_tree_backref(rec, parent, root_objectid);
9570 if (back->node.found_ref) {
9573 back->node.found_ref = 0;
9575 if (back->node.found_extent_tree) {
9576 if (rec->extent_item_refs)
9577 rec->extent_item_refs--;
9578 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref was cleared above whenever it was set, so this
 * condition looks like it can never hold on this path - confirm.
 */
9580 if (!back->node.found_extent_tree && back->node.found_ref) {
9581 rb_erase(&back->node.node, &rec->backref_tree);
9585 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that describe the extent at bytenr.
 *
 * The extent tree is searched from (bytenr, <type>, -1) and walked towards
 * smaller keys.  Items of type EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF and SHARED_DATA_REF are
 * deleted with btrfs_del_item() after a "repair deleting extent record"
 * message; for any other type the search key is moved just below the found
 * key and the search is repeated.  After deleting an EXTENT_ITEM or
 * METADATA_ITEM the block group accounting is adjusted through
 * btrfs_update_block_group(), using key.offset or the node size as length.
 *
 * @trans: transaction handle used for the modifications
 * @root:  any root of the filesystem
 * @path:  caller-provided path, released again before returning
 */
9590 static int delete_extent_records(struct btrfs_trans_handle *trans,
9591 struct btrfs_root *root,
9592 struct btrfs_path *path,
9595 struct btrfs_key key;
9596 struct btrfs_key found_key;
9597 struct extent_buffer *leaf;
9602 key.objectid = bytenr;
9604 key.offset = (u64)-1;
9607 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9614 if (path->slots[0] == 0)
9620 leaf = path->nodes[0];
9621 slot = path->slots[0];
9623 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9624 if (found_key.objectid != bytenr)
9627 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9628 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9629 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9630 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9631 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9632 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9633 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not an extent-related item: move the search key just below it. */
9634 btrfs_release_path(path);
9635 if (found_key.type == 0) {
9636 if (found_key.offset == 0)
9638 key.offset = found_key.offset - 1;
9639 key.type = found_key.type;
9641 key.type = found_key.type - 1;
9642 key.offset = (u64)-1;
9646 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9647 found_key.objectid, found_key.type, found_key.offset);
9649 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9652 btrfs_release_path(path);
9654 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9655 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9656 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9657 found_key.offset : root->fs_info->nodesize;
9659 ret = btrfs_update_block_group(trans, root, bytenr,
9666 btrfs_release_path(path);
9671 * for a single backref, this will allocate a new extent
9672 * and add the backref to it.
9674 static int record_extent(struct btrfs_trans_handle *trans,
9675 struct btrfs_fs_info *info,
9676 struct btrfs_path *path,
9677 struct extent_record *rec,
9678 struct extent_backref *back,
9679 int allocated, u64 flags)
9682 struct btrfs_root *extent_root = info->extent_root;
9683 struct extent_buffer *leaf;
9684 struct btrfs_key ins_key;
9685 struct btrfs_extent_item *ei;
9686 struct data_backref *dback;
9687 struct btrfs_tree_block_info *bi;
9690 rec->max_size = max_t(u64, rec->max_size,
9694 u32 item_size = sizeof(*ei);
9697 item_size += sizeof(*bi);
9699 ins_key.objectid = rec->start;
9700 ins_key.offset = rec->max_size;
9701 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9703 ret = btrfs_insert_empty_item(trans, extent_root, path,
9704 &ins_key, item_size);
9708 leaf = path->nodes[0];
9709 ei = btrfs_item_ptr(leaf, path->slots[0],
9710 struct btrfs_extent_item);
9712 btrfs_set_extent_refs(leaf, ei, 0);
9713 btrfs_set_extent_generation(leaf, ei, rec->generation);
9715 if (back->is_data) {
9716 btrfs_set_extent_flags(leaf, ei,
9717 BTRFS_EXTENT_FLAG_DATA);
9719 struct btrfs_disk_key copy_key;;
9721 bi = (struct btrfs_tree_block_info *)(ei + 1);
9722 memset_extent_buffer(leaf, 0, (unsigned long)bi,
9725 btrfs_set_disk_key_objectid(©_key,
9726 rec->info_objectid);
9727 btrfs_set_disk_key_type(©_key, 0);
9728 btrfs_set_disk_key_offset(©_key, 0);
9730 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9731 btrfs_set_tree_block_key(leaf, bi, ©_key);
9733 btrfs_set_extent_flags(leaf, ei,
9734 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9737 btrfs_mark_buffer_dirty(leaf);
9738 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9739 rec->max_size, 1, 0);
9742 btrfs_release_path(path);
9745 if (back->is_data) {
9749 dback = to_data_backref(back);
9750 if (back->full_backref)
9751 parent = dback->parent;
9755 for (i = 0; i < dback->found_ref; i++) {
9756 /* if parent != 0, we're doing a full backref
9757 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9758 * just makes the backref allocator create a data
9761 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9762 rec->start, rec->max_size,
9766 BTRFS_FIRST_FREE_OBJECTID :
9772 fprintf(stderr, "adding new data backref"
9773 " on %llu %s %llu owner %llu"
9774 " offset %llu found %d\n",
9775 (unsigned long long)rec->start,
9776 back->full_backref ?
9778 back->full_backref ?
9779 (unsigned long long)parent :
9780 (unsigned long long)dback->root,
9781 (unsigned long long)dback->owner,
9782 (unsigned long long)dback->offset,
9786 struct tree_backref *tback;
9788 tback = to_tree_backref(back);
9789 if (back->full_backref)
9790 parent = tback->parent;
9794 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9795 rec->start, rec->max_size,
9796 parent, tback->root, 0, 0);
9797 fprintf(stderr, "adding new tree backref on "
9798 "start %llu len %llu parent %llu root %llu\n",
9799 rec->start, rec->max_size, parent, tback->root);
9802 btrfs_release_path(path);
9806 static struct extent_entry *find_entry(struct list_head *entries,
9807 u64 bytenr, u64 bytes)
9809 struct extent_entry *entry = NULL;
9811 list_for_each_entry(entry, entries, list) {
9812 if (entry->bytenr == bytenr && entry->bytes == bytes)
9819 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9821 struct extent_entry *entry, *best = NULL, *prev = NULL;
9823 list_for_each_entry(entry, entries, list) {
9825 * If there are as many broken entries as entries then we know
9826 * not to trust this particular entry.
9828 if (entry->broken == entry->count)
9832 * Special case, when there are only two entries and 'best' is
9842 * If our current entry == best then we can't be sure our best
9843 * is really the best, so we need to keep searching.
9845 if (best && best->count == entry->count) {
9851 /* Prev == entry, not good enough, have to keep searching */
9852 if (!prev->broken && prev->count == entry->count)
9856 best = (prev->count > entry->count) ? prev : entry;
9857 else if (best->count < entry->count)
9865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9866 struct data_backref *dback, struct extent_entry *entry)
9868 struct btrfs_trans_handle *trans;
9869 struct btrfs_root *root;
9870 struct btrfs_file_extent_item *fi;
9871 struct extent_buffer *leaf;
9872 struct btrfs_key key;
9876 key.objectid = dback->root;
9877 key.type = BTRFS_ROOT_ITEM_KEY;
9878 key.offset = (u64)-1;
9879 root = btrfs_read_fs_root(info, &key);
9881 fprintf(stderr, "Couldn't find root for our ref\n");
9886 * The backref points to the original offset of the extent if it was
9887 * split, so we need to search down to the offset we have and then walk
9888 * forward until we find the backref we're looking for.
9890 key.objectid = dback->owner;
9891 key.type = BTRFS_EXTENT_DATA_KEY;
9892 key.offset = dback->offset;
9893 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9895 fprintf(stderr, "Error looking up ref %d\n", ret);
9900 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9901 ret = btrfs_next_leaf(root, path);
9903 fprintf(stderr, "Couldn't find our ref, next\n");
9907 leaf = path->nodes[0];
9908 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9909 if (key.objectid != dback->owner ||
9910 key.type != BTRFS_EXTENT_DATA_KEY) {
9911 fprintf(stderr, "Couldn't find our ref, search\n");
9914 fi = btrfs_item_ptr(leaf, path->slots[0],
9915 struct btrfs_file_extent_item);
9916 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9917 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9919 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9924 btrfs_release_path(path);
9926 trans = btrfs_start_transaction(root, 1);
9928 return PTR_ERR(trans);
9931 * Ok we have the key of the file extent we want to fix, now we can cow
9932 * down to the thing and fix it.
9934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9936 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9937 key.objectid, key.type, key.offset, ret);
9941 fprintf(stderr, "Well that's odd, we just found this key "
9942 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9947 leaf = path->nodes[0];
9948 fi = btrfs_item_ptr(leaf, path->slots[0],
9949 struct btrfs_file_extent_item);
9951 if (btrfs_file_extent_compression(leaf, fi) &&
9952 dback->disk_bytenr != entry->bytenr) {
9953 fprintf(stderr, "Ref doesn't match the record start and is "
9954 "compressed, please take a btrfs-image of this file "
9955 "system and send it to a btrfs developer so they can "
9956 "complete this functionality for bytenr %Lu\n",
9957 dback->disk_bytenr);
9962 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9963 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9964 } else if (dback->disk_bytenr > entry->bytenr) {
9965 u64 off_diff, offset;
9967 off_diff = dback->disk_bytenr - entry->bytenr;
9968 offset = btrfs_file_extent_offset(leaf, fi);
9969 if (dback->disk_bytenr + offset +
9970 btrfs_file_extent_num_bytes(leaf, fi) >
9971 entry->bytenr + entry->bytes) {
9972 fprintf(stderr, "Ref is past the entry end, please "
9973 "take a btrfs-image of this file system and "
9974 "send it to a btrfs developer, ref %Lu\n",
9975 dback->disk_bytenr);
9980 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9981 btrfs_set_file_extent_offset(leaf, fi, offset);
9982 } else if (dback->disk_bytenr < entry->bytenr) {
9985 offset = btrfs_file_extent_offset(leaf, fi);
9986 if (dback->disk_bytenr + offset < entry->bytenr) {
9987 fprintf(stderr, "Ref is before the entry start, please"
9988 " take a btrfs-image of this file system and "
9989 "send it to a btrfs developer, ref %Lu\n",
9990 dback->disk_bytenr);
9995 offset += dback->disk_bytenr;
9996 offset -= entry->bytenr;
9997 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9998 btrfs_set_file_extent_offset(leaf, fi, offset);
10001 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10004 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10005 * only do this if we aren't using compression, otherwise it's a
10008 if (!btrfs_file_extent_compression(leaf, fi))
10009 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10011 printf("ram bytes may be wrong?\n");
10012 btrfs_mark_buffer_dirty(leaf);
10014 err = btrfs_commit_transaction(trans, root);
10015 btrfs_release_path(path);
10016 return ret ? ret : err;
10019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10020 struct extent_record *rec)
10022 struct extent_backref *back, *tmp;
10023 struct data_backref *dback;
10024 struct extent_entry *entry, *best = NULL;
10025 LIST_HEAD(entries);
10026 int nr_entries = 0;
10027 int broken_entries = 0;
10029 short mismatch = 0;
10032 * Metadata is easy and the backrefs should always agree on bytenr and
10033 * size, if not we've got bigger issues.
10038 rbtree_postorder_for_each_entry_safe(back, tmp,
10039 &rec->backref_tree, node) {
10040 if (back->full_backref || !back->is_data)
10043 dback = to_data_backref(back);
10046 * We only pay attention to backrefs that we found a real
10049 if (dback->found_ref == 0)
10053 * For now we only catch when the bytes don't match, not the
10054 * bytenr. We can easily do this at the same time, but I want
10055 * to have a fs image to test on before we just add repair
10056 * functionality willy-nilly so we know we won't screw up the
10060 entry = find_entry(&entries, dback->disk_bytenr,
10063 entry = malloc(sizeof(struct extent_entry));
10068 memset(entry, 0, sizeof(*entry));
10069 entry->bytenr = dback->disk_bytenr;
10070 entry->bytes = dback->bytes;
10071 list_add_tail(&entry->list, &entries);
10076 * If we only have on entry we may think the entries agree when
10077 * in reality they don't so we have to do some extra checking.
10079 if (dback->disk_bytenr != rec->start ||
10080 dback->bytes != rec->nr || back->broken)
10083 if (back->broken) {
10091 /* Yay all the backrefs agree, carry on good sir */
10092 if (nr_entries <= 1 && !mismatch)
10095 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10096 "%Lu\n", rec->start);
10099 * First we want to see if the backrefs can agree amongst themselves who
10100 * is right, so figure out which one of the entries has the highest
10103 best = find_most_right_entry(&entries);
10106 * Ok so we may have an even split between what the backrefs think, so
10107 * this is where we use the extent ref to see what it thinks.
10110 entry = find_entry(&entries, rec->start, rec->nr);
10111 if (!entry && (!broken_entries || !rec->found_rec)) {
10112 fprintf(stderr, "Backrefs don't agree with each other "
10113 "and extent record doesn't agree with anybody,"
10114 " so we can't fix bytenr %Lu bytes %Lu\n",
10115 rec->start, rec->nr);
10118 } else if (!entry) {
10120 * Ok our backrefs were broken, we'll assume this is the
10121 * correct value and add an entry for this range.
10123 entry = malloc(sizeof(struct extent_entry));
10128 memset(entry, 0, sizeof(*entry));
10129 entry->bytenr = rec->start;
10130 entry->bytes = rec->nr;
10131 list_add_tail(&entry->list, &entries);
10135 best = find_most_right_entry(&entries);
10137 fprintf(stderr, "Backrefs and extent record evenly "
10138 "split on who is right, this is going to "
10139 "require user input to fix bytenr %Lu bytes "
10140 "%Lu\n", rec->start, rec->nr);
10147 * I don't think this can happen currently as we'll abort() if we catch
10148 * this case higher up, but in case somebody removes that we still can't
10149 * deal with it properly here yet, so just bail out of that's the case.
10151 if (best->bytenr != rec->start) {
10152 fprintf(stderr, "Extent start and backref starts don't match, "
10153 "please use btrfs-image on this file system and send "
10154 "it to a btrfs developer so they can make fsck fix "
10155 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10156 rec->start, rec->nr);
10162 * Ok great we all agreed on an extent record, let's go find the real
10163 * references and fix up the ones that don't match.
10165 rbtree_postorder_for_each_entry_safe(back, tmp,
10166 &rec->backref_tree, node) {
10167 if (back->full_backref || !back->is_data)
10170 dback = to_data_backref(back);
10173 * Still ignoring backrefs that don't have a real ref attached
10176 if (dback->found_ref == 0)
10179 if (dback->bytes == best->bytes &&
10180 dback->disk_bytenr == best->bytenr)
10183 ret = repair_ref(info, path, dback, best);
10189 * Ok we messed with the actual refs, which means we need to drop our
10190 * entire cache and go back and rescan. I know this is a huge pain and
10191 * adds a lot of extra work, but it's the only way to be safe. Once all
10192 * the backrefs agree we may not need to do anything to the extent
10197 while (!list_empty(&entries)) {
10198 entry = list_entry(entries.next, struct extent_entry, list);
10199 list_del_init(&entry->list);
10205 static int process_duplicates(struct cache_tree *extent_cache,
10206 struct extent_record *rec)
10208 struct extent_record *good, *tmp;
10209 struct cache_extent *cache;
10213 * If we found a extent record for this extent then return, or if we
10214 * have more than one duplicate we are likely going to need to delete
10217 if (rec->found_rec || rec->num_duplicates > 1)
10220 /* Shouldn't happen but just in case */
10221 BUG_ON(!rec->num_duplicates);
10224 * So this happens if we end up with a backref that doesn't match the
10225 * actual extent entry. So either the backref is bad or the extent
10226 * entry is bad. Either way we want to have the extent_record actually
10227 * reflect what we found in the extent_tree, so we need to take the
10228 * duplicate out and use that as the extent_record since the only way we
10229 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10231 remove_cache_extent(extent_cache, &rec->cache);
10233 good = to_extent_record(rec->dups.next);
10234 list_del_init(&good->list);
10235 INIT_LIST_HEAD(&good->backrefs);
10236 INIT_LIST_HEAD(&good->dups);
10237 good->cache.start = good->start;
10238 good->cache.size = good->nr;
10239 good->content_checked = 0;
10240 good->owner_ref_checked = 0;
10241 good->num_duplicates = 0;
10242 good->refs = rec->refs;
10243 list_splice_init(&rec->backrefs, &good->backrefs);
10245 cache = lookup_cache_extent(extent_cache, good->start,
10249 tmp = container_of(cache, struct extent_record, cache);
10252 * If we find another overlapping extent and it's found_rec is
10253 * set then it's a duplicate and we need to try and delete
10256 if (tmp->found_rec || tmp->num_duplicates > 0) {
10257 if (list_empty(&good->list))
10258 list_add_tail(&good->list,
10259 &duplicate_extents);
10260 good->num_duplicates += tmp->num_duplicates + 1;
10261 list_splice_init(&tmp->dups, &good->dups);
10262 list_del_init(&tmp->list);
10263 list_add_tail(&tmp->list, &good->dups);
10264 remove_cache_extent(extent_cache, &tmp->cache);
10269 * Ok we have another non extent item backed extent rec, so lets
10270 * just add it to this extent and carry on like we did above.
10272 good->refs += tmp->refs;
10273 list_splice_init(&tmp->backrefs, &good->backrefs);
10274 remove_cache_extent(extent_cache, &tmp->cache);
10277 ret = insert_cache_extent(extent_cache, &good->cache);
10280 return good->num_duplicates ? 0 : 1;
10283 static int delete_duplicate_records(struct btrfs_root *root,
10284 struct extent_record *rec)
10286 struct btrfs_trans_handle *trans;
10287 LIST_HEAD(delete_list);
10288 struct btrfs_path path;
10289 struct extent_record *tmp, *good, *n;
10292 struct btrfs_key key;
10294 btrfs_init_path(&path);
10297 /* Find the record that covers all of the duplicates. */
10298 list_for_each_entry(tmp, &rec->dups, list) {
10299 if (good->start < tmp->start)
10301 if (good->nr > tmp->nr)
10304 if (tmp->start + tmp->nr < good->start + good->nr) {
10305 fprintf(stderr, "Ok we have overlapping extents that "
10306 "aren't completely covered by each other, this "
10307 "is going to require more careful thought. "
10308 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10309 tmp->start, tmp->nr, good->start, good->nr);
10316 list_add_tail(&rec->list, &delete_list);
10318 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10321 list_move_tail(&tmp->list, &delete_list);
10324 root = root->fs_info->extent_root;
10325 trans = btrfs_start_transaction(root, 1);
10326 if (IS_ERR(trans)) {
10327 ret = PTR_ERR(trans);
10331 list_for_each_entry(tmp, &delete_list, list) {
10332 if (tmp->found_rec == 0)
10334 key.objectid = tmp->start;
10335 key.type = BTRFS_EXTENT_ITEM_KEY;
10336 key.offset = tmp->nr;
10338 /* Shouldn't happen but just in case */
10339 if (tmp->metadata) {
10340 fprintf(stderr, "Well this shouldn't happen, extent "
10341 "record overlaps but is metadata? "
10342 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10346 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10352 ret = btrfs_del_item(trans, root, &path);
10355 btrfs_release_path(&path);
10358 err = btrfs_commit_transaction(trans, root);
10362 while (!list_empty(&delete_list)) {
10363 tmp = to_extent_record(delete_list.next);
10364 list_del_init(&tmp->list);
10370 while (!list_empty(&rec->dups)) {
10371 tmp = to_extent_record(rec->dups.next);
10372 list_del_init(&tmp->list);
10376 btrfs_release_path(&path);
10378 if (!ret && !nr_del)
10379 rec->num_duplicates = 0;
10381 return ret ? ret : nr_del;
10384 static int find_possible_backrefs(struct btrfs_fs_info *info,
10385 struct btrfs_path *path,
10386 struct cache_tree *extent_cache,
10387 struct extent_record *rec)
10389 struct btrfs_root *root;
10390 struct extent_backref *back, *tmp;
10391 struct data_backref *dback;
10392 struct cache_extent *cache;
10393 struct btrfs_file_extent_item *fi;
10394 struct btrfs_key key;
10398 rbtree_postorder_for_each_entry_safe(back, tmp,
10399 &rec->backref_tree, node) {
10400 /* Don't care about full backrefs (poor unloved backrefs) */
10401 if (back->full_backref || !back->is_data)
10404 dback = to_data_backref(back);
10406 /* We found this one, we don't need to do a lookup */
10407 if (dback->found_ref)
10410 key.objectid = dback->root;
10411 key.type = BTRFS_ROOT_ITEM_KEY;
10412 key.offset = (u64)-1;
10414 root = btrfs_read_fs_root(info, &key);
10416 /* No root, definitely a bad ref, skip */
10417 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10419 /* Other err, exit */
10421 return PTR_ERR(root);
10423 key.objectid = dback->owner;
10424 key.type = BTRFS_EXTENT_DATA_KEY;
10425 key.offset = dback->offset;
10426 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10428 btrfs_release_path(path);
10431 /* Didn't find it, we can carry on */
10436 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10437 struct btrfs_file_extent_item);
10438 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10439 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10440 btrfs_release_path(path);
10441 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10443 struct extent_record *tmp;
10444 tmp = container_of(cache, struct extent_record, cache);
10447 * If we found an extent record for the bytenr for this
10448 * particular backref then we can't add it to our
10449 * current extent record. We only want to add backrefs
10450 * that don't have a corresponding extent item in the
10451 * extent tree since they likely belong to this record
10452 * and we need to fix it if it doesn't match bytenrs.
10454 if (tmp->found_rec)
10458 dback->found_ref += 1;
10459 dback->disk_bytenr = bytenr;
10460 dback->bytes = bytes;
10463 * Set this so the verify backref code knows not to trust the
10464 * values in this backref.
10473 * Record orphan data ref into corresponding root.
10475 * Return 0 if the extent item contains data ref and recorded.
10476 * Return 1 if the extent item contains no useful data ref
10477 * On that case, it may contains only shared_dataref or metadata backref
10478 * or the file extent exists(this should be handled by the extent bytenr
10479 * recovery routine)
10480 * Return <0 if something goes wrong.
10482 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10483 struct extent_record *rec)
10485 struct btrfs_key key;
10486 struct btrfs_root *dest_root;
10487 struct extent_backref *back, *tmp;
10488 struct data_backref *dback;
10489 struct orphan_data_extent *orphan;
10490 struct btrfs_path path;
10491 int recorded_data_ref = 0;
10496 btrfs_init_path(&path);
10497 rbtree_postorder_for_each_entry_safe(back, tmp,
10498 &rec->backref_tree, node) {
10499 if (back->full_backref || !back->is_data ||
10500 !back->found_extent_tree)
10502 dback = to_data_backref(back);
10503 if (dback->found_ref)
10505 key.objectid = dback->root;
10506 key.type = BTRFS_ROOT_ITEM_KEY;
10507 key.offset = (u64)-1;
10509 dest_root = btrfs_read_fs_root(fs_info, &key);
10511 /* For non-exist root we just skip it */
10512 if (IS_ERR(dest_root) || !dest_root)
10515 key.objectid = dback->owner;
10516 key.type = BTRFS_EXTENT_DATA_KEY;
10517 key.offset = dback->offset;
10519 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10520 btrfs_release_path(&path);
10522 * For ret < 0, it's OK since the fs-tree may be corrupted,
10523 * we need to record it for inode/file extent rebuild.
10524 * For ret > 0, we record it only for file extent rebuild.
10525 * For ret == 0, the file extent exists but only bytenr
10526 * mismatch, let the original bytenr fix routine to handle,
10532 orphan = malloc(sizeof(*orphan));
10537 INIT_LIST_HEAD(&orphan->list);
10538 orphan->root = dback->root;
10539 orphan->objectid = dback->owner;
10540 orphan->offset = dback->offset;
10541 orphan->disk_bytenr = rec->cache.start;
10542 orphan->disk_len = rec->cache.size;
10543 list_add(&dest_root->orphan_data_extents, &orphan->list);
10544 recorded_data_ref = 1;
10547 btrfs_release_path(&path);
10549 return !recorded_data_ref;
10555 * when an incorrect extent item is found, this will delete
10556 * all of the existing entries for it and recreate them
10557 * based on what the tree scan found.
10559 static int fixup_extent_refs(struct btrfs_fs_info *info,
10560 struct cache_tree *extent_cache,
10561 struct extent_record *rec)
10563 struct btrfs_trans_handle *trans = NULL;
10565 struct btrfs_path path;
10566 struct cache_extent *cache;
10567 struct extent_backref *back, *tmp;
10571 if (rec->flag_block_full_backref)
10572 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10574 btrfs_init_path(&path);
10575 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10577 * Sometimes the backrefs themselves are so broken they don't
10578 * get attached to any meaningful rec, so first go back and
10579 * check any of our backrefs that we couldn't find and throw
10580 * them into the list if we find the backref so that
10581 * verify_backrefs can figure out what to do.
10583 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10588 /* step one, make sure all of the backrefs agree */
10589 ret = verify_backrefs(info, &path, rec);
10593 trans = btrfs_start_transaction(info->extent_root, 1);
10594 if (IS_ERR(trans)) {
10595 ret = PTR_ERR(trans);
10599 /* step two, delete all the existing records */
10600 ret = delete_extent_records(trans, info->extent_root, &path,
10606 /* was this block corrupt? If so, don't add references to it */
10607 cache = lookup_cache_extent(info->corrupt_blocks,
10608 rec->start, rec->max_size);
10614 /* step three, recreate all the refs we did find */
10615 rbtree_postorder_for_each_entry_safe(back, tmp,
10616 &rec->backref_tree, node) {
10618 * if we didn't find any references, don't create a
10619 * new extent record
10621 if (!back->found_ref)
10624 rec->bad_full_backref = 0;
10625 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10633 int err = btrfs_commit_transaction(trans, info->extent_root);
10639 fprintf(stderr, "Repaired extent references for %llu\n",
10640 (unsigned long long)rec->start);
10642 btrfs_release_path(&path);
10646 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10647 struct extent_record *rec)
10649 struct btrfs_trans_handle *trans;
10650 struct btrfs_root *root = fs_info->extent_root;
10651 struct btrfs_path path;
10652 struct btrfs_extent_item *ei;
10653 struct btrfs_key key;
10657 key.objectid = rec->start;
10658 if (rec->metadata) {
10659 key.type = BTRFS_METADATA_ITEM_KEY;
10660 key.offset = rec->info_level;
10662 key.type = BTRFS_EXTENT_ITEM_KEY;
10663 key.offset = rec->max_size;
10666 trans = btrfs_start_transaction(root, 0);
10668 return PTR_ERR(trans);
10670 btrfs_init_path(&path);
10671 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10673 btrfs_release_path(&path);
10674 btrfs_commit_transaction(trans, root);
10677 fprintf(stderr, "Didn't find extent for %llu\n",
10678 (unsigned long long)rec->start);
10679 btrfs_release_path(&path);
10680 btrfs_commit_transaction(trans, root);
10684 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10685 struct btrfs_extent_item);
10686 flags = btrfs_extent_flags(path.nodes[0], ei);
10687 if (rec->flag_block_full_backref) {
10688 fprintf(stderr, "setting full backref on %llu\n",
10689 (unsigned long long)key.objectid);
10690 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10692 fprintf(stderr, "clearing full backref on %llu\n",
10693 (unsigned long long)key.objectid);
10694 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10697 btrfs_mark_buffer_dirty(path.nodes[0]);
10698 btrfs_release_path(&path);
10699 ret = btrfs_commit_transaction(trans, root);
10701 fprintf(stderr, "Repaired extent flags for %llu\n",
10702 (unsigned long long)rec->start);
10707 /* right now we only prune from the extent allocation tree */
10708 static int prune_one_block(struct btrfs_trans_handle *trans,
10709 struct btrfs_fs_info *info,
10710 struct btrfs_corrupt_block *corrupt)
10713 struct btrfs_path path;
10714 struct extent_buffer *eb;
10718 int level = corrupt->level + 1;
10720 btrfs_init_path(&path);
10722 /* we want to stop at the parent to our busted block */
10723 path.lowest_level = level;
10725 ret = btrfs_search_slot(trans, info->extent_root,
10726 &corrupt->key, &path, -1, 1);
10731 eb = path.nodes[level];
10738 * hopefully the search gave us the block we want to prune,
10739 * lets try that first
10741 slot = path.slots[level];
10742 found = btrfs_node_blockptr(eb, slot);
10743 if (found == corrupt->cache.start)
10746 nritems = btrfs_header_nritems(eb);
10748 /* the search failed, lets scan this node and hope we find it */
10749 for (slot = 0; slot < nritems; slot++) {
10750 found = btrfs_node_blockptr(eb, slot);
10751 if (found == corrupt->cache.start)
10755 * we couldn't find the bad block. TODO, search all the nodes for pointers
10758 if (eb == info->extent_root->node) {
10763 btrfs_release_path(&path);
10768 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10769 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10772 btrfs_release_path(&path);
10776 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10778 struct btrfs_trans_handle *trans = NULL;
10779 struct cache_extent *cache;
10780 struct btrfs_corrupt_block *corrupt;
10783 cache = search_cache_extent(info->corrupt_blocks, 0);
10787 trans = btrfs_start_transaction(info->extent_root, 1);
10789 return PTR_ERR(trans);
10791 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10792 prune_one_block(trans, info, corrupt);
10793 remove_cache_extent(info->corrupt_blocks, cache);
10796 return btrfs_commit_transaction(trans, info->extent_root);
10800 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10802 struct btrfs_block_group_cache *cache;
10807 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10808 &start, &end, EXTENT_DIRTY);
10811 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10816 cache = btrfs_lookup_first_block_group(fs_info, start);
10821 start = cache->key.objectid + cache->key.offset;
10825 static int check_extent_refs(struct btrfs_root *root,
10826 struct cache_tree *extent_cache)
10828 struct extent_record *rec;
10829 struct cache_extent *cache;
10835 * if we're doing a repair, we have to make sure
10836 * we don't allocate from the problem extents.
10837 * In the worst case, this will be all the
10838 * extents in the FS
10840 cache = search_cache_extent(extent_cache, 0);
10842 rec = container_of(cache, struct extent_record, cache);
10843 set_extent_dirty(root->fs_info->excluded_extents,
10845 rec->start + rec->max_size - 1);
10846 cache = next_cache_extent(cache);
10849 /* pin down all the corrupted blocks too */
10850 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10852 set_extent_dirty(root->fs_info->excluded_extents,
10854 cache->start + cache->size - 1);
10855 cache = next_cache_extent(cache);
10857 prune_corrupt_blocks(root->fs_info);
10858 reset_cached_block_groups(root->fs_info);
10861 reset_cached_block_groups(root->fs_info);
10864 * We need to delete any duplicate entries we find first otherwise we
10865 * could mess up the extent tree when we have backrefs that actually
10866 * belong to a different extent item and not the weird duplicate one.
10868 while (repair && !list_empty(&duplicate_extents)) {
10869 rec = to_extent_record(duplicate_extents.next);
10870 list_del_init(&rec->list);
10872 /* Sometimes we can find a backref before we find an actual
10873 * extent, so we need to process it a little bit to see if there
10874 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10875 * if this is a backref screwup. If we need to delete stuff
10876 * process_duplicates() will return 0, otherwise it will return
10879 if (process_duplicates(extent_cache, rec))
10881 ret = delete_duplicate_records(root, rec);
10885 * delete_duplicate_records will return the number of entries
10886 * deleted, so if it's greater than 0 then we know we actually
10887 * did something and we need to remove.
10900 cache = search_cache_extent(extent_cache, 0);
10903 rec = container_of(cache, struct extent_record, cache);
10904 if (rec->num_duplicates) {
10905 fprintf(stderr, "extent item %llu has multiple extent "
10906 "items\n", (unsigned long long)rec->start);
10910 if (rec->refs != rec->extent_item_refs) {
10911 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10912 (unsigned long long)rec->start,
10913 (unsigned long long)rec->nr);
10914 fprintf(stderr, "extent item %llu, found %llu\n",
10915 (unsigned long long)rec->extent_item_refs,
10916 (unsigned long long)rec->refs);
10917 ret = record_orphan_data_extents(root->fs_info, rec);
10923 if (all_backpointers_checked(rec, 1)) {
10924 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10925 (unsigned long long)rec->start,
10926 (unsigned long long)rec->nr);
10930 if (!rec->owner_ref_checked) {
10931 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10932 (unsigned long long)rec->start,
10933 (unsigned long long)rec->nr);
10938 if (repair && fix) {
10939 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10945 if (rec->bad_full_backref) {
10946 fprintf(stderr, "bad full backref, on [%llu]\n",
10947 (unsigned long long)rec->start);
10949 ret = fixup_extent_flags(root->fs_info, rec);
10957 * Although it's not a extent ref's problem, we reuse this
10958 * routine for error reporting.
10959 * No repair function yet.
10961 if (rec->crossing_stripes) {
10963 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10964 rec->start, rec->start + rec->max_size);
10968 if (rec->wrong_chunk_type) {
10970 "bad extent [%llu, %llu), type mismatch with chunk\n",
10971 rec->start, rec->start + rec->max_size);
10975 remove_cache_extent(extent_cache, cache);
10976 free_all_extent_backrefs(rec);
10977 if (!init_extent_tree && repair && (!cur_err || fix))
10978 clear_extent_dirty(root->fs_info->excluded_extents,
10980 rec->start + rec->max_size - 1);
10985 if (ret && ret != -EAGAIN) {
10986 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10989 struct btrfs_trans_handle *trans;
10991 root = root->fs_info->extent_root;
10992 trans = btrfs_start_transaction(root, 1);
10993 if (IS_ERR(trans)) {
10994 ret = PTR_ERR(trans);
10998 ret = btrfs_fix_block_accounting(trans, root);
11001 ret = btrfs_commit_transaction(trans, root);
11010 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11014 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11015 stripe_size = length;
11016 stripe_size /= num_stripes;
11017 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11018 stripe_size = length * 2;
11019 stripe_size /= num_stripes;
11020 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11021 stripe_size = length;
11022 stripe_size /= (num_stripes - 1);
11023 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11024 stripe_size = length;
11025 stripe_size /= (num_stripes - 2);
11027 stripe_size = length;
11029 return stripe_size;
11033 * Check the chunk with its block group/dev list ref:
11034 * Return 0 if all refs seem valid.
11035 * Return 1 if only part of the refs seem valid and a later pass must rebuild
11036 * the rest, e.g. a missing block group that needs an extent tree search.
11037 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk record being verified
 * @block_group_cache: searched for a block group record for this chunk
 * @dev_extent_cache:  searched once per stripe, keyed by devid/offset/length
 *
 * The block group record is compared with the chunk on length/offset and
 * offset/objectid (and on the type flags); on the non-error side the record
 * is unlinked from its list and stored in chunk_rec->bg_rec.  Each stripe's
 * device extent is compared on devid, offset, chunk offset and length; on the
 * non-error side it is moved onto chunk_rec->dextents.
 *
 * NOTE(review): this listing omits short lines (braces, else branches,
 * return statements, part of the flags condition), so the exact mapping of
 * each failure to a return value cannot be confirmed from here.
 */
11039 static int check_chunk_refs(struct chunk_record *chunk_rec,
11040 struct block_group_tree *block_group_cache,
11041 struct device_extent_tree *dev_extent_cache,
11044 struct cache_extent *block_group_item;
11045 struct block_group_record *block_group_rec;
11046 struct cache_extent *dev_extent_item;
11047 struct device_extent_record *dev_extent_rec;
11051 int metadump_v2 = 0;
/* Step 1: find the block group record that should describe this chunk. */
11055 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11057 chunk_rec->length);
11058 if (block_group_item) {
11059 block_group_rec = container_of(block_group_item,
11060 struct block_group_record,
11062 if (chunk_rec->length != block_group_rec->offset ||
11063 chunk_rec->offset != block_group_rec->objectid ||
11065 chunk_rec->type_flags != block_group_rec->flags)) {
11068 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11069 chunk_rec->objectid,
11074 chunk_rec->type_flags,
11075 block_group_rec->objectid,
11076 block_group_rec->type,
11077 block_group_rec->offset,
11078 block_group_rec->offset,
11079 block_group_rec->objectid,
11080 block_group_rec->flags);
11083 list_del_init(&block_group_rec->list);
11084 chunk_rec->bg_rec = block_group_rec;
11089 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11090 chunk_rec->objectid,
11095 chunk_rec->type_flags);
/*
 * Step 2: every stripe must be backed by a device extent whose length equals
 * the per-device stripe length derived from the chunk profile.
 */
11102 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11103 chunk_rec->num_stripes);
11104 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11105 devid = chunk_rec->stripes[i].devid;
11106 offset = chunk_rec->stripes[i].offset;
11107 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11108 devid, offset, length);
11109 if (dev_extent_item) {
11110 dev_extent_rec = container_of(dev_extent_item,
11111 struct device_extent_record,
11113 if (dev_extent_rec->objectid != devid ||
11114 dev_extent_rec->offset != offset ||
11115 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11116 dev_extent_rec->length != length) {
11119 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11120 chunk_rec->objectid,
11123 chunk_rec->stripes[i].devid,
11124 chunk_rec->stripes[i].offset,
11125 dev_extent_rec->objectid,
11126 dev_extent_rec->offset,
11127 dev_extent_rec->length);
11130 list_move(&dev_extent_rec->chunk_list,
11131 &chunk_rec->dextents);
11136 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11137 chunk_rec->objectid,
11140 chunk_rec->stripes[i].devid,
11141 chunk_rec->stripes[i].offset);
11148 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every record in @chunk_cache and
 * report block groups / device extents that no chunk claimed.
 *
 * @chunk_cache:       cache tree of chunk records to walk
 * @block_group_cache: block group records; entries still on its
 *                     block_groups list after the walk are reported
 * @dev_extent_cache:  device extent records; entries still on its
 *                     no_chunk_orphans list after the walk are reported
 * @good:    if non-NULL, receives chunks whose check returned 0
 * @bad:     if non-NULL, receives chunks whose check returned < 0
 * @rebuild: if non-NULL, receives chunks whose check returned > 0
 * @silent:  forwarded to check_chunk_refs()
 *
 * NOTE(review): the return statement and how @silent gates the messages
 * below are on lines missing from this listing.
 */
11149 int check_chunks(struct cache_tree *chunk_cache,
11150 struct block_group_tree *block_group_cache,
11151 struct device_extent_tree *dev_extent_cache,
11152 struct list_head *good, struct list_head *bad,
11153 struct list_head *rebuild, int silent)
11155 struct cache_extent *chunk_item;
11156 struct chunk_record *chunk_rec;
11157 struct block_group_record *bg_rec;
11158 struct device_extent_record *dext_rec;
/* Classify every chunk by the result of its reference check. */
11162 chunk_item = first_cache_extent(chunk_cache);
11163 while (chunk_item) {
11164 chunk_rec = container_of(chunk_item, struct chunk_record,
11166 err = check_chunk_refs(chunk_rec, block_group_cache,
11167 dev_extent_cache, silent);
11170 if (err == 0 && good)
11171 list_add_tail(&chunk_rec->list, good);
11172 if (err > 0 && rebuild)
11173 list_add_tail(&chunk_rec->list, rebuild);
11174 if (err < 0 && bad)
11175 list_add_tail(&chunk_rec->list, bad);
11176 chunk_item = next_cache_extent(chunk_item);
/* Whatever is still linked here was never matched to a chunk. */
11179 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11182 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11190 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11194 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11195 dext_rec->objectid,
/*
 * check_device_used - compare a device item's byte_used with the sum of the
 * lengths of its device extents.
 *
 * @dev_rec:    device record to verify
 * @dext_cache: device extent cache, searched starting at (devid, 0)
 *
 * Each extent that is counted is also unlinked from its device_list.  A
 * message is printed when the accumulated total differs from
 * dev_rec->byte_used.
 *
 * NOTE(review): the loop header, the action taken when an extent belongs to
 * another devid, and the return values are on lines missing from this
 * listing.
 */
11205 static int check_device_used(struct device_record *dev_rec,
11206 struct device_extent_tree *dext_cache)
11208 struct cache_extent *cache;
11209 struct device_extent_record *dev_extent_rec;
11210 u64 total_byte = 0;
11212 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11214 dev_extent_rec = container_of(cache,
11215 struct device_extent_record,
11217 if (dev_extent_rec->objectid != dev_rec->devid)
11220 list_del_init(&dev_extent_rec->device_list);
11221 total_byte += dev_extent_rec->length;
11222 cache = next_cache_extent(cache);
11225 if (total_byte != dev_rec->byte_used) {
11227 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11228 total_byte, dev_rec->byte_used, dev_rec->objectid,
11229 dev_rec->type, dev_rec->offset);
11236 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every device record in
 * @dev_cache, then report device extents left without a device.
 *
 * @dev_cache:        rb-tree of device records, walked with rb_first()/rb_next()
 * @dev_extent_cache: device extent cache; entries on its no_device_orphans
 *                    list are printed as having no device
 *
 * NOTE(review): how individual errors are folded into the return value is on
 * lines missing from this listing.
 */
11237 static int check_devices(struct rb_root *dev_cache,
11238 struct device_extent_tree *dev_extent_cache)
11240 struct rb_node *dev_node;
11241 struct device_record *dev_rec;
11242 struct device_extent_record *dext_rec;
11246 dev_node = rb_first(dev_cache);
11248 dev_rec = container_of(dev_node, struct device_record, node);
11249 err = check_device_used(dev_rec, dev_extent_cache);
11253 dev_node = rb_next(dev_node);
11255 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11258 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11259 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record describing one tree
 * root and append it to @head.
 *
 * @head:          list that receives the new record (added at the tail)
 * @objectid:      stored in the record's objectid
 * @bytenr:        stored in the record's bytenr
 * @last_snapshot: stored in the record's last_snapshot
 * @level:         stored in the record's level
 * @drop_level:    stored in the record's drop_level
 * @drop_key:      copied into the record's drop_key
 *
 * NOTE(review): callers in this file pass NULL for @drop_key; the guard for
 * that case and the handling of a failed malloc() are presumably on lines
 * missing from this listing - confirm against the full source.
 */
11266 static int add_root_item_to_list(struct list_head *head,
11267 u64 objectid, u64 bytenr, u64 last_snapshot,
11268 u8 level, u8 drop_level,
11269 struct btrfs_key *drop_key)
11272 struct root_item_record *ri_rec;
11273 ri_rec = malloc(sizeof(*ri_rec));
11276 ri_rec->bytenr = bytenr;
11277 ri_rec->objectid = objectid;
11278 ri_rec->level = level;
11279 ri_rec->drop_level = drop_level;
11280 ri_rec->last_snapshot = last_snapshot;
11282 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11283 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - drain a list of root_item_record entries.
 *
 * Repeatedly takes the first entry and unlinks it until @list is empty.
 *
 * NOTE(review): the statement that releases the record itself is not visible
 * in this listing.
 */
11288 static void free_root_item_list(struct list_head *list)
11290 struct root_item_record *ri_rec;
11292 while (!list_empty(list)) {
11293 ri_rec = list_first_entry(list, struct root_item_record,
11295 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - process every queued root_item_record on @list.
 *
 * For each record the root block at rec->bytenr is read, handed to
 * add_root_to_pending() together with rec->objectid, and then
 * run_next_block() is called with the record as its last argument.  The
 * buffer is released and the record unlinked afterwards.  Once the list has
 * been walked, run_next_block() is called again with a NULL record.
 *
 * @list: records queued by add_root_item_to_list()
 * @root, @bits and the cache arguments are passed through unchanged to
 * add_root_to_pending() / run_next_block().
 *
 * NOTE(review): the loops around run_next_block(), the error branches and
 * the return statement are on lines missing from this listing.
 */
11300 static int deal_root_from_list(struct list_head *list,
11301 struct btrfs_root *root,
11302 struct block_info *bits,
11304 struct cache_tree *pending,
11305 struct cache_tree *seen,
11306 struct cache_tree *reada,
11307 struct cache_tree *nodes,
11308 struct cache_tree *extent_cache,
11309 struct cache_tree *chunk_cache,
11310 struct rb_root *dev_cache,
11311 struct block_group_tree *block_group_cache,
11312 struct device_extent_tree *dev_extent_cache)
11317 while (!list_empty(list)) {
11318 struct root_item_record *rec;
11319 struct extent_buffer *buf;
11320 rec = list_entry(list->next,
11321 struct root_item_record, list);
/* An unreadable root block is released here before the error path. */
11323 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11324 if (!extent_buffer_uptodate(buf)) {
11325 free_extent_buffer(buf);
11329 ret = add_root_to_pending(buf, extent_cache, pending,
11330 seen, nodes, rec->objectid);
11334 * To rebuild extent tree, we need deal with snapshot
11335 * one by one, otherwise we deal with node firstly which
11336 * can maximize readahead.
11339 ret = run_next_block(root, bits, bits_nr, &last,
11340 pending, seen, reada, nodes,
11341 extent_cache, chunk_cache,
11342 dev_cache, block_group_cache,
11343 dev_extent_cache, rec);
11347 free_extent_buffer(buf);
11348 list_del(&rec->list);
/* Final pass with no specific root record attached. */
11354 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11355 reada, nodes, extent_cache, chunk_cache,
11356 dev_cache, block_group_cache,
11357 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - top-level driver for the chunk / extent /
 * device cross checks.
 *
 * Visible steps:
 *  1. Initialise the local caches and lists and hook excluded_extents,
 *     fsck_extent_cache, free_extent_hook and corrupt_blocks into @fs_info.
 *  2. Queue the tree root and the chunk root on normal_trees.
 *  3. Walk the ROOT_ITEM keys of the tree root: items whose drop_progress
 *     objectid is 0 are queued on normal_trees, the others on dropping_trees
 *     together with their drop key and drop level.
 *  4. Run deal_root_from_list() on both lists, then check_chunks(),
 *     check_extent_refs() and check_devices().
 *  5. Unhook the fs_info pointers and free the caches.
 *
 * -EAGAIN is tested after several of the steps; two separate cleanup
 * sequences are visible at the end of the function.
 *
 * NOTE(review): labels, goto statements, the loop header of the root item
 * walk and the return statements are on lines missing from this listing, so
 * which cleanup sequence serves which exit cannot be confirmed from here.
 */
11367 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11369 struct rb_root dev_cache;
11370 struct cache_tree chunk_cache;
11371 struct block_group_tree block_group_cache;
11372 struct device_extent_tree dev_extent_cache;
11373 struct cache_tree extent_cache;
11374 struct cache_tree seen;
11375 struct cache_tree pending;
11376 struct cache_tree reada;
11377 struct cache_tree nodes;
11378 struct extent_io_tree excluded_extents;
11379 struct cache_tree corrupt_blocks;
11380 struct btrfs_path path;
11381 struct btrfs_key key;
11382 struct btrfs_key found_key;
11384 struct block_info *bits;
11386 struct extent_buffer *leaf;
11388 struct btrfs_root_item ri;
11389 struct list_head dropping_trees;
11390 struct list_head normal_trees;
11391 struct btrfs_root *root1;
11392 struct btrfs_root *root;
11396 root = fs_info->fs_root;
11397 dev_cache = RB_ROOT;
11398 cache_tree_init(&chunk_cache);
11399 block_group_tree_init(&block_group_cache);
11400 device_extent_tree_init(&dev_extent_cache);
11402 cache_tree_init(&extent_cache);
11403 cache_tree_init(&seen);
11404 cache_tree_init(&pending);
11405 cache_tree_init(&nodes);
11406 cache_tree_init(&reada);
11407 cache_tree_init(&corrupt_blocks);
11408 extent_io_tree_init(&excluded_extents);
11409 INIT_LIST_HEAD(&dropping_trees);
11410 INIT_LIST_HEAD(&normal_trees);
/* Expose the stack-local trees through fs_info for the duration of the check. */
11413 fs_info->excluded_extents = &excluded_extents;
11414 fs_info->fsck_extent_cache = &extent_cache;
11415 fs_info->free_extent_hook = free_extent_hook;
11416 fs_info->corrupt_blocks = &corrupt_blocks;
11420 bits = malloc(bits_nr * sizeof(struct block_info));
11426 if (ctx.progress_enabled) {
11427 ctx.tp = TASK_EXTENTS;
11428 task_start(ctx.info);
/* The tree root and the chunk root are queued explicitly, without a drop key. */
11432 root1 = fs_info->tree_root;
11433 level = btrfs_header_level(root1->node);
11434 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11435 root1->node->start, 0, level, 0, NULL);
11438 root1 = fs_info->chunk_root;
11439 level = btrfs_header_level(root1->node);
11440 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11441 root1->node->start, 0, level, 0, NULL);
/* Collect every other root from the ROOT_ITEMs stored in the tree root. */
11444 btrfs_init_path(&path);
11447 key.type = BTRFS_ROOT_ITEM_KEY;
11448 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11452 leaf = path.nodes[0];
11453 slot = path.slots[0];
11454 if (slot >= btrfs_header_nritems(path.nodes[0])) {
/*
 * NOTE(review): the search above runs on fs_info->tree_root, but "root"
 * (fs_info->fs_root) is what gets passed to btrfs_next_leaf() - confirm
 * this is intentional.
 */
11455 ret = btrfs_next_leaf(root, &path);
11458 leaf = path.nodes[0];
11459 slot = path.slots[0];
11461 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11462 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11463 unsigned long offset;
11466 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11467 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11468 last_snapshot = btrfs_root_last_snapshot(&ri);
11469 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11470 level = btrfs_root_level(&ri);
11471 ret = add_root_item_to_list(&normal_trees,
11472 found_key.objectid,
11473 btrfs_root_bytenr(&ri),
11474 last_snapshot, level,
11479 level = btrfs_root_level(&ri);
11480 objectid = found_key.objectid;
11481 btrfs_disk_key_to_cpu(&found_key,
11482 &ri.drop_progress);
11483 ret = add_root_item_to_list(&dropping_trees,
11485 btrfs_root_bytenr(&ri),
11486 last_snapshot, level,
11487 ri.drop_level, &found_key);
11494 btrfs_release_path(&path);
11497 * check_block can return -EAGAIN if it fixes something, please keep
11498 * this in mind when dealing with return values from these functions, if
11499 * we get -EAGAIN we want to fall through and restart the loop.
11501 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11502 &seen, &reada, &nodes, &extent_cache,
11503 &chunk_cache, &dev_cache, &block_group_cache,
11504 &dev_extent_cache);
11506 if (ret == -EAGAIN)
11510 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11511 &pending, &seen, &reada, &nodes,
11512 &extent_cache, &chunk_cache, &dev_cache,
11513 &block_group_cache, &dev_extent_cache);
11515 if (ret == -EAGAIN)
11520 ret = check_chunks(&chunk_cache, &block_group_cache,
11521 &dev_extent_cache, NULL, NULL, NULL, 0);
11523 if (ret == -EAGAIN)
11528 ret = check_extent_refs(root, &extent_cache);
11530 if (ret == -EAGAIN)
11535 ret = check_devices(&dev_cache, &dev_extent_cache);
11540 task_stop(ctx.info);
/* First cleanup sequence: unhook fs_info and free the caches. */
11542 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11543 extent_io_tree_cleanup(&excluded_extents);
11544 fs_info->fsck_extent_cache = NULL;
11545 fs_info->free_extent_hook = NULL;
11546 fs_info->corrupt_blocks = NULL;
11547 fs_info->excluded_extents = NULL;
11550 free_chunk_cache_tree(&chunk_cache);
11551 free_device_cache_tree(&dev_cache);
11552 free_block_group_tree(&block_group_cache);
11553 free_device_extent_tree(&dev_extent_cache);
11554 free_extent_cache_tree(&seen);
11555 free_extent_cache_tree(&pending);
11556 free_extent_cache_tree(&reada);
11557 free_extent_cache_tree(&nodes);
11558 free_root_item_list(&normal_trees);
11559 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence: frees the caches (including the extent record
 * cache) but leaves the fs_info hooks in place.
 */
11562 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11563 free_extent_cache_tree(&seen);
11564 free_extent_cache_tree(&pending);
11565 free_extent_cache_tree(&reada);
11566 free_extent_cache_tree(&nodes);
11567 free_chunk_cache_tree(&chunk_cache);
11568 free_block_group_tree(&block_group_cache);
11569 free_device_cache_tree(&dev_cache);
11570 free_device_extent_tree(&dev_extent_cache);
11571 free_extent_record_cache(&extent_cache);
11572 free_root_item_list(&normal_trees);
11573 free_root_item_list(&dropping_trees);
11574 extent_io_tree_cleanup(&excluded_extents);
11579 * Check backrefs of a tree block given by @bytenr or @eb.
11581 * @root: the root containing the @bytenr or @eb
11582 * @eb: tree block extent buffer, can be NULL
11583 * @bytenr: bytenr of the tree block to search
11584 * @level: tree level of the tree block
11585 * @owner: owner of the tree block
11587 * Return >0 for any error found and output error message
11588 * Return 0 for no error found
/*
 * check_tree_block_ref - verify that the extent tree holds a backref for a
 * tree block from @root.
 *
 * @nrefs: passed to should_check_extent_strictly(); its full_backref[] and
 *         bytenr[] entries for level + 1 select the parent that is named in
 *         the error message
 *
 * Outline of the visible logic:
 *  - look up the extent/metadata item for @bytenr in the extent tree (the
 *    key type depends on the SKINNY_METADATA incompat flag) and take the
 *    stored level either from the key offset or from the tree block info;
 *  - compare extent flags, generation, level and ref count with what the
 *    caller supplied, recording BACKREF_MISMATCH on a difference;
 *  - scan the inline refs for a TREE_BLOCK_REF matching root->objectid (or
 *    @owner when the check is not strict) or for a SHARED_BLOCK_REF, which
 *    is validated by calling this function again with a NULL @eb on the
 *    parent at level + 1;
 *  - fall back to a keyed TREE_BLOCK_REF search; BACKREF_MISSING is
 *    recorded when nothing is found.
 *
 * The "backref lost" message is only printed when @eb is non-NULL.
 *
 * NOTE(review): guards, else branches, gotos and the return statement are on
 * lines missing from this listing.
 */
11590 static int check_tree_block_ref(struct btrfs_root *root,
11591 struct extent_buffer *eb, u64 bytenr,
11592 int level, u64 owner, struct node_refs *nrefs)
11594 struct btrfs_key key;
11595 struct btrfs_root *extent_root = root->fs_info->extent_root;
11596 struct btrfs_path path;
11597 struct btrfs_extent_item *ei;
11598 struct btrfs_extent_inline_ref *iref;
11599 struct extent_buffer *leaf;
11604 int root_level = btrfs_header_level(root->node);
11606 u32 nodesize = root->fs_info->nodesize;
11609 int tree_reloc_root = 0;
/* A tree reloc root's own root node gets special treatment below. */
11616 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11617 btrfs_header_bytenr(root->node) == bytenr)
11618 tree_reloc_root = 1;
11619 btrfs_init_path(&path);
11620 key.objectid = bytenr;
11621 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11622 key.type = BTRFS_METADATA_ITEM_KEY;
11624 key.type = BTRFS_EXTENT_ITEM_KEY;
11625 key.offset = (u64)-1;
11627 /* Search for the backref in extent tree */
11628 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11630 err |= BACKREF_MISSING;
11633 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11635 err |= BACKREF_MISSING;
11639 leaf = path.nodes[0];
11640 slot = path.slots[0];
11641 btrfs_item_key_to_cpu(leaf, &key, slot);
11643 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Inline refs start right after the extent item, or after the tree block info. */
11645 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11646 skinny_level = (int)key.offset;
11647 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11649 struct btrfs_tree_block_info *info;
11651 info = (struct btrfs_tree_block_info *)(ei + 1);
11652 skinny_level = btrfs_tree_block_level(leaf, info);
11653 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11662 * Due to the feature of shared tree blocks, if the upper node
11663 * is a fs root or shared node, the extent of checked node may
11664 * not be updated until the next CoW.
11667 strict = should_check_extent_strictly(root, nrefs,
11669 if (!(btrfs_extent_flags(leaf, ei) &
11670 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11672 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11673 key.objectid, nodesize,
11674 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11675 err = BACKREF_MISMATCH;
11677 header_gen = btrfs_header_generation(eb);
11678 extent_gen = btrfs_extent_generation(leaf, ei);
11679 if (header_gen != extent_gen) {
11681 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11682 key.objectid, nodesize, header_gen,
11684 err = BACKREF_MISMATCH;
11686 if (level != skinny_level) {
11688 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11689 key.objectid, nodesize, level, skinny_level);
11690 err = BACKREF_MISMATCH;
11692 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11694 "extent[%llu %u] is referred by other roots than %llu",
11695 key.objectid, nodesize, root->objectid);
11696 err = BACKREF_MISMATCH;
11701 * Iterate the extent/metadata item to find the exact backref
11703 item_size = btrfs_item_size_nr(leaf, slot);
11704 ptr = (unsigned long)iref;
11705 end = (unsigned long)ei + item_size;
11707 while (ptr < end) {
11708 iref = (struct btrfs_extent_inline_ref *)ptr;
11709 type = btrfs_extent_inline_ref_type(leaf, iref);
11710 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11712 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11713 if (offset == root->objectid)
11715 if (!strict && owner == offset)
11717 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11719 * Backref of tree reloc root points to itself, no need
11720 * to check backref any more.
11722 if (tree_reloc_root) {
11726 * Check if the backref points to valid
11729 found_ref = !check_tree_block_ref( root, NULL,
11730 offset, level + 1, owner,
11737 ptr += btrfs_extent_inline_ref_size(type);
11741 * Inlined extent item doesn't have what we need, check
11742 * TREE_BLOCK_REF_KEY
11745 btrfs_release_path(&path);
11746 key.objectid = bytenr;
11747 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11748 key.offset = root->objectid;
11750 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11755 err |= BACKREF_MISSING;
11757 btrfs_release_path(&path);
11758 if (nrefs && strict &&
11759 level < root_level && nrefs->full_backref[level + 1])
11760 parent = nrefs->bytenr[level + 1];
11761 if (eb && (err & BACKREF_MISSING))
11763 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11764 bytenr, nodesize, owner, level,
11765 parent ? "parent" : "root",
11766 parent ? parent : root->objectid);
11771 * If @err contains BACKREF_MISSING then add extent of the
11772 * file_extent_data_item.
11774 * Returns error bits after repair.
/*
 * repair_extent_data_item - add the missing extent item / data backref for
 * the file extent item that @pathp points at.
 *
 * @trans: running transaction used for the insert and the ref increment
 * @root:  tree that owns the file extent item
 * @pathp: path whose nodes[0]/slots[0] locate the file extent item
 * @nrefs: full_backref[0] decides whether the new ref uses a parent
 *         (the leaf's bytenr)
 *
 * Inline extents and extents with disk_bytenr == 0 are skipped, and nothing
 * is repaired unless the incoming error bits contain BACKREF_MISSING.  The
 * extent item is searched in the extent tree; an item with refs = 0, the
 * file extent's generation and BTRFS_EXTENT_FLAG_DATA is inserted, the block
 * group accounting is updated, and btrfs_inc_extent_ref() adds the backref.
 * BACKREF_MISSING is cleared from the error bits on the success side.
 *
 * NOTE(review): the error-bits parameter, the condition guarding the insert,
 * the branches and the return statement are on lines missing from this
 * listing.
 */
11776 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11777 struct btrfs_root *root,
11778 struct btrfs_path *pathp,
11779 struct node_refs *nrefs,
11782 struct btrfs_file_extent_item *fi;
11783 struct btrfs_key fi_key;
11784 struct btrfs_key key;
11785 struct btrfs_extent_item *ei;
11786 struct btrfs_path path;
11787 struct btrfs_root *extent_root = root->fs_info->extent_root;
11788 struct extent_buffer *eb;
11800 eb = pathp->nodes[0];
11801 slot = pathp->slots[0];
11802 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11803 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11805 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11806 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11809 file_offset = fi_key.offset;
11810 generation = btrfs_file_extent_generation(eb, fi);
11811 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11812 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11813 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Data backrefs are keyed by file offset minus the offset into the extent. */
11814 offset = file_offset - extent_offset;
11816 /* now repair only adds backref */
11817 if ((err & BACKREF_MISSING) == 0)
11820 /* search extent item */
11821 key.objectid = disk_bytenr;
11822 key.type = BTRFS_EXTENT_ITEM_KEY;
11823 key.offset = num_bytes;
11825 btrfs_init_path(&path);
11826 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11832 /* insert an extent item */
11834 key.objectid = disk_bytenr;
11835 key.type = BTRFS_EXTENT_ITEM_KEY;
11836 key.offset = num_bytes;
11837 size = sizeof(*ei);
11839 btrfs_release_path(&path);
11840 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11844 eb = path.nodes[0];
11845 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* The item starts with zero refs; the ref count is raised further down. */
11847 btrfs_set_extent_refs(eb, ei, 0);
11848 btrfs_set_extent_generation(eb, ei, generation);
11849 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11851 btrfs_mark_buffer_dirty(eb);
11852 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11854 btrfs_release_path(&path);
11857 if (nrefs->full_backref[0])
11858 parent = btrfs_header_bytenr(eb);
11862 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11864 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11868 "failed to increase extent data backref[%llu %llu] root %llu",
11869 disk_bytenr, num_bytes, root->objectid);
11872 printf("Add one extent data backref [%llu %llu]\n",
11873 disk_bytenr, num_bytes);
11876 err &= ~BACKREF_MISSING;
11879 error("can't repair root %llu extent data item[%llu %llu]",
11880 root->objectid, disk_bytenr, num_bytes);
11885 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11887 * Return >0 for any error found and output error message
11888 * Return 0 for no error found
/*
 * check_extent_data_item - verify the file extent item that @pathp points at
 * against the extent tree.
 *
 * @root:          tree that owns the file extent item
 * @pathp:         path whose nodes[0]/slots[0] locate the item
 * @nrefs:         forwarded to should_check_extent_strictly()
 * @account_bytes: when non-zero, aligned sizes are added to the global
 *                 data_bytes_allocated / data_bytes_referenced counters
 *
 * Visible checks:
 *  - disk_num_bytes and num_bytes must be sector aligned (BYTES_UNALIGNED);
 *  - the extent item must carry BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH);
 *  - a data backref must exist, searched first among the inline refs, then
 *    as a keyed EXTENT_DATA_REF, then as a keyed SHARED_DATA_REF at the
 *    leaf's start (BACKREF_MISSING).
 *
 * Inline extents and extents with disk_bytenr == 0 are not checked.
 *
 * NOTE(review): branch targets, result tests after the searches and the
 * return statement are on lines missing from this listing.
 */
11890 static int check_extent_data_item(struct btrfs_root *root,
11891 struct btrfs_path *pathp,
11892 struct node_refs *nrefs, int account_bytes)
11894 struct btrfs_file_extent_item *fi;
11895 struct extent_buffer *eb = pathp->nodes[0];
11896 struct btrfs_path path;
11897 struct btrfs_root *extent_root = root->fs_info->extent_root;
11898 struct btrfs_key fi_key;
11899 struct btrfs_key dbref_key;
11900 struct extent_buffer *leaf;
11901 struct btrfs_extent_item *ei;
11902 struct btrfs_extent_inline_ref *iref;
11903 struct btrfs_extent_data_ref *dref;
11906 u64 disk_num_bytes;
11907 u64 extent_num_bytes;
11914 int found_dbackref = 0;
11915 int slot = pathp->slots[0];
11920 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11921 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11923 /* Nothing to check for hole and inline data extents */
11924 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11925 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11928 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11929 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11930 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11932 /* Check unaligned disk_num_bytes and num_bytes */
11933 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11935 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11936 fi_key.objectid, fi_key.offset, disk_num_bytes,
11937 root->fs_info->sectorsize);
11938 err |= BYTES_UNALIGNED;
11939 } else if (account_bytes) {
11940 data_bytes_allocated += disk_num_bytes;
11942 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11944 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11945 fi_key.objectid, fi_key.offset, extent_num_bytes,
11946 root->fs_info->sectorsize);
11947 err |= BYTES_UNALIGNED;
11948 } else if (account_bytes) {
11949 data_bytes_referenced += extent_num_bytes;
/* The leaf's header owner is accepted as ref root when the check is not strict. */
11951 owner = btrfs_header_owner(eb);
11953 /* Check the extent item of the file extent in extent tree */
11954 btrfs_init_path(&path);
11955 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11956 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11957 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11959 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11963 leaf = path.nodes[0];
11964 slot = path.slots[0];
11965 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11967 extent_flags = btrfs_extent_flags(leaf, ei);
11969 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11971 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11972 disk_bytenr, disk_num_bytes,
11973 BTRFS_EXTENT_FLAG_DATA);
11974 err |= BACKREF_MISMATCH;
11977 /* Check data backref inside that extent item */
11978 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11979 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11980 ptr = (unsigned long)iref;
11981 end = (unsigned long)ei + item_size;
11982 strict = should_check_extent_strictly(root, nrefs, -1);
11984 while (ptr < end) {
11985 iref = (struct btrfs_extent_inline_ref *)ptr;
11986 type = btrfs_extent_inline_ref_type(leaf, iref);
11987 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11989 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11990 ref_root = btrfs_extent_data_ref_root(leaf, dref);
11991 if (ref_root == root->objectid)
11992 found_dbackref = 1;
11993 else if (!strict && owner == ref_root)
11994 found_dbackref = 1;
11995 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11996 found_dbackref = !check_tree_block_ref(root, NULL,
11997 btrfs_extent_inline_ref_offset(leaf, iref),
12001 if (found_dbackref)
12003 ptr += btrfs_extent_inline_ref_size(type);
12006 if (!found_dbackref) {
12007 btrfs_release_path(&path);
12009 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12010 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12011 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12012 dbref_key.offset = hash_extent_data_ref(root->objectid,
12013 fi_key.objectid, fi_key.offset);
12015 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12016 &dbref_key, &path, 0, 0);
12018 found_dbackref = 1;
12022 btrfs_release_path(&path);
12025 * Neither inlined nor EXTENT_DATA_REF found, try
12026 * SHARED_DATA_REF as last chance.
12028 dbref_key.objectid = disk_bytenr;
12029 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12030 dbref_key.offset = eb->start;
12032 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12033 &dbref_key, &path, 0, 0);
12035 found_dbackref = 1;
12041 if (!found_dbackref)
12042 err |= BACKREF_MISSING;
12043 btrfs_release_path(&path);
12044 if (err & BACKREF_MISSING) {
12045 error("data extent[%llu %llu] backref lost",
12046 disk_bytenr, disk_num_bytes);
12052 * Get real tree block level for the case like shared block
12053 * Return >= 0 as tree level
12054 * Return <0 for error
/*
 * query_tree_block_level - determine the level of the tree block at @bytenr.
 *
 * The extent tree is searched for the block's extent/metadata item.  The item
 * must carry BTRFS_EXTENT_FLAG_TREE_BLOCK; its generation is used as the
 * transid when the block itself is read.  The level recorded in the backref
 * (key offset for a METADATA_ITEM, otherwise the tree block info) is compared
 * with the level in the block header, and the header level is what gets
 * returned on the path where the comparison is passed.
 *
 * NOTE(review): the failure branches and their return values are on lines
 * missing from this listing.
 */
12056 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12058 struct extent_buffer *eb;
12059 struct btrfs_path path;
12060 struct btrfs_key key;
12061 struct btrfs_extent_item *ei;
12068 /* Search extent tree for extent generation and level */
12069 key.objectid = bytenr;
12070 key.type = BTRFS_METADATA_ITEM_KEY;
12071 key.offset = (u64)-1;
12073 btrfs_init_path(&path);
12074 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12077 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12085 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12086 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12087 struct btrfs_extent_item);
12088 flags = btrfs_extent_flags(path.nodes[0], ei);
12089 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12094 /* Get transid for later read_tree_block() check */
12095 transid = btrfs_extent_generation(path.nodes[0], ei);
12097 /* Get backref level as one source */
12098 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12099 backref_level = key.offset;
12101 struct btrfs_tree_block_info *info;
12103 info = (struct btrfs_tree_block_info *)(ei + 1);
12104 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12106 btrfs_release_path(&path);
12108 /* Get level from tree block as an alternative source */
12109 eb = read_tree_block(fs_info, bytenr, transid);
12110 if (!extent_buffer_uptodate(eb)) {
12111 free_extent_buffer(eb);
12114 header_level = btrfs_header_level(eb);
12115 free_extent_buffer(eb);
/* The two sources of the level are compared before the header level is returned. */
12117 if (header_level != backref_level)
12119 return header_level;
12122 btrfs_release_path(&path);
12127 * Check if a tree block backref is valid (points to a valid tree block)
12128 * if level == -1, level will be resolved
12129 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref - verify that tree @root_id really references the
 * tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  logical address of the tree block
 * @level:   expected level; resolved through query_tree_block_level() for
 *           the special case described above
 *
 * The root is read by its ROOT_ITEM key, the first key of the block is taken
 * from the block itself, and that key is searched in the root with
 * lowest_level set to @level.  The node found at that level must have the
 * same bytenr and level, otherwise REFERENCER_MISMATCH is recorded.  Failure
 * to read the root or the block, or a failed search, records
 * REFERENCER_MISSING.  An empty level-0 block is released without a key
 * check.
 *
 * NOTE(review): the branch conditions between the steps and the return
 * statement are on lines missing from this listing.
 */
12131 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12132 u64 bytenr, int level)
12134 struct btrfs_root *root;
12135 struct btrfs_key key;
12136 struct btrfs_path path;
12137 struct extent_buffer *eb;
12138 struct extent_buffer *node;
12139 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12143 /* Query level for level == -1 special case */
12145 level = query_tree_block_level(fs_info, bytenr);
12147 err |= REFERENCER_MISSING;
12151 key.objectid = root_id;
12152 key.type = BTRFS_ROOT_ITEM_KEY;
12153 key.offset = (u64)-1;
12155 root = btrfs_read_fs_root(fs_info, &key);
12156 if (IS_ERR(root)) {
12157 err |= REFERENCER_MISSING;
12161 /* Read out the tree block to get item/node key */
12162 eb = read_tree_block(fs_info, bytenr, 0);
12163 if (!extent_buffer_uptodate(eb)) {
12164 err |= REFERENCER_MISSING;
12165 free_extent_buffer(eb);
12169 /* Empty tree, no need to check key */
12170 if (!btrfs_header_nritems(eb) && !level) {
12171 free_extent_buffer(eb);
12176 btrfs_node_key_to_cpu(eb, &key, 0);
12178 btrfs_item_key_to_cpu(eb, &key, 0);
12180 free_extent_buffer(eb);
12182 btrfs_init_path(&path);
/* Stop the descent at the block's own level so path.nodes[level] is the candidate. */
12183 path.lowest_level = level;
12184 /* Search with the first key, to ensure we can reach it */
12185 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12187 err |= REFERENCER_MISSING;
12191 node = path.nodes[level];
12192 if (btrfs_header_bytenr(node) != bytenr) {
12194 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12195 bytenr, nodesize, bytenr,
12196 btrfs_header_bytenr(node));
12197 err |= REFERENCER_MISMATCH;
12199 if (btrfs_header_level(node) != level) {
12201 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12202 bytenr, nodesize, level,
12203 btrfs_header_level(node));
12204 err |= REFERENCER_MISMATCH;
12208 btrfs_release_path(&path);
12210 if (err & REFERENCER_MISSING) {
12212 error("extent [%llu %d] lost referencer (owner: %llu)",
12213 bytenr, nodesize, root_id);
12216 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12217 bytenr, nodesize, root_id, level);
12224 * Check if tree block @eb is tree reloc root.
12225 * Return 0 if it's not or any problem happens
12226 * Return 1 if it's a tree reloc root
/*
 * is_tree_reloc_root - test whether @eb is the root node of the tree reloc
 * root that belongs to @eb's header owner.
 *
 * The reloc root is read, bypassing the root cache, with the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, owner).  Its node's bytenr
 * is compared with @eb's bytenr and the root is freed again afterwards.
 *
 * NOTE(review): the result variable and the return statements are on lines
 * missing from this listing.
 */
12228 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12229 struct extent_buffer *eb)
12231 struct btrfs_root *tree_reloc_root;
12232 struct btrfs_key key;
12233 u64 bytenr = btrfs_header_bytenr(eb);
12234 u64 owner = btrfs_header_owner(eb);
12237 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12238 key.offset = owner;
12239 key.type = BTRFS_ROOT_ITEM_KEY;
12241 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12242 if (IS_ERR(tree_reloc_root))
12245 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12247 btrfs_free_fs_root(tree_reloc_root);
12252 * Check referencer for shared block backref
12253 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref - verify that the block at @parent really
 * points to the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  logical address of the referencing (parent) tree block
 * @bytenr:  logical address of the referenced tree block
 * @level:   level of the referenced block; may be resolved through
 *           query_tree_block_level()
 *
 * The parent block is read.  When @parent equals @bytenr the block is tested
 * with is_tree_reloc_root().  Otherwise the parent's header level is compared
 * with @level + 1 and its block pointers are scanned for @bytenr.
 * REFERENCER_MISSING is returned, after printing a message, when no parent
 * was found.
 *
 * NOTE(review): the branch targets and the success return are on lines
 * missing from this listing.
 */
12255 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12256 u64 parent, u64 bytenr, int level)
12258 struct extent_buffer *eb;
12260 int found_parent = 0;
12263 eb = read_tree_block(fs_info, parent, 0);
12264 if (!extent_buffer_uptodate(eb))
12268 level = query_tree_block_level(fs_info, bytenr);
12272 /* It's possible it's a tree reloc root */
12273 if (parent == bytenr) {
12274 if (is_tree_reloc_root(fs_info, eb))
12279 if (level + 1 != btrfs_header_level(eb))
12282 nr = btrfs_header_nritems(eb);
12283 for (i = 0; i < nr; i++) {
12284 if (bytenr == btrfs_node_blockptr(eb, i)) {
12290 free_extent_buffer(eb);
12291 if (!found_parent) {
12293 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12294 bytenr, fs_info->nodesize, parent, level);
12295 return REFERENCER_MISSING;
12301 * Check referencer for normal (inlined) data ref
12302 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref by counting the file extent items that back it.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the fs/subvolume root named in the backref
 * @objectid: inode number named in the backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * Walks the EXTENT_DATA items of inode @objectid in root @root_id and
 * compares the number of matching file extents with @count. Reports an
 * error and returns REFERENCER_MISSING when the two differ.
 */
12304 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12305 u64 root_id, u64 objectid, u64 offset,
12306 u64 bytenr, u64 len, u32 count)
12308 struct btrfs_root *root;
12309 struct btrfs_root *extent_root = fs_info->extent_root;
12310 struct btrfs_key key;
12311 struct btrfs_path path;
12312 struct extent_buffer *leaf;
12313 struct btrfs_file_extent_item *fi;
12314 u32 found_count = 0;
/*
 * Locate the EXTENT_ITEM of @bytenr in the extent tree: search past any
 * possible offset, then step back to the previous extent item.
 */
12319 key.objectid = bytenr;
12320 key.type = BTRFS_EXTENT_ITEM_KEY;
12321 key.offset = (u64)-1;
12323 btrfs_init_path(&path);
12324 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12327 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12330 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12331 if (key.objectid != bytenr ||
12332 key.type != BTRFS_EXTENT_ITEM_KEY)
12335 btrfs_release_path(&path);
/* Read the fs root that the backref claims owns the file extent */
12337 key.objectid = root_id;
12338 key.type = BTRFS_ROOT_ITEM_KEY;
12339 key.offset = (u64)-1;
12340 btrfs_init_path(&path);
12342 root = btrfs_read_fs_root(fs_info, &key);
12346 key.objectid = objectid;
12347 key.type = BTRFS_EXTENT_DATA_KEY;
12349 * It can be nasty as data backref offset is
12350 * file offset - file extent offset, which is smaller or
12351 * equal to original backref offset. The only special case is
12352 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is treated as wrapped: start from file offset 0 */
12354 key.offset = offset & (1ULL << 63) ? 0 : offset;
12356 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12361 * Search afterwards to get correct one
12362 * NOTE: As we must do a comprehensive check on the data backref to
12363 * make sure the dref count also matches, we must iterate all file
12364 * extents for that inode.
12367 leaf = path.nodes[0];
12368 slot = path.slots[0];
12370 if (slot >= btrfs_header_nritems(leaf))
12372 btrfs_item_key_to_cpu(leaf, &key, slot);
12373 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12375 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12377 * Except normal disk bytenr and disk num bytes, we still
12378 * need to do extra check on dbackref offset as
12379 * dbackref offset = file_offset - file_extent_offset
12381 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12382 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12383 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12388 ret = btrfs_next_item(root, &path);
12393 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded ref count */
12394 if (found_count != count) {
12396 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12397 bytenr, len, root_id, objectid, offset, count, found_count);
12398 return REFERENCER_MISSING;
12404 * Check if the referencer of a shared data backref exists
/*
 * Verify that leaf @parent holds a file extent item pointing at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf recorded in the shared data backref
 * @bytenr:  start of the referenced data extent
 *
 * Reports an error and returns REFERENCER_MISSING when no such file extent
 * was found in @parent.
 */
12406 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12407 u64 parent, u64 bytenr)
12409 struct extent_buffer *eb;
12410 struct btrfs_key key;
12411 struct btrfs_file_extent_item *fi;
12413 int found_parent = 0;
12416 eb = read_tree_block(fs_info, parent, 0);
12417 if (!extent_buffer_uptodate(eb))
/* Examine every EXTENT_DATA item of the leaf */
12420 nr = btrfs_header_nritems(eb);
12421 for (i = 0; i < nr; i++) {
12422 btrfs_item_key_to_cpu(eb, &key, i);
12423 if (key.type != BTRFS_EXTENT_DATA_KEY)
12426 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* The inline extent type is tested before the disk bytenr is read */
12427 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12430 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12437 free_extent_buffer(eb);
12438 if (!found_parent) {
12439 error("shared extent %llu referencer lost (parent: %llu)",
12441 return REFERENCER_MISSING;
12447 * Only delete backref if REFERENCER_MISSING now
12449 * Returns <0 the extent was deleted
12450 * Returns >0 the backref was deleted but extent still exists, returned value
12451 * means error after repair
12452 * Returns 0 nothing happened
/*
 * Drop a backref whose referencer could not be confirmed.
 *
 * @trans:         running transaction
 * @root:          root searched again after the deletion
 * @path:          path pointing at the extent item being repaired; it is
 *                 released and re-searched with the original key
 * @bytenr:        start of the extent
 * @num_bytes:     length of the extent
 * @parent:        parent bytenr of the backref, passed to btrfs_free_extent()
 * @root_objectid: root objectid of the backref
 * @owner:         owner of the backref
 * @offset:        offset of the backref
 * @err:           error bits found by the check
 */
12454 static int repair_extent_item(struct btrfs_trans_handle *trans,
12455 struct btrfs_root *root, struct btrfs_path *path,
12456 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12457 u64 owner, u64 offset, int err)
12459 struct btrfs_key old_key;
/* Remember where we are; the path is re-searched with this key below */
12463 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
/*
 * NOTE(review): the test accepts REFERENCER_MISMATCH as well, but only
 * REFERENCER_MISSING is cleared after the deletion - confirm intended.
 */
12465 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12466 /* delete the backref */
12467 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12468 num_bytes, parent, root_objectid, owner, offset);
12471 err &= ~REFERENCER_MISSING;
12472 printf("Delete backref in extent [%llu %llu]\n",
12473 bytenr, num_bytes);
12475 error("fail to delete backref in extent [%llu %llu]",
12476 bytenr, num_bytes);
12480 /* btrfs_free_extent may delete the extent */
12481 btrfs_release_path(path);
12482 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12492 * This function will check a given extent item, including its backref and
12493 * itself (like crossing stripe boundary and type)
12495 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM / METADATA_ITEM and every inline backref it carries.
 *
 * @trans:   transaction handed to repair_extent_item()
 * @fs_info: filesystem being checked
 * @path:    path pointing at the extent item
 *
 * Side effect: adds the size of the extent to the global bytes_used counter.
 */
12497 static int check_extent_item(struct btrfs_trans_handle *trans,
12498 struct btrfs_fs_info *fs_info,
12499 struct btrfs_path *path)
12501 struct btrfs_extent_item *ei;
12502 struct btrfs_extent_inline_ref *iref;
12503 struct btrfs_extent_data_ref *dref;
12504 struct extent_buffer *eb = path->nodes[0];
12507 int slot = path->slots[0];
12509 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12510 u32 item_size = btrfs_item_size_nr(eb, slot);
12520 struct btrfs_key key;
12524 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset; otherwise nodesize is used */
12525 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12526 bytes_used += key.offset;
12527 num_bytes = key.offset;
12529 bytes_used += nodesize;
12530 num_bytes = nodesize;
12533 if (item_size < sizeof(*ei)) {
12535 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12536 * old thing when on disk format is still un-determined.
12537 * No need to care about it anymore
12539 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12543 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12544 flags = btrfs_extent_flags(eb, ei);
12546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12548 if (metadata && check_crossing_stripes(global_info, key.objectid,
12550 error("bad metadata [%llu, %llu) crossing stripe boundary",
12551 key.objectid, key.objectid + nodesize);
12552 err |= CROSSING_STRIPE_BOUNDARY;
/* The inline area starts right behind the fixed-size extent item */
12555 ptr = (unsigned long)(ei + 1);
12557 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12558 /* Old EXTENT_ITEM metadata */
12559 struct btrfs_tree_block_info *info;
12561 info = (struct btrfs_tree_block_info *)ptr;
12562 level = btrfs_tree_block_level(eb, info);
12563 ptr += sizeof(struct btrfs_tree_block_info);
12565 /* New METADATA_ITEM */
12566 level = key.offset;
/* End of the item; ptr is advanced one inline ref at a time */
12568 end = (unsigned long)ei + item_size;
12571 /* Reached extent item end normally */
12575 /* Beyond extent item end, wrong item size */
12577 err |= ITEM_SIZE_MISMATCH;
12578 error("extent item at bytenr %llu slot %d has wrong size",
12587 /* Now check every backref in this extent item */
12588 iref = (struct btrfs_extent_inline_ref *)ptr;
12589 type = btrfs_extent_inline_ref_type(eb, iref);
12590 offset = btrfs_extent_inline_ref_offset(eb, iref);
12592 case BTRFS_TREE_BLOCK_REF_KEY:
12593 root_objectid = offset;
12595 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12599 case BTRFS_SHARED_BLOCK_REF_KEY:
12601 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12605 case BTRFS_EXTENT_DATA_REF_KEY:
/* A keyed data ref is stored in place of the inline ref's offset field */
12606 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12607 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12608 owner = btrfs_extent_data_ref_objectid(eb, dref);
12609 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12610 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12611 owner_offset, key.objectid, key.offset,
12612 btrfs_extent_data_ref_count(eb, dref));
12615 case BTRFS_SHARED_DATA_REF_KEY:
12617 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12621 error("extent[%llu %d %llu] has unknown ref type: %d",
12622 key.objectid, key.type, key.offset, type);
12623 ret = UNKNOWN_TYPE;
/*
 * NOTE(review): the condition tests the accumulated err while the value
 * handed to repair_extent_item() is ret of the current backref - confirm
 * this is intended.
 */
12628 if (err && repair) {
12629 ret = repair_extent_item(trans, fs_info->extent_root, path,
12630 key.objectid, num_bytes, parent, root_objectid,
12631 owner, owner_offset, ret);
/* Step to the next inline ref; its size depends on the ref type */
12640 ptr += btrfs_extent_inline_ref_size(type);
12648 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the chunk named by a dev extent has a stripe pointing back at
 * this dev extent.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item in @eb
 *
 * Reports an error and returns REFERENCER_MISSING when no matching chunk
 * stripe was found.
 */
12650 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12651 struct extent_buffer *eb, int slot)
12653 struct btrfs_root *chunk_root = fs_info->chunk_root;
12654 struct btrfs_dev_extent *ptr;
12655 struct btrfs_path path;
12656 struct btrfs_key chunk_key;
12657 struct btrfs_key devext_key;
12658 struct btrfs_chunk *chunk;
12659 struct extent_buffer *l;
12663 int found_chunk = 0;
12666 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12667 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12668 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
12670 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12671 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12672 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12674 btrfs_init_path(&path);
12675 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12680 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12681 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must equal the dev extent length */
12686 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for a stripe with this dev extent's (devid, physical offset) */
12689 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12690 for (i = 0; i < num_stripes; i++) {
12691 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12692 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12694 if (devid == devext_key.objectid &&
12695 offset == devext_key.offset) {
12701 btrfs_release_path(&path);
12702 if (!found_chunk) {
12704 "device extent[%llu, %llu, %llu] did not find the related chunk",
12705 devext_key.objectid, devext_key.offset, length);
12706 return REFERENCER_MISSING;
12712 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of a dev item with the summed length of all
 * dev extents of that device.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * Returns REFERENCER_MISSING when the dev extent search fails and
 * ACCOUNTING_MISMATCH when the two byte counts differ.
 */
12714 static int check_dev_item(struct btrfs_fs_info *fs_info,
12715 struct extent_buffer *eb, int slot)
12717 struct btrfs_root *dev_root = fs_info->dev_root;
12718 struct btrfs_dev_item *dev_item;
12719 struct btrfs_path path;
12720 struct btrfs_key key;
12721 struct btrfs_dev_extent *ptr;
12727 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12728 dev_id = btrfs_device_id(eb, dev_item);
12729 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id in the dev tree */
12731 key.objectid = dev_id;
12732 key.type = BTRFS_DEV_EXTENT_KEY;
12735 btrfs_init_path(&path);
12736 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12738 btrfs_item_key_to_cpu(eb, &key, slot);
12739 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12740 key.objectid, key.type, key.offset);
12741 btrfs_release_path(&path);
12742 return REFERENCER_MISSING;
12745 /* Iterate dev_extents to calculate the used space of a device */
12747 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12750 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12751 if (key.objectid > dev_id)
12753 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12756 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12757 struct btrfs_dev_extent);
12758 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12760 ret = btrfs_next_item(dev_root, &path);
12764 btrfs_release_path(&path);
12766 if (used != total) {
/*
 * NOTE(review): key is reloaded from @eb here, yet the message prints
 * BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * key.objectid, key.type and key.offset as the first error in this
 * function does - confirm which is intended.
 */
12767 btrfs_item_key_to_cpu(eb, &key, slot);
12769 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12770 total, used, BTRFS_ROOT_TREE_OBJECTID,
12771 BTRFS_DEV_EXTENT_KEY, dev_id);
12772 return ACCOUNTING_MISMATCH;
12778 * Check a block group item with its referencer (chunk) and its used space
12779 * with extent/metadata item
/*
 * Check a block group item against its chunk and against the extents that
 * live inside it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item in @eb
 *
 * Error bits collected in err: REFERENCER_MISSING (no chunk item),
 * REFERENCER_MISMATCH (chunk length differs), CHUNK_TYPE_MISMATCH (extent
 * type does not fit the block group flags) and BG_ACCOUNTING_ERROR (used
 * bytes differ from the summed extent sizes).
 */
12781 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12782 struct extent_buffer *eb, int slot)
12784 struct btrfs_root *extent_root = fs_info->extent_root;
12785 struct btrfs_root *chunk_root = fs_info->chunk_root;
12786 struct btrfs_block_group_item *bi;
12787 struct btrfs_block_group_item bg_item;
12788 struct btrfs_path path;
12789 struct btrfs_key bg_key;
12790 struct btrfs_key chunk_key;
12791 struct btrfs_key extent_key;
12792 struct btrfs_chunk *chunk;
12793 struct extent_buffer *leaf;
12794 struct btrfs_extent_item *ei;
12795 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer before reading its fields */
12803 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12804 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12805 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12806 used = btrfs_block_group_used(&bg_item);
12807 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the logical start of the block group */
12809 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12810 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12811 chunk_key.offset = bg_key.objectid;
12813 btrfs_init_path(&path);
12814 /* Search for the referencer chunk */
12815 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12818 "block group[%llu %llu] did not find the related chunk item",
12819 bg_key.objectid, bg_key.offset);
12820 err |= REFERENCER_MISSING;
12822 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12823 struct btrfs_chunk);
12824 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12827 "block group[%llu %llu] related chunk item length does not match",
12828 bg_key.objectid, bg_key.offset);
12829 err |= REFERENCER_MISMATCH;
12832 btrfs_release_path(&path);
12834 /* Search from the block group bytenr */
12835 extent_key.objectid = bg_key.objectid;
12836 extent_key.type = 0;
12837 extent_key.offset = 0;
12839 btrfs_init_path(&path);
12840 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12844 /* Iterate extent tree to account used space */
12846 leaf = path.nodes[0];
12848 /* Search slot can point to the last item beyond leaf nritems */
12849 if (path.slots[0] >= btrfs_header_nritems(leaf))
12852 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the end of the block group range */
12853 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12856 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12857 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12859 if (extent_key.objectid < bg_key.objectid)
12862 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12865 total += extent_key.offset;
/* The extent type must agree with the type bits of the block group */
12867 ei = btrfs_item_ptr(leaf, path.slots[0],
12868 struct btrfs_extent_item);
12869 flags = btrfs_extent_flags(leaf, ei);
12870 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12871 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12873 "bad extent[%llu, %llu) type mismatch with chunk",
12874 extent_key.objectid,
12875 extent_key.objectid + extent_key.offset);
12876 err |= CHUNK_TYPE_MISMATCH;
12878 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12879 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12880 BTRFS_BLOCK_GROUP_METADATA))) {
12882 "bad extent[%llu, %llu) type mismatch with chunk",
12883 extent_key.objectid,
12884 extent_key.objectid + nodesize);
12885 err |= CHUNK_TYPE_MISMATCH;
12889 ret = btrfs_next_item(extent_root, &path);
12895 btrfs_release_path(&path);
12897 if (total != used) {
12899 "block group[%llu %llu] used %llu but extent items used %llu",
12900 bg_key.objectid, bg_key.offset, used, total);
12901 err |= BG_ACCOUNTING_ERROR;
12907 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12908 * FIXME: We still need to repair error of dev_item.
12910 * Returns error after repair.
/*
 * Create the block group for a chunk item whose block group is missing.
 *
 * @trans:      running transaction
 * @chunk_root: root whose fs_info is handed to btrfs_make_block_group()
 * @path:       path pointing at the chunk item
 * @err:        error bits from check_chunk_item(); only REFERENCER_MISSING
 *              is acted upon here
 */
12912 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12913 struct btrfs_root *chunk_root,
12914 struct btrfs_path *path, int err)
12916 struct btrfs_chunk *chunk;
12917 struct btrfs_key chunk_key;
12918 struct extent_buffer *eb = path->nodes[0];
12920 int slot = path->slots[0];
12924 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12925 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12927 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12928 type = btrfs_chunk_type(path->nodes[0], chunk);
12929 length = btrfs_chunk_length(eb, chunk);
/* The new block group takes type, start and length from the chunk item */
12931 if (err & REFERENCER_MISSING) {
12932 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12933 type, chunk_key.objectid, chunk_key.offset, length);
12935 error("fail to add block group item[%llu %llu]",
12936 chunk_key.offset, length);
12939 err &= ~REFERENCER_MISSING;
12940 printf("Added block group item[%llu %llu]\n",
12941 chunk_key.offset, length);
12950 * Check a chunk item.
12951 * Including checking all referred dev_extents and block group
/*
 * Check a chunk item against its block group item and against the dev
 * extent of every stripe.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item in @eb
 *
 * Error bits collected in err: BYTES_UNALIGNED | UNKNOWN_TYPE (chunk failed
 * btrfs_check_chunk_valid()), REFERENCER_MISSING (block group item absent
 * or with different flags) and BACKREF_MISSING (a stripe has no matching
 * dev extent).
 */
12953 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12954 struct extent_buffer *eb, int slot)
12956 struct btrfs_root *extent_root = fs_info->extent_root;
12957 struct btrfs_root *dev_root = fs_info->dev_root;
12958 struct btrfs_path path;
12959 struct btrfs_key chunk_key;
12960 struct btrfs_key bg_key;
12961 struct btrfs_key devext_key;
12962 struct btrfs_chunk *chunk;
12963 struct extent_buffer *leaf;
12964 struct btrfs_block_group_item *bi;
12965 struct btrfs_block_group_item bg_item;
12966 struct btrfs_dev_extent *ptr;
12978 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12979 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12980 length = btrfs_chunk_length(eb, chunk);
12981 chunk_end = chunk_key.offset + length;
12982 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12985 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12987 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12990 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by (chunk logical start, length) */
12992 bg_key.objectid = chunk_key.offset;
12993 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12994 bg_key.offset = length;
12996 btrfs_init_path(&path);
12997 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13000 "chunk[%llu %llu) did not find the related block group item",
13001 chunk_key.offset, chunk_end);
13002 err |= REFERENCER_MISSING;
13004 leaf = path.nodes[0];
13005 bi = btrfs_item_ptr(leaf, path.slots[0],
13006 struct btrfs_block_group_item);
13007 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING, the
 * same bit as an absent block group item; repair_chunk_item() reacts to that
 * bit by creating a block group - confirm REFERENCER_MISMATCH was not meant.
 */
13009 if (btrfs_block_group_flags(&bg_item) != type) {
13011 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13012 chunk_key.offset, chunk_end, type,
13013 btrfs_block_group_flags(&bg_item));
13014 err |= REFERENCER_MISSING;
/* Every stripe needs a dev extent that points back at this chunk */
13018 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13019 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13020 for (i = 0; i < num_stripes; i++) {
13021 btrfs_release_path(&path);
13022 btrfs_init_path(&path);
13023 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13024 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13025 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13027 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13030 goto not_match_dev;
13032 leaf = path.nodes[0];
13033 ptr = btrfs_item_ptr(leaf, path.slots[0],
13034 struct btrfs_dev_extent);
13035 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13036 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13037 if (objectid != chunk_key.objectid ||
13038 offset != chunk_key.offset ||
13039 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13040 goto not_match_dev;
13043 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start
 * while the other messages in this function print chunk_key.offset -
 * confirm which is intended.
 */
13045 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13046 chunk_key.objectid, chunk_end, i);
13049 btrfs_release_path(&path);
/*
 * Delete the item @path currently points at from @root.
 *
 * @trans: running transaction
 * @root:  tree the item is deleted from
 * @path:  path pointing at the item; it is released and the same key is
 *         searched again with @trans (ins_len -1, cow 1) before deletion
 *
 * Prints the outcome together with the root objectid and the item key.
 */
13054 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13055 struct btrfs_root *root,
13056 struct btrfs_path *path)
13058 struct btrfs_key key;
13061 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13062 btrfs_release_path(path);
13063 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13069 ret = btrfs_del_item(trans, root, path);
/* At slot 0 after the deletion the path is moved to the previous leaf */
13073 if (path->slots[0] == 0)
13074 btrfs_prev_leaf(root, path);
13079 error("failed to delete root %llu item[%llu, %u, %llu]",
13080 root->objectid, key.objectid, key.type, key.offset);
13082 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13083 root->objectid, key.objectid, key.type, key.offset);
13088 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the item @path points at to the checker matching its key type
 * and, for some types, to the matching repair or delete helper.
 *
 * @trans:         transaction handed to the repair and delete helpers
 * @root:          tree the leaf belongs to
 * @path:          path pointing at the item to check
 * @nrefs:         passed through to the EXTENT_DATA check and repair helpers
 * @account_bytes: passed through to check_extent_data_item()
 *
 * Side effect: EXTENT_CSUM items add their item size to total_csum_bytes.
 */
13090 static int check_leaf_items(struct btrfs_trans_handle *trans,
13091 struct btrfs_root *root, struct btrfs_path *path,
13092 struct node_refs *nrefs, int account_bytes)
13094 struct btrfs_fs_info *fs_info = root->fs_info;
13095 struct btrfs_key key;
13096 struct extent_buffer *eb;
13099 struct btrfs_extent_data_ref *dref;
13104 eb = path->nodes[0];
13105 slot = path->slots[0];
/* A slot at or beyond nritems is reported as an empty leaf */
13106 if (slot >= btrfs_header_nritems(eb)) {
13108 error("empty leaf [%llu %u] root %llu", eb->start,
13109 root->fs_info->nodesize, root->objectid);
13115 btrfs_item_key_to_cpu(eb, &key, slot);
13119 case BTRFS_EXTENT_DATA_KEY:
13120 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13122 ret = repair_extent_data_item(trans, root, path, nrefs,
13126 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13127 ret = check_block_group_item(fs_info, eb, slot);
13129 ret & REFERENCER_MISSING)
13130 ret = delete_extent_tree_item(trans, root, path);
13133 case BTRFS_DEV_ITEM_KEY:
13134 ret = check_dev_item(fs_info, eb, slot);
13137 case BTRFS_CHUNK_ITEM_KEY:
13138 ret = check_chunk_item(fs_info, eb, slot);
13140 ret = repair_chunk_item(trans, root, path, ret);
13143 case BTRFS_DEV_EXTENT_KEY:
13144 ret = check_dev_extent_item(fs_info, eb, slot);
13147 case BTRFS_EXTENT_ITEM_KEY:
13148 case BTRFS_METADATA_ITEM_KEY:
13149 ret = check_extent_item(trans, fs_info, path);
13152 case BTRFS_EXTENT_CSUM_KEY:
13153 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The four cases below handle backrefs stored as items of their own */
13156 case BTRFS_TREE_BLOCK_REF_KEY:
13157 ret = check_tree_block_backref(fs_info, key.offset,
13160 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13161 ret = delete_extent_tree_item(trans, root, path);
13164 case BTRFS_EXTENT_DATA_REF_KEY:
13165 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13166 ret = check_extent_data_backref(fs_info,
13167 btrfs_extent_data_ref_root(eb, dref),
13168 btrfs_extent_data_ref_objectid(eb, dref),
13169 btrfs_extent_data_ref_offset(eb, dref),
13171 btrfs_extent_data_ref_count(eb, dref));
13173 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13174 ret = delete_extent_tree_item(trans, root, path);
13177 case BTRFS_SHARED_BLOCK_REF_KEY:
13178 ret = check_shared_block_backref(fs_info, key.offset,
13181 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13182 ret = delete_extent_tree_item(trans, root, path);
13185 case BTRFS_SHARED_DATA_REF_KEY:
13186 ret = check_shared_data_backref(fs_info, key.offset,
13189 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13190 ret = delete_extent_tree_item(trans, root, path);
/* Forward declaration: called by check_chunks_and_extents_v2(), defined further down */
13203 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13206 * Low memory usage version check_chunks_and_extents.
/*
 * Check the chunk tree, the tree root and then every tree that has a
 * ROOT_ITEM in the tree root at or after BTRFS_EXTENT_TREE_OBJECTID.
 *
 * @fs_info: filesystem being checked
 *
 * The visible code also pins all metadata blocks, starts a transaction on
 * the extent root, fixes block accounting and commits the transaction.
 * NOTE(review): the conditions guarding these steps are not visible in this
 * view; the comment further down suggests they depend on repair mode.
 */
13208 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13210 struct btrfs_trans_handle *trans = NULL;
13211 struct btrfs_path path;
13212 struct btrfs_key old_key;
13213 struct btrfs_key key;
13214 struct btrfs_root *root1;
13215 struct btrfs_root *root;
13216 struct btrfs_root *cur_root;
13220 root = fs_info->fs_root;
13223 /* pin every tree block to avoid extent overwrite */
13224 ret = pin_metadata_blocks(fs_info);
13226 error("failed to pin metadata blocks");
13229 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13230 if (IS_ERR(trans)) {
13231 error("failed to start transaction before check");
13232 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked first */
13236 root1 = root->fs_info->chunk_root;
13237 ret = check_btrfs_root(trans, root1, 0, 1);
13240 root1 = root->fs_info->tree_root;
13241 ret = check_btrfs_root(trans, root1, 0, 1);
/* Walk the ROOT_ITEMs of the tree root, starting at the extent tree */
13244 btrfs_init_path(&path);
13245 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13247 key.type = BTRFS_ROOT_ITEM_KEY;
13249 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13251 error("cannot find extent tree in tree_root");
13256 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13257 if (key.type != BTRFS_ROOT_ITEM_KEY)
13260 key.offset = (u64)-1;
/* Tree reloc roots are read uncached and freed again after the check */
13262 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13263 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13266 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13267 if (IS_ERR(cur_root) || !cur_root) {
13268 error("failed to read tree: %lld", key.objectid);
13272 ret = check_btrfs_root(trans, cur_root, 0, 1);
13275 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13276 btrfs_free_fs_root(cur_root);
/* Re-search the tree root with old_key before advancing to the next item */
13278 btrfs_release_path(&path);
13279 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13280 &old_key, &path, 0, 0);
13284 ret = btrfs_next_item(root1, &path);
13290 /* if repair, update block accounting */
13292 ret = btrfs_fix_block_accounting(trans, root);
13296 err &= ~BG_ACCOUNTING_ERROR;
13300 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13302 btrfs_release_path(&path);
/*
 * Run the chunk/extent check in the implementation selected by check_mode:
 * check_chunks_and_extents_v2() for CHECK_MODE_LOWMEM, otherwise
 * check_chunks_and_extents().
 *
 * @fs_info: filesystem being checked
 */
13307 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
/* Plain status line, printed only when progress reporting is disabled */
13311 if (!ctx.progress_enabled)
13312 fprintf(stderr, "checking extents\n");
13313 if (check_mode == CHECK_MODE_LOWMEM)
13314 ret = check_chunks_and_extents_v2(fs_info);
13316 ret = check_chunks_and_extents(fs_info);
/*
 * Re-initialise the root node of @root with a freshly written header.
 *
 * @trans:     running transaction; its transid becomes the header generation
 * @root:      root to re-initialise
 * @overwrite: NOTE(review): presumably selects reusing the old root block
 *             instead of allocating a new one - the branch is not visible
 *             in this view
 *
 * The header (level, bytenr, generation, backref revision, owner, fsid and
 * chunk tree uuid) is rewritten and the buffer is marked dirty. The root is
 * added to the dirty list at the end.
 */
13321 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13322 struct btrfs_root *root, int overwrite)
13324 struct extent_buffer *c;
13325 struct extent_buffer *old = root->node;
13328 struct btrfs_disk_key disk_key = {0,0,0};
13334 extent_buffer_get(c);
13337 c = btrfs_alloc_free_block(trans, root,
13338 root->fs_info->nodesize,
13339 root->root_key.objectid,
13340 &disk_key, level, 0, 0);
13343 extent_buffer_get(c);
/* Zero the header area, then fill it in field by field */
13347 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13348 btrfs_set_header_level(c, level);
13349 btrfs_set_header_bytenr(c, c->start);
13350 btrfs_set_header_generation(c, trans->transid);
13351 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13352 btrfs_set_header_owner(c, root->root_key.objectid);
13354 write_extent_buffer(c, root->fs_info->fsid,
13355 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13357 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13358 btrfs_header_chunk_tree_uuid(c),
13361 btrfs_mark_buffer_dirty(c);
13363 * this case can happen in the following case:
13365 * 1.overwrite previous root.
13367 * 2.reinit reloc data root, this is because we skip pin
13368 * down reloc data tree before which means we can allocate
13369 * same block bytenr here.
/* Same bytenr as before: the root item is updated in the tree root here */
13371 if (old->start == c->start) {
13372 btrfs_set_root_generation(&root->root_item,
13374 root->root_item.level = btrfs_header_level(root->node);
13375 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13376 &root->root_key, &root->root_item);
13378 free_extent_buffer(c);
13382 free_extent_buffer(old);
13384 add_root_to_dirty_list(root);
/*
 * Pin @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root; passed on unchanged for
 *             child nodes and as 0 for the root blocks found in ROOT_ITEMs
 *
 * ROOT_ITEMs for the extent tree, tree reloc trees and the data reloc tree
 * are skipped.
 */
13388 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13389 struct extent_buffer *eb, int tree_root)
13391 struct extent_buffer *tmp;
13392 struct btrfs_root_item *ri;
13393 struct btrfs_key key;
13395 int level = btrfs_header_level(eb);
13401 * If we have pinned this block before, don't pin it again.
13402 * This can not only avoid forever loop with broken filesystem
13403 * but also give us some speedups.
13405 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13406 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13409 btrfs_pin_extent(fs_info, eb->start, eb->len);
13411 nritems = btrfs_header_nritems(eb);
13412 for (i = 0; i < nritems; i++) {
/* Item handling: only ROOT_ITEMs are followed to the root block they name */
13414 btrfs_item_key_to_cpu(eb, &key, i);
13415 if (key.type != BTRFS_ROOT_ITEM_KEY)
13417 /* Skip the extent root and reloc roots */
13418 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13419 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13420 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13422 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13423 bytenr = btrfs_disk_root_bytenr(eb, ri);
13426 * If at any point we start needing the real root we
13427 * will have to build a stump root for the root we are
13428 * in, but for now this doesn't actually use the root so
13429 * just pass in extent_root.
13431 tmp = read_tree_block(fs_info, bytenr, 0);
13432 if (!extent_buffer_uptodate(tmp)) {
13433 fprintf(stderr, "Error reading root block\n");
13436 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13437 free_extent_buffer(tmp);
/* Node pointer handling: follow the child block pointer of slot i */
13441 bytenr = btrfs_node_blockptr(eb, i);
13443 /* If we aren't the tree root don't read the block */
13444 if (level == 1 && !tree_root) {
13445 btrfs_pin_extent(fs_info, bytenr,
13446 fs_info->nodesize);
13450 tmp = read_tree_block(fs_info, bytenr, 0);
13451 if (!extent_buffer_uptodate(tmp)) {
13452 fprintf(stderr, "Error reading tree block\n");
13455 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13456 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root and then from the tree
 * root.
 *
 * @fs_info: filesystem whose metadata is pinned
 *
 * Returns the result of the tree root walk (called with tree_root == 1).
 */
13465 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13469 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13473 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items of the chunk tree.
 *
 * @fs_info: filesystem whose block groups are recreated
 *
 * The three avail_*_alloc_bits fields are cleared first. Each chunk item
 * then yields a block group via btrfs_add_block_group(), and the chunk's
 * range is marked dirty in fs_info->free_space_cache.
 */
13476 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13478 struct btrfs_block_group_cache *cache;
13479 struct btrfs_path path;
13480 struct extent_buffer *leaf;
13481 struct btrfs_chunk *chunk;
13482 struct btrfs_key key;
13486 btrfs_init_path(&path);
13488 key.type = BTRFS_CHUNK_ITEM_KEY;
13490 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13492 btrfs_release_path(&path);
13497 * We do this in case the block groups were screwed up and had alloc
13498 * bits that aren't actually set on the chunks. This happens with
13499 * restored images every time and could happen in real life I guess.
13501 fs_info->avail_data_alloc_bits = 0;
13502 fs_info->avail_metadata_alloc_bits = 0;
13503 fs_info->avail_system_alloc_bits = 0;
13505 /* First we need to create the in-memory block groups */
/* Past the last slot of this leaf: advance to the next leaf */
13507 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13508 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13510 btrfs_release_path(&path);
13518 leaf = path.nodes[0];
13519 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13520 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* key.offset of a chunk item is the start of the block group being added */
13525 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13526 btrfs_add_block_group(fs_info, 0,
13527 btrfs_chunk_type(leaf, chunk),
13528 key.objectid, key.offset,
13529 btrfs_chunk_length(leaf, chunk));
13530 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13531 key.offset + btrfs_chunk_length(leaf, chunk));
/* Walk the block groups just created, each lookup starting past the previous one */
13536 cache = btrfs_lookup_first_block_group(fs_info, start);
13540 start = cache->key.objectid + cache->key.offset;
13543 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance from the tree root and
 * re-initialise the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Visible steps: delete the BTRFS_BALANCE_OBJECTID item, delete items with
 * objectid BTRFS_TREE_RELOC_OBJECTID in batches via btrfs_del_items(), then
 * read the data reloc root, reinit it with btrfs_fsck_reinit_root() and
 * create its root directory.
 */
13547 static int reset_balance(struct btrfs_trans_handle *trans,
13548 struct btrfs_fs_info *fs_info)
13550 struct btrfs_root *root = fs_info->tree_root;
13551 struct btrfs_path path;
13552 struct extent_buffer *leaf;
13553 struct btrfs_key key;
13554 int del_slot, del_nr = 0;
/* Step 1: look up and delete the balance item */
13558 btrfs_init_path(&path);
13559 key.objectid = BTRFS_BALANCE_OBJECTID;
13560 key.type = BTRFS_BALANCE_ITEM_KEY;
13562 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13567 goto reinit_data_reloc;
13572 ret = btrfs_del_item(trans, root, &path);
13575 btrfs_release_path(&path);
/* Step 2: delete the tree reloc root items */
13577 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13578 key.type = BTRFS_ROOT_ITEM_KEY;
13580 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13584 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13589 ret = btrfs_del_items(trans, root, &path,
13596 btrfs_release_path(&path);
13599 ret = btrfs_search_slot(trans, root, &key, &path,
13606 leaf = path.nodes[0];
13607 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13608 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13610 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13615 del_slot = path.slots[0];
13624 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13628 btrfs_release_path(&path);
/* Step 3: re-initialise the data reloc tree and recreate its root dir */
13631 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13632 key.type = BTRFS_ROOT_ITEM_KEY;
13633 key.offset = (u64)-1;
13634 root = btrfs_read_fs_root(fs_info, &key);
13635 if (IS_ERR(root)) {
13636 fprintf(stderr, "Error reading data reloc tree\n");
13637 ret = PTR_ERR(root);
13640 record_root_in_trans(trans, root);
13641 ret = btrfs_fsck_reinit_root(trans, root, 0);
13644 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13646 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Order of the visible steps: reject MIXED_GROUPS filesystems, pin all
 * metadata blocks, free and recreate the in-memory block groups, reinit the
 * extent root, insert a block group item for every in-memory block group,
 * and finally reset any pending balance.
 */
13650 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13651 struct btrfs_fs_info *fs_info)
13657 * The only reason we don't do this is because right now we're just
13658 * walking the trees we find and pinning down their bytes, we don't look
13659 * at any of the leaves. In order to do mixed groups we'd have to check
13660 * the leaves of any fs roots and pin down the bytes for any file
13661 * extents we find. Not hard but why do it if we don't have to?
13663 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13664 fprintf(stderr, "We don't support re-initing the extent tree "
13665 "for mixed block groups yet, please notify a btrfs "
13666 "developer you want to do this so they can add this "
13667 "functionality.\n");
13672 * first we need to walk all of the trees except the extent tree and pin
13673 * down the bytes that are in use so we don't overwrite any existing
13676 ret = pin_metadata_blocks(fs_info);
13678 fprintf(stderr, "error pinning down used bytes\n");
13683 * Need to drop all the block groups since we're going to recreate all
13686 btrfs_free_block_groups(fs_info);
13687 ret = reset_block_groups(fs_info);
13689 fprintf(stderr, "error resetting the block groups\n");
13693 /* Ok we can allocate now, reinit the extent root */
13694 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13696 fprintf(stderr, "extent root initialization failed\n");
13698 * When the transaction code is updated we should end the
13699 * transaction, but for now progs only knows about commit so
13700 * just return an error.
13706 * Now we have all the in-memory block groups setup so we can make
13707 * allocations properly, and the metadata we care about is safe since we
13708 * pinned all of it above.
13711 struct btrfs_block_group_cache *cache;
/* Insert each in-memory block group's item into the new extent tree */
13713 cache = btrfs_lookup_first_block_group(fs_info, start);
13716 start = cache->key.objectid + cache->key.offset;
13717 ret = btrfs_insert_item(trans, fs_info->extent_root,
13718 &cache->key, &cache->item,
13719 sizeof(cache->item));
13721 fprintf(stderr, "Error adding block group\n");
13724 btrfs_extent_post_op(trans, fs_info->extent_root);
13727 ret = reset_balance(trans, fs_info);
13729 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb (wording taken from the message printed
 * below) using a dedicated transaction.
 *
 * @root is only used to reach fs_info; it is then replaced by the root
 * whose id is stored in the header of @eb (btrfs_header_owner()).
 * PTR_ERR(root) is returned when that root cannot be read, and a
 * PTR_ERR(trans) return follows the transaction start.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored
 * here, and the final return statement is not visible in this excerpt.
 */
13734 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13736 struct btrfs_path path;
13737 struct btrfs_trans_handle *trans;
13738 struct btrfs_key key;
13741 printf("Recowing metadata block %llu\n", eb->start);
/* Build the root item key of the tree recorded as owner of this block. */
13742 key.objectid = btrfs_header_owner(eb);
13743 key.type = BTRFS_ROOT_ITEM_KEY;
13744 key.offset = (u64)-1;
13746 root = btrfs_read_fs_root(root->fs_info, &key);
13747 if (IS_ERR(root)) {
13748 fprintf(stderr, "Couldn't find owner root %llu\n",
13750 return PTR_ERR(root);
13753 trans = btrfs_start_transaction(root, 1);
13755 return PTR_ERR(trans);
13757 btrfs_init_path(&path);
/*
 * Limit the search to the level of @eb and use the key stored in slot 0
 * of the block (node key or item key) as the search target.
 */
13758 path.lowest_level = btrfs_header_level(eb);
13759 if (path.lowest_level)
13760 btrfs_node_key_to_cpu(eb, &key, 0);
13762 btrfs_item_key_to_cpu(eb, &key, 0);
/* NOTE(review): presumably the trailing 1 requests COW - confirm. */
13764 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13765 btrfs_commit_transaction(trans, root);
13766 btrfs_release_path(&path);
/*
 * Delete the item identified by bad->key from the root whose id is
 * bad->root_id, using a dedicated transaction.
 *
 * @root is only used to reach fs_info; it is then replaced by the root
 * looked up from bad->root_id. PTR_ERR(root) is returned when that root
 * cannot be read, and a PTR_ERR(trans) return follows the transaction
 * start.
 *
 * NOTE(review): the handling of the btrfs_search_slot() result and the
 * final return statement are not visible in this excerpt, and the result
 * of btrfs_commit_transaction() is not stored.
 */
13770 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13772 struct btrfs_path path;
13773 struct btrfs_trans_handle *trans;
13774 struct btrfs_key key;
13777 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13778 bad->key.type, bad->key.offset);
/* Root item key used to locate the tree that holds the bad item. */
13779 key.objectid = bad->root_id;
13780 key.type = BTRFS_ROOT_ITEM_KEY;
13781 key.offset = (u64)-1;
13783 root = btrfs_read_fs_root(root->fs_info, &key);
13784 if (IS_ERR(root)) {
13785 fprintf(stderr, "Couldn't find owner root %llu\n",
13787 return PTR_ERR(root);
13790 trans = btrfs_start_transaction(root, 1);
13792 return PTR_ERR(trans);
13794 btrfs_init_path(&path);
/* Position the path on the bad key, then remove the item found there. */
13795 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13801 ret = btrfs_del_item(trans, root, &path);
13803 btrfs_commit_transaction(trans, root);
13804 btrfs_release_path(&path);
/*
 * Reset the log root recorded in the super block copy.
 *
 * Starts a transaction on @root, sets both the log root and the log root
 * level of fs_info->super_copy to 0, and commits. ret receives
 * PTR_ERR(trans) when the transaction cannot be started, otherwise the
 * result of btrfs_commit_transaction().
 */
13808 static int zero_log_tree(struct btrfs_root *root)
13810 struct btrfs_trans_handle *trans;
13813 trans = btrfs_start_transaction(root, 1);
13814 if (IS_ERR(trans)) {
13815 ret = PTR_ERR(trans);
13818 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13819 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13820 ret = btrfs_commit_transaction(trans, root);
/*
 * Feed a data range to btrfs_csum_file_block() one sector at a time.
 *
 * Each iteration reads fs_info->sectorsize bytes at @start + offset into
 * @buf with read_extent_data() and hands that buffer to
 * btrfs_csum_file_block() for @csum_root. The callers in this file pass a
 * @buf of exactly one sector.
 *
 * NOTE(review): the declaration of the loop bound (len) and the error
 * checks after each call are not visible in this excerpt.
 */
13824 static int populate_csum(struct btrfs_trans_handle *trans,
13825 struct btrfs_root *csum_root, char *buf, u64 start,
13828 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13833 while (offset < len) {
13834 sectorsize = fs_info->sectorsize;
13835 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len is the end of the whole range, start + offset this sector. */
13839 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13840 start + offset, buf, sectorsize);
13843 offset += sectorsize;
/*
 * Walk the items of @cur_root and call populate_csum() for every regular
 * file extent found.
 *
 * Only items of type BTRFS_EXTENT_DATA_KEY whose file extent type is
 * BTRFS_FILE_EXTENT_REG are considered; the range handed to
 * populate_csum() is the disk bytenr and disk num bytes of the extent.
 * A scratch buffer of one sector is allocated for the reads.
 *
 * NOTE(review): the initial search key, the handling of -EEXIST and the
 * cleanup of the buffer are not visible in this excerpt.
 */
13848 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13849 struct btrfs_root *csum_root,
13850 struct btrfs_root *cur_root)
13852 struct btrfs_path path;
13853 struct btrfs_key key;
13854 struct extent_buffer *node;
13855 struct btrfs_file_extent_item *fi;
13862 buf = malloc(cur_root->fs_info->sectorsize);
13866 btrfs_init_path(&path);
/* The lookup is done without a transaction handle (NULL). */
13870 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13873 /* Iterate all regular file extents and fill its csum */
13875 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13877 if (key.type != BTRFS_EXTENT_DATA_KEY)
13879 node = path.nodes[0];
13880 slot = path.slots[0];
13881 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13882 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the referenced file range. */
13884 start = btrfs_file_extent_disk_bytenr(node, fi);
13885 len = btrfs_file_extent_disk_num_bytes(node, fi);
13887 ret = populate_csum(trans, csum_root, buf, start, len);
13888 if (ret == -EEXIST)
13894 * TODO: if next leaf is corrupted, jump to nearest next valid
13897 ret = btrfs_next_item(cur_root, &path);
13907 btrfs_release_path(&path);
/*
 * Walk the root items stored in the tree root and call
 * fill_csum_tree_from_one_fs_root() for each fs/subvolume tree.
 *
 * The scan starts at BTRFS_FS_TREE_OBJECTID. Keys are tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree() before the
 * root is read with btrfs_read_fs_root() using key.offset = (u64)-1.
 *
 * NOTE(review): the statements taken when those tests match (skip or
 * stop) and the return paths are not visible in this excerpt.
 */
13912 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13913 struct btrfs_root *csum_root)
13915 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13916 struct btrfs_path path;
13917 struct btrfs_root *tree_root = fs_info->tree_root;
13918 struct btrfs_root *cur_root;
13919 struct extent_buffer *node;
13920 struct btrfs_key key;
13924 btrfs_init_path(&path);
13925 key.objectid = BTRFS_FS_TREE_OBJECTID;
13927 key.type = BTRFS_ROOT_ITEM_KEY;
/* The lookup is done without a transaction handle (NULL). */
13928 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13937 node = path.nodes[0];
13938 slot = path.slots[0];
13939 btrfs_item_key_to_cpu(node, &key, slot);
13940 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13942 if (key.type != BTRFS_ROOT_ITEM_KEY)
13944 if (!is_fstree(key.objectid))
13946 key.offset = (u64)-1;
13948 cur_root = btrfs_read_fs_root(fs_info, &key);
/* Both an error pointer and a NULL root are treated as a read failure. */
13949 if (IS_ERR(cur_root) || !cur_root) {
13950 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13954 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13959 ret = btrfs_next_item(tree_root, &path);
13969 btrfs_release_path(&path);
/*
 * Walk the extent tree and call populate_csum() for every data extent.
 *
 * Only items of type BTRFS_EXTENT_ITEM_KEY whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA are processed; key.objectid of the item is
 * passed as the start of the range. A scratch buffer of one sector is
 * allocated for the reads.
 *
 * NOTE(review): the initial search key fields other than the type, the
 * length argument passed to populate_csum() and the cleanup of the buffer
 * are not visible in this excerpt.
 */
13973 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13974 struct btrfs_root *csum_root)
13976 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13977 struct btrfs_path path;
13978 struct btrfs_extent_item *ei;
13979 struct extent_buffer *leaf;
13981 struct btrfs_key key;
13984 btrfs_init_path(&path);
13986 key.type = BTRFS_EXTENT_ITEM_KEY;
/* The lookup is done without a transaction handle (NULL). */
13988 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13990 btrfs_release_path(&path);
13994 buf = malloc(csum_root->fs_info->sectorsize);
13996 btrfs_release_path(&path);
/* Move to the next leaf once the slot runs past the last item. */
14001 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14002 ret = btrfs_next_leaf(extent_root, &path);
14010 leaf = path.nodes[0];
14012 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14013 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14018 ei = btrfs_item_ptr(leaf, path.slots[0],
14019 struct btrfs_extent_item);
14020 if (!(btrfs_extent_flags(leaf, ei) &
14021 BTRFS_EXTENT_FLAG_DATA)) {
14026 ret = populate_csum(trans, csum_root, buf, key.objectid,
14033 btrfs_release_path(&path);
14039 * Recalculate the csum and put it into the csum tree.
14041 * Extent tree init will wipe out all the extent info, so in that case, we
14042 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
14043 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch helper: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(), otherwise fill_csum_tree_from_extent() is
 * used. The result of the selected function is returned unchanged.
 */
14045 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14046 struct btrfs_root *csum_root,
14047 int search_fs_tree)
14049 if (search_fs_tree)
14050 return fill_csum_tree_from_fs(trans, csum_root);
14052 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-level roots_info_cache.
 *
 * Entries are removed from the cache tree one by one until it is empty,
 * then the tree itself is freed and the global pointer is reset to NULL.
 * The guard at the top covers the case where no cache was allocated.
 *
 * NOTE(review): the statement that releases each root_item_info is not
 * visible in this excerpt.
 */
14055 static void free_roots_info_cache(void)
14057 if (!roots_info_cache)
14060 while (!cache_tree_empty(roots_info_cache)) {
14061 struct cache_extent *entry;
14062 struct root_item_info *rii;
14064 entry = first_cache_extent(roots_info_cache);
14067 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in root_item_info; recover the container. */
14068 rii = container_of(entry, struct root_item_info, cache_extent);
14072 free(roots_info_cache);
14073 roots_info_cache = NULL;
/*
 * Fill roots_info_cache with one root_item_info per root id by scanning
 * the extent tree of @info.
 *
 * Considered items are BTRFS_METADATA_ITEM_KEY items and
 * BTRFS_EXTENT_ITEM_KEY items, the latter being tested for
 * BTRFS_EXTENT_FLAG_TREE_BLOCK. The first inline ref of the item must be
 * of type BTRFS_TREE_BLOCK_REF_KEY; its offset is used as the root id and
 * as the cache key (start = root id, size = 1). For each root id the
 * entry records the highest level seen together with the bytenr and the
 * generation of that extent, and node_count is set to 1 when a new
 * highest level is recorded.
 *
 * NOTE(review): the branch taken when the level equals the recorded one,
 * the error paths and the return statements are not visible in this
 * excerpt.
 */
14076 static int build_roots_info_cache(struct btrfs_fs_info *info)
14079 struct btrfs_key key;
14080 struct extent_buffer *leaf;
14081 struct btrfs_path path;
/* Allocate the global cache tree on first use. */
14083 if (!roots_info_cache) {
14084 roots_info_cache = malloc(sizeof(*roots_info_cache));
14085 if (!roots_info_cache)
14087 cache_tree_init(roots_info_cache);
14090 btrfs_init_path(&path);
14092 key.type = BTRFS_EXTENT_ITEM_KEY;
14094 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14097 leaf = path.nodes[0];
14100 struct btrfs_key found_key;
14101 struct btrfs_extent_item *ei;
14102 struct btrfs_extent_inline_ref *iref;
14103 int slot = path.slots[0];
14108 struct cache_extent *entry;
14109 struct root_item_info *rii;
/* Move to the next leaf once the slot runs past the last item. */
14111 if (slot >= btrfs_header_nritems(leaf)) {
14112 ret = btrfs_next_leaf(info->extent_root, &path);
14119 leaf = path.nodes[0];
14120 slot = path.slots[0];
14123 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14125 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14126 found_key.type != BTRFS_METADATA_ITEM_KEY)
14129 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14130 flags = btrfs_extent_flags(leaf, ei);
14132 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14133 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Metadata items take the level from the key offset and place the inline
 * ref right after the extent item; the other layout has a
 * btrfs_tree_block_info in between, which carries the level.
 */
14136 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14137 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14138 level = found_key.offset;
14140 struct btrfs_tree_block_info *binfo;
14142 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14143 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14144 level = btrfs_tree_block_level(leaf, binfo);
14148 * For a root extent, it must be of the following type and the
14149 * first (and only one) iref in the item.
14151 type = btrfs_extent_inline_ref_type(leaf, iref);
14152 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14155 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14156 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14158 rii = malloc(sizeof(struct root_item_info));
14163 rii->cache_extent.start = root_id;
14164 rii->cache_extent.size = 1;
/* (u8)-1 marks an entry for which no level has been recorded yet. */
14165 rii->level = (u8)-1;
14166 entry = &rii->cache_extent;
14167 ret = insert_cache_extent(roots_info_cache, entry);
14170 rii = container_of(entry, struct root_item_info,
14174 ASSERT(rii->cache_extent.start == root_id);
14175 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first one seen) replaces the recorded candidate. */
14177 if (level > rii->level || rii->level == (u8)-1) {
14178 rii->level = level;
14179 rii->bytenr = found_key.objectid;
14180 rii->gen = btrfs_extent_generation(leaf, ei);
14181 rii->node_count = 1;
14182 } else if (level == rii->level) {
14190 btrfs_release_path(&path);
/*
 * Compare the root item at path->nodes[0] / path->slots[0] with the entry
 * cached for @root_key->objectid in roots_info_cache.
 *
 * The item is considered outdated when its bytenr, level or generation
 * differs from the cached values. In that case a message is printed
 * (suppressed when both @read_only_mode and the repair flag are set) and,
 * when @read_only_mode is 0, the three fields are overwritten in the leaf
 * with the cached values. A root item whose generation is newer than the
 * cached one gets a dedicated message.
 *
 * NOTE(review): the return values and the statements following the error
 * messages are not visible in this excerpt.
 */
14195 static int maybe_repair_root_item(struct btrfs_path *path,
14196 const struct btrfs_key *root_key,
14197 const int read_only_mode)
14199 const u64 root_id = root_key->objectid;
14200 struct cache_extent *entry;
14201 struct root_item_info *rii;
14202 struct btrfs_root_item ri;
14203 unsigned long offset;
14205 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14208 "Error: could not find extent items for root %llu\n",
14209 root_key->objectid);
14213 rii = container_of(entry, struct root_item_info, cache_extent);
14214 ASSERT(rii->cache_extent.start == root_id);
14215 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root node must have been recorded for this root. */
14217 if (rii->node_count != 1) {
14219 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf so it can be compared and edited. */
14224 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14225 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14227 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14228 btrfs_root_level(&ri) != rii->level ||
14229 btrfs_root_generation(&ri) != rii->gen) {
14232 * If we're in repair mode but our caller told us to not update
14233 * the root item, i.e. just check if it needs to be updated, don't
14234 * print this message, since the caller will call us again shortly
14235 * for the same root item without read only mode (the caller will
14236 * open a transaction first).
14238 if (!(read_only_mode && repair))
14240 "%sroot item for root %llu,"
14241 " current bytenr %llu, current gen %llu, current level %u,"
14242 " new bytenr %llu, new gen %llu, new level %u\n",
14243 (read_only_mode ? "" : "fixing "),
14245 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14246 btrfs_root_level(&ri),
14247 rii->bytenr, rii->gen, rii->level);
14249 if (btrfs_root_generation(&ri) > rii->gen) {
14251 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14252 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the cached values back into the leaf at the same offset. */
14256 if (!read_only_mode) {
14257 btrfs_set_root_bytenr(&ri, rii->bytenr);
14258 btrfs_set_root_level(&ri, rii->level);
14259 btrfs_set_root_generation(&ri, rii->gen);
14260 write_extent_buffer(path->nodes[0], &ri,
14261 offset, sizeof(ri));
14271 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14272 * caused read-only snapshots to be corrupted if they were created at a moment
14273 * when the source subvolume/snapshot had orphan items. The issue was that the
14274 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14275 * node instead of the post orphan cleanup root node.
14276 * So this function, and its callees, just detects and fixes those cases. Even
14277 * though the regression was for read-only snapshots, this function applies to
14278 * any snapshot/subvolume root.
14279 * This must be run before any other repair code - not doing so makes other
14280 * repair code delete or modify backrefs in the extent tree for example, which
14281 * will result in an inconsistent fs after repairing the root items.
/*
 * Check every root item in the tree root of @info against the data
 * gathered by build_roots_info_cache() and hand each one to
 * maybe_repair_root_item().
 *
 * The scan starts at BTRFS_FIRST_FREE_OBJECTID; items that are not
 * BTRFS_ROOT_ITEM_KEY or that belong to BTRFS_TREE_RELOC_OBJECTID are
 * tested for explicitly. maybe_repair_root_item() runs in read-only mode
 * while no transaction is open (trans is NULL). The roots info cache is
 * freed before returning. cmd_check() reports a positive result as a
 * number of roots (fixed or outdated) and a negative one as an errno.
 *
 * NOTE(review): the control flow around need_trans, the loop structure
 * and the return statements are not visible in this excerpt.
 */
14283 static int repair_root_items(struct btrfs_fs_info *info)
14285 struct btrfs_path path;
14286 struct btrfs_key key;
14287 struct extent_buffer *leaf;
14288 struct btrfs_trans_handle *trans = NULL;
14291 int need_trans = 0;
14293 btrfs_init_path(&path);
/* Gather the expected bytenr, level and generation of every root first. */
14295 ret = build_roots_info_cache(info);
14299 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14300 key.type = BTRFS_ROOT_ITEM_KEY;
14305 * Avoid opening and committing transactions if a leaf doesn't have
14306 * any root items that need to be fixed, so that we avoid rotating
14307 * backup roots unnecessarily.
14310 trans = btrfs_start_transaction(info->tree_root, 1);
14311 if (IS_ERR(trans)) {
14312 ret = PTR_ERR(trans);
14317 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14321 leaf = path.nodes[0];
14324 struct btrfs_key found_key;
14326 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14327 int no_more_keys = find_next_key(&path, &key);
14329 btrfs_release_path(&path);
14331 ret = btrfs_commit_transaction(trans,
14343 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14345 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14347 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14350 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14354 if (!trans && repair) {
14357 btrfs_release_path(&path);
14367 free_roots_info_cache();
14368 btrfs_release_path(&path);
14370 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group of @fs_info and set the
 * cache generation in the super block copy to (u64)-1.
 *
 * Block groups are visited in address order: each lookup starts at the end
 * (objectid + offset) of the previous block group. The super block update
 * is done inside its own transaction; PTR_ERR(trans) is returned when that
 * transaction cannot be started.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored
 * here, and the loop exit and final return are not visible in this excerpt.
 */
14377 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14379 struct btrfs_trans_handle *trans;
14380 struct btrfs_block_group_cache *bg_cache;
14384 /* Clear all free space cache inodes and its extent data */
14386 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14389 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14392 current = bg_cache->key.objectid + bg_cache->key.offset;
14395 /* Don't forget to set cache_generation to -1 */
14396 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14397 if (IS_ERR(trans)) {
14398 error("failed to update super block cache generation");
14399 return PTR_ERR(trans);
14401 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14402 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache of the requested version.
 *
 * clear_version 1: clear_free_space_cache() is called; when the
 * FREE_SPACE_TREE compat_ro flag is set a message pointing to
 * --clear-space-cache v2 is emitted instead.
 * clear_version 2: btrfs_clear_free_space_tree() is called; when the
 * FREE_SPACE_TREE compat_ro flag is not set there is nothing to clear and
 * a message says so.
 *
 * NOTE(review): the second parameter declaration and the return values are
 * not visible in this excerpt.
 */
14407 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14412 if (clear_version == 1) {
14413 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14415 "free space cache v2 detected, use --clear-space-cache v2");
14419 printf("Clearing free space cache\n");
14420 ret = clear_free_space_cache(fs_info);
14422 error("failed to clear free space cache");
14425 printf("Free space cache cleared\n");
14427 } else if (clear_version == 2) {
14428 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14429 printf("no free space cache v2 to clear\n");
14433 printf("Clear free space cache v2\n");
14434 ret = btrfs_clear_free_space_tree(fs_info);
14436 error("failed to clear free space cache v2: %d", ret);
14439 printf("free space cache v2 cleared\n");
/*
 * Usage text of the check command, passed to usage() by cmd_check().
 * The first entries are the synopsis and a short description, followed by
 * a longer description and one entry per command line option.
 */
14446 const char * const cmd_check_usage[] = {
14447 "btrfs check [options] <device>",
14448 "Check structural integrity of a filesystem (unmounted).",
14449 "Check structural integrity of an unmounted filesystem. Verify internal",
14450 "trees' consistency and item connectivity. In the repair mode try to",
14451 "fix the problems found. ",
14452 "WARNING: the repair mode is considered dangerous",
14454 "-s|--super <superblock> use this superblock copy",
14455 "-b|--backup use the first valid backup root copy",
14456 "--force skip mount checks, repair is not possible",
14457 "--repair try to repair the filesystem",
14458 "--readonly run in read-only mode (default)",
14459 "--init-csum-tree create a new CRC tree",
14460 "--init-extent-tree create a new extent tree",
14461 "--mode <MODE> allows choice of memory/IO trade-offs",
14462 " where MODE is one of:",
14463 " original - read inodes and extents to memory (requires",
14464 " more memory, does less IO)",
14465 " lowmem - try to use less memory but read blocks again",
14467 "--check-data-csum verify checksums of data blocks",
14468 "-Q|--qgroup-report print a report on qgroup consistency",
14469 "-E|--subvol-extents <subvolid>",
14470 " print subvolume extents and sharing state",
14471 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14472 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14473 "-p|--progress indicate progress",
14474 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Visible phases, in order: parse the options with getopt_long(), verify
 * the mount status of the device, open the filesystem with
 * open_ctree_fs_info(), validate the tree, dev and chunk roots, then run
 * the optional one-shot actions (clear space cache, zero the log tree in
 * repair mode, qgroup report, subvolume extent report, extent/csum tree
 * reinitialization). After that the regular checks run: chunks and
 * extents, root items, free space cache or tree, fs roots, csums, root
 * refs and quota groups. A summary of the collected byte counters is
 * printed at the end.
 *
 * NOTE(review): many short statements (error checks, goto/return paths,
 * some case labels and the closing cleanup) are not visible in this
 * excerpt; the exact exit codes must be confirmed against the full file.
 */
14478 int cmd_check(int argc, char **argv)
14480 struct cache_tree root_cache;
14481 struct btrfs_root *root;
14482 struct btrfs_fs_info *info;
14485 u64 tree_root_bytenr = 0;
14486 u64 chunk_root_bytenr = 0;
14487 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14491 int init_csum_tree = 0;
14493 int clear_space_cache = 0;
14494 int qgroup_report = 0;
14495 int qgroups_repaired = 0;
14496 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/*
 * Codes for options that only have a long form. NOTE(review): presumably
 * they start at 257 to stay clear of single-character option values.
 */
14501 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14502 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14503 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14504 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14505 GETOPT_VAL_FORCE };
14506 static const struct option long_options[] = {
14507 { "super", required_argument, NULL, 's' },
14508 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14509 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14510 { "init-csum-tree", no_argument, NULL,
14511 GETOPT_VAL_INIT_CSUM },
14512 { "init-extent-tree", no_argument, NULL,
14513 GETOPT_VAL_INIT_EXTENT },
14514 { "check-data-csum", no_argument, NULL,
14515 GETOPT_VAL_CHECK_CSUM },
14516 { "backup", no_argument, NULL, 'b' },
14517 { "subvol-extents", required_argument, NULL, 'E' },
14518 { "qgroup-report", no_argument, NULL, 'Q' },
14519 { "tree-root", required_argument, NULL, 'r' },
14520 { "chunk-root", required_argument, NULL,
14521 GETOPT_VAL_CHUNK_TREE },
14522 { "progress", no_argument, NULL, 'p' },
14523 { "mode", required_argument, NULL,
14525 { "clear-space-cache", required_argument, NULL,
14526 GETOPT_VAL_CLEAR_SPACE_CACHE},
14527 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14528 { NULL, 0, NULL, 0}
14531 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14535 case 'a': /* ignored */ break;
14537 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14540 num = arg_strtou64(optarg);
14541 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14543 "super mirror should be less than %d",
14544 BTRFS_SUPER_MIRROR_MAX);
14547 bytenr = btrfs_sb_offset(((int)num));
14548 printf("using SB copy %llu, bytenr %llu\n", num,
14549 (unsigned long long)bytenr);
14555 subvolid = arg_strtou64(optarg);
14558 tree_root_bytenr = arg_strtou64(optarg);
14560 case GETOPT_VAL_CHUNK_TREE:
14561 chunk_root_bytenr = arg_strtou64(optarg);
14564 ctx.progress_enabled = true;
14568 usage(cmd_check_usage);
/* Every option that modifies the filesystem also adds OPEN_CTREE_WRITES. */
14569 case GETOPT_VAL_REPAIR:
14570 printf("enabling repair mode\n");
14572 ctree_flags |= OPEN_CTREE_WRITES;
14574 case GETOPT_VAL_READONLY:
14577 case GETOPT_VAL_INIT_CSUM:
14578 printf("Creating a new CRC tree\n");
14579 init_csum_tree = 1;
14581 ctree_flags |= OPEN_CTREE_WRITES;
14583 case GETOPT_VAL_INIT_EXTENT:
14584 init_extent_tree = 1;
14585 ctree_flags |= (OPEN_CTREE_WRITES |
14586 OPEN_CTREE_NO_BLOCK_GROUPS);
14589 case GETOPT_VAL_CHECK_CSUM:
14590 check_data_csum = 1;
14592 case GETOPT_VAL_MODE:
14593 check_mode = parse_check_mode(optarg);
14594 if (check_mode == CHECK_MODE_UNKNOWN) {
14595 error("unknown mode: %s", optarg);
14599 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14600 if (strcmp(optarg, "v1") == 0) {
14601 clear_space_cache = 1;
14602 } else if (strcmp(optarg, "v2") == 0) {
14603 clear_space_cache = 2;
14604 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14607 "invalid argument to --clear-space-cache, must be v1 or v2");
14610 ctree_flags |= OPEN_CTREE_WRITES;
14612 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) is expected. */
14618 if (check_argc_exact(argc - optind, 1))
14619 usage(cmd_check_usage);
14621 if (ctx.progress_enabled) {
14622 ctx.tp = TASK_NOTHING;
14623 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14626 /* This check is the only reason for --readonly to exist */
14627 if (readonly && repair) {
14628 error("repair options are not compatible with --readonly");
14633 * experimental and dangerous
14635 if (repair && check_mode == CHECK_MODE_LOWMEM)
14636 warning("low-memory mode repair support is only partial");
14639 cache_tree_init(&root_cache);
14641 ret = check_mounted(argv[optind]);
14644 error("could not check mount status: %s",
14650 "%s is currently mounted, use --force if you really intend to check the filesystem",
14658 error("repair and --force is not yet supported");
14665 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14669 "filesystem mounted, continuing because of --force");
14671 /* A block device is mounted in exclusive mode by kernel */
14672 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14675 /* only allow partial opening under repair mode */
14677 ctree_flags |= OPEN_CTREE_PARTIAL;
14679 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14680 chunk_root_bytenr, ctree_flags);
14682 error("cannot open file system");
/* Publish the fs_info through the file-level global_info pointer. */
14688 global_info = info;
14689 root = info->fs_root;
14690 uuid_unparse(info->super_copy->fsid, uuidbuf);
14692 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14695 * Check the bare minimum before starting anything else that could rely
14696 * on it, namely the tree roots, any local consistency checks
14698 if (!extent_buffer_uptodate(info->tree_root->node) ||
14699 !extent_buffer_uptodate(info->dev_root->node) ||
14700 !extent_buffer_uptodate(info->chunk_root->node)) {
14701 error("critical roots corrupted, unable to check the filesystem");
14707 if (clear_space_cache) {
14708 ret = do_clear_free_space_cache(info, clear_space_cache);
14714 * repair mode will force us to commit transaction which
14715 * will make us fail to load log tree when mounting.
14717 if (repair && btrfs_super_log_root(info->super_copy)) {
14718 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14724 ret = zero_log_tree(root);
14727 error("failed to zero log tree: %d", ret);
14732 if (qgroup_report) {
14733 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14735 ret = qgroup_verify_all(info);
14742 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14743 subvolid, argv[optind], uuidbuf);
14744 ret = print_extent_state(info, subvolid);
/* Both tree reinitializations share a single transaction. */
14749 if (init_extent_tree || init_csum_tree) {
14750 struct btrfs_trans_handle *trans;
14752 trans = btrfs_start_transaction(info->extent_root, 0);
14753 if (IS_ERR(trans)) {
14754 error("error starting transaction");
14755 ret = PTR_ERR(trans);
14760 if (init_extent_tree) {
14761 printf("Creating a new extent tree\n");
14762 ret = reinit_extent_tree(trans, info);
14768 if (init_csum_tree) {
14769 printf("Reinitialize checksum tree\n");
14770 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14772 error("checksum tree initialization failed: %d",
14779 ret = fill_csum_tree(trans, info->csum_root,
14783 error("checksum tree refilling failed: %d", ret);
14788 * Ok now we commit and run the normal fsck, which will add
14789 * extent entries for all of the items it finds.
14791 ret = btrfs_commit_transaction(trans, info->extent_root);
14796 if (!extent_buffer_uptodate(info->extent_root->node)) {
14797 error("critical: extent_root, unable to check the filesystem");
14802 if (!extent_buffer_uptodate(info->csum_root->node)) {
14803 error("critical: csum_root, unable to check the filesystem");
14809 ret = do_check_chunks_and_extents(info);
14813 "errors found in extent allocation tree or chunk allocation");
14815 ret = repair_root_items(info);
14818 error("failed to repair root items: %s", strerror(-ret));
14822 fprintf(stderr, "Fixed %d roots.\n", ret);
14824 } else if (ret > 0) {
14826 "Found %d roots with an outdated root item.\n",
14829 "Please run a filesystem check with the option --repair to fix them.\n");
14835 if (!ctx.progress_enabled) {
14836 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14837 fprintf(stderr, "checking free space tree\n");
14839 fprintf(stderr, "checking free space cache\n");
14841 ret = check_space_cache(root);
14844 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14845 error("errors found in free space tree");
14847 error("errors found in free space cache");
14852 * We used to have to have these hole extents in between our real
14853 * extents so if we don't have this flag set we need to make sure there
14854 * are no gaps in the file extents for inodes, otherwise we can just
14855 * ignore it when this happens.
14857 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14858 ret = do_check_fs_roots(info, &root_cache);
14861 error("errors found in fs roots");
14865 fprintf(stderr, "checking csums\n");
14866 ret = check_csums(root);
14869 error("errors found in csum tree");
14873 fprintf(stderr, "checking root refs\n");
14874 /* For low memory mode, check_fs_roots_v2 handles root refs */
14875 if (check_mode != CHECK_MODE_LOWMEM) {
14876 ret = check_root_refs(root, &root_cache);
14879 error("errors found in root refs");
14884 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14885 struct extent_buffer *eb;
14887 eb = list_first_entry(&root->fs_info->recow_ebs,
14888 struct extent_buffer, recow);
14889 list_del_init(&eb->recow);
14890 ret = recow_extent_buffer(root, eb);
14893 error("fails to fix transid errors");
14898 while (!list_empty(&delete_items)) {
14899 struct bad_item *bad;
14901 bad = list_first_entry(&delete_items, struct bad_item, list);
14902 list_del_init(&bad->list);
14904 ret = delete_bad_item(root, bad);
14910 if (info->quota_enabled) {
14911 fprintf(stderr, "checking quota groups\n");
14912 ret = qgroup_verify_all(info);
14915 error("failed to check quota groups");
14919 ret = repair_qgroups(info, &qgroups_repaired);
14922 error("failed to repair quota groups");
14928 if (!list_empty(&root->fs_info->recow_ebs)) {
14929 error("transid errors in file system");
14934 printf("found %llu bytes used, ",
14935 (unsigned long long)bytes_used);
14937 printf("error(s) found\n");
14939 printf("no error found\n");
14940 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14941 printf("total tree bytes: %llu\n",
14942 (unsigned long long)total_btree_bytes);
14943 printf("total fs tree bytes: %llu\n",
14944 (unsigned long long)total_fs_tree_bytes);
14945 printf("total extent tree bytes: %llu\n",
14946 (unsigned long long)total_extent_tree_bytes);
14947 printf("btree space waste bytes: %llu\n",
14948 (unsigned long long)btree_space_waste);
14949 printf("file data blocks allocated: %llu\n referenced %llu\n",
14950 (unsigned long long)data_bytes_allocated,
14951 (unsigned long long)data_bytes_referenced);
14953 free_qgroup_counts();
14954 free_root_recs_tree(&root_cache);
14958 if (ctx.progress_enabled)
14959 task_deinit(ctx.info);