2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope checker state: running byte counters, pending work lists,
 * option flags and shared context pointers. Every counter and flag starts
 * at zero and both lists start empty.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
/* Read by maybe_free_inode_rec(): when non-zero the file extent gap check is skipped. */
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Context handed to the progress task; print_status_check() reads its tp and info members. */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/*
 * Operating mode of the checker. parse_check_mode() maps user strings to
 * CHECK_MODE_LOWMEM, CHECK_MODE_ORIGINAL or CHECK_MODE_UNKNOWN; the default
 * is an alias for the original mode.
 */
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Currently selected mode; starts out as the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header shared by data and tree backrefs: both struct data_backref
 * and struct tree_backref embed it as their `node` member. is_data tells the
 * two kinds apart (compare_extent_backref() dispatches on it).
 * NOTE(review): the rb_node member named `node` that
 * rb_node_to_extent_backref() relies on is not visible in this listing.
 */
87 struct extent_backref {
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
98 return rb_entry(node, struct extent_backref, node);
/*
 * Backref describing a data extent reference. It embeds the common
 * extent_backref header as `node`; compare_data_backref() additionally reads
 * the parent, owner, offset, found_ref, disk_bytenr and bytes members
 * (their declarations are not visible in this listing).
 */
101 struct data_backref {
102 struct extent_backref node;
/*
 * Error bits, one distinct bit (1 through 21) per condition.
 * NOTE(review): the code that sets and tests these bits is not visible in
 * this part of the file.
 */
116 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM (1<<14) /* no inode_item */
130 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH (1<<19) /* DIR_INDEX found but not match */
135 #define DIR_COUNT_AGAIN (1<<20) /* DIR isize should be recalculated */
136 #define BG_ACCOUNTING_ERROR (1<<21) /* Block group accounting error */
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
140 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for two data backrefs, given their rb_node members.
 * Both nodes are expected to be data backrefs (a warning is raised when
 * is_data is not set). Keys are compared in this order: parent (which
 * aliases root), owner, offset and, only when both sides have found_ref set,
 * disk_bytenr and bytes.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably positive / negative / zero in the usual comparator convention.
 */
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
145 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147 struct data_backref *back1 = to_data_backref(ext1);
148 struct data_backref *back2 = to_data_backref(ext2);
150 WARN_ON(!ext1->is_data);
151 WARN_ON(!ext2->is_data);
153 /* parent and root are a union, so this covers both */
154 if (back1->parent > back2->parent)
156 if (back1->parent < back2->parent)
159 /* This is a full backref and the parents match. */
160 if (back1->node.full_backref)
163 if (back1->owner > back2->owner)
165 if (back1->owner < back2->owner)
168 if (back1->offset > back2->offset)
170 if (back1->offset < back2->offset)
/* The on-disk location is only compared when both backrefs have found_ref set. */
173 if (back1->found_ref && back2->found_ref) {
174 if (back1->disk_bytenr > back2->disk_bytenr)
176 if (back1->disk_bytenr < back2->disk_bytenr)
179 if (back1->bytes > back2->bytes)
181 if (back1->bytes < back2->bytes)
189 * Much like data_backref, but with the undetermined members removed
190 * and linked through a list_head instead.
191 * During extent scan, it is stored in root->orphan_data_extent.
192 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
/*
 * One orphan data extent, chained through `list`. Records of this type are
 * linked on inode_record::orphan_extents (see clone_inode_rec()).
 * print_orphan_data_extents() reads the objectid, offset and disk_bytenr
 * members, whose declarations are not visible in this listing.
 */
194 struct orphan_data_extent {
195 struct list_head list;
/*
 * Backref describing a tree block reference. It embeds the common
 * extent_backref header as `node`; compare_tree_backref() compares the
 * parent member (declared on a line not visible in this listing).
 */
203 struct tree_backref {
204 struct extent_backref node;
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
213 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for two tree backrefs, given their rb_node members.
 * Both nodes are expected to be tree backrefs (a warning is raised when
 * is_data is set). The only key compared is parent, which aliases root.
 * NOTE(review): the return statements are not visible in this listing.
 */
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
218 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 struct tree_backref *back1 = to_tree_backref(ext1);
221 struct tree_backref *back2 = to_tree_backref(ext2);
223 WARN_ON(ext1->is_data);
224 WARN_ON(ext2->is_data);
226 /* parent and root are a union, so this covers both */
227 if (back1->parent > back2->parent)
229 if (back1->parent < back2->parent)
/*
 * Top-level rb-tree comparator for extent backrefs of either kind. Orders
 * first by is_data, then by full_backref, and finally defers to
 * compare_data_backref() or compare_tree_backref().
 * NOTE(review): the condition selecting between the two final calls and the
 * early return values are not visible in this listing.
 */
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
237 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
240 if (ext1->is_data > ext2->is_data)
243 if (ext1->is_data < ext2->is_data)
246 if (ext1->full_backref > ext2->full_backref)
248 if (ext1->full_backref < ext2->full_backref)
252 return compare_data_backref(node1, node2);
254 return compare_tree_backref(node1, node2);
257 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 needs two bits, which is why that bitfield is declared with width 2. */
258 enum { FLAG_UNSET = 2 };
/*
 * Record for one extent. It carries several list linkages, an rb-tree of
 * backrefs (backref_tree), a cache_extent for insertion into a cache tree,
 * and a set of status bitfields.
 * NOTE(review): a number of members are declared on lines that are not
 * visible in this listing.
 */
260 struct extent_record {
261 struct list_head backrefs;
262 struct list_head dups;
263 struct rb_root backref_tree;
/* Linkage used by to_extent_record() to recover the record from a list entry. */
264 struct list_head list;
265 struct cache_extent cache;
266 struct btrfs_disk_key parent_key;
271 u64 extent_item_refs;
273 u64 parent_generation;
/* Two bits wide so that it can hold FLAG_UNSET (2) in addition to 0 and 1. */
277 unsigned int flag_block_full_backref:2;
278 unsigned int found_rec:1;
279 unsigned int content_checked:1;
280 unsigned int owner_ref_checked:1;
281 unsigned int is_root:1;
282 unsigned int metadata:1;
283 unsigned int bad_full_backref:1;
284 unsigned int crossing_stripes:1;
285 unsigned int wrong_chunk_type:1;
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
290 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, linked on
 * inode_record::backrefs. The found_* bits record which item kinds have been
 * seen for this reference (set in add_inode_backref()). Other code in this
 * file reads the dir, index, namelen, filetype, ref_type, errors and name
 * members, whose declarations are not visible in this listing.
 */
293 struct inode_backref {
294 struct list_head list;
295 unsigned int found_dir_item:1;
296 unsigned int found_dir_index:1;
297 unsigned int found_inode_ref:1;
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
309 return list_entry(entry, struct inode_backref, list);
/*
 * List-linked record about a root item; it includes a drop_key.
 * NOTE(review): the remaining members and the users of this struct are not
 * visible in this listing.
 */
312 struct root_item_record {
313 struct list_head list;
319 struct btrfs_key drop_key;
/*
 * Error bits stored in a backref's `errors` field (see add_inode_backref())
 * and decoded into text by print_ref_error(). Each condition has its own bit.
 */
322 #define REF_ERR_NO_DIR_ITEM (1 << 0)
323 #define REF_ERR_NO_DIR_INDEX (1 << 1)
324 #define REF_ERR_NO_INODE_REF (1 << 2)
325 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
326 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
327 #define REF_ERR_DUP_INODE_REF (1 << 5)
328 #define REF_ERR_INDEX_UNMATCH (1 << 6)
329 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
330 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
331 #define REF_ERR_NO_ROOT_REF (1 << 9)
332 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
333 #define REF_ERR_DUP_ROOT_REF (1 << 11)
334 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent coverage, kept in an rb-tree (see
 * inode_record::holes). The hole helpers below access its node, start and
 * len members, which are declared on lines not visible in this listing.
 */
336 struct file_extent_hole {
/*
 * Everything collected about one inode during the check.
 * NOTE(review): the scalar members used elsewhere in this file (ino, refs,
 * nlink, imode, isize, nbytes, found_link, found_size, extent_start,
 * extent_end, errors) are declared on lines not visible in this listing.
 */
342 struct inode_record {
/* List of struct inode_backref, one per (dir, name) reference. */
343 struct list_head backrefs;
344 unsigned int checked:1;
345 unsigned int merging:1;
346 unsigned int found_inode_item:1;
347 unsigned int found_dir_item:1;
348 unsigned int found_file_extent:1;
349 unsigned int found_csum_item:1;
350 unsigned int some_csum_missing:1;
351 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole. */
364 struct rb_root holes;
/* List of struct orphan_data_extent. */
365 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors and decoded into text by
 * print_inode_error(). Each condition has its own bit.
 */
370 #define I_ERR_NO_INODE_ITEM (1 << 0)
371 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
372 #define I_ERR_DUP_INODE_ITEM (1 << 2)
373 #define I_ERR_DUP_DIR_INDEX (1 << 3)
374 #define I_ERR_ODD_DIR_ITEM (1 << 4)
375 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
376 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
377 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
378 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
379 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
380 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
381 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
382 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
383 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
384 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root, kept on a list. The bitfields record which kinds
 * of reference items were found and whether the root is reachable.
 * NOTE(review): the remaining members and the code that sets these bits are
 * not visible in this listing.
 */
386 struct root_backref {
387 struct list_head list;
388 unsigned int found_dir_item:1;
389 unsigned int found_dir_index:1;
390 unsigned int found_back_ref:1;
391 unsigned int found_forward_ref:1;
392 unsigned int reachable:1;
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
403 return list_entry(entry, struct root_backref, list);
407 struct list_head backrefs;
408 struct cache_extent cache;
409 unsigned int found_root_item:1;
415 struct cache_extent cache;
420 struct cache_extent cache;
421 struct cache_tree root_cache;
422 struct cache_tree inode_cache;
423 struct inode_record *current;
/*
 * State carried through a tree walk: a cache tree of shared nodes plus one
 * shared_node slot per tree level. enter_shared_node() also uses the
 * active_node and root_level members, declared on lines not visible here.
 */
432 struct walk_control {
433 struct cache_tree shared;
434 struct shared_node *nodes[BTRFS_MAX_LEVEL];
440 struct btrfs_key key;
442 struct list_head list;
/*
 * List-linked extent entry.
 * NOTE(review): its other members and its users are not visible in this
 * listing.
 */
445 struct extent_entry {
450 struct list_head list;
/*
 * Per-root information that can be stored in a cache tree via the embedded
 * cache_extent. The members described by the two comments below are declared
 * on lines not visible in this listing.
 */
453 struct root_item_info {
454 /* level of the root */
456 /* number of nodes at this level, must be 1 for a root */
460 struct cache_extent cache_extent;
464 * Error bit for low memory mode check.
466 * Currently no caller cares about it yet. Just internal use for error
/*
 * Low memory mode error bits. Every condition must own a distinct bit so a
 * combined error mask can be decoded unambiguously. CROSSING_STRIPE_BOUNDARY
 * used to share bit 4 with REFERENCER_MISMATCH, which made the two errors
 * indistinguishable; it now uses the first free bit. The other values are
 * unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Progress-reporting task body. `p` is a struct task_ctx. The task period is
 * started at 1000 ms; on each pass the label for priv->tp is printed together
 * with a spinner character picked from work_indicator, ending in '\r' so the
 * line is overwritten in place, and the function then waits for the next
 * period.
 * NOTE(review): the loop structure, the `count` variable and the action
 * taken for TASK_NOTHING are on lines not visible in this listing.
 */
480 static void *print_status_check(void *p)
482 struct task_ctx *priv = p;
483 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Indexed by priv->tp. */
485 static char *task_position_string[] = {
487 "checking free space cache",
491 task_period_start(priv->info, 1000 /* 1s */);
493 if (priv->tp == TASK_NOTHING)
497 printf("%s [%c]\r", task_position_string[priv->tp],
498 work_indicator[count % 4]);
501 task_period_wait(priv->info);
506 static int print_status_return(void *p)
514 static enum btrfs_check_mode parse_check_mode(const char *str)
516 if (strcmp(str, "lowmem") == 0)
517 return CHECK_MODE_LOWMEM;
518 if (strcmp(str, "orig") == 0)
519 return CHECK_MODE_ORIGINAL;
520 if (strcmp(str, "original") == 0)
521 return CHECK_MODE_ORIGINAL;
523 return CHECK_MODE_UNKNOWN;
526 /* Compatible function to allow reuse of old codes */
/*
 * Looks at the first (lowest) hole in the `holes` rb-tree, with a separate
 * path for an empty tree.
 * NOTE(review): the return statements are not visible in this listing.
 * Callers compare the result against isize, so it is presumably the start
 * offset of the first hole and a large sentinel for an empty tree - confirm.
 */
527 static u64 first_extent_gap(struct rb_root *holes)
529 struct file_extent_hole *hole;
531 if (RB_EMPTY_ROOT(holes))
534 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator used when inserting file extent holes. Orders by start
 * offset; for equal starts the length decides which hole becomes the "merge
 * center". The comment in add_file_extent_hole() notes that this comparator
 * will not return 0.
 * NOTE(review): the actual return values are not visible in this listing.
 */
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
540 struct file_extent_hole *hole1;
541 struct file_extent_hole *hole2;
543 hole1 = rb_entry(node1, struct file_extent_hole, node);
544 hole2 = rb_entry(node2, struct file_extent_hole, node);
546 if (hole1->start > hole2->start)
548 if (hole1->start < hole2->start)
550 /* Now hole1->start == hole2->start */
551 if (hole1->len >= hole2->len)
553 * Hole 1 will be merge center
554 * Same hole will be merged later
557 /* Hole 2 will be merge center */
562 * Add a hole to the record
564 * This will do hole merge for copy_file_extent_holes(),
565 * which will ensure there won't be continuous holes.
/*
 * Allocate a hole, insert it into `holes`, then coalesce it with neighbours:
 * first with the previous hole if that one reaches the new hole's start,
 * then repeatedly with following holes that begin at or before the new
 * hole's end.
 * NOTE(review): the start/len parameters, the allocation failure check, the
 * freeing of erased neighbours and the return value are on lines not visible
 * in this listing.
 */
567 static int add_file_extent_hole(struct rb_root *holes,
570 struct file_extent_hole *hole;
571 struct file_extent_hole *prev = NULL;
572 struct file_extent_hole *next = NULL;
574 hole = malloc(sizeof(*hole));
579 /* Since compare will not return 0, no -EEXIST will happen */
580 rb_insert(holes, &hole->node, compare_hole);
582 /* simple merge with previous hole */
583 if (rb_prev(&hole->node))
584 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Previous hole touches or overlaps: grow the new hole backwards and drop prev. */
586 if (prev && prev->start + prev->len >= hole->start) {
587 hole->len = hole->start + hole->len - prev->start;
588 hole->start = prev->start;
589 rb_erase(&prev->node, holes);
594 /* iterate merge with next holes */
596 if (!rb_next(&hole->node))
598 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
600 if (hole->start + hole->len >= next->start) {
/* Only extend when next reaches further than the current hole does. */
601 if (hole->start + hole->len <= next->start + next->len)
602 hole->len = next->start + next->len -
604 rb_erase(&next->node, holes);
/*
 * Search comparator for rb_search(). `data` points to a struct
 * file_extent_hole used as the search key; `node` is the tree hole being
 * examined. It distinguishes a key start below the hole from a key start
 * inside [hole->start, hole->start + hole->len).
 * NOTE(review): the assignment of `start` from the key and the return values
 * are not visible in this listing.
 */
613 static int compare_hole_range(struct rb_node *node, void *data)
615 struct file_extent_hole *hole;
618 hole = (struct file_extent_hole *)data;
621 hole = rb_entry(node, struct file_extent_hole, node);
622 if (start < hole->start)
624 if (start >= hole->start && start < hole->start + hole->len)
630 * Delete a hole in the record
632 * This will do the hole split and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole tree. The range must
 * lie inside a single existing hole: that hole is located with
 * compare_hole_range(), erased, and whatever remains in front of and behind
 * the deleted range is re-added through add_file_extent_hole().
 * NOTE(review): the start/len parameters, the "not found" and "range
 * exceeds hole" outcomes, the guards around the two re-adds and the return
 * value are on lines not visible in this listing.
 */
634 static int del_file_extent_hole(struct rb_root *holes,
637 struct file_extent_hole *hole;
638 struct file_extent_hole tmp;
643 struct rb_node *node;
650 node = rb_search(holes, &tmp, compare_hole_range, NULL);
653 hole = rb_entry(node, struct file_extent_hole, node);
654 if (start + len > hole->start + hole->len)
658 * Now there will be no overlap, delete the hole and re-add the
659 * split(s) if they exists.
/* Leading remainder: the part of the hole before the deleted range. */
661 if (start > hole->start) {
662 prev_start = hole->start;
663 prev_len = start - hole->start;
/* Trailing remainder: the part of the hole after the deleted range. */
666 if (hole->start + hole->len > start + len) {
667 next_start = start + len;
668 next_len = hole->start + hole->len - start - len;
671 rb_erase(node, holes);
674 ret = add_file_extent_hole(holes, prev_start, prev_len);
679 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the `src` tree to `dst` by walking src in order and
 * calling add_file_extent_hole() for each, which also merges adjacent holes
 * in dst. The src tree is only read.
 * NOTE(review): the src parameter line, the loop condition, error handling
 * and return value are not visible in this listing.
 */
686 static int copy_file_extent_holes(struct rb_root *dst,
689 struct file_extent_hole *hole;
690 struct rb_node *node;
693 node = rb_first(src);
695 hole = rb_entry(node, struct file_extent_hole, node);
696 ret = add_file_extent_hole(dst, hole->start, hole->len);
699 node = rb_next(node);
/*
 * Empty the hole tree: repeatedly take the first node, erase it from the
 * tree, and fetch the new first node.
 * NOTE(review): the loop condition and the release of each hole's memory are
 * on lines not visible in this listing.
 */
704 static void free_file_extent_holes(struct rb_root *holes)
706 struct rb_node *node;
707 struct file_extent_hole *hole;
709 node = rb_first(holes);
711 hole = rb_entry(node, struct file_extent_hole, node);
712 rb_erase(node, holes);
714 node = rb_first(holes);
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721 struct btrfs_root *root)
723 if (root->last_trans != trans->transid) {
724 root->track_dirty = 1;
725 root->last_trans = trans->transid;
726 root->commit_root = root->node;
727 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_* value
 * using a lookup table indexed by (imode & S_IFMT) >> S_SHIFT. Table slots
 * without an explicit initializer are zero.
 * NOTE(review): the definition of S_SHIFT is not visible in this listing.
 */
731 static u8 imode_to_type(u32 imode)
734 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
736 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
737 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
738 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
739 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
740 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
741 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
744 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparator for struct device_record nodes; the ordering key is
 * devid.
 * NOTE(review): the return values are not visible in this listing.
 */
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
750 struct device_record *rec1;
751 struct device_record *rec2;
753 rec1 = rb_entry(node1, struct device_record, node);
754 rec2 = rb_entry(node2, struct device_record, node);
755 if (rec1->devid > rec2->devid)
757 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of an inode record. The scalar fields are copied with
 * memcpy(), then the backref list, the orphan extent list and the hole tree
 * are rebuilt so that the clone owns its own copies.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.
 * NOTE(review): the remaining error checks, the cleanup labels, the reset of
 * the clone's reference count and the final returns are on lines not visible
 * in this listing.
 */
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
765 struct inode_record *rec;
766 struct inode_backref *backref;
767 struct inode_backref *orig;
768 struct inode_backref *tmp;
769 struct orphan_data_extent *src_orphan;
770 struct orphan_data_extent *dst_orphan;
775 rec = malloc(sizeof(*rec));
777 return ERR_PTR(-ENOMEM);
778 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy copied the source's list heads and tree root; reset them before rebuilding. */
780 INIT_LIST_HEAD(&rec->backrefs);
781 INIT_LIST_HEAD(&rec->orphan_extents);
782 rec->holes = RB_ROOT;
784 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Each backref is allocated with its name and a trailing NUL appended. */
785 size = sizeof(*orig) + orig->namelen + 1;
786 backref = malloc(size);
791 memcpy(backref, orig, size);
792 list_add_tail(&backref->list, &rec->backrefs);
794 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795 dst_orphan = malloc(sizeof(*dst_orphan));
800 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
803 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Walks the holes already copied into the clone; presumably the error path that releases them. */
810 rb = rb_first(&rec->holes);
812 struct file_extent_hole *hole;
814 hole = rb_entry(rb, struct file_extent_hole, node);
820 if (!list_empty(&rec->backrefs))
821 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822 list_del(&orig->list);
/*
 * NOTE(review): rec->orphan_extents holds struct orphan_data_extent entries,
 * but this loop iterates with struct inode_backref cursors (orig/tmp). That
 * only works if `list` sits at the same offset in both structs; dedicated
 * orphan_data_extent cursors would be safer.
 */
826 if (!list_empty(&rec->orphan_extents))
827 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828 list_del(&orig->list);
/*
 * Print, to stdout, a header naming the tree followed by one line per
 * orphan data extent on the list (inode, offset, disk_bytenr, disk_len).
 * An empty list is checked for up front.
 * NOTE(review): the second parameter, the action for an empty list and the
 * final printf argument are on lines not visible in this listing.
 */
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
840 struct orphan_data_extent *orphan;
842 if (list_empty(orphan_extents))
844 printf("The following data extent is lost in tree %llu:\n",
846 list_for_each_entry(orphan, orphan_extents, list) {
847 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for one inode on stderr: a "root N inode M
 * errors X" prefix, then one ", <description>" fragment per I_ERR_* bit set
 * in rec->errors, then a newline. For inodes in a reloc tree the prefix
 * starts with "reloc" and the root id printed is root_key.offset. Orphan
 * extents and file extent holes are listed afterwards when their error bits
 * are set.
 * NOTE(review): any early exit for rec->errors == 0 and the exact loop and
 * fallback logic of the hole dump are on lines not visible in this listing.
 */
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
855 u64 root_objectid = root->root_key.objectid;
856 int errors = rec->errors;
860 /* reloc root errors, we print its corresponding fs root objectid*/
861 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862 root_objectid = root->root_key.offset;
863 fprintf(stderr, "reloc");
865 fprintf(stderr, "root %llu inode %llu errors %x",
866 (unsigned long long) root_objectid,
867 (unsigned long long) rec->ino, rec->errors);
869 if (errors & I_ERR_NO_INODE_ITEM)
870 fprintf(stderr, ", no inode item");
871 if (errors & I_ERR_NO_ORPHAN_ITEM)
872 fprintf(stderr, ", no orphan item");
873 if (errors & I_ERR_DUP_INODE_ITEM)
874 fprintf(stderr, ", dup inode item");
875 if (errors & I_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & I_ERR_ODD_DIR_ITEM)
878 fprintf(stderr, ", odd dir item");
879 if (errors & I_ERR_ODD_FILE_EXTENT)
880 fprintf(stderr, ", odd file extent");
881 if (errors & I_ERR_BAD_FILE_EXTENT)
882 fprintf(stderr, ", bad file extent");
883 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884 fprintf(stderr, ", file extent overlap");
885 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886 fprintf(stderr, ", file extent discount");
887 if (errors & I_ERR_DIR_ISIZE_WRONG)
888 fprintf(stderr, ", dir isize wrong");
889 if (errors & I_ERR_FILE_NBYTES_WRONG)
890 fprintf(stderr, ", nbytes wrong");
891 if (errors & I_ERR_ODD_CSUM_ITEM)
892 fprintf(stderr, ", odd csum item");
893 if (errors & I_ERR_SOME_CSUM_MISSING)
894 fprintf(stderr, ", some csum missing");
895 if (errors & I_ERR_LINK_COUNT_WRONG)
896 fprintf(stderr, ", link count wrong");
897 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898 fprintf(stderr, ", orphan file extent");
899 fprintf(stderr, "\n");
900 /* Print the orphan extents if needed */
901 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
904 /* Print the holes if needed */
905 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906 struct file_extent_hole *hole;
907 struct rb_node *node;
910 node = rb_first(&rec->holes);
911 fprintf(stderr, "Found file extent holes:\n");
914 hole = rb_entry(node, struct file_extent_hole, node);
915 fprintf(stderr, "\tstart: %llu, len: %llu\n",
916 hole->start, hole->len);
917 node = rb_next(node);
/* Separate message with a fixed start of 0; its condition is not visible here. */
920 fprintf(stderr, "\tstart: 0, len: %llu\n",
922 root->fs_info->sectorsize));
926 static void print_ref_error(int errors)
928 if (errors & REF_ERR_NO_DIR_ITEM)
929 fprintf(stderr, ", no dir item");
930 if (errors & REF_ERR_NO_DIR_INDEX)
931 fprintf(stderr, ", no dir index");
932 if (errors & REF_ERR_NO_INODE_REF)
933 fprintf(stderr, ", no inode ref");
934 if (errors & REF_ERR_DUP_DIR_ITEM)
935 fprintf(stderr, ", dup dir item");
936 if (errors & REF_ERR_DUP_DIR_INDEX)
937 fprintf(stderr, ", dup dir index");
938 if (errors & REF_ERR_DUP_INODE_REF)
939 fprintf(stderr, ", dup inode ref");
940 if (errors & REF_ERR_INDEX_UNMATCH)
941 fprintf(stderr, ", index mismatch");
942 if (errors & REF_ERR_FILETYPE_UNMATCH)
943 fprintf(stderr, ", filetype mismatch");
944 if (errors & REF_ERR_NAME_TOO_LONG)
945 fprintf(stderr, ", name too long");
946 if (errors & REF_ERR_NO_ROOT_REF)
947 fprintf(stderr, ", no root ref");
948 if (errors & REF_ERR_NO_ROOT_BACKREF)
949 fprintf(stderr, ", no root backref");
950 if (errors & REF_ERR_DUP_ROOT_REF)
951 fprintf(stderr, ", dup root ref");
952 if (errors & REF_ERR_DUP_ROOT_BACKREF)
953 fprintf(stderr, ", dup root backref");
954 fprintf(stderr, "\n");
/*
 * Look up the inode record for `ino` in `inode_cache`, creating it when
 * needed.
 *
 * When an existing record is shared (refs > 1) and the caller intends to
 * modify it (`mod`), the cache entry is switched to a private clone made by
 * clone_inode_rec(). A newly created record starts zeroed, with
 * extent_start set to (u64)-1 and empty backref/orphan lists and hole tree,
 * and is inserted into the cache under key [ino, 1).
 *
 * Returns an ERR_PTR: -ENOMEM on allocation failure, -EEXIST on the error
 * path that follows insert_cache_extent().
 * NOTE(review): the ino/mod parameter line, the branch conditions, the
 * special handling of BTRFS_FREE_INO_OBJECTID and the success return are on
 * lines not visible in this listing.
 */
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
960 struct ptr_node *node;
961 struct cache_extent *cache;
962 struct inode_record *rec = NULL;
965 cache = lookup_cache_extent(inode_cache, ino, 1);
967 node = container_of(cache, struct ptr_node, cache);
/* Copy-before-modify: never change a record that other holders still reference. */
969 if (mod && rec->refs > 1) {
970 node->data = clone_inode_rec(rec);
971 if (IS_ERR(node->data))
977 rec = calloc(1, sizeof(*rec));
979 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for this value. */
981 rec->extent_start = (u64)-1;
983 INIT_LIST_HEAD(&rec->backrefs);
984 INIT_LIST_HEAD(&rec->orphan_extents);
985 rec->holes = RB_ROOT;
987 node = malloc(sizeof(*node));
990 return ERR_PTR(-ENOMEM);
992 node->cache.start = ino;
993 node->cache.size = 1;
996 if (ino == BTRFS_FREE_INO_OBJECTID)
999 ret = insert_cache_extent(inode_cache, &node->cache);
1001 return ERR_PTR(-EEXIST);
/*
 * Drain a list of struct orphan_data_extent: take the first entry and unlink
 * it, until the list is empty.
 * NOTE(review): the release of each entry's memory is on a line not visible
 * in this listing.
 */
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1008 struct orphan_data_extent *orphan;
1010 while (!list_empty(orphan_extents)) {
1011 orphan = list_entry(orphan_extents->next,
1012 struct orphan_data_extent, list);
1013 list_del(&orphan->list);
/*
 * Drop one reference on an inode record. The reference count is decremented
 * first and tested for remaining holders; the teardown below unlinks all
 * backrefs and releases the orphan extent list and the hole tree.
 * NOTE(review): the early return for a still-referenced record and the
 * free() calls are on lines not visible in this listing.
 */
1018 static void free_inode_rec(struct inode_record *rec)
1020 struct inode_backref *backref;
1022 if (--rec->refs > 0)
1025 while (!list_empty(&rec->backrefs)) {
1026 backref = to_inode_backref(rec->backrefs.next);
1027 list_del(&backref->list);
1030 free_orphan_data_extents(&rec->orphan_extents);
1031 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record is fully resolved: no errors, marked checked,
 * inode item found, nlink equal to the number of links found, and no
 * backrefs left on its list.
 * NOTE(review): the return statements are not visible in this listing;
 * maybe_free_inode_rec() treats a non-zero result as "can be freed".
 */
1035 static int can_free_inode_rec(struct inode_record *rec)
1037 if (!rec->errors && rec->checked && rec->found_inode_item &&
1038 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate an inode record and release it once it is fully resolved.
 *
 * Nothing is evaluated until the inode item has been found. Backrefs that
 * have both a dir item and a dir index get their file type checked against
 * the inode mode, and clean, complete backrefs are unlinked. Once the record
 * is checked and not being merged, type-specific consistency errors are
 * accumulated in rec->errors. If can_free_inode_rec() then holds, the record
 * is removed from `inode_cache` and released.
 * NOTE(review): the early returns and the freeing of unlinked backrefs and
 * of the ptr_node are on lines not visible in this listing.
 */
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044 struct inode_record *rec)
1046 struct cache_extent *cache;
1047 struct inode_backref *tmp, *backref;
1048 struct ptr_node *node;
1051 if (!rec->found_inode_item)
1054 filetype = imode_to_type(rec->imode);
1055 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056 if (backref->found_dir_item && backref->found_dir_index) {
1057 if (backref->filetype != filetype)
1058 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A backref is dropped only when it is error free, has its inode ref, and the link count matches. */
1059 if (!backref->errors && backref->found_inode_ref &&
1060 rec->nlink == rec->found_link) {
1061 list_del(&backref->list);
1067 if (!rec->checked || rec->merging)
/* Directories: size must match what was found, and no file extents are expected. */
1070 if (S_ISDIR(rec->imode)) {
1071 if (rec->found_size != rec->isize)
1072 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073 if (rec->found_file_extent)
1074 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Regular files and symlinks: no dir items, nbytes must match, extents must cover isize. */
1075 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076 if (rec->found_dir_item)
1077 rec->errors |= I_ERR_ODD_DIR_ITEM;
1078 if (rec->found_size != rec->nbytes)
1079 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* The gap check is skipped for unlinked inodes and when no_holes is set. */
1080 if (rec->nlink > 0 && !no_holes &&
1081 (rec->extent_end < rec->isize ||
1082 first_extent_gap(&rec->holes) < rec->isize))
1083 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1086 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087 if (rec->found_csum_item && rec->nodatasum)
1088 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089 if (rec->some_csum_missing && !rec->nodatasum)
1090 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* The record must be exclusively owned at this point. */
1093 BUG_ON(rec->refs != 1);
1094 if (can_free_inode_rec(rec)) {
1095 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096 node = container_of(cache, struct ptr_node, cache);
1097 BUG_ON(node->data != rec);
1098 remove_cache_extent(inode_cache, &node->cache);
1100 free_inode_rec(rec);
/*
 * Search `root` for an orphan item key (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY). The search is issued without a transaction handle
 * and with zero for the last two arguments, and the path is released right
 * afterwards.
 * NOTE(review): the key offset (presumably `ino`) and the mapping of the
 * search result to the return value are on lines not visible in this
 * listing.
 */
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1106 struct btrfs_path path;
1107 struct btrfs_key key;
1110 key.objectid = BTRFS_ORPHAN_OBJECTID;
1111 key.type = BTRFS_ORPHAN_ITEM_KEY;
1114 btrfs_init_path(&path);
1115 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM found at `slot` of leaf `eb` in the
 * active node's current inode record. The record must belong to
 * key->objectid and must not be shared. A second inode item for the same
 * record is flagged as I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes
 * and mode are copied from the item, an inode with nlink == 0 is
 * provisionally flagged I_ERR_NO_ORPHAN_ITEM, and the record is re-evaluated
 * with maybe_free_inode_rec().
 * NOTE(review): the statement executed for BTRFS_INODE_NODATASUM and the
 * return values are on lines not visible in this listing.
 */
1122 static int process_inode_item(struct extent_buffer *eb,
1123 int slot, struct btrfs_key *key,
1124 struct shared_node *active_node)
1126 struct inode_record *rec;
1127 struct btrfs_inode_item *item;
1129 rec = active_node->current;
1130 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131 if (rec->found_inode_item) {
1132 rec->errors |= I_ERR_DUP_INODE_ITEM;
1135 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136 rec->nlink = btrfs_inode_nlink(eb, item);
1137 rec->isize = btrfs_inode_size(eb, item);
1138 rec->nbytes = btrfs_inode_nbytes(eb, item);
1139 rec->imode = btrfs_inode_mode(eb, item);
1140 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1142 rec->found_inode_item = 1;
1143 if (rec->nlink == 0)
1144 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of `rec` matching (dir, name, namelen), or create one.
 * Existing backrefs are matched on directory, name length and name bytes;
 * the search is cut short for the BTRFS_MULTIPLE_OBJECTIDS pseudo inode. A
 * new backref is allocated with room for the name plus a terminating NUL,
 * zeroed, given its name and appended to rec->backrefs.
 * NOTE(review): the `name` parameter line, the loop's continue/return
 * statements, the allocation failure check and the assignment of `dir` are
 * on lines not visible in this listing.
 */
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1151 int namelen, u64 dir)
1153 struct inode_backref *backref;
1155 list_for_each_entry(backref, &rec->backrefs, list) {
1156 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1158 if (backref->dir != dir || backref->namelen != namelen)
1160 if (memcmp(name, backref->name, namelen))
1165 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed-size header is zeroed; the name bytes are filled in below. */
1168 memset(backref, 0, sizeof(*backref));
1170 backref->namelen = namelen;
1171 memcpy(backref->name, name, namelen);
1172 backref->name[namelen] = '\0';
1173 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type `itemtype` referencing inode `ino` under
 * (dir, name) has been seen. The inode record is fetched (or created) in
 * modify mode, the matching backref is looked up or created, `errors` is
 * OR-ed into it, and the backref is updated according to the item type:
 *
 *  - BTRFS_DIR_INDEX_KEY: flags duplicates, index mismatch against an inode
 *    ref and file type mismatch against a dir item; stores index and type.
 *  - BTRFS_DIR_ITEM_KEY: flags duplicates and file type mismatch against a
 *    dir index; stores the file type.
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: flags duplicates and
 *    index mismatch against a dir index; stores index and ref type.
 *
 * Finally the record is re-evaluated with maybe_free_inode_rec().
 * NOTE(review): failure to obtain the inode record is fatal (BUG_ON). The
 * handling of a failed backref lookup, the link counting, the fallback
 * branch for other item types and the return value are on lines not visible
 * in this listing.
 */
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178 u64 ino, u64 dir, u64 index,
1179 const char *name, int namelen,
1180 u8 filetype, u8 itemtype, int errors)
1182 struct inode_record *rec;
1183 struct inode_backref *backref;
1185 rec = get_inode_rec(inode_cache, ino, 1);
1186 BUG_ON(IS_ERR(rec));
1187 backref = get_inode_backref(rec, name, namelen, dir);
1190 backref->errors |= errors;
1191 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192 if (backref->found_dir_index)
1193 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194 if (backref->found_inode_ref && backref->index != index)
1195 backref->errors |= REF_ERR_INDEX_UNMATCH;
1196 if (backref->found_dir_item && backref->filetype != filetype)
1197 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1199 backref->index = index;
1200 backref->filetype = filetype;
1201 backref->found_dir_index = 1;
1202 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1204 if (backref->found_dir_item)
1205 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206 if (backref->found_dir_index && backref->filetype != filetype)
1207 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1209 backref->filetype = filetype;
1210 backref->found_dir_item = 1;
1211 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213 if (backref->found_inode_ref)
1214 backref->errors |= REF_ERR_DUP_INODE_REF;
1215 if (backref->found_dir_index && backref->index != index)
1216 backref->errors |= REF_ERR_INDEX_UNMATCH;
1218 backref->index = index;
1220 backref->ref_type = itemtype;
1221 backref->found_inode_ref = 1;
1226 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about `src` into `dst`, which lives in `dst_cache`.
 *
 * Each backref of src is replayed into dst through add_inode_backref(), once
 * per item kind it has seen. The found_* flags are OR-ed, holes are copied
 * when src's first gap starts earlier than dst's, link and size totals are
 * added, extent ranges are merged (flagging overlap, or recording the gap
 * between the two ranges as a hole), errors are OR-ed, and the inode item
 * fields are taken from src if dst has none yet. Two inode items count as
 * I_ERR_DUP_INODE_ITEM.
 * NOTE(review): the maintenance of `dir_count`, the merging-flag handling,
 * the error checks after the hole operations and the return value are on
 * lines not visible in this listing.
 */
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231 struct cache_tree *dst_cache)
1233 struct inode_backref *backref;
1238 list_for_each_entry(backref, &src->backrefs, list) {
1239 if (backref->found_dir_index) {
1240 add_inode_backref(dst_cache, dst->ino, backref->dir,
1241 backref->index, backref->name,
1242 backref->namelen, backref->filetype,
1243 BTRFS_DIR_INDEX_KEY, backref->errors);
1245 if (backref->found_dir_item) {
1247 add_inode_backref(dst_cache, dst->ino,
1248 backref->dir, 0, backref->name,
1249 backref->namelen, backref->filetype,
1250 BTRFS_DIR_ITEM_KEY, backref->errors);
1252 if (backref->found_inode_ref) {
1253 add_inode_backref(dst_cache, dst->ino,
1254 backref->dir, backref->index,
1255 backref->name, backref->namelen, 0,
1256 backref->ref_type, backref->errors);
1260 if (src->found_dir_item)
1261 dst->found_dir_item = 1;
1262 if (src->found_file_extent)
1263 dst->found_file_extent = 1;
1264 if (src->found_csum_item)
1265 dst->found_csum_item = 1;
1266 if (src->some_csum_missing)
1267 dst->some_csum_missing = 1;
1268 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted from src's link total before adding it to dst. */
1274 BUG_ON(src->found_link < dir_count);
1275 dst->found_link += src->found_link - dir_count;
1276 dst->found_size += src->found_size;
/* (u64)-1 means "no extent seen"; only merge ranges when src actually has one. */
1277 if (src->extent_start != (u64)-1) {
1278 if (dst->extent_start == (u64)-1) {
1279 dst->extent_start = src->extent_start;
1280 dst->extent_end = src->extent_end;
1282 if (dst->extent_end > src->extent_start)
1283 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284 else if (dst->extent_end < src->extent_start) {
1285 ret = add_file_extent_hole(&dst->holes,
1287 src->extent_start - dst->extent_end);
1289 if (dst->extent_end < src->extent_end)
1290 dst->extent_end = src->extent_end;
1294 dst->errors |= src->errors;
1295 if (src->found_inode_item) {
1296 if (!dst->found_inode_item) {
1297 dst->nlink = src->nlink;
1298 dst->isize = src->isize;
1299 dst->nbytes = src->nbytes;
1300 dst->imode = src->imode;
1301 dst->nodatasum = src->nodatasum;
1302 dst->found_inode_item = 1;
1304 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move or copy the inode records cached in `src_node` into `dst_node`.
 *
 * One reference on src_node is dropped first. The root_cache is processed
 * and then, in a second pass, the inode_cache. For every source entry a new
 * ptr_node is inserted into the destination cache; when the destination
 * already has a record for that inode (-EEXIST) the two are merged with
 * merge_inode_recs(), the merged record is marked checked, re-evaluated and
 * the source record released. Finally dst_node->current is advanced to
 * src's current inode when that inode number is higher (or dst has none),
 * after marking the previous current record checked.
 * NOTE(review): what the dropped-to-zero reference count selects (move vs.
 * copy), the loop conditions, the reference counting of copied records and
 * the return value are on lines not visible in this listing.
 */
1312 static int splice_shared_node(struct shared_node *src_node,
1313 struct shared_node *dst_node)
1315 struct cache_extent *cache;
1316 struct ptr_node *node, *ins;
1317 struct cache_tree *src, *dst;
1318 struct inode_record *rec, *conflict;
1319 u64 current_ino = 0;
1323 if (--src_node->refs == 0)
1325 if (src_node->current)
1326 current_ino = src_node->current->ino;
1328 src = &src_node->root_cache;
1329 dst = &dst_node->root_cache;
1331 cache = search_cache_extent(src, 0);
1333 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the source tree. */
1335 cache = next_cache_extent(cache);
1338 remove_cache_extent(src, &node->cache);
1341 ins = malloc(sizeof(*ins));
1343 ins->cache.start = node->cache.start;
1344 ins->cache.size = node->cache.size;
1348 ret = insert_cache_extent(dst, &ins->cache);
1349 if (ret == -EEXIST) {
1350 conflict = get_inode_rec(dst, rec->ino, 1);
1351 BUG_ON(IS_ERR(conflict));
1352 merge_inode_recs(rec, conflict, dst);
1354 conflict->checked = 1;
/* maybe_free_inode_rec() may release `conflict`, so stop pointing at it first. */
1355 if (dst_node->current == conflict)
1356 dst_node->current = NULL;
1358 maybe_free_inode_rec(dst, conflict);
1359 free_inode_rec(rec);
/* Second pass: repeat the same procedure for the inode caches. */
1366 if (src == &src_node->root_cache) {
1367 src = &src_node->inode_cache;
1368 dst = &dst_node->inode_cache;
1372 if (current_ino > 0 && (!dst_node->current ||
1373 current_ino > dst_node->current->ino)) {
1374 if (dst_node->current) {
1375 dst_node->current->checked = 1;
1376 maybe_free_inode_rec(dst, dst_node->current);
1378 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for an inode-record cache tree: recovers the
 * ptr_node that embeds @cache and releases the inode record with
 * free_inode_rec().
 */
1384 static void free_inode_ptr(struct cache_extent *cache)
1386 struct ptr_node *node;
1387 struct inode_record *rec;
1389 node = container_of(cache, struct ptr_node, cache);
1391 free_inode_rec(rec);
/*
 * Presumably expands to free_inode_recs_tree(), which is called elsewhere
 * in this file with free_inode_ptr as the per-entry callback -- confirm
 * against the macro definition.
 */
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for tree block @bytenr in @shared.
 * The lookup uses a size of 1, matching the size add_shared_node() stores
 * in cache.size.
 */
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1400 struct cache_extent *cache;
1401 struct shared_node *node;
1403 cache = lookup_cache_extent(shared, bytenr, 1);
/* The cache_extent is embedded in the shared_node; step back to the owner. */
1405 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for tree block @bytenr, initialise its two
 * record caches and insert it into @shared keyed by [@bytenr, 1).
 */
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1414 struct shared_node *node;
1416 node = calloc(1, sizeof(*node));
1419 node->cache.start = bytenr;
1420 node->cache.size = 1;
1421 cache_tree_init(&node->root_cache);
1422 cache_tree_init(&node->inode_cache);
1425 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping done by walk_down_tree() for the tree block at @bytenr on
 * level @level, using the shared-node cache in wc->shared.
 *
 * A block that gets a new shared_node becomes the active node for the
 * walk.  For a block that is already tracked, either one reference is
 * dropped (when the active node is the root level and the root's refs are
 * 0) or its cached records are spliced into the currently active node.
 */
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431 struct walk_control *wc, int level)
1433 struct shared_node *node;
1434 struct shared_node *dest;
1437 if (level == wc->active_node)
/* The active node must always sit above the level being entered. */
1440 BUG_ON(wc->active_node <= level);
1441 node = find_shared_node(&wc->shared, bytenr);
1443 ret = add_shared_node(&wc->shared, bytenr, refs);
1445 node = find_shared_node(&wc->shared, bytenr);
1446 wc->nodes[level] = node;
1447 wc->active_node = level;
/* Root with zero refs: nothing to collect, just drop our reference. */
1451 if (wc->root_level == wc->active_node &&
1452 btrfs_root_refs(&root->root_item) == 0) {
1453 if (--node->refs == 0) {
1454 free_inode_recs_tree(&node->root_cache);
1455 free_inode_recs_tree(&node->inode_cache);
1456 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise move the already collected records into the active node. */
1462 dest = wc->nodes[wc->active_node];
1463 splice_shared_node(node, dest);
1464 if (node->refs == 0) {
1465 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk leaves the
 * shared block on level @level.  The active node moves to a higher level
 * found by scanning upwards from @level + 1, and the records collected in
 * the node being left are spliced into that new active node where the
 * condition below allows it.
 */
1471 static int leave_shared_node(struct btrfs_root *root,
1472 struct walk_control *wc, int level)
1474 struct shared_node *node;
1475 struct shared_node *dest;
1478 if (level == wc->root_level)
/* Search the levels above for the next active node. */
1481 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1485 BUG_ON(i >= BTRFS_MAX_LEVEL);
1487 node = wc->nodes[wc->active_node];
1488 wc->nodes[wc->active_node] = NULL;
1489 wc->active_node = i;
1491 dest = wc->nodes[wc->active_node];
/* Splice unless we are back at the root level of a root with zero refs. */
1492 if (wc->active_node < wc->root_level ||
1493 btrfs_root_refs(&root->root_item) > 0) {
1494 BUG_ON(node->refs <= 1);
1495 splice_shared_node(node, dest);
1497 BUG_ON(node->refs < 2);
1506 * 1 - if the root with id child_root_id is a child of root parent_root_id
1507 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1508 * has other root(s) as parent(s)
1509 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Two lookups in the tree root are used: first the exact ROOT_REF item
 * (parent_root_id, child_root_id), then a scan over the ROOT_BACKREF items
 * of child_root_id.
 */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1514 struct btrfs_path path;
1515 struct btrfs_key key;
1516 struct extent_buffer *leaf;
1520 btrfs_init_path(&path);
/* Step 1: direct forward reference parent -> child. */
1522 key.objectid = parent_root_id;
1523 key.type = BTRFS_ROOT_REF_KEY;
1524 key.offset = child_root_id;
1525 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1529 btrfs_release_path(&path);
/* Step 2: walk the child's back references. */
1533 key.objectid = child_root_id;
1534 key.type = BTRFS_ROOT_BACKREF_KEY;
1536 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1542 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: continue in the next one. */
1543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1547 leaf = path.nodes[0];
1550 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the range of ROOT_BACKREF items belonging to the child. */
1551 if (key.objectid != child_root_id ||
1552 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For a ROOT_BACKREF item the key offset holds the parent root id. */
1557 if (key.offset == parent_root_id) {
1558 btrfs_release_path(&path);
1565 btrfs_release_path(&path);
/* Not a child of parent_root_id: 0 if some other parent exists, else 2. */
1568 return has_parent ? 0 : 2;
/*
 * Record the directory entries stored in item @slot of leaf @eb (a
 * DIR_ITEM or DIR_INDEX item, per key->type) against the inode currently
 * being collected in @active_node.
 *
 * Each entry in the item is turned into a backref via add_inode_backref():
 * entries whose location is an INODE_ITEM go to the inode cache, entries
 * whose location is a ROOT_ITEM go to the root cache, and anything else is
 * reported as invalid and filed under BTRFS_MULTIPLE_OBJECTIDS.
 */
1571 static int process_dir_item(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1583 struct btrfs_dir_item *di;
1584 struct inode_record *rec;
1585 struct cache_tree *root_cache;
1586 struct cache_tree *inode_cache;
1587 struct btrfs_key location;
1588 char namebuf[BTRFS_NAME_LEN];
1590 root_cache = &active_node->root_cache;
1591 inode_cache = &active_node->inode_cache;
1592 rec = active_node->current;
1593 rec->found_dir_item = 1;
1595 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several dir entries back to back. */
1597 while (cur < total) {
1599 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600 name_len = btrfs_dir_name_len(eb, di);
1601 data_len = btrfs_dir_data_len(eb, di);
1602 filetype = btrfs_dir_type(eb, di);
1604 rec->found_size += name_len;
/* The name must fit in the item and must not exceed BTRFS_NAME_LEN. */
1605 if (cur + sizeof(*di) + name_len > total ||
1606 name_len > BTRFS_NAME_LEN) {
1607 error = REF_ERR_NAME_TOO_LONG;
1609 if (cur + sizeof(*di) > total)
1611 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header. */
1618 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1620 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621 key->offset != btrfs_name_hash(namebuf, len)) {
1622 rec->errors |= I_ERR_ODD_DIR_ITEM;
/*
 * NOTE(review): namebuf is filled with len raw bytes and is printed with
 * %s below; no terminator write is visible nearby -- confirm it is
 * NUL-terminated or switch to a bounded "%.*s".
 */
1623 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624 key->objectid, key->offset, namebuf, len, filetype,
1625 key->offset, btrfs_name_hash(namebuf, len));
1628 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629 add_inode_backref(inode_cache, location.objectid,
1630 key->objectid, key->offset, namebuf,
1631 len, filetype, key->type, error);
1632 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633 add_inode_backref(root_cache, location.objectid,
1634 key->objectid, key->offset,
1635 namebuf, len, filetype,
1638 fprintf(stderr, "invalid location in dir item %u\n",
1640 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641 key->objectid, key->offset, namebuf,
1642 len, filetype, key->type, error);
/* Step over header + name + data to reach the next packed entry. */
1645 len = sizeof(*di) + name_len + data_len;
1646 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is flagged when nritems exceeds one. */
1649 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every INODE_REF entry packed in item @slot of leaf @eb as a
 * backref in the inode cache of @active_node.  key->objectid is the inode
 * and key->offset is passed on as the directory the name lives in.
 */
1655 static int process_inode_ref(struct extent_buffer *eb,
1656 int slot, struct btrfs_key *key,
1657 struct shared_node *active_node)
1665 struct cache_tree *inode_cache;
1666 struct btrfs_inode_ref *ref;
1667 char namebuf[BTRFS_NAME_LEN];
1669 inode_cache = &active_node->inode_cache;
1671 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672 total = btrfs_item_size_nr(eb, slot);
1673 while (cur < total) {
1674 name_len = btrfs_inode_ref_name_len(eb, ref);
1675 index = btrfs_inode_ref_index(eb, ref);
1677 /* inode_ref + namelen should not cross item boundary */
1678 if (cur + sizeof(*ref) + name_len > total ||
1679 name_len > BTRFS_NAME_LEN) {
1680 if (total < cur + sizeof(*ref))
1683 /* Still try to read out the remaining part */
1684 len = min_t(u32, total - cur - sizeof(*ref),
1686 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1692 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693 add_inode_backref(inode_cache, key->objectid, key->offset,
1694 index, namebuf, len, 0, key->type, error);
/* Advance by header + name to the next packed ref. */
1696 len = sizeof(*ref) + name_len;
1697 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every INODE_EXTREF entry packed in item @slot of leaf @eb as a
 * backref in the inode cache of @active_node.  Unlike process_inode_ref()
 * the parent directory is read from the extref itself rather than taken
 * from the key offset.
 */
1703 static int process_inode_extref(struct extent_buffer *eb,
1704 int slot, struct btrfs_key *key,
1705 struct shared_node *active_node)
1714 struct cache_tree *inode_cache;
1715 struct btrfs_inode_extref *extref;
1716 char namebuf[BTRFS_NAME_LEN];
1718 inode_cache = &active_node->inode_cache;
1720 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721 total = btrfs_item_size_nr(eb, slot);
1722 while (cur < total) {
1723 name_len = btrfs_inode_extref_name_len(eb, extref);
1724 index = btrfs_inode_extref_index(eb, extref);
1725 parent = btrfs_inode_extref_parent(eb, extref);
/* Over-long names are truncated to BTRFS_NAME_LEN and flagged. */
1726 if (name_len <= BTRFS_NAME_LEN) {
1730 len = BTRFS_NAME_LEN;
1731 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header. */
1733 read_extent_buffer(eb, namebuf,
1734 (unsigned long)(extref + 1), len);
1735 add_inode_backref(inode_cache, key->objectid, parent,
1736 index, namebuf, len, 0, key->type, error);
/* Advance by header + name to the next packed extref. */
1738 len = sizeof(*extref) + name_len;
1739 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len) and report the covered amount through @found.
 *
 * Each csum item covers (item size / csum size) sectors starting at its
 * key offset.
 */
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747 u64 len, u64 *found)
1749 struct btrfs_key key;
1750 struct btrfs_path path;
1751 struct extent_buffer *leaf;
1756 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758 btrfs_init_path(&path);
1760 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762 key.type = BTRFS_EXTENT_CSUM_KEY;
1764 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may still cover @start, so look at
 * the previous slot if it is a csum item.
 */
1768 if (ret > 0 && path.slots[0] > 0) {
1769 leaf = path.nodes[0];
1770 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772 key.type == BTRFS_EXTENT_CSUM_KEY)
1777 leaf = path.nodes[0];
1778 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1784 leaf = path.nodes[0];
1787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789 key.type != BTRFS_EXTENT_CSUM_KEY)
1792 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or beyond the end of the requested range. */
1793 if (key.offset >= start + len)
1796 if (key.offset > start)
/* End of the byte range covered by this csum item. */
1799 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800 csum_end = key.offset + (size / csum_size) *
1801 root->fs_info->sectorsize;
1802 if (csum_end > start) {
1803 size = min(csum_end - start, len);
1812 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (item @slot of leaf @eb) against the inode
 * currently being collected in @active_node.
 *
 * Updates the record's extent_start/extent_end window (flagging overlaps
 * and recording holes), validates the extent fields per extent type,
 * accumulates found_size, and for extents with a disk bytenr outside the
 * data reloc tree compares the available checksums with the extent type.
 */
1818 static int process_file_extent(struct btrfs_root *root,
1819 struct extent_buffer *eb,
1820 int slot, struct btrfs_key *key,
1821 struct shared_node *active_node)
1823 struct inode_record *rec;
1824 struct btrfs_file_extent_item *fi;
1826 u64 disk_bytenr = 0;
1827 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned length. */
1828 u64 mask = root->fs_info->sectorsize - 1;
1832 rec = active_node->current;
1833 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1836 if (rec->extent_start == (u64)-1) {
1837 rec->extent_start = key->offset;
1838 rec->extent_end = key->offset;
/* Compare the previous extent's end with where this one starts. */
1841 if (rec->extent_end > key->offset)
1842 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843 else if (rec->extent_end < key->offset) {
1844 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845 key->offset - rec->extent_end);
1850 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851 extent_type = btrfs_file_extent_type(eb, fi);
1853 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1856 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector. */
1858 num_bytes = (num_bytes + mask) & ~mask;
1859 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1864 if (num_bytes == 0 || (num_bytes & mask))
1865 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside the extent's ram_bytes. */
1866 if (num_bytes + extent_offset >
1867 btrfs_file_extent_ram_bytes(eb, fi))
1868 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1869 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870 (btrfs_file_extent_compression(eb, fi) ||
1871 btrfs_file_extent_encryption(eb, fi) ||
1872 btrfs_file_extent_other_encoding(eb, fi)))
1873 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents backed by disk space add to found_size. */
1874 if (disk_bytenr > 0)
1875 rec->found_size += num_bytes;
1877 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1879 rec->extent_end = key->offset + num_bytes;
1882 * The data reloc tree will copy full extents into its inode and then
1883 * copy the corresponding csums. Because the extent it copied could be
1884 * a preallocated extent that hasn't been written to yet there may be no
1885 * csums to copy, ergo we won't have csums for our file extent. This is
1886 * ok so just don't bother checking csums if the inode belongs to the
1889 if (disk_bytenr > 0 &&
1890 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is what gets checksummed. */
1892 if (btrfs_file_extent_compression(eb, fi))
1893 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1895 disk_bytenr += extent_offset;
1897 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1900 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1902 rec->found_csum_item = 1;
1903 if (found < num_bytes)
1904 rec->some_csum_missing = 1;
1905 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1907 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk every item of leaf @eb and dispatch it by key type to the matching
 * process_*() collector, keeping active_node->current pointed at the
 * inode record for the objectid being visited.
 */
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914 struct walk_control *wc)
1916 struct btrfs_key key;
1920 struct cache_tree *inode_cache;
1921 struct shared_node *active_node;
/* Same "root level of a root with zero refs" test as enter_shared_node(). */
1923 if (wc->root_level == wc->active_node &&
1924 btrfs_root_refs(&root->root_item) == 0)
1927 active_node = wc->nodes[wc->active_node];
1928 inode_cache = &active_node->inode_cache;
1929 nritems = btrfs_header_nritems(eb);
1930 for (i = 0; i < nritems; i++) {
1931 btrfs_item_key_to_cpu(eb, &key, i);
1933 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher objectid: mark the previous inode record as checked
 * and switch to the record for the new objectid.
 */
1938 if (active_node->current == NULL ||
1939 active_node->current->ino < key.objectid) {
1940 if (active_node->current) {
1941 active_node->current->checked = 1;
1942 maybe_free_inode_rec(inode_cache,
1943 active_node->current);
1945 active_node->current = get_inode_rec(inode_cache,
1947 BUG_ON(IS_ERR(active_node->current));
1950 case BTRFS_DIR_ITEM_KEY:
1951 case BTRFS_DIR_INDEX_KEY:
1952 ret = process_dir_item(eb, i, &key, active_node);
1954 case BTRFS_INODE_REF_KEY:
1955 ret = process_inode_ref(eb, i, &key, active_node);
1957 case BTRFS_INODE_EXTREF_KEY:
1958 ret = process_inode_extref(eb, i, &key, active_node);
1960 case BTRFS_INODE_ITEM_KEY:
1961 ret = process_inode_item(eb, i, &key, active_node);
1963 case BTRFS_EXTENT_DATA_KEY:
1964 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping; every array is indexed by tree level.
 * bytenr/refs cache the last block looked up on a level and its reference
 * count; need_check records whether this root should check that block.
 */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
1976 u64 refs[BTRFS_MAX_LEVEL];
1977 int need_check[BTRFS_MAX_LEVEL];
1978 /* field for checking all trees */
1979 int checked[BTRFS_MAX_LEVEL];
1980 /* the corresponding extent should be marked as full backref or not */
1981 int full_backref[BTRFS_MAX_LEVEL];
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985 struct extent_buffer *eb, struct node_refs *nrefs,
1986 u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988 unsigned int ext_ref);
1991 * Returns >0 Found error, not fatal, should continue
1992 * Returns <0 Fatal error, must exit the whole check
1993 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] by calling
 * check_inode_item() on them, starting from the first inode item or the
 * first change of inode number in the leaf.
 */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996 struct node_refs *nrefs, int *level, int ext_ref)
1998 struct extent_buffer *cur = path->nodes[0];
1999 struct btrfs_key key;
2003 int root_level = btrfs_header_level(root->node);
2005 int ret = 0; /* Final return value */
2006 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2008 cur_bytenr = cur->start;
2010 /* skip to first inode item or the first inode number change */
2011 nritems = btrfs_header_nritems(cur);
2012 for (i = 0; i < nritems; i++) {
2013 btrfs_item_key_to_cpu(cur, &key, i);
2015 first_ino = key.objectid;
2016 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017 (first_ino && first_ino != key.objectid))
2021 path->slots[0] = nritems;
2027 err |= check_inode_item(root, path, ext_ref);
2029 /* modify cur since check_inode_item may change path */
2030 cur = path->nodes[0];
2032 if (err & LAST_ITEM)
2035 /* still have inode items in this leaf */
2036 if (cur->start == cur_bytenr)
2040 * we have switched to another leaf, above nodes may
2041 * have changed, here walk down the path, if a node
2042 * or leaf is shared, check whether we can skip this
2045 for (i = root_level; i >= 0; i--) {
/* Level unchanged since the last lookup: cached refs are still valid. */
2046 if (path->nodes[i]->start == nrefs->bytenr[i])
2049 ret = update_nodes_refs(root, path->nodes[i]->start,
2050 path->nodes[i], nrefs, i, 0);
2054 if (!nrefs->need_check[i]) {
/* Release the path entries below *level. */
2060 for (i = 0; i < *level; i++) {
2061 free_extent_buffer(path->nodes[i]);
2062 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, starting at
 * pointer @slot and running to the last pointer in the node.
 */
2071 static void reada_walk_down(struct btrfs_root *root,
2072 struct extent_buffer *node, int slot)
2074 struct btrfs_fs_info *fs_info = root->fs_info;
2081 level = btrfs_header_level(node);
2085 nritems = btrfs_header_nritems(node);
2086 for (i = slot; i < nritems; i++) {
2087 bytenr = btrfs_node_blockptr(node, i);
2088 ptr_gen = btrfs_node_ptr_generation(node, i);
2089 readahead_tree_block(fs_info, bytenr, ptr_gen);
2094 * Check the child node/leaf by the following condition:
2095 * 1. the first item key of the node/leaf should be the same with the one
2097 * 2. block in parent node should match the child node/leaf.
2098 * 3. generation of parent node and child's header should be consistent.
2100 * Or the child node/leaf pointed by the key in parent is not valid.
2102 * We hope to check leaf owner too, but since subvol may share leaves,
2103 * which makes leaf owner check not so strong, key check should be
2104 * sufficient enough for that case.
/*
 * Validate @child against the pointer stored in slot @slot of @parent:
 * first key, block address and generation must all match.
 *
 * In every diagnostic "wanted" is the value recorded in the parent's
 * pointer and "have" is the value read from the child's header.
 */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107 struct extent_buffer *child)
2109 struct btrfs_key parent_key;
2110 struct btrfs_key child_key;
2113 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
2114 if (btrfs_header_level(child) == 0)
2115 btrfs_item_key_to_cpu(child, &child_key, 0);
2117 btrfs_node_key_to_cpu(child, &child_key, 0);
2119 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2122 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123 parent_key.objectid, parent_key.type, parent_key.offset,
2124 child_key.objectid, child_key.type, child_key.offset);
2126 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129 btrfs_node_blockptr(parent, slot),
2130 btrfs_header_bytenr(child));
2132 if (btrfs_node_ptr_generation(parent, slot) !=
2133 btrfs_header_generation(child)) {
/* wanted = generation in the parent's pointer, have = child's header. */
2135 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136 btrfs_node_ptr_generation(parent, slot),
2137 btrfs_header_generation(child));
2143 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144 * in every fs or file tree check. Here we find all of its root ids, and only check
2145 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the tree responsible for checking a block that
 * is referenced by the roots listed in @roots.  The comparison is made
 * against the first entry of the ulist's rb-tree.
 */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 struct rb_node *node;
2150 struct ulist_node *u;
/* Only one root references the block. */
2152 if (roots->nnodes == 1)
2155 node = rb_first(&roots->root);
2156 u = rb_entry(node, struct ulist_node, rb_node);
2158 * current root id is not smallest, we skip it and let it be checked
2159 * in the fs or file tree who hash the smallest root id.
2161 if (root->objectid != u->val)
/*
 * Work out whether tree block @eb should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and set or clear that bit in *flags_ret.
 *
 * Several cheap tests on the root and the block header come first; failing
 * those, the block's extent item is looked up in the extent tree and its
 * flags and inline refs are examined.
 */
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2170 struct btrfs_root *extent_root = root->fs_info->extent_root;
2171 struct btrfs_root_item *ri = &root->root_item;
2172 struct btrfs_extent_inline_ref *iref;
2173 struct btrfs_extent_item *ei;
2174 struct btrfs_key key;
2175 struct btrfs_path *path = NULL;
2186 * Except file/reloc tree, we can not have FULL BACKREF MODE
2188 if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The block is the root node of this tree. */
2192 if (eb->start == btrfs_root_bytenr(ri))
2195 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
/* The block header names this root as its owner. */
2198 owner = btrfs_header_owner(eb);
2199 if (owner == root->objectid)
2202 path = btrfs_alloc_path();
/* Search past any item for this bytenr, then step back to its extent item. */
2206 key.objectid = btrfs_header_bytenr(eb);
2208 key.offset = (u64)-1;
2210 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2217 ret = btrfs_previous_extent_item(extent_root, path,
2223 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* From here on eb refers to the extent-tree leaf, not the input block. */
2225 eb = path->nodes[0];
2226 slot = path->slots[0];
2227 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2229 flags = btrfs_extent_flags(eb, ei);
2230 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
/* Inline refs start after the extent item ... */
2233 ptr = (unsigned long)(ei + 1);
2234 end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
/* ... and, for EXTENT_ITEM keys, after the tree block info as well. */
2236 if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237 ptr += sizeof(struct btrfs_tree_block_info);
2240 /* Reached extent item ends normally */
2244 /* Beyond extent item end, wrong item size */
2246 error("extent item at bytenr %llu slot %d has wrong size",
2251 iref = (struct btrfs_extent_inline_ref *)ptr;
2252 offset = btrfs_extent_inline_ref_offset(eb, iref);
2253 type = btrfs_extent_inline_ref_type(eb, iref);
/* Look for a keyed tree block ref whose offset is the header owner. */
2255 if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2257 ptr += btrfs_extent_inline_ref_size(type);
2261 *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267 btrfs_free_path(path);
2272 * for a tree node or leaf, we record its reference count, so later if we still
2273 * process this node or leaf, don't need to compute its reference count again.
2275 * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level]
/*
 * Refresh the cached per-level state in @nrefs for the block at @bytenr:
 * reference count, need_check and (when @check_all and @eb are given) the
 * full_backref marker computed by calc_extent_flag_v2().
 */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278 struct extent_buffer *eb, struct node_refs *nrefs,
2279 u64 level, int check_all)
2281 struct ulist *roots;
2284 int root_level = btrfs_header_level(root->node);
/* Same block as cached for this level. */
2288 if (nrefs->bytenr[level] == bytenr)
2291 if (bytenr != (u64)-1) {
2292 /* the return value of this function seems a mistake */
2293 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294 level, 1, &refs, &flags);
2296 if (ret < 0 && !check_all)
/* New block on this level: reset the cached state. */
2299 nrefs->bytenr[level] = bytenr;
2300 nrefs->refs[level] = refs;
2301 nrefs->full_backref[level] = 0;
2302 nrefs->checked[level] = 0;
2305 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2310 check = need_check(root, roots);
2312 nrefs->need_check[level] = check;
2315 nrefs->need_check[level] = 1;
/* The root node of the tree is always checked. */
2317 if (level == root_level) {
2318 nrefs->need_check[level] = 1;
2321 * The node refs may have not been
2322 * updated if upper needs checking (the
2323 * lowest root_objectid) the node can
2326 nrefs->need_check[level] =
2327 nrefs->need_check[level + 1];
2333 if (check_all && eb) {
2334 calc_extent_flag_v2(root, eb, &flags);
2335 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336 nrefs->full_backref[level] = 1;
2343 * @level if @level == -1 means extent data item
2344 * else normal tree block.
/*
 * Decide how strictly the extent on @level should be checked, based on the
 * cached reference counts of the levels above it: the loop looks for any
 * higher level (up to the root level) whose refs exceed 1.
 */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347 struct node_refs *nrefs, int level)
2349 int root_level = btrfs_header_level(root->node);
/* Reject levels outside [-1, root_level]. */
2351 if (level > root_level || level < -1)
2353 if (level == root_level)
2356 * if the upper node is marked full backref, it should contain shared
2357 * backref of the parent (except owner == root->objectid).
2359 while (++level <= root_level)
2360 if (nrefs->refs[level] > 1)
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * For every level the reference count is taken from the @nrefs cache or
 * looked up in the extent tree, and enter_shared_node() is called with
 * that count.  Leaves are handed to process_one_leaf(); for
 * internal nodes the child block is read, validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and
 * installed one level down in @path.
 */
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367 struct walk_control *wc, int *level,
2368 struct node_refs *nrefs)
2370 enum btrfs_tree_block_status status;
2373 struct btrfs_fs_info *fs_info = root->fs_info;
2374 struct extent_buffer *next;
2375 struct extent_buffer *cur;
2379 WARN_ON(*level < 0);
2380 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when the starting block is unchanged. */
2382 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383 refs = nrefs->refs[*level];
2386 ret = btrfs_lookup_extent_info(NULL, root,
2387 path->nodes[*level]->start,
2388 *level, 1, &refs, NULL);
2393 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394 nrefs->refs[*level] = refs;
2398 ret = enter_shared_node(root, path->nodes[*level]->start,
2406 while (*level >= 0) {
2407 WARN_ON(*level < 0);
2408 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409 cur = path->nodes[*level];
/* The header level must agree with our position in the path. */
2411 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2414 if (path->slots[*level] >= btrfs_header_nritems(cur))
2417 ret = process_one_leaf(root, cur, wc);
/* Internal node: fetch the pointer to the next child. */
2422 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2425 if (bytenr == nrefs->bytenr[*level - 1]) {
2426 refs = nrefs->refs[*level - 1];
2428 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429 *level - 1, 1, &refs, NULL);
2433 nrefs->bytenr[*level - 1] = bytenr;
2434 nrefs->refs[*level - 1] = refs;
2439 ret = enter_shared_node(root, bytenr, refs,
2442 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2447 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449 free_extent_buffer(next);
2450 reada_walk_down(root, cur, path->slots[*level]);
2451 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2452 if (!extent_buffer_uptodate(next)) {
/* Unreadable child: record it as a corrupt extent. */
2453 struct btrfs_key node_key;
2455 btrfs_node_key_to_cpu(path->nodes[*level],
2457 path->slots[*level]);
2458 btrfs_add_corrupt_extent_record(root->fs_info,
2460 path->nodes[*level]->start,
2461 root->fs_info->nodesize,
2468 ret = check_child_node(cur, path->slots[*level], next);
2470 free_extent_buffer(next);
2475 if (btrfs_is_leaf(next))
2476 status = btrfs_check_leaf(root, NULL, next);
2478 status = btrfs_check_node(root, NULL, next);
2479 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480 free_extent_buffer(next);
/* Step down: install the child in the path one level lower. */
2485 *level = *level - 1;
2486 free_extent_buffer(path->nodes[*level]);
2487 path->nodes[*level] = next;
2488 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2491 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2495 static int fs_root_objectid(u64 objectid);
2498 * Update global fs information.
/*
 * Add the tree block at path->nodes[level] to the file-scope byte
 * counters (total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste).
 */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2504 struct extent_buffer *eb = path->nodes[level];
2506 total_btree_bytes += eb->len;
2507 if (fs_root_objectid(root->objectid))
2508 total_fs_tree_bytes += eb->len;
2509 if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510 total_extent_tree_bytes += eb->len;
/* Wasted space: free bytes reported by btrfs_leaf_free_space() ... */
2513 btree_space_waste += btrfs_leaf_free_space(root, eb);
/* ... or unused key pointers in a node. */
2515 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516 btrfs_header_nritems(eb));
2517 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2522 * This function only handles BACKREF_MISSING,
2523 * If corresponding extent item exists, increase the ref, else insert an extent
2526 * Returns error bits after repair.
/*
 * Repair a missing backref for tree block @node (only acts when @err has
 * BACKREF_MISSING set).
 *
 * The extent tree is searched for the block's extent item.  When
 * insert_extent is set, a new item is inserted with 0 refs: an
 * EXTENT_ITEM followed by a tree block info without skinny metadata, a
 * METADATA_ITEM with it.  A reference for @root is then added with
 * btrfs_inc_extent_ref(); the parent block on the level above is passed
 * as parent when that level is full backref and the block is not owned by
 * @root.
 *
 * @nrefs->refs[@level] and @nrefs->full_backref[@level] are updated to
 * reflect the repair.  BACKREF_MISSING is cleared from the error bits once
 * the reference has been added.
 */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529 struct btrfs_root *root,
2530 struct extent_buffer *node,
2531 struct node_refs *nrefs, int level, int err)
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 struct btrfs_root *extent_root = fs_info->extent_root;
2535 struct btrfs_path path;
2536 struct btrfs_extent_item *ei;
2537 struct btrfs_tree_block_info *bi;
2538 struct btrfs_key key;
2539 struct extent_buffer *eb;
2540 u32 size = sizeof(*ei);
2541 u32 node_size = root->fs_info->nodesize;
2542 int insert_extent = 0;
2543 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544 int root_level = btrfs_header_level(root->node);
2549 u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2552 if ((err & BACKREF_MISSING) == 0)
/* @level indexes arrays of BTRFS_MAX_LEVEL entries, so it must stay below it. */
2555 WARN_ON(level >= BTRFS_MAX_LEVEL);
2558 btrfs_init_path(&path);
2559 bytenr = btrfs_header_bytenr(node);
2560 owner = btrfs_header_owner(node);
2561 generation = btrfs_header_generation(node);
2563 key.objectid = bytenr;
2565 key.offset = (u64)-1;
2567 /* Search for the extent item */
2568 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2578 /* calculate if the extent item flag is full backref or not */
2579 if (nrefs->full_backref[level] != 0)
2580 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2582 /* insert an extent item */
2583 if (insert_extent) {
2584 struct btrfs_disk_key copy_key;
2586 generation = btrfs_header_generation(node);
2588 if (level < root_level && nrefs->full_backref[level + 1] &&
2589 owner != root->objectid) {
2590 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2593 key.objectid = bytenr;
2594 if (!skinny_metadata) {
2595 key.type = BTRFS_EXTENT_ITEM_KEY;
2596 key.offset = node_size;
2597 size += sizeof(*bi);
2599 key.type = BTRFS_METADATA_ITEM_KEY;
2603 btrfs_release_path(&path);
2604 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/* Start at 0 refs; the real reference is added further down. */
2612 btrfs_set_extent_refs(eb, ei, 0);
2613 btrfs_set_extent_generation(eb, ei, generation);
2614 btrfs_set_extent_flags(eb, ei, flags);
2616 if (!skinny_metadata) {
/* Non-skinny items carry a tree block info right after the extent item. */
2617 bi = (struct btrfs_tree_block_info *)(ei + 1);
2618 memset_extent_buffer(eb, 0, (unsigned long)bi,
2620 btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621 btrfs_set_disk_key_type(&copy_key, 0);
2622 btrfs_set_disk_key_offset(&copy_key, 0);
2624 btrfs_set_tree_block_level(eb, bi, level);
2625 btrfs_set_tree_block_key(eb, bi, &copy_key);
2627 btrfs_mark_buffer_dirty(eb);
2628 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2632 nrefs->refs[level] = 0;
2633 nrefs->full_backref[level] =
2634 flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635 btrfs_release_path(&path);
/* Shared (parent-based) ref when the level above is full backref. */
2638 if (level < root_level && nrefs->full_backref[level + 1] &&
2639 owner != root->objectid)
2640 parent = nrefs->bytenr[level + 1];
2642 /* increase the ref */
2643 ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644 parent, root->objectid, level, 0);
2646 nrefs->refs[level]++;
2648 btrfs_release_path(&path);
2651 "failed to repair tree block ref start %llu root %llu due to %s",
2652 bytenr, root->objectid, strerror(-ret));
2654 printf("Added one tree block ref start %llu %s %llu\n",
2655 bytenr, parent ? "parent" : "root",
2656 parent ? parent : root->objectid);
2657 err &= ~BACKREF_MISSING;
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664 unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666 struct extent_buffer *eb, u64 bytenr,
2667 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669 struct btrfs_root *root, struct btrfs_path *path,
2670 struct node_refs *nrefs, int account_bytes);
2673 * @trans just for lowmem repair mode
2674 * @check_all if not 0 then check all tree block backrefs and items
2675 * 0 then just check relationship of items in fs tree(s)
2677 * Returns >0 Found error, should continue
2678 * Returns <0 Fatal error, must exit the whole check
2679 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * On each level the tree block's backref is checked with
 * check_tree_block_ref() and passed to repair_tree_block_ref(), byte
 * accounting is updated when @check_all is set, and the block itself is
 * validated.  Leaves are handed to process_one_leaf_v2() or
 * check_leaf_items().  For internal nodes the child is looked up through
 * update_nodes_refs(), skipped when it does not need checking (only when
 * !@check_all), otherwise read, validated and installed one level down in
 * @path.
 */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *root, struct btrfs_path *path,
2683 int *level, struct node_refs *nrefs, int ext_ref,
2687 enum btrfs_tree_block_status status;
2690 struct btrfs_fs_info *fs_info = root->fs_info;
2691 struct extent_buffer *next;
2692 struct extent_buffer *cur;
2696 int account_file_data = 0;
2698 WARN_ON(*level < 0);
2699 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2701 ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702 path->nodes[*level], nrefs, *level, check_all);
2706 while (*level >= 0) {
2707 WARN_ON(*level < 0);
2708 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709 cur = path->nodes[*level];
2710 bytenr = btrfs_header_bytenr(cur);
2711 check = nrefs->need_check[*level];
/* The header level must agree with our position in the path. */
2713 if (btrfs_header_level(cur) != *level)
2716 * Update bytes accounting and check tree block ref
2717 * NOTE: Doing accounting and check before checking nritems
2718 * is necessary because of empty node/leaf.
2720 if ((check_all && !nrefs->checked[*level]) ||
2721 (!check_all && nrefs->need_check[*level])) {
2722 ret = check_tree_block_ref(root, cur,
2723 btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724 btrfs_header_owner(cur), nrefs);
2727 ret = repair_tree_block_ref(trans, root,
2728 path->nodes[*level], nrefs, *level, ret);
/* Account only blocks this root must check and that have references. */
2731 if (check_all && nrefs->need_check[*level] &&
2732 nrefs->refs[*level]) {
2733 account_bytes(root, path, *level);
2734 account_file_data = 1;
2736 nrefs->checked[*level] = 1;
/* All slots of this block have been consumed. */
2739 if (path->slots[*level] >= btrfs_header_nritems(cur))
2742 /* Don't forget to check leaf/node validation */
2744 /* skip duplicate check */
2745 if (check || !check_all) {
2746 ret = btrfs_check_leaf(root, NULL, cur);
2747 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2755 ret = process_one_leaf_v2(root, path, nrefs,
2758 ret = check_leaf_items(trans, root, path,
2759 nrefs, account_file_data);
2763 if (check || !check_all) {
2764 ret = btrfs_check_node(root, NULL, cur);
2765 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Internal node: fetch the pointer to the next child. */
2772 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2775 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2780 * check all trees in check_chunks_and_extent_v2
2781 * check shared node once in check_fs_roots
2783 if (!check_all && !nrefs->need_check[*level - 1]) {
2784 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read from disk. */
2788 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790 free_extent_buffer(next);
2791 reada_walk_down(root, cur, path->slots[*level]);
2792 next = read_tree_block(fs_info, bytenr, ptr_gen);
2793 if (!extent_buffer_uptodate(next)) {
/* Unreadable child: record it as a corrupt extent. */
2794 struct btrfs_key node_key;
2796 btrfs_node_key_to_cpu(path->nodes[*level],
2798 path->slots[*level]);
2799 btrfs_add_corrupt_extent_record(fs_info,
2800 &node_key, path->nodes[*level]->start,
2801 fs_info->nodesize, *level);
2807 ret = check_child_node(cur, path->slots[*level], next);
2812 if (btrfs_is_leaf(next))
2813 status = btrfs_check_leaf(root, NULL, next);
2815 status = btrfs_check_node(root, NULL, next);
2816 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817 free_extent_buffer(next);
/* Step down: install the child in the path one level lower. */
2822 *level = *level - 1;
2823 free_extent_buffer(path->nodes[*level]);
2824 path->nodes[*level] = next;
2825 path->slots[*level] = 0;
2826 account_file_data = 0;
/* bytenr == (u64)-1: only refresh full_backref for the new level. */
2828 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834 struct walk_control *wc, int *level)
2837 struct extent_buffer *leaf;
2839 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840 leaf = path->nodes[i];
2841 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2846 free_extent_buffer(path->nodes[*level]);
2847 path->nodes[*level] = NULL;
2848 BUG_ON(*level > wc->active_node);
2849 if (*level == wc->active_node)
2850 leave_shared_node(root, wc, *level);
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2861 struct extent_buffer *leaf;
2863 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864 leaf = path->nodes[i];
2865 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2870 free_extent_buffer(path->nodes[*level]);
2871 path->nodes[*level] = NULL;
2878 static int check_root_dir(struct inode_record *rec)
2880 struct inode_backref *backref;
2883 if (!rec->found_inode_item || rec->errors)
2885 if (rec->nlink != 1 || rec->found_link != 0)
2887 if (list_empty(&rec->backrefs))
2889 backref = to_inode_backref(rec->backrefs.next);
2890 if (!backref->found_inode_ref)
2892 if (backref->index != 0 || backref->namelen != 2 ||
2893 memcmp(backref->name, "..", 2))
2895 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size stored in the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The path is released before returning.
 */
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct inode_record *rec)
2906 struct btrfs_inode_item *ei;
2907 struct btrfs_key key;
/* Search with the largest possible offset for this inode's INODE_ITEM key. */
2910 key.objectid = rec->ino;
2911 key.type = BTRFS_INODE_ITEM_KEY;
2912 key.offset = (u64)-1;
2914 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2918 if (!path->slots[0]) {
/* Make sure the item the path points at really belongs to this inode. */
2925 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926 if (key.objectid != rec->ino) {
2931 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932 struct btrfs_inode_item);
2933 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934 btrfs_mark_buffer_dirty(path->nodes[0]);
2935 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937 root->root_key.objectid);
2939 btrfs_release_path(path);
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944 struct btrfs_root *root,
2945 struct btrfs_path *path,
2946 struct inode_record *rec)
2950 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951 btrfs_release_path(path);
2953 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field in the inode item of rec->ino with rec->found_size
 * and clear I_ERR_FILE_NBYTES_WRONG on the record.
 *
 * The path is released before returning.
 */
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958 struct btrfs_root *root,
2959 struct btrfs_path *path,
2960 struct inode_record *rec)
2962 struct btrfs_inode_item *ei;
2963 struct btrfs_key key;
2966 key.objectid = rec->ino;
2967 key.type = BTRFS_INODE_ITEM_KEY;
2970 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2977 /* Since ret == 0, no need to check anything */
2978 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979 struct btrfs_inode_item);
2980 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981 btrfs_mark_buffer_dirty(path->nodes[0]);
2982 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983 printf("reset nbytes for ino %llu root %llu\n",
2984 rec->ino, root->root_key.objectid);
2986 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says should exist for inode @rec.
 *
 * A transaction of its own is started and committed.  The new item is keyed
 * (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index) and points at the
 * INODE_ITEM of rec->ino, carrying the name stored in the backref.
 * Afterwards the backref is marked as having a dir index, and the parent
 * directory's record has its found size and I_ERR_DIR_ISIZE_WRONG flag
 * brought up to date.
 */
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991 struct cache_tree *inode_cache,
2992 struct inode_record *rec,
2993 struct inode_backref *backref)
2995 struct btrfs_path path;
2996 struct btrfs_trans_handle *trans;
2997 struct btrfs_dir_item *dir_item;
2998 struct extent_buffer *leaf;
2999 struct btrfs_key key;
3000 struct btrfs_disk_key disk_key;
3001 struct inode_record *dir_rec;
3002 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name bytes. */
3003 u32 data_size = sizeof(*dir_item) + backref->namelen;
3006 trans = btrfs_start_transaction(root, 1);
3008 return PTR_ERR(trans);
3010 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011 (unsigned long long)rec->ino);
3013 btrfs_init_path(&path);
3014 key.objectid = backref->dir;
3015 key.type = BTRFS_DIR_INDEX_KEY;
3016 key.offset = backref->index;
3017 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3020 leaf = path.nodes[0];
3021 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The location key is built directly in on-disk form. */
3023 disk_key.objectid = cpu_to_le64(rec->ino);
3024 disk_key.type = BTRFS_INODE_ITEM_KEY;
3025 disk_key.offset = 0;
3027 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029 btrfs_set_dir_data_len(leaf, dir_item, 0);
3030 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right behind the dir item header. */
3031 name_ptr = (unsigned long)(dir_item + 1);
3032 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033 btrfs_mark_buffer_dirty(leaf);
3034 btrfs_release_path(&path);
3035 btrfs_commit_transaction(trans, root);
/* Bring the in-memory records in line with what was just written. */
3037 backref->found_dir_index = 1;
3038 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039 BUG_ON(IS_ERR(dir_rec));
3042 dir_rec->found_size += backref->namelen;
3043 if (dir_rec->found_size == dir_rec->isize &&
3044 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046 if (dir_rec->found_size != dir_rec->isize)
3047 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root.
 *
 * A transaction of its own is started and committed.  The entry is looked up
 * by directory, name and index; it is then removed either by deleting the
 * item or by removing just this name from it.
 */
3052 static int delete_dir_index(struct btrfs_root *root,
3053 struct inode_backref *backref)
3055 struct btrfs_trans_handle *trans;
3056 struct btrfs_dir_item *di;
3057 struct btrfs_path path;
3060 trans = btrfs_start_transaction(root, 1);
3062 return PTR_ERR(trans);
3064 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065 (unsigned long long)backref->dir,
3066 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067 (unsigned long long)root->objectid);
3069 btrfs_init_path(&path);
3070 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071 backref->name, backref->namelen,
3072 backref->index, -1);
/* Path taken when the lookup above yields nothing usable. */
3075 btrfs_release_path(&path);
3076 btrfs_commit_transaction(trans, root);
3083 ret = btrfs_del_item(trans, root, &path);
3085 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3087 btrfs_release_path(&path);
3088 btrfs_commit_transaction(trans, root);
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093 struct btrfs_root *root, u64 ino, u64 size,
3094 u64 nbytes, u64 nlink, u32 mode)
3096 struct btrfs_inode_item ii;
3097 time_t now = time(NULL);
3100 btrfs_set_stack_inode_size(&ii, size);
3101 btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102 btrfs_set_stack_inode_nlink(&ii, nlink);
3103 btrfs_set_stack_inode_mode(&ii, mode);
3104 btrfs_set_stack_inode_generation(&ii, trans->transid);
3105 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106 btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108 btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110 btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3113 ret = btrfs_insert_inode(trans, root, ino, &ii);
3116 warning("root %llu inode %llu recreating inode item, this may "
3117 "be incomplete, please check permissions and content after "
3118 "the fsck completes.\n", (unsigned long long)root->objectid,
3119 (unsigned long long)ino);
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125 struct btrfs_root *root, u64 ino,
3128 u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3130 return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
/*
 * Recreate the missing inode item for @rec inside its own transaction.
 *
 * @root_dir: non-zero when @rec is the root directory; nlink is then 1,
 *            otherwise the number of links found for the record is used.
 *
 * A record with a dir item becomes a directory (size = found size), anything
 * else becomes a regular file (size = end of its extents); both get mode 0755.
 */
3133 static int create_inode_item(struct btrfs_root *root,
3134 struct inode_record *rec, int root_dir)
3136 struct btrfs_trans_handle *trans;
3142 trans = btrfs_start_transaction(root, 1);
3143 if (IS_ERR(trans)) {
3144 ret = PTR_ERR(trans);
3148 nlink = root_dir ? 1 : rec->found_link;
3149 if (rec->found_dir_item) {
3150 if (rec->found_file_extent)
3151 fprintf(stderr, "root %llu inode %llu has both a dir "
3152 "item and extents, unsure if it is a dir or a "
3153 "regular file so setting it as a directory\n",
3154 (unsigned long long)root->objectid,
3155 (unsigned long long)rec->ino);
3156 mode = S_IFDIR | 0755;
3157 size = rec->found_size;
/* This condition is the exact complement of the one above, so it always holds here. */
3158 } else if (!rec->found_dir_item) {
3159 size = rec->extent_end;
3160 mode = S_IFREG | 0755;
3163 ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3165 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode record @rec.
 *
 * The `delete` flag tested throughout selects between the two kinds of work:
 * when it is set, bad dir index items are deleted; when it is clear, missing
 * items (root dir inode item, dir index, dir index/item pair, inode item) are
 * added.  Backrefs that end up fully consistent are unlinked from the
 * record's list.
 *
 * Returns a non-zero error from the failing step, otherwise `repaired`.
 */
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170 struct inode_record *rec,
3171 struct cache_tree *inode_cache,
3174 struct inode_backref *tmp, *backref;
3175 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3179 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* A root directory without an inode item gets one recreated first. */
3180 if (!delete && rec->ino == root_dirid) {
3181 if (!rec->found_inode_item) {
3182 ret = create_inode_item(root, rec, 1);
3189 /* Index 0 for root dir's are special, don't mess with it */
3190 if (rec->ino == root_dirid && backref->index == 0)
3194 ((backref->found_dir_index && !backref->found_inode_ref) ||
3195 (backref->found_dir_index && backref->found_inode_ref &&
3196 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197 ret = delete_dir_index(root, backref);
3201 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
3206 if (!delete && !backref->found_dir_index &&
3207 backref->found_dir_item && backref->found_inode_ref) {
3208 ret = add_missing_dir_index(root, inode_cache, rec,
3213 if (backref->found_dir_item &&
3214 backref->found_dir_index) {
3215 if (!backref->errors &&
3216 backref->found_inode_ref) {
3217 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair from it. */
3224 if (!delete && (!backref->found_dir_index &&
3225 !backref->found_dir_item &&
3226 backref->found_inode_ref)) {
3227 struct btrfs_trans_handle *trans;
3228 struct btrfs_key location;
3230 ret = check_dir_conflict(root, backref->name,
3236 * let nlink fixing routine to handle it,
3237 * which can do it better.
3242 location.objectid = rec->ino;
3243 location.type = BTRFS_INODE_ITEM_KEY;
3244 location.offset = 0;
3246 trans = btrfs_start_transaction(root, 1);
3247 if (IS_ERR(trans)) {
3248 ret = PTR_ERR(trans);
3251 fprintf(stderr, "adding missing dir index/item pair "
3253 (unsigned long long)rec->ino);
3254 ret = btrfs_insert_dir_item(trans, root, backref->name,
3256 backref->dir, &location,
3257 imode_to_type(rec->imode),
3260 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing. */
3264 if (!delete && (backref->found_inode_ref &&
3265 backref->found_dir_index &&
3266 backref->found_dir_item &&
3267 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268 !rec->found_inode_item)) {
3269 ret = create_inode_item(root, rec, 0);
3276 return ret ? ret : repaired;
3280 * To determine the file type for nlink/inode_item repair
3282 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283 * Return -ENOENT if file type is not found.
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3287 struct inode_backref *backref;
3289 /* For inode item recovered case */
3290 if (rec->found_inode_item) {
3291 *type = imode_to_type(rec->imode);
3295 list_for_each_entry(backref, &rec->backrefs, list) {
3296 if (backref->found_dir_index || backref->found_dir_item) {
3297 *type = backref->filetype;
3305 * To determine the file name for nlink repair
3307 * Return 0 if file name is found, set name and namelen.
3308 * Return -ENOENT if file name is not found.
3310 static int find_file_name(struct inode_record *rec,
3311 char *name, int *namelen)
3313 struct inode_backref *backref;
3315 list_for_each_entry(backref, &rec->backrefs, list) {
3316 if (backref->found_dir_index || backref->found_dir_item ||
3317 backref->found_inode_ref) {
3318 memcpy(name, backref->name, backref->namelen);
3319 *namelen = backref->namelen;
3326 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from its backrefs, in three steps:
 * unlink every backref (dropping the incomplete ones from the record), set
 * the on-disk nlink to 0, then add the remaining backrefs back.
 */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328 struct btrfs_root *root,
3329 struct btrfs_path *path,
3330 struct inode_record *rec)
3332 struct inode_backref *backref;
3333 struct inode_backref *tmp;
3334 struct btrfs_key key;
3335 struct btrfs_inode_item *inode_item;
3338 /* We don't believe this either, reset it and iterate backref */
3339 rec->found_link = 0;
3341 /* Remove all backref including the valid ones */
3342 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344 backref->index, backref->name,
3345 backref->namelen, 0);
3349 /* remove invalid backref, so it won't be added back */
3350 if (!(backref->found_dir_index &&
3351 backref->found_dir_item &&
3352 backref->found_inode_ref)) {
3353 list_del(&backref->list);
3360 /* Set nlink to 0 */
3361 key.objectid = rec->ino;
3362 key.type = BTRFS_INODE_ITEM_KEY;
3364 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3371 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372 struct btrfs_inode_item);
3373 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374 btrfs_mark_buffer_dirty(path->nodes[0]);
3375 btrfs_release_path(path);
3378 * Add back valid inode_ref/dir_item/dir_index,
3379 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only backrefs that survived the pruning above are still on the list. */
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383 backref->name, backref->namelen,
3384 backref->filetype, &backref->index, 1, 0);
3389 btrfs_release_path(path);
/*
 * Look up the highest inode number in use in @root and store it in
 * *highest_ino.
 *
 * @path is initialised here and released before returning.
 */
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_path *path,
3398 struct btrfs_key key, found_key;
3401 btrfs_init_path(path);
/* Search for a key at the very top of the usable objectid range. */
3402 key.objectid = BTRFS_LAST_FREE_OBJECTID;
3404 key.type = BTRFS_INODE_ITEM_KEY;
3405 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/*
 * The item just before the returned slot is the last one in use.
 * NOTE(review): presumably only reached when the search found no exact
 * match; confirm slot 0 cannot occur here.
 */
3407 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408 path->slots[0] - 1);
3409 *highest_ino = found_key.objectid;
3412 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3414 btrfs_release_path(path);
3419 * Link inode to dir 'lost+found'. Increase @ref_count.
3421 * Returns 0 means success.
3422 * Returns <0 means failure.
/*
 * Link inode @ino into the "lost+found" directory of @root under the name in
 * @namebuf.
 *
 * The directory is created via btrfs_mkdir() under BTRFS_FIRST_FREE_OBJECTID,
 * using an inode number derived from get_highest_inode().  On a name clash a
 * ".INO" suffix is appended to @namebuf (possibly several times) as long as
 * the result fits in BTRFS_NAME_LEN.
 */
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425 struct btrfs_root *root,
3426 struct btrfs_path *path,
3427 u64 ino, char *namebuf, u32 name_len,
3428 u8 filetype, u64 *ref_count)
3430 char *dir_name = "lost+found";
3435 btrfs_release_path(path);
3436 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3441 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3445 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3448 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449 namebuf, name_len, filetype, NULL, 1, 0);
3451 * Add ".INO" suffix several times to handle case where
3452 * "FILENAME.INO" is already taken by another file.
3454 while (ret == -EEXIST) {
3456 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3458 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
/* Append the suffix in place and retry with the longer name. */
3462 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3464 name_len += count_digits(ino) + 1;
3465 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466 name_len, filetype, NULL, 1, 0);
3469 error("failed to link the inode %llu to %s dir: %s",
3470 ino, dir_name, strerror(-ret));
3475 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476 name_len, namebuf, dir_name);
3478 btrfs_release_path(path);
3480 error("failed to move file '%.*s' to '%s' dir", name_len,
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486 struct btrfs_root *root,
3487 struct btrfs_path *path,
3488 struct inode_record *rec)
3490 char namebuf[BTRFS_NAME_LEN] = {0};
3493 int name_recovered = 0;
3494 int type_recovered = 0;
3498 * Get file name and type first before these invalid inode ref
3499 * are deleted by remove_all_invalid_backref()
3501 name_recovered = !find_file_name(rec, namebuf, &namelen);
3502 type_recovered = !find_file_type(rec, &type);
3504 if (!name_recovered) {
3505 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506 rec->ino, rec->ino);
3507 namelen = count_digits(rec->ino);
3508 sprintf(namebuf, "%llu", rec->ino);
3511 if (!type_recovered) {
3512 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3514 type = BTRFS_FT_REG_FILE;
3518 ret = reset_nlink(trans, root, path, rec);
3521 "Failed to reset nlink for inode %llu: %s\n",
3522 rec->ino, strerror(-ret));
3526 if (rec->found_link == 0) {
3527 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528 namebuf, namelen, type,
3529 (u64 *)&rec->found_link);
3533 printf("Fixed the nlink of inode %llu\n", rec->ino);
3536 * Clear the flag anyway, or we will loop forever for the same inode
3537 * as it will not be removed from the bad inode list and the dead loop
3540 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541 btrfs_release_path(path);
3546 * Check if there is any normal(reg or prealloc) file extent for given
3548 * This is used to determine the file type when neither its dir_index/item or
3549 * inode_item exists.
3551 * This will *NOT* report error, if any error happens, just consider it does
3552 * not have any normal file extent.
/*
 * Look at the first EXTENT_DATA item found for @ino in @root and test whether
 * its extent type is anything other than inline.
 *
 * The lookup is read-only (no transaction, no COW) and uses a local path that
 * is released before returning.
 */
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3556 struct btrfs_path path;
3557 struct btrfs_key key;
3558 struct btrfs_key found_key;
3559 struct btrfs_file_extent_item *fi;
3563 btrfs_init_path(&path);
3565 key.type = BTRFS_EXTENT_DATA_KEY;
3568 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the end of this leaf: try the next leaf. */
3573 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574 ret = btrfs_next_leaf(root, &path);
3581 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3583 if (found_key.objectid != ino ||
3584 found_key.type != BTRFS_EXTENT_DATA_KEY)
3586 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587 struct btrfs_file_extent_item);
3588 type = btrfs_file_extent_type(path.nodes[0], fi);
3589 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595 btrfs_release_path(&path);
3599 static u32 btrfs_type_to_imode(u8 type)
3601 static u32 imode_by_btrfs_type[] = {
3602 [BTRFS_FT_REG_FILE] = S_IFREG,
3603 [BTRFS_FT_DIR] = S_IFDIR,
3604 [BTRFS_FT_CHRDEV] = S_IFCHR,
3605 [BTRFS_FT_BLKDEV] = S_IFBLK,
3606 [BTRFS_FT_FIFO] = S_IFIFO,
3607 [BTRFS_FT_SOCK] = S_IFSOCK,
3608 [BTRFS_FT_SYMLINK] = S_IFLNK,
3611 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * guessed from what else was found for the inode, falling back to a regular
 * file.  Only the inode item is rebuilt: the record is flagged with
 * I_ERR_LINK_COUNT_WRONG so that the nlink repair runs afterwards.
 */
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615 struct btrfs_root *root,
3616 struct btrfs_path *path,
3617 struct inode_record *rec)
3621 int type_recovered = 0;
3624 printf("Trying to rebuild inode:%llu\n", rec->ino);
3626 type_recovered = !find_file_type(rec, &filetype);
3629 * Try to determine inode type if type not found.
3631 * For found regular file extent, it must be FILE.
3632 * For found dir_item/index, it must be DIR.
3634 * For undetermined one, use FILE as fallback.
3637 * 1. If found backref(inode_index/item is already handled) to it,
3639 * Need new inode-inode ref structure to allow search for that.
3641 if (!type_recovered) {
3642 if (rec->found_file_extent &&
3643 find_normal_file_extent(root, rec->ino)) {
3645 filetype = BTRFS_FT_REG_FILE;
3646 } else if (rec->found_dir_item) {
3648 filetype = BTRFS_FT_DIR;
/* Orphan data extents are also taken as evidence of a regular file. */
3649 } else if (!list_empty(&rec->orphan_extents)) {
3651 filetype = BTRFS_FT_REG_FILE;
3653 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3656 filetype = BTRFS_FT_REG_FILE;
3660 ret = btrfs_new_inode(trans, root, rec->ino,
3661 mode | btrfs_type_to_imode(filetype));
3666 * Here inode rebuild is done, we only rebuild the inode item,
3667 * don't repair the nlink(like move to lost+found).
3668 * That is the job of nlink repair.
3670 * We just fill the record and return
3672 rec->found_dir_item = 1;
3673 rec->imode = mode | btrfs_type_to_imode(filetype);
3675 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676 /* Ensure the inode_nlinks repair function will be called */
3677 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Handle every orphan data extent recorded for @rec.
 *
 * Each orphan is first checked against the existing file extents.  A
 * conflicting orphan has its extent freed; the others are inserted as file
 * extents, after which the record's found size, hole tree and error flags are
 * updated.  Processed orphans are unlinked from the record's list, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683 struct btrfs_root *root,
3684 struct btrfs_path *path,
3685 struct inode_record *rec)
3687 struct orphan_data_extent *orphan;
3688 struct orphan_data_extent *tmp;
3691 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3693 * Check for conflicting file extents
3695 * Here we don't know whether the extents is compressed or not,
3696 * so we can only assume it not compressed nor data offset,
3697 * and use its disk_len as extent length.
3699 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700 orphan->offset, orphan->disk_len, 0);
3701 btrfs_release_path(path);
3706 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707 orphan->disk_bytenr, orphan->disk_len);
3708 ret = btrfs_free_extent(trans,
3709 root->fs_info->extent_root,
3710 orphan->disk_bytenr, orphan->disk_len,
3711 0, root->objectid, orphan->objectid,
/* Insert the orphan as a file extent; disk_len is used for both length arguments. */
3716 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717 orphan->offset, orphan->disk_bytenr,
3718 orphan->disk_len, orphan->disk_len);
3722 /* Update file size info */
3723 rec->found_size += orphan->disk_len;
3724 if (rec->found_size == rec->nbytes)
3725 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3727 /* Update the file extent hole info too */
3728 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3732 if (RB_EMPTY_ROOT(&rec->holes))
3733 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3735 list_del(&orphan->list);
3738 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of @rec with hole extents.
 *
 * Every entry in the record's hole tree is punched with btrfs_punch_hole()
 * and then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once
 * the tree is empty.  A separate case punches one hole covering the whole
 * file, sized as isize rounded up to the sector size.
 */
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744 struct btrfs_root *root,
3745 struct btrfs_path *path,
3746 struct inode_record *rec)
3748 struct rb_node *node;
3749 struct file_extent_hole *hole;
3753 node = rb_first(&rec->holes);
3757 hole = rb_entry(node, struct file_extent_hole, node);
3758 ret = btrfs_punch_hole(trans, root, rec->ino,
3759 hole->start, hole->len);
3762 ret = del_file_extent_hole(&rec->holes, hole->start,
3766 if (RB_EMPTY_ROOT(&rec->holes))
3767 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The entry just handled is gone, so start again from the first remaining one. */
3768 node = rb_first(&rec->holes);
3770 /* special case for a file losing all its file extent */
3772 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773 round_up(rec->isize,
3774 root->fs_info->sectorsize));
3778 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779 rec->ino, root->objectid);
/*
 * Run every repair routine that matches an error flag set on @rec.
 *
 * All repairs share one transaction.  They run in the fixed order below, and
 * each later repair is attempted only while the earlier ones have succeeded
 * (ret == 0).
 */
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3786 struct btrfs_trans_handle *trans;
3787 struct btrfs_path path;
/* Bail out unless at least one error that can be handled here is flagged. */
3790 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791 I_ERR_NO_ORPHAN_ITEM |
3792 I_ERR_LINK_COUNT_WRONG |
3793 I_ERR_NO_INODE_ITEM |
3794 I_ERR_FILE_EXTENT_ORPHAN |
3795 I_ERR_FILE_EXTENT_DISCOUNT|
3796 I_ERR_FILE_NBYTES_WRONG)))
3800 * For nlink repair, it may create a dir and add link, so
3801 * 2 for parent(256)'s dir_index and dir_item
3802 * 2 for lost+found dir's inode_item and inode_ref
3803 * 1 for the new inode_ref of the file
3804 * 2 for lost+found dir's dir_index and dir_item for the file
3806 trans = btrfs_start_transaction(root, 7);
3808 return PTR_ERR(trans);
3810 btrfs_init_path(&path);
3811 if (rec->errors & I_ERR_NO_INODE_ITEM)
3812 ret = repair_inode_no_item(trans, root, &path, rec);
3813 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818 ret = repair_inode_isize(trans, root, &path, rec);
3819 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822 ret = repair_inode_nlinks(trans, root, &path, rec);
3823 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824 ret = repair_inode_nbytes(trans, root, &path, rec);
/* The transaction is committed even when one of the repairs failed. */
3825 btrfs_commit_transaction(trans, root);
3826 btrfs_release_path(&path);
/*
 * Check (and, in repair mode, try to fix) all inode records collected for
 * @root, emptying @inode_cache in the process.
 *
 * In repair mode the backrefs of every record are repaired first.  Then the
 * root directory record is validated, and recreated if it is missing.
 * Finally each remaining record is checked, repaired where possible, and any
 * unresolved problems are printed.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3830 static int check_inode_recs(struct btrfs_root *root,
3831 struct cache_tree *inode_cache)
3833 struct cache_extent *cache;
3834 struct ptr_node *node;
3835 struct inode_record *rec;
3836 struct inode_backref *backref;
3841 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with no references left is not expected to have inode records. */
3843 if (btrfs_root_refs(&root->root_item) == 0) {
3844 if (!cache_tree_empty(inode_cache))
3845 fprintf(stderr, "warning line %d\n", __LINE__);
3850 * We need to repair backrefs first because we could change some of the
3851 * errors in the inode recs.
3853 * We also need to go through and delete invalid backrefs first and then
3854 * add the correct ones second. We do this because we may get EEXIST
3855 * when adding back the correct index because we hadn't yet deleted the
3858 * For example, if we were missing a dir index then the directories
3859 * isize would be wrong, so if we fixed the isize to what we thought it
3860 * would be and then fixed the backref we'd still have a invalid fs, so
3861 * we need to add back the dir index and then check to see if the isize
3866 if (stage == 3 && !err)
3869 cache = search_cache_extent(inode_cache, 0);
3870 while (repair && cache) {
3871 node = container_of(cache, struct ptr_node, cache);
3873 cache = next_cache_extent(cache);
3875 /* Need to free everything up and rescan */
3877 remove_cache_extent(inode_cache, &node->cache);
3879 free_inode_rec(rec);
3883 if (list_empty(&rec->backrefs))
3886 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the record of the root directory itself. */
3900 rec = get_inode_rec(inode_cache, root_dirid, 0);
3901 BUG_ON(IS_ERR(rec));
3903 ret = check_root_dir(rec);
3905 fprintf(stderr, "root %llu root dir %llu error\n",
3906 (unsigned long long)root->root_key.objectid,
3907 (unsigned long long)root_dirid);
3908 print_inode_error(root, rec);
3913 struct btrfs_trans_handle *trans;
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 err = PTR_ERR(trans);
3922 "root %llu missing its root dir, recreating\n",
3923 (unsigned long long)root->objectid);
3925 ret = btrfs_make_root_dir(trans, root, root_dirid);
3928 btrfs_commit_transaction(trans, root);
3932 fprintf(stderr, "root %llu root dir %llu not found\n",
3933 (unsigned long long)root->root_key.objectid,
3934 (unsigned long long)root_dirid);
/* Main pass: take each remaining record out of the cache and check it. */
3938 cache = search_cache_extent(inode_cache, 0);
3941 node = container_of(cache, struct ptr_node, cache);
3943 remove_cache_extent(inode_cache, &node->cache);
/* The root directory and the orphan objectid are not checked in this pass. */
3945 if (rec->ino == root_dirid ||
3946 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947 free_inode_rec(rec);
3951 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952 ret = check_orphan_item(root, rec->ino);
3954 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955 if (can_free_inode_rec(rec)) {
3956 free_inode_rec(rec);
3961 if (!rec->found_inode_item)
3962 rec->errors |= I_ERR_NO_INODE_ITEM;
3963 if (rec->found_link != rec->nlink)
3964 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3966 ret = try_repair_inode(root, rec);
3967 if (ret == 0 && can_free_inode_rec(rec)) {
3968 free_inode_rec(rec);
3974 if (!(repair && ret == 0))
3976 print_inode_error(root, rec);
/* Report every backref still attached to the record as unresolved. */
3977 list_for_each_entry(backref, &rec->backrefs, list) {
3978 if (!backref->found_dir_item)
3979 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980 if (!backref->found_dir_index)
3981 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982 if (!backref->found_inode_ref)
3983 backref->errors |= REF_ERR_NO_INODE_REF;
3984 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985 " namelen %u name %s filetype %d errors %x",
3986 (unsigned long long)backref->dir,
3987 (unsigned long long)backref->index,
3988 backref->namelen, backref->name,
3989 backref->filetype, backref->errors);
3990 print_ref_error(backref->errors);
3992 free_inode_rec(rec);
3994 return (error > 0) ? -1 : 0;
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4000 struct cache_extent *cache;
4001 struct root_record *rec = NULL;
4004 cache = lookup_cache_extent(root_cache, objectid, 1);
4006 rec = container_of(cache, struct root_record, cache);
4008 rec = calloc(1, sizeof(*rec));
4010 return ERR_PTR(-ENOMEM);
4011 rec->objectid = objectid;
4012 INIT_LIST_HEAD(&rec->backrefs);
4013 rec->cache.start = objectid;
4014 rec->cache.size = 1;
4016 ret = insert_cache_extent(root_cache, &rec->cache);
4018 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches @ref_root, @dir and @name, or create
 * one.
 *
 * @index is not part of the match; it is only stored in a newly created
 * backref.  A new backref is zero-allocated with room for the name plus a
 * terminating NUL and appended to the record's backref list.
 */
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024 u64 ref_root, u64 dir, u64 index,
4025 const char *name, int namelen)
4027 struct root_backref *backref;
4029 list_for_each_entry(backref, &rec->backrefs, list) {
4030 if (backref->ref_root != ref_root || backref->dir != dir ||
4031 backref->namelen != namelen)
4033 if (memcmp(name, backref->name, namelen))
/* No match: allocate a new backref with the name stored inline at its end. */
4038 backref = calloc(1, sizeof(*backref) + namelen + 1);
4041 backref->ref_root = ref_root;
4043 backref->index = index;
4044 backref->namelen = namelen;
4045 memcpy(backref->name, name, namelen);
4046 backref->name[namelen] = '\0';
4047 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release the root record that embeds @cache: every backref is unlinked from
 * the record's backref list first.
 */
4051 static void free_root_record(struct cache_extent *cache)
4053 struct root_record *rec;
4054 struct root_backref *backref;
4056 rec = container_of(cache, struct root_record, cache);
4057 while (!list_empty(&rec->backrefs)) {
4058 backref = to_root_backref(rec->backrefs.next);
4059 list_del(&backref->list);
/* Passes free_root_record to the macro, presumably generating the tree-wide free helper for root records. */
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type refers to root @root_id.
 *
 * The root record and the matching backref are looked up (or created), the
 * given @errors are merged in, and the "found" flag for @item_type is set.
 * For every item type except DIR_ITEM the index is compared with the one
 * already stored, or stored if none of the index-carrying references was seen
 * before.  A backref with both a forward ref and a dir item is marked
 * reachable.
 */
4068 static int add_root_backref(struct cache_tree *root_cache,
4069 u64 root_id, u64 ref_root, u64 dir, u64 index,
4070 const char *name, int namelen,
4071 int item_type, int errors)
4073 struct root_record *rec;
4074 struct root_backref *backref;
4076 rec = get_root_rec(root_cache, root_id);
4077 BUG_ON(IS_ERR(rec));
4078 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4081 backref->errors |= errors;
4083 if (item_type != BTRFS_DIR_ITEM_KEY) {
4084 if (backref->found_dir_index || backref->found_back_ref ||
4085 backref->found_forward_ref) {
4086 if (backref->index != index)
4087 backref->errors |= REF_ERR_INDEX_UNMATCH;
4089 backref->index = index;
/* Set the "found" flag that corresponds to the item type. */
4093 if (item_type == BTRFS_DIR_ITEM_KEY) {
4094 if (backref->found_forward_ref)
4096 backref->found_dir_item = 1;
4097 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098 backref->found_dir_index = 1;
4099 } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100 if (backref->found_forward_ref)
4101 backref->errors |= REF_ERR_DUP_ROOT_REF;
4102 else if (backref->found_dir_item)
4104 backref->found_forward_ref = 1;
4105 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106 if (backref->found_back_ref)
4107 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108 backref->found_back_ref = 1;
4113 if (backref->found_forward_ref && backref->found_dir_item)
4114 backref->reachable = 1;
/*
 * Fold the inode records gathered in @src_cache into root backrefs stored in
 * @dst_cache. Each record is detached from @src_cache, tested with
 * is_child_root(), and its dir-item / dir-index backrefs are re-registered
 * through add_root_backref() with rec->ino as the root id and @root's
 * objectid as the referencing root; the record is freed afterwards.
 * For the tree reloc root @src_cache is released via free_inode_recs_tree().
 */
4118 static int merge_root_recs(struct btrfs_root *root,
4119 struct cache_tree *src_cache,
4120 struct cache_tree *dst_cache)
4122 struct cache_extent *cache;
4123 struct ptr_node *node;
4124 struct inode_record *rec;
4125 struct inode_backref *backref;
4128 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129 free_inode_recs_tree(src_cache);
4134 cache = search_cache_extent(src_cache, 0);
4137 node = container_of(cache, struct ptr_node, cache);
4139 remove_cache_extent(src_cache, &node->cache);
4142 ret = is_child_root(root, root->objectid, rec->ino);
4148 list_for_each_entry(backref, &rec->backrefs, list) {
/* records arriving here must not carry an inode ref */
4149 BUG_ON(backref->found_inode_ref);
4150 if (backref->found_dir_item)
4151 add_root_backref(dst_cache, rec->ino,
4152 root->root_key.objectid, backref->dir,
4153 backref->index, backref->name,
4154 backref->namelen, BTRFS_DIR_ITEM_KEY,
4156 if (backref->found_dir_index)
4157 add_root_backref(dst_cache, rec->ino,
4158 root->root_key.objectid, backref->dir,
4159 backref->index, backref->name,
4160 backref->namelen, BTRFS_DIR_INDEX_KEY,
4164 free_inode_rec(rec);
/*
 * Validate the root records collected in @root_cache.
 *
 * Visible steps: the record of BTRFS_FS_TREE_OBJECTID is fetched first, then
 * the cache is scanned and backrefs whose referencing root has no reference
 * of its own lose their reachable flag. A second scan reports fs trees in the
 * free objectid range that have no reference (after consulting
 * check_orphan_item()), completes the error bits of every backref from its
 * found_* flags and prints the unresolved ones.
 *
 * Returns 1 when at least one error was counted, 0 otherwise.
 */
4171 static int check_root_refs(struct btrfs_root *root,
4172 struct cache_tree *root_cache)
4174 struct root_record *rec;
4175 struct root_record *ref_root;
4176 struct root_backref *backref;
4177 struct cache_extent *cache;
4183 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184 BUG_ON(IS_ERR(rec));
4187 /* fixme: this can not detect circular references */
4190 cache = search_cache_extent(root_cache, 0);
4194 rec = container_of(cache, struct root_record, cache);
4195 cache = next_cache_extent(cache);
4197 if (rec->found_ref == 0)
4200 list_for_each_entry(backref, &rec->backrefs, list) {
4201 if (!backref->reachable)
4204 ref_root = get_root_rec(root_cache,
4206 BUG_ON(IS_ERR(ref_root));
4207 if (ref_root->found_ref > 0)
/* presumably only reached when the referencing root is itself unreferenced */
4210 backref->reachable = 0;
4212 if (rec->found_ref == 0)
/* second pass: reporting */
4218 cache = search_cache_extent(root_cache, 0);
4222 rec = container_of(cache, struct root_record, cache);
4223 cache = next_cache_extent(cache);
4225 if (rec->found_ref == 0 &&
4226 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228 ret = check_orphan_item(root->fs_info->tree_root,
4234 * If we don't have a root item then we likely just have
4235 * a dir item in a snapshot for this root but no actual
4236 * ref key or anything so it's meaningless.
4238 if (!rec->found_root_item)
4241 fprintf(stderr, "fs tree %llu not referenced\n",
4242 (unsigned long long)rec->objectid);
4246 if (rec->found_ref > 0 && !rec->found_root_item)
/* every item kind that was never seen for this backref becomes an error bit */
4248 list_for_each_entry(backref, &rec->backrefs, list) {
4249 if (!backref->found_dir_item)
4250 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251 if (!backref->found_dir_index)
4252 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253 if (!backref->found_back_ref)
4254 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255 if (!backref->found_forward_ref)
4256 backref->errors |= REF_ERR_NO_ROOT_REF;
4257 if (backref->reachable && backref->errors)
4264 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265 (unsigned long long)rec->objectid, rec->found_ref,
4266 rec->found_root_item ? "" : "not found");
4268 list_for_each_entry(backref, &rec->backrefs, list) {
4269 if (!backref->reachable)
4271 if (!backref->errors && rec->found_root_item)
4273 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274 " index %llu namelen %u name %s errors %x\n",
4275 (unsigned long long)backref->ref_root,
4276 (unsigned long long)backref->dir,
4277 (unsigned long long)backref->index,
4278 backref->namelen, backref->name,
4280 print_ref_error(backref->errors);
4283 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF / ROOT_BACKREF item stored at @slot of @eb and hand it
 * to add_root_backref().
 *
 * A name longer than BTRFS_NAME_LEN is clamped to that length and flagged
 * with REF_ERR_NAME_TOO_LONG. For a ROOT_REF key the referenced root is
 * key->offset and the referencing one key->objectid; for the other key type
 * the two are passed the other way round.
 */
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287 struct btrfs_key *key,
4288 struct cache_tree *root_cache)
4294 struct btrfs_root_ref *ref;
4295 char namebuf[BTRFS_NAME_LEN];
4298 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4300 dirid = btrfs_root_ref_dirid(eb, ref);
4301 index = btrfs_root_ref_sequence(eb, ref);
4302 name_len = btrfs_root_ref_name_len(eb, ref);
4304 if (name_len <= BTRFS_NAME_LEN) {
4308 len = BTRFS_NAME_LEN;
4309 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow the fixed-size btrfs_root_ref header */
4311 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4313 if (key->type == BTRFS_ROOT_REF_KEY) {
4314 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315 index, namebuf, len, key->type, error);
4317 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt-block cache tree: maps @cache back to
 * its enclosing btrfs_corrupt_block.
 * NOTE(review): the release call itself is not visible here - confirm.
 */
4323 static void free_corrupt_block(struct cache_extent *cache)
4325 struct btrfs_corrupt_block *corrupt;
4327 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably generates free_corrupt_blocks_tree() (used by check_fs_root())
 * on top of free_corrupt_block() - verify against the macro definition.
 */
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4334 * Repair the btree of the given root.
4336 * The fix is to remove the node key in corrupt_blocks cache_tree
4337 * and rebalance the tree.
4338 * After the fix, the btree should be writeable.
4340 static int repair_btree(struct btrfs_root *root,
4341 struct cache_tree *corrupt_blocks)
4343 struct btrfs_trans_handle *trans;
4344 struct btrfs_path path;
4345 struct btrfs_corrupt_block *corrupt;
4346 struct cache_extent *cache;
4347 struct btrfs_key key;
4352 if (cache_tree_empty(corrupt_blocks))
/* pointer removal, extent freeing and the rebalance pass share one transaction */
4355 trans = btrfs_start_transaction(root, 1);
4356 if (IS_ERR(trans)) {
4357 ret = PTR_ERR(trans);
4358 fprintf(stderr, "Error starting transaction: %s\n",
4362 btrfs_init_path(&path);
/* pass 1: drop the parent pointer of every recorded corrupt block */
4363 cache = first_cache_extent(corrupt_blocks);
4365 corrupt = container_of(cache, struct btrfs_corrupt_block,
4367 level = corrupt->level;
/* stop the search at the level recorded for the corrupt block */
4368 path.lowest_level = level;
4369 key.objectid = corrupt->key.objectid;
4370 key.type = corrupt->key.type;
4371 key.offset = corrupt->key.offset;
4374 * Here we don't want to do any tree balance, since it may
4375 * cause a balance with corrupted brother leaf/node,
4376 * so ins_len set to 0 here.
4377 * Balance will be done after all corrupt node/leaf is deleted.
4379 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4382 offset = btrfs_node_blockptr(path.nodes[level],
4385 /* Remove the ptr */
4386 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4390 * Remove the corresponding extent
4391 * return value is not concerned.
4393 btrfs_release_path(&path);
4394 ret = btrfs_free_extent(trans, root, offset,
4395 root->fs_info->nodesize, 0,
4396 root->root_key.objectid, level - 1, 0);
4397 cache = next_cache_extent(cache);
4400 /* Balance the btree using btrfs_search_slot() */
4401 cache = first_cache_extent(corrupt_blocks);
4403 corrupt = container_of(cache, struct btrfs_corrupt_block,
4405 memcpy(&key, &corrupt->key, sizeof(key));
/* same key as in pass 1, this time searched with ins_len -1 */
4406 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4409 /* return will always >0 since it won't find the item */
4411 btrfs_release_path(&path);
4412 cache = next_cache_extent(cache);
/* NOTE(review): the result of the commit is not captured on this line */
4415 btrfs_commit_transaction(trans, root);
4416 btrfs_release_path(&path);
/*
 * Check a single fs/subvolume tree.
 *
 * The tree is traversed with walk_down_tree()/walk_up_tree(), using a local
 * shared_node as the top-level container for inode and root records. Tree
 * blocks recorded in the per-call corrupt_blocks cache are listed and handed
 * to repair_btree(); the collected root records are merged into @root_cache
 * and the inode records are validated with check_inode_recs().
 *
 * @root:       tree to check
 * @root_cache: global cache of root records, updated by this call
 * @wc:         walk state; nodes[], active_node and root_level are reset here
 */
4420 static int check_fs_root(struct btrfs_root *root,
4421 struct cache_tree *root_cache,
4422 struct walk_control *wc)
4428 struct btrfs_path path;
4429 struct shared_node root_node;
4430 struct root_record *rec;
4431 struct btrfs_root_item *root_item = &root->root_item;
4432 struct cache_tree corrupt_blocks;
4433 struct orphan_data_extent *orphan;
4434 struct orphan_data_extent *tmp;
4435 enum btrfs_tree_block_status status;
4436 struct node_refs nrefs;
4439 * Reuse the corrupt_block cache tree to record corrupted tree block
4441 * Unlike the usage in extent tree check, here we do it in a per
4442 * fs/subvol tree base.
4444 cache_tree_init(&corrupt_blocks);
4445 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* no root record is kept for the tree reloc root */
4447 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448 rec = get_root_rec(root_cache, root->root_key.objectid);
4449 BUG_ON(IS_ERR(rec));
4450 if (btrfs_root_refs(root_item) > 0)
4451 rec->found_root_item = 1;
4454 btrfs_init_path(&path);
4455 memset(&root_node, 0, sizeof(root_node));
4456 cache_tree_init(&root_node.root_cache);
4457 cache_tree_init(&root_node.inode_cache);
4458 memset(&nrefs, 0, sizeof(nrefs));
4460 /* Move the orphan extent record to corresponding inode_record */
4461 list_for_each_entry_safe(orphan, tmp,
4462 &root->orphan_data_extents, list) {
4463 struct inode_record *inode;
4465 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4467 BUG_ON(IS_ERR(inode));
4468 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469 list_move(&orphan->list, &inode->orphan_extents);
/* start the walk at the root block's level with root_node as its container */
4472 level = btrfs_header_level(root->node);
4473 memset(wc->nodes, 0, sizeof(wc->nodes));
4474 wc->nodes[level] = &root_node;
4475 wc->active_node = level;
4476 wc->root_level = level;
4478 /* We may not have checked the root block, lets do that now */
4479 if (btrfs_is_leaf(root->node))
4480 status = btrfs_check_leaf(root, NULL, root->node);
4482 status = btrfs_check_node(root, NULL, root->node);
4483 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Either walk from the root block, or (else branch) resume from the
 * drop_progress key stored in the root item.
 */
4486 if (btrfs_root_refs(root_item) > 0 ||
4487 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488 path.nodes[level] = root->node;
4489 extent_buffer_get(root->node);
4490 path.slots[level] = 0;
4492 struct btrfs_key key;
4493 struct btrfs_disk_key found_key;
4495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496 level = root_item->drop_level;
4497 path.lowest_level = level;
/* drop_level comes from disk: reject values outside the tree's height */
4498 if (level > btrfs_header_level(root->node) ||
4499 level >= BTRFS_MAX_LEVEL) {
4500 error("ignoring invalid drop level: %u", level);
4503 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4506 btrfs_node_key(path.nodes[level], &found_key,
4508 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509 sizeof(found_key)));
4513 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519 wret = walk_up_tree(root, &path, wc, &level);
4526 btrfs_release_path(&path);
/* report the tree blocks the walk recorded as corrupted */
4528 if (!cache_tree_empty(&corrupt_blocks)) {
4529 struct cache_extent *cache;
4530 struct btrfs_corrupt_block *corrupt;
4532 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533 root->root_key.objectid);
4534 cache = first_cache_extent(&corrupt_blocks);
4536 corrupt = container_of(cache,
4537 struct btrfs_corrupt_block,
4539 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540 cache->start, corrupt->level,
4541 corrupt->key.objectid, corrupt->key.type,
4542 corrupt->key.offset);
4543 cache = next_cache_extent(cache);
4546 printf("Try to repair the btree for root %llu\n",
4547 root->root_key.objectid);
4548 ret = repair_btree(root, &corrupt_blocks);
4550 fprintf(stderr, "Failed to repair btree: %s\n",
4553 printf("Btree for root %llu is fixed\n",
4554 root->root_key.objectid);
4558 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* the inode record still being assembled is marked checked before validation */
4562 if (root_node.current) {
4563 root_node.current->checked = 1;
4564 maybe_free_inode_rec(&root_node.inode_cache,
4568 err = check_inode_recs(root, &root_node.inode_cache);
/* corrupt_blocks lives on this stack frame: detach it from fs_info before leaving */
4572 free_corrupt_blocks_tree(&corrupt_blocks);
4573 root->fs_info->corrupt_blocks = NULL;
4574 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid identifies a tree that check_fs_roots() treats as
 * an fs root. The tree reloc and data reloc ids are tested explicitly
 * (presumably accepted - the statement guarded by the test is not visible
 * here); every other id is judged by is_fstree().
 */
4578 static int fs_root_objectid(u64 objectid)
4580 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4583 return is_fstree(objectid);
/*
 * Iterate over the items of the root tree: every ROOT_ITEM accepted by
 * fs_root_objectid() is read and passed to check_fs_root(), every
 * ROOT_REF / ROOT_BACKREF item is passed to process_root_ref().
 *
 * When the root tree's node differs from the one remembered earlier, or
 * check_fs_root() returns -EAGAIN, the root records gathered so far are
 * dropped and the path is released (presumably followed by a restart -
 * confirm).
 */
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587 struct cache_tree *root_cache)
4589 struct btrfs_path path;
4590 struct btrfs_key key;
4591 struct walk_control wc;
4592 struct extent_buffer *leaf, *tree_node;
4593 struct btrfs_root *tmp_root;
4594 struct btrfs_root *tree_root = fs_info->tree_root;
4598 if (ctx.progress_enabled) {
4599 ctx.tp = TASK_FS_ROOTS;
4600 task_start(ctx.info);
4604 * Just in case we made any changes to the extent tree that weren't
4605 * reflected into the free space cache yet.
4608 reset_cached_block_groups(fs_info);
4609 memset(&wc, 0, sizeof(wc));
4610 cache_tree_init(&wc.shared);
4611 btrfs_init_path(&path);
4616 key.type = BTRFS_ROOT_ITEM_KEY;
4617 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* remember the current root node so a later change can be detected */
4622 tree_node = tree_root->node;
4624 if (tree_node != tree_root->node) {
4625 free_root_recs_tree(root_cache);
4626 btrfs_release_path(&path);
4629 leaf = path.nodes[0];
4630 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631 ret = btrfs_next_leaf(tree_root, &path);
4637 leaf = path.nodes[0];
4639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641 fs_root_objectid(key.objectid)) {
/* the tree reloc root is read uncached and freed again right after the check */
4642 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643 tmp_root = btrfs_read_fs_root_no_cache(
4646 key.offset = (u64)-1;
4647 tmp_root = btrfs_read_fs_root(
4650 if (IS_ERR(tmp_root)) {
4654 ret = check_fs_root(tmp_root, root_cache, &wc);
4655 if (ret == -EAGAIN) {
4656 free_root_recs_tree(root_cache);
4657 btrfs_release_path(&path);
4662 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663 btrfs_free_fs_root(tmp_root);
4664 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665 key.type == BTRFS_ROOT_BACKREF_KEY) {
4666 process_root_ref(leaf, path.slots[0], &key,
4673 btrfs_release_path(&path);
/* sanity check: the shared-node cache is expected to be empty once freed */
4675 free_extent_cache_tree(&wc.shared);
4676 if (!cache_tree_empty(&wc.shared))
4677 fprintf(stderr, "warning line %d\n", __LINE__);
4679 task_stop(ctx.info);
4685 * Find the @index according by @ino and name.
4686 * Notice: time efficiency is O(N)
4688 * @root: the root of the fs/file tree
4689 * @index_ret: the index as return value
4690 * @namebuf: the name to match
4691 * @name_len: the length of name to match
4692 * @file_type: the file_type of INODE_ITEM to match
4694 * Returns 0 if found and *@index_ret will be modified with right value
4695 * Returns <0 if not found and *@index_ret will be (u64)-1
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698 u64 *index_ret, char *namebuf, u32 name_len,
4701 struct btrfs_path path;
4702 struct extent_buffer *node;
4703 struct btrfs_dir_item *di;
4704 struct btrfs_key key;
4705 struct btrfs_key location;
4706 char name[BTRFS_NAME_LEN] = {0};
4718 /* search from the last index */
4719 key.objectid = dirid;
4720 key.offset = (u64)-1;
4721 key.type = BTRFS_DIR_INDEX_KEY;
4723 btrfs_init_path(&path);
4724 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4729 ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4732 *index_ret = (64)-1;
4735 /* Check whether inode_id/filetype/name match */
4736 node = path.nodes[0];
4737 slot = path.slots[0];
4738 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739 total = btrfs_item_size_nr(node, slot);
4740 while (cur < total) {
4742 len = btrfs_dir_name_len(node, di);
4743 data_len = btrfs_dir_data_len(node, di);
4745 btrfs_dir_item_key_to_cpu(node, di, &location);
4746 if (location.objectid != location_id ||
4747 location.type != BTRFS_INODE_ITEM_KEY ||
4748 location.offset != 0)
4751 filetype = btrfs_dir_type(node, di);
4752 if (file_type != filetype)
4755 if (len > BTRFS_NAME_LEN)
4756 len = BTRFS_NAME_LEN;
4758 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759 if (len != name_len || strncmp(namebuf, name, len))
4762 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763 *index_ret = key.offset;
4767 len += sizeof(*di) + data_len;
4768 di = (struct btrfs_dir_item *)((char *)di + len);
4774 btrfs_release_path(&path);
4779 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780 * INODE_REF/INODE_EXTREF match.
4782 * @root: the root of the fs/file tree
4783 * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784 * value while find index
4785 * @location_key: location key of the struct btrfs_dir_item to match
4786 * @name: the name to match
4787 * @namelen: the length of name
4788 * @file_type: the type of file to match
4790 * Return 0 if no error occurred.
4791 * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792 * DIR_ITEM/DIR_INDEX
4793 * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794 * and DIR_ITEM/DIR_INDEX mismatch
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797 struct btrfs_key *location_key, char *name,
4798 u32 namelen, u8 file_type)
4800 struct btrfs_path path;
4801 struct extent_buffer *node;
4802 struct btrfs_dir_item *di;
4803 struct btrfs_key location;
4804 char namebuf[BTRFS_NAME_LEN] = {0};
4813 /* get the index by traversing all index */
/* a DIR_INDEX key with offset (u64)-1 asks for the index to be resolved first */
4814 if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815 ret = find_dir_index(root, key->objectid,
4816 location_key->objectid, &key->offset,
4817 name, namelen, file_type);
4819 ret = DIR_INDEX_MISSING;
4823 btrfs_init_path(&path);
4824 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4826 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4831 /* Check whether inode_id/filetype/name match */
4832 node = path.nodes[0];
4833 slot = path.slots[0];
4834 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835 total = btrfs_item_size_nr(node, slot);
/* an item may hold several entries; walk them until one matches */
4836 while (cur < total) {
/* assume a mismatch until this entry has passed every comparison */
4837 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838 DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4840 len = btrfs_dir_name_len(node, di);
4841 data_len = btrfs_dir_data_len(node, di);
4843 btrfs_dir_item_key_to_cpu(node, di, &location);
4844 if (location.objectid != location_key->objectid ||
4845 location.type != location_key->type ||
4846 location.offset != location_key->offset)
4849 filetype = btrfs_dir_type(node, di);
4850 if (file_type != filetype)
/* clamp to the local buffer before copying the on-disk name */
4853 if (len > BTRFS_NAME_LEN) {
4854 len = BTRFS_NAME_LEN;
4855 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4857 key->type == BTRFS_DIR_ITEM_KEY ?
4858 "DIR_ITEM" : "DIR_INDEX",
4859 key->objectid, key->offset, len);
4861 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4863 if (len != namelen || strncmp(namebuf, name, len))
/* step over header, name and data to reach the next entry */
4869 len += sizeof(*di) + data_len;
4870 di = (struct btrfs_dir_item *)((char *)di + len);
4875 btrfs_release_path(&path);
4880 * Prints inode ref error message
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883 u64 index, const char *namebuf, int name_len,
4884 u8 filetype, int err)
4889 /* root dir error */
4890 if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4892 "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893 root->objectid, key->objectid, key->offset, namebuf);
4898 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900 root->objectid, key->offset,
4901 btrfs_name_hash(namebuf, name_len),
4902 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4904 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906 root->objectid, key->offset, index,
4907 err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4912 * Insert the missing inode item.
4914 * Returns 0 means success.
4915 * Returns <0 means error.
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4920 struct btrfs_key key;
4921 struct btrfs_trans_handle *trans;
4922 struct btrfs_path path;
4926 key.type = BTRFS_INODE_ITEM_KEY;
4929 btrfs_init_path(&path);
4930 trans = btrfs_start_transaction(root, 1);
4931 if (IS_ERR(trans)) {
4936 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
/*
 * Both a search error and ret == 0 (presumably "item already present" -
 * confirm) are singled out before the insertion below.
 */
4937 if (ret < 0 || !ret)
4940 /* insert inode item */
4941 create_inode_item_lowmem(trans, root, ino, filetype);
4944 btrfs_commit_transaction(trans, root);
4947 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948 root->objectid, ino);
4949 btrfs_release_path(&path);
4954 * The ternary means dir item, dir index and relative inode ref.
4955 * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956 * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4958 * If two of three is missing or mismatched, delete the existing one.
4959 * If one of three is missing or mismatched, add the missing one.
4961 * returns 0 means success.
4962 * returns not 0 means on error;
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965 u64 index, char *name, int name_len, u8 filetype,
4968 struct btrfs_trans_handle *trans;
4973 * stage shall be one of following valild values:
4974 * 0: Fine, nothing to do.
4975 * 1: One of three is wrong, so add missing one.
4976 * 2: Two of three is wrong, so delete existed one.
/* each of the three tests below looks at one member of the ternary */
4978 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4980 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4982 if (err & (INODE_REF_MISSING))
4985 /* stage must be smaller than 3 */
4988 trans = btrfs_start_transaction(root, 1);
/* unlink removes the surviving member(s); add_link recreates the missing one */
4990 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4995 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996 filetype, &index, 1, 1);
5000 btrfs_commit_transaction(trans, root);
5003 error("fail to repair inode %llu name %s filetype %u",
5004 ino, name, filetype);
5006 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007 stage == 2 ? "Delete" : "Add",
5008 ino, name, filetype);
5014 * Traverse the given INODE_REF and call find_dir_item() to find related
5015 * DIR_ITEM/DIR_INDEX.
5017 * @root: the root of the fs/file tree
5018 * @ref_key: the key of the INODE_REF
5019 * @path the path provides node and slot
5020 * @refs: the count of INODE_REF
5021 * @mode: the st_mode of INODE_ITEM
5022 * @name_ret: returns with the first ref's name
5023 * @namelen_ret: len of the name_ret
5025 * Return 0 if no error occurred.
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028 struct btrfs_path *path, char *name_ret,
5029 u32 *namelen_ret, u64 *refs_ret, int mode)
5031 struct btrfs_key key;
5032 struct btrfs_key location;
5033 struct btrfs_inode_ref *ref;
5034 struct extent_buffer *node;
5035 char namebuf[BTRFS_NAME_LEN] = {0};
5045 int need_research = 0;
5053 /* since after repair, path and the dir item may be changed */
5054 if (need_research) {
5056 btrfs_release_path(path);
5057 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058 /* the item was deleted, let path point to the last checked item */
5060 if (path->slots[0] == 0)
5061 btrfs_prev_leaf(root, path);
/* every name in this INODE_REF must be matched by dir entries pointing here */
5069 location.objectid = ref_key->objectid;
5070 location.type = BTRFS_INODE_ITEM_KEY;
5071 location.offset = 0;
5072 node = path->nodes[0];
5073 slot = path->slots[0];
5075 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077 total = btrfs_item_size_nr(node, slot);
5080 /* Update inode ref count */
5083 index = btrfs_inode_ref_index(node, ref);
5084 name_len = btrfs_inode_ref_name_len(node, ref);
/* len is the clamped copy length; name_len stays the on-disk value */
5086 if (name_len <= BTRFS_NAME_LEN) {
5089 len = BTRFS_NAME_LEN;
5090 warning("root %llu INODE_REF[%llu %llu] name too long",
5091 root->objectid, ref_key->objectid, ref_key->offset);
5094 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5096 /* copy the first name found to name_ret */
5097 if (refs == 1 && name_ret) {
5098 memcpy(name_ret, namebuf, len);
5102 /* Check root dir ref */
5103 if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104 if (index != 0 || len != strlen("..") ||
5105 strncmp("..", namebuf, len) ||
5106 ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107 /* set err bits then repair will delete the ref */
5108 err |= DIR_INDEX_MISSING;
5109 err |= DIR_ITEM_MISSING;
5114 /* Find related DIR_INDEX */
5115 key.objectid = ref_key->offset;
5116 key.type = BTRFS_DIR_INDEX_KEY;
5118 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119 imode_to_type(mode));
5121 /* Find related dir_item */
5122 key.objectid = ref_key->offset;
5123 key.type = BTRFS_DIR_ITEM_KEY;
5124 key.offset = btrfs_name_hash(namebuf, len);
5125 tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126 imode_to_type(mode));
/* NOTE(review): repair and printing receive name_len, the lookups above use len */
5128 if (tmp_err && repair) {
5129 ret = repair_ternary_lowmem(root, ref_key->offset,
5130 ref_key->objectid, index, namebuf,
5131 name_len, imode_to_type(mode),
5138 print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139 imode_to_type(mode), tmp_err);
/* advance by the on-disk entry size: header plus unclamped name length */
5141 len = sizeof(*ref) + name_len;
5142 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5153 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154 * DIR_ITEM/DIR_INDEX.
5156 * @root: the root of the fs/file tree
5157 * @ref_key: the key of the INODE_EXTREF
5158 * @refs: the count of INODE_EXTREF
5159 * @mode: the st_mode of INODE_ITEM
5161 * Return 0 if no error occurred.
5163 static int check_inode_extref(struct btrfs_root *root,
5164 struct btrfs_key *ref_key,
5165 struct extent_buffer *node, int slot, u64 *refs,
5168 struct btrfs_key key;
5169 struct btrfs_key location;
5170 struct btrfs_inode_extref *extref;
5171 char namebuf[BTRFS_NAME_LEN] = {0};
/* dir entries found below must point at this inode's INODE_ITEM */
5181 location.objectid = ref_key->objectid;
5182 location.type = BTRFS_INODE_ITEM_KEY;
5183 location.offset = 0;
5185 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186 total = btrfs_item_size_nr(node, slot);
5189 /* update inode ref count */
5191 name_len = btrfs_inode_extref_name_len(node, extref);
5192 index = btrfs_inode_extref_index(node, extref);
/* unlike INODE_REF, the parent directory is stored inside the extref itself */
5193 parent = btrfs_inode_extref_parent(node, extref);
5194 if (name_len <= BTRFS_NAME_LEN) {
5197 len = BTRFS_NAME_LEN;
5198 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199 root->objectid, ref_key->objectid, ref_key->offset);
5201 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5203 /* Check root dir ref name */
/* NOTE(review): compares with the unclamped name_len rather than len */
5204 if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206 root->objectid, ref_key->objectid, ref_key->offset,
5208 err |= ROOT_DIR_ERROR;
5211 /* find related dir_index */
5212 key.objectid = parent;
5213 key.type = BTRFS_DIR_INDEX_KEY;
5215 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5218 /* find related dir_item */
5219 key.objectid = parent;
5220 key.type = BTRFS_DIR_ITEM_KEY;
5221 key.offset = btrfs_name_hash(namebuf, len);
5222 ret = find_dir_item(root, &key, &location, namebuf, len, mode);
/* advance by the on-disk entry size: header plus unclamped name length */
5225 len = sizeof(*extref) + name_len;
5226 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5236 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237 * DIR_ITEM/DIR_INDEX match.
5238 * Return with @index_ret.
5240 * @root: the root of the fs/file tree
5241 * @key: the key of the INODE_REF/INODE_EXTREF
5242 * @name: the name in the INODE_REF/INODE_EXTREF
5243 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
5244 * @index_ret: the index in the INODE_REF/INODE_EXTREF,
5245 * value (u64)-1 means do not check index
5246 * @ext_ref: the EXTENDED_IREF feature
5248 * Return 0 if no error occurred.
5249 * Return >0 for error bitmap
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252 char *name, int namelen, u64 *index_ret,
5253 unsigned int ext_ref)
5255 struct btrfs_path path;
5256 struct btrfs_inode_ref *ref;
5257 struct btrfs_inode_extref *extref;
5258 struct extent_buffer *node;
5259 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* first attempt: the plain INODE_REF item addressed by @key */
5272 btrfs_init_path(&path);
5273 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5275 ret = INODE_REF_MISSING;
5279 node = path.nodes[0];
5280 slot = path.slots[0];
5282 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283 total = btrfs_item_size_nr(node, slot);
5285 /* Iterate all entry of INODE_REF */
5286 while (cur < total) {
5287 ret = INODE_REF_MISSING;
5289 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 in *index_ret disables the index comparison */
5291 if (*index_ret != (u64)-1 && *index_ret != ref_index)
/* the stored name length must fit both the item and the local buffer */
5294 if (cur + sizeof(*ref) + ref_namelen > total ||
5295 ref_namelen > BTRFS_NAME_LEN) {
5296 warning("root %llu INODE %s[%llu %llu] name too long",
5298 key->type == BTRFS_INODE_REF_KEY ?
5300 key->objectid, key->offset);
5302 if (cur + sizeof(*ref) > total)
5304 len = min_t(u32, total - cur - sizeof(*ref),
5310 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5313 if (len != namelen || strncmp(ref_namebuf, name, len))
5316 *index_ret = ref_index;
5320 len = sizeof(*ref) + ref_namelen;
5321 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5326 /* Skip if not support EXTENDED_IREF feature */
5330 btrfs_release_path(&path);
5331 btrfs_init_path(&path);
/*
 * second attempt: the INODE_EXTREF item; note that the caller's @key is
 * rewritten in place (type and offset)
 */
5333 dir_id = key->offset;
5334 key->type = BTRFS_INODE_EXTREF_KEY;
5335 key->offset = btrfs_extref_hash(dir_id, name, namelen);
5337 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5339 ret = INODE_REF_MISSING;
5343 node = path.nodes[0];
5344 slot = path.slots[0];
5346 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5348 total = btrfs_item_size_nr(node, slot);
5350 /* Iterate all entry of INODE_EXTREF */
5351 while (cur < total) {
5352 ret = INODE_REF_MISSING;
5354 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355 ref_index = btrfs_inode_extref_index(node, extref);
5356 parent = btrfs_inode_extref_parent(node, extref);
5357 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5360 if (parent != dir_id)
5363 if (ref_namelen <= BTRFS_NAME_LEN) {
5366 len = BTRFS_NAME_LEN;
/* NOTE(review): key->type was set to INODE_EXTREF above, so the test below cannot be true */
5367 warning("root %llu INODE %s[%llu %llu] name too long",
5369 key->type == BTRFS_INODE_REF_KEY ?
5371 key->objectid, key->offset);
5373 read_extent_buffer(node, ref_namebuf,
5374 (unsigned long)(extref + 1), len);
5376 if (len != namelen || strncmp(ref_namebuf, name, len))
5379 *index_ret = ref_index;
5384 len = sizeof(*extref) + ref_namelen;
5385 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5390 btrfs_release_path(&path);
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395 u64 ino, u64 index, const char *namebuf,
5396 int name_len, u8 filetype, int err)
5398 if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400 root->objectid, key->objectid, key->offset, namebuf,
5402 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5405 if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407 root->objectid, key->objectid, index, namebuf, filetype,
5408 err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5411 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5413 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414 root->objectid, ino, index, namebuf, filetype,
5415 err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5418 if (err & INODE_REF_MISSING)
5420 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421 root->objectid, ino, key->objectid, namebuf, filetype);
5426 * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5428 * Returns error after repair
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431 u64 index, u8 filetype, char *namebuf, u32 name_len,
/* step 1: a missing inode item is recreated first */
5436 if (err & INODE_ITEM_MISSING) {
5437 ret = repair_inode_item_missing(root, ino, filetype);
5439 err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
/* step 2: any remaining non-inode-item bit goes through the ternary repair */
5442 if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444 name_len, filetype, err);
/* presumably cleared only when the repair succeeded - the guard is not visible here */
5446 err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447 err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448 err &= ~(INODE_REF_MISSING);
/*
 * Helper of count_dir_isize(): walks the items of key type @type belonging
 * to @ino, starting from offset (u64)-1 and moving backwards with
 * btrfs_previous_item(), and visits every dir entry in each item. The name
 * length of an entry is clamped to BTRFS_NAME_LEN.
 * NOTE(review): the statement accumulating the size is not visible here -
 * confirm what exactly is summed.
 */
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5457 struct btrfs_key key;
5458 struct btrfs_path path;
5460 struct btrfs_dir_item *di;
5470 key.offset = (u64)-1;
5472 btrfs_init_path(&path);
5473 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5478 /* if found, go to special case */
5483 ret = btrfs_previous_item(root, &path, ino, type);
5491 di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5493 total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5495 while (cur < total) {
5496 len = btrfs_dir_name_len(path.nodes[0], di);
5497 if (len > BTRFS_NAME_LEN)
5498 len = BTRFS_NAME_LEN;
/* the entry's data length is added to len before stepping to the next entry */
5501 len += btrfs_dir_data_len(path.nodes[0], di);
5503 di = (struct btrfs_dir_item *)((char *)di + len);
5509 btrfs_release_path(&path);
/*
 * Compute the size value of directory inode @ino by running
 * __count_dir_isize() once for DIR_ITEM keys and once for DIR_INDEX keys;
 * *size receives the sum of the two results. An error is logged on the
 * failure path.
 */
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5520 ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5524 ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5528 *size = item_size + index_size;
5532 error("failed to count root %llu INODE[%llu] root size",
5533 root->objectid, ino);
5538 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5541 * @root: the root of the fs/file tree
5542 * @di_key: the key of the DIR_ITEM/DIR_INDEX to check
5544 * @size: the st_size of the INODE_ITEM
5545 * @ext_ref: the EXTENDED_IREF feature
5547 * Return 0 if no error occurred.
5548 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
/*
 * Check every entry packed in the DIR_ITEM/DIR_INDEX item @di_key: the
 * name hash (DIR_ITEM only), the INODE_ITEM the entry points at, the
 * matching INODE_REF/INODE_EXTREF, and the counterpart DIR_INDEX/DIR_ITEM.
 * When repair is enabled, problems are passed to repair_dir_item().
 * Each name length is added to *size.
 * NOTE(review): braces, several declarations and the goto/return lines are
 * not present in this copy of the file; the comments below describe only
 * the statements that are visible.
 */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551 struct btrfs_path *path, u64 *size,
5552 unsigned int ext_ref)
5554 struct btrfs_dir_item *di;
5555 struct btrfs_inode_item *ii;
5556 struct btrfs_key key;
5557 struct btrfs_key location;
5558 struct extent_buffer *node;
5560 char namebuf[BTRFS_NAME_LEN] = {0};
5572 int need_research = 0;
5575 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576 * ignore index check.
5578 if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579 index = di_key->offset;
5586 /* since after repair, path and the dir item may be changed */
5587 if (need_research) {
/* Tell the caller the directory size has to be counted again. */
5589 err |= DIR_COUNT_AGAIN;
5590 btrfs_release_path(path);
5591 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592 /* the item was deleted, let path point the last checked item */
5594 if (path->slots[0] == 0)
5595 btrfs_prev_leaf(root, path);
5603 node = path->nodes[0];
5604 slot = path->slots[0];
5606 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607 total = btrfs_item_size_nr(node, slot);
/* namebuf is a char array, so this count equals sizeof(namebuf). */
5608 memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
/* One item may pack several dir entries; check each of them. */
5610 while (cur < total) {
5611 data_len = btrfs_dir_data_len(node, di);
5614 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5616 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617 di_key->objectid, di_key->offset, data_len);
5619 name_len = btrfs_dir_name_len(node, di);
/* len is the name length capped at BTRFS_NAME_LEN. */
5620 if (name_len <= BTRFS_NAME_LEN) {
5623 len = BTRFS_NAME_LEN;
5624 warning("root %llu %s[%llu %llu] name too long",
5626 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627 di_key->objectid, di_key->offset);
/* The uncapped on-disk name length is what gets added to *size. */
5629 (*size) += name_len;
/* The name bytes are stored directly after the btrfs_dir_item header. */
5630 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5632 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM the key offset must equal the hash of the name. */
5634 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635 di_key->offset != btrfs_name_hash(namebuf, len)) {
5637 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638 root->objectid, di_key->objectid, di_key->offset,
5639 namebuf, len, filetype, di_key->offset,
5640 btrfs_name_hash(namebuf, len));
5643 btrfs_dir_item_key_to_cpu(node, di, &location);
5644 /* Ignore related ROOT_ITEM check */
5645 if (location.type == BTRFS_ROOT_ITEM_KEY)
5648 btrfs_release_path(path);
5649 /* Check relative INODE_ITEM(existence/filetype) */
5650 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5652 tmp_err |= INODE_ITEM_MISSING;
5656 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657 struct btrfs_inode_item);
5658 mode = btrfs_inode_mode(path->nodes[0], ii);
/* The type derived from the inode mode must match the dir entry type. */
5659 if (imode_to_type(mode) != filetype) {
5660 tmp_err |= INODE_ITEM_MISMATCH;
5664 /* Check relative INODE_REF/INODE_EXTREF */
5665 key.objectid = location.objectid;
5666 key.type = BTRFS_INODE_REF_KEY;
5667 key.offset = di_key->objectid;
5668 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5671 /* check relative INDEX/ITEM */
5672 key.objectid = di_key->objectid;
/*
 * NOTE(review): key.type was set to BTRFS_INODE_REF_KEY just above, so
 * unless find_inode_ref() rewrites it to BTRFS_DIR_ITEM_KEY this test can
 * never be true. di_key->type looks like what was intended -- confirm.
 */
5673 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674 key.type = BTRFS_DIR_INDEX_KEY;
5677 key.type = BTRFS_DIR_ITEM_KEY;
/*
 * NOTE(review): this hash and the find_dir_item() call use name_len, not
 * the capped len used for the hash check above, while namebuf holds only
 * BTRFS_NAME_LEN bytes -- confirm this is safe for over-long names.
 */
5678 key.offset = btrfs_name_hash(namebuf, name_len);
5681 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682 name_len, filetype);
5683 /* find_dir_item may find index */
5684 if (key.type == BTRFS_DIR_INDEX_KEY)
5688 if (tmp_err && repair) {
/* The file type handed to the repair comes from the inode mode. */
5689 ret = repair_dir_item(root, di_key->objectid,
5690 location.objectid, index,
5691 imode_to_type(mode), namebuf,
/* A changed bitmap means the repair modified something. */
5693 if (ret != tmp_err) {
5698 btrfs_release_path(path);
5699 print_dir_item_err(root, di_key, location.objectid, index,
5700 namebuf, name_len, filetype, tmp_err);
/* NOTE(review): the step uses the uncapped name_len -- confirm. */
5702 len = sizeof(*di) + name_len + data_len;
5703 di = (struct btrfs_dir_item *)((char *)di + len);
5706 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708 root->objectid, di_key->objectid,
/* Point @path back at the item being checked. */
5715 btrfs_release_path(path);
5716 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
/* A positive search result (key not found) is folded in as -ENOENT. */
5718 err |= ret > 0 ? -ENOENT : ret;
5723 * Wrapper function of btrfs_punch_hole.
5725 * Returns 0 means success.
5726 * Returns not 0 means error.
/*
 * Start a transaction on @root, call btrfs_punch_hole() for inode @ino at
 * @start, print the outcome, and commit the transaction.
 */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5731 struct btrfs_trans_handle *trans;
5734 trans = btrfs_start_transaction(root, 1);
/* Propagate the error code encoded in the returned pointer. */
5736 return PTR_ERR(trans);
5738 ret = btrfs_punch_hole(trans, root, ino, start, len);
5740 error("failed to add hole [%llu, %llu] in inode [%llu]",
5743 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
/* NOTE(review): the commit's return value is not used on this line. */
5746 btrfs_commit_transaction(trans, root);
5751 * Check file extent datasum/hole, update the size of the file extents,
5752 * check and update the last offset of the file extent.
5754 * @root: the root of fs/file tree.
5755 * @fkey: the key of the file extent.
5756 * @nodatasum: INODE_NODATASUM feature.
5757 * @size: the sum of all EXTENT_DATA items size for this inode.
5758 * @end: the offset of the last extent.
5760 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (@fkey, stored at @slot of @node).
 * Inline extents are checked for an empty or wrong inline size. Regular
 * and prealloc extents are checked for a valid type, for csum coverage
 * against @nodatasum, and for a gap between *end and the item offset.
 * *size and *end are advanced by the extent's byte count.
 * NOTE(review): braces, some declarations and the return lines are not
 * present in this copy of the file.
 */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763 struct extent_buffer *node, int slot,
5764 unsigned int nodatasum, u64 *size, u64 *end)
5766 struct btrfs_file_extent_item *fi;
5769 u64 extent_num_bytes;
5771 u64 csum_found; /* In byte size, sectorsize aligned */
5772 u64 search_start; /* Logical range start we search for csum */
5773 u64 search_len; /* Logical range len we search for csum */
5774 unsigned int extent_type;
5775 unsigned int is_hole;
5780 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5782 /* Check inline extent */
5783 extent_type = btrfs_file_extent_type(node, fi);
5784 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785 struct btrfs_item *e = btrfs_item_nr(slot);
5786 u32 item_inline_len;
5788 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790 compressed = btrfs_file_extent_compression(node, fi);
5791 if (extent_num_bytes == 0) {
5793 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794 root->objectid, fkey->objectid, fkey->offset);
5795 err |= FILE_EXTENT_ERROR;
/* The two lengths are only compared for uncompressed inline data. */
5797 if (!compressed && extent_num_bytes != item_inline_len) {
5799 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800 root->objectid, fkey->objectid, fkey->offset,
5801 extent_num_bytes, item_inline_len);
5802 err |= FILE_EXTENT_ERROR;
5804 *end += extent_num_bytes;
5805 *size += extent_num_bytes;
5809 /* Check extent type */
5810 if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812 err |= FILE_EXTENT_ERROR;
5813 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814 root->objectid, fkey->objectid, fkey->offset);
5818 /* Check REG_EXTENT/PREALLOC_EXTENT */
5819 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822 extent_offset = btrfs_file_extent_offset(node, fi);
5823 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with both disk bytenr and disk length zero. */
5824 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5827 * Check EXTENT_DATA csum
5829 * For plain (uncompressed) extent, we should only check the range
5830 * we're referring to, as it's possible that part of prealloc extent
5831 * has been written, and has csum:
5833 * |<--- Original large preallocated extent A ---->|
5834 * |<- Prealloc File Extent ->|<- Regular Extent ->|
5837 * For compressed extent, we should check the whole range.
5840 search_start = disk_bytenr + extent_offset;
5841 search_len = extent_num_bytes;
5843 search_start = disk_bytenr;
5844 search_len = disk_num_bytes;
5846 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Any csum on a NODATASUM inode is unexpected. */
5847 if (csum_found > 0 && nodatasum) {
5848 err |= ODD_CSUM_ITEM;
5849 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850 root->objectid, fkey->objectid, fkey->offset);
/* A regular, non-hole extent with datasum must be fully covered. */
5851 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852 !is_hole && (ret < 0 || csum_found < search_len)) {
5853 err |= CSUM_ITEM_MISSING;
5854 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855 root->objectid, fkey->objectid, fkey->offset,
5856 csum_found, search_len);
5857 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858 err |= ODD_CSUM_ITEM;
5859 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860 root->objectid, fkey->objectid, fkey->offset, csum_found);
5863 /* Check EXTENT_DATA hole */
/* Without no_holes, this item must start where the previous one ended. */
5864 if (!no_holes && *end != fkey->offset) {
5866 ret = punch_extent_hole(root, fkey->objectid,
5867 *end, fkey->offset - *end);
/* Report the gap when not repairing or when the repair failed. */
5868 if (!repair || ret) {
5869 err |= FILE_EXTENT_ERROR;
5870 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5871 root->objectid, fkey->objectid, fkey->offset);
5875 *end += extent_num_bytes;
5877 *size += extent_num_bytes;
5883 * Set inode item nbytes to @nbytes
5885 * Returns 0 on success
5886 * Returns != 0 on error
/*
 * Set the nbytes field of inode @ino's INODE_ITEM to @nbytes inside a new
 * transaction, then re-search the key that @path pointed at on entry so
 * the caller's position is restored.
 * The success message now ends with its newline; the stray "\n" that sat
 * in the middle of the format string was removed.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file; they are left untouched.
 */
5888 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5889 struct btrfs_path *path,
5890 u64 ino, u64 nbytes)
5892 struct btrfs_trans_handle *trans;
5893 struct btrfs_inode_item *ii;
5894 struct btrfs_key key;
5895 struct btrfs_key research_key;
/* Remember the key @path currently points at. */
5899 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5902 key.type = BTRFS_INODE_ITEM_KEY;
5905 trans = btrfs_start_transaction(root, 1);
5906 if (IS_ERR(trans)) {
5907 ret = PTR_ERR(trans);
5912 btrfs_release_path(path);
/* Search with cow set so the leaf can be modified in this transaction. */
5913 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5921 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5922 struct btrfs_inode_item);
5923 btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5924 btrfs_mark_buffer_dirty(path->nodes[0]);
5926 btrfs_commit_transaction(trans, root);
5929 error("failed to set nbytes in inode %llu root %llu",
5930 ino, root->root_key.objectid);
5932 printf("Set nbytes in inode item %llu root %llu to %llu\n", ino,
5933 root->root_key.objectid, nbytes);
/* Restore @path to the item it pointed at on entry (read-only search). */
5936 btrfs_release_path(path);
5937 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5944 * Set directory inode isize to @isize.
5946 * Returns 0 on success.
5947 * Returns != 0 on error.
/*
 * Set the size field of directory inode @ino's INODE_ITEM to @isize inside
 * a new transaction, then re-search the key that @path pointed at on entry.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file.
 */
5949 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5950 struct btrfs_path *path,
5953 struct btrfs_trans_handle *trans;
5954 struct btrfs_inode_item *ii;
5955 struct btrfs_key key;
5956 struct btrfs_key research_key;
/* Remember the key @path currently points at. */
5960 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5963 key.type = BTRFS_INODE_ITEM_KEY;
5966 trans = btrfs_start_transaction(root, 1);
5967 if (IS_ERR(trans)) {
5968 ret = PTR_ERR(trans);
5973 btrfs_release_path(path);
/* Search with cow set so the leaf can be modified in this transaction. */
5974 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5982 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5983 struct btrfs_inode_item);
5984 btrfs_set_inode_size(path->nodes[0], ii, isize);
5985 btrfs_mark_buffer_dirty(path->nodes[0]);
5987 btrfs_commit_transaction(trans, root);
5990 error("failed to set isize in inode %llu root %llu",
5991 ino, root->root_key.objectid);
5993 printf("Set isize in inode %llu root %llu to %llu\n",
5994 ino, root->root_key.objectid, isize);
/* Restore @path to the item it pointed at on entry (read-only search). */
5996 btrfs_release_path(path);
5997 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6004 * Wrapper function for btrfs_add_orphan_item().
6006 * Returns 0 on success.
6007 * Returns != 0 on error.
/*
 * Add an orphan item for inode @ino via btrfs_add_orphan_item() inside a
 * new transaction, then re-search the key that @path pointed at on entry.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file.
 */
6009 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6010 struct btrfs_path *path, u64 ino)
6012 struct btrfs_trans_handle *trans;
6013 struct btrfs_key research_key;
/* Remember the key @path currently points at. */
6017 btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6019 trans = btrfs_start_transaction(root, 1);
6020 if (IS_ERR(trans)) {
6021 ret = PTR_ERR(trans);
/* @path is released first because it is reused by the call below. */
6026 btrfs_release_path(path);
6027 ret = btrfs_add_orphan_item(trans, root, path, ino);
6029 btrfs_commit_transaction(trans, root);
6032 error("failed to add inode %llu as orphan item root %llu",
6033 ino, root->root_key.objectid);
6035 printf("Added inode %llu as orphan item root %llu\n",
6036 ino, root->root_key.objectid);
/* Restore @path to the item it pointed at on entry (read-only search). */
6038 btrfs_release_path(path);
6039 ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6045 /* Set inode_item nlink to @ref_count.
6046 * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6048 * Returns 0 on success
/*
 * Set the nlink field of inode @ino's INODE_ITEM to @ref_count. When
 * @ref_count is 0 the inode is first linked via link_inode_to_lostfound(),
 * which receives &ref_count. If no name is supplied, the decimal inode
 * number is used as the name. @path is re-searched to the key it pointed
 * at on entry.
 * The failure and success messages print "inode ... root ...", so their
 * arguments are now passed in that order (ino first, then the root id);
 * previously the two numbers were reported swapped.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file; they are left untouched.
 */
6050 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6051 struct btrfs_path *path, u64 ino,
6052 const char *name, u32 namelen,
6053 u64 ref_count, u8 filetype, u64 *nlink)
6055 struct btrfs_trans_handle *trans;
6056 struct btrfs_inode_item *ii;
6057 struct btrfs_key key;
6058 struct btrfs_key old_key;
6059 char namebuf[BTRFS_NAME_LEN] = {0};
/* Remember the key @path currently points at. */
6065 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6067 if (name && namelen) {
6068 ASSERT(namelen <= BTRFS_NAME_LEN);
/*
 * NOTE(review): when namelen == BTRFS_NAME_LEN the copy fills namebuf
 * completely, leaving no terminating NUL for the "%s" prints below.
 */
6069 memcpy(namebuf, name, namelen);
/* No usable name: fall back to the inode number written in decimal. */
6072 sprintf(namebuf, "%llu", ino);
6073 name_len = count_digits(ino);
6074 printf("Can't find file name for inode %llu, use %s instead\n",
6078 trans = btrfs_start_transaction(root, 1);
6079 if (IS_ERR(trans)) {
6080 ret = PTR_ERR(trans);
6084 btrfs_release_path(path);
6085 /* if refs is 0, put it into lostfound */
6086 if (ref_count == 0) {
6087 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6088 name_len, filetype, &ref_count);
6093 /* reset inode_item's nlink to ref_count */
6095 key.type = BTRFS_INODE_ITEM_KEY;
6098 btrfs_release_path(path);
/* Search with cow set so the leaf can be modified in this transaction. */
6099 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6105 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6106 struct btrfs_inode_item);
6107 btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6108 btrfs_mark_buffer_dirty(path->nodes[0]);
6113 btrfs_commit_transaction(trans, root);
6117 "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6118 ino, root->objectid, namebuf, filetype);
6120 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6121 ino, root->objectid, namebuf, filetype);
/* Restore @path to the item it pointed at on entry (read-only search). */
6124 btrfs_release_path(path);
6125 ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6132 * Check INODE_ITEM and related ITEMs (the same inode number)
6133 * 1. check link count
6134 * 2. check inode ref/extref
6135 * 3. check dir item/index
6137 * @ext_ref: the EXTENDED_IREF feature
6139 * Return 0 if no error occurred.
6140 * Return >0 (an error bitmap) if an error was found or the traversal is finished.
/*
 * Check the INODE_ITEM @path points at together with the following items
 * that share its objectid (inode refs/extrefs, dir items/indexes, file
 * extents, xattrs), then verify the inode's nlink, size and nbytes against
 * what was collected. With repair enabled the matching *_lowmem repair
 * helpers are called.
 * NOTE(review): braces, several declarations, the loop/switch headers and
 * the goto/return lines are not present in this copy of the file; the
 * comments below describe only the statements that are visible.
 */
6142 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6143 unsigned int ext_ref)
6145 struct extent_buffer *node;
6146 struct btrfs_inode_item *ii;
6147 struct btrfs_key key;
6148 struct btrfs_key last_key;
6157 u64 extent_size = 0;
6159 unsigned int nodatasum;
6163 char namebuf[BTRFS_NAME_LEN] = {0};
6166 node = path->nodes[0];
6167 slot = path->slots[0];
6169 btrfs_item_key_to_cpu(node, &key, slot);
6170 inode_id = key.objectid;
/* The orphan objectid is not checked as an inode; just step past it. */
6172 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6173 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified further down. */
6179 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6180 isize = btrfs_inode_size(node, ii);
6181 nbytes = btrfs_inode_nbytes(node, ii);
6182 mode = btrfs_inode_mode(node, ii);
6183 dir = imode_to_type(mode) == BTRFS_FT_DIR;
6184 nlink = btrfs_inode_nlink(node, ii);
6185 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Remember the current key before advancing to the next item. */
6188 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6189 ret = btrfs_next_item(root, path);
6191 /* out will fill 'err' using current statistics */
6193 } else if (ret > 0) {
6198 node = path->nodes[0];
6199 slot = path->slots[0];
6200 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means this inode's items are exhausted. */
6201 if (key.objectid != inode_id)
6205 case BTRFS_INODE_REF_KEY:
6206 ret = check_inode_ref(root, &key, path, namebuf,
6207 &name_len, &refs, mode);
6210 case BTRFS_INODE_EXTREF_KEY:
6211 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6212 warning("root %llu EXTREF[%llu %llu] isn't supported",
6213 root->objectid, key.objectid,
6215 ret = check_inode_extref(root, &key, node, slot, &refs,
6219 case BTRFS_DIR_ITEM_KEY:
6220 case BTRFS_DIR_INDEX_KEY:
6222 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6223 root->objectid, inode_id,
6224 imode_to_type(mode), key.objectid,
6227 ret = check_dir_item(root, &key, path, &size, ext_ref);
6230 case BTRFS_EXTENT_DATA_KEY:
6232 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6233 root->objectid, inode_id, key.objectid,
6236 ret = check_file_extent(root, &key, node, slot,
6237 nodatasum, &extent_size,
6241 case BTRFS_XATTR_ITEM_KEY:
6244 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6245 key.objectid, key.type, key.offset);
/* Go back to the last key that was read when LAST_ITEM is set. */
6250 if (err & LAST_ITEM) {
6251 btrfs_release_path(path);
6252 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6257 /* verify INODE_ITEM nlink/isize/nbytes */
/* A repair changed the directory, so its size is counted again. */
6259 if (repair && (err & DIR_COUNT_AGAIN)) {
6260 err &= ~DIR_COUNT_AGAIN;
/* NOTE(review): the return value of count_dir_isize() is not used. */
6261 count_dir_isize(root, inode_id, &size);
6264 if ((nlink != 1 || refs != 1) && repair) {
6265 ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6266 namebuf, name_len, refs, imode_to_type(mode),
6271 err |= LINK_COUNT_ERROR;
6272 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6273 root->objectid, inode_id, nlink);
6277 * Just a warning, as dir inode nbytes is just an
6278 * instructive value.
6280 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6281 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6282 root->objectid, inode_id,
6283 root->fs_info->nodesize);
/* The stored size must equal the size counted from the dir entries. */
6286 if (isize != size) {
6288 ret = repair_dir_isize_lowmem(root, path,
6290 if (!repair || ret) {
6293 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6294 root->objectid, inode_id, isize, size);
/* The stored link count must equal the number of refs found. */
6298 if (nlink != refs) {
6300 ret = repair_inode_nlinks_lowmem(root, path,
6301 inode_id, namebuf, name_len, refs,
6302 imode_to_type(mode), &nlink);
6303 if (!repair || ret) {
6304 err |= LINK_COUNT_ERROR;
6306 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6307 root->objectid, inode_id, nlink, refs);
/* nlink equals refs and both are zero. */
6309 } else if (!nlink) {
6311 ret = repair_inode_orphan_item_lowmem(root,
6313 if (!repair || ret) {
6315 error("root %llu INODE[%llu] is orphan item",
6316 root->objectid, inode_id);
/* With nbytes 0 and no no_holes, extents must reach up to isize. */
6320 if (!nbytes && !no_holes && extent_end < isize) {
6322 ret = punch_extent_hole(root, inode_id,
6323 extent_end, isize - extent_end);
6324 if (!repair || ret) {
6325 err |= NBYTES_ERROR;
6327 "root %llu INODE[%llu] size %llu should have a file extent hole",
6328 root->objectid, inode_id, isize);
/* The stored nbytes must equal the sum collected from file extents. */
6332 if (nbytes != extent_size) {
6334 ret = repair_inode_nbytes_lowmem(root, path,
6335 inode_id, extent_size);
6336 if (!repair || ret) {
6337 err |= NBYTES_ERROR;
6339 "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6340 root->objectid, inode_id, nbytes,
/* Step forward again when LAST_ITEM is set. */
6346 if (err & LAST_ITEM)
6347 btrfs_next_item(root, path);
6352 * Insert the missing inode item and inode ref.
6354 * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6355 * Root dir should be handled specially because root dir is the root of fs.
6357 * returns err (>0 or 0) after repair
/*
 * Repair the root directory (objectid BTRFS_FIRST_FREE_OBJECTID) of @root.
 * INODE_REF_MISSING: insert an INODE_REF named ".." whose parent is the
 * inode itself. INODE_ITEM_MISSING: call repair_inode_item_missing() with
 * file type BTRFS_FT_DIR. Repaired bits are cleared from @err.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file.
 */
6359 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6361 struct btrfs_trans_handle *trans;
6362 struct btrfs_key key;
6363 struct btrfs_path path;
6364 int filetype = BTRFS_FT_DIR;
6367 btrfs_init_path(&path);
6369 if (err & INODE_REF_MISSING) {
/* The root dir's INODE_REF uses the inode's own id as the parent. */
6370 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6371 key.type = BTRFS_INODE_REF_KEY;
6372 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6374 trans = btrfs_start_transaction(root, 1);
6375 if (IS_ERR(trans)) {
6376 ret = PTR_ERR(trans);
6380 btrfs_release_path(&path);
6381 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6385 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6386 BTRFS_FIRST_FREE_OBJECTID,
6387 BTRFS_FIRST_FREE_OBJECTID, 0);
6391 printf("Add INODE_REF[%llu %llu] name %s\n",
6392 BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6394 err &= ~INODE_REF_MISSING;
6397 error("fail to insert first inode's ref");
/* NOTE(review): the commit's return value is not used on this line. */
6398 btrfs_commit_transaction(trans, root);
6401 if (err & INODE_ITEM_MISSING) {
6402 ret = repair_inode_item_missing(root,
6403 BTRFS_FIRST_FREE_OBJECTID, filetype);
6406 err &= ~INODE_ITEM_MISSING;
6410 error("fail to repair first inode");
6411 btrfs_release_path(&path);
6416 * check first root dir's inode_item and inode_ref
6418 * returns 0 means no error
6419 * returns >0 means error
6420 * returns <0 means fatal error
/*
 * Check the root directory inode (objectid BTRFS_FIRST_FREE_OBJECTID) of
 * @root: its INODE_ITEM must exist and be a directory, and an INODE_REF
 * named ".." must exist. repair_fs_first_inode() is called on the repair
 * path. Returns ret when it is negative, otherwise the error bitmap.
 * NOTE(review): braces, some declarations and the goto lines are not
 * present in this copy of the file.
 */
6422 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6424 struct btrfs_path path;
6425 struct btrfs_key key;
6426 struct btrfs_inode_item *ii;
6432 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6433 key.type = BTRFS_INODE_ITEM_KEY;
6436 /* For root being dropped, we don't need to check first inode */
6437 if (btrfs_root_refs(&root->root_item) == 0 &&
6438 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6439 BTRFS_FIRST_FREE_OBJECTID)
6442 btrfs_init_path(&path);
/* Read-only lookup of the INODE_ITEM. */
6443 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6448 err |= INODE_ITEM_MISSING;
6450 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6451 struct btrfs_inode_item);
6452 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The root dir inode must be of directory type. */
6453 if (imode_to_type(mode) != BTRFS_FT_DIR)
6454 err |= INODE_ITEM_MISMATCH;
6457 /* lookup first inode ref */
6458 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6459 key.type = BTRFS_INODE_REF_KEY;
6460 /* special index value */
6463 ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6469 btrfs_release_path(&path);
6472 err = repair_fs_first_inode(root, err);
6474 if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6475 error("root dir INODE_ITEM is %s",
6476 err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6477 if (err & INODE_REF_MISSING)
6478 error("root dir INODE_REF is missing");
/* A negative ret takes priority over the error bitmap. */
6480 return ret < 0 ? ret : err;
/*
 * Look up a tree backref in @rec->backref_tree. A stack-allocated match
 * record is built from @parent/@root and searched with rb_search() using
 * compare_extent_backref(); `back` starts as NULL and is set from the
 * node that the search returns.
 * NOTE(review): the initializer fields of `match` and the branch choosing
 * between parent and root are not present in this copy of the file.
 */
6483 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6484 u64 parent, u64 root)
6486 struct rb_node *node;
6487 struct tree_backref *back = NULL;
6488 struct tree_backref match = {
/* A parent-based match is marked as a full backref. */
6495 match.parent = parent;
6496 match.node.full_backref = 1;
6501 node = rb_search(&rec->backref_tree, &match.node.node,
6502 (rb_compare_keys)compare_extent_backref, NULL);
6504 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Look up a data backref in @rec->backref_tree. A stack-allocated match
 * record is built from the arguments and searched with rb_search() using
 * compare_extent_backref(); `back` starts as NULL and is set from the
 * node that the search returns.
 * NOTE(review): part of the parameter list, most initializer fields of
 * `match` and the branch choosing between parent and root are not present
 * in this copy of the file.
 */
6509 static struct data_backref *find_data_backref(struct extent_record *rec,
6510 u64 parent, u64 root,
6511 u64 owner, u64 offset,
6513 u64 disk_bytenr, u64 bytes)
6515 struct rb_node *node;
6516 struct data_backref *back = NULL;
6517 struct data_backref match = {
6524 .found_ref = found_ref,
6525 .disk_bytenr = disk_bytenr,
/* A parent-based match is marked as a full backref. */
6529 match.parent = parent;
6530 match.node.full_backref = 1;
6535 node = rb_search(&rec->backref_tree, &match.node.node,
6536 (rb_compare_keys)compare_extent_backref, NULL);
6538 back = to_data_backref(rb_node_to_extent_backref(node));
6543 * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6544 * blocks and integrity of fs tree items.
6546 * @root: the root of the tree to be checked.
6547 * @ext_ref: whether the EXTENDED_IREF feature is enabled.
6548 * @account if NOT 0 means check the tree (including tree)'s treeblocks.
6549 * otherwise means check fs tree(s) items relationship and
6550 * @root MUST be a fs tree root.
6551 * Returns 0 represents OK.
6552 * Returns not 0 represents error.
/*
 * Check one tree: first the root directory inode via
 * check_fs_first_inode(), then the tree itself using walk_down_tree_v2()
 * and walk_up_tree_v2(). The walk starts at the root node, or at the
 * recorded drop_progress key when the root has no refs and a non-zero
 * drop_progress objectid.
 * NOTE(review): the last parameter line, braces, the loop header and the
 * goto/return lines are not present in this copy of the file.
 */
6554 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6555 struct btrfs_root *root, unsigned int ext_ref,
6559 struct btrfs_path path;
6560 struct node_refs nrefs;
6561 struct btrfs_root_item *root_item = &root->root_item;
6566 memset(&nrefs, 0, sizeof(nrefs));
6569 * We need to manually check the first inode item (256)
6570 * As the following traversal function will only start from
6571 * the first inode item in the leaf, if inode item (256) is
6572 * missing we will skip it forever.
6574 ret = check_fs_first_inode(root, ext_ref);
6580 level = btrfs_header_level(root->node);
6581 btrfs_init_path(&path);
/* Live root, or nothing dropped yet: start from the root node. */
6583 if (btrfs_root_refs(root_item) > 0 ||
6584 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6585 path.nodes[level] = root->node;
6586 path.slots[level] = 0;
/* Take an extra reference on the root node for the path. */
6587 extent_buffer_get(root->node);
6589 struct btrfs_key key;
/* Resume at the key and level recorded in drop_progress/drop_level. */
6591 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6592 level = root_item->drop_level;
6593 path.lowest_level = level;
6594 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6601 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6602 ext_ref, check_all);
6606 /* if ret is negative, walk shall stop */
6612 ret = walk_up_tree_v2(root, &path, &level);
6614 /* Normal exit, reset ret to err */
6621 btrfs_release_path(&path);
/* Forward declaration only; the definition is not in this part of the file. */
6625 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6628 * Iterate all items in the tree and call check_inode_item() to check.
6630 * @root: the root of the tree to be checked.
6631 * @ext_ref: the EXTENDED_IREF feature
6633 * Return 0 if no error found.
6634 * Return <0 for error.
/*
 * Check one fs/subvolume tree: reset the cached block groups of the
 * filesystem, then run check_btrfs_root() with a NULL transaction and 0
 * as the last argument.
 */
6636 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6638 reset_cached_block_groups(root->fs_info);
6639 return check_btrfs_root(NULL, root, ext_ref, 0);
6643 * Find the relative ref for root_ref and root_backref.
6645 * @root: the root of the root tree.
6646 * @ref_key: the key of the root ref.
6648 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF/ROOT_BACKREF item @ref_key (at @slot of @node)
 * has a counterpart item of the opposite type with swapped
 * objectid/offset, and that dirid, sequence, name length and name agree.
 * Sets ROOT_REF_MISSING or ROOT_REF_MISMATCH in the error bitmap.
 * NOTE(review): braces, some declarations and the goto/return lines are
 * not present in this copy of the file.
 */
6650 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6651 struct extent_buffer *node, int slot)
6653 struct btrfs_path path;
6654 struct btrfs_key key;
6655 struct btrfs_root_ref *ref;
6656 struct btrfs_root_ref *backref;
6657 char ref_name[BTRFS_NAME_LEN] = {0};
6658 char backref_name[BTRFS_NAME_LEN] = {0};
6664 u32 backref_namelen;
6669 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6670 ref_dirid = btrfs_root_ref_dirid(node, ref);
6671 ref_seq = btrfs_root_ref_sequence(node, ref);
6672 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* len is the name length capped at BTRFS_NAME_LEN. */
6674 if (ref_namelen <= BTRFS_NAME_LEN) {
6677 len = BTRFS_NAME_LEN;
6678 warning("%s[%llu %llu] ref_name too long",
6679 ref_key->type == BTRFS_ROOT_REF_KEY ?
6680 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes are stored directly after the btrfs_root_ref header. */
6683 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6685 /* Find relative root_ref */
6686 key.objectid = ref_key->offset;
/* Sum minus the current type yields the opposite key type. */
6687 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6688 key.offset = ref_key->objectid;
6690 btrfs_init_path(&path);
6691 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6693 err |= ROOT_REF_MISSING;
6694 error("%s[%llu %llu] couldn't find relative ref",
6695 ref_key->type == BTRFS_ROOT_REF_KEY ?
6696 "ROOT_REF" : "ROOT_BACKREF",
6697 ref_key->objectid, ref_key->offset);
6701 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6702 struct btrfs_root_ref);
6703 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6704 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6705 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* From here on len holds the capped length of the counterpart's name. */
6707 if (backref_namelen <= BTRFS_NAME_LEN) {
6708 len = backref_namelen;
6710 len = BTRFS_NAME_LEN;
6711 warning("%s[%llu %llu] ref_name too long",
6712 key.type == BTRFS_ROOT_REF_KEY ?
6713 "ROOT_REF" : "ROOT_BACKREF",
6714 key.objectid, key.offset);
6716 read_extent_buffer(path.nodes[0], backref_name,
6717 (unsigned long)(backref + 1), len);
/* Any differing field makes the pair a mismatch. */
6719 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6720 ref_namelen != backref_namelen ||
6721 strncmp(ref_name, backref_name, len)) {
6722 err |= ROOT_REF_MISMATCH;
6723 error("%s[%llu %llu] mismatch relative ref",
6724 ref_key->type == BTRFS_ROOT_REF_KEY ?
6725 "ROOT_REF" : "ROOT_BACKREF",
6726 ref_key->objectid, ref_key->offset);
6729 btrfs_release_path(&path);
6734 * Check all fs/file tree in low_memory mode.
6736 * 1. for fs tree root item, call check_fs_root_v2()
6737 * 2. for fs tree root ref/backref, call check_root_ref()
6739 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at BTRFS_FS_TREE_OBJECTID. For each
 * ROOT_ITEM whose objectid passes fs_root_objectid(), read that root and
 * run check_fs_root_v2() on it. For each ROOT_REF/ROOT_BACKREF item, run
 * check_root_ref().
 * NOTE(review): braces, the loop header and the goto/return lines are not
 * present in this copy of the file.
 */
6741 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6743 struct btrfs_root *tree_root = fs_info->tree_root;
6744 struct btrfs_root *cur_root = NULL;
6745 struct btrfs_path path;
6746 struct btrfs_key key;
6747 struct extent_buffer *node;
6748 unsigned int ext_ref;
6753 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6755 btrfs_init_path(&path);
6756 key.objectid = BTRFS_FS_TREE_OBJECTID;
6758 key.type = BTRFS_ROOT_ITEM_KEY;
6760 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6764 } else if (ret > 0) {
6770 node = path.nodes[0];
6771 slot = path.slots[0];
6772 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the walk. */
6773 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6775 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6776 fs_root_objectid(key.objectid)) {
/* The reloc tree is read uncached and freed again further down. */
6777 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6778 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6781 key.offset = (u64)-1;
6782 cur_root = btrfs_read_fs_root(fs_info, &key);
6785 if (IS_ERR(cur_root)) {
6786 error("Fail to read fs/subvol tree: %lld",
6792 ret = check_fs_root_v2(cur_root, ext_ref);
6795 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6796 btrfs_free_fs_root(cur_root);
6797 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6798 key.type == BTRFS_ROOT_BACKREF_KEY) {
6799 ret = check_root_ref(tree_root, &key, node, slot);
6803 ret = btrfs_next_item(tree_root, &path);
6813 btrfs_release_path(&path);
/*
 * Entry point for the fs-roots check. Prints a banner to stderr unless
 * progress reporting is enabled, then dispatches on check_mode:
 * check_fs_roots_v2() for CHECK_MODE_LOWMEM, check_fs_roots() otherwise.
 */
6817 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6818 struct cache_tree *root_cache)
6822 if (!ctx.progress_enabled)
6823 fprintf(stderr, "checking fs roots\n");
6824 if (check_mode == CHECK_MODE_LOWMEM)
6825 ret = check_fs_roots_v2(fs_info);
6827 ret = check_fs_roots(fs_info, root_cache);
/*
 * Walk every backref in @rec->backref_tree and verify it: it was found in
 * the extent tree, tree backrefs are referenced back, and data backrefs
 * agree with the record on ref count, disk bytenr and byte length. The
 * summed found refs are compared with rec->refs. Problems are reported on
 * stderr.
 * NOTE(review): braces, some declarations, the lines that use print_errs
 * and the goto/return lines are not present in this copy of the file.
 */
6832 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6834 struct extent_backref *back, *tmp;
6835 struct tree_backref *tback;
6836 struct data_backref *dback;
6840 rbtree_postorder_for_each_entry_safe(back, tmp,
6841 &rec->backref_tree, node) {
/* The backref has no matching entry in the extent tree. */
6842 if (!back->found_extent_tree) {
6846 if (back->is_data) {
6847 dback = to_data_backref(back);
6848 fprintf(stderr, "Data backref %llu %s %llu"
6849 " owner %llu offset %llu num_refs %lu"
6850 " not found in extent tree\n",
6851 (unsigned long long)rec->start,
6852 back->full_backref ?
6854 back->full_backref ?
6855 (unsigned long long)dback->parent:
6856 (unsigned long long)dback->root,
6857 (unsigned long long)dback->owner,
6858 (unsigned long long)dback->offset,
6859 (unsigned long)dback->num_refs);
6861 tback = to_tree_backref(back);
6862 fprintf(stderr, "Tree backref %llu parent %llu"
6863 " root %llu not found in extent tree\n",
6864 (unsigned long long)rec->start,
6865 (unsigned long long)tback->parent,
6866 (unsigned long long)tback->root);
/* A tree backref that nothing references back. */
6869 if (!back->is_data && !back->found_ref) {
6873 tback = to_tree_backref(back);
6874 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6875 (unsigned long long)rec->start,
6876 back->full_backref ? "parent" : "root",
6877 back->full_backref ?
6878 (unsigned long long)tback->parent :
6879 (unsigned long long)tback->root, back);
6881 if (back->is_data) {
6882 dback = to_data_backref(back);
/* Refs found for this backref must match its recorded count. */
6883 if (dback->found_ref != dback->num_refs) {
6887 fprintf(stderr, "Incorrect local backref count"
6888 " on %llu %s %llu owner %llu"
6889 " offset %llu found %u wanted %u back %p\n",
6890 (unsigned long long)rec->start,
6891 back->full_backref ?
6893 back->full_backref ?
6894 (unsigned long long)dback->parent:
6895 (unsigned long long)dback->root,
6896 (unsigned long long)dback->owner,
6897 (unsigned long long)dback->offset,
6898 dback->found_ref, dback->num_refs, back);
6900 if (dback->disk_bytenr != rec->start) {
6904 fprintf(stderr, "Backref disk bytenr does not"
6905 " match extent record, bytenr=%llu, "
6906 "ref bytenr=%llu\n",
6907 (unsigned long long)rec->start,
6908 (unsigned long long)dback->disk_bytenr);
6911 if (dback->bytes != rec->nr) {
6915 fprintf(stderr, "Backref bytes do not match "
6916 "extent backref, bytenr=%llu, ref "
6917 "bytes=%llu, backref bytes=%llu\n",
6918 (unsigned long long)rec->start,
6919 (unsigned long long)rec->nr,
6920 (unsigned long long)dback->bytes);
6923 if (!back->is_data) {
/* Data backrefs add their found_ref count to the running total. */
6926 dback = to_data_backref(back);
6927 found += dback->found_ref;
/* The total over all backrefs must equal the record's ref count. */
6930 if (found != rec->refs) {
6934 fprintf(stderr, "Incorrect global backref count "
6935 "on %llu found %llu wanted %llu\n",
6936 (unsigned long long)rec->start,
6937 (unsigned long long)found,
6938 (unsigned long long)rec->refs);
/*
 * Node callback handed to rb_free_nodes() by free_all_extent_backrefs().
 * Maps the rb_node back to its enclosing extent_backref; the statement that
 * releases the backref is not shown in this listing.
 */
6944 static void __free_one_backref(struct rb_node *node)
6946 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Tear down rec->backref_tree, passing every node to __free_one_backref(). */
6951 static void free_all_extent_backrefs(struct extent_record *rec)
6953 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Release the extent records held in @extent_cache. The visible steps take
 * the first cached extent, map it to its extent_record, unlink it from the
 * cache and free all of its backrefs. The enclosing loop and the release of
 * the record itself are not shown in this listing.
 */
6956 static void free_extent_record_cache(struct cache_tree *extent_cache)
6958 struct cache_extent *cache;
6959 struct extent_record *rec;
6962 cache = first_cache_extent(extent_cache);
6965 rec = container_of(cache, struct extent_record, cache);
6966 remove_cache_extent(extent_cache, cache);
6967 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify for it. The
 * record is released only when all of the following hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs, and refs is non-zero,
 *  - num_duplicates is zero,
 *  - all_backpointers_checked() returns 0 (called with print_errs == 0),
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * On release the record is removed from the cache, its backrefs are freed
 * and it is unlinked from the list it sits on through rec->list. The return
 * value is not shown in this listing.
 */
6972 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6973 struct extent_record *rec)
6975 if (rec->content_checked && rec->owner_ref_checked &&
6976 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6977 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6978 !rec->bad_full_backref && !rec->crossing_stripes &&
6979 !rec->wrong_chunk_type) {
6980 remove_cache_extent(extent_cache, &rec->cache);
6981 free_all_extent_backrefs(rec);
6982 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in the header of @buf really references
 * the block.
 *
 * First the backrefs of @rec are scanned; the visible tests look at
 * found_ref, full_backref and whether back->root equals
 * btrfs_header_owner(buf). The action taken by each test is not shown in
 * this listing.
 *
 * Failing that, the root identified by the header owner is read and searched
 * for the first key of @buf with path.lowest_level set to one level above
 * @buf. The block counts as found when the block pointer of the parent at
 * the resulting slot equals buf->start.
 *
 * Returns 0 when found is set and 1 otherwise on the final path; the return
 * values of the earlier exits are not shown.
 */
6988 static int check_owner_ref(struct btrfs_root *root,
6989 struct extent_record *rec,
6990 struct extent_buffer *buf)
6992 struct extent_backref *node, *tmp;
6993 struct tree_backref *back;
6994 struct btrfs_root *ref_root;
6995 struct btrfs_key key;
6996 struct btrfs_path path;
6997 struct extent_buffer *parent;
7002 rbtree_postorder_for_each_entry_safe(node, tmp,
7003 &rec->backref_tree, node) {
7006 if (!node->found_ref)
7008 if (node->full_backref)
7010 back = to_tree_backref(node);
7011 if (btrfs_header_owner(buf) == back->root)
7014 BUG_ON(rec->is_root);
7016 /* try to find the block by search corresponding fs tree */
7017 key.objectid = btrfs_header_owner(buf);
7018 key.type = BTRFS_ROOT_ITEM_KEY;
7019 key.offset = (u64)-1;
7021 ref_root = btrfs_read_fs_root(root->fs_info, &key);
7022 if (IS_ERR(ref_root))
7025 level = btrfs_header_level(buf);
7027 btrfs_item_key_to_cpu(buf, &key, 0);
7029 btrfs_node_key_to_cpu(buf, &key, 0);
7031 btrfs_init_path(&path);
/* Stop the search one level above the block so its parent is in the path. */
7032 path.lowest_level = level + 1;
7033 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7037 parent = path.nodes[level + 1];
7038 if (parent && buf->start == btrfs_node_blockptr(parent,
7039 path.slots[level + 1]))
7042 btrfs_release_path(&path);
7043 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec looking at tree backrefs: full_backref is tested
 * first, then back->root is compared with BTRFS_EXTENT_TREE_OBJECTID. The
 * value returned by each test is not shown in this listing;
 * record_bad_block_io() tests the result for zero.
 */
7046 static int is_extent_tree_record(struct extent_record *rec)
7048 struct extent_backref *node, *tmp;
7049 struct tree_backref *back;
7052 rbtree_postorder_for_each_entry_safe(node, tmp,
7053 &rec->backref_tree, node) {
7056 back = to_tree_backref(node);
7057 if (node->full_backref)
7059 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register the block described by start and len (declared on a parameter
 * line not shown in this listing) as a corrupt extent record. The range is
 * looked up in @extent_cache and the matching record is tested with
 * is_extent_tree_record(). The final visible path converts rec->parent_key
 * to CPU byte order and returns the result of
 * btrfs_add_corrupt_extent_record() called with a last argument of 0.
 */
7066 static int record_bad_block_io(struct btrfs_fs_info *info,
7067 struct cache_tree *extent_cache,
7070 struct extent_record *rec;
7071 struct cache_extent *cache;
7072 struct btrfs_key key;
7074 cache = lookup_cache_extent(extent_cache, start, len);
7078 rec = container_of(cache, struct extent_record, cache);
7079 if (!is_extent_tree_record(rec))
7082 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7083 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (non-zero header level): the two btrfs_key_ptr entries are read out
 * and written back in each other's position. The low keys of the parents
 * are then fixed up from the first key of the node, under a condition that
 * is not shown in this listing.
 *
 * Leaf: both item payloads are copied into temporary heap buffers and
 * written back at the data offset of the other item, the offset and size
 * fields of the two item headers are exchanged, and both keys are rewritten
 * through btrfs_set_item_key_unsafe() with path->slots[0] pointed at each
 * slot in turn. Allocation failure handling and the release of the
 * temporary buffers are not shown in this listing.
 */
7086 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7087 struct extent_buffer *buf, int slot)
7089 if (btrfs_header_level(buf)) {
7090 struct btrfs_key_ptr ptr1, ptr2;
7092 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7093 sizeof(struct btrfs_key_ptr));
7094 read_extent_buffer(buf, &ptr2,
7095 btrfs_node_key_ptr_offset(slot + 1),
7096 sizeof(struct btrfs_key_ptr));
7097 write_extent_buffer(buf, &ptr1,
7098 btrfs_node_key_ptr_offset(slot + 1),
7099 sizeof(struct btrfs_key_ptr));
7100 write_extent_buffer(buf, &ptr2,
7101 btrfs_node_key_ptr_offset(slot),
7102 sizeof(struct btrfs_key_ptr));
7104 struct btrfs_disk_key key;
7105 btrfs_node_key(buf, &key, 0);
7106 btrfs_fixup_low_keys(root, path, &key,
7107 btrfs_header_level(buf) + 1);
7110 struct btrfs_item *item1, *item2;
7111 struct btrfs_key k1, k2;
7112 char *item1_data, *item2_data;
7113 u32 item1_offset, item2_offset, item1_size, item2_size;
7115 item1 = btrfs_item_nr(slot);
7116 item2 = btrfs_item_nr(slot + 1);
7117 btrfs_item_key_to_cpu(buf, &k1, slot);
7118 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7119 item1_offset = btrfs_item_offset(buf, item1);
7120 item2_offset = btrfs_item_offset(buf, item2);
7121 item1_size = btrfs_item_size(buf, item1);
7122 item2_size = btrfs_item_size(buf, item2);
7124 item1_data = malloc(item1_size);
7127 item2_data = malloc(item2_size);
7133 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7134 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written back with
 * length item2_size, and item2_data with length item1_size. When the two
 * sizes differ one of these calls reads past the end of its source buffer
 * and the other copies a truncated payload -- confirm and fix.
 */
7136 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7137 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7141 btrfs_set_item_offset(buf, item1, item2_offset);
7142 btrfs_set_item_offset(buf, item2, item1_offset);
7143 btrfs_set_item_size(buf, item1, item2_size);
7144 btrfs_set_item_size(buf, item2, item1_size);
7146 path->slots[0] = slot;
7147 btrfs_set_item_key_unsafe(root, path, &k2);
7148 path->slots[0] = slot + 1;
7149 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair mis-ordered keys in the block at path->lowest_level with a single
 * pass over adjacent pairs. Each pair is read either as node keys or as item
 * keys (the level test choosing between them is not shown in this listing)
 * and compared with btrfs_comp_cpu_keys(). swap_values() is applied to slot
 * i after that comparison, presumably only when the pair is not already in
 * ascending order, and the buffer is marked dirty afterwards.
 */
7154 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7156 struct extent_buffer *buf;
7157 struct btrfs_key k1, k2;
7159 int level = path->lowest_level;
7162 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, a block
 * with zero items makes nritems - 1 wrap to a huge bound -- confirm callers
 * never reach this with an empty block.
 */
7163 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7165 btrfs_node_key_to_cpu(buf, &k1, i);
7166 btrfs_node_key_to_cpu(buf, &k2, i + 1);
7168 btrfs_item_key_to_cpu(buf, &k1, i);
7169 btrfs_item_key_to_cpu(buf, &k2, i + 1);
7171 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7173 ret = swap_values(root, path, buf, i);
7176 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf, but only for key types this code
 * can cope with losing: DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF. The action for any other type is not
 * shown in this listing.
 *
 * The deletion is announced on stdout, the item headers following @slot are
 * moved down by one entry and nritems is decremented. In the visible code
 * only item headers are moved, not item data. The low keys of the parents
 * are fixed up from the new first key of the leaf under a condition not
 * shown here, and the buffer is marked dirty.
 *
 * NOTE(review): buf->start is passed to a %llu conversion without the
 * (unsigned long long) cast used for the other arguments -- confirm the type.
 */
7182 static int delete_bogus_item(struct btrfs_root *root,
7183 struct btrfs_path *path,
7184 struct extent_buffer *buf, int slot)
7186 struct btrfs_key key;
7187 int nritems = btrfs_header_nritems(buf);
7189 btrfs_item_key_to_cpu(buf, &key, slot);
7191 /* These are all the keys we can deal with missing. */
7192 if (key.type != BTRFS_DIR_INDEX_KEY &&
7193 key.type != BTRFS_EXTENT_ITEM_KEY &&
7194 key.type != BTRFS_METADATA_ITEM_KEY &&
7195 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7196 key.type != BTRFS_EXTENT_DATA_REF_KEY)
7199 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7200 (unsigned long long)key.objectid, key.type,
7201 (unsigned long long)key.offset, slot, buf->start);
7202 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7203 btrfs_item_nr_offset(slot + 1),
7204 sizeof(struct btrfs_item) *
7205 (nritems - slot - 1));
7206 btrfs_set_header_nritems(buf, nritems - 1);
7208 struct btrfs_disk_key disk_key;
7210 btrfs_item_key(buf, &disk_key, 0);
7211 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7213 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0]. The invariant
 * being restored is that item 0 ends at BTRFS_LEAF_DATA_SIZE(root) and every
 * later item ends where the data of the preceding item begins.
 *
 * For each item that violates the invariant:
 *  - if it ends beyond its limit it is handed to delete_bogus_item(); the
 *    two diagnostics on stderr belong to this path (the conditions around
 *    them are not shown in this listing);
 *  - otherwise the gap to the limit is computed as the shift, the item data
 *    is moved up by that many bytes with memmove_extent_buffer(), the offset
 *    field is rewritten and the buffer is marked dirty.
 * Must only be called with path->lowest_level == 0 (BUG_ON).
 */
7217 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7219 struct extent_buffer *buf;
7223 /* We should only get this for leaves */
7224 BUG_ON(path->lowest_level);
7225 buf = path->nodes[0];
7227 for (i = 0; i < btrfs_header_nritems(buf); i++) {
7228 unsigned int shift = 0, offset;
7230 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7231 BTRFS_LEAF_DATA_SIZE(root)) {
7232 if (btrfs_item_end_nr(buf, i) >
7233 BTRFS_LEAF_DATA_SIZE(root)) {
7234 ret = delete_bogus_item(root, path, buf, i);
7237 fprintf(stderr, "item is off the end of the "
7238 "leaf, can't fix\n");
7242 shift = BTRFS_LEAF_DATA_SIZE(root) -
7243 btrfs_item_end_nr(buf, i);
7244 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7245 btrfs_item_offset_nr(buf, i - 1)) {
7246 if (btrfs_item_end_nr(buf, i) >
7247 btrfs_item_offset_nr(buf, i - 1)) {
7248 ret = delete_bogus_item(root, path, buf, i);
7251 fprintf(stderr, "items overlap, can't fix\n");
7255 shift = btrfs_item_offset_nr(buf, i - 1) -
7256 btrfs_item_end_nr(buf, i);
7261 printf("Shifting item nr %d by %u bytes in block %llu\n",
7262 i, shift, (unsigned long long)buf->start);
7263 offset = btrfs_item_offset_nr(buf, i);
7264 memmove_extent_buffer(buf,
7265 btrfs_leaf_data(buf) + offset + shift,
7266 btrfs_leaf_data(buf) + offset,
7267 btrfs_item_size_nr(buf, i));
7268 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7270 btrfs_mark_buffer_dirty(buf);
7274 * We may have moved things, in which case we want to exit so we don't
7275 * write those changes out. Once we have proper abort functionality in
7276 * progs this can be changed to something nicer.
7283 * Attempt to fix basic block failures. If we can't fix it for whatever reason
7284 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are tested for; the action for any other status is not shown in this
 * listing.
 *
 * Every root returned by btrfs_find_all_roots() for buf->start is visited in
 * turn. For each one the root is read, a transaction is started, and the
 * tree is searched for the first key of @buf with path.lowest_level set to
 * the level of @buf and path.skip_check_block set. The search is called with
 * a final argument of 1 (presumably the cow flag -- confirm against the
 * btrfs_search_slot() prototype). fix_key_order() or fix_item_offset() is
 * then run according to @status, and the transaction is committed.
 */
7286 static int try_to_fix_bad_block(struct btrfs_root *root,
7287 struct extent_buffer *buf,
7288 enum btrfs_tree_block_status status)
7290 struct btrfs_trans_handle *trans;
7291 struct ulist *roots;
7292 struct ulist_node *node;
7293 struct btrfs_root *search_root;
7294 struct btrfs_path path;
7295 struct ulist_iterator iter;
7296 struct btrfs_key root_key, key;
7299 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7300 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7303 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7307 btrfs_init_path(&path);
7308 ULIST_ITER_INIT(&iter);
7309 while ((node = ulist_next(roots, &iter))) {
7310 root_key.objectid = node->val;
7311 root_key.type = BTRFS_ROOT_ITEM_KEY;
7312 root_key.offset = (u64)-1;
7314 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7321 trans = btrfs_start_transaction(search_root, 0);
7322 if (IS_ERR(trans)) {
7323 ret = PTR_ERR(trans);
7327 path.lowest_level = btrfs_header_level(buf);
7328 path.skip_check_block = 1;
7329 if (path.lowest_level)
7330 btrfs_node_key_to_cpu(buf, &key, 0);
7332 btrfs_item_key_to_cpu(buf, &key, 0);
7333 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7336 btrfs_commit_transaction(trans, search_root);
7339 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7340 ret = fix_key_order(search_root, &path);
7341 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7342 ret = fix_item_offset(search_root, &path);
7344 btrfs_commit_transaction(trans, search_root);
7347 btrfs_release_path(&path);
7348 btrfs_commit_transaction(trans, search_root);
7351 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record covering buf->start / buf->len is looked up in @extent_cache
 * and stamped with the header generation, the objectid of the first key
 * (when the block has items) and the level.
 *
 * The block is then checked with btrfs_check_leaf() or btrfs_check_node()
 * against rec->parent_key. A block that is not BTRFS_TREE_BLOCK_CLEAN can be
 * handed to try_to_fix_bad_block(); if the status is still not clean the
 * block is reported on stderr. The conditions guarding the repair attempt
 * and the values returned are not shown in this listing.
 *
 * On the remaining path content_checked is set. owner_ref_checked is set
 * directly when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise
 * check_owner_ref() is consulted first. maybe_free_extent_rec() then gets a
 * chance to release the record.
 */
7355 static int check_block(struct btrfs_root *root,
7356 struct cache_tree *extent_cache,
7357 struct extent_buffer *buf, u64 flags)
7359 struct extent_record *rec;
7360 struct cache_extent *cache;
7361 struct btrfs_key key;
7362 enum btrfs_tree_block_status status;
7366 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7369 rec = container_of(cache, struct extent_record, cache);
7370 rec->generation = btrfs_header_generation(buf);
7372 level = btrfs_header_level(buf);
7373 if (btrfs_header_nritems(buf) > 0) {
7376 btrfs_item_key_to_cpu(buf, &key, 0);
7378 btrfs_node_key_to_cpu(buf, &key, 0);
7380 rec->info_objectid = key.objectid;
7382 rec->info_level = level;
7384 if (btrfs_is_leaf(buf))
7385 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7387 status = btrfs_check_node(root, &rec->parent_key, buf);
7389 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7391 status = try_to_fix_bad_block(root, buf, status);
7392 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7394 fprintf(stderr, "bad block %llu\n",
7395 (unsigned long long)buf->start);
7398 * Signal to callers we need to start the scan over
7399 * again since we'll have cowed blocks.
7404 rec->content_checked = 1;
7405 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7406 rec->owner_ref_checked = 1;
7408 ret = check_owner_ref(root, rec, buf);
7410 rec->owner_ref_checked = 1;
7414 maybe_free_extent_rec(extent_cache, rec);
/*
 * Linear search of the rec->backrefs list for a tree backref. Two
 * comparisons are visible: @parent against back->parent (paired with a test
 * for a missing full_backref flag) and @root against back->root (paired with
 * a test for a set full_backref flag). The condition selecting between the
 * two modes, the list advance and the return statements are not shown in
 * this listing.
 */
7419 static struct tree_backref *find_tree_backref(struct extent_record *rec,
7420 u64 parent, u64 root)
7422 struct list_head *cur = rec->backrefs.next;
7423 struct extent_backref *node;
7424 struct tree_backref *back;
7426 while(cur != &rec->backrefs) {
7427 node = to_extent_backref(cur);
7431 back = to_tree_backref(node);
7433 if (!node->full_backref)
7435 if (parent == back->parent)
7438 if (node->full_backref)
7440 if (back->root == root)
/*
 * Allocate a tree_backref and zero its embedded node. Two initialisations
 * are visible: one stores @parent and sets full_backref, the other clears
 * full_backref. The condition choosing between them, the handling of @root,
 * the handling of allocation failure and the return statement are not shown
 * in this listing.
 */
7448 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7449 u64 parent, u64 root)
7451 struct tree_backref *ref = malloc(sizeof(*ref));
7455 memset(&ref->node, 0, sizeof(ref->node));
7457 ref->parent = parent;
7458 ref->node.full_backref = 1;
7461 ref->node.full_backref = 0;
/*
 * Linear search of the rec->backrefs list for a data backref, matching
 * either on @parent or on the (@root, @owner, @offset) triple.
 *
 * For a triple match there is one extra test: when both the found_ref
 * argument (declared on a parameter line not shown in this listing) and the
 * found_ref flag of the node are set, the candidate is checked for a
 * differing @bytes or @disk_bytenr. The action taken on such a mismatch and
 * the return statements are not shown.
 */
7468 static struct data_backref *find_data_backref(struct extent_record *rec,
7469 u64 parent, u64 root,
7470 u64 owner, u64 offset,
7472 u64 disk_bytenr, u64 bytes)
7474 struct list_head *cur = rec->backrefs.next;
7475 struct extent_backref *node;
7476 struct data_backref *back;
7478 while(cur != &rec->backrefs) {
7479 node = to_extent_backref(cur);
7483 back = to_data_backref(node);
7485 if (!node->full_backref)
7487 if (parent == back->parent)
7490 if (node->full_backref)
7492 if (back->root == root && back->owner == owner &&
7493 back->offset == offset) {
7494 if (found_ref && node->found_ref &&
7495 (back->bytes != bytes ||
7496 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node and mark it as data.
 * Visible initialisation: @parent is stored with full_backref set on one
 * path, @offset is stored with full_backref cleared on the other, and bytes
 * is taken from max_size (declared on a parameter line not shown in this
 * listing). rec->max_size is raised to max_size when the latter is larger.
 * The remaining field assignments, allocation failure handling and the
 * return statement are not shown.
 */
7506 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7507 u64 parent, u64 root,
7508 u64 owner, u64 offset,
7511 struct data_backref *ref = malloc(sizeof(*ref));
7515 memset(&ref->node, 0, sizeof(ref->node));
7516 ref->node.is_data = 1;
7519 ref->parent = parent;
7522 ref->node.full_backref = 1;
7526 ref->offset = offset;
7527 ref->node.full_backref = 0;
7529 ref->bytes = max_size;
7532 if (max_size > rec->max_size)
7533 rec->max_size = max_size;
7537 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start
 * (looked up through global_info) does not fit the extent:
 *  - an extent without the metadata flag needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA in the block group flags;
 *  - when the metadata extent has at least one backref, the first node of
 *    rec->backref_tree refines the check: a data backref is always wrong,
 *    a tree backref with root BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM and
 *    any other root requires METADATA.
 * The handling of a failed block group lookup is not shown in this listing.
 */
7538 static void check_extent_type(struct extent_record *rec)
7540 struct btrfs_block_group_cache *bg_cache;
7542 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7546 /* data extent, check chunk directly*/
7547 if (!rec->metadata) {
7548 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7549 rec->wrong_chunk_type = 1;
7553 /* metadata extent, check the obvious case first */
7554 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7555 BTRFS_BLOCK_GROUP_METADATA))) {
7556 rec->wrong_chunk_type = 1;
7561 * Check SYSTEM extent, as it's also marked as metadata, we can only
7562 * make sure it's a SYSTEM extent by its backref
7564 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7565 struct extent_backref *node;
7566 struct tree_backref *tback;
7569 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7570 if (node->is_data) {
7571 /* tree block shouldn't have data backref */
7572 rec->wrong_chunk_type = 1;
7575 tback = container_of(node, struct tree_backref, node);
7577 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7578 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7580 bg_type = BTRFS_BLOCK_GROUP_METADATA;
7581 if (!(bg_cache->flags & bg_type))
7582 rec->wrong_chunk_type = 1;
7587 * Allocate a new extent record, fill default values from @tmpl and insert into
7588 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7589 * the cache, otherwise it fails.
/*
 * Field handling visible in this function:
 *  - copied from @tmpl: start, max_size, found_rec, content_checked,
 *    owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 *    parent_generation and parent_key;
 *  - rec->nr is the larger of tmpl->nr and tmpl->max_size, while the cache
 *    entry is sized with tmpl->nr;
 *  - num_duplicates, bad_full_backref, crossing_stripes and
 *    wrong_chunk_type start at 0, flag_block_full_backref at FLAG_UNSET;
 *  - the three list heads and the backref rb-tree start empty.
 *
 * After insert_cache_extent() rec->nr is added to the global bytes_used,
 * the stripe crossing check is run with global_info->nodesize (under a
 * condition not shown in this listing) and check_extent_type() is applied.
 * tmpl->max_size must be non-zero (BUG_ON). Error handling for the
 * allocation and the insert is not shown.
 */
7591 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7592 struct extent_record *tmpl)
7594 struct extent_record *rec;
7597 BUG_ON(tmpl->max_size == 0);
7598 rec = malloc(sizeof(*rec));
7601 rec->start = tmpl->start;
7602 rec->max_size = tmpl->max_size;
7603 rec->nr = max(tmpl->nr, tmpl->max_size);
7604 rec->found_rec = tmpl->found_rec;
7605 rec->content_checked = tmpl->content_checked;
7606 rec->owner_ref_checked = tmpl->owner_ref_checked;
7607 rec->num_duplicates = 0;
7608 rec->metadata = tmpl->metadata;
7609 rec->flag_block_full_backref = FLAG_UNSET;
7610 rec->bad_full_backref = 0;
7611 rec->crossing_stripes = 0;
7612 rec->wrong_chunk_type = 0;
7613 rec->is_root = tmpl->is_root;
7614 rec->refs = tmpl->refs;
7615 rec->extent_item_refs = tmpl->extent_item_refs;
7616 rec->parent_generation = tmpl->parent_generation;
7617 INIT_LIST_HEAD(&rec->backrefs);
7618 INIT_LIST_HEAD(&rec->dups);
7619 INIT_LIST_HEAD(&rec->list);
7620 rec->backref_tree = RB_ROOT;
7621 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7622 rec->cache.start = tmpl->start;
7623 rec->cache.size = tmpl->nr;
7624 ret = insert_cache_extent(extent_cache, &rec->cache);
7629 bytes_used += rec->nr;
7632 rec->crossing_stripes = check_crossing_stripes(global_info,
7633 rec->start, global_info->nodesize);
7634 check_extent_type(rec);
7639 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7641 * - refs - if found, increase refs
7642 * - is_root - if found, set
7643 * - content_checked - if found, set
7644 * - owner_ref_checked - if found, set
7646 * If not found, create a new one, initialize and insert.
/*
 * Visible behaviour when lookup_cache_extent() finds an entry for
 * tmpl->start / tmpl->nr:
 *  - rec->nr can be reset to max(tmpl->nr, tmpl->max_size) (the guarding
 *    condition is not shown in this listing);
 *  - if tmpl->found_rec is set and either the start differs from the cached
 *    record or the record already has found_rec, the record is queued on
 *    duplicate_extents (once), a stub record carrying start, max_size,
 *    metadata and extent_item_refs is appended to rec->dups and
 *    num_duplicates is incremented;
 *  - a non-zero tmpl->extent_item_refs on a non-duplicate is stored in the
 *    record, with a diagnostic when a value was already present;
 *  - content_checked and owner_ref_checked are only ever set, parent_key is
 *    overwritten, parent_generation is taken when non-zero and max_size only
 *    grows;
 *  - the stripe crossing check (condition not shown) and
 *    check_extent_type() are rerun, and maybe_free_extent_rec() may release
 *    the record.
 * The final visible statement delegates to add_extent_rec_nolookup(), which
 * is presumably the path taken when no entry was found.
 */
7648 static int add_extent_rec(struct cache_tree *extent_cache,
7649 struct extent_record *tmpl)
7651 struct extent_record *rec;
7652 struct cache_extent *cache;
7656 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7658 rec = container_of(cache, struct extent_record, cache);
7662 rec->nr = max(tmpl->nr, tmpl->max_size);
7665 * We need to make sure to reset nr to whatever the extent
7666 * record says was the real size, this way we can compare it to
7669 if (tmpl->found_rec) {
7670 if (tmpl->start != rec->start || rec->found_rec) {
7671 struct extent_record *tmp;
7674 if (list_empty(&rec->list))
7675 list_add_tail(&rec->list,
7676 &duplicate_extents);
7679 * We have to do this song and dance in case we
7680 * find an extent record that falls inside of
7681 * our current extent record but does not have
7682 * the same objectid.
7684 tmp = malloc(sizeof(*tmp));
7687 tmp->start = tmpl->start;
7688 tmp->max_size = tmpl->max_size;
7691 tmp->metadata = tmpl->metadata;
7692 tmp->extent_item_refs = tmpl->extent_item_refs;
7693 INIT_LIST_HEAD(&tmp->list);
7694 list_add_tail(&tmp->list, &rec->dups);
7695 rec->num_duplicates++;
7702 if (tmpl->extent_item_refs && !dup) {
7703 if (rec->extent_item_refs) {
7704 fprintf(stderr, "block %llu rec "
7705 "extent_item_refs %llu, passed %llu\n",
7706 (unsigned long long)tmpl->start,
7707 (unsigned long long)
7708 rec->extent_item_refs,
7709 (unsigned long long)tmpl->extent_item_refs);
7711 rec->extent_item_refs = tmpl->extent_item_refs;
7715 if (tmpl->content_checked)
7716 rec->content_checked = 1;
7717 if (tmpl->owner_ref_checked)
7718 rec->owner_ref_checked = 1;
7719 memcpy(&rec->parent_key, &tmpl->parent_key,
7720 sizeof(tmpl->parent_key));
7721 if (tmpl->parent_generation)
7722 rec->parent_generation = tmpl->parent_generation;
7723 if (rec->max_size < tmpl->max_size)
7724 rec->max_size = tmpl->max_size;
7727 * A metadata extent can't cross stripe_len boundary, otherwise
7728 * kernel scrub won't be able to handle it.
7729 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7733 rec->crossing_stripes = check_crossing_stripes(
7734 global_info, rec->start,
7735 global_info->nodesize);
7736 check_extent_type(rec);
7737 maybe_free_extent_rec(extent_cache, rec);
7741 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (identified by @parent or @root) for the extent at
 * @bytenr.
 *
 * A template record starting at @bytenr can be inserted with
 * add_extent_rec_nolookup(), after which the cache is queried again;
 * presumably this happens only when the first lookup finds nothing. A record
 * whose start differs from @bytenr is tested for, but its handling is not
 * shown in this listing. The backref is looked up with find_tree_backref()
 * and created with alloc_tree_backref() when needed.
 *
 * Two flags can be set on the backref, found_ref and found_extent_tree, and
 * each has a diagnostic for the case where it was already set. @found_ref
 * presumably selects which of the two is updated. The node is then inserted
 * into rec->backref_tree (under a condition not shown; a failed insert only
 * triggers WARN_ON), and check_extent_type() and maybe_free_extent_rec() are
 * run on the record.
 */
7746 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7747 u64 parent, u64 root, int found_ref)
7749 struct extent_record *rec;
7750 struct tree_backref *back;
7751 struct cache_extent *cache;
7753 bool insert = false;
7755 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7757 struct extent_record tmpl;
7759 memset(&tmpl, 0, sizeof(tmpl));
7760 tmpl.start = bytenr;
7765 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7769 /* really a bug in cache_extent implement now */
7770 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7775 rec = container_of(cache, struct extent_record, cache);
7776 if (rec->start != bytenr) {
7778 * Several cause, from unaligned bytenr to over lapping extents
7783 back = find_tree_backref(rec, parent, root);
7785 back = alloc_tree_backref(rec, parent, root);
7792 if (back->node.found_ref) {
7793 fprintf(stderr, "Extent back ref already exists "
7794 "for %llu parent %llu root %llu \n",
7795 (unsigned long long)bytenr,
7796 (unsigned long long)parent,
7797 (unsigned long long)root);
7799 back->node.found_ref = 1;
7801 if (back->node.found_extent_tree) {
7802 fprintf(stderr, "Extent back ref already exists "
7803 "for %llu parent %llu root %llu \n",
7804 (unsigned long long)bytenr,
7805 (unsigned long long)parent,
7806 (unsigned long long)root);
7808 back->node.found_extent_tree = 1;
7811 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7812 compare_extent_backref));
7813 check_extent_type(rec);
7814 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A template record with start @bytenr and max_size @max_size can be
 * inserted with add_extent_rec_nolookup(), after which the cache is queried
 * again; presumably this happens only when the first lookup finds nothing.
 * rec->max_size is raised to @max_size when smaller. The backref is looked
 * up with find_data_backref() and created with alloc_data_backref() when
 * needed.
 *
 * Visible updates on the found-reference path: @num_refs must be 1 (BUG_ON),
 * a backref that already has found_ref must agree on bytes (BUG_ON), the
 * found_ref flag is set and the found_ref counter incremented, bytes and
 * disk_bytenr are refreshed from @max_size and @bytenr when they differ
 * (with the node erased from the rb-tree so that it can be reinserted), and
 * the record is marked content_checked and owner_ref_checked.
 *
 * Visible updates on the extent-tree path: a diagnostic when the backref
 * already has found_extent_tree, then num_refs and found_extent_tree are
 * stored.
 *
 * The node is inserted into rec->backref_tree (under a condition not shown
 * in this listing; a failed insert only triggers WARN_ON) and
 * maybe_free_extent_rec() is run on the record.
 */
7818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7819 u64 parent, u64 root, u64 owner, u64 offset,
7820 u32 num_refs, int found_ref, u64 max_size)
7822 struct extent_record *rec;
7823 struct data_backref *back;
7824 struct cache_extent *cache;
7826 bool insert = false;
7828 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7830 struct extent_record tmpl;
7832 memset(&tmpl, 0, sizeof(tmpl));
7833 tmpl.start = bytenr;
7835 tmpl.max_size = max_size;
7837 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7846 rec = container_of(cache, struct extent_record, cache);
7847 if (rec->max_size < max_size)
7848 rec->max_size = max_size;
7851 * If found_ref is set then max_size is the real size and must match the
7852 * existing refs. So if we have already found a ref then we need to
7853 * make sure that this ref matches the existing one, otherwise we need
7854 * to add a new backref so we can notice that the backrefs don't match
7855 * and we need to figure out who is telling the truth. This is to
7856 * account for that awful fsync bug I introduced where we'd end up with
7857 * a btrfs_file_extent_item that would have its length include multiple
7858 * prealloc extents or point inside of a prealloc extent.
7860 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7863 back = alloc_data_backref(rec, parent, root, owner, offset,
7870 BUG_ON(num_refs != 1);
7871 if (back->node.found_ref)
7872 BUG_ON(back->bytes != max_size);
7873 back->node.found_ref = 1;
7874 back->found_ref += 1;
7875 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7876 back->bytes = max_size;
7877 back->disk_bytenr = bytenr;
7879 /* Need to reinsert if not already in the tree */
7881 rb_erase(&back->node.node, &rec->backref_tree);
7886 rec->content_checked = 1;
7887 rec->owner_ref_checked = 1;
7889 if (back->node.found_extent_tree) {
7890 fprintf(stderr, "Extent back ref already exists "
7891 "for %llu parent %llu root %llu "
7892 "owner %llu offset %llu num_refs %lu\n",
7893 (unsigned long long)bytenr,
7894 (unsigned long long)parent,
7895 (unsigned long long)root,
7896 (unsigned long long)owner,
7897 (unsigned long long)offset,
7898 (unsigned long)num_refs);
7900 back->num_refs = num_refs;
7901 back->node.found_extent_tree = 1;
7904 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7905 compare_extent_backref));
7907 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the extent (@bytenr, @size) for later processing. It is first added
 * to @seen and then to @pending; the test on the first result is not shown
 * in this listing. The result of the insert into @pending is not captured.
 */
7911 static int add_pending(struct cache_tree *pending,
7912 struct cache_tree *seen, u64 bytenr, u32 size)
7915 ret = add_cache_extent(seen, bytenr, size);
7918 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr extents to process next.
 *
 * Sources visible in this listing, in order of appearance:
 *  - the first entry of @reada, stored in bits[0];
 *  - entries of @nodes, searched from @last minus 32768 (when @last exceeds
 *    32768) with a second search from offset 0;
 *  - entries of @pending, searched from offset 0.
 * The two do/while loops copy consecutive cache entries into @bits until the
 * cache runs out or @bits_nr is reached. When more than 8 slots remain,
 * entries of @pending located after the end of bits[0] are appended; a gap
 * larger than 32768 from the previous range is tested for and presumably
 * ends that scan. The conditions linking these steps and the return value
 * are not shown.
 *
 * NOTE(review): node_start is unsigned long while @last is u64, so the value
 * is truncated on targets where long is 32 bits -- confirm whether that
 * matters.
 */
7922 static int pick_next_pending(struct cache_tree *pending,
7923 struct cache_tree *reada,
7924 struct cache_tree *nodes,
7925 u64 last, struct block_info *bits, int bits_nr,
7928 unsigned long node_start = last;
7929 struct cache_extent *cache;
7932 cache = search_cache_extent(reada, 0);
7934 bits[0].start = cache->start;
7935 bits[0].size = cache->size;
7940 if (node_start > 32768)
7941 node_start -= 32768;
7943 cache = search_cache_extent(nodes, node_start);
7945 cache = search_cache_extent(nodes, 0);
7948 cache = search_cache_extent(pending, 0);
7953 bits[ret].start = cache->start;
7954 bits[ret].size = cache->size;
7955 cache = next_cache_extent(cache);
7957 } while (cache && ret < bits_nr);
7963 bits[ret].start = cache->start;
7964 bits[ret].size = cache->size;
7965 cache = next_cache_extent(cache);
7967 } while (cache && ret < bits_nr);
7969 if (bits_nr - ret > 8) {
7970 u64 lookup = bits[0].start + bits[0].size;
7971 struct cache_extent *next;
7972 next = search_cache_extent(pending, lookup);
7974 if (next->start - lookup > 32768)
7976 bits[ret].start = next->start;
7977 bits[ret].size = next->size;
7978 lookup = next->start + next->size;
7982 next = next_cache_extent(next);
/*
 * Per-entry callback used by free_chunk_cache_tree(). Maps the cache_extent
 * to its chunk_record and unlinks the record from both its list and
 * dextents list heads. The release of the record itself is not shown in
 * this listing.
 */
7990 static void free_chunk_record(struct cache_extent *cache)
7992 struct chunk_record *rec;
7994 rec = container_of(cache, struct chunk_record, cache);
7995 list_del_init(&rec->list);
7996 list_del_init(&rec->dextents);
/* Empty @chunk_cache, passing every entry to free_chunk_record(). */
8000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8002 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback passed to FREE_RB_BASED_TREE() for the device cache.
 * Maps the rb_node to its device_record; the release of the record is not
 * shown in this listing.
 */
8005 static void free_device_record(struct rb_node *node)
8007 struct device_record *rec;
8009 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation tying the device_cache name to free_device_record().
 * NOTE(review): presumably expands to the function that frees the whole
 * device rb-tree -- confirm against the macro definition.
 */
8013 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list. The test on the insert result that sits between
 * the two statements is not shown in this listing.
 */
8015 int insert_block_group_record(struct block_group_tree *tree,
8016 struct block_group_record *bg_rec)
8020 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8024 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback used by free_block_group_tree(). Maps the cache_extent
 * to its block_group_record and unlinks it from its list; the release of
 * the record itself is not shown in this listing.
 */
8028 static void free_block_group_record(struct cache_extent *cache)
8030 struct block_group_record *rec;
8032 rec = container_of(cache, struct block_group_record, cache);
8033 list_del_init(&rec->list);
/* Empty tree->tree, passing every entry to free_block_group_record(). */
8037 void free_block_group_tree(struct block_group_tree *tree)
8039 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree using insert_cache_extent2(),
 * then queue it on both orphan lists of @tree (no_chunk_orphans through
 * chunk_list, no_device_orphans through device_list). The test on the insert
 * result is not shown in this listing.
 */
8042 int insert_device_extent_record(struct device_extent_tree *tree,
8043 struct device_extent_record *de_rec)
8048 * Device extent is a bit different from the other extents, because
8049 * the extents which belong to the different devices may have the
8050 * same start and size, so we need use the special extent cache
8051 * search/insert functions.
8053 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8057 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8058 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback used by free_device_extent_tree(). Maps the
 * cache_extent to its device_extent_record and unlinks it from chunk_list
 * and device_list when those are non-empty. The release of the record
 * itself is not shown in this listing.
 */
8062 static void free_device_extent_record(struct cache_extent *cache)
8064 struct device_extent_record *rec;
8066 rec = container_of(cache, struct device_extent_record, cache);
8067 if (!list_empty(&rec->chunk_list))
8068 list_del_init(&rec->chunk_list);
8069 if (!list_empty(&rec->device_list))
8070 list_del_init(&rec->device_list);
/* Empty tree->tree, passing every entry to free_device_extent_record(). */
8074 void free_device_extent_tree(struct device_extent_tree *tree)
8076 cache_tree_free_extents(&tree->tree, free_device_extent_record);
8079 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref. The item key
 * supplies the extent bytenr (key.objectid) and the second argument of the
 * backref call (key.offset). A ref whose objectid is below
 * BTRFS_FIRST_FREE_OBJECTID goes to add_tree_backref(); the
 * add_data_backref() call, with zeros for root, owner and offset and the v0
 * ref count as num_refs, is presumably the alternative arm (the else keyword
 * is not shown in this listing).
 */
8080 static int process_extent_ref_v0(struct cache_tree *extent_cache,
8081 struct extent_buffer *leaf, int slot)
8083 struct btrfs_extent_ref_v0 *ref0;
8084 struct btrfs_key key;
8087 btrfs_item_key_to_cpu(leaf, &key, slot);
8088 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
8089 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
8090 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
8093 ret = add_data_backref(extent_cache, key.objectid, key.offset,
8094 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at slot (declared on a parameter
 * line not shown in this listing) of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes as computed
 * by btrfs_chunk_record_size(). The cache range is key->offset with the
 * chunk length as size. generation comes from the leaf header, the key
 * fields are copied verbatim, and the chunk attributes (owner, stripe_len,
 * type, io_width, io_align, sector_size, sub_stripes) are read from the
 * item. For every stripe the devid, offset and dev_uuid are copied.
 *
 * An allocation failure is reported on stderr; what follows that message and
 * the return statement are not shown.
 */
8100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8101 struct btrfs_key *key,
8104 struct btrfs_chunk *ptr;
8105 struct chunk_record *rec;
8108 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8109 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8111 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8113 fprintf(stderr, "memory allocation failed\n");
8117 INIT_LIST_HEAD(&rec->list);
8118 INIT_LIST_HEAD(&rec->dextents);
8121 rec->cache.start = key->offset;
8122 rec->cache.size = btrfs_chunk_length(leaf, ptr);
8124 rec->generation = btrfs_header_generation(leaf);
8126 rec->objectid = key->objectid;
8127 rec->type = key->type;
8128 rec->offset = key->offset;
8130 rec->length = rec->cache.size;
8131 rec->owner = btrfs_chunk_owner(leaf, ptr);
8132 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8133 rec->type_flags = btrfs_chunk_type(leaf, ptr);
8134 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8135 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8136 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8137 rec->num_stripes = num_stripes;
8138 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8140 for (i = 0; i < rec->num_stripes; ++i) {
8141 rec->stripes[i].devid =
8142 btrfs_stripe_devid_nr(leaf, ptr, i);
8143 rec->stripes[i].offset =
8144 btrfs_stripe_offset_nr(leaf, ptr, i);
8145 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8146 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot (declared on a parameter line not shown
 * in this listing) of @eb with btrfs_check_chunk_valid() and, when it is
 * accepted, build a chunk_record with btrfs_new_chunk_record() and insert
 * it into @chunk_cache. An invalid chunk is reported through error() and a
 * failed insert is reported on stderr as an already existing chunk; the
 * conditions around both messages and the return values are not shown.
 */
8153 static int process_chunk_item(struct cache_tree *chunk_cache,
8154 struct btrfs_key *key, struct extent_buffer *eb,
8157 struct chunk_record *rec;
8158 struct btrfs_chunk *chunk;
8161 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8163 * Do extra check for this chunk item,
8165 * It's still possible one can craft a leaf with CHUNK_ITEM, with
8166 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8167 * and owner<->key_type check.
8169 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8172 error("chunk(%llu, %llu) is not valid, ignore it",
8173 key->offset, btrfs_chunk_length(eb, chunk));
8176 rec = btrfs_new_chunk_record(eb, key, slot);
8177 ret = insert_cache_extent(chunk_cache, &rec->cache);
8179 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8180 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree using device_record_compare(). The record
 * takes generation from the leaf header, the key fields verbatim, and
 * devid, total_byte and byte_used from the item. Allocation failure and a
 * failed insert (reported as an already existing device) are announced on
 * stderr; what follows each message is not shown in this listing.
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); the second value wins -- confirm the first
 * assignment is intentional.
 */
8187 static int process_device_item(struct rb_root *dev_cache,
8188 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8190 struct btrfs_dev_item *ptr;
8191 struct device_record *rec;
8194 ptr = btrfs_item_ptr(eb,
8195 slot, struct btrfs_dev_item);
8197 rec = malloc(sizeof(*rec));
8199 fprintf(stderr, "memory allocation failed\n");
8203 rec->devid = key->offset;
8204 rec->generation = btrfs_header_generation(eb);
8206 rec->objectid = key->objectid;
8207 rec->type = key->type;
8208 rec->offset = key->offset;
8210 rec->devid = btrfs_device_id(eb, ptr);
8211 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8212 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8214 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8216 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * slot (declared on a parameter line not shown in this listing) of @leaf.
 * The cache range is key->objectid with key->offset as size; generation
 * comes from the leaf header, the key fields are copied verbatim and flags
 * are read from the item. The list head is initialised empty. An allocation
 * failure is reported on stderr; what follows that message and the return
 * statement are not shown.
 */
8223 struct block_group_record *
8224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8227 struct btrfs_block_group_item *ptr;
8228 struct block_group_record *rec;
8230 rec = calloc(1, sizeof(*rec));
8232 fprintf(stderr, "memory allocation failed\n");
8236 rec->cache.start = key->objectid;
8237 rec->cache.size = key->offset;
8239 rec->generation = btrfs_header_generation(leaf);
8241 rec->objectid = key->objectid;
8242 rec->type = key->type;
8243 rec->offset = key->offset;
8245 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8246 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8248 INIT_LIST_HEAD(&rec->list);
/*
 * Record the block group item at @slot of leaf @eb in @block_group_cache.
 *
 * A record is created with btrfs_new_block_group_record() and inserted with
 * insert_block_group_record(); "Block Group[...] existed." is printed,
 * presumably when the insert reports a duplicate.
 *
 * NOTE(review): the condition around the message, any cleanup of @rec and
 * the return statement are not visible in this listing.
 */
8253 static int process_block_group_item(struct block_group_tree *block_group_cache,
8254 struct btrfs_key *key,
8255 struct extent_buffer *eb, int slot)
8257 struct block_group_record *rec;
8260 rec = btrfs_new_block_group_record(eb, key, slot);
8261 ret = insert_block_group_record(block_group_cache, rec);
8263 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8264 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record and fill it from the dev extent
 * item at @slot of @leaf.
 *
 * The cache entry is keyed by (key->objectid, key->offset) with the extent
 * length as its size.  The leaf generation, the raw key fields and the
 * chunk objectid/offset/length read from the item are stored, and both list
 * heads (chunk_list, device_list) are initialised.
 *
 * NOTE(review): the assignment target for the chunk offset value and the
 * allocation-failure handling are on lines not visible in this listing.
 */
8271 struct device_extent_record *
8272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8273 struct btrfs_key *key, int slot)
8275 struct device_extent_record *rec;
8276 struct btrfs_dev_extent *ptr;
8278 rec = calloc(1, sizeof(*rec));
8280 fprintf(stderr, "memory allocation failed\n");
/* Cache key: objectid and start both come straight from the item key. */
8284 rec->cache.objectid = key->objectid;
8285 rec->cache.start = key->offset;
8287 rec->generation = btrfs_header_generation(leaf);
8289 rec->objectid = key->objectid;
8290 rec->type = key->type;
8291 rec->offset = key->offset;
/* Back-reference to the owning chunk, read from the on-disk item. */
8293 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8294 rec->chunk_objecteid =
8295 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8297 btrfs_dev_extent_chunk_offset(leaf, ptr);
8298 rec->length = btrfs_dev_extent_length(leaf, ptr);
8299 rec->cache.size = rec->length;
8301 INIT_LIST_HEAD(&rec->chunk_list);
8302 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the dev extent item at @slot of leaf @eb in @dev_extent_cache.
 *
 * A record is created with btrfs_new_device_extent_record() and inserted
 * with insert_device_extent_record(); "Device extent[...] existed." is
 * printed, presumably when the insert reports a duplicate.
 *
 * NOTE(review): the return type line, the condition around the message and
 * the return statement are not visible in this listing.
 */
8308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8309 struct btrfs_key *key, struct extent_buffer *eb,
8312 struct device_extent_record *rec;
8315 rec = btrfs_new_device_extent_record(eb, key, slot);
8316 ret = insert_device_extent_record(dev_extent_cache, rec);
8319 "Device extent[%llu, %llu, %llu] existed.\n",
8320 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent described by the EXTENT_ITEM / METADATA_ITEM at @slot of
 * leaf @eb in @extent_cache, together with all of its inline backrefs.
 *
 * Steps visible below:
 *  - derive the extent length: nodesize for METADATA_ITEM keys, key.offset
 *    otherwise;
 *  - reject extents whose start is not sectorsize aligned, metadata extents
 *    whose length differs from nodesize, and data extents whose length is
 *    not sectorsize aligned (each with an error() message);
 *  - handle items smaller than struct btrfs_extent_item through the
 *    BTRFS_COMPAT_EXTENT_TREE_V0 path;
 *  - add the extent record, then walk the inline references that follow the
 *    extent item and add a tree or data backref for each one.
 *
 * NOTE(review): declarations of refs/metadata/ptr/end/type/offset/ret, the
 * loop and switch headers, and all return statements are on lines that are
 * not visible in this listing.
 */
8327 static int process_extent_item(struct btrfs_root *root,
8328 struct cache_tree *extent_cache,
8329 struct extent_buffer *eb, int slot)
8331 struct btrfs_extent_item *ei;
8332 struct btrfs_extent_inline_ref *iref;
8333 struct btrfs_extent_data_ref *dref;
8334 struct btrfs_shared_data_ref *sref;
8335 struct btrfs_key key;
8336 struct extent_record tmpl;
8341 u32 item_size = btrfs_item_size_nr(eb, slot);
8347 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry a length; every tree block is nodesize. */
8349 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8351 num_bytes = root->fs_info->nodesize;
8353 num_bytes = key.offset;
8356 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8357 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8358 key.objectid, root->fs_info->sectorsize);
/* An item too small for the current layout can only be the old v0 format. */
8361 if (item_size < sizeof(*ei)) {
8362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8363 struct btrfs_extent_item_v0 *ei0;
8364 BUG_ON(item_size != sizeof(*ei0));
8365 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8366 refs = btrfs_extent_refs_v0(eb, ei0);
8370 memset(&tmpl, 0, sizeof(tmpl));
8371 tmpl.start = key.objectid;
8372 tmpl.nr = num_bytes;
8373 tmpl.extent_item_refs = refs;
8374 tmpl.metadata = metadata;
8376 tmpl.max_size = num_bytes;
/* v0 items have no inline refs, so the record is all there is to add. */
8378 return add_extent_rec(extent_cache, &tmpl);
8381 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8382 refs = btrfs_extent_refs(eb, ei);
8383 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8387 if (metadata && num_bytes != root->fs_info->nodesize) {
8388 error("ignore invalid metadata extent, length %llu does not equal to %u",
8389 num_bytes, root->fs_info->nodesize);
8392 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8393 error("ignore invalid data extent, length %llu is not aligned to %u",
8394 num_bytes, root->fs_info->sectorsize);
8398 memset(&tmpl, 0, sizeof(tmpl));
8399 tmpl.start = key.objectid;
8400 tmpl.nr = num_bytes;
8401 tmpl.extent_item_refs = refs;
8402 tmpl.metadata = metadata;
8404 tmpl.max_size = num_bytes;
/* Unlike the v0 path above, the result of add_extent_rec() is not used here. */
8405 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  Tree blocks stored under
 * an EXTENT_ITEM key additionally carry a btrfs_tree_block_info first.
 */
8407 ptr = (unsigned long)(ei + 1);
8408 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8409 key.type == BTRFS_EXTENT_ITEM_KEY)
8410 ptr += sizeof(struct btrfs_tree_block_info);
8412 end = (unsigned long)ei + item_size;
8414 iref = (struct btrfs_extent_inline_ref *)ptr;
8415 type = btrfs_extent_inline_ref_type(eb, iref);
8416 offset = btrfs_extent_inline_ref_offset(eb, iref);
8418 case BTRFS_TREE_BLOCK_REF_KEY:
8419 ret = add_tree_backref(extent_cache, key.objectid,
8423 "add_tree_backref failed (extent items tree block): %s",
8426 case BTRFS_SHARED_BLOCK_REF_KEY:
8427 ret = add_tree_backref(extent_cache, key.objectid,
8431 "add_tree_backref failed (extent items shared block): %s",
8434 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the inline ref's offset field. */
8435 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8436 add_data_backref(extent_cache, key.objectid, 0,
8437 btrfs_extent_data_ref_root(eb, dref),
8438 btrfs_extent_data_ref_objectid(eb,
8440 btrfs_extent_data_ref_offset(eb, dref),
8441 btrfs_extent_data_ref_count(eb, dref),
8444 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref header. */
8445 sref = (struct btrfs_shared_data_ref *)(iref + 1);
8446 add_data_backref(extent_cache, key.objectid, offset,
8448 btrfs_shared_data_ref_count(eb, sref),
8452 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8453 key.objectid, key.type, num_bytes);
/* Step over this inline ref; its size depends on the ref type. */
8456 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one entry in cache->free_space_ctl.
 *
 * Super block mirrors are not free space, so for each mirror the physical
 * super block offset is mapped back to logical addresses inside this block
 * group with btrfs_rmap_block() and the overlapping stripes are trimmed from
 * the range first.  When a stripe lands in the middle of the range, the left
 * part is checked by a recursive call and the loop continues with the right
 * part.
 *
 * The remaining range must then match a free space entry with the same
 * offset and length; mismatches are reported on stderr.  A matching entry is
 * unlinked from the free space ctl.
 *
 * NOTE(review): local declarations, the inner loop header over the rmap
 * results, several conditions and all return statements are on lines that
 * are not visible in this listing.
 */
8463 static int check_cache_range(struct btrfs_root *root,
8464 struct btrfs_block_group_cache *cache,
8465 u64 offset, u64 bytes)
8467 struct btrfs_free_space *entry;
8473 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8474 bytenr = btrfs_sb_offset(i);
8475 ret = btrfs_rmap_block(root->fs_info,
8476 cache->key.objectid, bytenr, 0,
8477 &logical, &nr, &stripe_len);
/* Stripe entirely before or entirely after the range: nothing to trim. */
8482 if (logical[nr] + stripe_len <= offset)
8484 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: drop the leading stripe. */
8486 if (logical[nr] == offset) {
8487 if (stripe_len >= bytes) {
8491 bytes -= stripe_len;
8492 offset += stripe_len;
/* Stripe starts before the range: keep only what lies past its end. */
8493 } else if (logical[nr] < offset) {
8494 if (logical[nr] + stripe_len >=
8499 bytes = (offset + bytes) -
8500 (logical[nr] + stripe_len);
8501 offset = logical[nr] + stripe_len;
8504 * Could be tricky, the super may land in the
8505 * middle of the area we're checking. First
8506 * check the easiest case, it's at the end.
8508 if (logical[nr] + stripe_len >=
8510 bytes = logical[nr] - offset;
8514 /* Check the left side */
8515 ret = check_cache_range(root, cache,
8517 logical[nr] - offset);
8523 /* Now we continue with the right side */
8524 bytes = (offset + bytes) -
8525 (logical[nr] + stripe_len);
8526 offset = logical[nr] + stripe_len;
/* Whatever is left must be covered by one free space entry, exactly. */
8533 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8535 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8536 offset, offset+bytes);
8540 if (entry->offset != offset) {
8541 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8546 if (entry->bytes != bytes) {
8547 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8548 bytes, entry->bytes, offset);
/* Consume the entry; verify_space_cache() later tests for leftover entries. */
8552 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from the start of the block group (but never
 * below BTRFS_SUPER_INFO_OFFSET).  `last` tracks the end of the most recent
 * allocated extent; every gap between `last` and the next EXTENT_ITEM /
 * METADATA_ITEM, and the tail up to the end of the block group, is passed
 * to check_cache_range().  Afterwards the free space rb-tree is tested with
 * RB_EMPTY_ROOT() and a message is printed when entries are still left.
 *
 * NOTE(review): the loop header, the `last`/`ret` declarations, the
 * break/continue statements and the return statement are on lines that are
 * not visible in this listing.
 */
8557 static int verify_space_cache(struct btrfs_root *root,
8558 struct btrfs_block_group_cache *cache)
8560 struct btrfs_path path;
8561 struct extent_buffer *leaf;
8562 struct btrfs_key key;
/* All lookups below go to the extent tree, whatever root was passed in. */
8566 root = root->fs_info->extent_root;
8568 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8570 btrfs_init_path(&path);
8571 key.objectid = last;
8573 key.type = BTRFS_EXTENT_ITEM_KEY;
8574 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8579 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8580 ret = btrfs_next_leaf(root, &path);
8588 leaf = path.nodes[0];
8589 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items at or beyond the end of this block group are out of scope. */
8590 if (key.objectid >= cache->key.offset + cache->key.objectid)
8592 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8593 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts right where the previous one ended: no gap to verify. */
8598 if (last == key.objectid) {
8599 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8600 last = key.objectid + key.offset;
8602 last = key.objectid + root->fs_info->nodesize;
/* Gap between `last` and this extent must be recorded as free space. */
8607 ret = check_cache_range(root, cache, last,
8608 key.objectid - last);
8611 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8612 last = key.objectid + key.offset;
8614 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group after the last extent. */
8618 if (last < cache->key.objectid + cache->key.offset)
8619 ret = check_cache_range(root, cache, last,
8620 cache->key.objectid +
8621 cache->key.offset - last);
8624 btrfs_release_path(&path);
8627 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8628 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * If the super block's cache generation is set (not -1ULL) and differs from
 * the super generation, a message says the space cache will be invalidated.
 * Otherwise block groups are looked up one after another starting past the
 * primary super block; for each one the free space ctl is initialised if
 * missing, any previously loaded cache is dropped, and the free space is
 * loaded either from the free space tree (FREE_SPACE_TREE compat_ro bit,
 * with super stripes excluded around the load) or from the free space
 * cache, then checked with verify_space_cache().
 *
 * Returns -EINVAL when the local `error` variable is non-zero, 0 otherwise
 * (final visible statement).
 *
 * NOTE(review): the loop header, the declarations of `ret` and `error`, the
 * conditions around the messages and any early returns are on lines that are
 * not visible in this listing.
 */
8636 static int check_space_cache(struct btrfs_root *root)
8638 struct btrfs_block_group_cache *cache;
8639 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8643 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8644 btrfs_super_generation(root->fs_info->super_copy) !=
8645 btrfs_super_cache_generation(root->fs_info->super_copy)) {
8646 printf("cache and super generation don't match, space cache "
8647 "will be invalidated\n");
8651 if (ctx.progress_enabled) {
8652 ctx.tp = TASK_FREE_SPACE;
8653 task_start(ctx.info);
8657 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after this block group. */
8661 start = cache->key.objectid + cache->key.offset;
8662 if (!cache->free_space_ctl) {
8663 if (btrfs_init_free_space_ctl(cache,
8664 root->fs_info->sectorsize)) {
8669 btrfs_remove_free_space_cache(cache);
8672 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8673 ret = exclude_super_stripes(root, cache);
8675 fprintf(stderr, "could not exclude super stripes: %s\n",
8680 ret = load_free_space_tree(root->fs_info, cache);
8681 free_excluded_extents(root, cache);
8683 fprintf(stderr, "could not load free space tree: %s\n",
8690 ret = load_free_space_cache(root->fs_info, cache);
8695 ret = verify_space_cache(root, cache);
8697 fprintf(stderr, "cache appears valid but isn't %Lu\n",
8698 cache->key.objectid);
8703 task_stop(ctx.info);
8705 return error ? -EINVAL : 0;
/*
 * Read the data at [@bytenr, @bytenr + @num_bytes) and compare its checksums
 * with the ones stored in csum leaf @eb starting at @leaf_offset.
 *
 * The range is read with read_extent_data() into a buffer of @num_bytes
 * bytes.  Each sectorsize block of what was read is checksummed with
 * btrfs_csum_data()/btrfs_csum_final() and compared with the expected value
 * read from @eb at leaf_offset + (block index * csum_size).  A mismatch is
 * printed with the mirror number, and the `mirror < num_copies - 1` test
 * decides whether another mirror is tried.
 *
 * NOTE(review): declarations of offset/mirror/data/csum/csum_expected, the
 * failure paths of the visible checks, the buffer release and the return
 * statements are on lines that are not visible in this listing.
 */
8708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8709 u64 num_bytes, unsigned long leaf_offset,
8710 struct extent_buffer *eb) {
8712 struct btrfs_fs_info *fs_info = root->fs_info;
8714 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8716 unsigned long csum_offset;
8720 u64 data_checked = 0;
/* Only whole sectors can be checksummed. */
8726 if (num_bytes % fs_info->sectorsize)
8729 data = malloc(num_bytes);
8733 while (offset < num_bytes) {
8736 read_len = num_bytes - offset;
8737 /* read as much as possible in one go */
8738 ret = read_extent_data(fs_info, data + offset,
8739 bytenr + offset, &read_len, mirror);
8743 /* verify the checksum of every sectorsize block that was read */
8744 while (data_checked < read_len) {
8746 tmp = offset + data_checked;
8748 csum = btrfs_csum_data((char *)data + tmp,
8749 csum, fs_info->sectorsize);
8750 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size entry per sector, laid out consecutively in the leaf. */
8752 csum_offset = leaf_offset +
8753 tmp / fs_info->sectorsize * csum_size;
8754 read_extent_buffer(eb, (char *)&csum_expected,
8755 csum_offset, csum_size);
8756 /* try another mirror */
8757 if (csum != csum_expected) {
8758 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8759 mirror, bytenr + tmp,
8760 csum, csum_expected);
8761 num_copies = btrfs_num_copies(root->fs_info,
8763 if (mirror < num_copies - 1) {
8768 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search key (bytenr, EXTENT_ITEM, -1) positions the path just past any
 * extent item starting at @bytenr; the path is then stepped back (crossing
 * to the previous leaf when needed) until the key type is no larger than
 * EXTENT_ITEM.  From there items are walked forward and every overlapping
 * extent trims the range still unaccounted for.  An extent in the middle of
 * the range splits it: the right-hand part is checked by a recursive call
 * and the walk continues with the left-hand part.
 *
 * If bytes remain uncovered and no error occurred, "There are no extents for
 * csum range" is printed.
 *
 * NOTE(review): the second parameter line, the loop headers, most branch
 * bodies and the return statements are on lines that are not visible in this
 * listing.
 */
8777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8780 struct btrfs_path path;
8781 struct extent_buffer *leaf;
8782 struct btrfs_key key;
8785 btrfs_init_path(&path);
8786 key.objectid = bytenr;
8787 key.type = BTRFS_EXTENT_ITEM_KEY;
8788 key.offset = (u64)-1;
8791 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8794 fprintf(stderr, "Error looking up extent record %d\n", ret);
8795 btrfs_release_path(&path);
/* Step back one item, moving to the previous leaf at slot 0. */
8798 if (path.slots[0] > 0) {
8801 ret = btrfs_prev_leaf(root, &path);
8804 } else if (ret > 0) {
8811 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8814 * Block group items come before extent items if they have the same
8815 * bytenr, so walk back one more just in case. Dear future traveller,
8816 * first congrats on mastering time travel. Now if it's not too much
8817 * trouble could you go back to 2006 and tell Chris to make the
8818 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8819 * EXTENT_ITEM_KEY please?
8821 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8822 if (path.slots[0] > 0) {
8825 ret = btrfs_prev_leaf(root, &path);
8828 } else if (ret > 0) {
8833 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8837 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8838 ret = btrfs_next_leaf(root, &path);
8840 fprintf(stderr, "Error going to next leaf "
8842 btrfs_release_path(&path);
8848 leaf = path.nodes[0];
8849 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8850 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts: not relevant. */
8854 if (key.objectid + key.offset < bytenr) {
/* Extent starts past the end of the range: nothing further can overlap. */
8858 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: consume its length. */
8861 if (key.objectid == bytenr) {
8862 if (key.offset >= num_bytes) {
8866 num_bytes -= key.offset;
8867 bytenr += key.offset;
/* Extent starts before the range: keep only what lies past its end. */
8868 } else if (key.objectid < bytenr) {
8869 if (key.objectid + key.offset >= bytenr + num_bytes) {
8873 num_bytes = (bytenr + num_bytes) -
8874 (key.objectid + key.offset);
8875 bytenr = key.objectid + key.offset;
8877 if (key.objectid + key.offset < bytenr + num_bytes) {
8878 u64 new_start = key.objectid + key.offset;
8879 u64 new_bytes = bytenr + num_bytes - new_start;
8882 * Weird case, the extent is in the middle of
8883 * our range, we'll have to search one side
8884 * and then the other. Not sure if this happens
8885 * in real life, but no harm in coding it up
8886 * anyway just in case.
8888 btrfs_release_path(&path);
8889 ret = check_extent_exists(root, new_start,
8892 fprintf(stderr, "Right section didn't "
8896 num_bytes = key.objectid - bytenr;
8899 num_bytes = key.objectid - bytenr;
8906 if (num_bytes && !ret) {
8907 fprintf(stderr, "There are no extents for csum range "
8908 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8912 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed byte range is backed
 * by an extent record.
 *
 * Starting from the first EXTENT_CSUM key, each csum item contributes
 * data_len = (item size / csum size) * sectorsize bytes.  Consecutive items
 * are merged into one run [offset, offset + num_bytes); when an item does
 * not start where the current run ends, the run is verified with
 * check_extent_exists() and a new run is started at the item's key offset.
 * When the global check_data_csum is set, the data behind each item is also
 * verified with check_extent_csums(); otherwise that step is skipped.
 *
 * NOTE(review): the loop header, the declarations of ret/errors/data_len,
 * the skip_csum_check label, the handling of the final run and the return
 * statement are on lines that are not visible in this listing.
 */
8916 static int check_csums(struct btrfs_root *root)
8918 struct btrfs_path path;
8919 struct extent_buffer *leaf;
8920 struct btrfs_key key;
8921 u64 offset = 0, num_bytes = 0;
8922 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8926 unsigned long leaf_offset;
/* From here on `root` refers to the csum tree. */
8928 root = root->fs_info->csum_root;
8929 if (!extent_buffer_uptodate(root->node)) {
8930 fprintf(stderr, "No valid csum tree found\n");
8934 btrfs_init_path(&path);
8935 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8936 key.type = BTRFS_EXTENT_CSUM_KEY;
8938 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8940 fprintf(stderr, "Error searching csum tree %d\n", ret);
8941 btrfs_release_path(&path);
8945 if (ret > 0 && path.slots[0])
8950 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8951 ret = btrfs_next_leaf(root, &path);
8953 fprintf(stderr, "Error going to next leaf "
8960 leaf = path.nodes[0];
8962 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8963 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this item: one csum per sector. */
8968 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8969 csum_size) * root->fs_info->sectorsize;
8970 if (!check_data_csum)
8971 goto skip_csum_check;
8972 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8973 ret = check_extent_csums(root, key.offset, data_len,
8979 offset = key.offset;
/* Item is not contiguous with the current run: verify the run, restart. */
8980 } else if (key.offset != offset + num_bytes) {
8981 ret = check_extent_exists(root, offset, num_bytes);
8983 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8984 "there is no extent record\n",
8985 offset, offset+num_bytes);
8988 offset = key.offset;
8991 num_bytes += data_len;
8995 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in (objectid, type, offset)
 * order, testing at each level whether @key sorts before @drop_key.
 *
 * NOTE(review): the return statements are on lines not visible in this
 * listing; presumably non-zero is returned when @key is strictly smaller
 * than @drop_key and zero otherwise -- confirm against the full source.
 */
8999 static int is_dropped_key(struct btrfs_key *key,
9000 struct btrfs_key *drop_key) {
9001 if (key->objectid < drop_key->objectid)
9003 else if (key->objectid == drop_key->objectid) {
9004 if (key->type < drop_key->type)
9006 else if (key->type == drop_key->type) {
9007 if (key->offset < drop_key->offset)
9015 * Here are the rules for FULL_BACKREF.
9017 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9018 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9019 * FULL_BACKREF set.
9020 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
9021 * if it happened after the relocation occurred since we'll have dropped the
9022 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9023 * have no real way to know for sure.
9025 * We process the blocks one root at a time, and we start from the lowest root
9026 * objectid and go to the highest. So we can just look up the owner backref for
9027 * the record and if we don't find it then we know it doesn't exist and we have
9028 * a FULL_BACKREF.
9030 * FIXME: if we ever start reclaiming root objectids then we need to fix this
9031 * assumption and simply indicate that we _think_ that the FULL_BACKREF needs to
9032 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached through root item @ri, should be
 * treated as having BTRFS_BLOCK_FLAG_FULL_BACKREF, and report it in *flags.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * visible tests are, in order: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node itself
 * (buf->start == ri->bytenr), whether the RELOC header flag is set, whether
 * the header owner equals ri->objectid, and finally whether a tree backref
 * for the header owner exists on the record.
 *
 * The result is also checked against rec->flag_block_full_backref: when a
 * previous decision (anything but FLAG_UNSET) disagrees with the new one,
 * rec->bad_full_backref is set.
 *
 * NOTE(review): the last parameter line, the labels/gotos linking the tests
 * to the two outcomes and the return statements are on lines that are not
 * visible in this listing.
 */
9034 static int calc_extent_flag(struct cache_tree *extent_cache,
9035 struct extent_buffer *buf,
9036 struct root_item_record *ri,
9039 struct extent_record *rec;
9040 struct cache_extent *cache;
9041 struct tree_backref *tback;
9044 cache = lookup_cache_extent(extent_cache, buf->start, 1);
9045 /* we have added this extent before */
9049 rec = container_of(cache, struct extent_record, cache);
9052 * Except file/reloc tree, we can not have
9055 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9060 if (buf->start == ri->bytenr)
9063 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9066 owner = btrfs_header_owner(buf);
9067 if (owner == ri->objectid)
/* Look for a non-shared (parent == 0) backref held by the header owner. */
9070 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a record that earlier said otherwise. */
9075 if (rec->flag_block_full_backref != FLAG_UNSET &&
9076 rec->flag_block_full_backref != 0)
9077 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: flag a record that earlier said otherwise. */
9080 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9081 if (rec->flag_block_full_backref != FLAG_UNSET &&
9082 rec->flag_block_full_backref != 1)
9083 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", using print_key_type()
 * and print_objectid() to render @key_type and @rootid.
 */
9087 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9089 fprintf(stderr, "Invalid key type(");
9090 print_key_type(stderr, 0, key_type);
9091 fprintf(stderr, ") found in root(");
9092 print_objectid(stderr, rootid, 0);
9093 fprintf(stderr, ")\n");
9097 * Check if the key is valid with its extent buffer.
9099 * This is an early check in case an invalid key exists in an extent buffer.
9100 * This is not comprehensive yet, but should prevent a wrong key/item from
9101 * being passed further.
/*
 * Check that an item of @key_type may legitimately live in the tree
 * identified by @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM only in the chunk tree;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM only in the extent tree;
 *  - ROOT_ITEM only in the root tree;
 *  - DEV_EXTENT only in the dev tree.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the goto/break lines, the error label and
 * the return statements are on lines that are not visible in this listing.
 */
9103 static int check_type_with_root(u64 rootid, u8 key_type)
9106 /* Only valid in chunk tree */
9107 case BTRFS_DEV_ITEM_KEY:
9108 case BTRFS_CHUNK_ITEM_KEY:
9109 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9112 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other label here is a key type, and the visible test mentions the log
 * tree but not the csum tree.  This looks like it was meant to be the csum
 * key type -- confirm before relying on this case.
 */
9113 case BTRFS_CSUM_TREE_OBJECTID:
9114 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9118 case BTRFS_EXTENT_ITEM_KEY:
9119 case BTRFS_METADATA_ITEM_KEY:
9120 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9121 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9124 case BTRFS_ROOT_ITEM_KEY:
9125 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9128 case BTRFS_DEV_EXTENT_KEY:
9129 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9135 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and feed everything it contains
 * into the various caches used by the checker.
 *
 * Outline of the visible steps:
 *  1. pick_next_pending() fills @bits with candidate blocks; each candidate
 *     is added to @reada and read ahead with readahead_tree_block().
 *  2. bits[0] becomes the block to process; it is removed from @pending,
 *     @reada and @nodes, and its extent record (if any) supplies the parent
 *     generation used for read_tree_block().  An unreadable block is
 *     reported through record_bad_block_io().
 *  3. The block's FULL_BACKREF state is determined, from the extent tree via
 *     btrfs_lookup_extent_info() unless init_extent_tree is set, with
 *     calc_extent_flag() also called below, and then cross-checked against
 *     the owner / RELOC header flag; disagreements set rec->bad_full_backref.
 *  4. check_block() validates the block.
 *  5. For a leaf, every item is dispatched on its key type to the matching
 *     process_*() helper or backref recorder, and file extents update the
 *     global data byte counters.  For a node, each child pointer gets an
 *     extent record and a tree backref and is queued in @nodes or @pending.
 *  6. Global btree statistics are updated and the buffer is released.
 *
 * NOTE(review): many short lines (two parameter lines, declarations, loop
 * and branch headers, continue/goto/return statements) are not visible in
 * this listing, so the exact control flow between the visible statements
 * cannot be confirmed here.
 */
9139 static int run_next_block(struct btrfs_root *root,
9140 struct block_info *bits,
9143 struct cache_tree *pending,
9144 struct cache_tree *seen,
9145 struct cache_tree *reada,
9146 struct cache_tree *nodes,
9147 struct cache_tree *extent_cache,
9148 struct cache_tree *chunk_cache,
9149 struct rb_root *dev_cache,
9150 struct block_group_tree *block_group_cache,
9151 struct device_extent_tree *dev_extent_cache,
9152 struct root_item_record *ri)
9154 struct btrfs_fs_info *fs_info = root->fs_info;
9155 struct extent_buffer *buf;
9156 struct extent_record *rec = NULL;
9167 struct btrfs_key key;
9168 struct cache_extent *cache;
/* Step 1: choose candidates and start read-ahead for them. */
9171 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9172 bits_nr, &reada_bits);
9177 for(i = 0; i < nritems; i++) {
9178 ret = add_cache_extent(reada, bits[i].start,
9183 /* fixme, get the parent transid */
9184 readahead_tree_block(fs_info, bits[i].start, 0);
/* Step 2: the first candidate is the block processed in this call. */
9187 *last = bits[0].start;
9188 bytenr = bits[0].start;
9189 size = bits[0].size;
/* Drop the block from every queue it may still be on. */
9191 cache = lookup_cache_extent(pending, bytenr, size);
9193 remove_cache_extent(pending, cache);
9196 cache = lookup_cache_extent(reada, bytenr, size);
9198 remove_cache_extent(reada, cache);
9201 cache = lookup_cache_extent(nodes, bytenr, size);
9203 remove_cache_extent(nodes, cache);
/* A known extent record tells us which generation the parent expects. */
9206 cache = lookup_cache_extent(extent_cache, bytenr, size);
9208 rec = container_of(cache, struct extent_record, cache);
9209 gen = rec->parent_generation;
9212 /* fixme, get the real parent transid */
9213 buf = read_tree_block(root->fs_info, bytenr, gen);
9214 if (!extent_buffer_uptodate(buf)) {
9215 record_bad_block_io(root->fs_info,
9216 extent_cache, bytenr, size);
9220 nritems = btrfs_header_nritems(buf);
/* Step 3: determine the FULL_BACKREF flag for this block. */
9223 if (!init_extent_tree) {
9224 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9225 btrfs_header_level(buf), 1, NULL,
9228 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9230 fprintf(stderr, "Couldn't calc extent flags\n");
9231 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9236 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9238 fprintf(stderr, "Couldn't calc extent flags\n");
9239 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * NOTE(review): `rec` starts out NULL and is only set when the extent cache
 * lookup above succeeds; the guards protecting the rec-> accesses below are
 * on lines not visible here.
 */
9243 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9245 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9246 ri->objectid == btrfs_header_owner(buf)) {
9248 * Ok we got to this block from it's original owner and
9249 * we have FULL_BACKREF set. Relocation can leave
9250 * converted blocks over so this is altogether possible,
9251 * however it's not possible if the generation > the
9252 * last snapshot, so check for this case.
9254 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9255 btrfs_header_generation(buf) > ri->last_snapshot) {
9256 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9257 rec->bad_full_backref = 1;
9262 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9263 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9264 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9265 rec->bad_full_backref = 1;
/* Remember the final decision on the record for later consistency checks. */
9269 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9270 rec->flag_block_full_backref = 1;
9274 rec->flag_block_full_backref = 0;
9276 owner = btrfs_header_owner(buf);
/* Step 4: structural validation of the block itself. */
9279 ret = check_block(root, extent_cache, buf, flags);
/* Step 5a: leaf -- dispatch every item on its key type. */
9283 if (btrfs_is_leaf(buf)) {
9284 btree_space_waste += btrfs_leaf_free_space(root, buf);
9285 for (i = 0; i < nritems; i++) {
9286 struct btrfs_file_extent_item *fi;
9287 btrfs_item_key_to_cpu(buf, &key, i);
9289 * Check key type against the leaf owner.
9290 * Could filter quite a lot of early error if
9293 if (check_type_with_root(btrfs_header_owner(buf),
9295 fprintf(stderr, "ignoring invalid key\n");
9298 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9299 process_extent_item(root, extent_cache, buf,
9303 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9304 process_extent_item(root, extent_cache, buf,
9308 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9310 btrfs_item_size_nr(buf, i);
9313 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9314 process_chunk_item(chunk_cache, &key, buf, i);
9317 if (key.type == BTRFS_DEV_ITEM_KEY) {
9318 process_device_item(dev_cache, &key, buf, i);
9321 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9322 process_block_group_item(block_group_cache,
9326 if (key.type == BTRFS_DEV_EXTENT_KEY) {
9327 process_device_extent_item(dev_extent_cache,
9332 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9333 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9334 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: the key itself carries the reference. */
9341 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9342 ret = add_tree_backref(extent_cache,
9343 key.objectid, 0, key.offset, 0);
9346 "add_tree_backref failed (leaf tree block): %s",
9350 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9351 ret = add_tree_backref(extent_cache,
9352 key.objectid, key.offset, 0, 0);
9355 "add_tree_backref failed (leaf shared block): %s",
9359 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9360 struct btrfs_extent_data_ref *ref;
9361 ref = btrfs_item_ptr(buf, i,
9362 struct btrfs_extent_data_ref);
9363 add_data_backref(extent_cache,
9365 btrfs_extent_data_ref_root(buf, ref),
9366 btrfs_extent_data_ref_objectid(buf,
9368 btrfs_extent_data_ref_offset(buf, ref),
9369 btrfs_extent_data_ref_count(buf, ref),
9370 0, root->fs_info->sectorsize);
9373 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9374 struct btrfs_shared_data_ref *ref;
9375 ref = btrfs_item_ptr(buf, i,
9376 struct btrfs_shared_data_ref);
9377 add_data_backref(extent_cache,
9378 key.objectid, key.offset, 0, 0, 0,
9379 btrfs_shared_data_ref_count(buf, ref),
9380 0, root->fs_info->sectorsize);
/* Orphan items may be queued on the global delete_items list. */
9383 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9384 struct bad_item *bad;
9386 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9390 bad = malloc(sizeof(struct bad_item));
9393 INIT_LIST_HEAD(&bad->list);
9394 memcpy(&bad->key, &key,
9395 sizeof(struct btrfs_key));
9396 bad->root_id = owner;
9397 list_add_tail(&bad->list, &delete_items);
/* Everything below only concerns regular (non-inline, non-hole) file extents. */
9400 if (key.type != BTRFS_EXTENT_DATA_KEY)
9402 fi = btrfs_item_ptr(buf, i,
9403 struct btrfs_file_extent_item);
9404 if (btrfs_file_extent_type(buf, fi) ==
9405 BTRFS_FILE_EXTENT_INLINE)
9407 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9410 data_bytes_allocated +=
9411 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this tests the running global total, not this item's
 * disk_num_bytes; the guarded statement is not visible in this listing.
 */
9412 if (data_bytes_allocated < root->fs_info->sectorsize) {
9415 data_bytes_referenced +=
9416 btrfs_file_extent_num_bytes(buf, fi);
9417 add_data_backref(extent_cache,
9418 btrfs_file_extent_disk_bytenr(buf, fi),
9419 parent, owner, key.objectid, key.offset -
9420 btrfs_file_extent_offset(buf, fi), 1, 1,
9421 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Step 5b: node -- record and queue every child block. */
9425 struct btrfs_key first_key;
9427 first_key.objectid = 0;
9430 btrfs_item_key_to_cpu(buf, &first_key, 0);
9431 level = btrfs_header_level(buf);
9432 for (i = 0; i < nritems; i++) {
9433 struct extent_record tmpl;
9435 ptr = btrfs_node_blockptr(buf, i);
9436 size = root->fs_info->nodesize;
9437 btrfs_node_key_to_cpu(buf, &key, i);
/* At the drop level, keys below the root's drop_key get special handling. */
9439 if ((level == ri->drop_level)
9440 && is_dropped_key(&key, &ri->drop_key)) {
9445 memset(&tmpl, 0, sizeof(tmpl));
9446 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9447 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9452 tmpl.max_size = size;
9453 ret = add_extent_rec(extent_cache, &tmpl);
9457 ret = add_tree_backref(extent_cache, ptr, parent,
9461 "add_tree_backref failed (non-leaf block): %s",
/* Children go to `nodes` or `pending`; the selecting test is not visible. */
9467 add_pending(nodes, seen, ptr, size);
9469 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots of this node count as wasted btree space. */
9472 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9473 nritems) * sizeof(struct btrfs_key_ptr);
/* Step 6: global statistics, then release the buffer. */
9475 total_btree_bytes += buf->len;
9476 if (fs_root_objectid(btrfs_header_owner(buf)))
9477 total_fs_tree_bytes += buf->len;
9478 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9479 total_extent_tree_bytes += buf->len;
9481 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing and register it in
 * @extent_cache.
 *
 * Blocks above level 0 are queued in @nodes, leaves in @pending.  An extent
 * record covering [buf->start, buf->start + buf->len) is added, followed by
 * a tree backref: a full backref (parent = buf->start) for the reloc tree or
 * for blocks whose backref revision predates BTRFS_MIXED_BACKREF_REV,
 * otherwise a normal backref owned by `objectid`.
 *
 * NOTE(review): the last parameter line(s), several template assignments
 * and the return statement are on lines that are not visible in this
 * listing.
 */
9485 static int add_root_to_pending(struct extent_buffer *buf,
9486 struct cache_tree *extent_cache,
9487 struct cache_tree *pending,
9488 struct cache_tree *seen,
9489 struct cache_tree *nodes,
9492 struct extent_record tmpl;
9495 if (btrfs_header_level(buf) > 0)
9496 add_pending(nodes, seen, buf->start, buf->len);
9498 add_pending(pending, seen, buf->start, buf->len);
9500 memset(&tmpl, 0, sizeof(tmpl));
9501 tmpl.start = buf->start;
9506 tmpl.max_size = buf->len;
9507 add_extent_rec(extent_cache, &tmpl);
9509 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9510 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9511 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9514 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9519 /* as we fix the tree, we might be deleting blocks that
9520 * we're tracking for repair. This hook makes sure we
9521 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the checker's extent cache in step when an extent reference is
 * dropped.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache.  @owner >= BTRFS_FIRST_FREE_OBJECTID
 * marks a data extent: the matching data backref has its found_ref /
 * num_refs counters (and the record's refs / extent_item_refs) reduced by
 * refs_to_drop.  Otherwise the matching tree backref has its found_ref /
 * found_extent_tree bits cleared.  Finally maybe_free_extent_rec() is given
 * the chance to release the record.
 *
 * NOTE(review): both branches erase the backref from the rb-tree only when
 * found_extent_tree is clear AND found_ref is still set, although found_ref
 * is cleared just above when its count reaches zero.  This looks like it
 * may have been meant as "!found_ref" -- confirm against the full source.
 * The last parameter line, NULL checks and return statements are on lines
 * that are not visible in this listing.
 */
9523 static int free_extent_hook(struct btrfs_trans_handle *trans,
9524 struct btrfs_root *root,
9525 u64 bytenr, u64 num_bytes, u64 parent,
9526 u64 root_objectid, u64 owner, u64 offset,
9529 struct extent_record *rec;
9530 struct cache_extent *cache;
9532 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9534 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9535 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9539 rec = container_of(cache, struct extent_record, cache);
9541 struct data_backref *back;
9542 back = find_data_backref(rec, parent, root_objectid, owner,
9543 offset, 1, bytenr, num_bytes);
9546 if (back->node.found_ref) {
9547 back->found_ref -= refs_to_drop;
9549 rec->refs -= refs_to_drop;
9551 if (back->node.found_extent_tree) {
9552 back->num_refs -= refs_to_drop;
9553 if (rec->extent_item_refs)
9554 rec->extent_item_refs -= refs_to_drop;
/* Clear the presence bits once the corresponding counter reaches zero. */
9556 if (back->found_ref == 0)
9557 back->node.found_ref = 0;
9558 if (back->num_refs == 0)
9559 back->node.found_extent_tree = 0;
9561 if (!back->node.found_extent_tree && back->node.found_ref) {
9562 rb_erase(&back->node.node, &rec->backref_tree);
9566 struct tree_backref *back;
9567 back = find_tree_backref(rec, parent, root_objectid);
9570 if (back->node.found_ref) {
9573 back->node.found_ref = 0;
9575 if (back->node.found_extent_tree) {
9576 if (rec->extent_item_refs)
9577 rec->extent_item_refs--;
9578 back->node.found_extent_tree = 0;
9580 if (!back->node.found_extent_tree && back->node.found_ref) {
9581 rb_erase(&back->node.node, &rec->backref_tree);
9585 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that describes the extent at `bytenr`.
 *
 * The extent tree is searched repeatedly with key.objectid = bytenr and
 * key.offset = (u64)-1.  Items whose objectid still equals bytenr and whose
 * type is EXTENT_ITEM, METADATA_ITEM or one of the backref item types are
 * deleted with btrfs_del_item(), each deletion being logged on stderr.  For
 * EXTENT_ITEM / METADATA_ITEM the block group accounting is adjusted through
 * btrfs_update_block_group() using key.offset or nodesize as the length.
 * Items of any other type are skipped by moving the search key to just
 * before the found key.
 *
 * NOTE(review): the last parameter line, the loop header, the initial
 * key.type assignment, the slot adjustments, the break/continue statements
 * and the return statement are on lines that are not visible in this
 * listing.
 */
9590 static int delete_extent_records(struct btrfs_trans_handle *trans,
9591 struct btrfs_root *root,
9592 struct btrfs_path *path,
9595 struct btrfs_key key;
9596 struct btrfs_key found_key;
9597 struct extent_buffer *leaf;
9602 key.objectid = bytenr;
9604 key.offset = (u64)-1;
9607 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9614 if (path->slots[0] == 0)
9620 leaf = path->nodes[0];
9621 slot = path->slots[0];
9623 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9624 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it and retry. */
9627 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9628 found_key.type != BTRFS_METADATA_ITEM_KEY &&
9629 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9630 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9631 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9632 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9633 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9634 btrfs_release_path(path);
9635 if (found_key.type == 0) {
9636 if (found_key.offset == 0)
9638 key.offset = found_key.offset - 1;
9639 key.type = found_key.type;
9641 key.type = found_key.type - 1;
9642 key.offset = (u64)-1;
9646 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9647 found_key.objectid, found_key.type, found_key.offset);
9649 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9652 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
9654 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9655 found_key.type == BTRFS_METADATA_ITEM_KEY) {
9656 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9657 found_key.offset : root->fs_info->nodesize;
9659 ret = btrfs_update_block_group(trans, root, bytenr,
9666 btrfs_release_path(path);
/*
 * record_extent() - re-create extent tree references for one backref of @rec.
 *
 * What the visible code does:
 *  - builds the key (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) and
 *    inserts an empty item into info->extent_root; the item is sized for a
 *    btrfs_extent_item, plus a btrfs_tree_block_info on one path;
 *  - initialises the item with a ref count of 0 and rec->generation, and sets
 *    either BTRFS_EXTENT_FLAG_DATA or BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags
 *    depending on back->is_data;
 *  - calls btrfs_update_block_group() for [rec->start, rec->max_size);
 *  - calls btrfs_inc_extent_ref() once per dback->found_ref for a data
 *    backref, or once for a tree backref, using the parent only when
 *    back->full_backref is set.
 *
 * NOTE(review): this listing is missing lines (braces, short statements,
 * returns).  The condition that guards the item insertion - presumably a test
 * of @allocated - and the error handling are not visible; confirm against the
 * complete file.
 */
9671 * for a single backref, this will allocate a new extent
9672 * and add the backref to it.
9674 static int record_extent(struct btrfs_trans_handle *trans,
9675 struct btrfs_fs_info *info,
9676 struct btrfs_path *path,
9677 struct extent_record *rec,
9678 struct extent_backref *back,
9679 int allocated, u64 flags)
9682 struct btrfs_root *extent_root = info->extent_root;
9683 struct extent_buffer *leaf;
9684 struct btrfs_key ins_key;
9685 struct btrfs_extent_item *ei;
9686 struct data_backref *dback;
9687 struct btrfs_tree_block_info *bi;
9690 rec->max_size = max_t(u64, rec->max_size,
9694 u32 item_size = sizeof(*ei);
/* Extra room for the tree block info that follows the extent item. */
9697 item_size += sizeof(*bi);
/* Key of the extent item being inserted: (start, EXTENT_ITEM, max_size). */
9699 ins_key.objectid = rec->start;
9700 ins_key.offset = rec->max_size;
9701 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9703 ret = btrfs_insert_empty_item(trans, extent_root, path,
9704 &ins_key, item_size);
9708 leaf = path->nodes[0];
9709 ei = btrfs_item_ptr(leaf, path->slots[0],
9710 struct btrfs_extent_item);
/* The item starts with zero refs; the btrfs_inc_extent_ref() calls further down add them. */
9712 btrfs_set_extent_refs(leaf, ei, 0);
9713 btrfs_set_extent_generation(leaf, ei, rec->generation);
9715 if (back->is_data) {
9716 btrfs_set_extent_flags(leaf, ei,
9717 BTRFS_EXTENT_FLAG_DATA);
9719 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly after the extent item in the leaf. */
9721 bi = (struct btrfs_tree_block_info *)(ei + 1);
9722 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" below looks like a mis-decoded "&copy_key" (HTML
 * entity damage in this listing) - confirm against the real source.
 */
9725 btrfs_set_disk_key_objectid(©_key,
9726 rec->info_objectid);
9727 btrfs_set_disk_key_type(©_key, 0);
9728 btrfs_set_disk_key_offset(©_key, 0);
9730 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9731 btrfs_set_tree_block_key(leaf, bi, ©_key);
9733 btrfs_set_extent_flags(leaf, ei,
9734 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9737 btrfs_mark_buffer_dirty(leaf);
9738 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9739 rec->max_size, 1, 0);
9742 btrfs_release_path(path);
9745 if (back->is_data) {
9749 dback = to_data_backref(back);
9750 if (back->full_backref)
9751 parent = dback->parent;
9755 for (i = 0; i < dback->found_ref; i++) {
9756 /* if parent != 0, we're doing a full backref
9757 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9758 * just makes the backref allocator create a data
9761 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9762 rec->start, rec->max_size,
9766 BTRFS_FIRST_FREE_OBJECTID :
9772 fprintf(stderr, "adding new data backref"
9773 " on %llu %s %llu owner %llu"
9774 " offset %llu found %d\n",
9775 (unsigned long long)rec->start,
9776 back->full_backref ?
9778 back->full_backref ?
9779 (unsigned long long)parent :
9780 (unsigned long long)dback->root,
9781 (unsigned long long)dback->owner,
9782 (unsigned long long)dback->offset,
9786 struct tree_backref *tback;
9788 tback = to_tree_backref(back);
9789 if (back->full_backref)
9790 parent = tback->parent;
9794 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9795 rec->start, rec->max_size,
9796 parent, tback->root, 0, 0);
9797 fprintf(stderr, "adding new tree backref on "
9798 "start %llu len %llu parent %llu root %llu\n",
9799 rec->start, rec->max_size, parent, tback->root);
9802 btrfs_release_path(path);
/*
 * find_entry() - look up an extent_entry by exact (bytenr, bytes) pair.
 *
 * Walks the list @entries and compares each entry's bytenr and bytes with the
 * arguments; both must be equal for a match.
 *
 * NOTE(review): the return statements are missing from this listing;
 * presumably the matching entry is returned, or NULL when nothing matches -
 * confirm against the complete file.
 */
9806 static struct extent_entry *find_entry(struct list_head *entries,
9807 u64 bytenr, u64 bytes)
9809 struct extent_entry *entry = NULL;
9811 list_for_each_entry(entry, entries, list) {
9812 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry() - pick the entry that the most backrefs agree on.
 *
 * Iterates over @entries comparing ->count values while tracking a current
 * candidate (best) and the previously visited entry (prev).  Entries whose
 * ->broken count equals their ->count are tested first and treated as
 * untrustworthy.  Equal counts between the candidate and the current entry
 * are handled as an unresolved tie.
 *
 * NOTE(review): several statements (continue/assignments/return) are missing
 * from this listing; presumably NULL is returned when no clear winner exists
 * - confirm against the complete file.
 */
9819 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9821 struct extent_entry *entry, *best = NULL, *prev = NULL;
9823 list_for_each_entry(entry, entries, list) {
9825 * If there are as many broken entries as entries then we know
9826 * not to trust this particular entry.
9828 if (entry->broken == entry->count)
9832 * Special case, when there are only two entries and 'best' is
9842 * If our current entry == best then we can't be sure our best
9843 * is really the best, so we need to keep searching.
9845 if (best && best->count == entry->count) {
9851 /* Prev == entry, not good enough, have to keep searching */
9852 if (!prev->broken && prev->count == entry->count)
9856 best = (prev->count > entry->count) ? prev : entry;
9857 else if (best->count < entry->count)
/*
 * repair_ref() - rewrite one file extent item so it matches @entry.
 *
 * Visible steps:
 *  1. read the fs root identified by dback->root;
 *  2. search read-only for (dback->owner, BTRFS_EXTENT_DATA_KEY,
 *     dback->offset) and walk forward, moving to the next leaf when the slot
 *     runs past the end, until a file extent whose disk bytenr and disk
 *     num_bytes equal dback->disk_bytenr / dback->bytes is found;
 *  3. start a transaction and search again with cow = 1 for the key found;
 *  4. adjust disk_bytenr and the in-extent offset so the item points at
 *     entry->bytenr; compressed extents whose start differs from the entry
 *     are reported rather than changed;
 *  5. set disk_num_bytes to entry->bytes, and ram_bytes too when the extent
 *     is not compressed;
 *  6. mark the leaf dirty and commit the transaction.
 *
 * Returns ret if it is non-zero, otherwise the commit result (err).
 *
 * NOTE(review): error-path statements and some assignments are missing from
 * this listing; the exact early-exit behaviour must be confirmed against the
 * complete file.
 */
9865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9866 struct data_backref *dback, struct extent_entry *entry)
9868 struct btrfs_trans_handle *trans;
9869 struct btrfs_root *root;
9870 struct btrfs_file_extent_item *fi;
9871 struct extent_buffer *leaf;
9872 struct btrfs_key key;
/* Locate the subvolume root that owns the file extent. */
9876 key.objectid = dback->root;
9877 key.type = BTRFS_ROOT_ITEM_KEY;
9878 key.offset = (u64)-1;
9879 root = btrfs_read_fs_root(info, &key);
9881 fprintf(stderr, "Couldn't find root for our ref\n");
9886 * The backref points to the original offset of the extent if it was
9887 * split, so we need to search down to the offset we have and then walk
9888 * forward until we find the backref we're looking for.
9890 key.objectid = dback->owner;
9891 key.type = BTRFS_EXTENT_DATA_KEY;
9892 key.offset = dback->offset;
/* Read-only lookup first (no transaction, no COW). */
9893 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9895 fprintf(stderr, "Error looking up ref %d\n", ret);
9900 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9901 ret = btrfs_next_leaf(root, path);
9903 fprintf(stderr, "Couldn't find our ref, next\n");
9907 leaf = path->nodes[0];
9908 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9909 if (key.objectid != dback->owner ||
9910 key.type != BTRFS_EXTENT_DATA_KEY) {
9911 fprintf(stderr, "Couldn't find our ref, search\n");
9914 fi = btrfs_item_ptr(leaf, path->slots[0],
9915 struct btrfs_file_extent_item);
9916 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9917 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9919 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9924 btrfs_release_path(path);
9926 trans = btrfs_start_transaction(root, 1);
9928 return PTR_ERR(trans);
9931 * Ok we have the key of the file extent we want to fix, now we can cow
9932 * down to the thing and fix it.
9934 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9936 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9937 key.objectid, key.type, key.offset, ret);
9941 fprintf(stderr, "Well that's odd, we just found this key "
9942 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9947 leaf = path->nodes[0];
9948 fi = btrfs_item_ptr(leaf, path->slots[0],
9949 struct btrfs_file_extent_item);
9951 if (btrfs_file_extent_compression(leaf, fi) &&
9952 dback->disk_bytenr != entry->bytenr) {
9953 fprintf(stderr, "Ref doesn't match the record start and is "
9954 "compressed, please take a btrfs-image of this file "
9955 "system and send it to a btrfs developer so they can "
9956 "complete this functionality for bytenr %Lu\n",
9957 dback->disk_bytenr);
/* Three cases: untrusted bytenr, ref starting after the entry, ref starting before it. */
9962 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9963 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9964 } else if (dback->disk_bytenr > entry->bytenr) {
9965 u64 off_diff, offset;
9967 off_diff = dback->disk_bytenr - entry->bytenr;
9968 offset = btrfs_file_extent_offset(leaf, fi);
9969 if (dback->disk_bytenr + offset +
9970 btrfs_file_extent_num_bytes(leaf, fi) >
9971 entry->bytenr + entry->bytes) {
9972 fprintf(stderr, "Ref is past the entry end, please "
9973 "take a btrfs-image of this file system and "
9974 "send it to a btrfs developer, ref %Lu\n",
9975 dback->disk_bytenr);
9980 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9981 btrfs_set_file_extent_offset(leaf, fi, offset);
9982 } else if (dback->disk_bytenr < entry->bytenr) {
9985 offset = btrfs_file_extent_offset(leaf, fi);
9986 if (dback->disk_bytenr + offset < entry->bytenr) {
9987 fprintf(stderr, "Ref is before the entry start, please"
9988 " take a btrfs-image of this file system and "
9989 "send it to a btrfs developer, ref %Lu\n",
9990 dback->disk_bytenr);
/* Re-base the offset so it is relative to entry->bytenr. */
9995 offset += dback->disk_bytenr;
9996 offset -= entry->bytenr;
9997 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9998 btrfs_set_file_extent_offset(leaf, fi, offset);
10001 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10004 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10005 * only do this if we aren't using compression, otherwise it's a
10008 if (!btrfs_file_extent_compression(leaf, fi))
10009 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10011 printf("ram bytes may be wrong?\n");
10012 btrfs_mark_buffer_dirty(leaf);
10014 err = btrfs_commit_transaction(trans, root);
10015 btrfs_release_path(path);
/* A failure recorded in ret takes precedence over the commit result. */
10016 return ret ? ret : err;
/*
 * verify_backrefs() - make the data backrefs of @rec agree on (bytenr, bytes).
 *
 * Visible steps:
 *  1. for every backref in rec->backref_tree that is a non-full data backref
 *     with found_ref != 0, find or allocate an extent_entry keyed on
 *     (dback->disk_bytenr, dback->bytes) in the local list "entries";
 *  2. if there is at most one entry and no mismatch was noted, nothing needs
 *     repairing;
 *  3. otherwise choose a winner with find_most_right_entry(); the extent
 *     record itself (rec->start, rec->nr) is looked up in, and possibly added
 *     to, the list before choosing again;
 *  4. refuse to continue when the winner's bytenr differs from rec->start;
 *  5. call repair_ref() for each eligible backref that disagrees with the
 *     winner;
 *  6. unlink every entry from "entries" (the matching free is not visible).
 *
 * NOTE(review): counter updates, the conditions around step 3, gotos and the
 * return statements are missing from this listing, so the return contract is
 * not documented here - confirm against the complete file.
 */
10019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10020 struct extent_record *rec)
10022 struct extent_backref *back, *tmp;
10023 struct data_backref *dback;
10024 struct extent_entry *entry, *best = NULL;
10025 LIST_HEAD(entries);
10026 int nr_entries = 0;
10027 int broken_entries = 0;
10029 short mismatch = 0;
10032 * Metadata is easy and the backrefs should always agree on bytenr and
10033 * size, if not we've got bigger issues.
10038 rbtree_postorder_for_each_entry_safe(back, tmp,
10039 &rec->backref_tree, node) {
10040 if (back->full_backref || !back->is_data)
10043 dback = to_data_backref(back);
10046 * We only pay attention to backrefs that we found a real
10049 if (dback->found_ref == 0)
10053 * For now we only catch when the bytes don't match, not the
10054 * bytenr. We can easily do this at the same time, but I want
10055 * to have a fs image to test on before we just add repair
10056 * functionality willy-nilly so we know we won't screw up the
10060 entry = find_entry(&entries, dback->disk_bytenr,
10063 entry = malloc(sizeof(struct extent_entry));
10068 memset(entry, 0, sizeof(*entry));
10069 entry->bytenr = dback->disk_bytenr;
10070 entry->bytes = dback->bytes;
10071 list_add_tail(&entry->list, &entries);
10076 * If we only have on entry we may think the entries agree when
10077 * in reality they don't so we have to do some extra checking.
10079 if (dback->disk_bytenr != rec->start ||
10080 dback->bytes != rec->nr || back->broken)
10083 if (back->broken) {
10091 /* Yay all the backrefs agree, carry on good sir */
10092 if (nr_entries <= 1 && !mismatch)
10095 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10096 "%Lu\n", rec->start);
10099 * First we want to see if the backrefs can agree amongst themselves who
10100 * is right, so figure out which one of the entries has the highest
10103 best = find_most_right_entry(&entries);
10106 * Ok so we may have an even split between what the backrefs think, so
10107 * this is where we use the extent ref to see what it thinks.
10110 entry = find_entry(&entries, rec->start, rec->nr);
10111 if (!entry && (!broken_entries || !rec->found_rec)) {
10112 fprintf(stderr, "Backrefs don't agree with each other "
10113 "and extent record doesn't agree with anybody,"
10114 " so we can't fix bytenr %Lu bytes %Lu\n",
10115 rec->start, rec->nr);
10118 } else if (!entry) {
10120 * Ok our backrefs were broken, we'll assume this is the
10121 * correct value and add an entry for this range.
10123 entry = malloc(sizeof(struct extent_entry));
10128 memset(entry, 0, sizeof(*entry));
10129 entry->bytenr = rec->start;
10130 entry->bytes = rec->nr;
10131 list_add_tail(&entry->list, &entries);
10135 best = find_most_right_entry(&entries);
10137 fprintf(stderr, "Backrefs and extent record evenly "
10138 "split on who is right, this is going to "
10139 "require user input to fix bytenr %Lu bytes "
10140 "%Lu\n", rec->start, rec->nr);
10147 * I don't think this can happen currently as we'll abort() if we catch
10148 * this case higher up, but in case somebody removes that we still can't
10149 * deal with it properly here yet, so just bail out of that's the case.
10151 if (best->bytenr != rec->start) {
10152 fprintf(stderr, "Extent start and backref starts don't match, "
10153 "please use btrfs-image on this file system and send "
10154 "it to a btrfs developer so they can make fsck fix "
10155 "this particular case. bytenr is %Lu, bytes is %Lu\n",
10156 rec->start, rec->nr);
10162 * Ok great we all agreed on an extent record, let's go find the real
10163 * references and fix up the ones that don't match.
10165 rbtree_postorder_for_each_entry_safe(back, tmp,
10166 &rec->backref_tree, node) {
10167 if (back->full_backref || !back->is_data)
10170 dback = to_data_backref(back);
10173 * Still ignoring backrefs that don't have a real ref attached
10176 if (dback->found_ref == 0)
/* This backref is tested against the winner's (bytes, bytenr) before repair_ref() is called. */
10179 if (dback->bytes == best->bytes &&
10180 dback->disk_bytenr == best->bytenr)
10183 ret = repair_ref(info, path, dback, best);
10189 * Ok we messed with the actual refs, which means we need to drop our
10190 * entire cache and go back and rescan. I know this is a huge pain and
10191 * adds a lot of extra work, but it's the only way to be safe. Once all
10192 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary entry list. */
10197 while (!list_empty(&entries)) {
10198 entry = list_entry(entries.next, struct extent_entry, list);
10199 list_del_init(&entry->list);
/*
 * process_duplicates() - promote a duplicate to be the real extent record.
 *
 * Checked first: rec->found_rec set, or more than one duplicate (the action
 * taken is not visible in this listing).  Otherwise the code removes @rec
 * from @extent_cache, takes the first record on rec->dups as "good",
 * re-initialises it (cache range, checked flags, duplicate count), gives it
 * rec->refs and splices rec->backrefs onto it.  It then looks up records in
 * the cache over good's range:
 *  - a record with found_rec set or with duplicates of its own is moved onto
 *    good->dups, and "good" is queued on the global duplicate_extents list;
 *  - any other record has its refs and backrefs folded into "good".
 * Finally "good" is inserted into @extent_cache.
 *
 * Returns 0 when good->num_duplicates is non-zero at the end, 1 otherwise.
 *
 * NOTE(review): the loop construct around the cache lookup and the early
 * returns are missing from this listing - confirm against the complete file.
 */
10205 static int process_duplicates(struct cache_tree *extent_cache,
10206 struct extent_record *rec)
10208 struct extent_record *good, *tmp;
10209 struct cache_extent *cache;
10213 * If we found a extent record for this extent then return, or if we
10214 * have more than one duplicate we are likely going to need to delete
10217 if (rec->found_rec || rec->num_duplicates > 1)
10220 /* Shouldn't happen but just in case */
10221 BUG_ON(!rec->num_duplicates);
10224 * So this happens if we end up with a backref that doesn't match the
10225 * actual extent entry. So either the backref is bad or the extent
10226 * entry is bad. Either way we want to have the extent_record actually
10227 * reflect what we found in the extent_tree, so we need to take the
10228 * duplicate out and use that as the extent_record since the only way we
10229 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10231 remove_cache_extent(extent_cache, &rec->cache);
/* The first duplicate becomes the replacement record. */
10233 good = to_extent_record(rec->dups.next);
10234 list_del_init(&good->list);
10235 INIT_LIST_HEAD(&good->backrefs);
10236 INIT_LIST_HEAD(&good->dups);
10237 good->cache.start = good->start;
10238 good->cache.size = good->nr;
10239 good->content_checked = 0;
10240 good->owner_ref_checked = 0;
10241 good->num_duplicates = 0;
10242 good->refs = rec->refs;
10243 list_splice_init(&rec->backrefs, &good->backrefs);
10245 cache = lookup_cache_extent(extent_cache, good->start,
10249 tmp = container_of(cache, struct extent_record, cache);
10252 * If we find another overlapping extent and it's found_rec is
10253 * set then it's a duplicate and we need to try and delete
10256 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Queue "good" on the global duplicate list only once. */
10257 if (list_empty(&good->list))
10258 list_add_tail(&good->list,
10259 &duplicate_extents);
10260 good->num_duplicates += tmp->num_duplicates + 1;
10261 list_splice_init(&tmp->dups, &good->dups);
10262 list_del_init(&tmp->list);
10263 list_add_tail(&tmp->list, &good->dups);
10264 remove_cache_extent(extent_cache, &tmp->cache);
10269 * Ok we have another non extent item backed extent rec, so lets
10270 * just add it to this extent and carry on like we did above.
10272 good->refs += tmp->refs;
10273 list_splice_init(&tmp->backrefs, &good->backrefs);
10274 remove_cache_extent(extent_cache, &tmp->cache);
10277 ret = insert_cache_extent(extent_cache, &good->cache);
10280 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - delete redundant EXTENT_ITEMs for @rec.
 *
 * Visible steps:
 *  1. scan rec->dups comparing start/nr against the current "good" record to
 *     find the one that covers all the others; partially overlapping extents
 *     are reported as unsupported;
 *  2. move @rec and the records from rec->dups onto a local delete_list (the
 *     statement that exempts "good" is not visible);
 *  3. switch to the extent root, start a transaction and, for each listed
 *     record with found_rec set, search for
 *     (tmp->start, BTRFS_EXTENT_ITEM_KEY, tmp->nr) and delete the item;
 *     metadata records are reported as unexpected;
 *  4. commit, then unlink everything from delete_list and rec->dups.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * reset to 0 when both ret and nr_del are zero.
 *
 * NOTE(review): the initialisation of "good", the nr_del updates and the
 * free() calls are missing from this listing - confirm against the complete
 * file.
 */
10283 static int delete_duplicate_records(struct btrfs_root *root,
10284 struct extent_record *rec)
10286 struct btrfs_trans_handle *trans;
10287 LIST_HEAD(delete_list);
10288 struct btrfs_path path;
10289 struct extent_record *tmp, *good, *n;
10292 struct btrfs_key key;
10294 btrfs_init_path(&path);
10297 /* Find the record that covers all of the duplicates. */
10298 list_for_each_entry(tmp, &rec->dups, list) {
10299 if (good->start < tmp->start)
10301 if (good->nr > tmp->nr)
10304 if (tmp->start + tmp->nr < good->start + good->nr) {
10305 fprintf(stderr, "Ok we have overlapping extents that "
10306 "aren't completely covered by each other, this "
10307 "is going to require more careful thought. "
10308 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10309 tmp->start, tmp->nr, good->start, good->nr);
10316 list_add_tail(&rec->list, &delete_list);
10318 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10321 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent tree. */
10324 root = root->fs_info->extent_root;
10325 trans = btrfs_start_transaction(root, 1);
10326 if (IS_ERR(trans)) {
10327 ret = PTR_ERR(trans);
10331 list_for_each_entry(tmp, &delete_list, list) {
10332 if (tmp->found_rec == 0)
10334 key.objectid = tmp->start;
10335 key.type = BTRFS_EXTENT_ITEM_KEY;
10336 key.offset = tmp->nr;
10338 /* Shouldn't happen but just in case */
10339 if (tmp->metadata) {
10340 fprintf(stderr, "Well this shouldn't happen, extent "
10341 "record overlaps but is metadata? "
10342 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* ins_len of -1 with cow = 1: search in preparation for a deletion. */
10346 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10352 ret = btrfs_del_item(trans, root, &path);
10355 btrfs_release_path(&path);
10358 err = btrfs_commit_transaction(trans, root);
10362 while (!list_empty(&delete_list)) {
10363 tmp = to_extent_record(delete_list.next);
10364 list_del_init(&tmp->list);
10370 while (!list_empty(&rec->dups)) {
10371 tmp = to_extent_record(rec->dups.next);
10372 list_del_init(&tmp->list);
10376 btrfs_release_path(&path);
10378 if (!ret && !nr_del)
10379 rec->num_duplicates = 0;
10381 return ret ? ret : nr_del;
/*
 * find_possible_backrefs() - try to resolve data backrefs nobody found yet.
 *
 * For each non-full data backref of @rec with found_ref == 0, the visible
 * code reads the fs root dback->root, searches it read-only for
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and, when the file
 * extent exists, reads its disk bytenr and disk num_bytes.  If
 * @extent_cache holds a record covering that bytenr and the record has
 * found_rec set, the backref is not adopted.  Otherwise found_ref is
 * incremented and disk_bytenr/bytes are copied into the backref.
 *
 * A missing root (-ENOENT) is tolerated; other root read errors are
 * returned via PTR_ERR(root).
 *
 * NOTE(review): continue/return statements and the line that flags the
 * backref as untrusted (see the trailing comment fragment) are missing from
 * this listing - confirm against the complete file.
 */
10384 static int find_possible_backrefs(struct btrfs_fs_info *info,
10385 struct btrfs_path *path,
10386 struct cache_tree *extent_cache,
10387 struct extent_record *rec)
10389 struct btrfs_root *root;
10390 struct extent_backref *back, *tmp;
10391 struct data_backref *dback;
10392 struct cache_extent *cache;
10393 struct btrfs_file_extent_item *fi;
10394 struct btrfs_key key;
10398 rbtree_postorder_for_each_entry_safe(back, tmp,
10399 &rec->backref_tree, node) {
10400 /* Don't care about full backrefs (poor unloved backrefs) */
10401 if (back->full_backref || !back->is_data)
10404 dback = to_data_backref(back);
10406 /* We found this one, we don't need to do a lookup */
10407 if (dback->found_ref)
10410 key.objectid = dback->root;
10411 key.type = BTRFS_ROOT_ITEM_KEY;
10412 key.offset = (u64)-1;
10414 root = btrfs_read_fs_root(info, &key);
10416 /* No root, definitely a bad ref, skip */
10417 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10419 /* Other err, exit */
10421 return PTR_ERR(root);
10423 key.objectid = dback->owner;
10424 key.type = BTRFS_EXTENT_DATA_KEY;
10425 key.offset = dback->offset;
10426 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10428 btrfs_release_path(path);
10431 /* Didn't find it, we can carry on */
10436 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10437 struct btrfs_file_extent_item);
10438 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10439 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10440 btrfs_release_path(path);
/* Look for an extent record covering the first byte of that extent. */
10441 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10443 struct extent_record *tmp;
10444 tmp = container_of(cache, struct extent_record, cache);
10447 * If we found an extent record for the bytenr for this
10448 * particular backref then we can't add it to our
10449 * current extent record. We only want to add backrefs
10450 * that don't have a corresponding extent item in the
10451 * extent tree since they likely belong to this record
10452 * and we need to fix it if it doesn't match bytenrs.
10454 if (tmp->found_rec)
10458 dback->found_ref += 1;
10459 dback->disk_bytenr = bytenr;
10460 dback->bytes = bytes;
10463 * Set this so the verify backref code knows not to trust the
10464 * values in this backref.
/*
 * record_orphan_data_extents() - remember data extents whose file extent
 * item could not be located, so they can be re-attached later.
 *
 * For each non-full data backref of @rec that was seen in the extent tree
 * (found_extent_tree) but has found_ref == 0, the visible code reads the
 * destination root dback->root (roots that cannot be read are tested for and
 * skipped, per the in-code comment), searches it for
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset), and allocates an
 * orphan_data_extent describing (root, objectid, offset, disk_bytenr,
 * disk_len) that is linked with dest_root->orphan_data_extents.
 *
 * Returns !recorded_data_ref on the visible return path, i.e. 0 when at
 * least one orphan was recorded and 1 otherwise; the fragment below also
 * documents a negative return on failure.
 *
 * NOTE(review): the test on the search result and the allocation-failure
 * path are missing from this listing - confirm against the complete file.
 */
10473 * Record orphan data ref into corresponding root.
10475 * Return 0 if the extent item contains data ref and recorded.
10476 * Return 1 if the extent item contains no useful data ref
10477 * On that case, it may contains only shared_dataref or metadata backref
10478 * or the file extent exists(this should be handled by the extent bytenr
10479 * recovery routine)
10480 * Return <0 if something goes wrong.
10482 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10483 struct extent_record *rec)
10485 struct btrfs_key key;
10486 struct btrfs_root *dest_root;
10487 struct extent_backref *back, *tmp;
10488 struct data_backref *dback;
10489 struct orphan_data_extent *orphan;
10490 struct btrfs_path path;
10491 int recorded_data_ref = 0;
10496 btrfs_init_path(&path);
10497 rbtree_postorder_for_each_entry_safe(back, tmp,
10498 &rec->backref_tree, node) {
10499 if (back->full_backref || !back->is_data ||
10500 !back->found_extent_tree)
10502 dback = to_data_backref(back);
10503 if (dback->found_ref)
10505 key.objectid = dback->root;
10506 key.type = BTRFS_ROOT_ITEM_KEY;
10507 key.offset = (u64)-1;
10509 dest_root = btrfs_read_fs_root(fs_info, &key);
10511 /* For non-exist root we just skip it */
10512 if (IS_ERR(dest_root) || !dest_root)
10515 key.objectid = dback->owner;
10516 key.type = BTRFS_EXTENT_DATA_KEY;
10517 key.offset = dback->offset;
/* Only the search result matters; the path is released immediately. */
10519 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10520 btrfs_release_path(&path);
10522 * For ret < 0, it's OK since the fs-tree may be corrupted,
10523 * we need to record it for inode/file extent rebuild.
10524 * For ret > 0, we record it only for file extent rebuild.
10525 * For ret == 0, the file extent exists but only bytenr
10526 * mismatch, let the original bytenr fix routine to handle,
10532 orphan = malloc(sizeof(*orphan));
10537 INIT_LIST_HEAD(&orphan->list);
10538 orphan->root = dback->root;
10539 orphan->objectid = dback->owner;
10540 orphan->offset = dback->offset;
10541 orphan->disk_bytenr = rec->cache.start;
10542 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): the kernel-style list_add() takes (new, head).  Here the
 * list head (dest_root->orphan_data_extents) is passed as "new" and the
 * freshly initialised orphan->list as "head", which looks swapped; with more
 * than one orphan per root, earlier orphans would drop off the root's list.
 * Confirm the intended argument order.
 */
10543 list_add(&dest_root->orphan_data_extents, &orphan->list);
10544 recorded_data_ref = 1;
10547 btrfs_release_path(&path);
10549 return !recorded_data_ref;
/*
 * fixup_extent_refs() - rebuild the extent tree entries of one record.
 *
 * Visible steps:
 *  1. carry BTRFS_BLOCK_FLAG_FULL_BACKREF into "flags" when the record has
 *     flag_block_full_backref set;
 *  2. for data extents whose counted refs differ from the extent item's ref
 *     count, call find_possible_backrefs() and then verify_backrefs();
 *  3. start a transaction on the extent root and call
 *     delete_extent_records() for the record;
 *  4. look the range up in info->corrupt_blocks (per the in-code comment,
 *     corrupt blocks get no references re-added);
 *  5. call record_extent() for every backref with found_ref set, clearing
 *     rec->bad_full_backref beforehand;
 *  6. commit the transaction and report the repair.
 *
 * NOTE(review): error checks, gotos, the "allocated" bookkeeping and the
 * return statement are missing from this listing, so the return contract is
 * not documented here - confirm against the complete file.
 */
10555 * when an incorrect extent item is found, this will delete
10556 * all of the existing entries for it and recreate them
10557 * based on what the tree scan found.
10559 static int fixup_extent_refs(struct btrfs_fs_info *info,
10560 struct cache_tree *extent_cache,
10561 struct extent_record *rec)
10563 struct btrfs_trans_handle *trans = NULL;
10565 struct btrfs_path path;
10566 struct cache_extent *cache;
10567 struct extent_backref *back, *tmp;
10571 if (rec->flag_block_full_backref)
10572 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10574 btrfs_init_path(&path);
10575 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10577 * Sometimes the backrefs themselves are so broken they don't
10578 * get attached to any meaningful rec, so first go back and
10579 * check any of our backrefs that we couldn't find and throw
10580 * them into the list if we find the backref so that
10581 * verify_backrefs can figure out what to do.
10583 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10588 /* step one, make sure all of the backrefs agree */
10589 ret = verify_backrefs(info, &path, rec);
10593 trans = btrfs_start_transaction(info->extent_root, 1);
10594 if (IS_ERR(trans)) {
10595 ret = PTR_ERR(trans);
10599 /* step two, delete all the existing records */
10600 ret = delete_extent_records(trans, info->extent_root, &path,
10606 /* was this block corrupt? If so, don't add references to it */
10607 cache = lookup_cache_extent(info->corrupt_blocks,
10608 rec->start, rec->max_size);
10614 /* step three, recreate all the refs we did find */
10615 rbtree_postorder_for_each_entry_safe(back, tmp,
10616 &rec->backref_tree, node) {
10618 * if we didn't find any references, don't create a
10619 * new extent record
10621 if (!back->found_ref)
10624 rec->bad_full_backref = 0;
10625 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10633 int err = btrfs_commit_transaction(trans, info->extent_root);
10639 fprintf(stderr, "Repaired extent references for %llu\n",
10640 (unsigned long long)rec->start);
10642 btrfs_release_path(&path);
/*
 * fixup_extent_flags() - set or clear FULL_BACKREF on one extent item.
 *
 * Builds the key for the record - (start, BTRFS_METADATA_ITEM_KEY,
 * info_level) when rec->metadata is set, otherwise
 * (start, BTRFS_EXTENT_ITEM_KEY, max_size) - starts a transaction on the
 * extent root, searches for the item with cow = 1, then ORs in or masks out
 * BTRFS_BLOCK_FLAG_FULL_BACKREF according to rec->flag_block_full_backref,
 * marks the leaf dirty and commits.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): the conditions on the search result, the else of the metadata
 * test and the remaining returns are missing from this listing - confirm
 * against the complete file.
 */
10646 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10647 struct extent_record *rec)
10649 struct btrfs_trans_handle *trans;
10650 struct btrfs_root *root = fs_info->extent_root;
10651 struct btrfs_path path;
10652 struct btrfs_extent_item *ei;
10653 struct btrfs_key key;
10657 key.objectid = rec->start;
10658 if (rec->metadata) {
10659 key.type = BTRFS_METADATA_ITEM_KEY;
10660 key.offset = rec->info_level;
10662 key.type = BTRFS_EXTENT_ITEM_KEY;
10663 key.offset = rec->max_size;
10666 trans = btrfs_start_transaction(root, 0);
10668 return PTR_ERR(trans);
10670 btrfs_init_path(&path);
10671 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10673 btrfs_release_path(&path);
/* NOTE(review): the commit result is discarded on this early-exit path. */
10674 btrfs_commit_transaction(trans, root);
10677 fprintf(stderr, "Didn't find extent for %llu\n",
10678 (unsigned long long)rec->start);
10679 btrfs_release_path(&path);
/* NOTE(review): the commit result is discarded here as well. */
10680 btrfs_commit_transaction(trans, root);
10684 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10685 struct btrfs_extent_item);
10686 flags = btrfs_extent_flags(path.nodes[0], ei);
10687 if (rec->flag_block_full_backref) {
10688 fprintf(stderr, "setting full backref on %llu\n",
10689 (unsigned long long)key.objectid);
10690 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10692 fprintf(stderr, "clearing full backref on %llu\n",
10693 (unsigned long long)key.objectid);
10694 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696 btrfs_set_extent_flags(path.nodes[0], ei, flags);
10697 btrfs_mark_buffer_dirty(path.nodes[0]);
10698 btrfs_release_path(&path);
10699 ret = btrfs_commit_transaction(trans, root);
10701 fprintf(stderr, "Repaired extent flags for %llu\n",
10702 (unsigned long long)rec->start);
/*
 * prune_one_block() - remove the parent pointer to one corrupt block.
 *
 * Searches the extent root for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot returned by the search is checked first; if its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned.
 * Once the slot is known, the pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): gotos, labels, error checks and returns are missing from
 * this listing, so the handling of the "not found" case (including the
 * extent-root-node comparison) and the return contract are not documented
 * here - confirm against the complete file.
 */
10707 /* right now we only prune from the extent allocation tree */
10708 static int prune_one_block(struct btrfs_trans_handle *trans,
10709 struct btrfs_fs_info *info,
10710 struct btrfs_corrupt_block *corrupt)
10713 struct btrfs_path path;
10714 struct extent_buffer *eb;
10718 int level = corrupt->level + 1;
10720 btrfs_init_path(&path);
10722 /* we want to stop at the parent to our busted block */
10723 path.lowest_level = level;
10725 ret = btrfs_search_slot(trans, info->extent_root,
10726 &corrupt->key, &path, -1, 1);
10731 eb = path.nodes[level];
10738 * hopefully the search gave us the block we want to prune,
10739 * lets try that first
10741 slot = path.slots[level];
10742 found = btrfs_node_blockptr(eb, slot);
10743 if (found == corrupt->cache.start)
10746 nritems = btrfs_header_nritems(eb);
10748 /* the search failed, lets scan this node and hope we find it */
10749 for (slot = 0; slot < nritems; slot++) {
10750 found = btrfs_node_blockptr(eb, slot);
10751 if (found == corrupt->cache.start)
10755 * we couldn't find the bad block. TODO, search all the nodes for pointers
10758 if (eb == info->extent_root->node) {
10763 btrfs_release_path(&path);
10768 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10769 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10772 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks() - prune every block listed in info->corrupt_blocks.
 *
 * Takes entries from the corrupt_blocks cache starting at offset 0, starts a
 * transaction on the extent root, calls prune_one_block() for the entry and
 * removes it from the cache.  The visible return paths are PTR_ERR(trans)
 * when the transaction cannot be started and the result of
 * btrfs_commit_transaction().
 *
 * NOTE(review): the loop construct and the conditions guarding the
 * transaction start/commit are missing from this listing - confirm against
 * the complete file.
 */
10776 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10778 struct btrfs_trans_handle *trans = NULL;
10779 struct cache_extent *cache;
10780 struct btrfs_corrupt_block *corrupt;
10783 cache = search_cache_extent(info->corrupt_blocks, 0);
10787 trans = btrfs_start_transaction(info->extent_root, 1);
10789 return PTR_ERR(trans);
10791 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): the return value of prune_one_block() is ignored. */
10792 prune_one_block(trans, info, corrupt);
10793 remove_cache_extent(info->corrupt_blocks, cache);
10796 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups() - reset cached free-space / block group state.
 *
 * Two phases are visible:
 *  - find ranges marked EXTENT_DIRTY in fs_info->free_space_cache, starting
 *    from offset 0, and clear the dirty bit on them;
 *  - look block groups up with btrfs_lookup_first_block_group() and advance
 *    "start" to the end of each one (objectid + offset).
 *
 * NOTE(review): the loops, their exit conditions and whatever is reset on
 * each block group are missing from this listing - confirm against the
 * complete file.
 */
10800 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10802 struct btrfs_block_group_cache *cache;
10807 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10808 &start, &end, EXTENT_DIRTY);
10811 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10816 cache = btrfs_lookup_first_block_group(fs_info, start);
10821 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs() - report, and in repair mode fix, extent ref problems.
 *
 * Visible phases:
 *  1. mark every record in @extent_cache and every entry of
 *     fs_info->corrupt_blocks dirty in fs_info->excluded_extents so the
 *     allocator avoids them (repair mode, per the in-code comment), then
 *     prune corrupt blocks and reset the cached block groups;
 *  2. while repairing, drain the global duplicate_extents list through
 *     process_duplicates() and delete_duplicate_records();
 *  3. walk @extent_cache and report: multiple extent items, ref count
 *     mismatch (also recording orphan data extents), backpointer mismatch,
 *     failed owner ref check, bad full backref, metadata crossing a stripe
 *     boundary and chunk type mismatch; fixup_extent_refs() and
 *     fixup_extent_flags() are called for fixable records;
 *  4. drop each record from the cache, free its backrefs and, under the
 *     visible condition, clear its range from excluded_extents;
 *  5. in repair mode, run btrfs_fix_block_accounting() in a fresh transaction
 *     on the extent root and commit it.
 *
 * NOTE(review): loop constructs, the repair-mode conditions, the bookkeeping
 * of cur_err/fix and all return statements are missing from this listing, so
 * the return contract is not documented here - confirm against the complete
 * file.
 */
10825 static int check_extent_refs(struct btrfs_root *root,
10826 struct cache_tree *extent_cache)
10828 struct extent_record *rec;
10829 struct cache_extent *cache;
10836 * if we're doing a repair, we have to make sure
10837 * we don't allocate from the problem extents.
10838 * In the worst case, this will be all the
10839 * extents in the FS
10841 cache = search_cache_extent(extent_cache, 0);
10843 rec = container_of(cache, struct extent_record, cache);
10844 set_extent_dirty(root->fs_info->excluded_extents,
10846 rec->start + rec->max_size - 1);
10847 cache = next_cache_extent(cache);
10850 /* pin down all the corrupted blocks too */
10851 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10853 set_extent_dirty(root->fs_info->excluded_extents,
10855 cache->start + cache->size - 1);
10856 cache = next_cache_extent(cache);
/* NOTE(review): the return value of prune_corrupt_blocks() is ignored. */
10858 prune_corrupt_blocks(root->fs_info);
10859 reset_cached_block_groups(root->fs_info);
10862 reset_cached_block_groups(root->fs_info);
10865 * We need to delete any duplicate entries we find first otherwise we
10866 * could mess up the extent tree when we have backrefs that actually
10867 * belong to a different extent item and not the weird duplicate one.
10869 while (repair && !list_empty(&duplicate_extents)) {
10870 rec = to_extent_record(duplicate_extents.next);
10871 list_del_init(&rec->list);
10873 /* Sometimes we can find a backref before we find an actual
10874 * extent, so we need to process it a little bit to see if there
10875 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10876 * if this is a backref screwup. If we need to delete stuff
10877 * process_duplicates() will return 0, otherwise it will return
10880 if (process_duplicates(extent_cache, rec))
10882 ret = delete_duplicate_records(root, rec);
10886 * delete_duplicate_records will return the number of entries
10887 * deleted, so if it's greater than 0 then we know we actually
10888 * did something and we need to remove.
10901 cache = search_cache_extent(extent_cache, 0);
10904 rec = container_of(cache, struct extent_record, cache);
10905 if (rec->num_duplicates) {
10906 fprintf(stderr, "extent item %llu has multiple extent "
10907 "items\n", (unsigned long long)rec->start);
10911 if (rec->refs != rec->extent_item_refs) {
10912 fprintf(stderr, "ref mismatch on [%llu %llu] ",
10913 (unsigned long long)rec->start,
10914 (unsigned long long)rec->nr);
10915 fprintf(stderr, "extent item %llu, found %llu\n",
10916 (unsigned long long)rec->extent_item_refs,
10917 (unsigned long long)rec->refs);
10918 ret = record_orphan_data_extents(root->fs_info, rec);
10924 if (all_backpointers_checked(rec, 1)) {
10925 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10926 (unsigned long long)rec->start,
10927 (unsigned long long)rec->nr);
10931 if (!rec->owner_ref_checked) {
10932 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10933 (unsigned long long)rec->start,
10934 (unsigned long long)rec->nr);
10939 if (repair && fix) {
10940 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10946 if (rec->bad_full_backref) {
10947 fprintf(stderr, "bad full backref, on [%llu]\n",
10948 (unsigned long long)rec->start);
10950 ret = fixup_extent_flags(root->fs_info, rec);
10958 * Although it's not a extent ref's problem, we reuse this
10959 * routine for error reporting.
10960 * No repair function yet.
10962 if (rec->crossing_stripes) {
10964 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10965 rec->start, rec->start + rec->max_size);
10969 if (rec->wrong_chunk_type) {
10971 "bad extent [%llu, %llu), type mismatch with chunk\n",
10972 rec->start, rec->start + rec->max_size);
10977 remove_cache_extent(extent_cache, cache);
10978 free_all_extent_backrefs(rec);
10979 if (!init_extent_tree && repair && (!cur_err || fix))
10980 clear_extent_dirty(root->fs_info->excluded_extents,
10982 rec->start + rec->max_size - 1);
10987 if (ret && ret != -EAGAIN) {
10988 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10991 struct btrfs_trans_handle *trans;
10993 root = root->fs_info->extent_root;
10994 trans = btrfs_start_transaction(root, 1);
10995 if (IS_ERR(trans)) {
10996 ret = PTR_ERR(trans);
11000 ret = btrfs_fix_block_accounting(trans, root);
11003 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length() - per-device stripe size of a chunk.
 *
 * @type:        block group type flags; only the RAID0, RAID10, RAID5 and
 *               RAID6 bits are tested, in that order
 * @length:      logical length of the chunk
 * @num_stripes: number of stripes in the chunk
 *
 * Returns:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   any other profile: length unchanged
 *
 * NOTE(review): no guard against a zero divisor is visible here
 * (num_stripes == 0, == 1 with RAID5, or == 2 with RAID6) - confirm that
 * callers validate the chunk before calling.
 */
11015 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11019 if (type & BTRFS_BLOCK_GROUP_RAID0) {
11020 stripe_size = length;
11021 stripe_size /= num_stripes;
11022 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* length * 2 over all stripes, i.e. length / (num_stripes / 2). */
11023 stripe_size = length * 2;
11024 stripe_size /= num_stripes;
11025 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11026 stripe_size = length;
11027 stripe_size /= (num_stripes - 1);
11028 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11029 stripe_size = length;
11030 stripe_size /= (num_stripes - 2);
11032 stripe_size = length;
11034 return stripe_size;
11038 * Check the chunk with its block group/dev list ref:
11039 * Return 0 if all refs seem valid.
11040 * Return 1 if only some refs seem valid and a later check must rebuild the
11041 * rest, e.g. a missing block group that is rebuilt by searching the extent tree.
11042 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Verify one chunk record against the two reference caches: first the block
 * group record for the chunk, then one device extent record per stripe.
 * Records are attached to @chunk_rec through bg_rec and the dextents list.
 */
11044 static int check_chunk_refs(struct chunk_record *chunk_rec,
11045 struct block_group_tree *block_group_cache,
11046 struct device_extent_tree *dev_extent_cache,
11049 struct cache_extent *block_group_item;
11050 struct block_group_record *block_group_rec;
11051 struct cache_extent *dev_extent_item;
11052 struct device_extent_record *dev_extent_rec;
11056 int metadump_v2 = 0;
/* Part 1: look up the block group record for this chunk. */
11060 block_group_item = lookup_cache_extent(&block_group_cache->tree,
11062 chunk_rec->length);
11063 if (block_group_item) {
11064 block_group_rec = container_of(block_group_item,
11065 struct block_group_record,
/*
 * The chunk's length is compared with the block group's offset field and the
 * chunk's offset with its objectid field (presumably mirroring the block
 * group key layout), together with the type flags.
 */
11067 if (chunk_rec->length != block_group_rec->offset ||
11068 chunk_rec->offset != block_group_rec->objectid ||
11070 chunk_rec->type_flags != block_group_rec->flags)) {
11073 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11074 chunk_rec->objectid,
11079 chunk_rec->type_flags,
11080 block_group_rec->objectid,
11081 block_group_rec->type,
11082 block_group_rec->offset,
11083 block_group_rec->offset,
11084 block_group_rec->objectid,
11085 block_group_rec->flags);
/*
 * Unlink the block group from the list it is on and remember it in the
 * chunk record (presumably only on the matching path - confirm).
 */
11088 list_del_init(&block_group_rec->list);
11089 chunk_rec->bg_rec = block_group_rec;
11094 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11095 chunk_rec->objectid,
11100 chunk_rec->type_flags);
/*
 * Part 2: every stripe must have a device extent with the same devid and
 * offset, pointing back at this chunk, and with the per-stripe length
 * computed by calc_stripe_length().
 */
11107 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11108 chunk_rec->num_stripes);
11109 for (i = 0; i < chunk_rec->num_stripes; ++i) {
11110 devid = chunk_rec->stripes[i].devid;
11111 offset = chunk_rec->stripes[i].offset;
11112 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11113 devid, offset, length);
11114 if (dev_extent_item) {
11115 dev_extent_rec = container_of(dev_extent_item,
11116 struct device_extent_record,
11118 if (dev_extent_rec->objectid != devid ||
11119 dev_extent_rec->offset != offset ||
11120 dev_extent_rec->chunk_offset != chunk_rec->offset ||
11121 dev_extent_rec->length != length) {
11124 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11125 chunk_rec->objectid,
11128 chunk_rec->stripes[i].devid,
11129 chunk_rec->stripes[i].offset,
11130 dev_extent_rec->objectid,
11131 dev_extent_rec->offset,
11132 dev_extent_rec->length);
/* Move the device extent onto this chunk's dextents list. */
11135 list_move(&dev_extent_rec->chunk_list,
11136 &chunk_rec->dextents);
11141 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11142 chunk_rec->objectid,
11145 chunk_rec->stripes[i].devid,
11146 chunk_rec->stripes[i].offset);
11153 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record in @chunk_cache and, depending on
 * the result, append the chunk record to one of the optional lists:
 *   result == 0 -> @good, result > 0 -> @rebuild, result < 0 -> @bad.
 * A NULL list pointer simply skips that bookkeeping.
 *
 * Afterwards the block groups still linked on block_group_cache->block_groups
 * and the device extents on dev_extent_cache->no_chunk_orphans are walked;
 * the messages below report them as not having found their chunk.
 *
 * @silent is passed through to check_chunk_refs().
 */
11154 int check_chunks(struct cache_tree *chunk_cache,
11155 struct block_group_tree *block_group_cache,
11156 struct device_extent_tree *dev_extent_cache,
11157 struct list_head *good, struct list_head *bad,
11158 struct list_head *rebuild, int silent)
11160 struct cache_extent *chunk_item;
11161 struct chunk_record *chunk_rec;
11162 struct block_group_record *bg_rec;
11163 struct device_extent_record *dext_rec;
/* Pass 1: check each chunk and classify it. */
11167 chunk_item = first_cache_extent(chunk_cache);
11168 while (chunk_item) {
11169 chunk_rec = container_of(chunk_item, struct chunk_record,
11171 err = check_chunk_refs(chunk_rec, block_group_cache,
11172 dev_extent_cache, silent);
11175 if (err == 0 && good)
11176 list_add_tail(&chunk_rec->list, good);
11177 if (err > 0 && rebuild)
11178 list_add_tail(&chunk_rec->list, rebuild);
11179 if (err < 0 && bad)
11180 list_add_tail(&chunk_rec->list, bad);
11181 chunk_item = next_cache_extent(chunk_item);
/* Pass 2: block groups that no chunk claimed. */
11184 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11187 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Pass 3: device extents that no chunk claimed. */
11195 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11199 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11200 dext_rec->objectid,
/*
 * Add up the lengths of the cached device extents that belong to @dev_rec
 * and compare the sum with the byte_used value recorded for the device.
 * A mismatch is reported with the message below.
 */
11210 static int check_device_used(struct device_record *dev_rec,
11211 struct device_extent_tree *dext_cache)
11213 struct cache_extent *cache;
11214 struct device_extent_record *dev_extent_rec;
11215 u64 total_byte = 0;
/* Position on the cached extents for this devid, starting from offset 0. */
11217 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11219 dev_extent_rec = container_of(cache,
11220 struct device_extent_record,
/* An extent of a different device presumably ends the walk - confirm. */
11222 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and account for its length. */
11225 list_del_init(&dev_extent_rec->device_list);
11226 total_byte += dev_extent_rec->length;
11227 cache = next_cache_extent(cache);
11230 if (total_byte != dev_rec->byte_used) {
11232 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11233 total_byte, dev_rec->byte_used, dev_rec->objectid,
11234 dev_rec->type, dev_rec->offset);
11241 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Call check_device_used() for every device record in the @dev_cache rb-tree,
 * then walk dev_extent_cache->no_device_orphans; entries on that list are
 * reported as device extents that did not find their device.
 */
11242 static int check_devices(struct rb_root *dev_cache,
11243 struct device_extent_tree *dev_extent_cache)
11245 struct rb_node *dev_node;
11246 struct device_record *dev_rec;
11247 struct device_extent_record *dext_rec;
/* In-order walk over all device records. */
11251 dev_node = rb_first(dev_cache);
11253 dev_rec = container_of(dev_node, struct device_record, node);
11254 err = check_device_used(dev_rec, dev_extent_cache);
11258 dev_node = rb_next(dev_node);
11260 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11263 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11264 dext_rec->objectid, dext_rec->offset, dext_rec->length);
11271 static int add_root_item_to_list(struct list_head *head,
11272 u64 objectid, u64 bytenr, u64 last_snapshot,
11273 u8 level, u8 drop_level,
11274 struct btrfs_key *drop_key)
11277 struct root_item_record *ri_rec;
11278 ri_rec = malloc(sizeof(*ri_rec));
11281 ri_rec->bytenr = bytenr;
11282 ri_rec->objectid = objectid;
11283 ri_rec->level = level;
11284 ri_rec->drop_level = drop_level;
11285 ri_rec->last_snapshot = last_snapshot;
11287 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11288 list_add_tail(&ri_rec->list, head);
11293 static void free_root_item_list(struct list_head *list)
11295 struct root_item_record *ri_rec;
11297 while (!list_empty(list)) {
11298 ri_rec = list_first_entry(list, struct root_item_record,
11300 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list: for each record the root's
 * tree block is read, added to the pending set via add_root_to_pending(),
 * and run_next_block() is invoked with that record; the record is then
 * removed from the list. A further run_next_block() call with a NULL record
 * follows the per-record loop.
 *
 * The cache/tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
11305 static int deal_root_from_list(struct list_head *list,
11306 struct btrfs_root *root,
11307 struct block_info *bits,
11309 struct cache_tree *pending,
11310 struct cache_tree *seen,
11311 struct cache_tree *reada,
11312 struct cache_tree *nodes,
11313 struct cache_tree *extent_cache,
11314 struct cache_tree *chunk_cache,
11315 struct rb_root *dev_cache,
11316 struct block_group_tree *block_group_cache,
11317 struct device_extent_tree *dev_extent_cache)
11322 while (!list_empty(list)) {
11323 struct root_item_record *rec;
11324 struct extent_buffer *buf;
/* Take the record at the head of the list. */
11325 rec = list_entry(list->next,
11326 struct root_item_record, list);
/* Read the root's top tree block; the buffer is released if not uptodate. */
11328 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11329 if (!extent_buffer_uptodate(buf)) {
11330 free_extent_buffer(buf);
11334 ret = add_root_to_pending(buf, extent_cache, pending,
11335 seen, nodes, rec->objectid);
11339 * To rebuild extent tree, we need deal with snapshot
11340 * one by one, otherwise we deal with node firstly which
11341 * can maximize readahead.
11344 ret = run_next_block(root, bits, bits_nr, &last,
11345 pending, seen, reada, nodes,
11346 extent_cache, chunk_cache,
11347 dev_cache, block_group_cache,
11348 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and unlink the record. */
11352 free_extent_buffer(buf);
11353 list_del(&rec->list);
/* Remaining pending blocks are processed without a root record. */
11359 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11360 reada, nodes, extent_cache, chunk_cache,
11361 dev_cache, block_group_cache,
11362 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk/extent checks: sets up the local caches,
 * queues the tree root, the chunk root and every root found by scanning the
 * tree root for ROOT_ITEMs, walks them with deal_root_from_list(), and then
 * runs check_chunks(), check_extent_refs() and check_devices() before the
 * caches are torn down again.
 */
11372 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11374 struct rb_root dev_cache;
11375 struct cache_tree chunk_cache;
11376 struct block_group_tree block_group_cache;
11377 struct device_extent_tree dev_extent_cache;
11378 struct cache_tree extent_cache;
11379 struct cache_tree seen;
11380 struct cache_tree pending;
11381 struct cache_tree reada;
11382 struct cache_tree nodes;
11383 struct extent_io_tree excluded_extents;
11384 struct cache_tree corrupt_blocks;
11385 struct btrfs_path path;
11386 struct btrfs_key key;
11387 struct btrfs_key found_key;
11389 struct block_info *bits;
11391 struct extent_buffer *leaf;
11393 struct btrfs_root_item ri;
11394 struct list_head dropping_trees;
11395 struct list_head normal_trees;
11396 struct btrfs_root *root1;
11397 struct btrfs_root *root;
/* Initialise all function-local caches and lists. */
11401 root = fs_info->fs_root;
11402 dev_cache = RB_ROOT;
11403 cache_tree_init(&chunk_cache);
11404 block_group_tree_init(&block_group_cache);
11405 device_extent_tree_init(&dev_extent_cache);
11407 cache_tree_init(&extent_cache);
11408 cache_tree_init(&seen);
11409 cache_tree_init(&pending);
11410 cache_tree_init(&nodes);
11411 cache_tree_init(&reada);
11412 cache_tree_init(&corrupt_blocks);
11413 extent_io_tree_init(&excluded_extents);
11414 INIT_LIST_HEAD(&dropping_trees);
11415 INIT_LIST_HEAD(&normal_trees);
/*
 * Expose the local trees and the free-extent hook through fs_info; these
 * pointers are reset to NULL in the teardown further down.
 */
11418 fs_info->excluded_extents = &excluded_extents;
11419 fs_info->fsck_extent_cache = &extent_cache;
11420 fs_info->free_extent_hook = free_extent_hook;
11421 fs_info->corrupt_blocks = &corrupt_blocks;
11425 bits = malloc(bits_nr * sizeof(struct block_info));
11431 if (ctx.progress_enabled) {
11432 ctx.tp = TASK_EXTENTS;
11433 task_start(ctx.info);
/* The tree root and the chunk root are queued explicitly. */
11437 root1 = fs_info->tree_root;
11438 level = btrfs_header_level(root1->node);
11439 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11440 root1->node->start, 0, level, 0, NULL);
11443 root1 = fs_info->chunk_root;
11444 level = btrfs_header_level(root1->node);
11445 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11446 root1->node->start, 0, level, 0, NULL);
/* Scan the tree root for ROOT_ITEM entries and queue each root found. */
11449 btrfs_init_path(&path);
11452 key.type = BTRFS_ROOT_ITEM_KEY;
11453 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11457 leaf = path.nodes[0];
11458 slot = path.slots[0];
/*
 * NOTE(review): the search above runs on fs_info->tree_root, but the leaf is
 * advanced with @root, which was set to fs_info->fs_root - confirm this is
 * intended.
 */
11459 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11460 ret = btrfs_next_leaf(root, &path);
11463 leaf = path.nodes[0];
11464 slot = path.slots[0];
11466 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11467 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11468 unsigned long offset;
11471 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11472 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11473 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A root whose drop_progress objectid is 0 goes onto normal_trees; any other
 * root goes onto dropping_trees together with its drop level and the drop
 * progress key (found_key is reused to hold the converted key).
 */
11474 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11475 level = btrfs_root_level(&ri);
11476 ret = add_root_item_to_list(&normal_trees,
11477 found_key.objectid,
11478 btrfs_root_bytenr(&ri),
11479 last_snapshot, level,
11484 level = btrfs_root_level(&ri);
11485 objectid = found_key.objectid;
11486 btrfs_disk_key_to_cpu(&found_key,
11487 &ri.drop_progress);
11488 ret = add_root_item_to_list(&dropping_trees,
11490 btrfs_root_bytenr(&ri),
11491 last_snapshot, level,
11492 ri.drop_level, &found_key);
11499 btrfs_release_path(&path);
11502 * check_block can return -EAGAIN if it fixes something, please keep
11503 * this in mind when dealing with return values from these functions, if
11504 * we get -EAGAIN we want to fall through and restart the loop.
/* Walk the queued roots: normal trees first, then the dropping trees. */
11506 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11507 &seen, &reada, &nodes, &extent_cache,
11508 &chunk_cache, &dev_cache, &block_group_cache,
11509 &dev_extent_cache);
11511 if (ret == -EAGAIN)
11515 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11516 &pending, &seen, &reada, &nodes,
11517 &extent_cache, &chunk_cache, &dev_cache,
11518 &block_group_cache, &dev_extent_cache);
11520 if (ret == -EAGAIN)
/* Cross-checks on the collected records. */
11525 ret = check_chunks(&chunk_cache, &block_group_cache,
11526 &dev_extent_cache, NULL, NULL, NULL, 0);
11528 if (ret == -EAGAIN)
11533 ret = check_extent_refs(root, &extent_cache);
11535 if (ret == -EAGAIN)
11540 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: stop progress reporting, unhook fs_info, free the caches. */
11545 task_stop(ctx.info);
11547 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11548 extent_io_tree_cleanup(&excluded_extents);
11549 fs_info->fsck_extent_cache = NULL;
11550 fs_info->free_extent_hook = NULL;
11551 fs_info->corrupt_blocks = NULL;
11552 fs_info->excluded_extents = NULL;
11555 free_chunk_cache_tree(&chunk_cache);
11556 free_device_cache_tree(&dev_cache);
11557 free_block_group_tree(&block_group_cache);
11558 free_device_extent_tree(&dev_extent_cache);
11559 free_extent_cache_tree(&seen);
11560 free_extent_cache_tree(&pending);
11561 free_extent_cache_tree(&reada);
11562 free_extent_cache_tree(&nodes);
11563 free_root_item_list(&normal_trees);
11564 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): second cleanup sequence, which additionally frees the extent
 * record cache; presumably the -EAGAIN restart path - confirm.
 */
11567 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11568 free_extent_cache_tree(&seen);
11569 free_extent_cache_tree(&pending);
11570 free_extent_cache_tree(&reada);
11571 free_extent_cache_tree(&nodes);
11572 free_chunk_cache_tree(&chunk_cache);
11573 free_block_group_tree(&block_group_cache);
11574 free_device_cache_tree(&dev_cache);
11575 free_device_extent_tree(&dev_extent_cache);
11576 free_extent_record_cache(&extent_cache);
11577 free_root_item_list(&normal_trees);
11578 free_root_item_list(&dropping_trees);
11579 extent_io_tree_cleanup(&excluded_extents);
11583 static int check_extent_inline_ref(struct extent_buffer *eb,
11584 struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11587 u8 type = btrfs_extent_inline_ref_type(eb, iref);
11590 case BTRFS_TREE_BLOCK_REF_KEY:
11591 case BTRFS_EXTENT_DATA_REF_KEY:
11592 case BTRFS_SHARED_BLOCK_REF_KEY:
11593 case BTRFS_SHARED_DATA_REF_KEY:
11597 error("extent[%llu %u %llu] has unknown ref type: %d",
11598 key->objectid, key->type, key->offset, type);
11599 ret = UNKNOWN_TYPE;
11607 * Check backrefs of a tree block given by @bytenr or @eb.
11609 * @root: the root containing the @bytenr or @eb
11610 * @eb: tree block extent buffer, can be NULL
11611 * @bytenr: bytenr of the tree block to search
11612 * @level: tree level of the tree block
11613 * @owner: owner of the tree block
11615 * Return >0 for any error found and output error message
11616 * Return 0 for no error found
/*
 * Implementation outline: locate the extent/metadata item for @bytenr in the
 * extent tree, sanity-check its flags, generation, level and ref count, then
 * look for a backref matching @root - first among the inline refs, then as a
 * keyed TREE_BLOCK_REF item.
 */
11618 static int check_tree_block_ref(struct btrfs_root *root,
11619 struct extent_buffer *eb, u64 bytenr,
11620 int level, u64 owner, struct node_refs *nrefs)
11622 struct btrfs_key key;
11623 struct btrfs_root *extent_root = root->fs_info->extent_root;
11624 struct btrfs_path path;
11625 struct btrfs_extent_item *ei;
11626 struct btrfs_extent_inline_ref *iref;
11627 struct extent_buffer *leaf;
11632 int root_level = btrfs_header_level(root->node);
11634 u32 nodesize = root->fs_info->nodesize;
11637 int tree_reloc_root = 0;
/*
 * A tree reloc root whose own root node is the block being checked is
 * flagged here; see the SHARED_BLOCK_REF handling below.
 */
11644 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11645 btrfs_header_bytenr(root->node) == bytenr)
11646 tree_reloc_root = 1;
/*
 * Key type depends on the SKINNY_METADATA feature. The search uses offset
 * (u64)-1 and is followed by btrfs_previous_extent_item(), presumably to
 * land on the item for @bytenr whatever its real offset is.
 */
11647 btrfs_init_path(&path);
11648 key.objectid = bytenr;
11649 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11650 key.type = BTRFS_METADATA_ITEM_KEY;
11652 key.type = BTRFS_EXTENT_ITEM_KEY;
11653 key.offset = (u64)-1;
11655 /* Search for the backref in extent tree */
11656 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11658 err |= BACKREF_MISSING;
11661 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11663 err |= BACKREF_MISSING;
11667 leaf = path.nodes[0];
11668 slot = path.slots[0];
11669 btrfs_item_key_to_cpu(leaf, &key, slot);
11671 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly. Otherwise a btrfs_tree_block_info sits between the
 * extent item and the inline refs and carries the level.
 */
11673 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11674 skinny_level = (int)key.offset;
11675 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11677 struct btrfs_tree_block_info *info;
11679 info = (struct btrfs_tree_block_info *)(ei + 1);
11680 skinny_level = btrfs_tree_block_level(leaf, info);
11681 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11690 * Due to the feature of shared tree blocks, if the upper node
11691 * is a fs root or shared node, the extent of checked node may
11692 * not be updated until the next CoW.
11695 strict = should_check_extent_strictly(root, nrefs,
11697 if (!(btrfs_extent_flags(leaf, ei) &
11698 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11700 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11701 key.objectid, nodesize,
11702 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11703 err = BACKREF_MISMATCH;
/*
 * NOTE(review): @eb may be NULL per the function description (and is NULL in
 * the recursive call below); the header read here is assumed to be guarded
 * by a check on @eb - confirm.
 */
11705 header_gen = btrfs_header_generation(eb);
11706 extent_gen = btrfs_extent_generation(leaf, ei);
11707 if (header_gen != extent_gen) {
11709 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11710 key.objectid, nodesize, header_gen,
11712 err = BACKREF_MISMATCH;
11714 if (level != skinny_level) {
11716 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11717 key.objectid, nodesize, level, skinny_level);
11718 err = BACKREF_MISMATCH;
/* A block not owned by a fs tree is expected to have exactly one ref. */
11720 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11722 "extent[%llu %u] is referred by other roots than %llu",
11723 key.objectid, nodesize, root->objectid);
11724 err = BACKREF_MISMATCH;
11729 * Iterate the extent/metadata item to find the exact backref
/* [ptr, end) spans the inline refs stored inside the item. */
11731 item_size = btrfs_item_size_nr(leaf, slot);
11732 ptr = (unsigned long)iref;
11733 end = (unsigned long)ei + item_size;
11735 while (ptr < end) {
11736 iref = (struct btrfs_extent_inline_ref *)ptr;
11737 type = btrfs_extent_inline_ref_type(leaf, iref);
11738 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11740 ret = check_extent_inline_ref(leaf, &key, iref);
/*
 * TREE_BLOCK_REF: the inline offset is compared with this root's objectid,
 * or with @owner when the check is not strict.
 */
11745 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11746 if (offset == root->objectid)
11748 if (!strict && owner == offset)
11750 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11752 * Backref of tree reloc root points to itself, no need
11753 * to check backref any more.
11755 if (tree_reloc_root) {
11759 * Check if the backref points to valid
/*
 * Recurse on the block at @offset one level up; the ref counts as found when
 * that check reports no error.
 */
11762 found_ref = !check_tree_block_ref( root, NULL,
11763 offset, level + 1, owner,
/* Step to the next inline ref; its size depends on the ref type. */
11770 ptr += btrfs_extent_inline_ref_size(type);
11774 * Inlined extent item doesn't have what we need, check
11775 * TREE_BLOCK_REF_KEY
11778 btrfs_release_path(&path);
11779 key.objectid = bytenr;
11780 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11781 key.offset = root->objectid;
11783 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11788 err |= BACKREF_MISSING;
11790 btrfs_release_path(&path);
/*
 * For the error message: use the parent bytenr recorded in @nrefs when the
 * level above is marked full_backref, otherwise report the root objectid.
 */
11791 if (nrefs && strict &&
11792 level < root_level && nrefs->full_backref[level + 1])
11793 parent = nrefs->bytenr[level + 1];
11794 if (eb && (err & BACKREF_MISSING))
11796 "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11797 bytenr, nodesize, owner, level,
11798 parent ? "parent" : "root",
11799 parent ? parent : root->objectid);
11804 * If @err contains BACKREF_MISSING then add extent of the
11805 * file_extent_data_item.
11807 * Returns error bits after repair.
/*
 * Implementation outline: read the file extent item at @pathp, make sure an
 * EXTENT_ITEM exists for its disk extent (inserting one with a ref count of
 * 0 when needed), then add the missing data backref with
 * btrfs_inc_extent_ref(). Inline extents and extents with disk_bytenr 0 are
 * filtered out at the start.
 */
11809 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11810 struct btrfs_root *root,
11811 struct btrfs_path *pathp,
11812 struct node_refs *nrefs,
11815 struct btrfs_file_extent_item *fi;
11816 struct btrfs_key fi_key;
11817 struct btrfs_key key;
11818 struct btrfs_extent_item *ei;
11819 struct btrfs_path path;
11820 struct btrfs_root *extent_root = root->fs_info->extent_root;
11821 struct extent_buffer *eb;
11833 eb = pathp->nodes[0];
11834 slot = pathp->slots[0];
11835 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11836 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11838 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11839 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
/*
 * Gather what is needed to rebuild the backref. @offset is the file offset
 * minus the offset into the extent.
 */
11842 file_offset = fi_key.offset;
11843 generation = btrfs_file_extent_generation(eb, fi);
11844 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11845 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11846 extent_offset = btrfs_file_extent_offset(eb, fi);
11847 offset = file_offset - extent_offset;
11849 /* now repair only adds backref */
11850 if ((err & BACKREF_MISSING) == 0)
11853 /* search extent item */
11854 key.objectid = disk_bytenr;
11855 key.type = BTRFS_EXTENT_ITEM_KEY;
11856 key.offset = num_bytes;
11858 btrfs_init_path(&path);
11859 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11865 /* insert an extent item */
11867 key.objectid = disk_bytenr;
11868 key.type = BTRFS_EXTENT_ITEM_KEY;
11869 key.offset = num_bytes;
11870 size = sizeof(*ei);
11872 btrfs_release_path(&path);
11873 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11877 eb = path.nodes[0];
11878 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
/*
 * The new item starts with 0 refs; the count is presumably raised by the
 * btrfs_inc_extent_ref() call below - confirm.
 */
11880 btrfs_set_extent_refs(eb, ei, 0);
11881 btrfs_set_extent_generation(eb, ei, generation);
11882 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11884 btrfs_mark_buffer_dirty(eb);
11885 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11887 btrfs_release_path(&path);
/*
 * With full_backref set for level 0 the ref is keyed by a parent bytenr.
 * NOTE(review): @eb was reassigned to path.nodes[0] of the extent tree in the
 * insert branch above - confirm which buffer is meant here.
 */
11890 if (nrefs->full_backref[0])
11891 parent = btrfs_header_bytenr(eb);
11895 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11897 parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11901 "failed to increase extent data backref[%llu %llu] root %llu",
11902 disk_bytenr, num_bytes, root->objectid);
11905 printf("Add one extent data backref [%llu %llu]\n",
11906 disk_bytenr, num_bytes);
/* The backref is considered repaired: clear the bit in the returned error. */
11909 err &= ~BACKREF_MISSING;
11912 error("can't repair root %llu extent data item[%llu %llu]",
11913 root->objectid, disk_bytenr, num_bytes);
11918 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11920 * Return >0 any error found and output error message
11921 * Return 0 for no error found
/*
 * Implementation outline: validate the alignment of the file extent sizes
 * (optionally accounting them in the global byte counters when
 * @account_bytes is set), then search the extent tree for the extent item
 * and for a data backref that matches this file extent: inline refs first,
 * then a keyed EXTENT_DATA_REF, then a keyed SHARED_DATA_REF.
 */
11923 static int check_extent_data_item(struct btrfs_root *root,
11924 struct btrfs_path *pathp,
11925 struct node_refs *nrefs, int account_bytes)
11927 struct btrfs_file_extent_item *fi;
11928 struct extent_buffer *eb = pathp->nodes[0];
11929 struct btrfs_path path;
11930 struct btrfs_root *extent_root = root->fs_info->extent_root;
11931 struct btrfs_key fi_key;
11932 struct btrfs_key dbref_key;
11933 struct extent_buffer *leaf;
11934 struct btrfs_extent_item *ei;
11935 struct btrfs_extent_inline_ref *iref;
11936 struct btrfs_extent_data_ref *dref;
11939 u64 disk_num_bytes;
11940 u64 extent_num_bytes;
11947 int found_dbackref = 0;
11948 int slot = pathp->slots[0];
11953 btrfs_item_key_to_cpu(eb, &fi_key, slot);
11954 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11956 /* Nothing to check for hole and inline data extents */
11957 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11958 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11961 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11962 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11963 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11965 /* Check unaligned disk_num_bytes and num_bytes */
/* Only aligned sizes are added to the global counters. */
11966 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11968 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11969 fi_key.objectid, fi_key.offset, disk_num_bytes,
11970 root->fs_info->sectorsize);
11971 err |= BYTES_UNALIGNED;
11972 } else if (account_bytes) {
11973 data_bytes_allocated += disk_num_bytes;
11975 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11977 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11978 fi_key.objectid, fi_key.offset, extent_num_bytes,
11979 root->fs_info->sectorsize);
11980 err |= BYTES_UNALIGNED;
11981 } else if (account_bytes) {
11982 data_bytes_referenced += extent_num_bytes;
11984 owner = btrfs_header_owner(eb);
11986 /* Check the extent item of the file extent in extent tree */
11987 btrfs_init_path(&path);
11988 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11989 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11990 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11992 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11996 leaf = path.nodes[0];
11997 slot = path.slots[0];
11998 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12000 extent_flags = btrfs_extent_flags(leaf, ei);
12002 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12004 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12005 disk_bytenr, disk_num_bytes,
12006 BTRFS_EXTENT_FLAG_DATA);
12007 err |= BACKREF_MISMATCH;
12010 /* Check data backref inside that extent item */
/* [ptr, end) spans the inline refs that follow the extent item. */
12011 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12012 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12013 ptr = (unsigned long)iref;
12014 end = (unsigned long)ei + item_size;
12015 strict = should_check_extent_strictly(root, nrefs, -1);
12017 while (ptr < end) {
12018 iref = (struct btrfs_extent_inline_ref *)ptr;
12019 type = btrfs_extent_inline_ref_type(leaf, iref);
12020 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12022 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
/*
 * EXTENT_DATA_REF: matches when the ref's root is this root, or the leaf
 * owner when the check is not strict. SHARED_DATA_REF: delegated to
 * check_tree_block_ref() on the block the inline offset points to.
 */
12027 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12028 ref_root = btrfs_extent_data_ref_root(leaf, dref);
12029 if (ref_root == root->objectid)
12030 found_dbackref = 1;
12031 else if (!strict && owner == ref_root)
12032 found_dbackref = 1;
12033 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12034 found_dbackref = !check_tree_block_ref(root, NULL,
12035 btrfs_extent_inline_ref_offset(leaf, iref),
12039 if (found_dbackref)
12041 ptr += btrfs_extent_inline_ref_size(type);
12044 if (!found_dbackref) {
12045 btrfs_release_path(&path);
12047 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
/* The key offset is a hash of (root objectid, inode objectid, file offset). */
12048 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12049 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12050 dbref_key.offset = hash_extent_data_ref(root->objectid,
12051 fi_key.objectid, fi_key.offset);
12053 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12054 &dbref_key, &path, 0, 0);
12056 found_dbackref = 1;
12060 btrfs_release_path(&path);
12063 * Neither inlined nor EXTENT_DATA_REF found, try
12064 * SHARED_DATA_REF as last chance.
/* Keyed by the bytenr of the leaf that holds the file extent item. */
12066 dbref_key.objectid = disk_bytenr;
12067 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12068 dbref_key.offset = eb->start;
12070 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12071 &dbref_key, &path, 0, 0);
12073 found_dbackref = 1;
12079 if (!found_dbackref)
12080 err |= BACKREF_MISSING;
12081 btrfs_release_path(&path);
12082 if (err & BACKREF_MISSING) {
12083 error("data extent[%llu %llu] backref lost",
12084 disk_bytenr, disk_num_bytes);
12090 * Get real tree block level for the case like shared block
12091 * Return >= 0 as tree level
12092 * Return <0 for error
/*
 * Implementation outline: the level is obtained twice - once from the extent
 * tree item for @bytenr and once from the header of the tree block itself -
 * and the two values are compared before the header level is returned.
 */
12094 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12096 struct extent_buffer *eb;
12097 struct btrfs_path path;
12098 struct btrfs_key key;
12099 struct btrfs_extent_item *ei;
12106 /* Search extent tree for extent generation and level */
12107 key.objectid = bytenr;
12108 key.type = BTRFS_METADATA_ITEM_KEY;
12109 key.offset = (u64)-1;
12111 btrfs_init_path(&path);
12112 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12115 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12123 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12124 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12125 struct btrfs_extent_item);
/* The extent must be flagged as a tree block. */
12126 flags = btrfs_extent_flags(path.nodes[0], ei);
12127 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12132 /* Get transid for later read_tree_block() check */
12133 transid = btrfs_extent_generation(path.nodes[0], ei);
12135 /* Get backref level as one source */
/*
 * METADATA_ITEM stores the level in the key offset; otherwise it is read
 * from the btrfs_tree_block_info that follows the extent item.
 */
12136 if (key.type == BTRFS_METADATA_ITEM_KEY) {
12137 backref_level = key.offset;
12139 struct btrfs_tree_block_info *info;
12141 info = (struct btrfs_tree_block_info *)(ei + 1);
12142 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12144 btrfs_release_path(&path);
12146 /* Get level from tree block as an alternative source */
12147 eb = read_tree_block(fs_info, bytenr, transid);
12148 if (!extent_buffer_uptodate(eb)) {
12149 free_extent_buffer(eb);
12152 header_level = btrfs_header_level(eb);
12153 free_extent_buffer(eb);
/* The two sources must agree. */
12155 if (header_level != backref_level)
12157 return header_level;
12160 btrfs_release_path(&path);
12165 * Check if a tree block backref is valid (points to a valid tree block)
12166 * if level == -1, level will be resolved
12167 * Return >0 for any error found and print error message
/*
 * Implementation outline: read root @root_id, take the first key of the tree
 * block at @bytenr and search the root for that key with lowest_level set to
 * @level; the node found at that level must be the block itself (same bytenr
 * and level). Failures are collected as REFERENCER_MISSING or
 * REFERENCER_MISMATCH bits in the returned error.
 */
12169 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12170 u64 bytenr, int level)
12172 struct btrfs_root *root;
12173 struct btrfs_key key;
12174 struct btrfs_path path;
12175 struct extent_buffer *eb;
12176 struct extent_buffer *node;
12177 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12181 /* Query level for level == -1 special case */
12183 level = query_tree_block_level(fs_info, bytenr);
12185 err |= REFERENCER_MISSING;
/* Look up the referencing root by its objectid. */
12189 key.objectid = root_id;
12190 key.type = BTRFS_ROOT_ITEM_KEY;
12191 key.offset = (u64)-1;
12193 root = btrfs_read_fs_root(fs_info, &key);
12194 if (IS_ERR(root)) {
12195 err |= REFERENCER_MISSING;
12199 /* Read out the tree block to get item/node key */
12200 eb = read_tree_block(fs_info, bytenr, 0);
12201 if (!extent_buffer_uptodate(eb)) {
12202 err |= REFERENCER_MISSING;
12203 free_extent_buffer(eb);
12207 /* Empty tree, no need to check key */
12208 if (!btrfs_header_nritems(eb) && !level) {
12209 free_extent_buffer(eb);
/* Node key or item key of slot 0, presumably chosen by @level - confirm. */
12214 btrfs_node_key_to_cpu(eb, &key, 0);
12216 btrfs_item_key_to_cpu(eb, &key, 0);
12218 free_extent_buffer(eb);
/* lowest_level makes the search record the node at @level in the path. */
12220 btrfs_init_path(&path);
12221 path.lowest_level = level;
12222 /* Search with the first key, to ensure we can reach it */
12223 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12225 err |= REFERENCER_MISSING;
12229 node = path.nodes[level];
12230 if (btrfs_header_bytenr(node) != bytenr) {
12232 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12233 bytenr, nodesize, bytenr,
12234 btrfs_header_bytenr(node));
12235 err |= REFERENCER_MISMATCH;
12237 if (btrfs_header_level(node) != level) {
12239 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12240 bytenr, nodesize, level,
12241 btrfs_header_level(node));
12242 err |= REFERENCER_MISMATCH;
12246 btrfs_release_path(&path);
/* Two message variants: without and with the level. */
12248 if (err & REFERENCER_MISSING) {
12250 error("extent [%llu %d] lost referencer (owner: %llu)",
12251 bytenr, nodesize, root_id);
12254 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12255 bytenr, nodesize, root_id, level);
12262 * Check if tree block @eb is tree reloc root.
12263 * Return 0 if it's not or any problem happens
12264 * Return 1 if it's a tree reloc root
12266 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12267 struct extent_buffer *eb)
12269 struct btrfs_root *tree_reloc_root;
12270 struct btrfs_key key;
12271 u64 bytenr = btrfs_header_bytenr(eb);
12272 u64 owner = btrfs_header_owner(eb);
12275 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12276 key.offset = owner;
12277 key.type = BTRFS_ROOT_ITEM_KEY;
12279 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12280 if (IS_ERR(tree_reloc_root))
12283 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12285 btrfs_free_fs_root(tree_reloc_root);
12290 * Check referencer for shared block backref
12291 * If level == -1, this function will resolve the level.
12293 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12294 u64 parent, u64 bytenr, int level)
12296 struct extent_buffer *eb;
12298 int found_parent = 0;
12301 eb = read_tree_block(fs_info, parent, 0);
12302 if (!extent_buffer_uptodate(eb))
12306 level = query_tree_block_level(fs_info, bytenr);
12310 /* It's possible it's a tree reloc root */
12311 if (parent == bytenr) {
12312 if (is_tree_reloc_root(fs_info, eb))
12317 if (level + 1 != btrfs_header_level(eb))
12320 nr = btrfs_header_nritems(eb);
12321 for (i = 0; i < nr; i++) {
12322 if (bytenr == btrfs_node_blockptr(eb, i)) {
12328 free_extent_buffer(eb);
12329 if (!found_parent) {
12331 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12332 bytenr, fs_info->nodesize, parent, level);
12333 return REFERENCER_MISSING;
12339 * Check referencer for normal (inlined) data ref
12340 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed data backref by looking at the file extent items of inode
 * @objectid in tree @root_id and comparing a found count with @count.
 *
 * @root_id:  objectid of the tree recorded in the backref
 * @objectid: inode number recorded in the backref
 * @offset:   backref offset (file offset - file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * The extent tree is consulted first for the extent item of @bytenr, then the
 * EXTENT_DATA items of the inode are walked with btrfs_next_item().  The
 * visible match condition compares disk bytenr with @bytenr, disk num bytes
 * with @len, and (file offset - file extent offset) with a value whose line
 * is not visible.  When found_count differs from @count an error is printed
 * and REFERENCER_MISSING is returned.
 *
 * NOTE(review): braces, returns, gotos, the loop header and the counter
 * increment are not visible in this listing -- confirm against the full file.
 */
12342 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12343 u64 root_id, u64 objectid, u64 offset,
12344 u64 bytenr, u64 len, u32 count)
12346 struct btrfs_root *root;
12347 struct btrfs_root *extent_root = fs_info->extent_root;
12348 struct btrfs_key key;
12349 struct btrfs_path path;
12350 struct extent_buffer *leaf;
12351 struct btrfs_file_extent_item *fi;
12352 u32 found_count = 0;
/* Search the extent tree past the last possible offset for @bytenr ... */
12357 key.objectid = bytenr;
12358 key.type = BTRFS_EXTENT_ITEM_KEY;
12359 key.offset = (u64)-1;
12361 btrfs_init_path(&path);
12362 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
/* ... then step back to the previous extent item and check its key. */
12365 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12368 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12369 if (key.objectid != bytenr ||
12370 key.type != BTRFS_EXTENT_ITEM_KEY)
12373 btrfs_release_path(&path);
/* Read the tree that the backref names as referencer. */
12375 key.objectid = root_id;
12376 key.type = BTRFS_ROOT_ITEM_KEY;
12377 key.offset = (u64)-1;
12378 btrfs_init_path(&path);
12380 root = btrfs_read_fs_root(fs_info, &key);
12384 key.objectid = objectid;
12385 key.type = BTRFS_EXTENT_DATA_KEY;
12387 * It can be nasty as data backref offset is
12388 * file offset - file extent offset, which is smaller or
12389 * equal to original backref offset. The only special case is
12390 * overflow. So we need to special check and do further search.
/* Start at file offset 0 when bit 63 of @offset is set, else at @offset. */
12392 key.offset = offset & (1ULL << 63) ? 0 : offset;
12394 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12399 * Search afterwards to get correct one
12400 * NOTE: As we must do a comprehensive check on the data backref to
12401 * make sure the dref count also matches, we must iterate all file
12402 * extents for that inode.
12405 leaf = path.nodes[0];
12406 slot = path.slots[0];
12408 if (slot >= btrfs_header_nritems(leaf))
12410 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another type are not file extents of @objectid. */
12411 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12413 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12415 * Except normal disk bytenr and disk num bytes, we still
12416 * need to do extra check on dbackref offset as
12417 * dbackref offset = file_offset - file_extent_offset
12419 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12420 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12421 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12426 ret = btrfs_next_item(root, &path);
12431 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count. */
12432 if (found_count != count) {
12434 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12435 bytenr, len, root_id, objectid, offset, count, found_count);
12436 return REFERENCER_MISSING;
12442 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the block at @parent must hold a file extent
 * item whose disk bytenr equals @bytenr.
 *
 * Items of @parent that are not EXTENT_DATA, and file extents of type INLINE,
 * are tested separately before the disk bytenr comparison.  When found_parent
 * is still 0 at the end, an error is printed and REFERENCER_MISSING is
 * returned.
 *
 * NOTE(review): braces, continue/break statements and the success return are
 * not visible in this listing -- confirm against the full file.
 */
12444 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12445 u64 parent, u64 bytenr)
12447 struct extent_buffer *eb;
12448 struct btrfs_key key;
12449 struct btrfs_file_extent_item *fi;
12451 int found_parent = 0;
/* Read the referencing block and make sure the buffer is usable. */
12454 eb = read_tree_block(fs_info, parent, 0);
12455 if (!extent_buffer_uptodate(eb))
/* Walk every item of the parent block. */
12458 nr = btrfs_header_nritems(eb);
12459 for (i = 0; i < nr; i++) {
12460 btrfs_item_key_to_cpu(eb, &key, i);
12461 if (key.type != BTRFS_EXTENT_DATA_KEY)
12464 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12465 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12468 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12475 free_extent_buffer(eb);
12476 if (!found_parent) {
12477 error("shared extent %llu referencer lost (parent: %llu)",
12479 return REFERENCER_MISSING;
12485 * Only delete backref if REFERENCER_MISSING now
12487 * Returns <0 the extent was deleted
12488 * Returns >0 the backref was deleted but extent still exists, returned value
12489 * means error after repair
12490 * Returns 0 nothing happened
/*
 * Repair step for one backref of an extent item: when @err carries
 * REFERENCER_MISSING or REFERENCER_MISMATCH, the backref described by
 * (@parent, @root_objectid, @owner, @offset) is dropped from extent
 * [@bytenr, @num_bytes] through btrfs_free_extent().
 *
 * The key at the current path position is saved first and searched again
 * afterwards, because freeing the backref may delete the extent item.
 *
 * NOTE(review): only REFERENCER_MISSING is cleared from @err on the visible
 * success line, although the branch is also entered for REFERENCER_MISMATCH.
 * NOTE(review): braces, the if/else around the two messages and the return
 * statements are not visible in this listing -- confirm against the full
 * file.
 */
12492 static int repair_extent_item(struct btrfs_trans_handle *trans,
12493 struct btrfs_root *root, struct btrfs_path *path,
12494 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12495 u64 owner, u64 offset, int err)
12497 struct btrfs_key old_key;
/* Remember where the path points so it can be looked up again later. */
12501 btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12503 if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12504 /* delete the backref */
12505 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12506 num_bytes, parent, root_objectid, owner, offset);
12509 err &= ~REFERENCER_MISSING;
12510 printf("Delete backref in extent [%llu %llu]\n",
12511 bytenr, num_bytes);
12513 error("fail to delete backref in extent [%llu %llu]",
12514 bytenr, num_bytes);
12518 /* btrfs_free_extent may delete the extent */
12519 btrfs_release_path(path);
12520 ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12530 * This function will check a given extent item, including its backref and
12531 * itself (like crossing stripe boundary and type)
12533 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item (EXTENT_ITEM or METADATA_ITEM) at the current
 * position of @path, together with every inline backref stored in it.
 *
 * Visible steps, in order:
 *  - add the extent size to the file-level counter bytes_used (key.offset in
 *    the EXTENT_ITEM branch, nodesize on the other visible assignment);
 *  - report items smaller than struct btrfs_extent_item as unsupported
 *    COMPAT_EXTENT_TREE_V0;
 *  - for metadata, run check_crossing_stripes() and set
 *    CROSSING_STRIPE_BOUNDARY on failure;
 *  - take the tree level from the btrfs_tree_block_info that follows the
 *    extent item (metadata stored under EXTENT_ITEM) or from key.offset;
 *  - walk the inline refs from the end of the fixed part up to
 *    (ei + item_size), flagging ITEM_SIZE_MISMATCH when the walk runs past
 *    the end of the item;
 *  - dispatch on the inline ref type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); an unknown type yields UNKNOWN_TYPE;
 *  - when err is set and repair is enabled, call repair_extent_item();
 *  - advance by btrfs_extent_inline_ref_size(type).
 *
 * NOTE(review): braces, the switch header, labels, gotos, several local
 * declarations and the return statements are not visible in this listing, so
 * the exact control flow cannot be confirmed from here.
 */
12535 static int check_extent_item(struct btrfs_trans_handle *trans,
12536 struct btrfs_fs_info *fs_info,
12537 struct btrfs_path *path)
12539 struct btrfs_extent_item *ei;
12540 struct btrfs_extent_inline_ref *iref;
12541 struct btrfs_extent_data_ref *dref;
12542 struct extent_buffer *eb = path->nodes[0];
12545 int slot = path->slots[0];
12547 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12548 u32 item_size = btrfs_item_size_nr(eb, slot);
12558 struct btrfs_key key;
/* Account the size of this extent in the global bytes_used counter. */
12562 btrfs_item_key_to_cpu(eb, &key, slot);
12563 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12564 bytes_used += key.offset;
12565 num_bytes = key.offset;
12567 bytes_used += nodesize;
12568 num_bytes = nodesize;
12571 if (item_size < sizeof(*ei)) {
12573 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12574 * old thing when on disk format is still un-determined.
12575 * No need to care about it anymore
12577 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12581 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12582 flags = btrfs_extent_flags(eb, ei);
12584 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12586 if (metadata && check_crossing_stripes(global_info, key.objectid,
12588 error("bad metadata [%llu, %llu) crossing stripe boundary",
12589 key.objectid, key.objectid + nodesize);
12590 err |= CROSSING_STRIPE_BOUNDARY;
12593 ptr = (unsigned long)(ei + 1);
12595 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12596 /* Old EXTENT_ITEM metadata */
12597 struct btrfs_tree_block_info *info;
12599 info = (struct btrfs_tree_block_info *)ptr;
12600 level = btrfs_tree_block_level(eb, info);
12601 ptr += sizeof(struct btrfs_tree_block_info);
12603 /* New METADATA_ITEM */
12604 level = key.offset;
12606 end = (unsigned long)ei + item_size;
12609 /* Reached extent item end normally */
12613 /* Beyond extent item end, wrong item size */
12615 err |= ITEM_SIZE_MISMATCH;
12616 error("extent item at bytenr %llu slot %d has wrong size",
12625 /* Now check every backref in this extent item */
12626 iref = (struct btrfs_extent_inline_ref *)ptr;
12627 type = btrfs_extent_inline_ref_type(eb, iref);
12628 offset = btrfs_extent_inline_ref_offset(eb, iref);
12630 case BTRFS_TREE_BLOCK_REF_KEY:
12631 root_objectid = offset;
12633 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12637 case BTRFS_SHARED_BLOCK_REF_KEY:
12639 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12643 case BTRFS_EXTENT_DATA_REF_KEY:
12644 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12645 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12646 owner = btrfs_extent_data_ref_objectid(eb, dref);
12647 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12648 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12649 owner_offset, key.objectid, key.offset,
12650 btrfs_extent_data_ref_count(eb, dref));
12653 case BTRFS_SHARED_DATA_REF_KEY:
12655 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12659 error("extent[%llu %d %llu] has unknown ref type: %d",
12660 key.objectid, key.type, key.offset, type);
12661 ret = UNKNOWN_TYPE;
12666 if (err && repair) {
12667 ret = repair_extent_item(trans, fs_info->extent_root, path,
12668 key.objectid, num_bytes, parent, root_objectid,
12669 owner, owner_offset, ret);
12678 ptr += btrfs_extent_inline_ref_size(type);
12686 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the dev extent at @slot of @eb is referenced by the chunk it
 * names.
 *
 * The chunk key is built from the chunk objectid and chunk offset stored in
 * the dev extent and looked up in the chunk tree.  The chunk is validated
 * with btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the dev extent key.  When found_chunk is
 * still 0 at the end, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): braces, gotos, the assignment of the leaf pointer and the
 * success return are not visible in this listing -- confirm against the full
 * file.
 */
12688 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12689 struct extent_buffer *eb, int slot)
12691 struct btrfs_root *chunk_root = fs_info->chunk_root;
12692 struct btrfs_dev_extent *ptr;
12693 struct btrfs_path path;
12694 struct btrfs_key chunk_key;
12695 struct btrfs_key devext_key;
12696 struct btrfs_chunk *chunk;
12697 struct extent_buffer *l;
12701 int found_chunk = 0;
12704 btrfs_item_key_to_cpu(eb, &devext_key, slot);
12705 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12706 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk this dev extent claims to belong to. */
12708 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12709 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12710 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12712 btrfs_init_path(&path);
12713 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12718 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12719 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The per-device stripe length of the chunk must equal the extent length. */
12724 if (btrfs_stripe_length(fs_info, l, chunk) != length)
/* Look for the stripe that sits on this device at this physical offset. */
12727 num_stripes = btrfs_chunk_num_stripes(l, chunk);
12728 for (i = 0; i < num_stripes; i++) {
12729 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12730 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12732 if (devid == devext_key.objectid &&
12733 offset == devext_key.offset) {
12739 btrfs_release_path(&path);
12740 if (!found_chunk) {
12742 "device extent[%llu, %llu, %llu] did not find the related chunk",
12743 devext_key.objectid, devext_key.offset, length);
12744 return REFERENCER_MISSING;
12750 * Check if the used space is correct with the dev item
/*
 * Check that the bytes_used field of the dev item at @slot of @eb equals the
 * summed length of the dev extents found for that device id in the dev tree.
 *
 * Returns REFERENCER_MISSING when the initial dev extent search fails and
 * ACCOUNTING_MISMATCH when the two totals differ.
 *
 * Both error messages print the key of the dev item itself, which is read
 * back from @eb right before each message is emitted.
 *
 * NOTE(review): braces, the loop header, breaks, several local declarations
 * and the success return are not visible in this listing -- confirm against
 * the full file.
 */
12752 static int check_dev_item(struct btrfs_fs_info *fs_info,
12753 struct extent_buffer *eb, int slot)
12755 struct btrfs_root *dev_root = fs_info->dev_root;
12756 struct btrfs_dev_item *dev_item;
12757 struct btrfs_path path;
12758 struct btrfs_key key;
12759 struct btrfs_dev_extent *ptr;
12765 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12766 dev_id = btrfs_device_id(eb, dev_item);
12767 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by (device id, DEV_EXTENT, offset). */
12769 key.objectid = dev_id;
12770 key.type = BTRFS_DEV_EXTENT_KEY;
12773 btrfs_init_path(&path);
12774 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12776 btrfs_item_key_to_cpu(eb, &key, slot);
12777 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12778 key.objectid, key.type, key.offset);
12779 btrfs_release_path(&path);
12780 return REFERENCER_MISSING;
12783 /* Iterate dev_extents to calculate the used space of a device */
12785 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12788 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12789 if (key.objectid > dev_id)
12791 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12794 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12795 struct btrfs_dev_extent);
12796 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12798 ret = btrfs_next_item(dev_root, &path);
12802 btrfs_release_path(&path);
12804 if (used != total) {
/* Reload the dev item key so the message names the item being checked. */
12805 btrfs_item_key_to_cpu(eb, &key, slot);
12807 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12808 total, used, key.objectid, key.type,
12809 key.offset);
12810 return ACCOUNTING_MISMATCH;
12816 * Check a block group item with its referencer (chunk) and its used space
12817 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb in two ways:
 *
 *  1. its chunk: the chunk item keyed (FIRST_CHUNK_TREE, CHUNK_ITEM, block
 *     group start) is searched in the chunk tree; a failed lookup sets
 *     REFERENCER_MISSING and a chunk length mismatch sets
 *     REFERENCER_MISMATCH;
 *  2. its accounting: extent and metadata items starting at the block group
 *     bytenr are walked and their sizes summed, each extent is checked
 *     against the block group flags (CHUNK_TYPE_MISMATCH), and the sum is
 *     compared with the used value of the item (BG_ACCOUNTING_ERROR).
 *
 * NOTE(review): braces, the loop header, gotos, the metadata branch of the
 * sum and the return statement are not visible in this listing -- confirm
 * against the full file.
 */
12819 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12820 struct extent_buffer *eb, int slot)
12822 struct btrfs_root *extent_root = fs_info->extent_root;
12823 struct btrfs_root *chunk_root = fs_info->chunk_root;
12824 struct btrfs_block_group_item *bi;
12825 struct btrfs_block_group_item bg_item;
12826 struct btrfs_path path;
12827 struct btrfs_key bg_key;
12828 struct btrfs_key chunk_key;
12829 struct btrfs_key extent_key;
12830 struct btrfs_chunk *chunk;
12831 struct extent_buffer *leaf;
12832 struct btrfs_extent_item *ei;
12833 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer before reading its fields. */
12841 btrfs_item_key_to_cpu(eb, &bg_key, slot);
12842 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12843 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12844 used = btrfs_block_group_used(&bg_item);
12845 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset is the logical start of the block group. */
12847 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12848 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12849 chunk_key.offset = bg_key.objectid;
12851 btrfs_init_path(&path);
12852 /* Search for the referencer chunk */
12853 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12856 "block group[%llu %llu] did not find the related chunk item",
12857 bg_key.objectid, bg_key.offset);
12858 err |= REFERENCER_MISSING;
12860 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12861 struct btrfs_chunk);
12862 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12865 "block group[%llu %llu] related chunk item length does not match",
12866 bg_key.objectid, bg_key.offset);
12867 err |= REFERENCER_MISMATCH;
12870 btrfs_release_path(&path);
12872 /* Search from the block group bytenr */
12873 extent_key.objectid = bg_key.objectid;
12874 extent_key.type = 0;
12875 extent_key.offset = 0;
12877 btrfs_init_path(&path);
12878 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12882 /* Iterate extent tree to account used space */
12884 leaf = path.nodes[0];
12886 /* Search slot can point to the last item beyond leaf nritems */
12887 if (path.slots[0] >= btrfs_header_nritems(leaf))
12890 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Items at or past the end of the block group are outside its range. */
12891 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12894 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12895 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12897 if (extent_key.objectid < bg_key.objectid)
12900 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12903 total += extent_key.offset;
/* Data extents need a DATA group; tree blocks need SYSTEM or METADATA. */
12905 ei = btrfs_item_ptr(leaf, path.slots[0],
12906 struct btrfs_extent_item);
12907 flags = btrfs_extent_flags(leaf, ei);
12908 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12909 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12911 "bad extent[%llu, %llu) type mismatch with chunk",
12912 extent_key.objectid,
12913 extent_key.objectid + extent_key.offset);
12914 err |= CHUNK_TYPE_MISMATCH;
12916 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12917 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12918 BTRFS_BLOCK_GROUP_METADATA))) {
12920 "bad extent[%llu, %llu) type mismatch with chunk",
12921 extent_key.objectid,
12922 extent_key.objectid + nodesize);
12923 err |= CHUNK_TYPE_MISMATCH;
12927 ret = btrfs_next_item(extent_root, &path);
12933 btrfs_release_path(&path);
12935 if (total != used) {
12937 "block group[%llu %llu] used %llu but extent items used %llu",
12938 bg_key.objectid, bg_key.offset, used, total);
12939 err |= BG_ACCOUNTING_ERROR;
12945 * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12946 * FIXME: We still need to repair error of dev_item.
12948 * Returns error after repair.
/*
 * Repair step for the chunk item at the current position of @path: when @err
 * has REFERENCER_MISSING set, btrfs_make_block_group() is called with the
 * type, key and length of the chunk.  On the visible success line the bit is
 * cleared from @err and a message is printed; the failure line prints an
 * error instead.
 *
 * Items whose key type is not CHUNK_ITEM are tested for up front.
 *
 * NOTE(review): braces, the if/else around the two messages and the return
 * statements are not visible in this listing -- confirm against the full
 * file.
 */
12950 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12951 struct btrfs_root *chunk_root,
12952 struct btrfs_path *path, int err)
12954 struct btrfs_chunk *chunk;
12955 struct btrfs_key chunk_key;
12956 struct extent_buffer *eb = path->nodes[0];
12958 int slot = path->slots[0];
12962 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12963 if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
/* path->nodes[0] is the same buffer as eb (see the initializer above). */
12965 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12966 type = btrfs_chunk_type(path->nodes[0], chunk);
12967 length = btrfs_chunk_length(eb, chunk);
12969 if (err & REFERENCER_MISSING) {
12970 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12971 type, chunk_key.objectid, chunk_key.offset, length);
12973 error("fail to add block group item[%llu %llu]",
12974 chunk_key.offset, length);
12977 err &= ~REFERENCER_MISSING;
12978 printf("Added block group item[%llu %llu]\n",
12979 chunk_key.offset, length);
12988 * Check a chunk item.
12989 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb against the items that refer to it:
 *
 *  - the chunk itself is validated with btrfs_check_chunk_valid(); the
 *    visible failure lines print an error and set
 *    BYTES_UNALIGNED | UNKNOWN_TYPE;
 *  - the block group item keyed (chunk start, BLOCK_GROUP_ITEM, length) is
 *    searched in the extent tree; a failed lookup or a flags mismatch sets
 *    REFERENCER_MISSING;
 *  - for every stripe, the dev extent keyed (devid, DEV_EXTENT, stripe
 *    offset) is searched in the dev tree and its chunk objectid, chunk offset
 *    and length are compared with this chunk; a mismatch sets BACKREF_MISSING.
 *
 * All messages describe the chunk as [chunk_key.offset, chunk_end), i.e. by
 * its logical start and end.
 *
 * NOTE(review): braces, labels other than the visible gotos, several local
 * declarations and the return statement are not visible in this listing --
 * confirm against the full file.
 */
12991 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12992 struct extent_buffer *eb, int slot)
12994 struct btrfs_root *extent_root = fs_info->extent_root;
12995 struct btrfs_root *dev_root = fs_info->dev_root;
12996 struct btrfs_path path;
12997 struct btrfs_key chunk_key;
12998 struct btrfs_key bg_key;
12999 struct btrfs_key devext_key;
13000 struct btrfs_chunk *chunk;
13001 struct extent_buffer *leaf;
13002 struct btrfs_block_group_item *bi;
13003 struct btrfs_block_group_item bg_item;
13004 struct btrfs_dev_extent *ptr;
13016 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13017 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13018 length = btrfs_chunk_length(eb, chunk);
13019 chunk_end = chunk_key.offset + length;
13020 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13023 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13025 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13028 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by the logical start and length. */
13030 bg_key.objectid = chunk_key.offset;
13031 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13032 bg_key.offset = length;
13034 btrfs_init_path(&path);
13035 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13038 "chunk[%llu %llu) did not find the related block group item",
13039 chunk_key.offset, chunk_end);
13040 err |= REFERENCER_MISSING;
13042 leaf = path.nodes[0];
13043 bi = btrfs_item_ptr(leaf, path.slots[0],
13044 struct btrfs_block_group_item);
13045 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13047 if (btrfs_block_group_flags(&bg_item) != type) {
13049 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13050 chunk_key.offset, chunk_end, type,
13051 btrfs_block_group_flags(&bg_item));
13052 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent that points to this chunk. */
13056 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13057 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13058 for (i = 0; i < num_stripes; i++) {
13059 btrfs_release_path(&path);
13060 btrfs_init_path(&path);
13061 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13062 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13063 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13065 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13068 goto not_match_dev;
13070 leaf = path.nodes[0];
13071 ptr = btrfs_item_ptr(leaf, path.slots[0],
13072 struct btrfs_dev_extent);
13073 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13074 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13075 if (objectid != chunk_key.objectid ||
13076 offset != chunk_key.offset ||
13077 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13078 goto not_match_dev;
13081 err |= BACKREF_MISSING;
13083 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13084 chunk_key.offset, chunk_end, i);
13087 btrfs_release_path(&path);
/*
 * Delete the item at the current position of @path from @root.
 *
 * The key is saved, the path is released and searched again with
 * ins_len = -1 and cow = 1, and the item is removed with btrfs_del_item().
 * When the path then points at slot 0, btrfs_prev_leaf() is called.  One
 * message line reports failure and another reports success; both print the
 * root objectid and the deleted key.
 *
 * NOTE(review): braces, gotos, the conditions around the two messages and
 * the return statement are not visible in this listing -- confirm against
 * the full file.
 */
13092 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13093 struct btrfs_root *root,
13094 struct btrfs_path *path)
13096 struct btrfs_key key;
/* Re-search the same key with a transaction so the leaf can be modified. */
13099 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13100 btrfs_release_path(path);
13101 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13107 ret = btrfs_del_item(trans, root, path);
13111 if (path->slots[0] == 0)
13112 btrfs_prev_leaf(root, path);
13117 error("failed to delete root %llu item[%llu, %u, %llu]",
13118 root->objectid, key.objectid, key.type, key.offset);
13120 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13121 root->objectid, key.objectid, key.type, key.offset);
13126 * Main entry function to check known items and update related accounting info
/*
 * Dispatch the items of the leaf at path->nodes[0] to the per-type checkers.
 *
 * Checker called for each key type:
 *   EXTENT_DATA                -> check_extent_data_item()
 *                                 (repair: repair_extent_data_item())
 *   BLOCK_GROUP_ITEM           -> check_block_group_item()
 *   DEV_ITEM                   -> check_dev_item()
 *   CHUNK_ITEM                 -> check_chunk_item()
 *                                 (repair: repair_chunk_item())
 *   DEV_EXTENT                 -> check_dev_extent_item()
 *   EXTENT_ITEM, METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                -> adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF             -> check_tree_block_backref()
 *   EXTENT_DATA_REF            -> check_extent_data_backref()
 *   SHARED_BLOCK_REF           -> check_shared_block_backref()
 *   SHARED_DATA_REF            -> check_shared_data_backref()
 *
 * For block group items and the four keyed backref types, the visible
 * condition lines test the checker result for REFERENCER_MISSING (and, for
 * the backrefs, REFERENCER_MISMATCH) before delete_extent_tree_item() is
 * called.
 *
 * An out-of-range starting slot is reported as an empty leaf.
 *
 * NOTE(review): braces, the switch header, breaks, the first half of the
 * repair conditions, the loop over slots and the return statement are not
 * visible in this listing -- confirm against the full file.
 */
13128 static int check_leaf_items(struct btrfs_trans_handle *trans,
13129 struct btrfs_root *root, struct btrfs_path *path,
13130 struct node_refs *nrefs, int account_bytes)
13132 struct btrfs_fs_info *fs_info = root->fs_info;
13133 struct btrfs_key key;
13134 struct extent_buffer *eb;
13137 struct btrfs_extent_data_ref *dref;
13142 eb = path->nodes[0];
13143 slot = path->slots[0];
13144 if (slot >= btrfs_header_nritems(eb)) {
13146 error("empty leaf [%llu %u] root %llu", eb->start,
13147 root->fs_info->nodesize, root->objectid);
13153 btrfs_item_key_to_cpu(eb, &key, slot);
13157 case BTRFS_EXTENT_DATA_KEY:
13158 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13160 ret = repair_extent_data_item(trans, root, path, nrefs,
13164 case BTRFS_BLOCK_GROUP_ITEM_KEY:
13165 ret = check_block_group_item(fs_info, eb, slot);
13167 ret & REFERENCER_MISSING)
13168 ret = delete_extent_tree_item(trans, root, path);
13171 case BTRFS_DEV_ITEM_KEY:
13172 ret = check_dev_item(fs_info, eb, slot);
13175 case BTRFS_CHUNK_ITEM_KEY:
13176 ret = check_chunk_item(fs_info, eb, slot);
13178 ret = repair_chunk_item(trans, root, path, ret);
13181 case BTRFS_DEV_EXTENT_KEY:
13182 ret = check_dev_extent_item(fs_info, eb, slot);
13185 case BTRFS_EXTENT_ITEM_KEY:
13186 case BTRFS_METADATA_ITEM_KEY:
13187 ret = check_extent_item(trans, fs_info, path);
13190 case BTRFS_EXTENT_CSUM_KEY:
13191 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The cases below handle backrefs stored as separate (keyed) items. */
13194 case BTRFS_TREE_BLOCK_REF_KEY:
13195 ret = check_tree_block_backref(fs_info, key.offset,
13198 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13199 ret = delete_extent_tree_item(trans, root, path);
13202 case BTRFS_EXTENT_DATA_REF_KEY:
13203 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13204 ret = check_extent_data_backref(fs_info,
13205 btrfs_extent_data_ref_root(eb, dref),
13206 btrfs_extent_data_ref_objectid(eb, dref),
13207 btrfs_extent_data_ref_offset(eb, dref),
13209 btrfs_extent_data_ref_count(eb, dref));
13211 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13212 ret = delete_extent_tree_item(trans, root, path);
13215 case BTRFS_SHARED_BLOCK_REF_KEY:
13216 ret = check_shared_block_backref(fs_info, key.offset,
13219 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13220 ret = delete_extent_tree_item(trans, root, path);
13223 case BTRFS_SHARED_DATA_REF_KEY:
13224 ret = check_shared_data_backref(fs_info, key.offset,
13227 ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13228 ret = delete_extent_tree_item(trans, root, path);
/* Forward declaration: defined further down, called by the lowmem checker. */
13241 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13244 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory implementation of the chunk and extent check.
 *
 * Visible steps, in order:
 *  - pin all metadata blocks and start a transaction on the extent root
 *    (NOTE(review): presumably only in repair mode -- the guarding condition
 *    is not visible here);
 *  - run check_btrfs_root() on the chunk root and on the tree root;
 *  - search the tree root starting at the EXTENT_TREE objectid and, for each
 *    ROOT_ITEM found, read that tree and run check_btrfs_root() on it; tree
 *    reloc roots are read without caching and freed after the check;
 *  - after each tree, release the path and search again from old_key before
 *    moving on with btrfs_next_item();
 *  - call btrfs_fix_block_accounting(), clear BG_ACCOUNTING_ERROR from err
 *    and commit the transaction.
 *
 * NOTE(review): braces, the loop header, gotos, labels, the accumulation of
 * results into err and the return statement are not visible in this listing
 * -- confirm against the full file.
 */
13246 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13248 struct btrfs_trans_handle *trans = NULL;
13249 struct btrfs_path path;
13250 struct btrfs_key old_key;
13251 struct btrfs_key key;
13252 struct btrfs_root *root1;
13253 struct btrfs_root *root;
13254 struct btrfs_root *cur_root;
13258 root = fs_info->fs_root;
13261 /* pin every tree block to avoid extent overwrite */
13262 ret = pin_metadata_blocks(fs_info);
13264 error("failed to pin metadata blocks");
13267 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13268 if (IS_ERR(trans)) {
13269 error("failed to start transaction before check");
13270 return PTR_ERR(trans);
/* The chunk tree and the tree root are checked before all other trees. */
13274 root1 = root->fs_info->chunk_root;
13275 ret = check_btrfs_root(trans, root1, 0, 1);
13278 root1 = root->fs_info->tree_root;
13279 ret = check_btrfs_root(trans, root1, 0, 1);
/* Walk the root items of the tree root, starting at the extent tree. */
13282 btrfs_init_path(&path);
13283 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13285 key.type = BTRFS_ROOT_ITEM_KEY;
13287 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13289 error("cannot find extent tree in tree_root");
13294 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13295 if (key.type != BTRFS_ROOT_ITEM_KEY)
13298 key.offset = (u64)-1;
/* Tree reloc roots are read uncached and freed right after the check. */
13300 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13301 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13304 cur_root = btrfs_read_fs_root(root->fs_info, &key);
13305 if (IS_ERR(cur_root) || !cur_root) {
13306 error("failed to read tree: %lld", key.objectid);
13310 ret = check_btrfs_root(trans, cur_root, 0, 1);
13313 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13314 btrfs_free_fs_root(cur_root);
/* Look the saved key up again before stepping to the next root item. */
13316 btrfs_release_path(&path);
13317 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13318 &old_key, &path, 0, 0);
13322 ret = btrfs_next_item(root1, &path);
13328 /* if repair, update block accounting */
13330 ret = btrfs_fix_block_accounting(trans, root);
13334 err &= ~BG_ACCOUNTING_ERROR;
13338 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13340 btrfs_release_path(&path);
/*
 * Entry point for the chunk and extent check: prints a progress line to
 * stderr when the progress indicator is disabled, then runs the lowmem
 * implementation when check_mode is CHECK_MODE_LOWMEM and
 * check_chunks_and_extents() on the other visible branch.
 *
 * NOTE(review): the else keyword and the return statement are not visible in
 * this listing -- confirm against the full file.
 */
13345 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13349 if (!ctx.progress_enabled)
13350 fprintf(stderr, "checking extents\n");
13351 if (check_mode == CHECK_MODE_LOWMEM)
13352 ret = check_chunks_and_extents_v2(fs_info);
13354 ret = check_chunks_and_extents(fs_info);
/*
 * Replace the root node of @root with an empty, freshly initialised block.
 *
 * A block is obtained (one visible path calls btrfs_alloc_free_block(); the
 * @overwrite path is only partly visible), its header is zeroed and filled
 * in with level, bytenr, generation, backref revision, owner, fsid and chunk
 * tree uuid, and the buffer is marked dirty.  When the new block sits at the
 * same bytenr as the old root node, the root item generation and level are
 * updated and written with btrfs_update_root(); the old buffer is freed and
 * the root is added to the dirty list.
 *
 * NOTE(review): braces, the if/else on @overwrite, the assignment of the new
 * node into @root, error checks and the return statement are not visible in
 * this listing -- confirm against the full file.
 */
13359 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13360 struct btrfs_root *root, int overwrite)
13362 struct extent_buffer *c;
13363 struct extent_buffer *old = root->node;
13366 struct btrfs_disk_key disk_key = {0,0,0};
13372 extent_buffer_get(c);
13375 c = btrfs_alloc_free_block(trans, root,
13376 root->fs_info->nodesize,
13377 root->root_key.objectid,
13378 &disk_key, level, 0, 0);
13381 extent_buffer_get(c);
/* Start from an all-zero header and fill in the mandatory fields. */
13385 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13386 btrfs_set_header_level(c, level);
13387 btrfs_set_header_bytenr(c, c->start);
13388 btrfs_set_header_generation(c, trans->transid);
13389 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13390 btrfs_set_header_owner(c, root->root_key.objectid);
13392 write_extent_buffer(c, root->fs_info->fsid,
13393 btrfs_header_fsid(), BTRFS_FSID_SIZE);
13395 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13396 btrfs_header_chunk_tree_uuid(c),
13399 btrfs_mark_buffer_dirty(c);
13401 * this case can happen in the following case:
13403 * 1.overwrite previous root.
13405 * 2.reinit reloc data root, this is because we skip pin
13406 * down reloc data tree before which means we can allocate
13407 * same block bytenr here.
13409 if (old->start == c->start) {
13410 btrfs_set_root_generation(&root->root_item,
13412 root->root_item.level = btrfs_header_level(root->node);
13413 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13414 &root->root_key, &root->root_item);
13416 free_extent_buffer(c);
13420 free_extent_buffer(old);
13422 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and walk the blocks reachable from it.
 *
 * @tree_root: non-zero while walking the tree of tree roots.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is tested for first; otherwise its range is pinned
 * with btrfs_pin_extent().  The items are then walked:
 *  - on the leaf path, ROOT_ITEMs are looked at, the extent root and the
 *    reloc roots are filtered out, and the root block of each remaining tree
 *    is read and walked recursively with @tree_root = 0;
 *  - on the node path, each block pointer is either pinned directly by
 *    nodesize (level 1 outside the tree root) or read and walked
 *    recursively with the same @tree_root.
 * A block that cannot be read is reported on stderr.
 *
 * NOTE(review): braces, the leaf/node branch conditions, continue
 * statements, error propagation and the return statements are not visible
 * in this listing -- confirm against the full file.
 */
13426 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13427 struct extent_buffer *eb, int tree_root)
13429 struct extent_buffer *tmp;
13430 struct btrfs_root_item *ri;
13431 struct btrfs_key key;
13433 int level = btrfs_header_level(eb);
13439 * If we have pinned this block before, don't pin it again.
13440 * This can not only avoid forever loop with broken filesystem
13441 * but also give us some speedups.
13443 if (test_range_bit(&fs_info->pinned_extents, eb->start,
13444 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13447 btrfs_pin_extent(fs_info, eb->start, eb->len);
13449 nritems = btrfs_header_nritems(eb);
13450 for (i = 0; i < nritems; i++) {
13452 btrfs_item_key_to_cpu(eb, &key, i);
13453 if (key.type != BTRFS_ROOT_ITEM_KEY)
13455 /* Skip the extent root and reloc roots */
13456 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13457 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13458 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13460 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13461 bytenr = btrfs_disk_root_bytenr(eb, ri);
13464 * If at any point we start needing the real root we
13465 * will have to build a stump root for the root we are
13466 * in, but for now this doesn't actually use the root so
13467 * just pass in extent_root.
13469 tmp = read_tree_block(fs_info, bytenr, 0);
13470 if (!extent_buffer_uptodate(tmp)) {
13471 fprintf(stderr, "Error reading root block\n");
13474 ret = pin_down_tree_blocks(fs_info, tmp, 0);
13475 free_extent_buffer(tmp);
13479 bytenr = btrfs_node_blockptr(eb, i);
13481 /* If we aren't the tree root don't read the block */
13482 if (level == 1 && !tree_root) {
13483 btrfs_pin_extent(fs_info, bytenr,
13484 fs_info->nodesize);
13488 tmp = read_tree_block(fs_info, bytenr, 0);
13489 if (!extent_buffer_uptodate(tmp)) {
13490 fprintf(stderr, "Error reading tree block\n");
13493 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13494 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (walked with tree_root = 0)
 * and then from the tree root (walked with tree_root = 1).  The result of
 * the second walk is returned directly.
 *
 * NOTE(review): the check on the result of the first walk is not visible in
 * this listing -- confirm against the full file.
 */
13503 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13507 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13511 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The three avail_*_alloc_bits fields of @fs_info are cleared, then the
 * chunk tree is walked: for every CHUNK_ITEM a block group is added with
 * btrfs_add_block_group() using the chunk type, key and length, and the
 * range starting at the chunk offset is marked dirty in
 * fs_info->free_space_cache.  A second walk steps through the block groups
 * with btrfs_lookup_first_block_group(), advancing start past each one; the
 * work done per block group in that walk is not visible here.
 *
 * NOTE(review): braces, loop headers, the key fields of the initial search,
 * error handling and the return statements are not visible in this listing
 * -- confirm against the full file.
 */
13514 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13516 struct btrfs_block_group_cache *cache;
13517 struct btrfs_path path;
13518 struct extent_buffer *leaf;
13519 struct btrfs_chunk *chunk;
13520 struct btrfs_key key;
/* Position the path in the chunk tree before the walk starts. */
13524 btrfs_init_path(&path);
13526 key.type = BTRFS_CHUNK_ITEM_KEY;
13528 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13530 btrfs_release_path(&path);
13535 * We do this in case the block groups were screwed up and had alloc
13536 * bits that aren't actually set on the chunks. This happens with
13537 * restored images every time and could happen in real life I guess.
13539 fs_info->avail_data_alloc_bits = 0;
13540 fs_info->avail_metadata_alloc_bits = 0;
13541 fs_info->avail_system_alloc_bits = 0;
13543 /* First we need to create the in-memory block groups */
13545 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13546 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13548 btrfs_release_path(&path);
13556 leaf = path.nodes[0];
13557 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13558 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13563 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13564 btrfs_add_block_group(fs_info, 0,
13565 btrfs_chunk_type(leaf, chunk),
13566 key.objectid, key.offset,
13567 btrfs_chunk_length(leaf, chunk));
13568 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13569 key.offset + btrfs_chunk_length(leaf, chunk));
13574 cache = btrfs_lookup_first_block_group(fs_info, start);
13578 start = cache->key.objectid + cache->key.offset;
13581 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root and reinitialise the data reloc
 * tree.
 *
 * Visible steps, in order:
 *  - search for the (BALANCE_OBJECTID, BALANCE_ITEM) key and delete the item
 *    with btrfs_del_item(); one visible path jumps to reinit_data_reloc
 *    instead;
 *  - search from (TREE_RELOC_OBJECTID, ROOT_ITEM) and delete runs of items
 *    with btrfs_del_items(), re-searching after each batch; the walk tests
 *    each key against TREE_RELOC_OBJECTID;
 *  - read the data reloc tree, record it in the transaction, reinitialise
 *    its root with btrfs_fsck_reinit_root() and recreate its root directory
 *    with btrfs_make_root_dir().
 *
 * NOTE(review): braces, loop headers, labels, the del_nr bookkeeping, error
 * checks and the return statement are not visible in this listing -- confirm
 * against the full file.
 */
13585 static int reset_balance(struct btrfs_trans_handle *trans,
13586 struct btrfs_fs_info *fs_info)
13588 struct btrfs_root *root = fs_info->tree_root;
13589 struct btrfs_path path;
13590 struct extent_buffer *leaf;
13591 struct btrfs_key key;
13592 int del_slot, del_nr = 0;
/* Step 1: look up and delete the balance item. */
13596 btrfs_init_path(&path);
13597 key.objectid = BTRFS_BALANCE_OBJECTID;
13598 key.type = BTRFS_BALANCE_ITEM_KEY;
13600 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13605 goto reinit_data_reloc;
13610 ret = btrfs_del_item(trans, root, &path);
13613 btrfs_release_path(&path);
/* Step 2: delete the root items of tree reloc trees in batches. */
13615 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13616 key.type = BTRFS_ROOT_ITEM_KEY;
13618 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13622 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13627 ret = btrfs_del_items(trans, root, &path,
13634 btrfs_release_path(&path);
13637 ret = btrfs_search_slot(trans, root, &key, &path,
13644 leaf = path.nodes[0];
13645 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13646 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13648 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13653 del_slot = path.slots[0];
13662 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13666 btrfs_release_path(&path);
/* Step 3: reinitialise the data reloc tree and its root directory. */
13669 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13670 key.type = BTRFS_ROOT_ITEM_KEY;
13671 key.offset = (u64)-1;
13672 root = btrfs_read_fs_root(fs_info, &key);
13673 if (IS_ERR(root)) {
13674 fprintf(stderr, "Error reading data reloc tree\n");
13675 ret = PTR_ERR(root);
13678 record_root_in_trans(trans, root);
13679 ret = btrfs_fsck_reinit_root(trans, root, 0);
13682 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13684 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Visible sequence of steps:
 *  - refuse filesystems with the MIXED_GROUPS incompat flag,
 *  - pin all metadata blocks currently in use (pin_metadata_blocks),
 *  - free the in-memory block groups and rebuild them (reset_block_groups),
 *  - reinitialize the extent root,
 *  - insert a block group item for every in-memory block group,
 *  - reset any pending balance (reset_balance).
 *
 * Each failing step prints a message to stderr.
 *
 * @trans:   running transaction used for all tree modifications
 * @fs_info: filesystem whose extent tree is recreated
 *
 * NOTE(review): presumably returns 0 on success and a negative error
 * otherwise - confirm against the return paths.
 */
13688 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13689 struct btrfs_fs_info *fs_info)
13695 * The only reason we don't do this is because right now we're just
13696 * walking the trees we find and pinning down their bytes, we don't look
13697 * at any of the leaves. In order to do mixed groups we'd have to check
13698 * the leaves of any fs roots and pin down the bytes for any file
13699 * extents we find. Not hard but why do it if we don't have to?
13701 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13702 fprintf(stderr, "We don't support re-initing the extent tree "
13703 "for mixed block groups yet, please notify a btrfs "
13704 "developer you want to do this so they can add this "
13705 "functionality.\n");
13710 * first we need to walk all of the trees except the extent tree and pin
13711 * down the bytes that are in use so we don't overwrite any existing
13714 ret = pin_metadata_blocks(fs_info);
13716 fprintf(stderr, "error pinning down used bytes\n");
13721 * Need to drop all the block groups since we're going to recreate all
13724 btrfs_free_block_groups(fs_info);
13725 ret = reset_block_groups(fs_info);
13727 fprintf(stderr, "error resetting the block groups\n");
13731 /* Ok we can allocate now, reinit the extent root */
13732 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13734 fprintf(stderr, "extent root initialization failed\n");
13736 * When the transaction code is updated we should end the
13737 * transaction, but for now progs only knows about commit so
13738 * just return an error.
13744 * Now we have all the in-memory block groups setup so we can make
13745 * allocations properly, and the metadata we care about is safe since we
13746 * pinned all of it above.
13749 struct btrfs_block_group_cache *cache;
13751 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance the cursor to the end of this block group for the next lookup */
13754 start = cache->key.objectid + cache->key.offset;
/* Write the in-memory block group item into the new extent tree */
13755 ret = btrfs_insert_item(trans, fs_info->extent_root,
13756 &cache->key, &cache->item,
13757 sizeof(cache->item));
13759 fprintf(stderr, "Error adding block group\n");
13762 btrfs_extent_post_op(trans, fs_info->extent_root);
/* Also drop any pending balance state */
13765 ret = reset_balance(trans, fs_info);
13767 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW the metadata block @eb.
 *
 * The root owning the block is looked up from the owner recorded in the
 * block header. A transaction is started on that root and
 * btrfs_search_slot() is run with cow = 1, the block's first key and
 * path.lowest_level set to the block's level; the transaction is then
 * committed.
 *
 * @root: only root->fs_info is read; the variable is then reused for the
 *        owner root
 * @eb:   tree block to be rewritten
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored;
 * the final return value is presumably the search result - confirm.
 */
13772 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13774 struct btrfs_path path;
13775 struct btrfs_trans_handle *trans;
13776 struct btrfs_key key;
13779 printf("Recowing metadata block %llu\n", eb->start);
/* Build the root key of the tree that owns this block */
13780 key.objectid = btrfs_header_owner(eb);
13781 key.type = BTRFS_ROOT_ITEM_KEY;
13782 key.offset = (u64)-1;
13784 root = btrfs_read_fs_root(root->fs_info, &key);
13785 if (IS_ERR(root)) {
13786 fprintf(stderr, "Couldn't find owner root %llu\n",
13788 return PTR_ERR(root);
13791 trans = btrfs_start_transaction(root, 1);
13793 return PTR_ERR(trans);
13795 btrfs_init_path(&path);
/* Stop the search at the level of @eb instead of descending further */
13796 path.lowest_level = btrfs_header_level(eb);
/* A non-zero level means @eb is a node, so read a node key; else an item key */
13797 if (path.lowest_level)
13798 btrfs_node_key_to_cpu(eb, &key, 0);
13800 btrfs_item_key_to_cpu(eb, &key, 0);
13802 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13803 btrfs_commit_transaction(trans, root);
13804 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree identified by
 * bad->root_id.
 *
 * The owning root is read via its root key, a transaction is started on
 * it, the item is located with btrfs_search_slot() (ins_len = -1, cow = 1)
 * and removed with btrfs_del_item(); the transaction is then committed.
 *
 * @root: only root->fs_info is read; the variable is then reused for the
 *        owner root
 * @bad:  root id and key of the item to delete
 *
 * Returns PTR_ERR() of the root lookup or of the transaction start when
 * either fails.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored;
 * confirm which value is returned at the end.
 */
13808 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13810 struct btrfs_path path;
13811 struct btrfs_trans_handle *trans;
13812 struct btrfs_key key;
13815 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13816 bad->key.type, bad->key.offset);
/* Root key of the tree that contains the bad item */
13817 key.objectid = bad->root_id;
13818 key.type = BTRFS_ROOT_ITEM_KEY;
13819 key.offset = (u64)-1;
13821 root = btrfs_read_fs_root(root->fs_info, &key);
13822 if (IS_ERR(root)) {
13823 fprintf(stderr, "Couldn't find owner root %llu\n",
13825 return PTR_ERR(root);
13828 trans = btrfs_start_transaction(root, 1);
13830 return PTR_ERR(trans);
13832 btrfs_init_path(&path);
13833 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13839 ret = btrfs_del_item(trans, root, &path);
13841 btrfs_commit_transaction(trans, root);
13842 btrfs_release_path(&path);
/*
 * Drop the log tree reference from the superblock copy: the log root and
 * the log root level are both set to 0 and a transaction is committed.
 *
 * @root: root used to start and commit the transaction
 *
 * ret is set to PTR_ERR(trans) when the transaction cannot be started and
 * to the result of btrfs_commit_transaction() otherwise.
 */
13846 static int zero_log_tree(struct btrfs_root *root)
13848 struct btrfs_trans_handle *trans;
13851 trans = btrfs_start_transaction(root, 1);
13852 if (IS_ERR(trans)) {
13853 ret = PTR_ERR(trans);
13856 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13857 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13858 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum a data range and add the checksums to @csum_root.
 *
 * The range starting at @start is walked in steps of fs_info->sectorsize
 * until "len" bytes are covered: each step reads the data into @buf with
 * read_extent_data() and passes it to btrfs_csum_file_block().
 *
 * @trans:     transaction handle for the csum tree insertions
 * @csum_root: checksum tree to fill
 * @buf:       scratch buffer; the callers in this file allocate
 *             fs_info->sectorsize bytes for it
 * @start:     first byte of the range (a disk bytenr at the call sites)
 */
13862 static int populate_csum(struct btrfs_trans_handle *trans,
13863 struct btrfs_root *csum_root, char *buf, u64 start,
13866 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13871 while (offset < len) {
13872 sectorsize = fs_info->sectorsize;
13873 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len is passed as the end of the whole range being checksummed */
13877 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13878 start + offset, buf, sectorsize);
13881 offset += sectorsize;
/*
 * Fill the csum tree from the file extents of a single fs/subvolume tree.
 *
 * Items of @cur_root are visited one by one with btrfs_next_item(). For
 * items of type BTRFS_EXTENT_DATA_KEY whose file extent type is
 * BTRFS_FILE_EXTENT_REG, the on-disk extent (disk_bytenr, disk_num_bytes)
 * is handed to populate_csum().
 *
 * @trans:     transaction handle for the csum tree insertions
 * @csum_root: checksum tree to fill
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 */
13886 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13887 struct btrfs_root *csum_root,
13888 struct btrfs_root *cur_root)
13890 struct btrfs_path path;
13891 struct btrfs_key key;
13892 struct extent_buffer *node;
13893 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, reused for every populate_csum() call */
13900 buf = malloc(cur_root->fs_info->sectorsize);
13904 btrfs_init_path(&path);
/* Read-only search: no transaction handle, ins_len = 0, cow = 0 */
13908 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13911 /* Iterate all regular file extents and fill its csum */
13913 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13915 if (key.type != BTRFS_EXTENT_DATA_KEY)
13917 node = path.nodes[0];
13918 slot = path.slots[0];
13919 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13920 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range that refers to it */
13922 start = btrfs_file_extent_disk_bytenr(node, fi);
13923 len = btrfs_file_extent_disk_num_bytes(node, fi);
13925 ret = populate_csum(trans, csum_root, buf, start, len);
/* NOTE(review): -EEXIST presumably means the csum was already added - confirm */
13926 if (ret == -EEXIST)
13932 * TODO: if next leaf is corrupted, jump to nearest next valid
13935 ret = btrfs_next_item(cur_root, &path);
13945 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning every fs/subvolume tree.
 *
 * Root items in fs_info->tree_root are walked starting at
 * BTRFS_FS_TREE_OBJECTID. Each item of type BTRFS_ROOT_ITEM_KEY whose
 * objectid passes is_fstree() is opened with btrfs_read_fs_root() and
 * passed to fill_csum_tree_from_one_fs_root().
 *
 * @trans:     transaction handle for the csum tree insertions
 * @csum_root: checksum tree to fill
 */
13950 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13951 struct btrfs_root *csum_root)
13953 struct btrfs_fs_info *fs_info = csum_root->fs_info;
13954 struct btrfs_path path;
13955 struct btrfs_root *tree_root = fs_info->tree_root;
13956 struct btrfs_root *cur_root;
13957 struct extent_buffer *node;
13958 struct btrfs_key key;
13962 btrfs_init_path(&path);
13963 key.objectid = BTRFS_FS_TREE_OBJECTID;
13965 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read-only search: no transaction handle, ins_len = 0, cow = 0 */
13966 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13975 node = path.nodes[0];
13976 slot = path.slots[0];
13977 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range */
13978 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13980 if (key.type != BTRFS_ROOT_ITEM_KEY)
13982 if (!is_fstree(key.objectid))
13984 key.offset = (u64)-1;
13986 cur_root = btrfs_read_fs_root(fs_info, &key);
/* NOTE(review): verify that ret carries an error when this branch is taken */
13987 if (IS_ERR(cur_root) || !cur_root) {
13988 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13992 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13997 ret = btrfs_next_item(tree_root, &path);
14007 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the extent tree.
 *
 * Leaves of fs_info->extent_root are walked with btrfs_next_leaf(). For
 * every item of type BTRFS_EXTENT_ITEM_KEY whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the extent's
 * start (key.objectid).
 *
 * @trans:     transaction handle for the csum tree insertions
 * @csum_root: checksum tree to fill
 */
14011 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14012 struct btrfs_root *csum_root)
14014 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14015 struct btrfs_path path;
14016 struct btrfs_extent_item *ei;
14017 struct extent_buffer *leaf;
14019 struct btrfs_key key;
14022 btrfs_init_path(&path);
14024 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only search: no transaction handle, ins_len = 0, cow = 0 */
14026 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14028 btrfs_release_path(&path);
/* One sector of scratch space, reused for every populate_csum() call */
14032 buf = malloc(csum_root->fs_info->sectorsize);
14034 btrfs_release_path(&path);
/* Slot past the last item of the leaf: move on to the next leaf */
14039 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14040 ret = btrfs_next_leaf(extent_root, &path);
14048 leaf = path.nodes[0];
14050 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14051 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14056 ei = btrfs_item_ptr(leaf, path.slots[0],
14057 struct btrfs_extent_item);
/* Only extents flagged as data are checksummed */
14058 if (!(btrfs_extent_flags(leaf, ei) &
14059 BTRFS_EXTENT_FLAG_DATA)) {
14064 ret = populate_csum(trans, csum_root, buf, key.objectid,
14071 btrfs_release_path(&path);
14077 * Recalculate the csum and put it into the csum tree.
14079 * Extent tree init will wipe out all the extent info, so in that case we
14080 * can't depend on the extent tree and must use the fs trees instead. If
14081 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two csum tree fillers.
 *
 * @trans:          transaction handle, passed through
 * @csum_root:      checksum tree to fill, passed through
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns the result of the selected filler.
 */
14083 static int fill_csum_tree(struct btrfs_trans_handle *trans,
14084 struct btrfs_root *csum_root,
14085 int search_fs_tree)
14087 if (search_fs_tree)
14088 return fill_csum_tree_from_fs(trans, csum_root);
14090 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: entries are removed from the
 * cache tree one at a time until it is empty, then the tree itself is
 * freed and the global pointer is reset to NULL.
 */
14093 static void free_roots_info_cache(void)
14095 if (!roots_info_cache)
14098 while (!cache_tree_empty(roots_info_cache)) {
14099 struct cache_extent *entry;
14100 struct root_item_info *rii;
14102 entry = first_cache_extent(roots_info_cache);
14105 remove_cache_extent(roots_info_cache, entry);
/* Each cache_extent is embedded in a root_item_info; recover the container */
14106 rii = container_of(entry, struct root_item_info, cache_extent);
14110 free(roots_info_cache);
14111 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * The cache tree is allocated on first use. The extent tree is then
 * scanned for tree block extents: BTRFS_METADATA_ITEM_KEY items, and
 * BTRFS_EXTENT_ITEM_KEY items carrying BTRFS_EXTENT_FLAG_TREE_BLOCK. For
 * each one the level and the first inline ref are read. When that ref is
 * of type BTRFS_TREE_BLOCK_REF_KEY, its offset is taken as the root id and
 * a root_item_info entry keyed by that id is looked up or inserted. The
 * entry tracks the bytenr and generation of the highest-level block seen
 * for that root.
 *
 * @info: filesystem whose extent_root is scanned
 */
14114 static int build_roots_info_cache(struct btrfs_fs_info *info)
14117 struct btrfs_key key;
14118 struct extent_buffer *leaf;
14119 struct btrfs_path path;
14121 if (!roots_info_cache) {
14122 roots_info_cache = malloc(sizeof(*roots_info_cache));
14123 if (!roots_info_cache)
14125 cache_tree_init(roots_info_cache);
14128 btrfs_init_path(&path);
14130 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only search: no transaction handle, ins_len = 0, cow = 0 */
14132 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14135 leaf = path.nodes[0];
14138 struct btrfs_key found_key;
14139 struct btrfs_extent_item *ei;
14140 struct btrfs_extent_inline_ref *iref;
14141 int slot = path.slots[0];
14146 struct cache_extent *entry;
14147 struct root_item_info *rii;
/* Slot past the last item of the leaf: move on to the next leaf */
14149 if (slot >= btrfs_header_nritems(leaf)) {
14150 ret = btrfs_next_leaf(info->extent_root, &path);
14157 leaf = path.nodes[0];
14158 slot = path.slots[0];
14161 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14163 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14164 found_key.type != BTRFS_METADATA_ITEM_KEY)
14167 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14168 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block */
14170 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14171 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref follows the extent item directly and the
 * level is taken from the key offset. Otherwise a btrfs_tree_block_info
 * sits in between and holds the level.
 */
14174 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14175 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14176 level = found_key.offset;
14178 struct btrfs_tree_block_info *binfo;
14180 binfo = (struct btrfs_tree_block_info *)(ei + 1);
14181 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14182 level = btrfs_tree_block_level(leaf, binfo);
14186 * For a root extent, it must be of the following type and the
14187 * first (and only one) iref in the item.
14189 type = btrfs_extent_inline_ref_type(leaf, iref);
14190 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14193 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14194 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14196 rii = malloc(sizeof(struct root_item_info));
/* New entry: keyed by root id, size 1; level (u8)-1 marks "no block seen yet" */
14201 rii->cache_extent.start = root_id;
14202 rii->cache_extent.size = 1;
14203 rii->level = (u8)-1;
14204 entry = &rii->cache_extent;
14205 ret = insert_cache_extent(roots_info_cache, entry);
14208 rii = container_of(entry, struct root_item_info,
14212 ASSERT(rii->cache_extent.start == root_id);
14213 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the recorded candidate */
14215 if (level > rii->level || rii->level == (u8)-1) {
14216 rii->level = level;
14217 rii->bytenr = found_key.objectid;
14218 rii->gen = btrfs_extent_generation(leaf, ei);
14219 rii->node_count = 1;
14220 } else if (level == rii->level) {
14228 btrfs_release_path(&path);
/*
 * Compare the root item at @path with the information collected in
 * roots_info_cache and, unless @read_only_mode is set, rewrite it.
 *
 * The cache entry for root_key->objectid supplies the expected bytenr,
 * level and generation of the tree root block. If any of the three
 * differs from the root item, a message is printed and, when
 * @read_only_mode is zero, the root item is updated in the leaf.
 *
 * @path:           path positioned on the root item in the root tree leaf
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero to only detect the mismatch, zero to fix it
 *
 * NOTE(review): return values are not documented here - confirm against
 * the return paths and the caller, repair_root_items().
 */
14233 static int maybe_repair_root_item(struct btrfs_path *path,
14234 const struct btrfs_key *root_key,
14235 const int read_only_mode)
14237 const u64 root_id = root_key->objectid;
14238 struct cache_extent *entry;
14239 struct root_item_info *rii;
14240 struct btrfs_root_item ri;
14241 unsigned long offset;
14243 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14246 "Error: could not find extent items for root %llu\n",
14247 root_key->objectid);
14251 rii = container_of(entry, struct root_item_info, cache_extent);
14252 ASSERT(rii->cache_extent.start == root_id);
14253 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate block at the top level is an error */
14255 if (rii->node_count != 1) {
14257 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf so it can be inspected and modified */
14262 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14263 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14265 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14266 btrfs_root_level(&ri) != rii->level ||
14267 btrfs_root_generation(&ri) != rii->gen) {
14270 * If we're in repair mode but our caller told us to not update
14271 * the root item, i.e. just check if it needs to be updated, don't
14272 * print this message, since the caller will call us again shortly
14273 * for the same root item without read only mode (the caller will
14274 * open a transaction first).
14276 if (!(read_only_mode && repair))
14278 "%sroot item for root %llu,"
14279 " current bytenr %llu, current gen %llu, current level %u,"
14280 " new bytenr %llu, new gen %llu, new level %u\n",
14281 (read_only_mode ? "" : "fixing "),
14283 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14284 btrfs_root_level(&ri),
14285 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the block found in the extent tree is reported */
14287 if (btrfs_root_generation(&ri) > rii->gen) {
14289 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14290 root_id, btrfs_root_generation(&ri), rii->gen);
14294 if (!read_only_mode) {
14295 btrfs_set_root_bytenr(&ri, rii->bytenr);
14296 btrfs_set_root_level(&ri, rii->level);
14297 btrfs_set_root_generation(&ri, rii->gen);
/* Write the corrected copy back over the item in the leaf */
14298 write_extent_buffer(path->nodes[0], &ri,
14299 offset, sizeof(ri));
14309 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14310 * caused read-only snapshots to be corrupted if they were created at a moment
14311 * when the source subvolume/snapshot had orphan items. The issue was that the
14312 * on-disk root items became incorrect, referring to the pre orphan cleanup root
14313 * node instead of the post orphan cleanup root node.
14314 * So this function, and its callees, just detects and fixes those cases. Even
14315 * though the regression was for read-only snapshots, this function applies to
14316 * any snapshot/subvolume root.
14317 * This must be run before any other repair code - not doing so makes other
14318 * repair code delete or modify backrefs in the extent tree for example, which
14319 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in info->tree_root, starting at
 * BTRFS_FIRST_FREE_OBJECTID, and check each one with
 * maybe_repair_root_item(). Items keyed by BTRFS_TREE_RELOC_OBJECTID are
 * filtered out.
 *
 * The first pass over a leaf runs without a transaction, so
 * maybe_repair_root_item() is called in read-only mode. A transaction is
 * opened only when repair is enabled and a root item needs fixing.
 *
 * @info: filesystem to check
 *
 * As used by cmd_check(): a negative value is an errno-style error, and a
 * non-negative value is the number of root items fixed (in repair mode)
 * or found outdated (otherwise).
 */
14321 static int repair_root_items(struct btrfs_fs_info *info)
14323 struct btrfs_path path;
14324 struct btrfs_key key;
14325 struct extent_buffer *leaf;
14326 struct btrfs_trans_handle *trans = NULL;
14329 int need_trans = 0;
14331 btrfs_init_path(&path);
/* Collect the expected root block for every root from the extent tree */
14333 ret = build_roots_info_cache(info);
14337 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14338 key.type = BTRFS_ROOT_ITEM_KEY;
14343 * Avoid opening and committing transactions if a leaf doesn't have
14344 * any root items that need to be fixed, so that we avoid rotating
14345 * backup roots unnecessarily.
14348 trans = btrfs_start_transaction(info->tree_root, 1);
14349 if (IS_ERR(trans)) {
14350 ret = PTR_ERR(trans);
14355 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14359 leaf = path.nodes[0];
14362 struct btrfs_key found_key;
/* End of the current leaf: find where the next search should start */
14364 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14365 int no_more_keys = find_next_key(&path, &key);
14367 btrfs_release_path(&path);
14369 ret = btrfs_commit_transaction(trans,
14381 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14383 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14385 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the item is only checked, not modified */
14388 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14392 if (!trans && repair) {
14395 btrfs_release_path(&path);
14405 free_roots_info_cache();
14406 btrfs_release_path(&path);
14408 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache.
 *
 * btrfs_clear_free_space_cache() is called for each block group found by
 * walking the block groups in address order. Afterwards the superblock's
 * cache_generation is set to (u64)-1 inside a transaction that is
 * committed.
 *
 * @fs_info: filesystem whose free space cache is cleared
 *
 * Returns PTR_ERR(trans) when the final transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored -
 * confirm the value returned on the success path.
 */
14415 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14417 struct btrfs_trans_handle *trans;
14418 struct btrfs_block_group_cache *bg_cache;
14422 /* Clear all free space cache inodes and its extent data */
14424 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14427 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup right after the end of this block group */
14430 current = bg_cache->key.objectid + bg_cache->key.offset;
14433 /* Don't forget to set cache_generation to -1 */
14434 trans = btrfs_start_transaction(fs_info->tree_root, 0);
14435 if (IS_ERR(trans)) {
14436 error("failed to update super block cache generation");
14437 return PTR_ERR(trans);
14439 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14440 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Handle the --clear-space-cache request.
 *
 * clear_version 1: when the filesystem has the FREE_SPACE_TREE compat_ro
 *                  bit set, a message pointing to "v2" is emitted instead;
 *                  otherwise clear_free_space_cache() is run.
 * clear_version 2: when the FREE_SPACE_TREE bit is not set there is
 *                  nothing to clear; otherwise
 *                  btrfs_clear_free_space_tree() is run.
 *
 * Progress and failures are reported via printf()/error().
 *
 * @fs_info: filesystem to operate on
 */
14445 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14450 if (clear_version == 1) {
14451 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14453 "free space cache v2 detected, use --clear-space-cache v2");
14457 printf("Clearing free space cache\n");
14458 ret = clear_free_space_cache(fs_info);
14460 error("failed to clear free space cache");
14463 printf("Free space cache cleared\n");
14465 } else if (clear_version == 2) {
14466 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14467 printf("no free space cache v2 to clear\n");
14471 printf("Clear free space cache v2\n");
14472 ret = btrfs_clear_free_space_tree(fs_info);
14474 error("failed to clear free space cache v2: %d", ret);
14477 printf("free space cache v2 cleared\n");
/*
 * Usage text of "btrfs check", passed to usage() by cmd_check(). It holds
 * the synopsis line, a short and a long description, and one entry per
 * command line option.
 */
14484 const char * const cmd_check_usage[] = {
14485 "btrfs check [options] <device>",
14486 "Check structural integrity of a filesystem (unmounted).",
14487 "Check structural integrity of an unmounted filesystem. Verify internal",
14488 "trees' consistency and item connectivity. In the repair mode try to",
14489 "fix the problems found. ",
14490 "WARNING: the repair mode is considered dangerous",
14492 "-s|--super <superblock> use this superblock copy",
14493 "-b|--backup use the first valid backup root copy",
14494 "--force skip mount checks, repair is not possible",
14495 "--repair try to repair the filesystem",
14496 "--readonly run in read-only mode (default)",
14497 "--init-csum-tree create a new CRC tree",
14498 "--init-extent-tree create a new extent tree",
14499 "--mode <MODE> allows choice of memory/IO trade-offs",
14500 " where MODE is one of:",
14501 " original - read inodes and extents to memory (requires",
14502 " more memory, does less IO)",
14503 " lowmem - try to use less memory but read blocks again",
14505 "--check-data-csum verify checksums of data blocks",
14506 "-Q|--qgroup-report print a report on qgroup consistency",
14507 "-E|--subvol-extents <subvolid>",
14508 " print subvolume extents and sharing state",
14509 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
14510 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
14511 "-p|--progress indicate progress",
14512 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Rough order of operations, as far as visible here:
 *  1. parse the command line with getopt_long(),
 *  2. reject --readonly combined with repair options,
 *  3. check whether the device is mounted (see --force),
 *  4. open the filesystem and verify that the tree, dev and chunk roots
 *     are readable,
 *  5. handle the stand-alone modes (--clear-space-cache, --qgroup-report,
 *     --subvol-extents),
 *  6. in repair mode, zero the log tree after asking the user,
 *  7. optionally reinitialize the extent tree and/or the csum tree,
 *  8. run the checks: root items, chunks and extents, free space
 *     cache/tree, fs roots, csums, root refs, quota groups,
 *  9. print the statistics and release resources.
 *
 * @argc, @argv: command line; exactly one non-option argument (the
 *               device) is expected
 */
14516 int cmd_check(int argc, char **argv)
14518 struct cache_tree root_cache;
14519 struct btrfs_root *root;
14520 struct btrfs_fs_info *info;
14523 u64 tree_root_bytenr = 0;
14524 u64 chunk_root_bytenr = 0;
14525 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14529 int init_csum_tree = 0;
14531 int clear_space_cache = 0;
14532 int qgroup_report = 0;
14533 int qgroups_repaired = 0;
14534 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above the char range, starting at 257 */
14539 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14540 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14541 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14542 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14543 GETOPT_VAL_FORCE };
14544 static const struct option long_options[] = {
14545 { "super", required_argument, NULL, 's' },
14546 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14547 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14548 { "init-csum-tree", no_argument, NULL,
14549 GETOPT_VAL_INIT_CSUM },
14550 { "init-extent-tree", no_argument, NULL,
14551 GETOPT_VAL_INIT_EXTENT },
14552 { "check-data-csum", no_argument, NULL,
14553 GETOPT_VAL_CHECK_CSUM },
14554 { "backup", no_argument, NULL, 'b' },
14555 { "subvol-extents", required_argument, NULL, 'E' },
14556 { "qgroup-report", no_argument, NULL, 'Q' },
14557 { "tree-root", required_argument, NULL, 'r' },
14558 { "chunk-root", required_argument, NULL,
14559 GETOPT_VAL_CHUNK_TREE },
14560 { "progress", no_argument, NULL, 'p' },
14561 { "mode", required_argument, NULL,
14563 { "clear-space-cache", required_argument, NULL,
14564 GETOPT_VAL_CLEAR_SPACE_CACHE},
14565 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14566 { NULL, 0, NULL, 0}
/*
 * NOTE(review): "E" has no ':' in the short option string although
 * --subvol-extents takes a required argument - confirm that "-E <id>"
 * works as documented in the usage text.
 */
14569 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14573 case 'a': /* ignored */ break;
14575 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14578 num = arg_strtou64(optarg);
14579 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14581 "super mirror should be less than %d",
14582 BTRFS_SUPER_MIRROR_MAX);
/* Translate the superblock copy index into its byte offset on disk */
14585 bytenr = btrfs_sb_offset(((int)num));
14586 printf("using SB copy %llu, bytenr %llu\n", num,
14587 (unsigned long long)bytenr);
14593 subvolid = arg_strtou64(optarg);
14596 tree_root_bytenr = arg_strtou64(optarg);
14598 case GETOPT_VAL_CHUNK_TREE:
14599 chunk_root_bytenr = arg_strtou64(optarg);
14602 ctx.progress_enabled = true;
14606 usage(cmd_check_usage);
/* Every option that modifies the filesystem adds OPEN_CTREE_WRITES */
14607 case GETOPT_VAL_REPAIR:
14608 printf("enabling repair mode\n");
14610 ctree_flags |= OPEN_CTREE_WRITES;
14612 case GETOPT_VAL_READONLY:
14615 case GETOPT_VAL_INIT_CSUM:
14616 printf("Creating a new CRC tree\n");
14617 init_csum_tree = 1;
14619 ctree_flags |= OPEN_CTREE_WRITES;
14621 case GETOPT_VAL_INIT_EXTENT:
14622 init_extent_tree = 1;
14623 ctree_flags |= (OPEN_CTREE_WRITES |
14624 OPEN_CTREE_NO_BLOCK_GROUPS);
14627 case GETOPT_VAL_CHECK_CSUM:
14628 check_data_csum = 1;
14630 case GETOPT_VAL_MODE:
14631 check_mode = parse_check_mode(optarg);
14632 if (check_mode == CHECK_MODE_UNKNOWN) {
14633 error("unknown mode: %s", optarg);
14637 case GETOPT_VAL_CLEAR_SPACE_CACHE:
14638 if (strcmp(optarg, "v1") == 0) {
14639 clear_space_cache = 1;
14640 } else if (strcmp(optarg, "v2") == 0) {
14641 clear_space_cache = 2;
14642 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14645 "invalid argument to --clear-space-cache, must be v1 or v2");
14648 ctree_flags |= OPEN_CTREE_WRITES;
14650 case GETOPT_VAL_FORCE:
/* Exactly one positional argument (the device) must remain */
14656 if (check_argc_exact(argc - optind, 1))
14657 usage(cmd_check_usage);
14659 if (ctx.progress_enabled) {
14660 ctx.tp = TASK_NOTHING;
14661 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14664 /* This check is the only reason for --readonly to exist */
14665 if (readonly && repair) {
14666 error("repair options are not compatible with --readonly");
14671 * experimental and dangerous
14673 if (repair && check_mode == CHECK_MODE_LOWMEM)
14674 warning("low-memory mode repair support is only partial");
14677 cache_tree_init(&root_cache);
14679 ret = check_mounted(argv[optind]);
14682 error("could not check mount status: %s",
14688 "%s is currently mounted, use --force if you really intend to check the filesystem",
14696 error("repair and --force is not yet supported");
14703 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14707 "filesystem mounted, continuing because of --force");
14709 /* A block device is mounted in exclusive mode by kernel */
14710 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14713 /* only allow partial opening under repair mode */
14715 ctree_flags |= OPEN_CTREE_PARTIAL;
14717 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14718 chunk_root_bytenr, ctree_flags);
14720 error("cannot open file system");
/* Keep a file-wide pointer to the opened filesystem */
14726 global_info = info;
14727 root = info->fs_root;
14728 uuid_unparse(info->super_copy->fsid, uuidbuf);
14730 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14733 * Check the bare minimum before starting anything else that could rely
14734 * on it, namely the tree roots, any local consistency checks
14736 if (!extent_buffer_uptodate(info->tree_root->node) ||
14737 !extent_buffer_uptodate(info->dev_root->node) ||
14738 !extent_buffer_uptodate(info->chunk_root->node)) {
14739 error("critical roots corrupted, unable to check the filesystem");
14745 if (clear_space_cache) {
14746 ret = do_clear_free_space_cache(info, clear_space_cache);
14752 * repair mode will force us to commit transaction which
14753 * will make us fail to load log tree when mounting.
14755 if (repair && btrfs_super_log_root(info->super_copy)) {
14756 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14762 ret = zero_log_tree(root);
14765 error("failed to zero log tree: %d", ret);
14770 if (qgroup_report) {
14771 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14773 ret = qgroup_verify_all(info);
14780 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14781 subvolid, argv[optind], uuidbuf);
14782 ret = print_extent_state(info, subvolid);
/* Both reinitializations share one transaction, committed further down */
14787 if (init_extent_tree || init_csum_tree) {
14788 struct btrfs_trans_handle *trans;
14790 trans = btrfs_start_transaction(info->extent_root, 0);
14791 if (IS_ERR(trans)) {
14792 error("error starting transaction");
14793 ret = PTR_ERR(trans);
14798 if (init_extent_tree) {
14799 printf("Creating a new extent tree\n");
14800 ret = reinit_extent_tree(trans, info);
14806 if (init_csum_tree) {
14807 printf("Reinitialize checksum tree\n");
14808 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14810 error("checksum tree initialization failed: %d",
14817 ret = fill_csum_tree(trans, info->csum_root,
14821 error("checksum tree refilling failed: %d", ret);
14826 * Ok now we commit and run the normal fsck, which will add
14827 * extent entries for all of the items it finds.
14829 ret = btrfs_commit_transaction(trans, info->extent_root);
14834 if (!extent_buffer_uptodate(info->extent_root->node)) {
14835 error("critical: extent_root, unable to check the filesystem");
14840 if (!extent_buffer_uptodate(info->csum_root->node)) {
14841 error("critical: csum_root, unable to check the filesystem");
/* Root items are only checked against the extent tree if it was not rebuilt */
14847 if (!init_extent_tree) {
14848 ret = repair_root_items(info);
14851 error("failed to repair root items: %s", strerror(-ret));
14855 fprintf(stderr, "Fixed %d roots.\n", ret);
14857 } else if (ret > 0) {
14859 "Found %d roots with an outdated root item.\n",
14862 "Please run a filesystem check with the option --repair to fix them.\n");
14869 ret = do_check_chunks_and_extents(info);
14873 "errors found in extent allocation tree or chunk allocation");
14875 if (!ctx.progress_enabled) {
14876 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14877 fprintf(stderr, "checking free space tree\n");
14879 fprintf(stderr, "checking free space cache\n");
14881 ret = check_space_cache(root);
14884 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14885 error("errors found in free space tree");
14887 error("errors found in free space cache");
14892 * We used to have to have these hole extents in between our real
14893 * extents so if we don't have this flag set we need to make sure there
14894 * are no gaps in the file extents for inodes, otherwise we can just
14895 * ignore it when this happens.
14897 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14898 ret = do_check_fs_roots(info, &root_cache);
14901 error("errors found in fs roots");
14905 fprintf(stderr, "checking csums\n");
14906 ret = check_csums(root);
14909 error("errors found in csum tree");
14913 fprintf(stderr, "checking root refs\n");
14914 /* For low memory mode, check_fs_roots_v2 handles root refs */
14915 if (check_mode != CHECK_MODE_LOWMEM) {
14916 ret = check_root_refs(root, &root_cache);
14919 error("errors found in root refs");
/* In repair mode, re-COW every block queued on fs_info->recow_ebs */
14924 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14925 struct extent_buffer *eb;
14927 eb = list_first_entry(&root->fs_info->recow_ebs,
14928 struct extent_buffer, recow);
14929 list_del_init(&eb->recow);
14930 ret = recow_extent_buffer(root, eb);
14933 error("fails to fix transid errors");
/* Remove the items that earlier checks queued on delete_items */
14938 while (!list_empty(&delete_items)) {
14939 struct bad_item *bad;
14941 bad = list_first_entry(&delete_items, struct bad_item, list);
14942 list_del_init(&bad->list);
14944 ret = delete_bad_item(root, bad);
14950 if (info->quota_enabled) {
14951 fprintf(stderr, "checking quota groups\n");
14952 ret = qgroup_verify_all(info);
14955 error("failed to check quota groups");
14959 ret = repair_qgroups(info, &qgroups_repaired);
14962 error("failed to repair quota groups");
/* Entries still on recow_ebs at this point are reported as transid errors */
14968 if (!list_empty(&root->fs_info->recow_ebs)) {
14969 error("transid errors in file system");
/* Summary statistics accumulated in the file-scope counters */
14974 printf("found %llu bytes used, ",
14975 (unsigned long long)bytes_used);
14977 printf("error(s) found\n");
14979 printf("no error found\n");
14980 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14981 printf("total tree bytes: %llu\n",
14982 (unsigned long long)total_btree_bytes);
14983 printf("total fs tree bytes: %llu\n",
14984 (unsigned long long)total_fs_tree_bytes);
14985 printf("total extent tree bytes: %llu\n",
14986 (unsigned long long)total_extent_tree_bytes);
14987 printf("btree space waste bytes: %llu\n",
14988 (unsigned long long)btree_space_waste);
14989 printf("file data blocks allocated: %llu\n referenced %llu\n",
14990 (unsigned long long)data_bytes_allocated,
14991 (unsigned long long)data_bytes_referenced);
14993 free_qgroup_counts();
14994 free_root_recs_tree(&root_cache);
14998 if (ctx.progress_enabled)
14999 task_deinit(ctx.info);