2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bits for fs tree item checks.  Values are single bits so several
 * can be OR-ed into one mask; bit 0 is not assigned here.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* ROOT_DIR is bad */
#define DIR_ITEM_MISSING	(1 << 2)	/* no DIR_ITEM found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM exists but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* no INODE_REF/INODE_EXTREF found */
#define INODE_ITEM_MISSING	(1 << 5)	/* no INODE_ITEM found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM exists but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* no CSUM_ITEM found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count is wrong */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count is wrong */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count is wrong */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* there is no inode_item */
#define LAST_ITEM		(1 << 15)	/* this tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* no ROOT_REF found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF exists but does not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead.
 * During the extent scan, it is stored in root->orphan_data_extent.
 * During the fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  print_ref_error() turns each set bit into a
 * short message; the hex value is noted every fourth bit for orientation.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x1 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x10 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Per-inode error bits accumulated in inode_record errors.  The comment on
 * each bit is the text print_inode_error() emits for it.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* no inode item */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* no orphan item */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* dup inode item */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* dup dir index */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* odd dir item */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* odd file extent */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* bad file extent */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* file extent overlap */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* file extent discount; 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* dir isize wrong */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* nbytes wrong; 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* odd csum item */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* some csum missing */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* link count wrong */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* orphan file extent */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low memory mode checks.  Every condition must own a
 * distinct bit so that results can be OR-ed together and still be told
 * apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Previously (1 << 4), which aliased REFERENCER_MISMATCH and made the two
 * conditions indistinguishable in a combined mask.  Moved to the next free
 * bit; all other values are unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
/* Compatibility function to allow reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
/*
 * Add a hole to the record.
 *
 * Holes that touch or overlap are merged on insertion, which is what
 * copy_file_extent_holes() relies on to ensure there are no contiguous
 * holes left in the tree.
 */
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
/*
 * Delete a hole in the record.
 *
 * This splits the containing hole where necessary and is much stricter
 * than adding one.
 */
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called while walking up when the active shared node at @level is left.
 *
 * Visible behaviour: nothing is done at the root level. Otherwise the levels
 * above @level are scanned for the next active node (an index below
 * BTRFS_MAX_LEVEL must be found), the current active slot is cleared, and
 * the node being left is spliced into the new active node when that node is
 * below the root level or the root item still has refs.
 * NOTE(review): the loop body, the else branch and the return statements
 * are elided in this excerpt.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and switch the active level to i. */
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation between two roots using the tree root.
 *
 * Step 1 searches for the key (parent_root_id, ROOT_REF, child_root_id).
 * Step 2 searches from (child_root_id, ROOT_BACKREF, ...) and walks the
 * following items while they still belong to child_root_id and are
 * ROOT_BACKREF items, looking for one whose offset equals parent_root_id.
 * The final fall-through result is 0 when some parent was seen, 2 otherwise.
 * NOTE(review): the second signature line, the result handling after each
 * search and the loop construct are elided in this excerpt.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Step 1: direct ROOT_REF lookup from parent to child. */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Step 2: scan the child's ROOT_BACKREF items. */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: move on to the next leaf. */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * @slot of @eb as a backref on the inode (or root) it points to.
 *
 * The current inode record of @active_node is marked as having a dir item
 * and its found_size grows by each entry's name length. Entries pointing at
 * an INODE_ITEM go into the inode cache, entries pointing at a ROOT_ITEM go
 * into the root cache, and anything else is reported and filed under
 * BTRFS_MULTIPLE_OBJECTIDS. Names that are too long or cross the item end
 * are flagged with REF_ERR_NAME_TOO_LONG and truncated before being copied.
 * NOTE(review): local declarations, loop bookkeeping (cur/nritems updates)
 * and the return statement are elided in this excerpt.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
/* Note: name_len is accumulated before it has been validated below. */
1514 rec->found_size += name_len;
/* Header + name must fit in the item and in namebuf. */
1515 if (cur + sizeof(*di) + name_len > total ||
1516 name_len > BTRFS_NAME_LEN) {
1517 error = REF_ERR_NAME_TOO_LONG;
1519 if (cur + sizeof(*di) > total)
1521 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the btrfs_dir_item header directly. */
1528 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1530 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531 add_inode_backref(inode_cache, location.objectid,
1532 key->objectid, key->offset, namebuf,
1533 len, filetype, key->type, error);
1534 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535 add_inode_backref(root_cache, location.objectid,
1536 key->objectid, key->offset,
1537 namebuf, len, filetype,
1540 fprintf(stderr, "invalid location in dir item %u\n",
1542 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543 key->objectid, key->offset, namebuf,
1544 len, filetype, key->type, error);
/* Advance past header, name and attached data to the next entry. */
1547 len = sizeof(*di) + name_len + data_len;
1548 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item holding more than one entry is flagged as an error. */
1551 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into the INODE_REF item at @slot as a
 * backref in the active node's inode cache.
 *
 * For this key type key->objectid is passed as the inode and key->offset as
 * the directory. A ref whose name is longer than BTRFS_NAME_LEN or crosses
 * the item end is flagged REF_ERR_NAME_TOO_LONG; as much of the name as is
 * available is still read.
 * NOTE(review): local declarations, the normal-case branch, loop bookkeeping
 * and the return statement are elided in this excerpt.
 */
1557 static int process_inode_ref(struct extent_buffer *eb,
1558 int slot, struct btrfs_key *key,
1559 struct shared_node *active_node)
1567 struct cache_tree *inode_cache;
1568 struct btrfs_inode_ref *ref;
1569 char namebuf[BTRFS_NAME_LEN];
1571 inode_cache = &active_node->inode_cache;
1573 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574 total = btrfs_item_size_nr(eb, slot);
1575 while (cur < total) {
1576 name_len = btrfs_inode_ref_name_len(eb, ref);
1577 index = btrfs_inode_ref_index(eb, ref);
1579 /* inode_ref + namelen should not cross item boundary */
1580 if (cur + sizeof(*ref) + name_len > total ||
1581 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item. */
1582 if (total < cur + sizeof(*ref))
1585 /* Still try to read out the remaining part */
1586 len = min_t(u32, total - cur - sizeof(*ref),
1588 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the btrfs_inode_ref header directly. */
1594 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595 add_inode_backref(inode_cache, key->objectid, key->offset,
1596 index, namebuf, len, 0, key->type, error);
/* Step to the next ref using the on-disk name length. */
1598 len = sizeof(*ref) + name_len;
1599 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into the INODE_EXTREF item at @slot
 * as a backref in the active node's inode cache. The parent directory is
 * read from the extref itself rather than from the key.
 *
 * Names longer than BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.
 * NOTE(review): unlike process_inode_ref(), no check of the entry against
 * the item size is visible in this excerpt before the name is read - confirm
 * whether one exists in the elided lines.
 */
1605 static int process_inode_extref(struct extent_buffer *eb,
1606 int slot, struct btrfs_key *key,
1607 struct shared_node *active_node)
1616 struct cache_tree *inode_cache;
1617 struct btrfs_inode_extref *extref;
1618 char namebuf[BTRFS_NAME_LEN];
1620 inode_cache = &active_node->inode_cache;
1622 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623 total = btrfs_item_size_nr(eb, slot);
1624 while (cur < total) {
1625 name_len = btrfs_inode_extref_name_len(eb, extref);
1626 index = btrfs_inode_extref_index(eb, extref);
1627 parent = btrfs_inode_extref_parent(eb, extref);
1628 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy to the size of namebuf. */
1632 len = BTRFS_NAME_LEN;
1633 error = REF_ERR_NAME_TOO_LONG;
1635 read_extent_buffer(eb, namebuf,
1636 (unsigned long)(extref + 1), len);
1637 add_inode_backref(inode_cache, key->objectid, parent,
1638 index, namebuf, len, 0, key->type, error);
/* Step to the next extref using the on-disk name length. */
1640 len = sizeof(*extref) + name_len;
1641 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree over the byte range [start, start + len) and report
 * through @found how much of it is covered by checksum items.
 *
 * Each EXTENT_CSUM item covers (item size / csum size) sectors starting at
 * its key offset; the loop stops at the first item that starts at or beyond
 * the end of the range or is not an EXTENT_CSUM item.
 * NOTE(review): the accumulation into *found, the updates of start/len, the
 * error paths and the return value are elided in this excerpt.
 */
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649 u64 len, u64 *found)
1651 struct btrfs_key key;
1652 struct btrfs_path path;
1653 struct extent_buffer *leaf;
1658 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660 btrfs_init_path(&path);
1662 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664 key.type = BTRFS_EXTENT_CSUM_KEY;
1666 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which may be a csum item that
 * starts before the range but still overlaps it.
 */
1670 if (ret > 0 && path.slots[0] > 0) {
1671 leaf = path.nodes[0];
1672 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674 key.type == BTRFS_EXTENT_CSUM_KEY)
1679 leaf = path.nodes[0];
1680 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1686 leaf = path.nodes[0];
1689 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1690 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691 key.type != BTRFS_EXTENT_CSUM_KEY)
1694 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1695 if (key.offset >= start + len)
1698 if (key.offset > start)
/* End of the byte range covered by this csum item. */
1701 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703 if (csum_end > start) {
1704 size = min(csum_end - start, len);
1713 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item and fold it into the current inode record of
 * @active_node.
 *
 * Tracks the extent range seen so far (flagging overlaps and recording gaps
 * as holes), sanity-checks the item according to its extent type, adds its
 * size to rec->found_size, and for extents with a disk address outside the
 * data relocation tree compares the checksummed byte count against the
 * extent length.
 * NOTE(review): several conditions, the else branches and the return paths
 * are elided in this excerpt.
 */
1719 static int process_file_extent(struct btrfs_root *root,
1720 struct extent_buffer *eb,
1721 int slot, struct btrfs_key *key,
1722 struct shared_node *active_node)
1724 struct inode_record *rec;
1725 struct btrfs_file_extent_item *fi;
1727 u64 disk_bytenr = 0;
1728 u64 extent_offset = 0;
/* Mask used to test for and round up to sector alignment. */
1729 u64 mask = root->sectorsize - 1;
1733 rec = active_node->current;
1734 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode record. */
1737 if (rec->extent_start == (u64)-1) {
1738 rec->extent_start = key->offset;
1739 rec->extent_end = key->offset;
/* Previous extent ends past this one's start: overlap; before it: hole. */
1742 if (rec->extent_end > key->offset)
1743 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744 else if (rec->extent_end < key->offset) {
1745 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746 key->offset - rec->extent_end);
1751 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752 extent_type = btrfs_file_extent_type(eb, fi);
1754 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1757 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1758 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector for extent_end tracking. */
1759 num_bytes = (num_bytes + mask) & ~mask;
1760 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1765 if (num_bytes == 0 || (num_bytes & mask))
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1767 if (num_bytes + extent_offset >
1768 btrfs_file_extent_ram_bytes(eb, fi))
1769 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry compression/encryption/encoding. */
1770 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771 (btrfs_file_extent_compression(eb, fi) ||
1772 btrfs_file_extent_encryption(eb, fi) ||
1773 btrfs_file_extent_other_encoding(eb, fi)))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk address count towards found_size. */
1775 if (disk_bytenr > 0)
1776 rec->found_size += num_bytes;
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780 rec->extent_end = key->offset + num_bytes;
1783 * The data reloc tree will copy full extents into its inode and then
1784 * copy the corresponding csums. Because the extent it copied could be
1785 * a preallocated extent that hasn't been written to yet there may be no
1786 * csums to copy, ergo we won't have csums for our file extent. This is
1787 * ok so just don't bother checking csums if the inode belongs to the
1790 if (disk_bytenr > 0 &&
1791 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is used for the csum lookup. */
1793 if (btrfs_file_extent_compression(eb, fi))
1794 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1796 disk_bytenr += extent_offset;
1798 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1801 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1803 rec->found_csum_item = 1;
1804 if (found < num_bytes)
1805 rec->some_csum_missing = 1;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * Items are attributed to the inode record of the active shared node. When
 * the key objectid moves past the current record, that record is marked
 * checked and offered to maybe_free_inode_rec() before the next record is
 * fetched. Each item is then dispatched on its key type to the matching
 * process_*() helper.
 * NOTE(review): the early-return for roots with zero refs, the skip
 * statements, the switch header and the return value are elided in this
 * excerpt.
 */
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815 struct walk_control *wc)
1817 struct btrfs_key key;
1821 struct cache_tree *inode_cache;
1822 struct shared_node *active_node;
1824 if (wc->root_level == wc->active_node &&
1825 btrfs_root_refs(&root->root_item) == 0)
1828 active_node = wc->nodes[wc->active_node];
1829 inode_cache = &active_node->inode_cache;
1830 nritems = btrfs_header_nritems(eb);
1831 for (i = 0; i < nritems; i++) {
1832 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get special treatment here. */
1834 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1836 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch to a new inode record once the objectid advances. */
1839 if (active_node->current == NULL ||
1840 active_node->current->ino < key.objectid) {
1841 if (active_node->current) {
1842 active_node->current->checked = 1;
1843 maybe_free_inode_rec(inode_cache,
1844 active_node->current);
1846 active_node->current = get_inode_rec(inode_cache,
1848 BUG_ON(IS_ERR(active_node->current));
1851 case BTRFS_DIR_ITEM_KEY:
1852 case BTRFS_DIR_INDEX_KEY:
1853 ret = process_dir_item(eb, i, &key, active_node);
1855 case BTRFS_INODE_REF_KEY:
1856 ret = process_inode_ref(eb, i, &key, active_node);
1858 case BTRFS_INODE_EXTREF_KEY:
1859 ret = process_inode_extref(eb, i, &key, active_node);
1861 case BTRFS_INODE_ITEM_KEY:
1862 ret = process_inode_item(eb, i, &key, active_node);
1864 case BTRFS_EXTENT_DATA_KEY:
1865 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used by the tree walkers below (the struct header line is
 * elided in this excerpt; the walkers refer to it as struct node_refs).
 */
/* Address of the block last looked up at each level. */
1876 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count recorded for that block. */
1877 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at that level should be checked from this root. */
1878 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are used before their definitions. */
1881 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1882 struct node_refs *nrefs, u64 level);
1883 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1884 unsigned int ext_ref);
1887 * Returns >0 Found error, not fatal, should continue
1888 * Returns <0 Fatal error, must exit the whole check
1889 * Returns 0 No errors found
/*
 * Check every inode found in the leaf at path->nodes[0].
 *
 * The slot is first advanced to the first INODE_ITEM, or to the first item
 * whose objectid differs from the leaf's first objectid. check_inode_item()
 * is then called repeatedly, accumulating its result bits in err, until it
 * reports LAST_ITEM or the path has moved to a different leaf. In the latter
 * case the per-level reference info is refreshed from the root level down;
 * if a level turns out not to need checking, the path nodes below *level
 * are released.
 * NOTE(review): the loop constructs, break/goto statements and the final
 * return are elided in this excerpt.
 */
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892 struct node_refs *nrefs, int *level, int ext_ref)
1894 struct extent_buffer *cur = path->nodes[0];
1895 struct btrfs_key key;
1899 int root_level = btrfs_header_level(root->node);
1901 int ret = 0; /* Final return value */
1902 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
1904 cur_bytenr = cur->start;
1906 /* skip to first inode item or the first inode number change */
1907 nritems = btrfs_header_nritems(cur);
1908 for (i = 0; i < nritems; i++) {
1909 btrfs_item_key_to_cpu(cur, &key, i);
1911 first_ino = key.objectid;
1912 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913 (first_ino && first_ino != key.objectid))
1917 path->slots[0] = nritems;
1923 err |= check_inode_item(root, path, ext_ref);
1925 if (err & LAST_ITEM)
1928 /* still have inode items in this leaf */
1929 if (cur->start == cur_bytenr)
1933 * we have switched to another leaf, above nodes may
1934 * have changed, here walk down the path, if a node
1935 * or leaf is shared, check whether we can skip this
1938 for (i = root_level; i >= 0; i--) {
/* This level still points at the block already recorded in nrefs. */
1939 if (path->nodes[i]->start == nrefs->bytenr[i])
1942 ret = update_nodes_refs(root,
1943 path->nodes[i]->start,
1948 if (!nrefs->need_check[i]) {
/* Drop the path's references to the nodes below *level. */
1954 for (i = 0; i < *level; i++) {
1955 free_extent_buffer(path->nodes[i]);
1956 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node. Each child is requested with
 * the block pointer and generation stored in the parent and the root's
 * nodesize.
 * NOTE(review): a condition on the node's level follows the level lookup but
 * is elided in this excerpt.
 */
1965 static void reada_walk_down(struct btrfs_root *root,
1966 struct extent_buffer *node, int slot)
1975 level = btrfs_header_level(node);
1979 nritems = btrfs_header_nritems(node);
1980 blocksize = root->nodesize;
1981 for (i = slot; i < nritems; i++) {
1982 bytenr = btrfs_node_blockptr(node, i);
1983 ptr_gen = btrfs_node_ptr_generation(node, i);
1984 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1989 * Check the child node/leaf by the following condition:
1990 * 1. the first item key of the node/leaf should be the same with the one
1992 * 2. block in parent node should match the child node/leaf.
1993 * 3. generation of parent node and child's header should be consistent.
1995 * Or the child node/leaf pointed by the key in parent is not valid.
1997 * We hope to check leaf owner too, but since subvol may share leaves,
1998 * which makes leaf owner check not so strong, key check should be
1999 * sufficient enough for that case.
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002 struct extent_buffer *child)
2004 struct btrfs_key parent_key;
2005 struct btrfs_key child_key;
2008 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009 if (btrfs_header_level(child) == 0)
2010 btrfs_item_key_to_cpu(child, &child_key, 0);
2012 btrfs_node_key_to_cpu(child, &child_key, 0);
2014 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018 parent_key.objectid, parent_key.type, parent_key.offset,
2019 child_key.objectid, child_key.type, child_key.offset);
2021 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2023 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024 btrfs_node_blockptr(parent, slot),
2025 btrfs_header_bytenr(child));
2027 if (btrfs_node_ptr_generation(parent, slot) !=
2028 btrfs_header_generation(child)) {
2030 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_header_generation(child),
2032 btrfs_node_ptr_generation(parent, slot));
2038 * For a tree node or leaf that is shared, we don't need to iterate it
2039 * in every fs or file tree check. Here we find all of its root ids, and only
2040 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root should check a tree block, given in @roots the set of
 * root ids that reference it.
 *
 * A block referenced by exactly one root is handled by the first test. For
 * the remaining cases the first entry of the ulist's rb-tree is compared
 * with this root's objectid.
 * NOTE(review): the return statements are elided in this excerpt, so the
 * exact values returned on each path cannot be confirmed from here.
 */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2044 struct rb_node *node;
2045 struct ulist_node *u;
2047 if (roots->nnodes == 1)
/* First entry in rb-tree order. */
2050 node = rb_first(&roots->root);
2051 u = rb_entry(node, struct ulist_node, rb_node);
2053 * current root id is not smallest, we skip it and let it be checked
2054 * in the fs or file tree who hash the smallest root id.
2056 if (root->objectid != u->val)
2063 * For a tree node or leaf, we record its reference count, so that if we
2064 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the cached reference information in @nrefs for @level when the
 * block at @bytenr differs from the one already recorded there.
 *
 * The block's reference count is fetched with btrfs_lookup_extent_info() and
 * stored together with its address. One path then collects all roots that
 * reference the block and stores the need_check() verdict; another path
 * simply sets need_check[level] to 1.
 * NOTE(review): the condition selecting between those two paths, the error
 * handling and the return value are elided in this excerpt.
 */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067 struct node_refs *nrefs, u64 level)
2071 struct ulist *roots;
/* Only do the lookups when the cached entry is for a different block. */
2073 if (nrefs->bytenr[level] != bytenr) {
2074 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075 level, 1, &refs, NULL);
2079 nrefs->bytenr[level] = bytenr;
2080 nrefs->refs[level] = refs;
2082 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2087 check = need_check(root, roots);
2089 nrefs->need_check[level] = check;
2091 nrefs->need_check[level] = 1;
/*
 * Original-mode walker: descend from path->nodes[*level] towards the leaves,
 * processing each leaf with process_one_leaf().
 *
 * Reference counts are taken from @nrefs when the block address matches the
 * cached one, otherwise looked up and cached. Blocks are passed to
 * enter_shared_node() on some paths (the guarding conditions are elided).
 * Child blocks are taken from the cache or read from disk (with readahead of
 * the siblings), verified against the parent with check_child_node() and
 * with btrfs_check_leaf()/btrfs_check_node(), and then installed in the path
 * one level down. A child that cannot be read is recorded through
 * btrfs_add_corrupt_extent_record().
 * NOTE(review): conditions, error handling, break/continue/goto statements
 * and the return value are elided in this excerpt.
 */
2098 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2099 struct walk_control *wc, int *level,
2100 struct node_refs *nrefs)
2102 enum btrfs_tree_block_status status;
2105 struct extent_buffer *next;
2106 struct extent_buffer *cur;
2111 WARN_ON(*level < 0);
2112 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block when possible. */
2114 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2115 refs = nrefs->refs[*level];
2118 ret = btrfs_lookup_extent_info(NULL, root,
2119 path->nodes[*level]->start,
2120 *level, 1, &refs, NULL);
2125 nrefs->bytenr[*level] = path->nodes[*level]->start;
2126 nrefs->refs[*level] = refs;
2130 ret = enter_shared_node(root, path->nodes[*level]->start,
2138 while (*level >= 0) {
2139 WARN_ON(*level < 0);
2140 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2141 cur = path->nodes[*level];
/* The header level must agree with where the path says we are. */
2143 if (btrfs_header_level(cur) != *level)
2146 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149 ret = process_one_leaf(root, cur, wc);
/* Internal node: fetch the child pointer at the current slot. */
2154 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2155 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2156 blocksize = root->nodesize;
2158 if (bytenr == nrefs->bytenr[*level - 1]) {
2159 refs = nrefs->refs[*level - 1];
2161 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2162 *level - 1, 1, &refs, NULL);
2166 nrefs->bytenr[*level - 1] = bytenr;
2167 nrefs->refs[*level - 1] = refs;
2172 ret = enter_shared_node(root, bytenr, refs,
2175 path->slots[*level]++;
/* Use the cached block if it is up to date, otherwise read it. */
2180 next = btrfs_find_tree_block(root, bytenr, blocksize);
2181 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2182 free_extent_buffer(next);
2183 reada_walk_down(root, cur, path->slots[*level]);
2184 next = read_tree_block(root, bytenr, blocksize,
2186 if (!extent_buffer_uptodate(next)) {
2187 struct btrfs_key node_key;
2189 btrfs_node_key_to_cpu(path->nodes[*level],
2191 path->slots[*level]);
2192 btrfs_add_corrupt_extent_record(root->fs_info,
2194 path->nodes[*level]->start,
2195 root->nodesize, *level);
2201 ret = check_child_node(cur, path->slots[*level], next);
2203 free_extent_buffer(next);
2208 if (btrfs_is_leaf(next))
2209 status = btrfs_check_leaf(root, NULL, next);
2211 status = btrfs_check_node(root, NULL, next);
2212 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2213 free_extent_buffer(next);
/* Descend: install the child at the next lower level of the path. */
2218 *level = *level - 1;
2219 free_extent_buffer(path->nodes[*level]);
2220 path->nodes[*level] = next;
2221 path->slots[*level] = 0;
/* Mark the current level as fully consumed. */
2224 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * NOTE(review): check_inode_item() is already forward-declared earlier in
 * this file with the same signature; this second declaration is redundant.
 */
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229 unsigned int ext_ref);
2232 * Returns >0 Found error, should continue
2233 * Returns <0 Fatal error, must exit the whole check
2234 * Returns 0 No errors found
/*
 * Low-memory-mode walker: descend from path->nodes[*level], validating each
 * block with btrfs_check_leaf()/btrfs_check_node() and processing leaves
 * with process_one_leaf_v2().
 *
 * update_nodes_refs() is consulted for every block; a child whose
 * need_check flag is clear is skipped by advancing the slot. Other children
 * are taken from the cache or read from disk, verified against the parent
 * with check_child_node(), and installed in the path one level down. A child
 * that cannot be read is recorded through btrfs_add_corrupt_extent_record().
 * NOTE(review): conditions, error handling, break/continue statements and
 * the return statement are elided in this excerpt.
 */
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237 int *level, struct node_refs *nrefs, int ext_ref)
2239 enum btrfs_tree_block_status status;
2242 struct extent_buffer *next;
2243 struct extent_buffer *cur;
2247 WARN_ON(*level < 0);
2248 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2250 ret = update_nodes_refs(root, path->nodes[*level]->start,
2255 while (*level >= 0) {
2256 WARN_ON(*level < 0);
2257 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258 cur = path->nodes[*level];
/* The header level must agree with where the path says we are. */
2260 if (btrfs_header_level(cur) != *level)
2263 if (path->slots[*level] >= btrfs_header_nritems(cur))
2265 /* Don't forget to check leaf/node validation */
2267 ret = btrfs_check_leaf(root, NULL, cur);
2268 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272 ret = process_one_leaf_v2(root, path, nrefs,
2276 ret = btrfs_check_node(root, NULL, cur);
2277 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Internal node: fetch the child pointer at the current slot. */
2282 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284 blocksize = root->nodesize;
2286 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* This child does not need checking from this root: move to the next slot. */
2289 if (!nrefs->need_check[*level - 1]) {
2290 path->slots[*level]++;
/* Use the cached block if it is up to date, otherwise read it. */
2294 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296 free_extent_buffer(next);
2297 reada_walk_down(root, cur, path->slots[*level]);
2298 next = read_tree_block(root, bytenr, blocksize,
2300 if (!extent_buffer_uptodate(next)) {
2301 struct btrfs_key node_key;
2303 btrfs_node_key_to_cpu(path->nodes[*level],
2305 path->slots[*level]);
2306 btrfs_add_corrupt_extent_record(root->fs_info,
2308 path->nodes[*level]->start,
2309 root->nodesize, *level);
2315 ret = check_child_node(cur, path->slots[*level], next);
2319 if (btrfs_is_leaf(next))
2320 status = btrfs_check_leaf(root, NULL, next);
2322 status = btrfs_check_node(root, NULL, next);
2323 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324 free_extent_buffer(next);
/* Descend: install the child at the next lower level of the path. */
2329 *level = *level - 1;
2330 free_extent_buffer(path->nodes[*level]);
2331 path->nodes[*level] = next;
2332 path->slots[*level] = 0;
/*
 * Original-mode walker: climb from *level while the nodes on the path are
 * exhausted.
 *
 * At each level, if another slot remains in the node the first branch is
 * taken (its body is elided). Otherwise the node at *level is released from
 * the path and, when *level is the active shared-node level,
 * leave_shared_node() is called. *level must never exceed wc->active_node.
 * NOTE(review): the slot/level updates and return statements are elided in
 * this excerpt.
 */
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338 struct walk_control *wc, int *level)
2341 struct extent_buffer *leaf;
2343 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, leaf is the node at level i and may be internal. */
2344 leaf = path->nodes[i];
2345 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350 free_extent_buffer(path->nodes[*level]);
2351 path->nodes[*level] = NULL;
2352 BUG_ON(*level > wc->active_node);
2353 if (*level == wc->active_node)
2354 leave_shared_node(root, wc, *level);
/*
 * Low-memory-mode counterpart of walk_up_tree(): climb from *level while the
 * nodes on the path are exhausted, releasing each finished node from the
 * path. No shared-node bookkeeping is visible here.
 * NOTE(review): the remaining parameters, the slot/level updates and the
 * return statements are elided in this excerpt.
 */
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2365 struct extent_buffer *leaf;
2367 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, leaf is the node at level i and may be internal. */
2368 leaf = path->nodes[i];
2369 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2374 free_extent_buffer(path->nodes[*level]);
2375 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests look at: presence of the inode item and absence of
 * errors; nlink == 1 with no found links; a non-empty backref list; and a
 * first backref that has an inode ref, index 0, the two-byte name ".." and
 * neither a dir index nor a dir item.
 * NOTE(review): the action taken after each test and the return value are
 * elided in this excerpt.
 */
2382 static int check_root_dir(struct inode_record *rec)
2384 struct inode_backref *backref;
2387 if (!rec->found_inode_item || rec->errors)
2389 if (rec->nlink != 1 || rec->found_link != 0)
2391 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2393 backref = to_inode_backref(rec->backrefs.next);
2394 if (!backref->found_inode_ref)
2396 if (backref->index != 0 || backref->namelen != 2 ||
2397 memcmp(backref->name, "..", 2))
2399 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: locate the inode item of rec->ino, write
 * rec->found_size into its size field, mark the leaf dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so the lookup is not expected to match
 * exactly; the key at the resulting slot is re-read and compared with
 * rec->ino before the item is modified.
 * NOTE(review): the slot adjustment, error handling and return value are
 * elided in this excerpt.
 */
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407 struct btrfs_root *root, struct btrfs_path *path,
2408 struct inode_record *rec)
2410 struct btrfs_inode_item *ei;
2411 struct btrfs_key key;
2414 key.objectid = rec->ino;
2415 key.type = BTRFS_INODE_ITEM_KEY;
2416 key.offset = (u64)-1;
2418 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is treated as a special case; its handling is not visible here. */
2422 if (!path->slots[0]) {
2429 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2430 if (key.objectid != rec->ino) {
2435 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436 struct btrfs_inode_item);
2437 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438 btrfs_mark_buffer_dirty(path->nodes[0]);
2439 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2440 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441 root->root_key.objectid);
2443 btrfs_release_path(path);
/*
 * Repair a missing orphan item: add one for rec->ino with
 * btrfs_add_orphan_item(), release the path and clear I_ERR_NO_ORPHAN_ITEM
 * on the record.
 * NOTE(review): whether the flag is cleared only on success, and the return
 * value, cannot be seen in this excerpt.
 */
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448 struct btrfs_root *root,
2449 struct btrfs_path *path,
2450 struct inode_record *rec)
2454 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455 btrfs_release_path(path);
2457 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes value: look up the inode item of rec->ino, write
 * rec->found_size into its nbytes field, mark the leaf dirty and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 * NOTE(review): the key offset assignment, the handling of the search result
 * and the return value are elided in this excerpt.
 */
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462 struct btrfs_root *root,
2463 struct btrfs_path *path,
2464 struct inode_record *rec)
2466 struct btrfs_inode_item *ei;
2467 struct btrfs_key key;
2470 key.objectid = rec->ino;
2471 key.type = BTRFS_INODE_ITEM_KEY;
2474 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2481 /* Since ret == 0, no need to check anything */
2482 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483 struct btrfs_inode_item);
2484 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485 btrfs_mark_buffer_dirty(path->nodes[0]);
2486 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487 printf("reset nbytes for ino %llu root %llu\n",
2488 rec->ino, root->root_key.objectid);
2490 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref in its own transaction.
 *
 * An empty item keyed (backref->dir, DIR_INDEX, backref->index) is inserted
 * and filled with a dir item that points at the INODE_ITEM of rec->ino,
 * carries the file type derived from rec->imode, has no attached data and
 * uses the backref's name. Afterwards the backref is marked as having a dir
 * index and the parent directory's record is updated: its found_size grows
 * by the name length and I_ERR_DIR_ISIZE_WRONG is cleared or set depending
 * on whether found_size now equals isize.
 * NOTE(review): the error checks after starting the transaction and
 * inserting the item, and the return value, are elided in this excerpt. The
 * result of btrfs_commit_transaction() is not used on the visible line.
 */
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495 struct cache_tree *inode_cache,
2496 struct inode_record *rec,
2497 struct inode_backref *backref)
2499 struct btrfs_path path;
2500 struct btrfs_trans_handle *trans;
2501 struct btrfs_dir_item *dir_item;
2502 struct extent_buffer *leaf;
2503 struct btrfs_key key;
2504 struct btrfs_disk_key disk_key;
2505 struct inode_record *dir_rec;
2506 unsigned long name_ptr;
/* Item payload: dir item header immediately followed by the name. */
2507 u32 data_size = sizeof(*dir_item) + backref->namelen;
2510 trans = btrfs_start_transaction(root, 1);
2512 return PTR_ERR(trans);
2514 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515 (unsigned long long)rec->ino);
2517 btrfs_init_path(&path);
2518 key.objectid = backref->dir;
2519 key.type = BTRFS_DIR_INDEX_KEY;
2520 key.offset = backref->index;
2521 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2524 leaf = path.nodes[0];
2525 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* On-disk key of the inode this entry points to. */
2527 disk_key.objectid = cpu_to_le64(rec->ino);
2528 disk_key.type = BTRFS_INODE_ITEM_KEY;
2529 disk_key.offset = 0;
2531 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533 btrfs_set_dir_data_len(leaf, dir_item, 0);
2534 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2535 name_ptr = (unsigned long)(dir_item + 1);
2536 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537 btrfs_mark_buffer_dirty(leaf);
2538 btrfs_release_path(&path);
2539 btrfs_commit_transaction(trans, root);
/* Bring the in-memory records in line with what was just written. */
2541 backref->found_dir_index = 1;
2542 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543 BUG_ON(IS_ERR(dir_rec));
2546 dir_rec->found_size += backref->namelen;
2547 if (dir_rec->found_size == dir_rec->isize &&
2548 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550 if (dir_rec->found_size != dir_rec->isize)
2551 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref in its own transaction.
 *
 * The entry is located with btrfs_lookup_dir_index() by directory, name and
 * index. Two removal calls are visible: btrfs_del_item() on the whole item
 * and btrfs_delete_one_dir_name() on the single entry.
 * NOTE(review): the handling of a failed lookup, the condition choosing
 * between the two removal calls and the return value are elided in this
 * excerpt. The result of btrfs_commit_transaction() is not used on the
 * visible lines.
 */
2556 static int delete_dir_index(struct btrfs_root *root,
2557 struct inode_backref *backref)
2559 struct btrfs_trans_handle *trans;
2560 struct btrfs_dir_item *di;
2561 struct btrfs_path path;
2564 trans = btrfs_start_transaction(root, 1);
2566 return PTR_ERR(trans);
2568 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569 (unsigned long long)backref->dir,
2570 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571 (unsigned long long)root->objectid);
2573 btrfs_init_path(&path);
2574 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575 backref->name, backref->namelen,
2576 backref->index, -1);
/* Early-exit path: release and commit without deleting anything. */
2579 btrfs_release_path(&path);
2580 btrfs_commit_transaction(trans, root);
2587 ret = btrfs_del_item(trans, root, &path);
2589 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2591 btrfs_release_path(&path);
2592 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino in its own transaction.
 *
 * The new item is built on the stack: generation is the transaction id,
 * nbytes is rec->found_size, and nlink is either 1 or rec->found_link (the
 * selecting condition is elided). If a dir item was seen for the inode it
 * becomes a directory (S_IFDIR | 0755, size = found_size), with a warning
 * when file extents were seen too; otherwise it becomes a regular file
 * (S_IFREG | 0755, size = extent_end). atime/ctime/mtime are set to the
 * current time and otime to 0.
 * NOTE(review): the third parameter, the error paths and the return value
 * are elided in this excerpt.
 */
2596 static int create_inode_item(struct btrfs_root *root,
2597 struct inode_record *rec,
2600 struct btrfs_trans_handle *trans;
2601 struct btrfs_inode_item inode_item;
2602 time_t now = time(NULL);
2605 trans = btrfs_start_transaction(root, 1);
2606 if (IS_ERR(trans)) {
2607 ret = PTR_ERR(trans);
2611 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612 "be incomplete, please check permissions and content after "
2613 "the fsck completes.\n", (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2616 memset(&inode_item, 0, sizeof(inode_item));
2617 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2619 btrfs_set_stack_inode_nlink(&inode_item, 1);
2621 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2623 if (rec->found_dir_item) {
2624 if (rec->found_file_extent)
2625 fprintf(stderr, "root %llu inode %llu has both a dir "
2626 "item and extents, unsure if it is a dir or a "
2627 "regular file so setting it as a directory\n",
2628 (unsigned long long)root->objectid,
2629 (unsigned long long)rec->ino);
2630 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* This test is the exact negation of the one above, so it always holds here. */
2632 } else if (!rec->found_dir_item) {
2633 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2636 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2645 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2647 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair inconsistencies between dir items,
 * dir indexes, inode refs and the inode item.
 *
 * Visible repair cases, in order:
 *  - root directory without an inode item: recreate the inode item;
 *  - dir index without a matching inode ref, or with an index mismatch:
 *    delete the dir index and drop the backref from the list;
 *  - dir item and inode ref but no dir index: add the missing dir index;
 *  - only an inode ref: after check_dir_conflict(), insert the missing
 *    dir index/item pair;
 *  - complete, consistent backref but no inode item: recreate the inode
 *    item.
 * Index 0 of the root directory is left untouched. Several cases are gated
 * on the negation of a flag named delete, which is presumably the elided
 * fourth parameter - confirm.
 * Returns the error code when one is set, otherwise the repaired counter.
 * NOTE(review): parts of the conditions, the error handling and the
 * updates of repaired are elided in this excerpt.
 */
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652 struct inode_record *rec,
2653 struct cache_tree *inode_cache,
2656 struct inode_backref *tmp, *backref;
2657 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because backrefs are unlinked in the loop. */
2661 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2662 if (!delete && rec->ino == root_dirid) {
2663 if (!rec->found_inode_item) {
2664 ret = create_inode_item(root, rec, 1);
2671 /* Index 0 for root dir's are special, don't mess with it */
2672 if (rec->ino == root_dirid && backref->index == 0)
2676 ((backref->found_dir_index && !backref->found_inode_ref) ||
2677 (backref->found_dir_index && backref->found_inode_ref &&
2678 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2679 ret = delete_dir_index(root, backref);
2683 list_del(&backref->list);
2688 if (!delete && !backref->found_dir_index &&
2689 backref->found_dir_item && backref->found_inode_ref) {
2690 ret = add_missing_dir_index(root, inode_cache, rec,
/* Backref is now complete and error free: it can leave the list. */
2695 if (backref->found_dir_item &&
2696 backref->found_dir_index) {
2697 if (!backref->errors &&
2698 backref->found_inode_ref) {
2699 list_del(&backref->list);
2706 if (!delete && (!backref->found_dir_index &&
2707 !backref->found_dir_item &&
2708 backref->found_inode_ref)) {
2709 struct btrfs_trans_handle *trans;
2710 struct btrfs_key location;
2712 ret = check_dir_conflict(root, backref->name,
2718 * let nlink fixing routine to handle it,
2719 * which can do it better.
2724 location.objectid = rec->ino;
2725 location.type = BTRFS_INODE_ITEM_KEY;
2726 location.offset = 0;
2728 trans = btrfs_start_transaction(root, 1);
2729 if (IS_ERR(trans)) {
2730 ret = PTR_ERR(trans);
2733 fprintf(stderr, "adding missing dir index/item pair "
2735 (unsigned long long)rec->ino);
2736 ret = btrfs_insert_dir_item(trans, root, backref->name,
2738 backref->dir, &location,
2739 imode_to_type(rec->imode),
2742 btrfs_commit_transaction(trans, root);
2746 if (!delete && (backref->found_inode_ref &&
2747 backref->found_dir_index &&
2748 backref->found_dir_item &&
2749 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750 !rec->found_inode_item)) {
2751 ret = create_inode_item(root, rec, 0);
2758 return ret ? ret : repaired;
2762 * To determine the file type for nlink/inode_item repair
2764 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* file type of @rec and store it in *type.
 *
 * When the inode item exists the type is derived from rec->imode. Otherwise
 * the backrefs are scanned and the filetype of the first one that has a dir
 * index or a dir item is used.
 * NOTE(review): the return statements are elided in this excerpt.
 */
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2769 struct inode_backref *backref;
2771 /* For inode item recovered case */
2772 if (rec->found_inode_item) {
2773 *type = imode_to_type(rec->imode);
2777 list_for_each_entry(backref, &rec->backrefs, list) {
2778 if (backref->found_dir_index || backref->found_dir_item) {
2779 *type = backref->filetype;
2787 * To determine the file name for nlink repair
2789 * Return 0 if file name is found, set name and namelen.
2790 * Return -ENOENT if file name is not found.
/*
 * Pick a file name for an inode out of its backrefs, for nlink repair.
 *
 * @rec:     inode record whose backref list is scanned
 * @name:    destination buffer; backref->namelen bytes are copied with
 *           memcpy and no NUL terminator is appended here
 * @namelen: output, receives the length of the copied name
 *
 * Any backref carrying a dir_index, dir_item or inode_ref is accepted as
 * the source of the name.
 *
 * NOTE(review): the size of @name is not passed in, so the caller has to
 * provide at least backref->namelen bytes. The caller visible in this
 * chunk uses a BTRFS_NAME_LEN buffer - confirm namelen cannot exceed it.
 * NOTE(review): the return statements sit on lines missing from this
 * listing; the documented contract is 0 on success and -ENOENT otherwise.
 */
2792 static int find_file_name(struct inode_record *rec,
2793 char *name, int *namelen)
2795 struct inode_backref *backref;
2797 list_for_each_entry(backref, &rec->backrefs, list) {
2798 if (backref->found_dir_index || backref->found_dir_item ||
2799 backref->found_inode_ref) {
2800 memcpy(name, backref->name, backref->namelen);
2801 *namelen = backref->namelen;
2808 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of an inode from its backref list.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root that owns the inode
 * @path:  scratch path; it is released again before the function ends
 * @rec:   inode record; found_link is zeroed and the backref list is pruned
 *
 * Visible sequence:
 *   1. rec->found_link is reset to 0.
 *   2. Every backref is unlinked with btrfs_unlink(); backrefs that do not
 *      have all of dir_index, dir_item and inode_ref are dropped from the
 *      list so they are not re-added.
 *   3. The inode item is looked up and its nlink is set to 0.
 *   4. Each remaining backref is re-created with btrfs_add_link().
 *
 * NOTE(review): the error checks after each call and the final return are
 * on lines missing from this listing.
 */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810 struct btrfs_root *root,
2811 struct btrfs_path *path,
2812 struct inode_record *rec)
2814 struct inode_backref *backref;
2815 struct inode_backref *tmp;
2816 struct btrfs_key key;
2817 struct btrfs_inode_item *inode_item;
2820 /* We don't believe this either, reset it and iterate backref */
2821 rec->found_link = 0;
2823 /* Remove all backref including the valid ones */
2824 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826 backref->index, backref->name,
2827 backref->namelen, 0);
2831 /* remove invalid backref, so it won't be added back */
2832 if (!(backref->found_dir_index &&
2833 backref->found_dir_item &&
2834 backref->found_inode_ref)) {
2835 list_del(&backref->list);
2842 /* Set nlink to 0 */
2843 key.objectid = rec->ino;
2844 key.type = BTRFS_INODE_ITEM_KEY;
2846 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2853 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854 struct btrfs_inode_item);
2855 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856 btrfs_mark_buffer_dirty(path->nodes[0]);
2857 btrfs_release_path(path);
2860 * Add back valid inode_ref/dir_item/dir_index,
2861 * add_link() will handle the nlink inc, so new nlink must be correct
2863 list_for_each_entry(backref, &rec->backrefs, list) {
2864 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865 backref->name, backref->namelen,
2866 backref->filetype, &backref->index, 1);
2871 btrfs_release_path(path);
/*
 * Find the highest inode number currently present in @root.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root to search
 * @path:  scratch path; initialised here and released before returning
 *
 * The result is stored through highest_ino (its parameter line is missing
 * from this listing). The search key uses BTRFS_LAST_FREE_OBJECTID with
 * type BTRFS_INODE_ITEM_KEY, and the objectid is then read from the slot
 * just before the position btrfs_search_slot() reports.
 *
 * NOTE(review): path->slots[0] - 1 is only valid when slots[0] is not 0;
 * the guarding condition is on a line missing from this listing - confirm.
 * NOTE(review): the branch taken when the result reaches
 * BTRFS_LAST_FREE_OBJECTID is also not visible here.
 */
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876 struct btrfs_root *root,
2877 struct btrfs_path *path,
2880 struct btrfs_key key, found_key;
2883 btrfs_init_path(path);
2884 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2886 key.type = BTRFS_INODE_ITEM_KEY;
2887 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2889 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890 path->slots[0] - 1);
2891 *highest_ino = found_key.objectid;
2894 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2896 btrfs_release_path(path);
/*
 * Repair an inode whose link count is wrong.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root that owns the inode
 * @path:  scratch path; released before the function ends
 * @rec:   inode record being repaired
 *
 * Visible sequence:
 *   1. A name and a file type are captured first, before reset_nlink()
 *      prunes the backrefs. The fallbacks are the decimal inode number as
 *      name and BTRFS_FT_REG_FILE as type.
 *   2. reset_nlink() rebuilds the links from the valid backrefs.
 *   3. If rec->found_link is still 0, a lost+found directory is created
 *      under BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it.
 *      While btrfs_add_link() reports -EEXIST, a suffix built from the
 *      inode number is appended to the name and the link is retried.
 *   4. I_ERR_LINK_COUNT_WRONG is cleared regardless of the outcome, so the
 *      same inode is not retried forever.
 *
 * NOTE(review): snprintf() gets BTRFS_NAME_LEN - namelen bytes and always
 * NUL-terminates, while namelen afterwards grows by the full suffix
 * length. If the bound on the missing line lets the suffix fill the buffer
 * exactly, the last character would be lost - confirm the limit there.
 * NOTE(review): several error checks, the goto targets and the return are
 * on lines missing from this listing.
 */
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901 struct btrfs_root *root,
2902 struct btrfs_path *path,
2903 struct inode_record *rec)
2905 char *dir_name = "lost+found";
2906 char namebuf[BTRFS_NAME_LEN] = {0};
2911 int name_recovered = 0;
2912 int type_recovered = 0;
2916 * Get file name and type first before these invalid inode ref
2917 * are deleted by remove_all_invalid_backref()
2919 name_recovered = !find_file_name(rec, namebuf, &namelen);
2920 type_recovered = !find_file_type(rec, &type);
2922 if (!name_recovered) {
2923 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924 rec->ino, rec->ino);
2925 namelen = count_digits(rec->ino);
2926 sprintf(namebuf, "%llu", rec->ino);
2929 if (!type_recovered) {
2930 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2932 type = BTRFS_FT_REG_FILE;
2936 ret = reset_nlink(trans, root, path, rec);
2939 "Failed to reset nlink for inode %llu: %s\n",
2940 rec->ino, strerror(-ret));
2944 if (rec->found_link == 0) {
2945 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2949 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2953 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954 dir_name, strerror(-ret));
2957 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958 namebuf, namelen, type, NULL, 1);
2960 * Add ".INO" suffix several times to handle case where
2961 * "FILENAME.INO" is already taken by another file.
2963 while (ret == -EEXIST) {
2965 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2967 if (namelen + count_digits(rec->ino) + 1 >
2972 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2974 namelen += count_digits(rec->ino) + 1;
2975 ret = btrfs_add_link(trans, root, rec->ino,
2976 lost_found_ino, namebuf,
2977 namelen, type, NULL, 1);
2981 "Failed to link the inode %llu to %s dir: %s\n",
2982 rec->ino, dir_name, strerror(-ret));
2986 * Just increase the found_link, don't actually add the
2987 * backref. This will make things easier and this inode
2988 * record will be freed after the repair is done.
2989 * So fsck will not report problem about this inode.
2992 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993 namelen, namebuf, dir_name);
2995 printf("Fixed the nlink of inode %llu\n", rec->ino);
2998 * Clear the flag anyway, or we will loop forever for the same inode
2999 * as it will not be removed from the bad inode list and the dead loop
3002 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003 btrfs_release_path(path);
3008 * Check if there is any normal(reg or prealloc) file extent for given
3010 * This is used to determine the file type when neither its dir_index/item nor
3011 * inode_item exists.
3013 * This will *NOT* report an error; if any error happens, just consider that it does
3014 * not have any normal file extent.
/*
 * Probe whether inode @ino in @root has a non-inline file extent.
 *
 * @root: fs/subvolume root to search (read-only search, no transaction)
 * @ino:  inode number to probe
 *
 * The tree is searched for a BTRFS_EXTENT_DATA_KEY item. If the search
 * lands past the last slot of a leaf, the next leaf is loaded. The found
 * key must match both @ino and the EXTENT_DATA type before the extent type
 * is read; any type other than BTRFS_FILE_EXTENT_INLINE counts as normal.
 *
 * NOTE(review): the remaining key fields, the loop construct and the
 * return value assignments are on lines missing from this listing.
 */
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3018 struct btrfs_path path;
3019 struct btrfs_key key;
3020 struct btrfs_key found_key;
3021 struct btrfs_file_extent_item *fi;
3025 btrfs_init_path(&path);
3027 key.type = BTRFS_EXTENT_DATA_KEY;
3030 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Search stopped past the last item of this leaf: step to the next leaf */
3035 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036 ret = btrfs_next_leaf(root, &path);
3043 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Only items of this inode with the EXTENT_DATA type are of interest */
3045 if (found_key.objectid != ino ||
3046 found_key.type != BTRFS_EXTENT_DATA_KEY)
3048 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049 struct btrfs_file_extent_item);
3050 type = btrfs_file_extent_type(path.nodes[0], fi);
3051 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057 btrfs_release_path(&path);
/*
 * Translate a BTRFS_FT_* directory entry type into the matching S_IF*
 * mode bits through a static lookup table.
 *
 * @type: BTRFS_FT_* value
 *
 * Indexes inside the table that have no designated initialiser yield 0,
 * because the table has static storage.
 *
 * NOTE(review): @type is used as an index without a bounds check. A value
 * above the highest designated entry reads past the end of the table, and
 * callers pass types taken from on-disk backrefs (see find_file_type()) -
 * consider validating before the lookup.
 */
3061 static u32 btrfs_type_to_imode(u8 type)
3063 static u32 imode_by_btrfs_type[] = {
3064 [BTRFS_FT_REG_FILE] = S_IFREG,
3065 [BTRFS_FT_DIR] = S_IFDIR,
3066 [BTRFS_FT_CHRDEV] = S_IFCHR,
3067 [BTRFS_FT_BLKDEV] = S_IFBLK,
3068 [BTRFS_FT_FIFO] = S_IFIFO,
3069 [BTRFS_FT_SOCK] = S_IFSOCK,
3070 [BTRFS_FT_SYMLINK] = S_IFLNK,
3073 return imode_by_btrfs_type[(type)];
/*
 * Recreate the inode item of an inode that has none.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root that owns the inode
 * @path:  scratch path (not referenced on the lines visible here)
 * @rec:   inode record being repaired
 *
 * File type selection, in order:
 *   1. whatever find_file_type() recovers from the record;
 *   2. regular file, if a file extent was found and
 *      find_normal_file_extent() confirms a non-inline one;
 *   3. directory, if rec->found_dir_item is set;
 *   4. regular file, if the record has orphan extents;
 *   5. regular file as the last fallback, with a message.
 *
 * The item is created with btrfs_new_inode() using mode combined with the
 * type bits from btrfs_type_to_imode(). Afterwards I_ERR_NO_INODE_ITEM is
 * cleared and I_ERR_LINK_COUNT_WRONG is set, so the nlink repair runs next.
 *
 * NOTE(review): the record update sets rec->found_dir_item, not
 * rec->found_inode_item - confirm this is intended.
 * NOTE(review): the declaration and value of mode, the error checks and
 * the return are on lines missing from this listing.
 */
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077 struct btrfs_root *root,
3078 struct btrfs_path *path,
3079 struct inode_record *rec)
3083 int type_recovered = 0;
3086 printf("Trying to rebuild inode:%llu\n", rec->ino);
3088 type_recovered = !find_file_type(rec, &filetype);
3091 * Try to determine inode type if type not found.
3093 * For found regular file extent, it must be FILE.
3094 * For found dir_item/index, it must be DIR.
3096 * For undetermined one, use FILE as fallback.
3099 * 1. If found backref(inode_index/item is already handled) to it,
3101 * Need new inode-inode ref structure to allow search for that.
3103 if (!type_recovered) {
3104 if (rec->found_file_extent &&
3105 find_normal_file_extent(root, rec->ino)) {
3107 filetype = BTRFS_FT_REG_FILE;
3108 } else if (rec->found_dir_item) {
3110 filetype = BTRFS_FT_DIR;
3111 } else if (!list_empty(&rec->orphan_extents)) {
3113 filetype = BTRFS_FT_REG_FILE;
3115 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3118 filetype = BTRFS_FT_REG_FILE;
3122 ret = btrfs_new_inode(trans, root, rec->ino,
3123 mode | btrfs_type_to_imode(filetype));
3128 * Here inode rebuild is done, we only rebuild the inode item,
3129 * don't repair the nlink(like move to lost+found).
3130 * That is the job of nlink repair.
3132 * We just fill the record and return
3134 rec->found_dir_item = 1;
3135 rec->imode = mode | btrfs_type_to_imode(filetype);
3137 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138 /* Ensure the inode_nlinks repair function will be called */
3139 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach or discard the orphan data extents recorded for an inode.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root that owns the inode
 * @path:  scratch path; released after each conflict probe
 * @rec:   inode record; its orphan_extents list is drained
 *
 * For every orphan extent:
 *   - btrfs_get_extent() probes the range [offset, offset + disk_len) for
 *     an existing file extent. The extent is assumed to be uncompressed
 *     with no data offset, so disk_len doubles as the logical length.
 *   - On a conflict the orphan is released with btrfs_free_extent().
 *   - Otherwise a file extent item is inserted, rec->found_size grows by
 *     disk_len, the covered hole is removed from rec->holes, and the
 *     NBYTES_WRONG and EXTENT_DISCOUNT error bits are cleared once the
 *     record is consistent again.
 *   - The orphan entry is unlinked from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): the conditions that pick between the conflict and insert
 * branches, the error checks, the freeing of the list entry and the
 * return are on lines missing from this listing.
 */
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root,
3146 struct btrfs_path *path,
3147 struct inode_record *rec)
3149 struct orphan_data_extent *orphan;
3150 struct orphan_data_extent *tmp;
3153 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3155 * Check for conflicting file extents
3157 * Here we don't know whether the extents is compressed or not,
3158 * so we can only assume it not compressed nor data offset,
3159 * and use its disk_len as extent length.
3161 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162 orphan->offset, orphan->disk_len, 0);
3163 btrfs_release_path(path);
3168 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169 orphan->disk_bytenr, orphan->disk_len);
3170 ret = btrfs_free_extent(trans,
3171 root->fs_info->extent_root,
3172 orphan->disk_bytenr, orphan->disk_len,
3173 0, root->objectid, orphan->objectid,
3178 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179 orphan->offset, orphan->disk_bytenr,
3180 orphan->disk_len, orphan->disk_len);
3184 /* Update file size info */
3185 rec->found_size += orphan->disk_len;
3186 if (rec->found_size == rec->nbytes)
3187 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3189 /* Update the file extent hole info too */
3190 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3194 if (RB_EMPTY_ROOT(&rec->holes))
3195 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3197 list_del(&orphan->list);
3200 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between the file extents of an inode with hole extents.
 *
 * @trans: running transaction
 * @root:  fs/subvolume root that owns the inode
 * @path:  scratch path (not referenced on the lines visible here)
 * @rec:   inode record; its holes rb-tree is drained
 *
 * The first node of rec->holes is taken repeatedly: btrfs_punch_hole()
 * covers the recorded range and del_file_extent_hole() drops the entry.
 * Once the tree is empty, I_ERR_FILE_EXTENT_DISCOUNT is cleared. There is
 * also a special case for a file that lost all of its extents, which
 * punches one hole from 0 up to isize rounded up to the sector size.
 *
 * NOTE(review): the loop construct, the condition guarding the special
 * case, the error checks and the return are on lines missing from this
 * listing.
 */
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206 struct btrfs_root *root,
3207 struct btrfs_path *path,
3208 struct inode_record *rec)
3210 struct rb_node *node;
3211 struct file_extent_hole *hole;
3215 node = rb_first(&rec->holes);
3219 hole = rb_entry(node, struct file_extent_hole, node);
3220 ret = btrfs_punch_hole(trans, root, rec->ino,
3221 hole->start, hole->len);
3224 ret = del_file_extent_hole(&rec->holes, hole->start,
3228 if (RB_EMPTY_ROOT(&rec->holes))
3229 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the new first node, since the previous one was deleted */
3230 node = rb_first(&rec->holes);
3232 /* special case for a file losing all its file extent */
3234 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235 round_up(rec->isize, root->sectorsize));
3239 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for one inode record inside a
 * single transaction.
 *
 * @root: fs/subvolume root that owns the inode
 * @rec:  inode record with error bits set
 *
 * Only the error bits listed in the first condition are handled here. The
 * transaction reserves 7 items, sized for the nlink repair as itemised in
 * the comment below. The repairs run in a fixed order and each later step
 * is skipped once an earlier one has failed. The missing inode item repair
 * runs first; it sets I_ERR_LINK_COUNT_WRONG, which the nlink step later
 * in the chain picks up.
 *
 * NOTE(review): the transaction is committed even when a repair step
 * failed, and the result of btrfs_commit_transaction() is not checked.
 * NOTE(review): the declaration of ret, the early return for records
 * without a handled error bit and the final return are on lines missing
 * from this listing.
 */
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3247 struct btrfs_trans_handle *trans;
3248 struct btrfs_path path;
3251 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252 I_ERR_NO_ORPHAN_ITEM |
3253 I_ERR_LINK_COUNT_WRONG |
3254 I_ERR_NO_INODE_ITEM |
3255 I_ERR_FILE_EXTENT_ORPHAN |
3256 I_ERR_FILE_EXTENT_DISCOUNT|
3257 I_ERR_FILE_NBYTES_WRONG)))
3261 * For nlink repair, it may create a dir and add link, so
3262 * 2 for parent(256)'s dir_index and dir_item
3263 * 2 for lost+found dir's inode_item and inode_ref
3264 * 1 for the new inode_ref of the file
3265 * 2 for lost+found dir's dir_index and dir_item for the file
3267 trans = btrfs_start_transaction(root, 7);
3269 return PTR_ERR(trans);
3271 btrfs_init_path(&path);
3272 if (rec->errors & I_ERR_NO_INODE_ITEM)
3273 ret = repair_inode_no_item(trans, root, &path, rec);
3274 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279 ret = repair_inode_isize(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283 ret = repair_inode_nlinks(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285 ret = repair_inode_nbytes(trans, root, &path, rec);
3286 btrfs_commit_transaction(trans, root);
3287 btrfs_release_path(&path);
/*
 * Validate, and in repair mode fix, every inode record collected for one
 * fs/subvolume root, then free the records.
 *
 * @root:        fs/subvolume root the records belong to
 * @inode_cache: cache tree of inode records; it is drained by this call
 *
 * Visible phases:
 *   1. A root with zero refs is handled up front; a warning is printed if
 *      records were still collected for it.
 *   2. In repair mode the backrefs are repaired in stages by walking the
 *      cache and calling repair_inode_backrefs(); the comment below
 *      explains why deletions must precede additions.
 *   3. The root directory record is checked with check_root_dir(). When
 *      the root dir is missing, the visible code can recreate it with
 *      btrfs_make_root_dir() inside its own transaction.
 *   4. Every remaining record is removed from the cache. Records for the
 *      root dir and for BTRFS_ORPHAN_OBJECTID are simply freed. For the
 *      rest, a missing orphan item is re-checked, missing inode items and
 *      link count mismatches are flagged, and try_repair_inode() is
 *      called. Records that still carry errors are printed together with
 *      their unresolved backrefs.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): many control flow lines (loop heads, goto and continue
 * statements, the stage counter updates) are missing from this listing, so
 * the exact conditions between the phases cannot be confirmed here.
 */
3291 static int check_inode_recs(struct btrfs_root *root,
3292 struct cache_tree *inode_cache)
3294 struct cache_extent *cache;
3295 struct ptr_node *node;
3296 struct inode_record *rec;
3297 struct inode_backref *backref;
3302 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root without refs is not expected to have collected inode records */
3304 if (btrfs_root_refs(&root->root_item) == 0) {
3305 if (!cache_tree_empty(inode_cache))
3306 fprintf(stderr, "warning line %d\n", __LINE__);
3311 * We need to repair backrefs first because we could change some of the
3312 * errors in the inode recs.
3314 * We also need to go through and delete invalid backrefs first and then
3315 * add the correct ones second. We do this because we may get EEXIST
3316 * when adding back the correct index because we hadn't yet deleted the
3319 * For example, if we were missing a dir index then the directories
3320 * isize would be wrong, so if we fixed the isize to what we thought it
3321 * would be and then fixed the backref we'd still have a invalid fs, so
3322 * we need to add back the dir index and then check to see if the isize
3327 if (stage == 3 && !err)
3330 cache = search_cache_extent(inode_cache, 0);
3331 while (repair && cache) {
3332 node = container_of(cache, struct ptr_node, cache);
3334 cache = next_cache_extent(cache);
3336 /* Need to free everything up and rescan */
3338 remove_cache_extent(inode_cache, &node->cache);
3340 free_inode_rec(rec);
3344 if (list_empty(&rec->backrefs))
3347 ret = repair_inode_backrefs(root, rec, inode_cache,
3361 rec = get_inode_rec(inode_cache, root_dirid, 0);
3362 BUG_ON(IS_ERR(rec));
3364 ret = check_root_dir(rec);
3366 fprintf(stderr, "root %llu root dir %llu error\n",
3367 (unsigned long long)root->root_key.objectid,
3368 (unsigned long long)root_dirid);
3369 print_inode_error(root, rec);
3374 struct btrfs_trans_handle *trans;
3376 trans = btrfs_start_transaction(root, 1);
3377 if (IS_ERR(trans)) {
3378 err = PTR_ERR(trans);
3383 "root %llu missing its root dir, recreating\n",
3384 (unsigned long long)root->objectid);
3386 ret = btrfs_make_root_dir(trans, root, root_dirid);
3389 btrfs_commit_transaction(trans, root);
3393 fprintf(stderr, "root %llu root dir %llu not found\n",
3394 (unsigned long long)root->root_key.objectid,
3395 (unsigned long long)root_dirid);
3399 cache = search_cache_extent(inode_cache, 0);
3402 node = container_of(cache, struct ptr_node, cache);
3404 remove_cache_extent(inode_cache, &node->cache);
3406 if (rec->ino == root_dirid ||
3407 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3408 free_inode_rec(rec);
3412 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3413 ret = check_orphan_item(root, rec->ino);
3415 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3416 if (can_free_inode_rec(rec)) {
3417 free_inode_rec(rec);
3422 if (!rec->found_inode_item)
3423 rec->errors |= I_ERR_NO_INODE_ITEM;
3424 if (rec->found_link != rec->nlink)
3425 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3427 ret = try_repair_inode(root, rec);
3428 if (ret == 0 && can_free_inode_rec(rec)) {
3429 free_inode_rec(rec);
3435 if (!(repair && ret == 0))
3437 print_inode_error(root, rec);
3438 list_for_each_entry(backref, &rec->backrefs, list) {
3439 if (!backref->found_dir_item)
3440 backref->errors |= REF_ERR_NO_DIR_ITEM;
3441 if (!backref->found_dir_index)
3442 backref->errors |= REF_ERR_NO_DIR_INDEX;
3443 if (!backref->found_inode_ref)
3444 backref->errors |= REF_ERR_NO_INODE_REF;
3445 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3446 " namelen %u name %s filetype %d errors %x",
3447 (unsigned long long)backref->dir,
3448 (unsigned long long)backref->index,
3449 backref->namelen, backref->name,
3450 backref->filetype, backref->errors);
3451 print_ref_error(backref->errors);
3453 free_inode_rec(rec);
3455 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for an objectid in @root_cache, creating and
 * inserting a zeroed record when none exists yet.
 *
 * @root_cache: cache tree of root records, keyed by objectid with size 1
 *
 * The objectid parameter itself is declared on a line missing from this
 * listing.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when the allocation fails, or
 * ERR_PTR(-EEXIST) when the insert into the cache tree fails.
 *
 * NOTE(review): no free of the new record is visible on the insert
 * failure path - confirm whether it leaks there.
 */
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3461 struct cache_extent *cache;
3462 struct root_record *rec = NULL;
3465 cache = lookup_cache_extent(root_cache, objectid, 1);
3467 rec = container_of(cache, struct root_record, cache);
/* Not cached yet: build a fresh record covering exactly this objectid */
3469 rec = calloc(1, sizeof(*rec));
3471 return ERR_PTR(-ENOMEM);
3472 rec->objectid = objectid;
3473 INIT_LIST_HEAD(&rec->backrefs);
3474 rec->cache.start = objectid;
3475 rec->cache.size = 1;
3477 ret = insert_cache_extent(root_cache, &rec->cache);
3479 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (ref_root, dir, name), or append
 * a new zeroed one to the list.
 *
 * @rec:      root record that owns the backref list
 * @ref_root: id of the referencing root
 * @dir:      directory inode number inside the referencing root
 * @index:    directory index; stored only when a new backref is created
 * @name:     entry name, namelen bytes, need not be NUL terminated
 * @namelen:  length of @name
 *
 * The index is not part of the match, so an index mismatch can be flagged
 * later by add_root_backref(). A new backref stores its own NUL
 * terminated copy of the name in the trailing array.
 *
 * NOTE(review): the returns, the allocation failure check and the
 * assignment of the dir field are on lines missing from this listing.
 */
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485 u64 ref_root, u64 dir, u64 index,
3486 const char *name, int namelen)
3488 struct root_backref *backref;
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (backref->ref_root != ref_root || backref->dir != dir ||
3492 backref->namelen != namelen)
3494 if (memcmp(name, backref->name, namelen))
/* One extra byte so the stored name can be NUL terminated */
3499 backref = calloc(1, sizeof(*backref) + namelen + 1);
3502 backref->ref_root = ref_root;
3504 backref->index = index;
3505 backref->namelen = namelen;
3506 memcpy(backref->name, name, namelen);
3507 backref->name[namelen] = '\0';
3508 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root_record stored in a cache tree.
 *
 * @cache: the cache_extent embedded in the record
 *
 * Every backref is unlinked from the list of the record in turn.
 *
 * NOTE(review): the calls that free each backref and the record itself
 * are on lines missing from this listing.
 */
3512 static void free_root_record(struct cache_extent *cache)
3514 struct root_record *rec;
3515 struct root_backref *backref;
3517 rec = container_of(cache, struct root_record, cache);
3518 while (!list_empty(&rec->backrefs)) {
3519 backref = to_root_backref(rec->backrefs.next);
3520 list_del(&backref->list);
/*
 * Instantiates the tree-freeing helper for root records, using
 * free_root_record() as the per-entry callback.
 * NOTE(review): presumably this expands to free_root_recs_tree(), which
 * check_fs_roots() calls - confirm against the macro definition.
 */
3527 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one observed reference to a subvolume root.
 *
 * @root_cache: cache tree of root records
 * @root_id:    id of the root being referenced
 * @ref_root:   id of the root that holds the reference
 * @dir:        directory inode number inside @ref_root
 * @index:      directory index carried by the item
 * @name:       entry name, @namelen bytes
 * @namelen:    length of @name
 * @item_type:  key type of the item the reference came from
 * @errors:     REF_ERR_* bits already detected by the caller
 *
 * The matching backref is fetched or created and @errors is merged in.
 * For every item type except BTRFS_DIR_ITEM_KEY the index is compared
 * with the stored one once an index-carrying item has been seen; a
 * mismatch sets REF_ERR_INDEX_UNMATCH. The found flag for @item_type is
 * then set, with duplicate ROOT_REF and ROOT_BACKREF items flagged. A
 * backref becomes reachable once both the forward ref and the dir item
 * have been seen.
 *
 * NOTE(review): the allocation failure check, the found_ref counter
 * updates and the return are on lines missing from this listing.
 */
3529 static int add_root_backref(struct cache_tree *root_cache,
3530 u64 root_id, u64 ref_root, u64 dir, u64 index,
3531 const char *name, int namelen,
3532 int item_type, int errors)
3534 struct root_record *rec;
3535 struct root_backref *backref;
3537 rec = get_root_rec(root_cache, root_id);
3538 BUG_ON(IS_ERR(rec));
3539 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3542 backref->errors |= errors;
/* Only items other than DIR_ITEM take part in the index comparison */
3544 if (item_type != BTRFS_DIR_ITEM_KEY) {
3545 if (backref->found_dir_index || backref->found_back_ref ||
3546 backref->found_forward_ref) {
3547 if (backref->index != index)
3548 backref->errors |= REF_ERR_INDEX_UNMATCH;
3550 backref->index = index;
3554 if (item_type == BTRFS_DIR_ITEM_KEY) {
3555 if (backref->found_forward_ref)
3557 backref->found_dir_item = 1;
3558 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559 backref->found_dir_index = 1;
3560 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561 if (backref->found_forward_ref)
3562 backref->errors |= REF_ERR_DUP_ROOT_REF;
3563 else if (backref->found_dir_item)
3565 backref->found_forward_ref = 1;
3566 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567 if (backref->found_back_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569 backref->found_back_ref = 1;
/* Forward ref plus dir item together make the root reachable */
3574 if (backref->found_forward_ref && backref->found_dir_item)
3575 backref->reachable = 1;
/*
 * Fold the per-root records that describe child subvolumes into the
 * global root record cache.
 *
 * @root:      fs/subvolume root that was just walked
 * @src_cache: inode-record cache collected for @root; drained here
 * @dst_cache: global cache of root records
 *
 * For the tree reloc root the source cache is simply freed. Otherwise
 * each record is removed from @src_cache and tested with is_child_root().
 * The dir_item and dir_index backrefs of such a record are re-registered
 * through add_root_backref(), with the objectid of @root as the
 * referencing root. An inode_ref on such a record is treated as a fatal
 * inconsistency (BUG_ON).
 *
 * NOTE(review): the loop head, the handling of the is_child_root() result
 * and the returns are on lines missing from this listing.
 */
3579 static int merge_root_recs(struct btrfs_root *root,
3580 struct cache_tree *src_cache,
3581 struct cache_tree *dst_cache)
3583 struct cache_extent *cache;
3584 struct ptr_node *node;
3585 struct inode_record *rec;
3586 struct inode_backref *backref;
3589 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590 free_inode_recs_tree(src_cache);
3595 cache = search_cache_extent(src_cache, 0);
3598 node = container_of(cache, struct ptr_node, cache);
3600 remove_cache_extent(src_cache, &node->cache);
3603 ret = is_child_root(root, root->objectid, rec->ino);
3609 list_for_each_entry(backref, &rec->backrefs, list) {
3610 BUG_ON(backref->found_inode_ref);
3611 if (backref->found_dir_item)
3612 add_root_backref(dst_cache, rec->ino,
3613 root->root_key.objectid, backref->dir,
3614 backref->index, backref->name,
3615 backref->namelen, BTRFS_DIR_ITEM_KEY,
3617 if (backref->found_dir_index)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_INDEX_KEY,
3625 free_inode_rec(rec);
/*
 * Verify that every subvolume root is referenced consistently and report
 * the ones that are not.
 *
 * @root:       any root of the filesystem; used to reach the tree root
 *              for the orphan item check
 * @root_cache: cache tree of root records
 *
 * Visible phases:
 *   1. The record of BTRFS_FS_TREE_OBJECTID is fetched first.
 *   2. Reachability is propagated: a reachable backref whose referencing
 *      root has no found_ref is marked unreachable. As the fixme notes,
 *      circular references are not detected.
 *   3. Each record is reported. Unreferenced roots inside the free
 *      objectid range are checked for an orphan item and skipped when
 *      they have no root item. Missing dir item, dir index, root backref
 *      and root ref bits are added to the backref errors, and unresolved
 *      backrefs are printed.
 *
 * Returns 1 when any error was counted, 0 otherwise.
 *
 * NOTE(review): the loop heads, the found_ref updates, the continue
 * statements and the error counter updates are on lines missing from this
 * listing.
 */
3632 static int check_root_refs(struct btrfs_root *root,
3633 struct cache_tree *root_cache)
3635 struct root_record *rec;
3636 struct root_record *ref_root;
3637 struct root_backref *backref;
3638 struct cache_extent *cache;
3644 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645 BUG_ON(IS_ERR(rec));
3648 /* fixme: this can not detect circular references */
3651 cache = search_cache_extent(root_cache, 0);
3655 rec = container_of(cache, struct root_record, cache);
3656 cache = next_cache_extent(cache);
3658 if (rec->found_ref == 0)
3661 list_for_each_entry(backref, &rec->backrefs, list) {
3662 if (!backref->reachable)
3665 ref_root = get_root_rec(root_cache,
3667 BUG_ON(IS_ERR(ref_root));
3668 if (ref_root->found_ref > 0)
3671 backref->reachable = 0;
3673 if (rec->found_ref == 0)
3679 cache = search_cache_extent(root_cache, 0);
3683 rec = container_of(cache, struct root_record, cache);
3684 cache = next_cache_extent(cache);
3686 if (rec->found_ref == 0 &&
3687 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689 ret = check_orphan_item(root->fs_info->tree_root,
3695 * If we don't have a root item then we likely just have
3696 * a dir item in a snapshot for this root but no actual
3697 * ref key or anything so it's meaningless.
3699 if (!rec->found_root_item)
3702 fprintf(stderr, "fs tree %llu not referenced\n",
3703 (unsigned long long)rec->objectid);
3707 if (rec->found_ref > 0 && !rec->found_root_item)
3709 list_for_each_entry(backref, &rec->backrefs, list) {
3710 if (!backref->found_dir_item)
3711 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712 if (!backref->found_dir_index)
3713 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714 if (!backref->found_back_ref)
3715 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716 if (!backref->found_forward_ref)
3717 backref->errors |= REF_ERR_NO_ROOT_REF;
3718 if (backref->reachable && backref->errors)
3725 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726 (unsigned long long)rec->objectid, rec->found_ref,
3727 rec->found_root_item ? "" : "not found");
3729 list_for_each_entry(backref, &rec->backrefs, list) {
3730 if (!backref->reachable)
3732 if (!backref->errors && rec->found_root_item)
3734 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735 " index %llu namelen %u name %s errors %x\n",
3736 (unsigned long long)backref->ref_root,
3737 (unsigned long long)backref->dir,
3738 (unsigned long long)backref->index,
3739 backref->namelen, backref->name,
3741 print_ref_error(backref->errors);
3744 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item and register it in the root
 * record cache.
 *
 * @eb:         leaf that holds the item
 * @slot:       slot of the item inside @eb
 * @key:        key of the item; its type selects the direction
 * @root_cache: cache tree of root records
 *
 * The directory id, sequence (index) and name length are read from the
 * item, and the name that follows the fixed part is copied into a local
 * buffer. A name longer than BTRFS_NAME_LEN is clamped to that length and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * For a ROOT_REF key the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type the two are
 * swapped.
 *
 * NOTE(review): the local declarations, the assignment of len for names
 * that fit and the return are on lines missing from this listing.
 */
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748 struct btrfs_key *key,
3749 struct cache_tree *root_cache)
3755 struct btrfs_root_ref *ref;
3756 char namebuf[BTRFS_NAME_LEN];
3759 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3761 dirid = btrfs_root_ref_dirid(eb, ref);
3762 index = btrfs_root_ref_sequence(eb, ref);
3763 name_len = btrfs_root_ref_name_len(eb, ref);
3765 if (name_len <= BTRFS_NAME_LEN) {
3769 len = BTRFS_NAME_LEN;
3770 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed size btrfs_root_ref */
3772 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3774 if (key->type == BTRFS_ROOT_REF_KEY) {
3775 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776 index, namebuf, len, key->type, error);
3778 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block stored in a cache tree.
 *
 * @cache: the cache_extent embedded in the corrupt block record
 *
 * NOTE(review): the call that frees the record is on a line missing from
 * this listing.
 */
3784 static void free_corrupt_block(struct cache_extent *cache)
3786 struct btrfs_corrupt_block *corrupt;
3788 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree-freeing helper for corrupt block records, using
 * free_corrupt_block() as the per-entry callback.
 * NOTE(review): presumably this expands to free_corrupt_blocks_tree(),
 * which check_fs_root() calls - confirm against the macro definition.
 */
3792 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3795 * Repair the btree of the given root.
3797 * The fix is to remove the node key in corrupt_blocks cache_tree
3798 * and rebalance the tree.
3799 * After the fix, the btree should be writeable.
/*
 * Cut the corrupted tree blocks recorded in @corrupt_blocks out of the
 * btree of @root and rebalance it afterwards.
 *
 * @root:           fs/subvolume root to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block records, each with
 *                  the node key and level of a bad block
 *
 * Nothing is done for an empty cache tree. Otherwise one transaction
 * covers two passes over the records:
 *   1. Removal: the tree is searched down to the recorded level with
 *      ins_len 0 so no balancing touches a corrupted sibling. The block
 *      pointer is read, removed with btrfs_del_ptr() and its extent is
 *      released with btrfs_free_extent().
 *   2. Rebalance: each key is searched again with ins_len -1, which lets
 *      btrfs_search_slot() balance the nodes along the path.
 * The transaction is committed at the end.
 *
 * NOTE(review): the loop heads, the error checks, the goto targets and
 * the return are on lines missing from this listing.
 */
3801 static int repair_btree(struct btrfs_root *root,
3802 struct cache_tree *corrupt_blocks)
3804 struct btrfs_trans_handle *trans;
3805 struct btrfs_path path;
3806 struct btrfs_corrupt_block *corrupt;
3807 struct cache_extent *cache;
3808 struct btrfs_key key;
3813 if (cache_tree_empty(corrupt_blocks))
3816 trans = btrfs_start_transaction(root, 1);
3817 if (IS_ERR(trans)) {
3818 ret = PTR_ERR(trans);
3819 fprintf(stderr, "Error starting transaction: %s\n",
3823 btrfs_init_path(&path);
3824 cache = first_cache_extent(corrupt_blocks);
3826 corrupt = container_of(cache, struct btrfs_corrupt_block,
3828 level = corrupt->level;
3829 path.lowest_level = level;
3830 key.objectid = corrupt->key.objectid;
3831 key.type = corrupt->key.type;
3832 key.offset = corrupt->key.offset;
3835 * Here we don't want to do any tree balance, since it may
3836 * cause a balance with corrupted brother leaf/node,
3837 * so ins_len set to 0 here.
3838 * Balance will be done after all corrupt node/leaf is deleted.
3840 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3843 offset = btrfs_node_blockptr(path.nodes[level],
3846 /* Remove the ptr */
3847 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3851 * Remove the corresponding extent
3852 * return value is not concerned.
3854 btrfs_release_path(&path);
3855 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856 0, root->root_key.objectid,
3858 cache = next_cache_extent(cache);
3861 /* Balance the btree using btrfs_search_slot() */
3862 cache = first_cache_extent(corrupt_blocks);
3864 corrupt = container_of(cache, struct btrfs_corrupt_block,
3866 memcpy(&key, &corrupt->key, sizeof(key));
3867 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3870 /* return will always >0 since it won't find the item */
3872 btrfs_release_path(&path);
3873 cache = next_cache_extent(cache);
3876 btrfs_commit_transaction(trans, root);
3877 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it, collect inode and root records,
 * optionally repair corrupted tree blocks, and validate the records.
 *
 * @root:       fs/subvolume root to check
 * @root_cache: global cache of root records, updated with what is found
 * @wc:         walk control state shared between roots; its node array
 *              and level fields are reset here
 *
 * Visible phases:
 *   1. A local corrupt_blocks cache tree is installed in fs_info for the
 *      duration of the call.
 *   2. Unless this is the tree reloc root, the root record is fetched and
 *      marked as having a root item when the item has refs.
 *   3. Orphan data extents recorded on the root are moved to the matching
 *      inode records, which get I_ERR_FILE_EXTENT_ORPHAN.
 *   4. The root block itself is checked as a leaf or as a node.
 *   5. The walk starts at the root node, or at the drop_progress key when
 *      the root is partially dropped. An invalid drop level is reported.
 *   6. walk_down_tree() and walk_up_tree() traverse the tree.
 *   7. Corrupted blocks found on the way are listed and handed to
 *      repair_btree().
 *   8. merge_root_recs() and check_inode_recs() process what was
 *      collected.
 *   9. The local corrupt block tree and the orphan data extents are
 *      freed.
 *
 * NOTE(review): the loop around the walk, the error accumulation, the
 * repair mode condition and the return are on lines missing from this
 * listing.
 */
3881 static int check_fs_root(struct btrfs_root *root,
3882 struct cache_tree *root_cache,
3883 struct walk_control *wc)
3889 struct btrfs_path path;
3890 struct shared_node root_node;
3891 struct root_record *rec;
3892 struct btrfs_root_item *root_item = &root->root_item;
3893 struct cache_tree corrupt_blocks;
3894 struct orphan_data_extent *orphan;
3895 struct orphan_data_extent *tmp;
3896 enum btrfs_tree_block_status status;
3897 struct node_refs nrefs;
3900 * Reuse the corrupt_block cache tree to record corrupted tree block
3902 * Unlike the usage in extent tree check, here we do it in a per
3903 * fs/subvol tree base.
3905 cache_tree_init(&corrupt_blocks);
3906 root->fs_info->corrupt_blocks = &corrupt_blocks;
3908 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909 rec = get_root_rec(root_cache, root->root_key.objectid);
3910 BUG_ON(IS_ERR(rec));
3911 if (btrfs_root_refs(root_item) > 0)
3912 rec->found_root_item = 1;
3915 btrfs_init_path(&path);
3916 memset(&root_node, 0, sizeof(root_node));
3917 cache_tree_init(&root_node.root_cache);
3918 cache_tree_init(&root_node.inode_cache);
3919 memset(&nrefs, 0, sizeof(nrefs));
3921 /* Move the orphan extent record to corresponding inode_record */
3922 list_for_each_entry_safe(orphan, tmp,
3923 &root->orphan_data_extents, list) {
3924 struct inode_record *inode;
3926 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3928 BUG_ON(IS_ERR(inode));
3929 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930 list_move(&orphan->list, &inode->orphan_extents);
/* Reset the walk state so that it starts at the level of the root node */
3933 level = btrfs_header_level(root->node);
3934 memset(wc->nodes, 0, sizeof(wc->nodes));
3935 wc->nodes[level] = &root_node;
3936 wc->active_node = level;
3937 wc->root_level = level;
3939 /* We may not have checked the root block, lets do that now */
3940 if (btrfs_is_leaf(root->node))
3941 status = btrfs_check_leaf(root, NULL, root->node);
3943 status = btrfs_check_node(root, NULL, root->node);
3944 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root or no drop progress recorded: walk from the root node */
3947 if (btrfs_root_refs(root_item) > 0 ||
3948 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949 path.nodes[level] = root->node;
3950 extent_buffer_get(root->node);
3951 path.slots[level] = 0;
3953 struct btrfs_key key;
3954 struct btrfs_disk_key found_key;
3956 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957 level = root_item->drop_level;
3958 path.lowest_level = level;
3959 if (level > btrfs_header_level(root->node) ||
3960 level >= BTRFS_MAX_LEVEL) {
3961 error("ignoring invalid drop level: %u", level);
3964 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3967 btrfs_node_key(path.nodes[level], &found_key,
3969 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970 sizeof(found_key)));
3974 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980 wret = walk_up_tree(root, &path, wc, &level);
3987 btrfs_release_path(&path);
3989 if (!cache_tree_empty(&corrupt_blocks)) {
3990 struct cache_extent *cache;
3991 struct btrfs_corrupt_block *corrupt;
3993 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994 root->root_key.objectid);
3995 cache = first_cache_extent(&corrupt_blocks);
3997 corrupt = container_of(cache,
3998 struct btrfs_corrupt_block,
4000 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001 cache->start, corrupt->level,
4002 corrupt->key.objectid, corrupt->key.type,
4003 corrupt->key.offset);
4004 cache = next_cache_extent(cache);
4007 printf("Try to repair the btree for root %llu\n",
4008 root->root_key.objectid);
4009 ret = repair_btree(root, &corrupt_blocks);
4011 fprintf(stderr, "Failed to repair btree: %s\n",
4014 printf("Btree for root %llu is fixed\n",
4015 root->root_key.objectid);
4019 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4023 if (root_node.current) {
4024 root_node.current->checked = 1;
4025 maybe_free_inode_rec(&root_node.inode_cache,
4029 err = check_inode_recs(root, &root_node.inode_cache);
/* Detach the local corrupt block tree before it goes out of scope */
4033 free_corrupt_blocks_tree(&corrupt_blocks);
4034 root->fs_info->corrupt_blocks = NULL;
4035 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk.
 *
 * The tree reloc and data reloc tree ids are special-cased ahead of the
 * generic is_fstree() test.
 *
 * NOTE(review): the value returned for the two special ids is on a line
 * missing from this listing.
 */
4039 static int fs_root_objectid(u64 objectid)
4041 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4044 return is_fstree(objectid);
/*
 * Iterate over the tree root and check every fs/subvolume tree found in
 * it, collecting root references on the way.
 *
 * @root:       any root of the filesystem; used to reach fs_info and the
 *              tree root
 * @root_cache: global cache of root records, filled in by this pass
 *
 * Visible behaviour:
 *   - Progress reporting is started for TASK_FS_ROOTS when enabled.
 *   - The cached block groups are reset before the walk.
 *   - Items of the tree root are visited in key order. ROOT_ITEM keys
 *     accepted by fs_root_objectid() are opened and passed to
 *     check_fs_root(); the tree reloc root is read without caching and
 *     freed again afterwards. ROOT_REF and ROOT_BACKREF keys go to
 *     process_root_ref().
 *   - If the tree root node changed during the walk, or check_fs_root()
 *     returned -EAGAIN, the collected root records are dropped and the
 *     path is released so the scan can restart.
 *   - The shared node cache of the walk control is freed at the end, with
 *     a warning if it is still not empty.
 *
 * NOTE(review): the loop construct, the restart jumps, the error
 * accumulation and the return are on lines missing from this listing.
 */
4047 static int check_fs_roots(struct btrfs_root *root,
4048 struct cache_tree *root_cache)
4050 struct btrfs_path path;
4051 struct btrfs_key key;
4052 struct walk_control wc;
4053 struct extent_buffer *leaf, *tree_node;
4054 struct btrfs_root *tmp_root;
4055 struct btrfs_root *tree_root = root->fs_info->tree_root;
4059 if (ctx.progress_enabled) {
4060 ctx.tp = TASK_FS_ROOTS;
4061 task_start(ctx.info);
4065 * Just in case we made any changes to the extent tree that weren't
4066 * reflected into the free space cache yet.
4069 reset_cached_block_groups(root->fs_info);
4070 memset(&wc, 0, sizeof(wc));
4071 cache_tree_init(&wc.shared);
4072 btrfs_init_path(&path);
4077 key.type = BTRFS_ROOT_ITEM_KEY;
4078 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4083 tree_node = tree_root->node;
4085 if (tree_node != tree_root->node) {
4086 free_root_recs_tree(root_cache);
4087 btrfs_release_path(&path);
4090 leaf = path.nodes[0];
4091 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092 ret = btrfs_next_leaf(tree_root, &path);
4098 leaf = path.nodes[0];
4100 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102 fs_root_objectid(key.objectid)) {
4103 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104 tmp_root = btrfs_read_fs_root_no_cache(
4105 root->fs_info, &key);
4107 key.offset = (u64)-1;
4108 tmp_root = btrfs_read_fs_root(
4109 root->fs_info, &key);
4111 if (IS_ERR(tmp_root)) {
4115 ret = check_fs_root(tmp_root, root_cache, &wc);
4116 if (ret == -EAGAIN) {
4117 free_root_recs_tree(root_cache);
4118 btrfs_release_path(&path);
4123 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124 btrfs_free_fs_root(tmp_root);
4125 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126 key.type == BTRFS_ROOT_BACKREF_KEY) {
4127 process_root_ref(leaf, path.slots[0], &key,
4134 btrfs_release_path(&path);
4136 free_extent_cache_tree(&wc.shared);
4137 if (!cache_tree_empty(&wc.shared))
4138 fprintf(stderr, "warning line %d\n", __LINE__);
4140 task_stop(ctx.info);
4146 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147 * INODE_REF/INODE_EXTREF match.
4149 * @root: the root of the fs/file tree
4150 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4151 * @key: the key of the DIR_ITEM/DIR_INDEX
4152 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4153 * distinguish root_dir from normal dir/file
4154 * @name: the name in the INODE_REF/INODE_EXTREF
4155 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4156 * @mode: the st_mode of INODE_ITEM
4158 * Return 0 if no error occurred.
4159 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4162 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163 * do not match for normal dir/file.
/*
 * Search @root for the directory item at @key and compare the entries stored
 * in it against one inode reference (@ref_key, @name, @namelen, @mode).
 * The path is released before returning.
 */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166 struct btrfs_key *key, u64 index, char *name,
4167 u32 namelen, u32 mode)
4169 struct btrfs_path path;
4170 struct extent_buffer *node;
4171 struct btrfs_dir_item *di;
4172 struct btrfs_key location;
/* Scratch copy of one entry name; holds at most BTRFS_NAME_LEN bytes. */
4173 char namebuf[BTRFS_NAME_LEN] = {0};
4183 btrfs_init_path(&path);
4184 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4186 ret = DIR_ITEM_MISSING;
4190 /* Process root dir and goto out*/
4193 ret = ROOT_DIR_ERROR;
4195 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4197 ref_key->type == BTRFS_INODE_REF_KEY ?
4199 ref_key->objectid, ref_key->offset,
4200 key->type == BTRFS_DIR_ITEM_KEY ?
4201 "DIR_ITEM" : "DIR_INDEX");
4209 /* Process normal file/dir */
4211 ret = DIR_ITEM_MISSING;
4213 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4215 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216 ref_key->objectid, ref_key->offset,
4217 key->type == BTRFS_DIR_ITEM_KEY ?
4218 "DIR_ITEM" : "DIR_INDEX",
4219 key->objectid, key->offset, namelen, name,
4220 imode_to_type(mode));
4224 /* Check whether inode_id/filetype/name match */
4225 node = path.nodes[0];
4226 slot = path.slots[0];
4227 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228 total = btrfs_item_size_nr(node, slot);
/*
 * One item may pack several entries back to back.  Each iteration starts
 * out as DIR_ITEM_MISMATCH; presumably only an entry passing every test
 * below clears it (the success path is not shown here).
 */
4229 while (cur < total) {
4230 ret = DIR_ITEM_MISMATCH;
4231 name_len = btrfs_dir_name_len(node, di);
4232 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode. */
4234 btrfs_dir_item_key_to_cpu(node, di, &location);
4235 if (location.objectid != ref_key->objectid ||
4236 location.type != BTRFS_INODE_ITEM_KEY ||
4237 location.offset != 0)
/* The file type recorded in the entry must agree with the inode's mode. */
4240 filetype = btrfs_dir_type(node, di);
4241 if (imode_to_type(mode) != filetype)
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported and
 * the number of bytes to read is clamped to what the item can hold.
 */
4244 if (cur + sizeof(*di) + name_len > total ||
4245 name_len > BTRFS_NAME_LEN) {
4246 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4248 key->type == BTRFS_DIR_ITEM_KEY ?
4249 "DIR_ITEM" : "DIR_INDEX",
4250 key->objectid, key->offset, name_len);
4252 if (cur + sizeof(*di) > total)
4254 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size header directly: (di + 1). */
4260 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry. */
4267 len = sizeof(*di) + name_len + data_len;
4268 di = (struct btrfs_dir_item *)((char *)di + len);
/* Still DIR_ITEM_MISMATCH after the loop: report it. */
4271 if (ret == DIR_ITEM_MISMATCH)
4273 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4275 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276 ref_key->objectid, ref_key->offset,
4277 key->type == BTRFS_DIR_ITEM_KEY ?
4278 "DIR_ITEM" : "DIR_INDEX",
4279 key->objectid, key->offset, namelen, name,
4280 imode_to_type(mode));
4282 btrfs_release_path(&path);
4287 * Traverse the given INODE_REF and call find_dir_item() to find related
4288 * DIR_ITEM/DIR_INDEX.
4290 * @root: the root of the fs/file tree
4291 * @ref_key: the key of the INODE_REF
4292 * @refs: the count of INODE_REF
4293 * @mode: the st_mode of INODE_ITEM
4295 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the INODE_REF item at @node/@slot and, for
 * each one, look for the matching DIR_INDEX and DIR_ITEM through
 * find_dir_item().
 */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298 struct extent_buffer *node, int slot, u64 *refs,
4301 struct btrfs_key key;
4302 struct btrfs_inode_ref *ref;
/* Scratch copy of one ref name; holds at most BTRFS_NAME_LEN bytes. */
4303 char namebuf[BTRFS_NAME_LEN] = {0};
4311 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312 total = btrfs_item_size_nr(node, slot);
4315 /* Update inode ref count */
4318 index = btrfs_inode_ref_index(node, ref);
4319 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported and
 * the number of bytes to read is clamped to what the item can hold.
 */
4320 if (cur + sizeof(*ref) + name_len > total ||
4321 name_len > BTRFS_NAME_LEN) {
4322 warning("root %llu INODE_REF[%llu %llu] name too long",
4323 root->objectid, ref_key->objectid, ref_key->offset);
4325 if (total < cur + sizeof(*ref))
4327 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the fixed-size header directly: (ref + 1). */
4332 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4334 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len; with name_len == 1 a name of "." would also compare equal
 * to ".." - confirm this is intended.
 */
4335 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337 root->objectid, ref_key->objectid, ref_key->offset,
4339 err |= ROOT_DIR_ERROR;
4342 /* Find related DIR_INDEX */
/* ref_key->offset is used as the objectid of the directory items. */
4343 key.objectid = ref_key->offset;
4344 key.type = BTRFS_DIR_INDEX_KEY;
4346 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4349 /* Find related dir_item */
/* A DIR_ITEM is keyed by the hash of the name instead of the index. */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_ITEM_KEY;
4352 key.offset = btrfs_name_hash(namebuf, len);
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next packed entry. */
4356 len = sizeof(*ref) + name_len;
4357 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4367 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368 * DIR_ITEM/DIR_INDEX.
4370 * @root: the root of the fs/file tree
4371 * @ref_key: the key of the INODE_EXTREF
4372 * @refs: the count of INODE_EXTREF
4373 * @mode: the st_mode of INODE_ITEM
4375 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the INODE_EXTREF item at @node/@slot and, for
 * each one, look for the matching DIR_INDEX and DIR_ITEM through
 * find_dir_item().  Unlike an INODE_REF, each extref entry carries its own
 * parent directory id, which is used as the objectid of the lookups.
 */
4377 static int check_inode_extref(struct btrfs_root *root,
4378 struct btrfs_key *ref_key,
4379 struct extent_buffer *node, int slot, u64 *refs,
4382 struct btrfs_key key;
4383 struct btrfs_inode_extref *extref;
/* Scratch copy of one ref name; holds at most BTRFS_NAME_LEN bytes. */
4384 char namebuf[BTRFS_NAME_LEN] = {0};
4394 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395 total = btrfs_item_size_nr(node, slot);
4398 /* update inode ref count */
4400 name_len = btrfs_inode_extref_name_len(node, extref);
4401 index = btrfs_inode_extref_index(node, extref);
4402 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are truncated to that length and
 * reported.
 * NOTE(review): unlike check_inode_ref(), the entry does not appear to be
 * checked against the item size (total) before it is read - confirm.
 */
4403 if (name_len <= BTRFS_NAME_LEN) {
4406 len = BTRFS_NAME_LEN;
4407 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the fixed-size header directly: (extref + 1). */
4410 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4412 /* Check root dir ref name */
/* The comparison length is the on-disk name_len, not the clamped len. */
4413 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415 root->objectid, ref_key->objectid, ref_key->offset,
4417 err |= ROOT_DIR_ERROR;
4420 /* find related dir_index */
4421 key.objectid = parent;
4422 key.type = BTRFS_DIR_INDEX_KEY;
4424 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4427 /* find related dir_item */
/* A DIR_ITEM is keyed by the hash of the name instead of the index. */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_ITEM_KEY;
4430 key.offset = btrfs_name_hash(namebuf, len);
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next packed entry. */
4434 len = sizeof(*extref) + name_len;
4435 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4445 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446 * DIR_ITEM/DIR_INDEX match.
4448 * @root: the root of the fs/file tree
4449 * @key: the key of the INODE_REF/INODE_EXTREF
4450 * @name: the name in the INODE_REF/INODE_EXTREF
4451 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4452 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4454 * @ext_ref: the EXTENDED_IREF feature
4456 * Return 0 if no error occurred.
4457 * Return >0 for error bitmap
/*
 * Look for an inode reference that matches (@name, @namelen, @index): first
 * among the entries of the item found at @key, then among the entries of
 * the corresponding INODE_EXTREF item.
 *
 * Side effect: @key is modified in place for the second lookup (its type
 * becomes BTRFS_INODE_EXTREF_KEY and its offset the extref hash), so the
 * caller's key does not keep its original value once that stage is reached.
 */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460 char *name, int namelen, u64 index,
4461 unsigned int ext_ref)
4463 struct btrfs_path path;
4464 struct btrfs_inode_ref *ref;
4465 struct btrfs_inode_extref *extref;
4466 struct extent_buffer *node;
/* Scratch copy of one ref name; holds at most BTRFS_NAME_LEN bytes. */
4467 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4478 btrfs_init_path(&path);
4479 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4481 ret = INODE_REF_MISSING;
4485 node = path.nodes[0];
4486 slot = path.slots[0];
4488 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489 total = btrfs_item_size_nr(node, slot);
4491 /* Iterate all entry of INODE_REF */
4492 while (cur < total) {
4493 ret = INODE_REF_MISSING;
4495 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means "any index"; otherwise it has to match. */
4497 if (index != (u64)-1 && index != ref_index)
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported and
 * the number of bytes to read is clamped to what the item can hold.
 */
4500 if (cur + sizeof(*ref) + ref_namelen > total ||
4501 ref_namelen > BTRFS_NAME_LEN) {
4502 warning("root %llu INODE %s[%llu %llu] name too long",
4504 key->type == BTRFS_INODE_REF_KEY ?
4506 key->objectid, key->offset);
4508 if (cur + sizeof(*ref) > total)
4510 len = min_t(u32, total - cur - sizeof(*ref),
4516 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4519 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next packed entry. */
4525 len = sizeof(*ref) + ref_namelen;
4526 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4531 /* Skip if not support EXTENDED_IREF feature */
/* Second stage: restart with a fresh path and search the extref item. */
4535 btrfs_release_path(&path);
4536 btrfs_init_path(&path);
/* key->offset is taken as the directory id before it is overwritten. */
4538 dir_id = key->offset;
4539 key->type = BTRFS_INODE_EXTREF_KEY;
4540 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4542 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4544 ret = INODE_REF_MISSING;
4548 node = path.nodes[0];
4549 slot = path.slots[0];
4551 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4553 total = btrfs_item_size_nr(node, slot);
4555 /* Iterate all entry of INODE_EXTREF */
4556 while (cur < total) {
4557 ret = INODE_REF_MISSING;
4559 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560 ref_index = btrfs_inode_extref_index(node, extref);
4561 parent = btrfs_inode_extref_parent(node, extref);
4562 if (index != (u64)-1 && index != ref_index)
/* Entries sharing the hash may belong to another directory. */
4565 if (parent != dir_id)
/* Names longer than BTRFS_NAME_LEN are truncated and reported. */
4568 if (ref_namelen <= BTRFS_NAME_LEN) {
4571 len = BTRFS_NAME_LEN;
4572 warning("root %llu INODE %s[%llu %llu] name too long",
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so this
 * test looks like it can only pick its second alternative here.
 */
4574 key->type == BTRFS_INODE_REF_KEY ?
4576 key->objectid, key->offset);
4578 read_extent_buffer(node, ref_namebuf,
4579 (unsigned long)(extref + 1), len);
4581 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next packed entry. */
4588 len = sizeof(*extref) + ref_namelen;
4589 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4594 btrfs_release_path(&path);
4599 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4602 * @root: the root of the fs/file tree
4603 * @key: the key of the DIR_ITEM/DIR_INDEX
4604 * @size: running total of the entry name lengths; the caller compares it
4604 * with the st_size of the directory INODE_ITEM
4605 * @ext_ref: the EXTENDED_IREF feature
4607 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the DIR_ITEM/DIR_INDEX item at @node/@slot.
 * For each entry: add its name length to *@size, check that the INODE_ITEM
 * it points to exists and has a matching file type, and check through
 * find_inode_ref() that the inode refers back to this directory.
 */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610 struct extent_buffer *node, int slot, u64 *size,
4611 unsigned int ext_ref)
4613 struct btrfs_dir_item *di;
4614 struct btrfs_inode_item *ii;
4615 struct btrfs_path path;
4616 struct btrfs_key location;
/*
 * Scratch copy of one entry name.
 * NOTE(review): the buffer is exactly BTRFS_NAME_LEN bytes, so a
 * maximum-length name appears to leave no terminating NUL for the "%s"
 * conversions in the messages below - confirm.
 */
4617 char namebuf[BTRFS_NAME_LEN] = {0};
4630 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631 * ignore index check.
4633 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4635 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636 total = btrfs_item_size_nr(node, slot);
4638 while (cur < total) {
4639 data_len = btrfs_dir_data_len(node, di);
4641 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643 "DIR_ITEM" : "DIR_INDEX",
4644 key->objectid, key->offset, data_len);
4646 name_len = btrfs_dir_name_len(node, di);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is reported and
 * the number of bytes to read is clamped to what the item can hold.
 */
4647 if (cur + sizeof(*di) + name_len > total ||
4648 name_len > BTRFS_NAME_LEN) {
4649 warning("root %llu %s[%llu %llu] name too long",
4651 key->type == BTRFS_DIR_ITEM_KEY ?
4652 "DIR_ITEM" : "DIR_INDEX",
4653 key->objectid, key->offset);
4655 if (cur + sizeof(*di) > total)
4657 len = min_t(u32, total - cur - sizeof(*di),
/* The on-disk name length (not the clamped len) is what gets accumulated. */
4662 (*size) += name_len;
/* The name bytes follow the fixed-size header directly: (di + 1). */
4664 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665 filetype = btrfs_dir_type(node, di);
4667 btrfs_init_path(&path);
4668 btrfs_dir_item_key_to_cpu(node, di, &location);
4670 /* Ignore related ROOT_ITEM check */
4671 if (location.type == BTRFS_ROOT_ITEM_KEY)
4674 /* Check relative INODE_ITEM(existence/filetype) */
4675 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4677 err |= INODE_ITEM_MISSING;
4678 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681 key->offset, location.objectid, name_len,
4686 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687 struct btrfs_inode_item);
4688 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type stored in the entry must agree with the inode's own mode. */
4690 if (imode_to_type(mode) != filetype) {
4691 err |= INODE_ITEM_MISMATCH;
4692 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695 key->offset, name_len, namebuf, filetype);
4698 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * Reuse location as the INODE_REF key: same inode objectid, with this
 * directory's objectid as the key offset.
 */
4699 location.type = BTRFS_INODE_REF_KEY;
4700 location.offset = key->objectid;
4701 ret = find_inode_ref(root, &location, namebuf, len,
4704 if (ret & INODE_REF_MISSING)
4705 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708 key->offset, name_len, namebuf, filetype);
4711 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry. */
4712 len = sizeof(*di) + name_len + data_len;
4713 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over in a DIR_INDEX item mean it holds more than one entry. */
4716 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718 root->objectid, key->objectid, key->offset);
4727 * Check file extent datasum/hole, update the size of the file extents,
4728 * check and update the last offset of the file extent.
4730 * @root: the root of fs/file tree.
4731 * @fkey: the key of the file extent.
4732 * @nodatasum: INODE_NODATASUM feature.
4733 * @size: the sum of all EXTENT_DATA items size for this inode.
4734 * @end: the offset of the last extent.
4736 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item at @node/@slot: inline extent sizes, the
 * extent type, checksum coverage and contiguity with the previous extent.
 * *@size and *@end are advanced by the extent's length as shown below.
 */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739 struct extent_buffer *node, int slot,
4740 unsigned int nodatasum, u64 *size, u64 *end)
4742 struct btrfs_file_extent_item *fi;
4745 u64 extent_num_bytes;
4747 u64 csum_found; /* In byte size, sectorsize aligned */
4748 u64 search_start; /* Logical range start we search for csum */
4749 u64 search_len; /* Logical range len we search for csum */
4750 unsigned int extent_type;
4751 unsigned int is_hole;
4756 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4758 /* Check inline extent */
4759 extent_type = btrfs_file_extent_type(node, fi);
4760 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761 struct btrfs_item *e = btrfs_item_nr(slot);
4762 u32 item_inline_len;
/*
 * Two lengths are compared for an inline extent: the one derived from the
 * item size and the one reported for the extent itself.  They only have
 * to agree when the data is not compressed.
 */
4764 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766 compressed = btrfs_file_extent_compression(node, fi);
4767 if (extent_num_bytes == 0) {
4769 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770 root->objectid, fkey->objectid, fkey->offset);
4771 err |= FILE_EXTENT_ERROR;
4773 if (!compressed && extent_num_bytes != item_inline_len) {
4775 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776 root->objectid, fkey->objectid, fkey->offset,
4777 extent_num_bytes, item_inline_len);
4778 err |= FILE_EXTENT_ERROR;
4780 *size += extent_num_bytes;
4784 /* Check extent type */
4785 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4786 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4787 err |= FILE_EXTENT_ERROR;
4788 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4789 root->objectid, fkey->objectid, fkey->offset);
4793 /* Check REG_EXTENT/PREALLOC_EXTENT */
4794 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4795 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4796 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4797 extent_offset = btrfs_file_extent_offset(node, fi);
4798 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor a disk length is a hole. */
4799 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4802 * Check EXTENT_DATA csum
4804 * For plain (uncompressed) extent, we should only check the range
4805 * we're referring to, as it's possible that part of prealloc extent
4806 * has been written, and has csum:
4808 * |<--- Original large preallocated extent A ---->|
4809 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4812 * For compressed extent, we should check the whole range.
4815 search_start = disk_bytenr + extent_offset;
4816 search_len = extent_num_bytes;
4818 search_start = disk_bytenr;
4819 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is examined before ret in the first branch
 * below; whether count_csum_range() sets it on failure is not visible
 * here - confirm.
 */
4821 ret = count_csum_range(root, search_start, search_len, &csum_found);
/* Three mutually exclusive complaints, tested in this order. */
4822 if (csum_found > 0 && nodatasum) {
4823 err |= ODD_CSUM_ITEM;
4824 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4825 root->objectid, fkey->objectid, fkey->offset);
4826 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4827 !is_hole && (ret < 0 || csum_found < search_len)) {
4828 err |= CSUM_ITEM_MISSING;
4829 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4830 root->objectid, fkey->objectid, fkey->offset,
4831 csum_found, search_len);
4832 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4833 err |= ODD_CSUM_ITEM;
4834 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4835 root->objectid, fkey->objectid, fkey->offset, csum_found);
4838 /* Check EXTENT_DATA hole */
/*
 * Unless no_holes is set, this extent must begin exactly where the
 * previous one ended (*end).
 */
4839 if (!no_holes && *end != fkey->offset) {
4840 err |= FILE_EXTENT_ERROR;
4841 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4842 root->objectid, fkey->objectid, fkey->offset);
4845 *end += extent_num_bytes;
/* NOTE(review): any condition guarding this update is not shown here. */
4847 *size += extent_num_bytes;
4853 * Check INODE_ITEM and related ITEMs (the same inode number)
4854 * 1. check link count
4855 * 2. check inode ref/extref
4856 * 3. check dir item/index
4858 * @ext_ref: the EXTENDED_IREF feature
4860 * Return 0 if no error occurred.
4861 * Return >0 (as an error bitmap) on error or when the traversal is done
/*
 * Check the INODE_ITEM the path points at together with the following items
 * that carry the same objectid, dispatching each one by key type, and then
 * compare the collected totals with the nlink/size/nbytes stored in the
 * inode.  The path is advanced with btrfs_next_item() as items are consumed.
 */
4863 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4864 unsigned int ext_ref)
4866 struct extent_buffer *node;
4867 struct btrfs_inode_item *ii;
4868 struct btrfs_key key;
4877 u64 extent_size = 0;
4879 unsigned int nodatasum;
4884 node = path->nodes[0];
4885 slot = path->slots[0];
4887 btrfs_item_key_to_cpu(node, &key, slot);
4888 inode_id = key.objectid;
/* Items of the orphan objectid are stepped over instead of being checked. */
4890 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4891 ret = btrfs_next_item(root, path);
/* Snapshot the inode fields that are verified after the item walk. */
4897 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4898 isize = btrfs_inode_size(node, ii);
4899 nbytes = btrfs_inode_nbytes(node, ii);
4900 mode = btrfs_inode_mode(node, ii);
4901 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4902 nlink = btrfs_inode_nlink(node, ii);
4903 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4906 ret = btrfs_next_item(root, path);
4908 /* out will fill 'err' using current statistics */
4910 } else if (ret > 0) {
4915 node = path->nodes[0];
4916 slot = path->slots[0];
4917 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4918 if (key.objectid != inode_id)
4922 case BTRFS_INODE_REF_KEY:
4923 ret = check_inode_ref(root, &key, node, slot, &refs,
4927 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF without the EXTENDED_IREF feature is warned about but still checked. */
4928 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4929 warning("root %llu EXTREF[%llu %llu] isn't supported",
4930 root->objectid, key.objectid,
4932 ret = check_inode_extref(root, &key, node, slot, &refs,
4936 case BTRFS_DIR_ITEM_KEY:
4937 case BTRFS_DIR_INDEX_KEY:
4939 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4940 root->objectid, inode_id,
4941 imode_to_type(mode), key.objectid,
4944 ret = check_dir_item(root, &key, node, slot, &size,
4948 case BTRFS_EXTENT_DATA_KEY:
4950 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4951 root->objectid, inode_id, key.objectid,
4954 ret = check_file_extent(root, &key, node, slot,
4955 nodatasum, &extent_size,
4959 case BTRFS_XATTR_ITEM_KEY:
4962 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4963 key.objectid, key.type, key.offset);
4968 /* verify INODE_ITEM nlink/isize/nbytes */
4971 err |= LINK_COUNT_ERROR;
4972 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4973 root->objectid, inode_id, nlink);
4977 * Just a warning, as dir inode nbytes is just an
4978 * instructive value.
4980 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4981 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4982 root->objectid, inode_id, root->nodesize);
/* size is the total accumulated by check_dir_item() for this inode. */
4985 if (isize != size) {
4987 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4988 root->objectid, inode_id, isize, size);
/* refs is the count accumulated by the ref/extref checkers above. */
4991 if (nlink != refs) {
4992 err |= LINK_COUNT_ERROR;
4993 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4994 root->objectid, inode_id, nlink, refs);
4995 } else if (!nlink) {
4999 if (!nbytes && !no_holes && extent_end < isize) {
5000 err |= NBYTES_ERROR;
5001 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5002 root->objectid, inode_id, isize);
/* extent_size is the total accumulated by check_file_extent(). */
5005 if (nbytes != extent_size) {
5006 err |= NBYTES_ERROR;
5007 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5008 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM of BTRFS_FIRST_FREE_OBJECTID in @root and run
 * check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.
 *
 * @root:	the fs/file tree to inspect
 * @ext_ref:	the EXTENDED_IREF feature, passed on to check_inode_item()
 */
5015 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5017 struct btrfs_path path;
5018 struct btrfs_key key;
5022 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5023 key.type = BTRFS_INODE_ITEM_KEY;
5026 /* For root being dropped, we don't need to check first inode */
/*
 * "Being dropped" is detected as zero root refs combined with a
 * drop_progress objectid at or beyond a bound that is not shown here.
 */
5027 if (btrfs_root_refs(&root->root_item) == 0 &&
5028 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5032 btrfs_init_path(&path);
5034 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5039 err |= INODE_ITEM_MISSING;
5040 error("first inode item of root %llu is missing",
/* Fold the bitmap from the per-inode check into this function's result. */
5044 err |= check_inode_item(root, &path, ext_ref);
5049 btrfs_release_path(&path);
5054 * Iterate all item on the tree and call check_inode_item() to check.
5056 * @root: the root of the tree to be checked.
5057 * @ext_ref: the EXTENDED_IREF feature
5059 * Return 0 if no error found.
5060 * Return <0 for error.
/*
 * Check one fs/file tree: first its first inode via check_fs_first_inode(),
 * then the whole tree by alternating walk_down_tree_v2() and
 * walk_up_tree_v2() over a manually prepared path.
 */
5062 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5064 struct btrfs_path path;
5065 struct node_refs nrefs;
5066 struct btrfs_root_item *root_item = &root->root_item;
5072 * We need to manually check the first inode item(256)
5073 * As the following traversal function will only start from
5074 * the first inode item in the leaf, if inode item(256) is missing
5075 * we will just skip it forever.
5077 ret = check_fs_first_inode(root, ext_ref);
5081 memset(&nrefs, 0, sizeof(nrefs));
5082 level = btrfs_header_level(root->node);
5083 btrfs_init_path(&path);
/*
 * A root that is still referenced, or whose drop_progress objectid is
 * zero, is walked from its root node: the node is placed at the top of
 * the path and an extra reference is taken on it with
 * extent_buffer_get().  Otherwise the walk resumes from the
 * drop_progress key at drop_level.
 */
5085 if (btrfs_root_refs(root_item) > 0 ||
5086 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5087 path.nodes[level] = root->node;
5088 path.slots[level] = 0;
5089 extent_buffer_get(root->node);
5091 struct btrfs_key key;
5093 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5094 level = root_item->drop_level;
5095 path.lowest_level = level;
5096 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* level is passed by address so both walkers can update it. */
5103 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5106 /* if ret is negative, walk shall stop */
5112 ret = walk_up_tree_v2(root, &path, &level);
5114 /* Normal exit, reset ret to err */
5121 btrfs_release_path(&path);
5126 * Find the relative ref for root_ref and root_backref.
5128 * @root: the root of the root tree.
5129 * @ref_key: the key of the root ref.
5131 * Return 0 if no error occurred.
/*
 * For the ROOT_REF or ROOT_BACKREF item at @node/@slot, look up the
 * counterpart item of the opposite type in @root and verify that both carry
 * the same dirid, sequence, name length and name.
 */
5133 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5134 struct extent_buffer *node, int slot)
5136 struct btrfs_path path;
5137 struct btrfs_key key;
5138 struct btrfs_root_ref *ref;
5139 struct btrfs_root_ref *backref;
/* Scratch copies of both names; each holds at most BTRFS_NAME_LEN bytes. */
5140 char ref_name[BTRFS_NAME_LEN] = {0};
5141 char backref_name[BTRFS_NAME_LEN] = {0};
5147 u32 backref_namelen;
5152 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5153 ref_dirid = btrfs_root_ref_dirid(node, ref);
5154 ref_seq = btrfs_root_ref_sequence(node, ref);
5155 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are truncated and reported. */
5157 if (ref_namelen <= BTRFS_NAME_LEN) {
5160 len = BTRFS_NAME_LEN;
5161 warning("%s[%llu %llu] ref_name too long",
5162 ref_key->type == BTRFS_ROOT_REF_KEY ?
5163 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the fixed-size header directly: (ref + 1). */
5166 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5168 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset, and the arithmetic on
 * the type maps ROOT_REF to ROOT_BACKREF and vice versa.
 */
5169 key.objectid = ref_key->offset;
5170 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5171 key.offset = ref_key->objectid;
5173 btrfs_init_path(&path);
5174 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5176 err |= ROOT_REF_MISSING;
5177 error("%s[%llu %llu] couldn't find relative ref",
5178 ref_key->type == BTRFS_ROOT_REF_KEY ?
5179 "ROOT_REF" : "ROOT_BACKREF",
5180 ref_key->objectid, ref_key->offset);
5184 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5185 struct btrfs_root_ref);
5186 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5187 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5188 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
/* Same truncation rule for the counterpart's name. */
5190 if (backref_namelen <= BTRFS_NAME_LEN) {
5191 len = backref_namelen;
5193 len = BTRFS_NAME_LEN;
5194 warning("%s[%llu %llu] ref_name too long",
5195 key.type == BTRFS_ROOT_REF_KEY ?
5196 "ROOT_REF" : "ROOT_BACKREF",
5197 key.objectid, key.offset);
5199 read_extent_buffer(path.nodes[0], backref_name,
5200 (unsigned long)(backref + 1), len);
/*
 * The name comparison runs over len as last computed, i.e. the (possibly
 * truncated) length of the counterpart's name.
 */
5202 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5203 ref_namelen != backref_namelen ||
5204 strncmp(ref_name, backref_name, len)) {
5205 err |= ROOT_REF_MISMATCH;
5206 error("%s[%llu %llu] mismatch relative ref",
5207 ref_key->type == BTRFS_ROOT_REF_KEY ?
5208 "ROOT_REF" : "ROOT_BACKREF",
5209 ref_key->objectid, ref_key->offset);
5212 btrfs_release_path(&path);
5217 * Check all fs/file tree in low_memory mode.
5219 * 1. for fs tree root item, call check_fs_root_v2()
5220 * 2. for fs tree root ref/backref, call check_root_ref()
5222 * Return 0 if no error occurred.
/*
 * Iterate the root tree starting at BTRFS_FS_TREE_OBJECTID.  Each ROOT_ITEM
 * accepted by fs_root_objectid() is read and checked with
 * check_fs_root_v2(); each ROOT_REF / ROOT_BACKREF item is checked with
 * check_root_ref().
 */
5224 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5226 struct btrfs_root *tree_root = fs_info->tree_root;
5227 struct btrfs_root *cur_root = NULL;
5228 struct btrfs_path path;
5229 struct btrfs_key key;
5230 struct extent_buffer *node;
5231 unsigned int ext_ref;
/* Read once whether the filesystem has the EXTENDED_IREF incompat bit. */
5236 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5238 btrfs_init_path(&path);
5239 key.objectid = BTRFS_FS_TREE_OBJECTID;
5241 key.type = BTRFS_ROOT_ITEM_KEY;
5243 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5247 } else if (ret > 0) {
5253 node = path.nodes[0];
5254 slot = path.slots[0];
5255 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range. */
5256 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5258 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5259 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read without going through the root cache and is
 * released again with btrfs_free_fs_root() once checked; every other root
 * is looked up with key.offset forced to (u64)-1.
 */
5260 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5261 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5264 key.offset = (u64)-1;
5265 cur_root = btrfs_read_fs_root(fs_info, &key);
5268 if (IS_ERR(cur_root)) {
5269 error("Fail to read fs/subvol tree: %lld",
5275 ret = check_fs_root_v2(cur_root, ext_ref);
5278 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5279 btrfs_free_fs_root(cur_root);
5280 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5281 key.type == BTRFS_ROOT_BACKREF_KEY) {
5282 ret = check_root_ref(tree_root, &key, node, slot);
5286 ret = btrfs_next_item(tree_root, &path);
5296 btrfs_release_path(&path);
/*
 * Go through every backref attached to @rec and look for inconsistencies:
 * backrefs absent from the extent tree, tree backrefs that were never
 * referenced, data backrefs whose found/expected counts, disk bytenr or
 * byte length disagree with the record, and finally a global reference
 * count that differs from rec->refs.
 *
 * @print_errs: presumably selects whether the messages below are printed
 * (the tests on it are not shown here) - confirm.
 */
5300 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5302 struct list_head *cur = rec->backrefs.next;
5303 struct extent_backref *back;
5304 struct tree_backref *tback;
5305 struct data_backref *dback;
5309 while(cur != &rec->backrefs) {
5310 back = to_extent_backref(cur);
/* Backref exists in memory but has no counterpart in the extent tree. */
5312 if (!back->found_extent_tree) {
5316 if (back->is_data) {
5317 dback = to_data_backref(back);
5318 fprintf(stderr, "Backref %llu %s %llu"
5319 " owner %llu offset %llu num_refs %lu"
5320 " not found in extent tree\n",
5321 (unsigned long long)rec->start,
5322 back->full_backref ?
5324 back->full_backref ?
5325 (unsigned long long)dback->parent:
5326 (unsigned long long)dback->root,
5327 (unsigned long long)dback->owner,
5328 (unsigned long long)dback->offset,
5329 (unsigned long)dback->num_refs);
5331 tback = to_tree_backref(back);
5332 fprintf(stderr, "Backref %llu parent %llu"
5333 " root %llu not found in extent tree\n",
5334 (unsigned long long)rec->start,
5335 (unsigned long long)tback->parent,
5336 (unsigned long long)tback->root);
/* Tree backref that is recorded but was never referenced back. */
5339 if (!back->is_data && !back->found_ref) {
5343 tback = to_tree_backref(back);
5344 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5345 (unsigned long long)rec->start,
5346 back->full_backref ? "parent" : "root",
5347 back->full_backref ?
5348 (unsigned long long)tback->parent :
5349 (unsigned long long)tback->root, back);
/* Data backrefs: per-backref count, disk bytenr and length checks. */
5351 if (back->is_data) {
5352 dback = to_data_backref(back);
5353 if (dback->found_ref != dback->num_refs) {
5357 fprintf(stderr, "Incorrect local backref count"
5358 " on %llu %s %llu owner %llu"
5359 " offset %llu found %u wanted %u back %p\n",
5360 (unsigned long long)rec->start,
5361 back->full_backref ?
5363 back->full_backref ?
5364 (unsigned long long)dback->parent:
5365 (unsigned long long)dback->root,
5366 (unsigned long long)dback->owner,
5367 (unsigned long long)dback->offset,
5368 dback->found_ref, dback->num_refs, back);
5370 if (dback->disk_bytenr != rec->start) {
5374 fprintf(stderr, "Backref disk bytenr does not"
5375 " match extent record, bytenr=%llu, "
5376 "ref bytenr=%llu\n",
5377 (unsigned long long)rec->start,
5378 (unsigned long long)dback->disk_bytenr);
5381 if (dback->bytes != rec->nr) {
5385 fprintf(stderr, "Backref bytes do not match "
5386 "extent backref, bytenr=%llu, ref "
5387 "bytes=%llu, backref bytes=%llu\n",
5388 (unsigned long long)rec->start,
5389 (unsigned long long)rec->nr,
5390 (unsigned long long)dback->bytes);
/*
 * Accumulate the global count; a data backref contributes its found_ref
 * (the contribution of a tree backref is not shown here).
 */
5393 if (!back->is_data) {
5396 dback = to_data_backref(back);
5397 found += dback->found_ref;
/* The sum over all backrefs must equal the record's reference count. */
5400 if (found != rec->refs) {
5404 fprintf(stderr, "Incorrect global backref count "
5405 "on %llu found %llu wanted %llu\n",
5406 (unsigned long long)rec->start,
5407 (unsigned long long)found,
5408 (unsigned long long)rec->refs);
/*
 * Drain the backref list of @rec: as long as the list is not empty, take its
 * first entry and convert it to its extent_backref.
 *
 * NOTE(review): the statements that unlink and release each entry are not
 * shown here; the loop only terminates if the entry is removed from the
 * list on every pass - confirm.
 */
5414 static int free_all_extent_backrefs(struct extent_record *rec)
5416 struct extent_backref *back;
5417 struct list_head *cur;
5418 while (!list_empty(&rec->backrefs)) {
5419 cur = rec->backrefs.next;
5420 back = to_extent_backref(cur);
/*
 * Release extent records held in @extent_cache: the first cached extent is
 * mapped to its enclosing extent_record, removed from the tree, and its
 * backref list is dropped through free_all_extent_backrefs().
 * NOTE(review): presumably repeated until the tree is empty, with the record
 * itself freed afterwards -- confirm.
 */
5427 static void free_extent_record_cache(struct cache_tree *extent_cache)
5429 struct cache_extent *cache;
5430 struct extent_record *rec;
5433 cache = first_cache_extent(extent_cache);
5436 rec = container_of(cache, struct extent_record, cache);
5437 remove_cache_extent(extent_cache, cache);
5438 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once it no longer needs tracking.
 *
 * All of the following must hold: content and owner ref were checked, the
 * extent item's ref count equals the counted refs and is non-zero, there are
 * no duplicates, all_backpointers_checked(rec, 0) returns 0, and none of
 * bad_full_backref, crossing_stripes or wrong_chunk_type is set.
 *
 * When they do, the record is removed from the cache tree, its backrefs are
 * released and it is unlinked from the list rec->list is on.
 */
5443 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5444 struct extent_record *rec)
5446 if (rec->content_checked && rec->owner_ref_checked &&
5447 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5448 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5449 !rec->bad_full_backref && !rec->crossing_stripes &&
5450 !rec->wrong_chunk_type) {
5451 remove_cache_extent(extent_cache, &rec->cache);
5452 free_all_extent_backrefs(rec);
5453 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is reachable from the tree named by its
 * header owner field.
 *
 * First pass: scan rec->backrefs for a tree backref that has found_ref set,
 * is not a full backref, and whose root equals btrfs_header_owner(buf).
 * Second pass: read the owner root with btrfs_read_fs_root(), search it for
 * the first key of @buf with path.lowest_level = level + 1, and compare the
 * block pointer stored in the parent's slot with buf->start.
 *
 * The final return is 0 when `found` is set and 1 otherwise.
 */
5459 static int check_owner_ref(struct btrfs_root *root,
5460 struct extent_record *rec,
5461 struct extent_buffer *buf)
5463 struct extent_backref *node;
5464 struct tree_backref *back;
5465 struct btrfs_root *ref_root;
5466 struct btrfs_key key;
5467 struct btrfs_path path;
5468 struct extent_buffer *parent;
5473 list_for_each_entry(node, &rec->backrefs, list) {
5476 if (!node->found_ref)
5478 if (node->full_backref)
5480 back = to_tree_backref(node);
5481 if (btrfs_header_owner(buf) == back->root)
/* A root block reaching this point is treated as a fatal inconsistency. */
5484 BUG_ON(rec->is_root);
5486 /* try to find the block by search corresponding fs tree */
5487 key.objectid = btrfs_header_owner(buf);
5488 key.type = BTRFS_ROOT_ITEM_KEY;
5489 key.offset = (u64)-1;
5491 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5492 if (IS_ERR(ref_root))
5495 level = btrfs_header_level(buf);
5497 btrfs_item_key_to_cpu(buf, &key, 0);
5499 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above @buf so the parent node lands in path. */
5501 btrfs_init_path(&path);
5502 path.lowest_level = level + 1;
5503 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5507 parent = path.nodes[level + 1];
5508 if (parent && buf->start == btrfs_node_blockptr(parent,
5509 path.slots[level + 1]))
5512 btrfs_release_path(&path);
5513 return found ? 0 : 1;
/*
 * Walk rec->backrefs and inspect each entry as a tree backref, testing
 * whether it is a full backref and whether its root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): presumably reports whether @rec belongs to the extent tree
 * (a non-full backref rooted there) -- confirm the returned values.
 */
5516 static int is_extent_tree_record(struct extent_record *rec)
5518 struct list_head *cur = rec->backrefs.next;
5519 struct extent_backref *node;
5520 struct tree_backref *back;
5523 while(cur != &rec->backrefs) {
5524 node = to_extent_backref(cur);
5528 back = to_tree_backref(node);
5529 if (node->full_backref)
5531 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt-extent entry for a block at [start, start + len).
 *
 * The range is looked up in @extent_cache and mapped to its extent_record.
 * The record's parent_key is converted to CPU byte order and passed, with
 * the range, to btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): records for which is_extent_tree_record() is false take an
 * early branch, presumably returning without recording -- confirm.
 */
5538 static int record_bad_block_io(struct btrfs_fs_info *info,
5539 struct cache_tree *extent_cache,
5542 struct extent_record *rec;
5543 struct cache_extent *cache;
5544 struct btrfs_key key;
5546 cache = lookup_cache_extent(extent_cache, start, len);
5550 rec = container_of(cache, struct extent_record, cache);
5551 if (!is_extent_tree_record(rec))
5554 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5555 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (btrfs_header_level(buf) != 0): both btrfs_key_ptr structures are read
 * out and written back in swapped positions; btrfs_fixup_low_keys() is called
 * with the node's first key for level + 1 (presumably only when slot 0 was
 * involved -- confirm).
 * Leaf: both item payloads are copied into temporary heap buffers and written
 * back, the two item headers exchange offset and size, and the keys of both
 * slots are rewritten through btrfs_set_item_key_unsafe().
 *
 * NOTE(review): item1_data is allocated with item1_size bytes, yet
 * write_extent_buffer() is asked to copy item2_size bytes from it (and the
 * reverse for item2_data). With differently sized items this looks like a
 * read past the end of the temporary buffer -- confirm.
 */
5558 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5559 struct extent_buffer *buf, int slot)
5561 if (btrfs_header_level(buf)) {
5562 struct btrfs_key_ptr ptr1, ptr2;
5564 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5565 sizeof(struct btrfs_key_ptr));
5566 read_extent_buffer(buf, &ptr2,
5567 btrfs_node_key_ptr_offset(slot + 1),
5568 sizeof(struct btrfs_key_ptr));
5569 write_extent_buffer(buf, &ptr1,
5570 btrfs_node_key_ptr_offset(slot + 1),
5571 sizeof(struct btrfs_key_ptr));
5572 write_extent_buffer(buf, &ptr2,
5573 btrfs_node_key_ptr_offset(slot),
5574 sizeof(struct btrfs_key_ptr));
5576 struct btrfs_disk_key key;
5577 btrfs_node_key(buf, &key, 0);
5578 btrfs_fixup_low_keys(root, path, &key,
5579 btrfs_header_level(buf) + 1);
5582 struct btrfs_item *item1, *item2;
5583 struct btrfs_key k1, k2;
5584 char *item1_data, *item2_data;
5585 u32 item1_offset, item2_offset, item1_size, item2_size;
5587 item1 = btrfs_item_nr(slot);
5588 item2 = btrfs_item_nr(slot + 1);
5589 btrfs_item_key_to_cpu(buf, &k1, slot);
5590 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5591 item1_offset = btrfs_item_offset(buf, item1);
5592 item2_offset = btrfs_item_offset(buf, item2);
5593 item1_size = btrfs_item_size(buf, item1);
5594 item2_size = btrfs_item_size(buf, item2);
5596 item1_data = malloc(item1_size);
5599 item2_data = malloc(item2_size);
5605 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5606 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5608 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5609 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Headers trade offset and size so each slot describes the other's data. */
5613 btrfs_set_item_offset(buf, item1, item2_offset);
5614 btrfs_set_item_offset(buf, item2, item1_offset);
5615 btrfs_set_item_size(buf, item1, item2_size);
5616 btrfs_set_item_size(buf, item2, item1_size);
/* path->slots[0] selects which slot btrfs_set_item_key_unsafe() rewrites. */
5618 path->slots[0] = slot;
5619 btrfs_set_item_key_unsafe(root, path, &k2);
5620 path->slots[0] = slot + 1;
5621 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (i, i + 1) are loaded either as node keys or as item keys and
 * compared with btrfs_comp_cpu_keys(). Pairs that compare below zero take the
 * early branch (presumably skipped -- confirm); the others are handed to
 * swap_values() and the buffer is marked dirty.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1. If that
 * accessor returns an unsigned type, an empty block makes the bound wrap --
 * confirm.
 */
5626 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5628 struct extent_buffer *buf;
5629 struct btrfs_key k1, k2;
5631 int level = path->lowest_level;
5634 buf = path->nodes[level];
5635 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5637 btrfs_node_key_to_cpu(buf, &k1, i);
5638 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5640 btrfs_item_key_to_cpu(buf, &k1, i);
5641 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5643 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5645 ret = swap_values(root, path, buf, i);
5648 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only keys of type DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF are handled; any other type takes the early branch.
 * A message is printed, the item headers after @slot are moved down by one
 * with memmove_extent_buffer(), nritems is decremented and the buffer is
 * marked dirty. btrfs_fixup_low_keys() is called with the leaf's first key
 * at level 1 (presumably only when slot 0 was removed -- confirm).
 *
 * NOTE(review): buf->start is passed to "%llu" without the
 * (unsigned long long) cast applied to the other 64-bit arguments -- confirm
 * the type matches on all targets.
 */
5654 static int delete_bogus_item(struct btrfs_root *root,
5655 struct btrfs_path *path,
5656 struct extent_buffer *buf, int slot)
5658 struct btrfs_key key;
5659 int nritems = btrfs_header_nritems(buf);
5661 btrfs_item_key_to_cpu(buf, &key, slot);
5663 /* These are all the keys we can deal with missing. */
5664 if (key.type != BTRFS_DIR_INDEX_KEY &&
5665 key.type != BTRFS_EXTENT_ITEM_KEY &&
5666 key.type != BTRFS_METADATA_ITEM_KEY &&
5667 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5668 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5671 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5672 (unsigned long long)key.objectid, key.type,
5673 (unsigned long long)key.offset, slot, buf->start);
5674 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5675 btrfs_item_nr_offset(slot + 1),
5676 sizeof(struct btrfs_item) *
5677 (nritems - slot - 1));
5678 btrfs_set_header_nritems(buf, nritems - 1);
5680 struct btrfs_disk_key disk_key;
5682 btrfs_item_key(buf, &disk_key, 0);
5683 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5685 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets of the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item to end where the previous item's data begins.
 *  - An item ending beyond its limit is passed to delete_bogus_item(); the
 *    "can't fix" messages are printed on that path (presumably when the
 *    deletion fails -- confirm).
 *  - An item ending short of its limit has its data moved up by `shift`
 *    bytes with memmove_extent_buffer() and its offset updated; the buffer
 *    is then marked dirty.
 */
5689 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5691 struct extent_buffer *buf;
5695 /* We should only get this for leaves */
5696 BUG_ON(path->lowest_level);
5697 buf = path->nodes[0];
5699 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5700 unsigned int shift = 0, offset;
5702 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5703 BTRFS_LEAF_DATA_SIZE(root)) {
5704 if (btrfs_item_end_nr(buf, i) >
5705 BTRFS_LEAF_DATA_SIZE(root)) {
5706 ret = delete_bogus_item(root, path, buf, i);
5709 fprintf(stderr, "item is off the end of the "
5710 "leaf, can't fix\n");
5714 shift = BTRFS_LEAF_DATA_SIZE(root) -
5715 btrfs_item_end_nr(buf, i);
5716 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5717 btrfs_item_offset_nr(buf, i - 1)) {
5718 if (btrfs_item_end_nr(buf, i) >
5719 btrfs_item_offset_nr(buf, i - 1)) {
5720 ret = delete_bogus_item(root, path, buf, i);
5723 fprintf(stderr, "items overlap, can't fix\n");
5727 shift = btrfs_item_offset_nr(buf, i - 1) -
5728 btrfs_item_end_nr(buf, i);
5733 printf("Shifting item nr %d by %u bytes in block %llu\n",
5734 i, shift, (unsigned long long)buf->start);
5735 offset = btrfs_item_offset_nr(buf, i);
5736 memmove_extent_buffer(buf,
5737 btrfs_leaf_data(buf) + offset + shift,
5738 btrfs_leaf_data(buf) + offset,
5739 btrfs_item_size_nr(buf, i));
5740 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5742 btrfs_mark_buffer_dirty(buf);
5746 * We may have moved things, in which case we want to exit so we don't
5747 * write those changes out. Once we have proper abort functionality in
5748 * progs this can be changed to something nicer.
5755 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5756 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses that can be handled:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * btrfs_find_all_roots() collects the roots referencing buf->start. For each
 * root id the root is read, a transaction is started, and the first key of
 * @buf is searched with path.lowest_level set to the block's level and
 * path.skip_check_block = 1. fix_key_order() or fix_item_offset() is then
 * run on the resulting path, and the transaction is committed.
 */
5758 static int try_to_fix_bad_block(struct btrfs_root *root,
5759 struct extent_buffer *buf,
5760 enum btrfs_tree_block_status status)
5762 struct btrfs_trans_handle *trans;
5763 struct ulist *roots;
5764 struct ulist_node *node;
5765 struct btrfs_root *search_root;
5766 struct btrfs_path path;
5767 struct ulist_iterator iter;
5768 struct btrfs_key root_key, key;
5771 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5772 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5775 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5779 btrfs_init_path(&path);
5780 ULIST_ITER_INIT(&iter);
5781 while ((node = ulist_next(roots, &iter))) {
/* node->val carries the objectid of a root referencing the block. */
5782 root_key.objectid = node->val;
5783 root_key.type = BTRFS_ROOT_ITEM_KEY;
5784 root_key.offset = (u64)-1;
5786 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5793 trans = btrfs_start_transaction(search_root, 0);
5794 if (IS_ERR(trans)) {
5795 ret = PTR_ERR(trans);
5799 path.lowest_level = btrfs_header_level(buf);
5800 path.skip_check_block = 1;
5801 if (path.lowest_level)
5802 btrfs_node_key_to_cpu(buf, &key, 0);
5804 btrfs_item_key_to_cpu(buf, &key, 0);
5805 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5808 btrfs_commit_transaction(trans, search_root);
5811 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5812 ret = fix_key_order(search_root, &path);
5813 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5814 ret = fix_item_offset(search_root, &path);
5816 btrfs_commit_transaction(trans, search_root);
5819 btrfs_release_path(&path);
5820 btrfs_commit_transaction(trans, search_root);
5823 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record covering [buf->start, buf->len) is looked up and receives the
 * header generation, the objectid of the block's first key (when the block
 * has items) and the block level. The block is checked with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key; a
 * non-clean status is passed to try_to_fix_bad_block(), and "bad block" is
 * printed if the status is still not clean afterwards.
 *
 * Otherwise content_checked is set. owner_ref_checked is set directly when
 * @flags has BTRFS_BLOCK_FLAG_FULL_BACKREF, else depending on
 * check_owner_ref(). maybe_free_extent_rec() then gets the chance to drop
 * the record.
 */
5827 static int check_block(struct btrfs_root *root,
5828 struct cache_tree *extent_cache,
5829 struct extent_buffer *buf, u64 flags)
5831 struct extent_record *rec;
5832 struct cache_extent *cache;
5833 struct btrfs_key key;
5834 enum btrfs_tree_block_status status;
5838 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5841 rec = container_of(cache, struct extent_record, cache);
5842 rec->generation = btrfs_header_generation(buf);
5844 level = btrfs_header_level(buf);
5845 if (btrfs_header_nritems(buf) > 0) {
5848 btrfs_item_key_to_cpu(buf, &key, 0);
5850 btrfs_node_key_to_cpu(buf, &key, 0);
5852 rec->info_objectid = key.objectid;
5854 rec->info_level = level;
5856 if (btrfs_is_leaf(buf))
5857 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5859 status = btrfs_check_node(root, &rec->parent_key, buf);
5861 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* The int result of the repair attempt is stored back into `status`. */
5863 status = try_to_fix_bad_block(root, buf, status);
5864 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5866 fprintf(stderr, "bad block %llu\n",
5867 (unsigned long long)buf->start);
5870 * Signal to callers we need to start the scan over
5871 * again since we'll have cowed blocks.
5876 rec->content_checked = 1;
5877 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5878 rec->owner_ref_checked = 1;
5880 ret = check_owner_ref(root, rec, buf);
5882 rec->owner_ref_checked = 1;
5886 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two comparisons are made per entry: back->parent against @parent, with
 * non-full backrefs taking the early branch, and back->root against @root,
 * with full backrefs taking the early branch.
 * NOTE(review): presumably the first form is used when @parent is non-zero
 * and the second otherwise, returning the match or NULL -- confirm.
 */
5890 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5891 u64 parent, u64 root)
5893 struct list_head *cur = rec->backrefs.next;
5894 struct extent_backref *node;
5895 struct tree_backref *back;
5897 while(cur != &rec->backrefs) {
5898 node = to_extent_backref(cur);
5902 back = to_tree_backref(node);
5904 if (!node->full_backref)
5906 if (parent == back->parent)
5909 if (node->full_backref)
5911 if (back->root == root)
/*
 * Allocate a tree_backref, zero its embedded extent_backref node, and append
 * it to rec->backrefs.
 *
 * One branch stores @parent and marks the node as a full backref; the other
 * clears full_backref (presumably storing @root instead -- confirm).
 * Only ref->node is zeroed by the memset; the remaining fields come straight
 * from malloc().
 */
5918 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5919 u64 parent, u64 root)
5921 struct tree_backref *ref = malloc(sizeof(*ref));
5925 memset(&ref->node, 0, sizeof(ref->node));
5927 ref->parent = parent;
5928 ref->node.full_backref = 1;
5931 ref->node.full_backref = 0;
5933 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref matching the given identity.
 *
 * Full backrefs are compared by back->parent. Non-full backrefs are compared
 * by (root, owner, offset); for those, when both found_ref and
 * node->found_ref are set, an entry whose bytes or disk_bytenr differ from
 * the arguments takes a separate branch (presumably it is skipped so the
 * caller adds a distinct backref -- confirm).
 */
5938 static struct data_backref *find_data_backref(struct extent_record *rec,
5939 u64 parent, u64 root,
5940 u64 owner, u64 offset,
5942 u64 disk_bytenr, u64 bytes)
5944 struct list_head *cur = rec->backrefs.next;
5945 struct extent_backref *node;
5946 struct data_backref *back;
5948 while(cur != &rec->backrefs) {
5949 node = to_extent_backref(cur);
5953 back = to_data_backref(node);
5955 if (!node->full_backref)
5957 if (parent == back->parent)
5960 if (node->full_backref)
5962 if (back->root == root && back->owner == owner &&
5963 back->offset == offset) {
5964 if (found_ref && node->found_ref &&
5965 (back->bytes != bytes ||
5966 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref, zero its embedded node, flag it as data and
 * append it to rec->backrefs.
 *
 * One branch stores @parent and marks the node as a full backref; the other
 * stores @offset and clears full_backref. ref->bytes is set to max_size, and
 * rec->max_size is raised to max_size when it is smaller.
 */
5975 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5976 u64 parent, u64 root,
5977 u64 owner, u64 offset,
5980 struct data_backref *ref = malloc(sizeof(*ref));
5984 memset(&ref->node, 0, sizeof(ref->node));
5985 ref->node.is_data = 1;
5988 ref->parent = parent;
5991 ref->node.full_backref = 1;
5995 ref->offset = offset;
5996 ref->node.full_backref = 0;
5998 ref->bytes = max_size;
6001 list_add_tail(&ref->node.list, &rec->backrefs);
6002 if (max_size > rec->max_size)
6003 rec->max_size = max_size;
6007 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent's kind disagrees with the flags
 * of the block group found for rec->start.
 *
 *  - Data extent (!rec->metadata): the block group must carry
 *    BTRFS_BLOCK_GROUP_DATA.
 *  - Metadata extent: the block group must carry SYSTEM or METADATA.
 *    If a backref exists, the first one decides which of the two is required:
 *    SYSTEM when its root is BTRFS_CHUNK_TREE_OBJECTID, METADATA otherwise.
 *    A data backref on a metadata extent is itself flagged as a wrong type.
 */
6008 static void check_extent_type(struct extent_record *rec)
6010 struct btrfs_block_group_cache *bg_cache;
6012 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6016 /* data extent, check chunk directly*/
6017 if (!rec->metadata) {
6018 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6019 rec->wrong_chunk_type = 1;
6023 /* metadata extent, check the obvious case first */
6024 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6025 BTRFS_BLOCK_GROUP_METADATA))) {
6026 rec->wrong_chunk_type = 1;
6031 * Check SYSTEM extent, as it's also marked as metadata, we can only
6032 * make sure it's a SYSTEM extent by its backref
6034 if (!list_empty(&rec->backrefs)) {
6035 struct extent_backref *node;
6036 struct tree_backref *tback;
/* Only the first backref on the list is consulted. */
6039 node = to_extent_backref(rec->backrefs.next);
6040 if (node->is_data) {
6041 /* tree block shouldn't have data backref */
6042 rec->wrong_chunk_type = 1;
6045 tback = container_of(node, struct tree_backref, node);
6047 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6048 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6050 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6051 if (!(bg_cache->flags & bg_type))
6052 rec->wrong_chunk_type = 1;
6057 * Allocate a new extent record, fill default values from @tmpl and insert into
6058 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6059 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON). rec->nr becomes the larger of
 * tmpl->nr and tmpl->max_size, while the cache node size is tmpl->nr.
 * After insertion, rec->nr is added to the global bytes_used, the
 * crossing-stripes check is evaluated with the tree root's nodesize, and
 * check_extent_type() is run.
 *
 * NOTE(review): rec comes from malloc() and only the fields assigned below
 * are initialised here -- confirm no other field is read before being set.
 */
6061 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6062 struct extent_record *tmpl)
6064 struct extent_record *rec;
6067 BUG_ON(tmpl->max_size == 0);
6068 rec = malloc(sizeof(*rec));
6071 rec->start = tmpl->start;
6072 rec->max_size = tmpl->max_size;
6073 rec->nr = max(tmpl->nr, tmpl->max_size);
6074 rec->found_rec = tmpl->found_rec;
6075 rec->content_checked = tmpl->content_checked;
6076 rec->owner_ref_checked = tmpl->owner_ref_checked;
6077 rec->num_duplicates = 0;
6078 rec->metadata = tmpl->metadata;
6079 rec->flag_block_full_backref = FLAG_UNSET;
6080 rec->bad_full_backref = 0;
6081 rec->crossing_stripes = 0;
6082 rec->wrong_chunk_type = 0;
6083 rec->is_root = tmpl->is_root;
6084 rec->refs = tmpl->refs;
6085 rec->extent_item_refs = tmpl->extent_item_refs;
6086 rec->parent_generation = tmpl->parent_generation;
6087 INIT_LIST_HEAD(&rec->backrefs);
6088 INIT_LIST_HEAD(&rec->dups);
6089 INIT_LIST_HEAD(&rec->list);
6090 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6091 rec->cache.start = tmpl->start;
6092 rec->cache.size = tmpl->nr;
6093 ret = insert_cache_extent(extent_cache, &rec->cache);
6098 bytes_used += rec->nr;
6101 rec->crossing_stripes = check_crossing_stripes(global_info,
6102 rec->start, global_info->tree_root->nodesize);
6103 check_extent_type(rec);
6108 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6110 * - refs - if found, increase refs
6111 * - is_root - if found, set
6112 * - content_checked - if found, set
6113 * - owner_ref_checked - if found, set
6115 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record overlapping [tmpl->start, tmpl->nr),
 * or create a new record through add_extent_rec_nolookup().
 *
 * For an existing record:
 *  - When tmpl->found_rec is set and either the start differs or the record
 *    already had found_rec, the record is put on the global
 *    duplicate_extents list (if not already on a list) and a stub record
 *    describing the template is appended to rec->dups.
 *  - A second, conflicting extent_item_refs value is reported on stderr.
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are updated from the template.
 *  - The crossing-stripes and chunk-type checks are re-run and
 *    maybe_free_extent_rec() may drop the record.
 */
6117 static int add_extent_rec(struct cache_tree *extent_cache,
6118 struct extent_record *tmpl)
6120 struct extent_record *rec;
6121 struct cache_extent *cache;
6125 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6127 rec = container_of(cache, struct extent_record, cache);
6131 rec->nr = max(tmpl->nr, tmpl->max_size);
6134 * We need to make sure to reset nr to whatever the extent
6135 * record says was the real size, this way we can compare it to
6138 if (tmpl->found_rec) {
6139 if (tmpl->start != rec->start || rec->found_rec) {
6140 struct extent_record *tmp;
6143 if (list_empty(&rec->list))
6144 list_add_tail(&rec->list,
6145 &duplicate_extents);
6148 * We have to do this song and dance in case we
6149 * find an extent record that falls inside of
6150 * our current extent record but does not have
6151 * the same objectid.
/* The stub comes from malloc(); only the fields assigned here are set. */
6153 tmp = malloc(sizeof(*tmp));
6156 tmp->start = tmpl->start;
6157 tmp->max_size = tmpl->max_size;
6160 tmp->metadata = tmpl->metadata;
6161 tmp->extent_item_refs = tmpl->extent_item_refs;
6162 INIT_LIST_HEAD(&tmp->list);
6163 list_add_tail(&tmp->list, &rec->dups);
6164 rec->num_duplicates++;
6171 if (tmpl->extent_item_refs && !dup) {
6172 if (rec->extent_item_refs) {
6173 fprintf(stderr, "block %llu rec "
6174 "extent_item_refs %llu, passed %llu\n",
6175 (unsigned long long)tmpl->start,
6176 (unsigned long long)
6177 rec->extent_item_refs,
6178 (unsigned long long)tmpl->extent_item_refs);
6180 rec->extent_item_refs = tmpl->extent_item_refs;
6184 if (tmpl->content_checked)
6185 rec->content_checked = 1;
6186 if (tmpl->owner_ref_checked)
6187 rec->owner_ref_checked = 1;
6188 memcpy(&rec->parent_key, &tmpl->parent_key,
6189 sizeof(tmpl->parent_key));
6190 if (tmpl->parent_generation)
6191 rec->parent_generation = tmpl->parent_generation;
6192 if (rec->max_size < tmpl->max_size)
6193 rec->max_size = tmpl->max_size;
6196 * A metadata extent can't cross stripe_len boundary, otherwise
6197 * kernel scrub won't be able to handle it.
6198 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6202 rec->crossing_stripes = check_crossing_stripes(
6203 global_info, rec->start,
6204 global_info->tree_root->nodesize);
6205 check_extent_type(rec);
6206 maybe_free_extent_rec(extent_cache, rec);
6210 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up with a one-byte range; if needed a record
 * is created from a zeroed template via add_extent_rec_nolookup() and looked
 * up again. A record whose start differs from @bytenr takes a separate
 * branch. The matching backref is found or allocated, then one of two flags
 * is set on it: found_ref or found_extent_tree (presumably selected by
 * @found_ref -- confirm). A flag that was already set is reported on stderr
 * as an existing back ref. Finally check_extent_type() and
 * maybe_free_extent_rec() are run.
 */
6215 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6216 u64 parent, u64 root, int found_ref)
6218 struct extent_record *rec;
6219 struct tree_backref *back;
6220 struct cache_extent *cache;
6223 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6225 struct extent_record tmpl;
6227 memset(&tmpl, 0, sizeof(tmpl));
6228 tmpl.start = bytenr;
6233 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6237 /* really a bug in the cache_extent implementation now */
6238 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6243 rec = container_of(cache, struct extent_record, cache);
6244 if (rec->start != bytenr) {
6246 * Several cause, from unaligned bytenr to over lapping extents
6251 back = find_tree_backref(rec, parent, root);
6253 back = alloc_tree_backref(rec, parent, root);
6259 if (back->node.found_ref) {
6260 fprintf(stderr, "Extent back ref already exists "
6261 "for %llu parent %llu root %llu \n",
6262 (unsigned long long)bytenr,
6263 (unsigned long long)parent,
6264 (unsigned long long)root);
6266 back->node.found_ref = 1;
6268 if (back->node.found_extent_tree) {
6269 fprintf(stderr, "Extent back ref already exists "
6270 "for %llu parent %llu root %llu \n",
6271 (unsigned long long)bytenr,
6272 (unsigned long long)parent,
6273 (unsigned long long)root);
6275 back->node.found_extent_tree = 1;
6277 check_extent_type(rec);
6278 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up (or created from a template carrying
 * @max_size) and its max_size is raised to @max_size when smaller. The
 * matching data backref is found or allocated.
 *
 * Two update paths exist (presumably selected by @found_ref -- confirm):
 *  - Reference path: @num_refs must be 1 (BUG_ON), and a previously found
 *    ref must have the same byte count (BUG_ON). found_ref is incremented,
 *    bytes and disk_bytenr are stored, and the record's content_checked and
 *    owner_ref_checked are set.
 *  - Extent-tree path: a backref already marked found_extent_tree is
 *    reported on stderr; num_refs is stored and found_extent_tree is set.
 *
 * maybe_free_extent_rec() is called at the end.
 */
6282 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6283 u64 parent, u64 root, u64 owner, u64 offset,
6284 u32 num_refs, int found_ref, u64 max_size)
6286 struct extent_record *rec;
6287 struct data_backref *back;
6288 struct cache_extent *cache;
6291 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6293 struct extent_record tmpl;
6295 memset(&tmpl, 0, sizeof(tmpl));
6296 tmpl.start = bytenr;
6298 tmpl.max_size = max_size;
6300 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6304 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6309 rec = container_of(cache, struct extent_record, cache);
6310 if (rec->max_size < max_size)
6311 rec->max_size = max_size;
6314 * If found_ref is set then max_size is the real size and must match the
6315 * existing refs. So if we have already found a ref then we need to
6316 * make sure that this ref matches the existing one, otherwise we need
6317 * to add a new backref so we can notice that the backrefs don't match
6318 * and we need to figure out who is telling the truth. This is to
6319 * account for that awful fsync bug I introduced where we'd end up with
6320 * a btrfs_file_extent_item that would have its length include multiple
6321 * prealloc extents or point inside of a prealloc extent.
6323 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6326 back = alloc_data_backref(rec, parent, root, owner, offset,
6332 BUG_ON(num_refs != 1);
6333 if (back->node.found_ref)
6334 BUG_ON(back->bytes != max_size);
6335 back->node.found_ref = 1;
6336 back->found_ref += 1;
6337 back->bytes = max_size;
6338 back->disk_bytenr = bytenr;
6340 rec->content_checked = 1;
6341 rec->owner_ref_checked = 1;
6343 if (back->node.found_extent_tree) {
6344 fprintf(stderr, "Extent back ref already exists "
6345 "for %llu parent %llu root %llu "
6346 "owner %llu offset %llu num_refs %lu\n",
6347 (unsigned long long)bytenr,
6348 (unsigned long long)parent,
6349 (unsigned long long)root,
6350 (unsigned long long)owner,
6351 (unsigned long long)offset,
6352 (unsigned long)num_refs);
6354 back->num_refs = num_refs;
6355 back->node.found_extent_tree = 1;
6357 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is first added
 * to @seen and then to @pending.
 * NOTE(review): presumably a failure to add to @seen (range already present)
 * returns before touching @pending -- confirm. The result of the second
 * add_cache_extent() call is not checked.
 */
6361 static int add_pending(struct cache_tree *pending,
6362 struct cache_tree *seen, u64 bytenr, u32 size)
6365 ret = add_cache_extent(seen, bytenr, size);
6368 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (at most @bits_nr entries) with the next block ranges to read.
 *
 * Sources are consulted in this order: the first extent of @reada; extents
 * of @nodes starting from `last` minus 32768 (or from 0); extents of
 * @pending from 0. When more than 8 slots remain free, extents of @pending
 * following bits[0] are appended while the gap to the previous range is at
 * most 32768 bytes.
 *
 * NOTE(review): `last` is u64 but node_start is unsigned long; where
 * unsigned long is 32 bits wide the start position is truncated -- confirm.
 */
6372 static int pick_next_pending(struct cache_tree *pending,
6373 struct cache_tree *reada,
6374 struct cache_tree *nodes,
6375 u64 last, struct block_info *bits, int bits_nr,
6378 unsigned long node_start = last;
6379 struct cache_extent *cache;
6382 cache = search_cache_extent(reada, 0);
6384 bits[0].start = cache->start;
6385 bits[0].size = cache->size;
/* Back up 32768 bytes so nodes just before `last` are considered too. */
6390 if (node_start > 32768)
6391 node_start -= 32768;
6393 cache = search_cache_extent(nodes, node_start);
6395 cache = search_cache_extent(nodes, 0);
6398 cache = search_cache_extent(pending, 0);
6403 bits[ret].start = cache->start;
6404 bits[ret].size = cache->size;
6405 cache = next_cache_extent(cache);
6407 } while (cache && ret < bits_nr);
6413 bits[ret].start = cache->start;
6414 bits[ret].size = cache->size;
6415 cache = next_cache_extent(cache);
6417 } while (cache && ret < bits_nr);
6419 if (bits_nr - ret > 8) {
6420 u64 lookup = bits[0].start + bits[0].size;
6421 struct cache_extent *next;
6422 next = search_cache_extent(pending, lookup);
6424 if (next->start - lookup > 32768)
6426 bits[ret].start = next->start;
6427 bits[ret].size = next->size;
6428 lookup = next->start + next->size;
6432 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps @cache to its
 * chunk_record and unlinks the record's `list` and `dextents` list heads.
 * NOTE(review): presumably followed by freeing the record -- confirm.
 */
6440 static void free_chunk_record(struct cache_extent *cache)
6442 struct chunk_record *rec;
6444 rec = container_of(cache, struct chunk_record, cache);
6445 list_del_init(&rec->list);
6446 list_del_init(&rec->dextents);
/* Release every entry of @chunk_cache using free_chunk_record(). */
6450 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6452 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node release callback for the device rb-tree: maps @node to its
 * enclosing device_record.
 * NOTE(review): presumably frees the record -- confirm.
 */
6455 static void free_device_record(struct rb_node *node)
6457 struct device_record *rec;
6459 rec = container_of(node, struct device_record, node);
/*
 * Instantiates FREE_RB_BASED_TREE for `device_cache` with
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this generates the device-cache tree free
 * routine -- confirm against the macro definition.
 */
6463 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: the record's cache node goes into the cache
 * tree and the record is appended to tree->block_groups.
 * NOTE(review): presumably a failed cache insertion returns before the list
 * is modified -- confirm.
 */
6465 int insert_block_group_record(struct block_group_tree *tree,
6466 struct block_group_record *bg_rec)
6470 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6474 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps @cache to
 * its block_group_record and unlinks it from its list.
 * NOTE(review): presumably followed by freeing the record -- confirm.
 */
6478 static void free_block_group_record(struct cache_extent *cache)
6480 struct block_group_record *rec;
6482 rec = container_of(cache, struct block_group_record, cache);
6483 list_del_init(&rec->list);
/* Release every entry of @tree's cache tree using free_block_group_record(). */
6487 void free_block_group_tree(struct block_group_tree *tree)
6489 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree with insert_cache_extent2() and put the record
 * on both orphan lists (no_chunk_orphans and no_device_orphans).
 * NOTE(review): presumably a failed insertion returns before the lists are
 * modified -- confirm.
 */
6492 int insert_device_extent_record(struct device_extent_tree *tree,
6493 struct device_extent_record *de_rec)
6498 * Device extent is a bit different from the other extents, because
6499 * the extents which belong to the different devices may have the
6500 * same start and size, so we need use the special extent cache
6501 * search/insert functions.
6503 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6507 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6508 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: maps @cache
 * to its device_extent_record and unlinks it from chunk_list and device_list
 * when those are non-empty.
 * NOTE(review): presumably followed by freeing the record -- confirm.
 */
6512 static void free_device_extent_record(struct cache_extent *cache)
6514 struct device_extent_record *rec;
6516 rec = container_of(cache, struct device_extent_record, cache);
6517 if (!list_empty(&rec->chunk_list))
6518 list_del_init(&rec->chunk_list);
6519 if (!list_empty(&rec->device_list))
6520 list_del_init(&rec->device_list);
/* Release every entry of @tree's cache tree using free_device_extent_record(). */
6524 void free_device_extent_tree(struct device_extent_tree *tree)
6526 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turn a v0 extent ref item at @slot of @leaf into a backref. A ref objectid
 * below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree backref; anything
 * else becomes a data backref carrying the v0 ref count. In both cases
 * key.objectid is the extent bytenr and key.offset is passed as parent.
 */
6529 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6530 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6531 struct extent_buffer *leaf, int slot)
6533 struct btrfs_extent_ref_v0 *ref0;
6534 struct btrfs_key key;
6537 btrfs_item_key_to_cpu(leaf, &key, slot);
6538 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6539 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6540 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6543 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6544 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a zero-initialised chunk_record from the chunk item at @slot of
 * @leaf.
 *
 * The allocation is sized by btrfs_chunk_record_size(num_stripes). The cache
 * node covers [key->offset, chunk length). The key triple, header
 * generation, chunk attributes (owner, stripe_len, type, io_width, io_align,
 * sector_size, num_stripes, sub_stripes) and each stripe's devid, offset and
 * dev_uuid are copied from the leaf.
 * Allocation failure prints "memory allocation failed" (presumably followed
 * by termination -- confirm).
 */
6550 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6551 struct btrfs_key *key,
6554 struct btrfs_chunk *ptr;
6555 struct chunk_record *rec;
6558 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6559 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6561 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6563 fprintf(stderr, "memory allocation failed\n");
6567 INIT_LIST_HEAD(&rec->list);
6568 INIT_LIST_HEAD(&rec->dextents);
6571 rec->cache.start = key->offset;
6572 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6574 rec->generation = btrfs_header_generation(leaf);
6576 rec->objectid = key->objectid;
6577 rec->type = key->type;
6578 rec->offset = key->offset;
6580 rec->length = rec->cache.size;
6581 rec->owner = btrfs_chunk_owner(leaf, ptr);
6582 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6583 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6584 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6585 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6586 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6587 rec->num_stripes = num_stripes;
6588 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6590 for (i = 0; i < rec->num_stripes; ++i) {
6591 rec->stripes[i].devid =
6592 btrfs_stripe_devid_nr(leaf, ptr, i);
6593 rec->stripes[i].offset =
6594 btrfs_stripe_offset_nr(leaf, ptr, i);
6595 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6596 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid(); an invalid chunk is
 * reported through error() and ignored. A valid one is converted by
 * btrfs_new_chunk_record() and inserted; a failed insertion is reported as
 * "Chunk[...] existed.".
 *
 * NOTE(review): rec->offset and rec->length are passed to "%llu" without an
 * (unsigned long long) cast -- confirm the field types match.
 */
6603 static int process_chunk_item(struct cache_tree *chunk_cache,
6604 struct btrfs_key *key, struct extent_buffer *eb,
6607 struct chunk_record *rec;
6608 struct btrfs_chunk *chunk;
6611 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6613 * Do extra check for this chunk item,
6615 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6616 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6617 * and owner<->key_type check.
6619 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6622 error("chunk(%llu, %llu) is not valid, ignore it",
6623 key->offset, btrfs_chunk_length(eb, chunk));
6626 rec = btrfs_new_chunk_record(eb, key, slot);
6627 ret = insert_cache_extent(chunk_cache, &rec->cache);
6629 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6630 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare().
 *
 * The record carries the key triple, the header generation and the device's
 * id, total bytes and bytes used. A failed insertion is reported as
 * "Device[...] existed.".
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); the second value wins -- confirm the first
 * assignment is intentional.
 */
6637 static int process_device_item(struct rb_root *dev_cache,
6638 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6640 struct btrfs_dev_item *ptr;
6641 struct device_record *rec;
6644 ptr = btrfs_item_ptr(eb,
6645 slot, struct btrfs_dev_item);
6647 rec = malloc(sizeof(*rec));
6649 fprintf(stderr, "memory allocation failed\n");
6653 rec->devid = key->offset;
6654 rec->generation = btrfs_header_generation(eb);
6656 rec->objectid = key->objectid;
6657 rec->type = key->type;
6658 rec->offset = key->offset;
6660 rec->devid = btrfs_device_id(eb, ptr);
6661 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6662 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6664 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6666 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache node covers [key->objectid, key->offset). The key triple, the
 * header generation and the on-disk block group flags are copied, and the
 * list head is initialised.
 * Allocation failure prints "memory allocation failed" (presumably followed
 * by termination -- confirm).
 */
6673 struct block_group_record *
6674 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6677 struct btrfs_block_group_item *ptr;
6678 struct block_group_record *rec;
6680 rec = calloc(1, sizeof(*rec));
6682 fprintf(stderr, "memory allocation failed\n");
6686 rec->cache.start = key->objectid;
6687 rec->cache.size = key->offset;
6689 rec->generation = btrfs_header_generation(leaf);
6691 rec->objectid = key->objectid;
6692 rec->type = key->type;
6693 rec->offset = key->offset;
6695 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6696 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6698 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at @slot of @eb into a record and insert it
 * into @block_group_cache; a failed insertion is reported as
 * "Block Group[...] existed.".
 */
6703 static int process_block_group_item(struct block_group_tree *block_group_cache,
6704 struct btrfs_key *key,
6705 struct extent_buffer *eb, int slot)
6707 struct block_group_record *rec;
6710 rec = btrfs_new_block_group_record(eb, key, slot);
6711 ret = insert_block_group_record(block_group_cache, rec);
6713 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6714 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache node is keyed by (key->objectid, key->offset) and sized by the
 * dev extent length. The key triple, the header generation and the chunk
 * objectid/offset the extent belongs to are copied, and both list heads are
 * initialised. Note that the struct field is spelled `chunk_objecteid`.
 * Allocation failure prints "memory allocation failed" (presumably followed
 * by termination -- confirm).
 */
6721 struct device_extent_record *
6722 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6723 struct btrfs_key *key, int slot)
6725 struct device_extent_record *rec;
6726 struct btrfs_dev_extent *ptr;
6728 rec = calloc(1, sizeof(*rec));
6730 fprintf(stderr, "memory allocation failed\n");
6734 rec->cache.objectid = key->objectid;
6735 rec->cache.start = key->offset;
6737 rec->generation = btrfs_header_generation(leaf);
6739 rec->objectid = key->objectid;
6740 rec->type = key->type;
6741 rec->offset = key->offset;
6743 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6744 rec->chunk_objecteid =
6745 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6747 btrfs_dev_extent_chunk_offset(leaf, ptr);
6748 rec->length = btrfs_dev_extent_length(leaf, ptr);
6749 rec->cache.size = rec->length;
6751 INIT_LIST_HEAD(&rec->chunk_list);
6752 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert the dev extent item at @slot of @eb into a record and insert it
 * into @dev_extent_cache; a failed insertion is reported as
 * "Device extent[...] existed.".
 */
6758 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6759 struct btrfs_key *key, struct extent_buffer *eb,
6762 struct device_extent_record *rec;
6765 rec = btrfs_new_device_extent_record(eb, key, slot);
6766 ret = insert_device_extent_record(dev_extent_cache, rec);
6769 "Device extent[%llu, %llu, %llu] existed.\n",
6770 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with the inline backrefs stored inside the item.
 *
 * @root:         supplies nodesize and sectorsize for the sanity checks
 * @extent_cache: cache tree receiving the extent record and its backrefs
 * @eb:           extent buffer containing the item
 * @slot:         slot of the item inside @eb
 */
6777 static int process_extent_item(struct btrfs_root *root,
6778 struct cache_tree *extent_cache,
6779 struct extent_buffer *eb, int slot)
6781 struct btrfs_extent_item *ei;
6782 struct btrfs_extent_inline_ref *iref;
6783 struct btrfs_extent_data_ref *dref;
6784 struct btrfs_shared_data_ref *sref;
6785 struct btrfs_key key;
6786 struct extent_record tmpl;
6791 u32 item_size = btrfs_item_size_nr(eb, slot);
6797 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * Extent length: root->nodesize for a METADATA_ITEM key, the key offset
 * in the other assignment below.
 */
6799 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6801 num_bytes = root->nodesize;
6803 num_bytes = key.offset;
/* The extent start (key.objectid) must be aligned to the sector size */
6806 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6807 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6808 key.objectid, root->sectorsize);
/* Item smaller than struct btrfs_extent_item: handled as the v0 format when compat support is built in */
6811 if (item_size < sizeof(*ei)) {
6812 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6813 struct btrfs_extent_item_v0 *ei0;
6814 BUG_ON(item_size != sizeof(*ei0));
6815 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6816 refs = btrfs_extent_refs_v0(eb, ei0);
6820 memset(&tmpl, 0, sizeof(tmpl));
6821 tmpl.start = key.objectid;
6822 tmpl.nr = num_bytes;
6823 tmpl.extent_item_refs = refs;
6824 tmpl.metadata = metadata;
6826 tmpl.max_size = num_bytes;
6828 return add_extent_rec(extent_cache, &tmpl);
/* Current format: read the reference count and the flags from the item */
6831 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6832 refs = btrfs_extent_refs(eb, ei);
6833 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6837 if (metadata && num_bytes != root->nodesize) {
6838 error("ignore invalid metadata extent, length %llu does not equal to %u",
6839 num_bytes, root->nodesize);
6842 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6843 error("ignore invalid data extent, length %llu is not aligned to %u",
6844 num_bytes, root->sectorsize);
/* Template describing the extent; add_extent_rec() consumes it */
6848 memset(&tmpl, 0, sizeof(tmpl));
6849 tmpl.start = key.objectid;
6850 tmpl.nr = num_bytes;
6851 tmpl.extent_item_refs = refs;
6852 tmpl.metadata = metadata;
6854 tmpl.max_size = num_bytes;
6855 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right behind the extent item. For a TREE_BLOCK extent
 * stored under an EXTENT_ITEM key a btrfs_tree_block_info sits in between
 * and is stepped over.
 */
6857 ptr = (unsigned long)(ei + 1);
6858 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6859 key.type == BTRFS_EXTENT_ITEM_KEY)
6860 ptr += sizeof(struct btrfs_tree_block_info);
/* 'end' marks the first byte past the item */
6862 end = (unsigned long)ei + item_size;
/* Decode the inline ref at ptr and dispatch on its type */
6864 iref = (struct btrfs_extent_inline_ref *)ptr;
6865 type = btrfs_extent_inline_ref_type(eb, iref);
6866 offset = btrfs_extent_inline_ref_offset(eb, iref);
6868 case BTRFS_TREE_BLOCK_REF_KEY:
6869 ret = add_tree_backref(extent_cache, key.objectid,
6873 "add_tree_backref failed (extent items tree block): %s",
6876 case BTRFS_SHARED_BLOCK_REF_KEY:
6877 ret = add_tree_backref(extent_cache, key.objectid,
6881 "add_tree_backref failed (extent items shared block): %s",
6884 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure starts at the offset field of the inline ref */
6885 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6886 add_data_backref(extent_cache, key.objectid, 0,
6887 btrfs_extent_data_ref_root(eb, dref),
6888 btrfs_extent_data_ref_objectid(eb,
6890 btrfs_extent_data_ref_offset(eb, dref),
6891 btrfs_extent_data_ref_count(eb, dref),
6894 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref structure follows the inline ref; 'offset' is passed as the parent */
6895 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6896 add_data_backref(extent_cache, key.objectid, offset,
6898 btrfs_shared_data_ref_count(eb, sref),
6902 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6903 key.objectid, key.type, num_bytes);
/* Advance to the next inline ref; the step depends on the ref type */
6906 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by a free space entry with exactly that offset and length, and
 * unlink the entry from the free space ctl.
 *
 * Before the lookup the range is adjusted around the super block mirrors
 * that btrfs_rmap_block() maps into this block group.
 */
6913 static int check_cache_range(struct btrfs_root *root,
6914 struct btrfs_block_group_cache *cache,
6915 u64 offset, u64 bytes)
6917 struct btrfs_free_space *entry;
/* For every super block mirror, get the logical addresses it maps to in this block group */
6923 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6924 bytenr = btrfs_sb_offset(i);
6925 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6926 cache->key.objectid, bytenr, 0,
6927 &logical, &nr, &stripe_len);
/*
 * The next two tests detect stripes that do not overlap the range:
 * stripe entirely before it, or stripe starting at/after its end.
 */
6932 if (logical[nr] + stripe_len <= offset)
6934 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the start of the range: cut it off the front */
6936 if (logical[nr] == offset) {
6937 if (stripe_len >= bytes) {
6941 bytes -= stripe_len;
6942 offset += stripe_len;
6943 } else if (logical[nr] < offset) {
/* Stripe starts before the range: the range restarts at the end of the stripe */
6944 if (logical[nr] + stripe_len >=
6949 bytes = (offset + bytes) -
6950 (logical[nr] + stripe_len);
6951 offset = logical[nr] + stripe_len;
6954 * Could be tricky, the super may land in the
6955 * middle of the area we're checking. First
6956 * check the easiest case, it's at the end.
6958 if (logical[nr] + stripe_len >=
6960 bytes = logical[nr] - offset;
6964 /* Check the left side */
6965 ret = check_cache_range(root, cache,
6967 logical[nr] - offset);
6973 /* Now we continue with the right side */
6974 bytes = (offset + bytes) -
6975 (logical[nr] + stripe_len);
6976 offset = logical[nr] + stripe_len;
/* What is left of the range has to match one free space entry exactly */
6983 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6985 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6986 offset, offset+bytes);
6990 if (entry->offset != offset) {
6991 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6996 if (entry->bytes != bytes) {
6997 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6998 bytes, entry->bytes, offset);
/* The entry has been accounted for; take it out of the free space ctl */
7002 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across block group @cache and hand every gap between
 * allocated extents to check_cache_range(), which matches it against the
 * free space entries loaded for the block group.
 *
 * @root:  any root of the filesystem; replaced by the extent root below
 * @cache: block group whose free space entries are verified
 */
7007 static int verify_space_cache(struct btrfs_root *root,
7008 struct btrfs_block_group_cache *cache)
7010 struct btrfs_path path;
7011 struct extent_buffer *leaf;
7012 struct btrfs_key key;
/* The walk is done on the extent root, whichever root was passed in */
7016 root = root->fs_info->extent_root;
/* Start at the block group start, but not below BTRFS_SUPER_INFO_OFFSET */
7018 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7020 btrfs_init_path(&path);
7021 key.objectid = last;
7023 key.type = BTRFS_EXTENT_ITEM_KEY;
7024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Past the last item of this leaf: move on to the next leaf */
7029 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7030 ret = btrfs_next_leaf(root, &path);
7038 leaf = path.nodes[0];
7039 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for keys at or beyond the end of the block group */
7040 if (key.objectid >= cache->key.offset + cache->key.objectid)
7042 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7043 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * 'last' is the end of the previous extent. An extent ends at
 * objectid + offset for EXTENT_ITEM keys and at objectid + nodesize
 * in the other assignment.
 */
7048 if (last == key.objectid) {
7049 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7050 last = key.objectid + key.offset;
7052 last = key.objectid + root->nodesize;
/* Range from 'last' up to this extent is checked against the free space entries */
7057 ret = check_cache_range(root, cache, last,
7058 key.objectid - last);
7061 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7062 last = key.objectid + key.offset;
7064 last = key.objectid + root->nodesize;
/* Tail of the block group behind the last extent seen */
7068 if (last < cache->key.objectid + cache->key.offset)
7069 ret = check_cache_range(root, cache, last,
7070 cache->key.objectid +
7071 cache->key.offset - last);
7074 btrfs_release_path(&path);
7077 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7078 fprintf(stderr, "There are still entries left in the space "
/*
 * Load the free space information of block groups (from the free space
 * tree when the FREE_SPACE_TREE compat_ro bit is set, or through
 * load_free_space_cache()) and verify it with verify_space_cache().
 *
 * Block groups are looked up starting right behind the primary super block.
 * Returns -EINVAL when the local error flag is set, 0 otherwise.
 */
7086 static int check_space_cache(struct btrfs_root *root)
7088 struct btrfs_block_group_cache *cache;
7089 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation of -1ULL never triggers this message; any other
 * value has to equal the super block generation.
 */
7093 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7094 btrfs_super_generation(root->fs_info->super_copy) !=
7095 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7096 printf("cache and super generation don't match, space cache "
7097 "will be invalidated\n");
/* Progress reporting for this phase, only when enabled */
7101 if (ctx.progress_enabled) {
7102 ctx.tp = TASK_FREE_SPACE;
7103 task_start(ctx.info);
7107 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts right behind this block group */
7111 start = cache->key.objectid + cache->key.offset;
7112 if (!cache->free_space_ctl) {
7113 if (btrfs_init_free_space_ctl(cache,
7114 root->sectorsize)) {
7119 btrfs_remove_free_space_cache(cache);
/* Free space tree: super stripes are excluded around the load and released right after it */
7122 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7123 ret = exclude_super_stripes(root, cache);
7125 fprintf(stderr, "could not exclude super stripes: %s\n",
7130 ret = load_free_space_tree(root->fs_info, cache);
7131 free_excluded_extents(root, cache);
7133 fprintf(stderr, "could not load free space tree: %s\n",
/* NOTE(review): presumably the branch taken without the free space tree - confirm */
7140 ret = load_free_space_cache(root->fs_info, cache);
7145 ret = verify_space_cache(root, cache);
7147 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7148 cache->key.objectid);
7153 task_stop(ctx.info);
7155 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of each sector with the expected checksum stored in @eb.
 *
 * @root:        supplies sectorsize, the checksum size and the mapping tree
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; tested against the sector size below
 * @leaf_offset: byte offset inside @eb of the first expected checksum
 * @eb:          extent buffer holding the expected checksums
 */
7158 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7159 u64 num_bytes, unsigned long leaf_offset,
7160 struct extent_buffer *eb) {
7163 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7165 unsigned long csum_offset;
7169 u64 data_checked = 0;
/* num_bytes is expected to be a multiple of the sector size */
7175 if (num_bytes % root->sectorsize)
/* One buffer for the whole range */
7178 data = malloc(num_bytes);
7182 while (offset < num_bytes) {
7185 read_len = num_bytes - offset;
7186 /* read as much data as possible in one go */
7187 ret = read_extent_data(root, data + offset,
7188 bytenr + offset, &read_len, mirror);
7192 /* verify the checksum of every sector (root->sectorsize bytes) that was read */
7193 while (data_checked < read_len) {
7195 tmp = offset + data_checked;
7197 csum = btrfs_csum_data((char *)data + tmp,
7198 csum, root->sectorsize);
7199 btrfs_csum_final(csum, (u8 *)&csum);
/* Expected checksums are packed in @eb, csum_size bytes per sector */
7201 csum_offset = leaf_offset +
7202 tmp / root->sectorsize * csum_size;
7203 read_extent_buffer(eb, (char *)&csum_expected,
7204 csum_offset, csum_size);
7205 /* try another mirror */
7206 if (csum != csum_expected) {
7207 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7208 mirror, bytenr + tmp,
7209 csum, csum_expected);
7210 num_copies = btrfs_num_copies(
7211 &root->fs_info->mapping_tree,
7213 if (mirror < num_copies - 1) {
7218 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr is backed by EXTENT_ITEMs
 * of the extent tree. The range is shrunk as extents overlapping it are
 * found; if bytes remain without an error, a "no extents for csum range"
 * diagnostic is printed.
 */
7227 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7230 struct btrfs_path path;
7231 struct extent_buffer *leaf;
7232 struct btrfs_key key;
7235 btrfs_init_path(&path);
/* Search with the largest possible EXTENT_ITEM key for @bytenr */
7236 key.objectid = bytenr;
7237 key.type = BTRFS_EXTENT_ITEM_KEY;
7238 key.offset = (u64)-1;
7241 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7244 fprintf(stderr, "Error looking up extent record %d\n", ret);
7245 btrfs_release_path(&path);
/* Walk back from the search position; btrfs_prev_leaf() is used when no earlier slot exists in this leaf */
7248 if (path.slots[0] > 0) {
7251 ret = btrfs_prev_leaf(root, &path);
7254 } else if (ret > 0) {
7261 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7264 * Block group items come before extent items if they have the same
7265 * bytenr, so walk back one more just in case. Dear future traveller,
7266 * first congrats on mastering time travel. Now if it's not too much
7267 * trouble could you go back to 2006 and tell Chris to make the
7268 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7269 * EXTENT_ITEM_KEY please?
7271 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7272 if (path.slots[0] > 0) {
7275 ret = btrfs_prev_leaf(root, &path);
7278 } else if (ret > 0) {
7283 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward scan over the items; go to the next leaf when this one is exhausted */
7287 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7288 ret = btrfs_next_leaf(root, &path);
7290 fprintf(stderr, "Error going to next leaf "
7292 btrfs_release_path(&path);
7298 leaf = path.nodes[0];
7299 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/*
 * Only EXTENT_ITEM keys are of interest. The two range tests that follow
 * detect an extent ending before the range and an extent starting
 * beyond the end of the range.
 */
7300 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7304 if (key.objectid + key.offset < bytenr) {
7308 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the start of the range: cut it off the front */
7311 if (key.objectid == bytenr) {
7312 if (key.offset >= num_bytes) {
7316 num_bytes -= key.offset;
7317 bytenr += key.offset;
7318 } else if (key.objectid < bytenr) {
/* Extent starts before the range: the range restarts at the end of the extent */
7319 if (key.objectid + key.offset >= bytenr + num_bytes) {
7323 num_bytes = (bytenr + num_bytes) -
7324 (key.objectid + key.offset);
7325 bytenr = key.objectid + key.offset;
7327 if (key.objectid + key.offset < bytenr + num_bytes) {
7328 u64 new_start = key.objectid + key.offset;
7329 u64 new_bytes = bytenr + num_bytes - new_start;
7332 * Weird case, the extent is in the middle of
7333 * our range, we'll have to search one side
7334 * and then the other. Not sure if this happens
7335 * in real life, but no harm in coding it up
7336 * anyway just in case.
7338 btrfs_release_path(&path);
7339 ret = check_extent_exists(root, new_start,
7342 fprintf(stderr, "Right section didn't "
/* Keep only the part of the range in front of this extent */
7346 num_bytes = key.objectid - bytenr;
7349 num_bytes = key.objectid - bytenr;
/* Bytes left over without an error: no extent covers them */
7356 if (num_bytes && !ret) {
7357 fprintf(stderr, "There are no extents for csum range "
7358 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7362 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree. Items whose offsets are
 * contiguous are merged into one range [offset, offset + num_bytes); when
 * an item does not continue the current range, the range is checked with
 * check_extent_exists(). With check_data_csum set, the data behind each
 * item is also verified through check_extent_csums().
 */
7366 static int check_csums(struct btrfs_root *root)
7368 struct btrfs_path path;
7369 struct extent_buffer *leaf;
7370 struct btrfs_key key;
7371 u64 offset = 0, num_bytes = 0;
7372 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7376 unsigned long leaf_offset;
/* From here on 'root' is the csum root */
7378 root = root->fs_info->csum_root;
7379 if (!extent_buffer_uptodate(root->node)) {
7380 fprintf(stderr, "No valid csum tree found\n");
7384 btrfs_init_path(&path);
7385 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7386 key.type = BTRFS_EXTENT_CSUM_KEY;
7388 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7390 fprintf(stderr, "Error searching csum tree %d\n", ret);
7391 btrfs_release_path(&path);
7395 if (ret > 0 && path.slots[0])
/* Past the last item of this leaf: move on to the next leaf */
7400 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7401 ret = btrfs_next_leaf(root, &path);
7403 fprintf(stderr, "Error going to next leaf "
7410 leaf = path.nodes[0];
7412 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7413 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Every csum_size bytes of item data stand for one sector of file data */
7418 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7419 csum_size) * root->sectorsize;
/* Reading and checksumming the data is optional */
7420 if (!check_data_csum)
7421 goto skip_csum_check;
7422 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7423 ret = check_extent_csums(root, key.offset, data_len,
7429 offset = key.offset;
/* This item does not continue the current range: check the range, then start a new one */
7430 } else if (key.offset != offset + num_bytes) {
7431 ret = check_extent_exists(root, offset, num_bytes);
7433 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7434 "there is no extent record\n",
7435 offset, offset+num_bytes);
7438 offset = key.offset;
7441 num_bytes += data_len;
7445 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset; a later field is only looked at when the earlier ones are equal.
 *
 * NOTE(review): presumably returns non-zero when @key sorts strictly before
 * @drop_key and 0 otherwise - confirm against the return statements.
 */
7449 static int is_dropped_key(struct btrfs_key *key,
7450 struct btrfs_key *drop_key) {
7451 if (key->objectid < drop_key->objectid)
7453 else if (key->objectid == drop_key->objectid) {
7454 if (key->type < drop_key->type)
7456 else if (key->type == drop_key->type) {
7457 if (key->offset < drop_key->offset)
7465 * Here are the rules for FULL_BACKREF.
7467 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7468 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7470 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7471 * if it happened after the relocation occurred since we'll have dropped the
7472 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7473 * have no real way to know for sure.
7475 * We process the blocks one root at a time, and we start from the lowest root
7476 * objectid and go to the highest. So we can just lookup the owner backref for
7477 * the record and if we don't find it then we know it doesn't exist and we have
7480 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7481 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7482 * be set or not and then we can check later once we've gathered all the refs.
/*
 * @extent_cache: cache tree searched for the extent record of @buf
 * @buf:          tree block whose FULL_BACKREF state is worked out
 * @ri:           root item record of the root currently being processed
 *
 * The result is delivered through *flags, which may get
 * BTRFS_BLOCK_FLAG_FULL_BACKREF OR-ed in. When the extent record already
 * carries a different decision, rec->bad_full_backref is set.
 */
7484 static int calc_extent_flag(struct cache_tree *extent_cache,
7485 struct extent_buffer *buf,
7486 struct root_item_record *ri,
7489 struct extent_record *rec;
7490 struct cache_extent *cache;
7491 struct tree_backref *tback;
/* Look up the record containing the first byte of @buf */
7494 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7495 /* we have added this extent before */
7499 rec = container_of(cache, struct extent_record, cache);
7502 * Except file/reloc tree, we can not have
7505 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @buf starts at the bytenr recorded in the root item record */
7510 if (buf->start == ri->bytenr)
7513 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Header owner equal to the root being processed */
7516 owner = btrfs_header_owner(buf);
7517 if (owner == ri->objectid)
/* Look for a tree backref with parent 0 that belongs to the header owner */
7520 tback = find_tree_backref(rec, 0, owner);
/* A previously recorded decision other than "not full backref" is flagged as bad */
7525 if (rec->flag_block_full_backref != FLAG_UNSET &&
7526 rec->flag_block_full_backref != 0)
7527 rec->bad_full_backref = 1;
/* FULL_BACKREF outcome: same consistency check against a recorded value other than 1 */
7530 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7531 if (rec->flag_block_full_backref != FLAG_UNSET &&
7532 rec->flag_block_full_backref != 1)
7533 rec->bad_full_backref = 1;
7537 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7539 fprintf(stderr, "Invalid key type(");
7540 print_key_type(stderr, 0, key_type);
7541 fprintf(stderr, ") found in root(");
7542 print_objectid(stderr, rootid, 0);
7543 fprintf(stderr, ")\n");
7547 * Check if the key is valid with its extent buffer.
7549 * This is an early check in case an invalid key exists in an extent buffer.
7550 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * @rootid:   objectid of the tree the key was found in
 * @key_type: type of the key being checked
 *
 * Each group of case labels below names the tree a key type is allowed in;
 * a mismatch is reported through report_mismatch_key_root().
 */
7553 static int check_type_with_root(u64 rootid, u8 key_type)
7556 /* Only valid in chunk tree */
7557 case BTRFS_DEV_ITEM_KEY:
7558 case BTRFS_CHUNK_ITEM_KEY:
7559 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7562 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, whereas the
 * neighbouring case labels are key types. BTRFS_EXTENT_CSUM_KEY may have
 * been intended - confirm, together with the condition below, before
 * changing anything.
 */
7563 case BTRFS_CSUM_TREE_OBJECTID:
7564 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items: extent tree only */
7568 case BTRFS_EXTENT_ITEM_KEY:
7569 case BTRFS_METADATA_ITEM_KEY:
7570 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7571 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items: root tree only */
7574 case BTRFS_ROOT_ITEM_KEY:
7575 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents: device tree only */
7578 case BTRFS_DEV_EXTENT_KEY:
7579 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7585 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block(s), read the first of them and feed its
 * contents into the caches.
 *
 * For a leaf, every item is dispatched on its key type: extent items and
 * backref items go to @extent_cache, chunk items to @chunk_cache, device
 * items to @dev_cache, block group items to @block_group_cache, device
 * extents to @dev_extent_cache. For a node, every child pointer gets an
 * extent record and a tree backref and is queued in @nodes or @pending.
 *
 * @bits:    scratch array filled by pick_next_pending()
 * @pending: blocks waiting to be processed
 * @seen:    passed to add_pending() together with the target tree
 * @reada:   blocks for which readahead has been issued
 * @nodes:   second queue used by add_pending() for child blocks
 * @ri:      root item record of the root being walked
 */
7589 static int run_next_block(struct btrfs_root *root,
7590 struct block_info *bits,
7593 struct cache_tree *pending,
7594 struct cache_tree *seen,
7595 struct cache_tree *reada,
7596 struct cache_tree *nodes,
7597 struct cache_tree *extent_cache,
7598 struct cache_tree *chunk_cache,
7599 struct rb_root *dev_cache,
7600 struct block_group_tree *block_group_cache,
7601 struct device_extent_tree *dev_extent_cache,
7602 struct root_item_record *ri)
7604 struct extent_buffer *buf;
7605 struct extent_record *rec = NULL;
7616 struct btrfs_key key;
7617 struct cache_extent *cache;
/* Choose the next blocks to work on; they are returned in bits[] */
7620 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7621 bits_nr, &reada_bits);
/* Remember the picked blocks in 'reada' and start readahead for them */
7626 for(i = 0; i < nritems; i++) {
7627 ret = add_cache_extent(reada, bits[i].start,
7632 /* fixme, get the parent transid */
7633 readahead_tree_block(root, bits[i].start,
/* Only the first picked block is processed in this call */
7637 *last = bits[0].start;
7638 bytenr = bits[0].start;
7639 size = bits[0].size;
/* Take the block out of the pending, reada and nodes trees */
7641 cache = lookup_cache_extent(pending, bytenr, size);
7643 remove_cache_extent(pending, cache);
7646 cache = lookup_cache_extent(reada, bytenr, size);
7648 remove_cache_extent(reada, cache);
7651 cache = lookup_cache_extent(nodes, bytenr, size);
7653 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the parent generation for the read */
7656 cache = lookup_cache_extent(extent_cache, bytenr, size);
7658 rec = container_of(cache, struct extent_record, cache);
7659 gen = rec->parent_generation;
7662 /* fixme, get the real parent transid */
7663 buf = read_tree_block(root, bytenr, size, gen);
7664 if (!extent_buffer_uptodate(buf)) {
7665 record_bad_block_io(root->fs_info,
7666 extent_cache, bytenr, size);
7670 nritems = btrfs_header_nritems(buf);
/*
 * Block flags: looked up in the extent tree unless init_extent_tree is
 * set; calc_extent_flag() computes them from the cached records instead.
 */
7673 if (!init_extent_tree) {
7674 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7675 btrfs_header_level(buf), 1, NULL,
7678 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7680 fprintf(stderr, "Couldn't calc extent flags\n");
7681 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7686 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7688 fprintf(stderr, "Couldn't calc extent flags\n");
7689 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check the FULL_BACKREF flag against the root being walked and the block header */
7693 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7695 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7696 ri->objectid == btrfs_header_owner(buf)) {
7698 * Ok we got to this block from it's original owner and
7699 * we have FULL_BACKREF set. Relocation can leave
7700 * converted blocks over so this is altogether possible,
7701 * however it's not possible if the generation > the
7702 * last snapshot, so check for this case.
7704 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7705 btrfs_header_generation(buf) > ri->last_snapshot) {
7706 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707 rec->bad_full_backref = 1;
7712 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7713 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7714 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7715 rec->bad_full_backref = 1;
/* Store the final decision in the extent record */
7719 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7720 rec->flag_block_full_backref = 1;
7724 rec->flag_block_full_backref = 0;
7726 owner = btrfs_header_owner(buf);
7729 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the unused space, then dispatch every item on its key type */
7733 if (btrfs_is_leaf(buf)) {
7734 btree_space_waste += btrfs_leaf_free_space(root, buf);
7735 for (i = 0; i < nritems; i++) {
7736 struct btrfs_file_extent_item *fi;
7737 btrfs_item_key_to_cpu(buf, &key, i);
7739 * Check key type against the leaf owner.
7740 * Could filter quite a lot of early error if
7743 if (check_type_with_root(btrfs_header_owner(buf),
7745 fprintf(stderr, "ignoring invalid key\n");
7748 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7749 process_extent_item(root, extent_cache, buf,
7753 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7754 process_extent_item(root, extent_cache, buf,
7758 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7760 btrfs_item_size_nr(buf, i);
7763 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7764 process_chunk_item(chunk_cache, &key, buf, i);
7767 if (key.type == BTRFS_DEV_ITEM_KEY) {
7768 process_device_item(dev_cache, &key, buf, i);
7771 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7772 process_block_group_item(block_group_cache,
7776 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7777 process_device_extent_item(dev_extent_cache,
7782 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7783 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7784 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: parent 0, root taken from the key offset */
7791 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7792 ret = add_tree_backref(extent_cache,
7793 key.objectid, 0, key.offset, 0);
7796 "add_tree_backref failed (leaf tree block): %s",
/* Keyed shared block ref: parent taken from the key offset, root 0 */
7800 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7801 ret = add_tree_backref(extent_cache,
7802 key.objectid, key.offset, 0, 0);
7805 "add_tree_backref failed (leaf shared block): %s",
7809 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7810 struct btrfs_extent_data_ref *ref;
7811 ref = btrfs_item_ptr(buf, i,
7812 struct btrfs_extent_data_ref);
7813 add_data_backref(extent_cache,
7815 btrfs_extent_data_ref_root(buf, ref),
7816 btrfs_extent_data_ref_objectid(buf,
7818 btrfs_extent_data_ref_offset(buf, ref),
7819 btrfs_extent_data_ref_count(buf, ref),
7820 0, root->sectorsize);
7823 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7824 struct btrfs_shared_data_ref *ref;
7825 ref = btrfs_item_ptr(buf, i,
7826 struct btrfs_shared_data_ref);
7827 add_data_backref(extent_cache,
7828 key.objectid, key.offset, 0, 0, 0,
7829 btrfs_shared_data_ref_count(buf, ref),
7830 0, root->sectorsize);
/* Orphan items that pass the test below are queued on the delete_items list */
7833 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7834 struct bad_item *bad;
7836 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7840 bad = malloc(sizeof(struct bad_item));
7843 INIT_LIST_HEAD(&bad->list);
7844 memcpy(&bad->key, &key,
7845 sizeof(struct btrfs_key));
7846 bad->root_id = owner;
7847 list_add_tail(&bad->list, &delete_items);
/*
 * The remaining code deals with EXTENT_DATA items; inline extents and
 * extents with a disk bytenr of 0 are tested for separately.
 */
7850 if (key.type != BTRFS_EXTENT_DATA_KEY)
7852 fi = btrfs_item_ptr(buf, i,
7853 struct btrfs_file_extent_item);
7854 if (btrfs_file_extent_type(buf, fi) ==
7855 BTRFS_FILE_EXTENT_INLINE)
7857 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7860 data_bytes_allocated +=
7861 btrfs_file_extent_disk_num_bytes(buf, fi);
7862 if (data_bytes_allocated < root->sectorsize) {
7865 data_bytes_referenced +=
7866 btrfs_file_extent_num_bytes(buf, fi);
/* The backref offset is the file offset minus the offset into the extent */
7867 add_data_backref(extent_cache,
7868 btrfs_file_extent_disk_bytenr(buf, fi),
7869 parent, owner, key.objectid, key.offset -
7870 btrfs_file_extent_offset(buf, fi), 1, 1,
7871 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: walk the child pointers */
7875 struct btrfs_key first_key;
7877 first_key.objectid = 0;
7880 btrfs_item_key_to_cpu(buf, &first_key, 0);
7881 level = btrfs_header_level(buf);
7882 for (i = 0; i < nritems; i++) {
7883 struct extent_record tmpl;
7885 ptr = btrfs_node_blockptr(buf, i);
7886 size = root->nodesize;
7887 btrfs_node_key_to_cpu(buf, &key, i);
/* At the drop level, child keys are compared with the drop key of the root */
7889 if ((level == ri->drop_level)
7890 && is_dropped_key(&key, &ri->drop_key)) {
/* Extent record template for the child block */
7895 memset(&tmpl, 0, sizeof(tmpl));
7896 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7897 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7902 tmpl.max_size = size;
7903 ret = add_extent_rec(extent_cache, &tmpl);
7907 ret = add_tree_backref(extent_cache, ptr, parent,
7911 "add_tree_backref failed (non-leaf block): %s",
/* Queue the child in either the nodes tree or the pending tree */
7917 add_pending(nodes, seen, ptr, size);
7919 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted space */
7922 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7923 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size accounting, by owner of the block */
7925 total_btree_bytes += buf->len;
7926 if (fs_root_objectid(btrfs_header_owner(buf)))
7927 total_fs_tree_bytes += buf->len;
7928 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7929 total_extent_tree_bytes += buf->len;
/* Reloc-owned block in mixed backref format without the RELOC header flag */
7930 if (!found_old_backref &&
7931 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7932 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7933 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7934 found_old_backref = 1;
7936 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and create its extent record
 * and tree backref in @extent_cache.
 *
 * @buf:          root block of a tree
 * @extent_cache: cache tree receiving the extent record and the backref
 * @pending:      queue used when the header level of @buf is 0
 * @seen:         passed to add_pending() together with the target queue
 * @nodes:        queue used when the header level of @buf is above 0
 */
7940 static int add_root_to_pending(struct extent_buffer *buf,
7941 struct cache_tree *extent_cache,
7942 struct cache_tree *pending,
7943 struct cache_tree *seen,
7944 struct cache_tree *nodes,
7947 struct extent_record tmpl;
7950 if (btrfs_header_level(buf) > 0)
7951 add_pending(nodes, seen, buf->start, buf->len);
7953 add_pending(pending, seen, buf->start, buf->len);
/* Extent record covering the root block itself */
7955 memset(&tmpl, 0, sizeof(tmpl));
7956 tmpl.start = buf->start;
7961 tmpl.max_size = buf->len;
7962 add_extent_rec(extent_cache, &tmpl);
/*
 * Reloc trees and blocks older than the mixed backref revision get a
 * backref whose parent is the block itself; the other call uses parent 0
 * and the root objectid.
 */
7964 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7965 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7966 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7969 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7974 /* as we fix the tree, we might be deleting blocks that
7975 * we're tracking for repair. This hook makes sure we
7976 * remove any backrefs for blocks as we are fixing them.
/*
 * @trans:         transaction handle (not used by the visible code)
 * @root:          gives access to fs_info->fsck_extent_cache
 * @bytenr:        start of the extent being freed
 * @num_bytes:     length of the extent being freed
 * @parent:        parent used to locate the backref
 * @root_objectid: root used to locate the backref
 * @owner:         owner of the reference; also decides data vs. tree handling
 * @offset:        offset used to locate a data backref
 */
7978 static int free_extent_hook(struct btrfs_trans_handle *trans,
7979 struct btrfs_root *root,
7980 u64 bytenr, u64 num_bytes, u64 parent,
7981 u64 root_objectid, u64 owner, u64 offset,
7984 struct extent_record *rec;
7985 struct cache_extent *cache;
7987 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data extents */
7989 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7990 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7994 rec = container_of(cache, struct extent_record, cache);
/* Data extent: drop refs_to_drop references from the matching data backref */
7996 struct data_backref *back;
7997 back = find_data_backref(rec, parent, root_objectid, owner,
7998 offset, 1, bytenr, num_bytes);
8001 if (back->node.found_ref) {
8002 back->found_ref -= refs_to_drop;
8004 rec->refs -= refs_to_drop;
8006 if (back->node.found_extent_tree) {
8007 back->num_refs -= refs_to_drop;
8008 if (rec->extent_item_refs)
8009 rec->extent_item_refs -= refs_to_drop;
/* Counters that reached zero clear the corresponding "found" bits */
8011 if (back->found_ref == 0)
8012 back->node.found_ref = 0;
8013 if (back->num_refs == 0)
8014 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set;
 * confirm whether "!back->node.found_ref" was intended here.
 */
8016 if (!back->node.found_extent_tree && back->node.found_ref) {
8017 list_del(&back->node.list);
/* Tree block: a tree backref is dropped as a whole */
8021 struct tree_backref *back;
8022 back = find_tree_backref(rec, parent, root_objectid);
8025 if (back->node.found_ref) {
8028 back->node.found_ref = 0;
8030 if (back->node.found_extent_tree) {
8031 if (rec->extent_item_refs)
8032 rec->extent_item_refs--;
8033 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data branch above - confirm */
8035 if (!back->node.found_extent_tree && back->node.found_ref) {
8036 list_del(&back->node.list);
8040 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the items of the extent tree whose key objectid is the given
 * bytenr and whose type is an extent item, a metadata item or one of the
 * backref item types. When an EXTENT_ITEM or METADATA_ITEM is removed the
 * block group accounting is updated as well.
 *
 * @trans: transaction the deletions are done in
 * @root:  any root; the extent root is reached through root->fs_info
 * @path:  caller supplied path, released after each step
 */
8045 static int delete_extent_records(struct btrfs_trans_handle *trans,
8046 struct btrfs_root *root,
8047 struct btrfs_path *path,
8050 struct btrfs_key key;
8051 struct btrfs_key found_key;
8052 struct extent_buffer *leaf;
/* Start from the largest offset for this bytenr */
8057 key.objectid = bytenr;
8059 key.offset = (u64)-1;
8062 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8069 if (path->slots[0] == 0)
8075 leaf = path->nodes[0];
8076 slot = path->slots[0];
8078 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8079 if (found_key.objectid != bytenr)
/*
 * Item types other than the ones listed are not deleted: the search key
 * is moved to just below the found key instead.
 */
8082 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8083 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8084 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8085 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8086 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8087 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8088 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8089 btrfs_release_path(path);
8090 if (found_key.type == 0) {
8091 if (found_key.offset == 0)
8093 key.offset = found_key.offset - 1;
8094 key.type = found_key.type;
8096 key.type = found_key.type - 1;
8097 key.offset = (u64)-1;
8101 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8102 found_key.objectid, found_key.type, found_key.offset);
8104 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8107 btrfs_release_path(path);
/* Extent length: key offset for EXTENT_ITEM, nodesize for METADATA_ITEM */
8109 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8110 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8111 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8112 found_key.offset : root->nodesize;
8114 ret = btrfs_update_block_group(trans, root, bytenr,
8121 btrfs_release_path(path);
8126 * for a single backref, this will allocate a new extent
8127 * and add the backref to it.
/*
 * @trans:     transaction the extent tree is modified in
 * @info:      fs_info; its extent_root is the tree being updated
 * @path:      path used for the item insertion, released after use
 * @rec:       extent record supplying start, max_size, generation and the
 *             tree block info (info_objectid, info_level)
 * @back:      backref to add; data and tree backrefs are handled separately
 * @allocated: NOTE(review): presumably tells whether the extent item already
 *             exists so that the insertion can be skipped - confirm
 * @flags:     extra extent flags OR-ed into the item of a tree block
 */
8129 static int record_extent(struct btrfs_trans_handle *trans,
8130 struct btrfs_fs_info *info,
8131 struct btrfs_path *path,
8132 struct extent_record *rec,
8133 struct extent_backref *back,
8134 int allocated, u64 flags)
8137 struct btrfs_root *extent_root = info->extent_root;
8138 struct extent_buffer *leaf;
8139 struct btrfs_key ins_key;
8140 struct btrfs_extent_item *ei;
8141 struct data_backref *dback;
8142 struct btrfs_tree_block_info *bi;
/* max_size is raised to at least the node size of the extent root */
8145 rec->max_size = max_t(u64, rec->max_size,
8146 info->extent_root->nodesize);
8149 u32 item_size = sizeof(*ei);
8152 item_size += sizeof(*bi);
/* New EXTENT_ITEM keyed by (start, EXTENT_ITEM, max_size) */
8154 ins_key.objectid = rec->start;
8155 ins_key.offset = rec->max_size;
8156 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8158 ret = btrfs_insert_empty_item(trans, extent_root, path,
8159 &ins_key, item_size);
8163 leaf = path->nodes[0];
8164 ei = btrfs_item_ptr(leaf, path->slots[0],
8165 struct btrfs_extent_item);
/* The item starts with zero refs; backrefs are added further down */
8167 btrfs_set_extent_refs(leaf, ei, 0);
8168 btrfs_set_extent_generation(leaf, ei, rec->generation);
8170 if (back->is_data) {
8171 btrfs_set_extent_flags(leaf, ei,
8172 BTRFS_EXTENT_FLAG_DATA);
8174 struct btrfs_disk_key copy_key;
/* Tree block: the btrfs_tree_block_info sits right behind the extent item */
8176 bi = (struct btrfs_tree_block_info *)(ei + 1);
8177 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Key stored in the block info: (info_objectid, 0, 0) */
8180 btrfs_set_disk_key_objectid(&copy_key,
8181 rec->info_objectid);
8182 btrfs_set_disk_key_type(&copy_key, 0);
8183 btrfs_set_disk_key_offset(&copy_key, 0);
8185 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8186 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8188 btrfs_set_extent_flags(leaf, ei,
8189 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8192 btrfs_mark_buffer_dirty(leaf);
8193 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8194 rec->max_size, 1, 0);
8197 btrfs_release_path(path);
/* Data backref: one btrfs_inc_extent_ref() call per reference found */
8200 if (back->is_data) {
8204 dback = to_data_backref(back);
8205 if (back->full_backref)
8206 parent = dback->parent;
8210 for (i = 0; i < dback->found_ref; i++) {
8211 /* if parent != 0, we're doing a full backref
8212 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8213 * just makes the backref allocator create a data
8216 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8217 rec->start, rec->max_size,
8221 BTRFS_FIRST_FREE_OBJECTID :
8227 fprintf(stderr, "adding new data backref"
8228 " on %llu %s %llu owner %llu"
8229 " offset %llu found %d\n",
8230 (unsigned long long)rec->start,
8231 back->full_backref ?
8233 back->full_backref ?
8234 (unsigned long long)parent :
8235 (unsigned long long)dback->root,
8236 (unsigned long long)dback->owner,
8237 (unsigned long long)dback->offset,
8241 struct tree_backref *tback;
8243 tback = to_tree_backref(back);
8244 if (back->full_backref)
8245 parent = tback->parent;
8249 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8250 rec->start, rec->max_size,
8251 parent, tback->root, 0, 0);
8252 fprintf(stderr, "adding new tree backref on "
8253 "start %llu len %llu parent %llu root %llu\n",
8254 rec->start, rec->max_size, parent, tback->root);
8257 btrfs_release_path(path);
/*
 * Search the list @entries for an extent_entry whose bytenr and bytes both
 * equal the given @bytenr and @bytes.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when the
 * list has none - confirm against the return statements.
 */
8261 static struct extent_entry *find_entry(struct list_head *entries,
8262 u64 bytenr, u64 bytes)
8264 struct extent_entry *entry = NULL;
8266 list_for_each_entry(entry, entries, list) {
8267 if (entry->bytenr == bytenr && entry->bytes == bytes)
8274 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8276 struct extent_entry *entry, *best = NULL, *prev = NULL;
8278 list_for_each_entry(entry, entries, list) {
8280 * If there are as many broken entries as entries then we know
8281 * not to trust this particular entry.
8283 if (entry->broken == entry->count)
8287 * Special case, when there are only two entries and 'best' is
8297 * If our current entry == best then we can't be sure our best
8298 * is really the best, so we need to keep searching.
8300 if (best && best->count == entry->count) {
8306 /* Prev == entry, not good enough, have to keep searching */
8307 if (!prev->broken && prev->count == entry->count)
8311 best = (prev->count > entry->count) ? prev : entry;
8312 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with @entry.
 *
 * @info:  fs_info used to read the root named by dback->root
 * @path:  caller supplied path, used for both tree searches
 * @dback: data backref identifying the file extent (root, owner, offset,
 *         disk_bytenr, bytes)
 * @entry: supplies the bytenr and bytes that are written into the item
 *
 * The item is first located with a read-only search, then searched again
 * inside a transaction with cow enabled and modified in place.
 *
 * On the final path the function returns ret when it is non-zero and the
 * result of btrfs_commit_transaction() otherwise.
 *
 * NOTE(review): a number of short lines of this function (braces and
 * error-path statements) are not visible in this listing; the comments here
 * describe only the statements that are shown.
 */
8320 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8321 struct data_backref *dback, struct extent_entry *entry)
8323 struct btrfs_trans_handle *trans;
8324 struct btrfs_root *root;
8325 struct btrfs_file_extent_item *fi;
8326 struct extent_buffer *leaf;
8327 struct btrfs_key key;
/* Read the fs root that the backref names in dback->root. */
8331 key.objectid = dback->root;
8332 key.type = BTRFS_ROOT_ITEM_KEY;
8333 key.offset = (u64)-1;
8334 root = btrfs_read_fs_root(info, &key);
8336 fprintf(stderr, "Couldn't find root for our ref\n");
8341 * The backref points to the original offset of the extent if it was
8342 * split, so we need to search down to the offset we have and then walk
8343 * forward until we find the backref we're looking for.
8345 key.objectid = dback->owner;
8346 key.type = BTRFS_EXTENT_DATA_KEY;
8347 key.offset = dback->offset;
/* Read-only lookup: no transaction handle, no cow. */
8348 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8350 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Ran off the end of the current leaf: continue in the next one. */
8355 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8356 ret = btrfs_next_leaf(root, path);
8358 fprintf(stderr, "Couldn't find our ref, next\n");
8362 leaf = path->nodes[0];
8363 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Only EXTENT_DATA items of the owning inode are candidates. */
8364 if (key.objectid != dback->owner ||
8365 key.type != BTRFS_EXTENT_DATA_KEY) {
8366 fprintf(stderr, "Couldn't find our ref, search\n");
8369 fi = btrfs_item_ptr(leaf, path->slots[0],
8370 struct btrfs_file_extent_item);
8371 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8372 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* The item we want carries exactly the values recorded in the backref. */
8374 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8379 btrfs_release_path(path);
8381 trans = btrfs_start_transaction(root, 1);
8383 return PTR_ERR(trans);
8386 * Ok we have the key of the file extent we want to fix, now we can cow
8387 * down to the thing and fix it.
8389 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8391 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8392 key.objectid, key.type, key.offset, ret);
8396 fprintf(stderr, "Well that's odd, we just found this key "
8397 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8402 leaf = path->nodes[0];
8403 fi = btrfs_item_ptr(leaf, path->slots[0],
8404 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported, not repaired. */
8406 if (btrfs_file_extent_compression(leaf, fi) &&
8407 dback->disk_bytenr != entry->bytenr) {
8408 fprintf(stderr, "Ref doesn't match the record start and is "
8409 "compressed, please take a btrfs-image of this file "
8410 "system and send it to a btrfs developer so they can "
8411 "complete this functionality for bytenr %Lu\n",
8412 dback->disk_bytenr);
/*
 * Three cases follow: a backref flagged broken just takes the entry's
 * bytenr; a ref starting after the entry and a ref starting before the
 * entry both take the entry's bytenr together with a file extent offset.
 */
8417 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8418 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8419 } else if (dback->disk_bytenr > entry->bytenr) {
8420 u64 off_diff, offset;
8422 off_diff = dback->disk_bytenr - entry->bytenr;
8423 offset = btrfs_file_extent_offset(leaf, fi);
8424 if (dback->disk_bytenr + offset +
8425 btrfs_file_extent_num_bytes(leaf, fi) >
8426 entry->bytenr + entry->bytes) {
8427 fprintf(stderr, "Ref is past the entry end, please "
8428 "take a btrfs-image of this file system and "
8429 "send it to a btrfs developer, ref %Lu\n",
8430 dback->disk_bytenr);
8435 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8436 btrfs_set_file_extent_offset(leaf, fi, offset);
8437 } else if (dback->disk_bytenr < entry->bytenr) {
8440 offset = btrfs_file_extent_offset(leaf, fi);
8441 if (dback->disk_bytenr + offset < entry->bytenr) {
8442 fprintf(stderr, "Ref is before the entry start, please"
8443 " take a btrfs-image of this file system and "
8444 "send it to a btrfs developer, ref %Lu\n",
8445 dback->disk_bytenr);
/* Re-base the offset from the ref's old start onto the entry's start. */
8450 offset += dback->disk_bytenr;
8451 offset -= entry->bytenr;
8452 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8453 btrfs_set_file_extent_offset(leaf, fi, offset);
8456 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8459 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8460 * only do this if we aren't using compression, otherwise it's a
8463 if (!btrfs_file_extent_compression(leaf, fi))
8464 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8466 printf("ram bytes may be wrong?\n");
8467 btrfs_mark_buffer_dirty(leaf);
/* Commit, drop the path, and prefer ret over the commit result. */
8469 err = btrfs_commit_transaction(trans, root);
8470 btrfs_release_path(path);
8471 return ret ? ret : err;
/*
 * Check that the data backrefs of @rec agree on one (bytenr, bytes) pair and
 * call repair_ref() for the ones that do not.
 *
 * @info: fs_info passed through to repair_ref()
 * @path: caller supplied path passed through to repair_ref()
 * @rec:  extent record whose backref list is examined
 *
 * A temporary list of extent_entry items is built, one per distinct
 * (disk_bytenr, bytes) pair seen on backrefs with found_ref set.
 * find_most_right_entry() chooses the pair to trust; if it cannot, the
 * record's own (start, nr) pair is looked up and, when missing, added as an
 * extra entry before choosing again.
 *
 * NOTE(review): the return statements and several short lines (braces,
 * counters, error assignments) are not visible in this listing; the comments
 * here describe only the statements that are shown.
 */
8474 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8475 struct extent_record *rec)
8477 struct extent_backref *back;
8478 struct data_backref *dback;
8479 struct extent_entry *entry, *best = NULL;
8482 int broken_entries = 0;
8487 * Metadata is easy and the backrefs should always agree on bytenr and
8488 * size, if not we've got bigger issues.
/* First pass: group the data backrefs by (disk_bytenr, bytes). */
8493 list_for_each_entry(back, &rec->backrefs, list) {
8494 if (back->full_backref || !back->is_data)
8497 dback = to_data_backref(back);
8500 * We only pay attention to backrefs that we found a real
8503 if (dback->found_ref == 0)
8507 * For now we only catch when the bytes don't match, not the
8508 * bytenr. We can easily do this at the same time, but I want
8509 * to have a fs image to test on before we just add repair
8510 * functionality willy-nilly so we know we won't screw up the
8514 entry = find_entry(&entries, dback->disk_bytenr,
/* No entry for this pair yet: allocate a zeroed one and append it. */
8517 entry = malloc(sizeof(struct extent_entry));
8522 memset(entry, 0, sizeof(*entry));
8523 entry->bytenr = dback->disk_bytenr;
8524 entry->bytes = dback->bytes;
8525 list_add_tail(&entry->list, &entries);
8530 * If we only have on entry we may think the entries agree when
8531 * in reality they don't so we have to do some extra checking.
8533 if (dback->disk_bytenr != rec->start ||
8534 dback->bytes != rec->nr || back->broken)
8545 /* Yay all the backrefs agree, carry on good sir */
8546 if (nr_entries <= 1 && !mismatch)
8549 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8550 "%Lu\n", rec->start);
8553 * First we want to see if the backrefs can agree amongst themselves who
8554 * is right, so figure out which one of the entries has the highest
8557 best = find_most_right_entry(&entries);
8560 * Ok so we may have an even split between what the backrefs think, so
8561 * this is where we use the extent ref to see what it thinks.
8564 entry = find_entry(&entries, rec->start, rec->nr);
8565 if (!entry && (!broken_entries || !rec->found_rec)) {
8566 fprintf(stderr, "Backrefs don't agree with each other "
8567 "and extent record doesn't agree with anybody,"
8568 " so we can't fix bytenr %Lu bytes %Lu\n",
8569 rec->start, rec->nr);
8572 } else if (!entry) {
8574 * Ok our backrefs were broken, we'll assume this is the
8575 * correct value and add an entry for this range.
8577 entry = malloc(sizeof(struct extent_entry));
8582 memset(entry, 0, sizeof(*entry));
8583 entry->bytenr = rec->start;
8584 entry->bytes = rec->nr;
8585 list_add_tail(&entry->list, &entries);
/* Choose again now that the extent record's own pair is on the list. */
8589 best = find_most_right_entry(&entries);
8591 fprintf(stderr, "Backrefs and extent record evenly "
8592 "split on who is right, this is going to "
8593 "require user input to fix bytenr %Lu bytes "
8594 "%Lu\n", rec->start, rec->nr);
8601 * I don't think this can happen currently as we'll abort() if we catch
8602 * this case higher up, but in case somebody removes that we still can't
8603 * deal with it properly here yet, so just bail out of that's the case.
8605 if (best->bytenr != rec->start) {
8606 fprintf(stderr, "Extent start and backref starts don't match, "
8607 "please use btrfs-image on this file system and send "
8608 "it to a btrfs developer so they can make fsck fix "
8609 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8610 rec->start, rec->nr);
8616 * Ok great we all agreed on an extent record, let's go find the real
8617 * references and fix up the ones that don't match.
/* Second pass: visit the same set of backrefs as the first pass. */
8619 list_for_each_entry(back, &rec->backrefs, list) {
8620 if (back->full_backref || !back->is_data)
8623 dback = to_data_backref(back);
8626 * Still ignoring backrefs that don't have a real ref attached
8629 if (dback->found_ref == 0)
8632 if (dback->bytes == best->bytes &&
8633 dback->disk_bytenr == best->bytenr)
8636 ret = repair_ref(info, path, dback, best);
8642 * Ok we messed with the actual refs, which means we need to drop our
8643 * entire cache and go back and rescan. I know this is a huge pain and
8644 * adds a lot of extra work, but it's the only way to be safe. Once all
8645 * the backrefs agree we may not need to do anything to the extent
/* Tear down the temporary entry list. */
8650 while (!list_empty(&entries)) {
8651 entry = list_entry(entries.next, struct extent_entry, list);
8652 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache by the first record on its dups list and
 * fold any other cached records overlapping that range into it.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * The final statement returns 0 when the resulting record still has
 * duplicates and 1 when it has none.
 *
 * NOTE(review): some short lines (the early return, loop header, frees) are
 * not visible in this listing; the comments here describe only the
 * statements that are shown.
 */
8658 static int process_duplicates(struct cache_tree *extent_cache,
8659 struct extent_record *rec)
8661 struct extent_record *good, *tmp;
8662 struct cache_extent *cache;
8666 * If we found a extent record for this extent then return, or if we
8667 * have more than one duplicate we are likely going to need to delete
8670 if (rec->found_rec || rec->num_duplicates > 1)
8673 /* Shouldn't happen but just in case */
8674 BUG_ON(!rec->num_duplicates);
8677 * So this happens if we end up with a backref that doesn't match the
8678 * actual extent entry. So either the backref is bad or the extent
8679 * entry is bad. Either way we want to have the extent_record actually
8680 * reflect what we found in the extent_tree, so we need to take the
8681 * duplicate out and use that as the extent_record since the only way we
8682 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8684 remove_cache_extent(extent_cache, &rec->cache);
/*
 * Promote the first duplicate: reset its list heads, cache range and check
 * state, then take over the reference count and backrefs of @rec.
 */
8686 good = to_extent_record(rec->dups.next);
8687 list_del_init(&good->list);
8688 INIT_LIST_HEAD(&good->backrefs);
8689 INIT_LIST_HEAD(&good->dups);
8690 good->cache.start = good->start;
8691 good->cache.size = good->nr;
8692 good->content_checked = 0;
8693 good->owner_ref_checked = 0;
8694 good->num_duplicates = 0;
8695 good->refs = rec->refs;
8696 list_splice_init(&rec->backrefs, &good->backrefs);
8698 cache = lookup_cache_extent(extent_cache, good->start,
8702 tmp = container_of(cache, struct extent_record, cache);
8705 * If we find another overlapping extent and it's found_rec is
8706 * set then it's a duplicate and we need to try and delete
8709 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Queue 'good' on the global duplicate list only once. */
8710 if (list_empty(&good->list))
8711 list_add_tail(&good->list,
8712 &duplicate_extents);
8713 good->num_duplicates += tmp->num_duplicates + 1;
8714 list_splice_init(&tmp->dups, &good->dups);
8715 list_del_init(&tmp->list);
8716 list_add_tail(&tmp->list, &good->dups);
8717 remove_cache_extent(extent_cache, &tmp->cache);
8722 * Ok we have another non extent item backed extent rec, so lets
8723 * just add it to this extent and carry on like we did above.
8725 good->refs += tmp->refs;
8726 list_splice_init(&tmp->backrefs, &good->backrefs);
8727 remove_cache_extent(extent_cache, &tmp->cache);
/* The promoted record takes the place of @rec in the cache. */
8730 ret = insert_cache_extent(extent_cache, &good->cache);
8733 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates attached to @rec.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction is started
 * @rec:  extent record whose dups list holds the duplicate records
 *
 * Records are gathered on a local delete_list; for each one with found_rec
 * set, the key (start, BTRFS_EXTENT_ITEM_KEY, nr) is searched in the extent
 * tree and the item removed with btrfs_del_item().
 *
 * The final statement returns ret when it is non-zero and nr_del otherwise.
 *
 * NOTE(review): several short lines (how 'good' is chosen, loop skips,
 * counters, frees) are not visible in this listing; the comments here
 * describe only the statements that are shown.
 */
8736 static int delete_duplicate_records(struct btrfs_root *root,
8737 struct extent_record *rec)
8739 struct btrfs_trans_handle *trans;
8740 LIST_HEAD(delete_list);
8741 struct btrfs_path path;
8742 struct extent_record *tmp, *good, *n;
8745 struct btrfs_key key;
8747 btrfs_init_path(&path);
8750 /* Find the record that covers all of the duplicates. */
8751 list_for_each_entry(tmp, &rec->dups, list) {
8752 if (good->start < tmp->start)
8754 if (good->nr > tmp->nr)
8757 if (tmp->start + tmp->nr < good->start + good->nr) {
8758 fprintf(stderr, "Ok we have overlapping extents that "
8759 "aren't completely covered by each other, this "
8760 "is going to require more careful thought. "
8761 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8762 tmp->start, tmp->nr, good->start, good->nr);
8769 list_add_tail(&rec->list, &delete_list);
/* The _safe iterator is needed because entries are moved during the walk. */
8771 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8774 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent tree. */
8777 root = root->fs_info->extent_root;
8778 trans = btrfs_start_transaction(root, 1);
8779 if (IS_ERR(trans)) {
8780 ret = PTR_ERR(trans);
8784 list_for_each_entry(tmp, &delete_list, list) {
8785 if (tmp->found_rec == 0)
8787 key.objectid = tmp->start;
8788 key.type = BTRFS_EXTENT_ITEM_KEY;
8789 key.offset = tmp->nr;
8791 /* Shouldn't happen but just in case */
8792 if (tmp->metadata) {
8793 fprintf(stderr, "Well this shouldn't happen, extent "
8794 "record overlaps but is metadata? "
8795 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* Search with ins_len -1 and cow 1, then delete the item found. */
8799 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8805 ret = btrfs_del_item(trans, root, &path);
8808 btrfs_release_path(&path);
8811 err = btrfs_commit_transaction(trans, root);
/* Unlink whatever is left on the local list and on rec->dups. */
8815 while (!list_empty(&delete_list)) {
8816 tmp = to_extent_record(delete_list.next);
8817 list_del_init(&tmp->list);
8823 while (!list_empty(&rec->dups)) {
8824 tmp = to_extent_record(rec->dups.next);
8825 list_del_init(&tmp->list);
8829 btrfs_release_path(&path);
/* Nothing failed and nothing was deleted: clear the duplicate count. */
8831 if (!ret && !nr_del)
8832 rec->num_duplicates = 0;
8834 return ret ? ret : nr_del;
/*
 * For every data backref of @rec that has no found_ref yet, look up the
 * file extent it names and, if one exists, copy that item's disk bytenr and
 * disk num bytes into the backref.
 *
 * @info:         fs_info used to read the root named by the backref
 * @path:         caller supplied path used for the lookups
 * @extent_cache: cache tree consulted for a record at the item's bytenr
 * @rec:          extent record whose backref list is walked
 *
 * A root read error other than -ENOENT is returned as PTR_ERR(root).
 *
 * NOTE(review): several short lines (loop skips, branch bodies, the final
 * return) are not visible in this listing; the comments here describe only
 * the statements that are shown.
 */
8837 static int find_possible_backrefs(struct btrfs_fs_info *info,
8838 struct btrfs_path *path,
8839 struct cache_tree *extent_cache,
8840 struct extent_record *rec)
8842 struct btrfs_root *root;
8843 struct extent_backref *back;
8844 struct data_backref *dback;
8845 struct cache_extent *cache;
8846 struct btrfs_file_extent_item *fi;
8847 struct btrfs_key key;
8851 list_for_each_entry(back, &rec->backrefs, list) {
8852 /* Don't care about full backrefs (poor unloved backrefs) */
8853 if (back->full_backref || !back->is_data)
8856 dback = to_data_backref(back);
8858 /* We found this one, we don't need to do a lookup */
8859 if (dback->found_ref)
8862 key.objectid = dback->root;
8863 key.type = BTRFS_ROOT_ITEM_KEY;
8864 key.offset = (u64)-1;
8866 root = btrfs_read_fs_root(info, &key);
8868 /* No root, definitely a bad ref, skip */
8869 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8871 /* Other err, exit */
8873 return PTR_ERR(root);
/* Read-only lookup of the file extent item the backref describes. */
8875 key.objectid = dback->owner;
8876 key.type = BTRFS_EXTENT_DATA_KEY;
8877 key.offset = dback->offset;
8878 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8880 btrfs_release_path(path);
8883 /* Didn't find it, we can carry on */
8888 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8889 struct btrfs_file_extent_item);
8890 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8891 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8892 btrfs_release_path(path);
/* Is there already a cached record covering the first byte of the extent? */
8893 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8895 struct extent_record *tmp;
8896 tmp = container_of(cache, struct extent_record, cache);
8899 * If we found an extent record for the bytenr for this
8900 * particular backref then we can't add it to our
8901 * current extent record. We only want to add backrefs
8902 * that don't have a corresponding extent item in the
8903 * extent tree since they likely belong to this record
8904 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the values read from the file extent item. */
8910 dback->found_ref += 1;
8911 dback->disk_bytenr = bytenr;
8912 dback->bytes = bytes;
8915 * Set this so the verify backref code knows not to trust the
8916 * values in this backref.
8925 * Record orphan data ref into corresponding root.
8927 * Return 0 if the extent item contains data ref and recorded.
8928 * Return 1 if the extent item contains no useful data ref
8929 * On that case, it may contains only shared_dataref or metadata backref
8930 * or the file extent exists(this should be handled by the extent bytenr
8932 * Return <0 if something goes wrong.
8934 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8935 struct extent_record *rec)
8937 struct btrfs_key key;
8938 struct btrfs_root *dest_root;
8939 struct extent_backref *back;
8940 struct data_backref *dback;
8941 struct orphan_data_extent *orphan;
8942 struct btrfs_path path;
8943 int recorded_data_ref = 0;
8948 btrfs_init_path(&path);
8949 list_for_each_entry(back, &rec->backrefs, list) {
8950 if (back->full_backref || !back->is_data ||
8951 !back->found_extent_tree)
8953 dback = to_data_backref(back);
8954 if (dback->found_ref)
8956 key.objectid = dback->root;
8957 key.type = BTRFS_ROOT_ITEM_KEY;
8958 key.offset = (u64)-1;
8960 dest_root = btrfs_read_fs_root(fs_info, &key);
8962 /* For non-exist root we just skip it */
8963 if (IS_ERR(dest_root) || !dest_root)
8966 key.objectid = dback->owner;
8967 key.type = BTRFS_EXTENT_DATA_KEY;
8968 key.offset = dback->offset;
8970 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8971 btrfs_release_path(&path);
8973 * For ret < 0, it's OK since the fs-tree may be corrupted,
8974 * we need to record it for inode/file extent rebuild.
8975 * For ret > 0, we record it only for file extent rebuild.
8976 * For ret == 0, the file extent exists but only bytenr
8977 * mismatch, let the original bytenr fix routine to handle,
8983 orphan = malloc(sizeof(*orphan));
8988 INIT_LIST_HEAD(&orphan->list);
8989 orphan->root = dback->root;
8990 orphan->objectid = dback->owner;
8991 orphan->offset = dback->offset;
8992 orphan->disk_bytenr = rec->cache.start;
8993 orphan->disk_len = rec->cache.size;
8994 list_add(&dest_root->orphan_data_extents, &orphan->list);
8995 recorded_data_ref = 1;
8998 btrfs_release_path(&path);
9000 return !recorded_data_ref;
/*
 * Repair the extent tree references of @rec in three steps: make the
 * backrefs agree (verify_backrefs), delete the existing extent records
 * (delete_extent_records), then call record_extent() for each backref that
 * has found_ref set.
 *
 * @info:         fs_info providing extent_root and corrupt_blocks
 * @extent_cache: cache tree passed to find_possible_backrefs()
 * @rec:          extent record being repaired
 *
 * NOTE(review): the error checks after each step, the 'out' label and the
 * return statement are not visible in this listing; the comments here
 * describe only the statements that are shown.
 */
9006 * when an incorrect extent item is found, this will delete
9007 * all of the existing entries for it and recreate them
9008 * based on what the tree scan found.
9010 static int fixup_extent_refs(struct btrfs_fs_info *info,
9011 struct cache_tree *extent_cache,
9012 struct extent_record *rec)
9014 struct btrfs_trans_handle *trans = NULL;
9016 struct btrfs_path path;
9017 struct list_head *cur = rec->backrefs.next;
9018 struct cache_extent *cache;
9019 struct extent_backref *back;
/* The flags value is handed to record_extent() further down. */
9023 if (rec->flag_block_full_backref)
9024 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9026 btrfs_init_path(&path);
9027 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9029 * Sometimes the backrefs themselves are so broken they don't
9030 * get attached to any meaningful rec, so first go back and
9031 * check any of our backrefs that we couldn't find and throw
9032 * them into the list if we find the backref so that
9033 * verify_backrefs can figure out what to do.
9035 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9040 /* step one, make sure all of the backrefs agree */
9041 ret = verify_backrefs(info, &path, rec);
9045 trans = btrfs_start_transaction(info->extent_root, 1);
9046 if (IS_ERR(trans)) {
9047 ret = PTR_ERR(trans);
9051 /* step two, delete all the existing records */
9052 ret = delete_extent_records(trans, info->extent_root, &path,
9058 /* was this block corrupt? If so, don't add references to it */
9059 cache = lookup_cache_extent(info->corrupt_blocks,
9060 rec->start, rec->max_size);
9066 /* step three, recreate all the refs we did find */
9067 while(cur != &rec->backrefs) {
9068 back = to_extent_backref(cur);
9072 * if we didn't find any references, don't create a
9075 if (!back->found_ref)
/* The bad_full_backref mark is cleared before the backref is recorded. */
9078 rec->bad_full_backref = 0;
9079 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9087 int err = btrfs_commit_transaction(trans, info->extent_root);
9093 fprintf(stderr, "Repaired extent references for %llu\n",
9094 (unsigned long long)rec->start);
9096 btrfs_release_path(&path);
9100 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9101 struct extent_record *rec)
9103 struct btrfs_trans_handle *trans;
9104 struct btrfs_root *root = fs_info->extent_root;
9105 struct btrfs_path path;
9106 struct btrfs_extent_item *ei;
9107 struct btrfs_key key;
9111 key.objectid = rec->start;
9112 if (rec->metadata) {
9113 key.type = BTRFS_METADATA_ITEM_KEY;
9114 key.offset = rec->info_level;
9116 key.type = BTRFS_EXTENT_ITEM_KEY;
9117 key.offset = rec->max_size;
9120 trans = btrfs_start_transaction(root, 0);
9122 return PTR_ERR(trans);
9124 btrfs_init_path(&path);
9125 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9127 btrfs_release_path(&path);
9128 btrfs_commit_transaction(trans, root);
9131 fprintf(stderr, "Didn't find extent for %llu\n",
9132 (unsigned long long)rec->start);
9133 btrfs_release_path(&path);
9134 btrfs_commit_transaction(trans, root);
9138 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9139 struct btrfs_extent_item);
9140 flags = btrfs_extent_flags(path.nodes[0], ei);
9141 if (rec->flag_block_full_backref) {
9142 fprintf(stderr, "setting full backref on %llu\n",
9143 (unsigned long long)key.objectid);
9144 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9146 fprintf(stderr, "clearing full backref on %llu\n",
9147 (unsigned long long)key.objectid);
9148 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9150 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9151 btrfs_mark_buffer_dirty(path.nodes[0]);
9152 btrfs_release_path(&path);
9153 ret = btrfs_commit_transaction(trans, root);
9155 fprintf(stderr, "Repaired extent flags for %llu\n",
9156 (unsigned long long)rec->start);
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction used for the cow search
 * @info:    fs_info providing extent_root
 * @corrupt: corrupt block record (key, level, cache.start)
 *
 * The search is stopped one level above corrupt->level; the slot returned by
 * the search is tried first, then every slot of that node is scanned for a
 * block pointer equal to corrupt->cache.start.
 *
 * NOTE(review): the labels, gotos, error assignments and the return
 * statement are not visible in this listing; the comments here describe
 * only the statements that are shown.
 */
9161 /* right now we only prune from the extent allocation tree */
9162 static int prune_one_block(struct btrfs_trans_handle *trans,
9163 struct btrfs_fs_info *info,
9164 struct btrfs_corrupt_block *corrupt)
9167 struct btrfs_path path;
9168 struct extent_buffer *eb;
9172 int level = corrupt->level + 1;
9174 btrfs_init_path(&path);
9176 /* we want to stop at the parent to our busted block */
9177 path.lowest_level = level;
9179 ret = btrfs_search_slot(trans, info->extent_root,
9180 &corrupt->key, &path, -1, 1);
9185 eb = path.nodes[level];
9192 * hopefully the search gave us the block we want to prune,
9193 * lets try that first
9195 slot = path.slots[level];
9196 found = btrfs_node_blockptr(eb, slot);
9197 if (found == corrupt->cache.start)
9200 nritems = btrfs_header_nritems(eb);
9202 /* the search failed, lets scan this node and hope we find it */
9203 for (slot = 0; slot < nritems; slot++) {
9204 found = btrfs_node_blockptr(eb, slot);
9205 if (found == corrupt->cache.start)
9209 * we couldn't find the bad block. TODO, search all the nodes for pointers
/* Reaching the root node of the extent tree is handled as a separate case. */
9212 if (eb == info->extent_root->node) {
9217 btrfs_release_path(&path);
9222 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9223 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9226 btrfs_release_path(&path);
9230 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9232 struct btrfs_trans_handle *trans = NULL;
9233 struct cache_extent *cache;
9234 struct btrfs_corrupt_block *corrupt;
9237 cache = search_cache_extent(info->corrupt_blocks, 0);
9241 trans = btrfs_start_transaction(info->extent_root, 1);
9243 return PTR_ERR(trans);
9245 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9246 prune_one_block(trans, info, corrupt);
9247 remove_cache_extent(info->corrupt_blocks, cache);
9250 return btrfs_commit_transaction(trans, info->extent_root);
9254 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9256 struct btrfs_block_group_cache *cache;
9261 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9262 &start, &end, EXTENT_DIRTY);
9265 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9270 cache = btrfs_lookup_first_block_group(fs_info, start);
9275 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every record in @extent_cache, report reference problems on stderr
 * and, in repair mode, try to fix them.
 *
 * @root:         any root of the filesystem (its fs_info is used)
 * @extent_cache: cache tree of extent_record items; records are removed
 *                from it as they are processed
 *
 * Visible phases:
 *  - the ranges of all cached records and of all corrupt blocks are marked
 *    dirty in fs_info->excluded_extents, then prune_corrupt_blocks() and
 *    reset_cached_block_groups() are called;
 *  - while repairing, records on duplicate_extents are passed to
 *    process_duplicates() and delete_duplicate_records();
 *  - each remaining record is checked for multiple extent items, a ref
 *    count mismatch, backpointer mismatch, failed owner ref check, bad full
 *    backref, stripe crossing and chunk type mismatch; fixup_extent_refs()
 *    and fixup_extent_flags() are the repair calls;
 *  - finally btrfs_fix_block_accounting() is run inside a transaction on
 *    the extent root.
 *
 * NOTE(review): many short lines (the repair-mode conditions, loop headers,
 * error bookkeeping, labels and return statements) are not visible in this
 * listing, so the return value contract cannot be stated from here.
 */
9279 static int check_extent_refs(struct btrfs_root *root,
9280 struct cache_tree *extent_cache)
9282 struct extent_record *rec;
9283 struct cache_extent *cache;
9289 * if we're doing a repair, we have to make sure
9290 * we don't allocate from the problem extents.
9291 * In the worst case, this will be all the
/* Exclude the byte range of every cached extent record. */
9294 cache = search_cache_extent(extent_cache, 0);
9296 rec = container_of(cache, struct extent_record, cache);
9297 set_extent_dirty(root->fs_info->excluded_extents,
9299 rec->start + rec->max_size - 1);
9300 cache = next_cache_extent(cache);
9303 /* pin down all the corrupted blocks too */
9304 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9306 set_extent_dirty(root->fs_info->excluded_extents,
9308 cache->start + cache->size - 1);
9309 cache = next_cache_extent(cache);
9311 prune_corrupt_blocks(root->fs_info);
9312 reset_cached_block_groups(root->fs_info);
9315 reset_cached_block_groups(root->fs_info);
9318 * We need to delete any duplicate entries we find first otherwise we
9319 * could mess up the extent tree when we have backrefs that actually
9320 * belong to a different extent item and not the weird duplicate one.
9322 while (repair && !list_empty(&duplicate_extents)) {
9323 rec = to_extent_record(duplicate_extents.next);
9324 list_del_init(&rec->list);
9326 /* Sometimes we can find a backref before we find an actual
9327 * extent, so we need to process it a little bit to see if there
9328 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9329 * if this is a backref screwup. If we need to delete stuff
9330 * process_duplicates() will return 0, otherwise it will return
9333 if (process_duplicates(extent_cache, rec))
9335 ret = delete_duplicate_records(root, rec);
9339 * delete_duplicate_records will return the number of entries
9340 * deleted, so if it's greater than 0 then we know we actually
9341 * did something and we need to remove.
9354 cache = search_cache_extent(extent_cache, 0);
9357 rec = container_of(cache, struct extent_record, cache);
9358 if (rec->num_duplicates) {
9359 fprintf(stderr, "extent item %llu has multiple extent "
9360 "items\n", (unsigned long long)rec->start);
9364 if (rec->refs != rec->extent_item_refs) {
9365 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9366 (unsigned long long)rec->start,
9367 (unsigned long long)rec->nr);
9368 fprintf(stderr, "extent item %llu, found %llu\n",
9369 (unsigned long long)rec->extent_item_refs,
9370 (unsigned long long)rec->refs);
9371 ret = record_orphan_data_extents(root->fs_info, rec);
9377 if (all_backpointers_checked(rec, 1)) {
9378 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9379 (unsigned long long)rec->start,
9380 (unsigned long long)rec->nr);
9384 if (!rec->owner_ref_checked) {
9385 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9386 (unsigned long long)rec->start,
9387 (unsigned long long)rec->nr);
9392 if (repair && fix) {
9393 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9399 if (rec->bad_full_backref) {
9400 fprintf(stderr, "bad full backref, on [%llu]\n",
9401 (unsigned long long)rec->start);
9403 ret = fixup_extent_flags(root->fs_info, rec);
9411 * Although it's not a extent ref's problem, we reuse this
9412 * routine for error reporting.
9413 * No repair function yet.
9415 if (rec->crossing_stripes) {
9417 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9418 rec->start, rec->start + rec->max_size);
9422 if (rec->wrong_chunk_type) {
9424 "bad extent [%llu, %llu), type mismatch with chunk\n",
9425 rec->start, rec->start + rec->max_size);
9429 remove_cache_extent(extent_cache, cache);
9430 free_all_extent_backrefs(rec);
9431 if (!init_extent_tree && repair && (!cur_err || fix))
9432 clear_extent_dirty(root->fs_info->excluded_extents,
9434 rec->start + rec->max_size - 1);
9439 if (ret && ret != -EAGAIN) {
9440 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9443 struct btrfs_trans_handle *trans;
9445 root = root->fs_info->extent_root;
9446 trans = btrfs_start_transaction(root, 1);
9447 if (IS_ERR(trans)) {
9448 ret = PTR_ERR(trans);
9452 btrfs_fix_block_accounting(trans, root);
9453 ret = btrfs_commit_transaction(trans, root);
9462 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9466 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9467 stripe_size = length;
9468 stripe_size /= num_stripes;
9469 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9470 stripe_size = length * 2;
9471 stripe_size /= num_stripes;
9472 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9473 stripe_size = length;
9474 stripe_size /= (num_stripes - 1);
9475 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9476 stripe_size = length;
9477 stripe_size /= (num_stripes - 2);
9479 stripe_size = length;
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * @chunk_rec:         chunk being checked
 * @block_group_cache: tree searched for a block group covering
 *                     the chunk's range
 * @dev_extent_cache:  tree searched for a device extent per stripe
 *
 * A matching block group is unlinked from its list and stored in
 * chunk_rec->bg_rec; each matching device extent is moved onto
 * chunk_rec->dextents.  The expected device extent length comes from
 * calc_stripe_length().
 *
 * NOTE(review): the last parameter, the result assignments and the return
 * statements are not visible in this listing; the return values are as
 * described by the original comment lines that follow.
 */
9485 * Check the chunk with its block group/dev list ref:
9486 * Return 0 if all refs seems valid.
9487 * Return 1 if part of refs seems valid, need later check for rebuild ref
9488 * like missing block group and needs to search extent tree to rebuild them.
9489 * Return -1 if essential refs are missing and unable to rebuild.
9491 static int check_chunk_refs(struct chunk_record *chunk_rec,
9492 struct block_group_tree *block_group_cache,
9493 struct device_extent_tree *dev_extent_cache,
9496 struct cache_extent *block_group_item;
9497 struct block_group_record *block_group_rec;
9498 struct cache_extent *dev_extent_item;
9499 struct device_extent_record *dev_extent_rec;
9503 int metadump_v2 = 0;
9507 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9510 if (block_group_item) {
9511 block_group_rec = container_of(block_group_item,
9512 struct block_group_record,
/*
 * The chunk's length and offset are compared with the block group's
 * offset and objectid fields respectively.
 */
9514 if (chunk_rec->length != block_group_rec->offset ||
9515 chunk_rec->offset != block_group_rec->objectid ||
9517 chunk_rec->type_flags != block_group_rec->flags)) {
9520 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9521 chunk_rec->objectid,
9526 chunk_rec->type_flags,
9527 block_group_rec->objectid,
9528 block_group_rec->type,
9529 block_group_rec->offset,
9530 block_group_rec->offset,
9531 block_group_rec->objectid,
9532 block_group_rec->flags);
/* Match: unlink the block group from its list and attach it to the chunk. */
9535 list_del_init(&block_group_rec->list);
9536 chunk_rec->bg_rec = block_group_rec;
9541 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9542 chunk_rec->objectid,
9547 chunk_rec->type_flags);
/* Every stripe is expected to have a device extent of this length. */
9554 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9555 chunk_rec->num_stripes);
9556 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9557 devid = chunk_rec->stripes[i].devid;
9558 offset = chunk_rec->stripes[i].offset;
9559 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9560 devid, offset, length);
9561 if (dev_extent_item) {
9562 dev_extent_rec = container_of(dev_extent_item,
9563 struct device_extent_record,
9565 if (dev_extent_rec->objectid != devid ||
9566 dev_extent_rec->offset != offset ||
9567 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9568 dev_extent_rec->length != length) {
9571 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9572 chunk_rec->objectid,
9575 chunk_rec->stripes[i].devid,
9576 chunk_rec->stripes[i].offset,
9577 dev_extent_rec->objectid,
9578 dev_extent_rec->offset,
9579 dev_extent_rec->length);
/* Match: move the device extent onto the chunk's own list. */
9582 list_move(&dev_extent_rec->chunk_list,
9583 &chunk_rec->dextents);
9588 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9589 chunk_rec->objectid,
9592 chunk_rec->stripes[i].devid,
9593 chunk_rec->stripes[i].offset);
9600 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - cross-check every chunk record against the block group
 * and device extent caches.
 *
 * @chunk_cache:       cache tree whose entries are struct chunk_record
 * @block_group_cache: block group records, handed to check_chunk_refs()
 * @dev_extent_cache:  device extent records, handed to check_chunk_refs()
 * @good:              optional list; receives a record when err == 0
 * @bad:               list that receives a record in the remaining case
 * @rebuild:           optional list; receives a record when err > 0
 * @silent:            forwarded unchanged to check_chunk_refs()
 *
 * After the chunk walk, the block groups still linked on
 * block_group_cache->block_groups and the device extents linked on
 * dev_extent_cache->no_chunk_orphans are iterated and reported as having no
 * matching chunk.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * guard in front of the "bad" list insertion, the message calls and the
 * final return) are not visible in this view; the return value is therefore
 * not documented here.
 */
9601 int check_chunks(struct cache_tree *chunk_cache,
9602 struct block_group_tree *block_group_cache,
9603 struct device_extent_tree *dev_extent_cache,
9604 struct list_head *good, struct list_head *bad,
9605 struct list_head *rebuild, int silent)
9607 struct cache_extent *chunk_item;
9608 struct chunk_record *chunk_rec;
9609 struct block_group_record *bg_rec;
9610 struct device_extent_record *dext_rec;
/* Walk the chunk cache in order, checking one chunk record per iteration. */
9614 chunk_item = first_cache_extent(chunk_cache);
9615 while (chunk_item) {
9616 chunk_rec = container_of(chunk_item, struct chunk_record,
9618 err = check_chunk_refs(chunk_rec, block_group_cache,
9619 dev_extent_cache, silent);
/* Sort the record onto one of the caller-supplied lists by result. */
9622 if (err == 0 && good)
9623 list_add_tail(&chunk_rec->list, good);
9624 if (err > 0 && rebuild)
9625 list_add_tail(&chunk_rec->list, rebuild);
/* NOTE(review): guard for this insertion is not visible - presumably err < 0 && bad; confirm. */
9627 list_add_tail(&chunk_rec->list, bad);
9628 chunk_item = next_cache_extent(chunk_item);
/* Report block groups that are still on the block_groups list. */
9631 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9634 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Report device extents that are still on the no_chunk_orphans list. */
9642 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9646 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - compare the summed length of a device's extents with
 * the byte_used value stored in its device record.
 *
 * @dev_rec:    device record to verify (devid and byte_used are read)
 * @dext_cache: device extent cache; its tree is searched from
 *              (dev_rec->devid, 0) onwards
 *
 * Every device extent visited is unlinked from its device_list and its
 * length is added to total_byte. A mismatch between total_byte and
 * dev_rec->byte_used is reported with a message.
 *
 * NOTE(review): the declaration of total_byte, the loop header, the action
 * taken when the objectid differs, and the return statements are not visible
 * in this view.
 */
9657 static int check_device_used(struct device_record *dev_rec,
9658 struct device_extent_tree *dext_cache)
9660 struct cache_extent *cache;
9661 struct device_extent_record *dev_extent_rec;
/* Start at the first cached device extent at or after (devid, 0). */
9664 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9666 dev_extent_rec = container_of(cache,
9667 struct device_extent_record,
/* An extent whose objectid differs belongs to another device. */
9669 if (dev_extent_rec->objectid != dev_rec->devid)
/* Account this extent and detach it from its device_list. */
9672 list_del_init(&dev_extent_rec->device_list);
9673 total_byte += dev_extent_rec->length;
9674 cache = next_cache_extent(cache);
/* The sum of the extent lengths must equal the device item's byte_used. */
9677 if (total_byte != dev_rec->byte_used) {
9679 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9680 total_byte, dev_rec->byte_used, dev_rec->objectid,
9681 dev_rec->type, dev_rec->offset);
9688 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every device record and then
 * report device extents that are still listed as having no device.
 *
 * @dev_cache:        rb-tree of struct device_record (linked through .node)
 * @dev_extent_cache: device extent cache shared with check_device_used()
 *
 * The rb-tree is walked with rb_first()/rb_next(). Afterwards every entry
 * on dev_extent_cache->no_device_orphans is reported with its objectid,
 * offset and length.
 *
 * NOTE(review): the loop header, the handling of err and the return value
 * are not visible in this view.
 */
9689 static int check_devices(struct rb_root *dev_cache,
9690 struct device_extent_tree *dev_extent_cache)
9692 struct rb_node *dev_node;
9693 struct device_record *dev_rec;
9694 struct device_extent_record *dext_rec;
/* Visit every device record in rb-tree order. */
9698 dev_node = rb_first(dev_cache);
9700 dev_rec = container_of(dev_node, struct device_record, node);
9701 err = check_device_used(dev_rec, dev_extent_cache);
9705 dev_node = rb_next(dev_node);
/* Whatever is still on no_device_orphans was not claimed by a device. */
9707 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9710 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9711 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record describing one tree
 * root and append it to @head.
 *
 * @head:          list the new record is appended to (tail insertion)
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @level_size:    stored in ri_rec->level_size
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): callers in this file pass drop_key == NULL for roots that
 * are not being dropped, so the memcpy() is presumably guarded by a check on
 * a line not visible here - confirm. The malloc() failure check and the
 * return statements are likewise not visible.
 */
9718 static int add_root_item_to_list(struct list_head *head,
9719 u64 objectid, u64 bytenr, u64 last_snapshot,
9720 u8 level, u8 drop_level,
9721 int level_size, struct btrfs_key *drop_key)
9724 struct root_item_record *ri_rec;
9725 ri_rec = malloc(sizeof(*ri_rec));
/* Copy the caller-supplied description of the root into the record. */
9728 ri_rec->bytenr = bytenr;
9729 ri_rec->objectid = objectid;
9730 ri_rec->level = level;
9731 ri_rec->level_size = level_size;
9732 ri_rec->drop_level = drop_level;
9733 ri_rec->last_snapshot = last_snapshot;
9735 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Keep insertion order: new records go to the tail. */
9736 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty a list of root_item_record entries.
 *
 * @list: list head to drain
 *
 * Repeatedly takes the first entry and unlinks it until the list is empty.
 * NOTE(review): the line releasing the record's memory is not visible in
 * this view - presumably free(ri_rec) follows the unlink; confirm.
 */
9741 static void free_root_item_list(struct list_head *list)
9743 struct root_item_record *ri_rec;
9745 while (!list_empty(list)) {
9746 ri_rec = list_first_entry(list, struct root_item_record,
9748 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - consume a list of root_item_record entries, queueing
 * each root block for scanning and driving run_next_block().
 *
 * @list: list of root_item_record entries; each processed entry is unlinked
 * @root: root whose fs_info supplies the tree root used for reading blocks;
 *        also passed on to run_next_block()
 * @bits: block_info array passed on to run_next_block()
 * The remaining parameters (pending, seen, reada, nodes, extent_cache,
 * chunk_cache, dev_cache, block_group_cache, dev_extent_cache) are the
 * caches handed through to add_root_to_pending() and run_next_block().
 *
 * For every record the root block is read with read_tree_block() at
 * rec->bytenr with size rec->level_size, added via add_root_to_pending()
 * under rec->objectid, and run_next_block() is called with that record.
 * After the list loop run_next_block() is called with a NULL record.
 *
 * NOTE(review): one parameter line (bits_nr is used in the body), the local
 * declarations of ret and last, the conditions around both run_next_block()
 * calls, all error exits and the return value are not visible in this view.
 */
9753 static int deal_root_from_list(struct list_head *list,
9754 struct btrfs_root *root,
9755 struct block_info *bits,
9757 struct cache_tree *pending,
9758 struct cache_tree *seen,
9759 struct cache_tree *reada,
9760 struct cache_tree *nodes,
9761 struct cache_tree *extent_cache,
9762 struct cache_tree *chunk_cache,
9763 struct rb_root *dev_cache,
9764 struct block_group_tree *block_group_cache,
9765 struct device_extent_tree *dev_extent_cache)
9770 while (!list_empty(list)) {
9771 struct root_item_record *rec;
9772 struct extent_buffer *buf;
/* Always work on the entry at the head of the list. */
9773 rec = list_entry(list->next,
9774 struct root_item_record, list);
/* Read the root block described by the record. */
9776 buf = read_tree_block(root->fs_info->tree_root,
9777 rec->bytenr, rec->level_size, 0);
/* An unreadable root block: drop the buffer reference (exit path not visible). */
9778 if (!extent_buffer_uptodate(buf)) {
9779 free_extent_buffer(buf);
/* Register the root block so the scanner will visit it. */
9783 ret = add_root_to_pending(buf, extent_cache, pending,
9784 seen, nodes, rec->objectid);
9788 * To rebuild extent tree, we need deal with snapshot
9789 * one by one, otherwise we deal with node firstly which
9790 * can maximize readahead.
9793 ret = run_next_block(root, bits, bits_nr, &last,
9794 pending, seen, reada, nodes,
9795 extent_cache, chunk_cache,
9796 dev_cache, block_group_cache,
9797 dev_extent_cache, rec);
/* Done with this root: release the buffer and unlink the record. */
9801 free_extent_buffer(buf);
9802 list_del(&rec->list);
/* Remaining work is processed without an associated root record. */
9808 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9809 reada, nodes, extent_cache, chunk_cache,
9810 dev_cache, block_group_cache,
9811 dev_extent_cache, NULL);
/*
 * check_chunks_and_extents - top-level pass that scans all tree roots and
 * then cross-checks chunks, extent references and devices.
 *
 * @root: any root of the filesystem; its fs_info is used throughout
 *
 * Visible sequence:
 *   1. Initialise the local caches and lists and hook some of them into
 *      root->fs_info (excluded_extents, fsck_extent_cache, free_extent_hook,
 *      corrupt_blocks).
 *   2. Allocate the bits array and, if progress reporting is enabled, start
 *      the TASK_EXTENTS progress task.
 *   3. Queue the tree root and the chunk root on normal_trees.
 *   4. Iterate the items of the tree root; every ROOT_ITEM is queued on
 *      normal_trees when its drop_progress objectid is 0, otherwise on
 *      dropping_trees together with its drop key.
 *   5. Process normal_trees, then dropping_trees, via deal_root_from_list().
 *   6. Run check_chunks(), check_extent_refs() and check_devices().
 *   7. Release the caches and lists.
 *
 * NOTE(review): many lines are not visible in this view, including local
 * declarations, every error check after the calls, the labels separating
 * the two cleanup sequences at the end, and the return statement. The
 * conditions under which each cleanup sequence runs are therefore not
 * documented here.
 */
9821 static int check_chunks_and_extents(struct btrfs_root *root)
9823 struct rb_root dev_cache;
9824 struct cache_tree chunk_cache;
9825 struct block_group_tree block_group_cache;
9826 struct device_extent_tree dev_extent_cache;
9827 struct cache_tree extent_cache;
9828 struct cache_tree seen;
9829 struct cache_tree pending;
9830 struct cache_tree reada;
9831 struct cache_tree nodes;
9832 struct extent_io_tree excluded_extents;
9833 struct cache_tree corrupt_blocks;
9834 struct btrfs_path path;
9835 struct btrfs_key key;
9836 struct btrfs_key found_key;
9838 struct block_info *bits;
9840 struct extent_buffer *leaf;
9842 struct btrfs_root_item ri;
9843 struct list_head dropping_trees;
9844 struct list_head normal_trees;
9845 struct btrfs_root *root1;
/* Set up every cache, tree and list used by this pass. */
9850 dev_cache = RB_ROOT;
9851 cache_tree_init(&chunk_cache);
9852 block_group_tree_init(&block_group_cache);
9853 device_extent_tree_init(&dev_extent_cache);
9855 cache_tree_init(&extent_cache);
9856 cache_tree_init(&seen);
9857 cache_tree_init(&pending);
9858 cache_tree_init(&nodes);
9859 cache_tree_init(&reada);
9860 cache_tree_init(&corrupt_blocks);
9861 extent_io_tree_init(&excluded_extents);
9862 INIT_LIST_HEAD(&dropping_trees);
9863 INIT_LIST_HEAD(&normal_trees);
/* Publish the local structures through fs_info (guarding condition, if any, not visible). */
9866 root->fs_info->excluded_extents = &excluded_extents;
9867 root->fs_info->fsck_extent_cache = &extent_cache;
9868 root->fs_info->free_extent_hook = free_extent_hook;
9869 root->fs_info->corrupt_blocks = &corrupt_blocks;
9873 bits = malloc(bits_nr * sizeof(struct block_info));
/* Progress reporting for the extent pass. */
9879 if (ctx.progress_enabled) {
9880 ctx.tp = TASK_EXTENTS;
9881 task_start(ctx.info);
/* The tree root itself is the first tree to scan. */
9885 root1 = root->fs_info->tree_root;
9886 level = btrfs_header_level(root1->node);
9887 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9888 root1->node->start, 0, level, 0,
9889 root1->nodesize, NULL);
/* The chunk root is queued the same way. */
9892 root1 = root->fs_info->chunk_root;
9893 level = btrfs_header_level(root1->node);
9894 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9895 root1->node->start, 0, level, 0,
9896 root1->nodesize, NULL);
/* Enumerate the items of the tree root looking for ROOT_ITEM keys. */
9899 btrfs_init_path(&path);
9902 key.type = BTRFS_ROOT_ITEM_KEY;
9903 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9908 leaf = path.nodes[0];
9909 slot = path.slots[0];
/* Past the last item of this leaf: advance to the next leaf. */
9910 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9911 ret = btrfs_next_leaf(root, &path);
9914 leaf = path.nodes[0];
9915 slot = path.slots[0];
9917 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9918 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9919 unsigned long offset;
/* Copy the on-disk root item out of the leaf. */
9922 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9923 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9924 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0: queue as a normal tree, without a drop key. */
9925 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9926 level = btrfs_root_level(&ri);
9927 level_size = root->nodesize;
9928 ret = add_root_item_to_list(&normal_trees,
9930 btrfs_root_bytenr(&ri),
9931 last_snapshot, level,
9932 0, level_size, NULL);
/* Otherwise the root is queued on dropping_trees along with a drop key. */
9936 level = btrfs_root_level(&ri);
9937 level_size = root->nodesize;
9938 objectid = found_key.objectid;
9939 btrfs_disk_key_to_cpu(&found_key,
9941 ret = add_root_item_to_list(&dropping_trees,
9943 btrfs_root_bytenr(&ri),
9944 last_snapshot, level,
9946 level_size, &found_key);
9953 btrfs_release_path(&path);
9956 * check_block can return -EAGAIN if it fixes something, please keep
9957 * this in mind when dealing with return values from these functions, if
9958 * we get -EAGAIN we want to fall through and restart the loop.
/* Scan the normal trees first ... */
9960 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9961 &seen, &reada, &nodes, &extent_cache,
9962 &chunk_cache, &dev_cache, &block_group_cache,
/* ... then the trees that have a drop in progress. */
9969 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9970 &pending, &seen, &reada, &nodes,
9971 &extent_cache, &chunk_cache, &dev_cache,
9972 &block_group_cache, &dev_extent_cache);
/* Cross-checks on the collected records; no good/bad/rebuild lists are requested. */
9979 ret = check_chunks(&chunk_cache, &block_group_cache,
9980 &dev_extent_cache, NULL, NULL, NULL, 0);
9987 ret = check_extent_refs(root, &extent_cache);
9994 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First cleanup sequence: stop the progress task, unhook fs_info, free caches. */
9999 task_stop(ctx.info);
10001 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10002 extent_io_tree_cleanup(&excluded_extents);
10003 root->fs_info->fsck_extent_cache = NULL;
10004 root->fs_info->free_extent_hook = NULL;
10005 root->fs_info->corrupt_blocks = NULL;
10006 root->fs_info->excluded_extents = NULL;
10009 free_chunk_cache_tree(&chunk_cache);
10010 free_device_cache_tree(&dev_cache);
10011 free_block_group_tree(&block_group_cache);
10012 free_device_extent_tree(&dev_extent_cache);
10013 free_extent_cache_tree(&seen);
10014 free_extent_cache_tree(&pending);
10015 free_extent_cache_tree(&reada);
10016 free_extent_cache_tree(&nodes);
10017 free_root_item_list(&normal_trees);
10018 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence: also drops the extent record cache and leaves the
 * fs_info hooks in place. NOTE(review): presumably the restart path hinted
 * at by the -EAGAIN remark above - the label and the jump are not visible.
 */
10021 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10022 free_extent_cache_tree(&seen);
10023 free_extent_cache_tree(&pending);
10024 free_extent_cache_tree(&reada);
10025 free_extent_cache_tree(&nodes);
10026 free_chunk_cache_tree(&chunk_cache);
10027 free_block_group_tree(&block_group_cache);
10028 free_device_cache_tree(&dev_cache);
10029 free_device_extent_tree(&dev_extent_cache);
10030 free_extent_record_cache(&extent_cache);
10031 free_root_item_list(&normal_trees);
10032 free_root_item_list(&dropping_trees);
10033 extent_io_tree_cleanup(&excluded_extents);
10038 * Check backrefs of a tree block given by @bytenr or @eb.
10040 * @root: the root containing the @bytenr or @eb
10041 * @eb: tree block extent buffer, can be NULL
10042 * @bytenr: bytenr of the tree block to search
10043 * @level: tree level of the tree block
10044 * @owner: owner of the tree block
10046 * Return >0 for any error found and output error message
10047 * Return 0 for no error found
/*
 * check_tree_block_ref - look up the extent-tree backref of one tree block.
 *
 * Outline of the visible logic:
 *   - Locate the extent/metadata item for @bytenr in the extent root (key
 *     type depends on the SKINNY_METADATA incompat flag); failures visible
 *     here set BACKREF_MISSING.
 *   - Derive the level recorded in the extent tree, either from the key
 *     offset (METADATA_ITEM) or from the btrfs_tree_block_info that follows
 *     the extent item.
 *   - Compare extent flags, generation, level and (for non-fs trees) the
 *     reference count, setting BACKREF_MISMATCH on each discrepancy.
 *   - Walk the inline references for a TREE_BLOCK_REF matching
 *     root->objectid or @owner, or a SHARED_BLOCK_REF whose parent is itself
 *     validated by a recursive call with @eb == NULL and level + 1.
 *   - Otherwise fall back to a keyed TREE_BLOCK_REF item lookup.
 *   - With @eb set and BACKREF_MISSING raised, print a "backref lost" error.
 *
 * NOTE(review): several declarations, conditions, labels and the return
 * statement are not visible in this view; in particular the guard around the
 * header/extent comparisons (which dereference @eb) is not shown.
 */
10049 static int check_tree_block_ref(struct btrfs_root *root,
10050 struct extent_buffer *eb, u64 bytenr,
10051 int level, u64 owner)
10053 struct btrfs_key key;
10054 struct btrfs_root *extent_root = root->fs_info->extent_root;
10055 struct btrfs_path path;
10056 struct btrfs_extent_item *ei;
10057 struct btrfs_extent_inline_ref *iref;
10058 struct extent_buffer *leaf;
10064 u32 nodesize = root->nodesize;
10067 int tree_reloc_root = 0;
/* The block is the root node of a tree reloc root. */
10072 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10073 btrfs_header_bytenr(root->node) == bytenr)
10074 tree_reloc_root = 1;
/* Build the search key; offset (u64)-1 positions the search after any item for bytenr. */
10076 btrfs_init_path(&path);
10077 key.objectid = bytenr;
10078 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10079 key.type = BTRFS_METADATA_ITEM_KEY;
10081 key.type = BTRFS_EXTENT_ITEM_KEY;
10082 key.offset = (u64)-1;
10084 /* Search for the backref in extent tree */
10085 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10087 err |= BACKREF_MISSING;
/* Step back to the extent item that describes bytenr. */
10090 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10092 err |= BACKREF_MISSING;
10096 leaf = path.nodes[0];
10097 slot = path.slots[0];
10098 btrfs_item_key_to_cpu(leaf, &key, slot);
10100 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: level is the key offset, inline refs start right after the extent item. */
10102 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10103 skinny_level = (int)key.offset;
10104 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* Otherwise a btrfs_tree_block_info sits between the extent item and the inline refs. */
10106 struct btrfs_tree_block_info *info;
10108 info = (struct btrfs_tree_block_info *)(ei + 1);
10109 skinny_level = btrfs_tree_block_level(leaf, info);
10110 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
10117 if (!(btrfs_extent_flags(leaf, ei) &
10118 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10120 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10121 key.objectid, nodesize,
10122 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10123 err = BACKREF_MISMATCH;
/* Generation in the block header must match the extent item. */
10125 header_gen = btrfs_header_generation(eb);
10126 extent_gen = btrfs_extent_generation(leaf, ei);
10127 if (header_gen != extent_gen) {
10129 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10130 key.objectid, nodesize, header_gen,
10132 err = BACKREF_MISMATCH;
/* The caller-supplied level must match the level recorded in the extent tree. */
10134 if (level != skinny_level) {
10136 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10137 key.objectid, nodesize, level, skinny_level);
10138 err = BACKREF_MISMATCH;
/* Blocks owned by a non-fs tree are expected to have exactly one reference. */
10140 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10142 "extent[%llu %u] is referred by other roots than %llu",
10143 key.objectid, nodesize, root->objectid);
10144 err = BACKREF_MISMATCH;
10149 * Iterate the extent/metadata item to find the exact backref
10151 item_size = btrfs_item_size_nr(leaf, slot);
10152 ptr = (unsigned long)iref;
10153 end = (unsigned long)ei + item_size;
10154 while (ptr < end) {
10155 iref = (struct btrfs_extent_inline_ref *)ptr;
10156 type = btrfs_extent_inline_ref_type(leaf, iref);
10157 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A keyed ref matches when it names this root or the block's owner. */
10159 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10160 (offset == root->objectid || offset == owner)) {
10162 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10164 * Backref of tree reloc root points to itself, no need
10165 * to check backref any more.
10167 if (tree_reloc_root)
10170 /* Check if the backref points to valid referencer */
10171 found_ref = !check_tree_block_ref(root, NULL,
10172 offset, level + 1, owner);
/* Advance by the size of this inline ref type. */
10177 ptr += btrfs_extent_inline_ref_size(type);
10181 * Inlined extent item doesn't have what we need, check
10182 * TREE_BLOCK_REF_KEY
10185 btrfs_release_path(&path);
10186 key.objectid = bytenr;
10187 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10188 key.offset = root->objectid;
10190 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10195 err |= BACKREF_MISSING;
10197 btrfs_release_path(&path);
/* Only report when a real block was supplied; recursive calls pass eb == NULL. */
10198 if (eb && (err & BACKREF_MISSING))
10199 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10200 bytenr, nodesize, owner, level);
10205 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10207 * Return >0 any error found and output error message
10208 * Return 0 for no error found
/*
 * check_extent_data_item - validate one EXTENT_DATA item and look for its
 * data backref in the extent tree.
 *
 * @root: root the file extent belongs to
 * @eb:   leaf containing the file extent item
 * @slot: slot of the item inside @eb
 *
 * Visible behaviour:
 *   - Inline extents and extents with disk_bytenr == 0 are not checked.
 *   - disk_num_bytes and num_bytes must be aligned to root->sectorsize,
 *     otherwise BYTES_UNALIGNED is set; the global counters
 *     data_bytes_allocated and data_bytes_referenced are updated.
 *   - The extent item keyed (disk_bytenr, EXTENT_ITEM, disk_num_bytes) is
 *     searched; it must carry BTRFS_EXTENT_FLAG_DATA, otherwise
 *     BACKREF_MISMATCH is set.
 *   - A backref is looked for in this order: inline refs, keyed
 *     EXTENT_DATA_REF item, keyed SHARED_DATA_REF item at eb->start.
 *   - If none is found BACKREF_MISSING is set and an error is printed.
 *
 * NOTE(review): several declarations, else-branches, labels and the return
 * statement are not visible in this view.
 */
10210 static int check_extent_data_item(struct btrfs_root *root,
10211 struct extent_buffer *eb, int slot)
10213 struct btrfs_file_extent_item *fi;
10214 struct btrfs_path path;
10215 struct btrfs_root *extent_root = root->fs_info->extent_root;
10216 struct btrfs_key fi_key;
10217 struct btrfs_key dbref_key;
10218 struct extent_buffer *leaf;
10219 struct btrfs_extent_item *ei;
10220 struct btrfs_extent_inline_ref *iref;
10221 struct btrfs_extent_data_ref *dref;
10224 u64 disk_num_bytes;
10225 u64 extent_num_bytes;
10232 int found_dbackref = 0;
10236 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10237 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10239 /* Nothing to check for hole and inline data extents */
10240 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10241 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10244 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10245 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10246 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10248 /* Check unaligned disk_num_bytes and num_bytes */
10249 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10251 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10252 fi_key.objectid, fi_key.offset, disk_num_bytes,
10254 err |= BYTES_UNALIGNED;
/* Global accounting of allocated data bytes. */
10256 data_bytes_allocated += disk_num_bytes;
10258 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10260 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10261 fi_key.objectid, fi_key.offset, extent_num_bytes,
10263 err |= BYTES_UNALIGNED;
/* Global accounting of referenced data bytes. */
10265 data_bytes_referenced += extent_num_bytes;
10267 owner = btrfs_header_owner(eb);
10269 /* Check the extent item of the file extent in extent tree */
10270 btrfs_init_path(&path);
10271 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10272 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10273 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10275 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on "slot" refers to the extent tree leaf, not to @eb. */
10279 leaf = path.nodes[0];
10280 slot = path.slots[0];
10281 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10283 extent_flags = btrfs_extent_flags(leaf, ei);
10285 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10287 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10288 disk_bytenr, disk_num_bytes,
10289 BTRFS_EXTENT_FLAG_DATA);
10290 err |= BACKREF_MISMATCH;
10293 /* Check data backref inside that extent item */
10294 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10295 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10296 ptr = (unsigned long)iref;
10297 end = (unsigned long)ei + item_size;
10298 while (ptr < end) {
10299 iref = (struct btrfs_extent_inline_ref *)ptr;
10300 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref structure starts at the offset field. */
10301 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10303 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10304 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* Accept a ref naming either the leaf's owner or this root. */
10305 if (ref_root == owner || ref_root == root->objectid)
10306 found_dbackref = 1;
10307 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared ref counts if the parent leaf itself has a valid tree block backref. */
10308 found_dbackref = !check_tree_block_ref(root, NULL,
10309 btrfs_extent_inline_ref_offset(leaf, iref),
10313 if (found_dbackref)
10315 ptr += btrfs_extent_inline_ref_size(type);
10318 if (!found_dbackref) {
10319 btrfs_release_path(&path);
10321 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10322 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10323 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10324 dbref_key.offset = hash_extent_data_ref(root->objectid,
10325 fi_key.objectid, fi_key.offset);
10327 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10328 &dbref_key, &path, 0, 0);
10330 found_dbackref = 1;
10334 btrfs_release_path(&path);
10337 * Neither inlined nor EXTENT_DATA_REF found, try
10338 * SHARED_DATA_REF as last chance.
10340 dbref_key.objectid = disk_bytenr;
10341 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10342 dbref_key.offset = eb->start;
10344 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10345 &dbref_key, &path, 0, 0);
10347 found_dbackref = 1;
/* No backref of any kind was found. */
10353 if (!found_dbackref)
10354 err |= BACKREF_MISSING;
10355 btrfs_release_path(&path);
10356 if (err & BACKREF_MISSING) {
10357 error("data extent[%llu %llu] backref lost",
10358 disk_bytenr, disk_num_bytes);
10364 * Get the real tree block level, for cases such as a shared block
10365 * Return >= 0 as tree level
10366 * Return <0 for error
/*
 * query_tree_block_level - determine the level of the tree block at @bytenr
 * from two sources and return it.
 *
 * @fs_info: filesystem whose extent root and super block copy are used
 * @bytenr:  logical address of the tree block
 *
 * The extent item for @bytenr supplies the flags, the generation (used as
 * transid when reading the block) and the "backref level" - the key offset
 * for a METADATA_ITEM, or the level stored in btrfs_tree_block_info
 * otherwise. The block is then read and the level from its header is
 * compared with the backref level; the header level is what gets returned.
 *
 * NOTE(review): the error checks after each call, the action taken when the
 * two levels differ, and the value returned on the error path are not
 * visible in this view.
 */
10368 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10370 struct extent_buffer *eb;
10371 struct btrfs_path path;
10372 struct btrfs_key key;
10373 struct btrfs_extent_item *ei;
10376 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10381 /* Search extent tree for extent generation and level */
10382 key.objectid = bytenr;
10383 key.type = BTRFS_METADATA_ITEM_KEY;
10384 key.offset = (u64)-1;
10386 btrfs_init_path(&path);
10387 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Step back to the extent item that describes bytenr. */
10390 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10398 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10399 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10400 struct btrfs_extent_item);
10401 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree block extents carry a level. */
10402 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10407 /* Get transid for later read_tree_block() check */
10408 transid = btrfs_extent_generation(path.nodes[0], ei);
10410 /* Get backref level as one source */
10411 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10412 backref_level = key.offset;
10414 struct btrfs_tree_block_info *info;
10416 info = (struct btrfs_tree_block_info *)(ei + 1);
10417 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10419 btrfs_release_path(&path);
10421 /* Get level from tree block as an alternative source */
10422 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10423 if (!extent_buffer_uptodate(eb)) {
10424 free_extent_buffer(eb);
10427 header_level = btrfs_header_level(eb);
10428 free_extent_buffer(eb);
/* The two sources must agree before the level is returned. */
10430 if (header_level != backref_level)
10432 return header_level;
/* Error path: the path is released here (the returned value is not visible). */
10435 btrfs_release_path(&path);
10440 * Check if a tree block backref is valid (points to a valid tree block)
10441 * if level == -1, level will be resolved
10442 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref - verify that root @root_id really references
 * the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  logical address of the tree block
 * @level:   expected level; query_tree_block_level() is used to resolve it
 *
 * The root is read, the block is read to fetch its first key, and the root
 * is then searched for that key with path.lowest_level = level. The node
 * found at that level must have the expected bytenr and level, otherwise
 * REFERENCER_MISMATCH is set. Failures to read the root or the block, or a
 * failed search, set REFERENCER_MISSING and an error is printed at the end.
 *
 * NOTE(review): the conditions guarding the level query, the choice between
 * node key and item key, the labels and the return statement are not visible
 * in this view.
 */
10444 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10445 u64 bytenr, int level)
10447 struct btrfs_root *root;
10448 struct btrfs_key key;
10449 struct btrfs_path path;
10450 struct extent_buffer *eb;
10451 struct extent_buffer *node;
10452 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10456 /* Query level for level == -1 special case */
10458 level = query_tree_block_level(fs_info, bytenr);
10460 err |= REFERENCER_MISSING;
/* Look up the referencing root by its objectid. */
10464 key.objectid = root_id;
10465 key.type = BTRFS_ROOT_ITEM_KEY;
10466 key.offset = (u64)-1;
10468 root = btrfs_read_fs_root(fs_info, &key);
10469 if (IS_ERR(root)) {
10470 err |= REFERENCER_MISSING;
10474 /* Read out the tree block to get item/node key */
10475 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10476 if (!extent_buffer_uptodate(eb)) {
10477 err |= REFERENCER_MISSING;
10478 free_extent_buffer(eb);
10482 /* Empty tree, no need to check key */
10483 if (!btrfs_header_nritems(eb) && !level) {
10484 free_extent_buffer(eb);
/* First key of the block: node key or item key (selecting condition not visible). */
10489 btrfs_node_key_to_cpu(eb, &key, 0);
10491 btrfs_item_key_to_cpu(eb, &key, 0);
10493 free_extent_buffer(eb);
10495 btrfs_init_path(&path);
/* Stop the search at the block's level so path.nodes[level] is filled in. */
10496 path.lowest_level = level;
10497 /* Search with the first key, to ensure we can reach it */
10498 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10500 err |= REFERENCER_MISSING;
/* The node reached at this level must be the very block being checked. */
10504 node = path.nodes[level];
10505 if (btrfs_header_bytenr(node) != bytenr) {
10507 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10508 bytenr, nodesize, bytenr,
10509 btrfs_header_bytenr(node));
10510 err |= REFERENCER_MISMATCH;
10512 if (btrfs_header_level(node) != level) {
10514 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10515 bytenr, nodesize, level,
10516 btrfs_header_level(node));
10517 err |= REFERENCER_MISMATCH;
10521 btrfs_release_path(&path);
/* Two message variants follow: one without and one with the level. */
10523 if (err & REFERENCER_MISSING) {
10525 error("extent [%llu %d] lost referencer (owner: %llu)",
10526 bytenr, nodesize, root_id);
10529 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10530 bytenr, nodesize, root_id, level);
10537 * Check if tree block @eb is tree reloc root.
10538 * Return 0 if it's not or any problem happens
10539 * Return 1 if it's a tree reloc root
/*
 * is_tree_reloc_root - test whether @eb is the root node of the tree reloc
 * root that belongs to @eb's owner.
 *
 * @fs_info: filesystem being checked
 * @eb:      tree block to test; its header supplies bytenr and owner
 *
 * The root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner) is read
 * without using the root cache, its node's bytenr is compared with @eb's,
 * and the root is released again with btrfs_free_fs_root().
 *
 * NOTE(review): the result variable and the return statements are not
 * visible in this view.
 */
10541 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10542 struct extent_buffer *eb)
10544 struct btrfs_root *tree_reloc_root;
10545 struct btrfs_key key;
10546 u64 bytenr = btrfs_header_bytenr(eb);
10547 u64 owner = btrfs_header_owner(eb);
/* The reloc root of a tree is keyed by the reloc objectid and the tree's id. */
10550 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10551 key.offset = owner;
10552 key.type = BTRFS_ROOT_ITEM_KEY;
10554 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10555 if (IS_ERR(tree_reloc_root))
/* Match when @eb sits at the same address as the reloc root's node. */
10558 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The uncached root read above must be released by this function. */
10560 btrfs_free_fs_root(tree_reloc_root);
10565 * Check referencer for shared block backref
10566 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref - verify that the block at @parent really
 * points to the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  logical address of the parent block named by the shared backref
 * @bytenr:  logical address of the referenced tree block
 * @level:   level of the referenced block; query_tree_block_level() is used
 *           to resolve it
 *
 * The parent block is read; when parent == bytenr the block is tested with
 * is_tree_reloc_root(). Otherwise the parent's header level is compared
 * with level + 1 and its block pointers are scanned for @bytenr. If no
 * parent is found, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the condition guarding the level query, the statements
 * setting found_parent, the labels and the success return are not visible
 * in this view.
 */
10568 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10569 u64 parent, u64 bytenr, int level)
10571 struct extent_buffer *eb;
10572 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10574 int found_parent = 0;
10577 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10578 if (!extent_buffer_uptodate(eb))
10582 level = query_tree_block_level(fs_info, bytenr);
10586 /* It's possible it's a tree reloc root */
10587 if (parent == bytenr) {
10588 if (is_tree_reloc_root(fs_info, eb))
/* Compare the parent's header level with level + 1. */
10593 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for the child's address. */
10596 nr = btrfs_header_nritems(eb);
10597 for (i = 0; i < nr; i++) {
10598 if (bytenr == btrfs_node_blockptr(eb, i)) {
10604 free_extent_buffer(eb);
10605 if (!found_parent) {
10607 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10608 bytenr, nodesize, parent, level);
10609 return REFERENCER_MISSING;
10615 * Check referencer for normal (inlined) data ref
10616 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref - verify a keyed/inline EXTENT_DATA_REF by
 * counting the file extents that actually reference the data extent.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the backref
 * @objectid: inode number named by the backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * The root @root_id is read and the EXTENT_DATA items of inode @objectid
 * are iterated. Each item is tested against @bytenr, @len and a backref
 * offset computed as key.offset minus the file extent offset. If the
 * resulting found_count differs from @count, an error is printed and
 * REFERENCER_MISSING is returned.
 *
 * NOTE(review): the condition guarding the initial extent tree lookup, the
 * assignment of len from it, the loop construct, the increment of
 * found_count, the error exits and the success return are not visible in
 * this view.
 */
10618 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10619 u64 root_id, u64 objectid, u64 offset,
10620 u64 bytenr, u64 len, u32 count)
10622 struct btrfs_root *root;
10623 struct btrfs_root *extent_root = fs_info->extent_root;
10624 struct btrfs_key key;
10625 struct btrfs_path path;
10626 struct extent_buffer *leaf;
10627 struct btrfs_file_extent_item *fi;
10628 u32 found_count = 0;
/* Locate the EXTENT_ITEM for bytenr in the extent tree. */
10633 key.objectid = bytenr;
10634 key.type = BTRFS_EXTENT_ITEM_KEY;
10635 key.offset = (u64)-1;
10637 btrfs_init_path(&path);
10638 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10641 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10644 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10645 if (key.objectid != bytenr ||
10646 key.type != BTRFS_EXTENT_ITEM_KEY)
10649 btrfs_release_path(&path);
/* Read the root that the backref claims holds the reference. */
10651 key.objectid = root_id;
10652 key.type = BTRFS_ROOT_ITEM_KEY;
10653 key.offset = (u64)-1;
10654 btrfs_init_path(&path);
10656 root = btrfs_read_fs_root(fs_info, &key);
10660 key.objectid = objectid;
10661 key.type = BTRFS_EXTENT_DATA_KEY;
10663 * It can be nasty as data backref offset is
10664 * file offset - file extent offset, which is smaller or
10665 * equal to original backref offset. The only special case is
10666 * overflow. So we need to special check and do further search.
/* If the top bit of offset is set, start the search from file offset 0. */
10668 key.offset = offset & (1ULL << 63) ? 0 : offset;
10670 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10675 * Search afterwards to get correct one
10676 * NOTE: As we must do a comprehensive check on the data backref to
10677 * make sure the dref count also matches, we must iterate all file
10678 * extents for that inode.
10681 leaf = path.nodes[0];
10682 slot = path.slots[0];
10684 if (slot >= btrfs_header_nritems(leaf))
10686 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Test whether the item is still an EXTENT_DATA item of this inode. */
10687 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10689 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10691 * Except normal disk bytenr and disk num bytes, we still
10692 * need to do extra check on dbackref offset as
10693 * dbackref offset = file_offset - file_extent_offset
10695 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10696 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10697 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10702 ret = btrfs_next_item(root, &path);
10707 btrfs_release_path(&path);
/* The number of matching file extents must equal the recorded count. */
10708 if (found_count != count) {
10710 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10711 bytenr, len, root_id, objectid, offset, count, found_count);
10712 return REFERENCER_MISSING;
10718 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref - verify that the leaf at @parent contains a
 * file extent pointing to the data extent at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @parent:  logical address of the leaf named by the shared data backref
 * @bytenr:  start of the data extent
 *
 * The leaf is read and its items are scanned. Each item's key type is
 * tested against EXTENT_DATA and its extent type against INLINE; a
 * regular file extent matches when its disk_bytenr equals @bytenr. If no
 * match is found, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statements setting found_parent, the skip/exit
 * statements inside the loop, the labels and the success return are not
 * visible in this view.
 */
10720 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10721 u64 parent, u64 bytenr)
10723 struct extent_buffer *eb;
10724 struct btrfs_key key;
10725 struct btrfs_file_extent_item *fi;
10726 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10728 int found_parent = 0;
10731 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10732 if (!extent_buffer_uptodate(eb))
/* Examine every item of the parent leaf. */
10735 nr = btrfs_header_nritems(eb);
10736 for (i = 0; i < nr; i++) {
10737 btrfs_item_key_to_cpu(eb, &key, i);
10738 if (key.type != BTRFS_EXTENT_DATA_KEY)
10741 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Test for an inline extent before reading its disk_bytenr. */
10742 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10745 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10752 free_extent_buffer(eb);
10753 if (!found_parent) {
10754 error("shared extent %llu referencer lost (parent: %llu)",
10756 return REFERENCER_MISSING;
10762 * This function will check a given extent item, including its backref and
10763 * itself (like crossing stripe boundary and type)
10765 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item - check one EXTENT_ITEM / METADATA_ITEM and every inline
 * backref stored in it.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf containing the item
 * @slot:    slot of the item inside @eb
 *
 * Visible behaviour:
 *   - The global bytes_used counter grows by key.offset for an EXTENT_ITEM
 *     key and by nodesize in the other visible branch.
 *   - Items smaller than struct btrfs_extent_item are reported as an
 *     unsupported COMPAT_EXTENT_TREE_V0 layout.
 *   - Metadata extents that cross a stripe boundary set
 *     CROSSING_STRIPE_BOUNDARY.
 *   - Each inline ref is dispatched by type to check_tree_block_backref(),
 *     check_shared_block_backref(), check_extent_data_backref() or
 *     check_shared_data_backref(); an unrecognised type sets UNKNOWN_TYPE.
 *   - Running past the end of the item sets ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): several declarations, the loop/label structure of the inline
 * ref walk, the merging of ret into err, and the return statement are not
 * visible in this view.
 */
10767 static int check_extent_item(struct btrfs_fs_info *fs_info,
10768 struct extent_buffer *eb, int slot)
10770 struct btrfs_extent_item *ei;
10771 struct btrfs_extent_inline_ref *iref;
10772 struct btrfs_extent_data_ref *dref;
10776 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10777 u32 item_size = btrfs_item_size_nr(eb, slot);
10782 struct btrfs_key key;
10786 btrfs_item_key_to_cpu(eb, &key, slot);
/* Account the extent's size in the global used-bytes counter. */
10787 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10788 bytes_used += key.offset;
10790 bytes_used += nodesize;
10792 if (item_size < sizeof(*ei)) {
10794 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10795 * old thing when on disk format is still un-determined.
10796 * No need to care about it anymore
10798 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10802 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10803 flags = btrfs_extent_flags(eb, ei);
10805 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* NOTE(review): this call uses global_info rather than the fs_info parameter. */
10807 if (metadata && check_crossing_stripes(global_info, key.objectid,
10809 error("bad metadata [%llu, %llu) crossing stripe boundary",
10810 key.objectid, key.objectid + nodesize);
10811 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline data begins immediately after the fixed extent item. */
10814 ptr = (unsigned long)(ei + 1);
10816 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10817 /* Old EXTENT_ITEM metadata */
10818 struct btrfs_tree_block_info *info;
10820 info = (struct btrfs_tree_block_info *)ptr;
10821 level = btrfs_tree_block_level(eb, info);
10822 ptr += sizeof(struct btrfs_tree_block_info);
10824 /* New METADATA_ITEM */
10825 level = key.offset;
10827 end = (unsigned long)ei + item_size;
10830 /* Reached extent item end normally */
10834 /* Beyond extent item end, wrong item size */
10836 err |= ITEM_SIZE_MISMATCH;
10837 error("extent item at bytenr %llu slot %d has wrong size",
10842 /* Now check every backref in this extent item */
10843 iref = (struct btrfs_extent_inline_ref *)ptr;
10844 type = btrfs_extent_inline_ref_type(eb, iref);
10845 offset = btrfs_extent_inline_ref_offset(eb, iref);
10847 case BTRFS_TREE_BLOCK_REF_KEY:
10848 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10852 case BTRFS_SHARED_BLOCK_REF_KEY:
10853 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10857 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure overlays the inline ref starting at its offset field. */
10858 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10859 ret = check_extent_data_backref(fs_info,
10860 btrfs_extent_data_ref_root(eb, dref),
10861 btrfs_extent_data_ref_objectid(eb, dref),
10862 btrfs_extent_data_ref_offset(eb, dref),
10863 key.objectid, key.offset,
10864 btrfs_extent_data_ref_count(eb, dref));
10867 case BTRFS_SHARED_DATA_REF_KEY:
10868 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10872 error("extent[%llu %d %llu] has unknown ref type: %d",
10873 key.objectid, key.type, key.offset, type);
10874 err |= UNKNOWN_TYPE;
/* Move on to the next inline ref. */
10878 ptr += btrfs_extent_inline_ref_size(type);
10886 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item - verify that a DEV_EXTENT item is backed by a
 * stripe of the chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf containing the dev extent item
 * @slot:    slot of the item inside @eb
 *
 * The chunk keyed (chunk_objectid, CHUNK_ITEM, chunk_offset) taken from the
 * dev extent is searched in the chunk root. The chunk's length is compared
 * with the dev extent's length, and its stripes are scanned for one whose
 * devid and offset equal the dev extent key's objectid and offset. If no
 * chunk is found, an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the error checks, the assignment of l, the statements
 * setting found_chunk, the labels and the success return are not visible in
 * this view.
 */
10888 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10889 struct extent_buffer *eb, int slot)
10891 struct btrfs_root *chunk_root = fs_info->chunk_root;
10892 struct btrfs_dev_extent *ptr;
10893 struct btrfs_path path;
10894 struct btrfs_key chunk_key;
10895 struct btrfs_key devext_key;
10896 struct btrfs_chunk *chunk;
10897 struct extent_buffer *l;
10901 int found_chunk = 0;
10904 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10905 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10906 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to. */
10908 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10909 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10910 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10912 btrfs_init_path(&path);
10913 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10918 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Compare the chunk's length with the dev extent's length. */
10919 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that sits on this device at this offset. */
10922 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10923 for (i = 0; i < num_stripes; i++) {
10924 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10925 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10927 if (devid == devext_key.objectid &&
10928 offset == devext_key.offset) {
10934 btrfs_release_path(&path);
10935 if (!found_chunk) {
10937 "device extent[%llu, %llu, %llu] did not find the related chunk",
10938 devext_key.objectid, devext_key.offset, length);
10939 return REFERENCER_MISSING;
10945 * Check if the used space is correct with the dev item
/*
 * Check the bytes_used field of the DEV_ITEM at @slot of leaf @eb against
 * the dev extents of that device.
 *
 * The length of every DEV_EXTENT item of dev_id found in the dev tree is
 * added to a running total, which must equal bytes_used.
 *
 * Returns REFERENCER_MISSING on the error path that follows the initial dev
 * tree search, and ACCOUNTING_MISMATCH when the summed length differs from
 * bytes_used.  The success return is not visible in this view (presumably
 * 0 -- TODO confirm).
 */
10947 static int check_dev_item(struct btrfs_fs_info *fs_info,
10948 struct extent_buffer *eb, int slot)
10950 struct btrfs_root *dev_root = fs_info->dev_root;
10951 struct btrfs_dev_item *dev_item;
10952 struct btrfs_path path;
10953 struct btrfs_key key;
10954 struct btrfs_dev_extent *ptr;
10960 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10961 dev_id = btrfs_device_id(eb, dev_item);
10962 used = btrfs_device_bytes_used(eb, dev_item);
/* Search key for the dev extents of this device */
10964 key.objectid = dev_id;
10965 key.type = BTRFS_DEV_EXTENT_KEY;
10968 btrfs_init_path(&path);
10969 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Error path: reload the dev item key so the message names the dev item */
10971 btrfs_item_key_to_cpu(eb, &key, slot);
10972 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10973 key.objectid, key.type, key.offset);
10974 btrfs_release_path(&path);
10975 return REFERENCER_MISSING;
10978 /* Iterate dev_extents to calculate the used space of a device */
10980 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10983 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10984 if (key.objectid > dev_id)
10986 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10989 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10990 struct btrfs_dev_extent);
10991 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10993 ret = btrfs_next_item(dev_root, &path);
10997 btrfs_release_path(&path);
/*
 * key was overwritten while iterating the dev extents, so it is re-read
 * from the dev item and that key is what gets reported, in the same
 * dev[objectid, type, offset] form as the earlier error in this function.
 */
10999 if (used != total) {
11000 btrfs_item_key_to_cpu(eb, &key, slot);
11002 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11003 total, used, key.objectid,
11004 key.type, key.offset);
11005 return ACCOUNTING_MISMATCH;
11011 * Check a block group item with its referencer (chunk) and its used space
11012 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at @slot of leaf @eb.
 *
 * 1) Look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *    CHUNK_ITEM, block group start) in the chunk tree and compare its
 *    length; problems are recorded as REFERENCER_MISSING or
 *    REFERENCER_MISMATCH.
 * 2) Walk the extent tree from the block group start, adding up the size of
 *    the EXTENT_ITEM / METADATA_ITEM entries inside the block group, and
 *    record CHUNK_TYPE_MISMATCH when the DATA / TREE_BLOCK flag of an extent
 *    disagrees with the block group flags.
 * 3) Record ACCOUNTING_MISMATCH when the sum differs from the used bytes
 *    stored in the item.
 *
 * Error bits are accumulated in err; the final return statement is not
 * visible in this view (presumably err -- TODO confirm).
 */
11014 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11015 struct extent_buffer *eb, int slot)
11017 struct btrfs_root *extent_root = fs_info->extent_root;
11018 struct btrfs_root *chunk_root = fs_info->chunk_root;
11019 struct btrfs_block_group_item *bi;
11020 struct btrfs_block_group_item bg_item;
11021 struct btrfs_path path;
11022 struct btrfs_key bg_key;
11023 struct btrfs_key chunk_key;
11024 struct btrfs_key extent_key;
11025 struct btrfs_chunk *chunk;
11026 struct extent_buffer *leaf;
11027 struct btrfs_extent_item *ei;
11028 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the leaf so its fields can be read from bg_item */
11036 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11037 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11038 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11039 used = btrfs_block_group_used(&bg_item);
11040 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset is the block group start (bg_key.objectid) */
11042 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11043 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11044 chunk_key.offset = bg_key.objectid;
11046 btrfs_init_path(&path);
11047 /* Search for the referencer chunk */
11048 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11051 "block group[%llu %llu] did not find the related chunk item",
11052 bg_key.objectid, bg_key.offset);
11053 err |= REFERENCER_MISSING;
11055 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11056 struct btrfs_chunk);
11057 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11060 "block group[%llu %llu] related chunk item length does not match",
11061 bg_key.objectid, bg_key.offset);
11062 err |= REFERENCER_MISMATCH;
11065 btrfs_release_path(&path);
11067 /* Search from the block group bytenr */
11068 extent_key.objectid = bg_key.objectid;
11069 extent_key.type = 0;
11070 extent_key.offset = 0;
11072 btrfs_init_path(&path);
11073 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11077 /* Iterate extent tree to account used space */
11079 leaf = path.nodes[0];
11081 /* Search slot can point to the last item beyond leaf nritems */
11082 if (path.slots[0] >= btrfs_header_nritems(leaf))
11085 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11086 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11089 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11090 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11092 if (extent_key.objectid < bg_key.objectid)
/*
 * EXTENT_ITEM keys add key.offset bytes.  The METADATA_ITEM branch body is
 * not visible here (presumably it adds nodesize -- TODO confirm).
 */
11095 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11098 total += extent_key.offset;
/* Cross-check the extent type flag against the block group type */
11100 ei = btrfs_item_ptr(leaf, path.slots[0],
11101 struct btrfs_extent_item);
11102 flags = btrfs_extent_flags(leaf, ei);
11103 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11104 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11106 "bad extent[%llu, %llu) type mismatch with chunk",
11107 extent_key.objectid,
11108 extent_key.objectid + extent_key.offset);
11109 err |= CHUNK_TYPE_MISMATCH;
11111 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11112 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11113 BTRFS_BLOCK_GROUP_METADATA))) {
11115 "bad extent[%llu, %llu) type mismatch with chunk",
11116 extent_key.objectid,
11117 extent_key.objectid + nodesize);
11118 err |= CHUNK_TYPE_MISMATCH;
11122 ret = btrfs_next_item(extent_root, &path);
11128 btrfs_release_path(&path);
/* The bytes summed from the extent tree must match the recorded usage */
11130 if (total != used) {
11132 "block group[%llu %llu] used %llu but extent items used %llu",
11133 bg_key.objectid, bg_key.offset, used, total);
11134 err |= ACCOUNTING_MISMATCH;
11140 * Check a chunk item.
11141 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at @slot of leaf @eb.
 *
 * - The chunk length must be aligned to the sectorsize (BYTES_UNALIGNED).
 * - The chunk type must contain a block group type bit and at most one
 *   profile bit (UNKNOWN_TYPE).
 * - A BLOCK_GROUP_ITEM keyed (chunk start, BLOCK_GROUP_ITEM, length) must
 *   exist in the extent tree with flags equal to the chunk type.
 * - For every stripe, a DEV_EXTENT keyed (devid, DEV_EXTENT, stripe offset)
 *   must exist in the dev tree and point back to this chunk with the same
 *   length (BACKREF_MISSING).
 *
 * Error bits are accumulated in err; the final return statement is not
 * visible in this view (presumably err -- TODO confirm).
 */
11143 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11144 struct extent_buffer *eb, int slot)
11146 struct btrfs_root *extent_root = fs_info->extent_root;
11147 struct btrfs_root *dev_root = fs_info->dev_root;
11148 struct btrfs_path path;
11149 struct btrfs_key chunk_key;
11150 struct btrfs_key bg_key;
11151 struct btrfs_key devext_key;
11152 struct btrfs_chunk *chunk;
11153 struct extent_buffer *leaf;
11154 struct btrfs_block_group_item *bi;
11155 struct btrfs_block_group_item bg_item;
11156 struct btrfs_dev_extent *ptr;
11157 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11169 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11170 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11171 length = btrfs_chunk_length(eb, chunk);
11172 chunk_end = chunk_key.offset + length;
11173 if (!IS_ALIGNED(length, sectorsize)) {
11174 error("chunk[%llu %llu) not aligned to %u",
11175 chunk_key.offset, chunk_end, sectorsize);
11176 err |= BYTES_UNALIGNED;
11180 type = btrfs_chunk_type(eb, chunk);
11181 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11182 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11183 error("chunk[%llu %llu) has no chunk type",
11184 chunk_key.offset, chunk_end);
11185 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11187 if (profile && (profile & (profile - 1))) {
11188 error("chunk[%llu %llu) multiple profiles detected: %llx",
11189 chunk_key.offset, chunk_end, profile);
11190 err |= UNKNOWN_TYPE;
/* The matching block group item is keyed by chunk start and length */
11193 bg_key.objectid = chunk_key.offset;
11194 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11195 bg_key.offset = length;
11197 btrfs_init_path(&path);
11198 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11201 "chunk[%llu %llu) did not find the related block group item",
11202 chunk_key.offset, chunk_end);
11203 err |= REFERENCER_MISSING;
11205 leaf = path.nodes[0];
11206 bi = btrfs_item_ptr(leaf, path.slots[0],
11207 struct btrfs_block_group_item);
11208 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING although
 * the block group item was found; REFERENCER_MISMATCH may have been
 * intended.  Left unchanged because callers may depend on the bit.
 */
11210 if (btrfs_block_group_flags(&bg_item) != type) {
11212 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11213 chunk_key.offset, chunk_end, type,
11214 btrfs_block_group_flags(&bg_item));
11215 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent pointing back to this chunk */
11219 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11220 for (i = 0; i < num_stripes; i++) {
11221 btrfs_release_path(&path);
11222 btrfs_init_path(&path);
11223 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11224 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11225 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11227 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11230 goto not_match_dev;
11232 leaf = path.nodes[0];
11233 ptr = btrfs_item_ptr(leaf, path.slots[0],
11234 struct btrfs_dev_extent);
11235 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11236 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11237 if (objectid != chunk_key.objectid ||
11238 offset != chunk_key.offset ||
11239 btrfs_dev_extent_length(leaf, ptr) != length)
11240 goto not_match_dev;
/*
 * Report the chunk as the range [chunk_key.offset, chunk_end), like every
 * other message in this function.
 */
11243 err |= BACKREF_MISSING;
11245 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11246 chunk_key.offset, chunk_end, i);
11249 btrfs_release_path(&path);
11255 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and run the checker that matches each key type.
 *
 * EXTENT_DATA, BLOCK_GROUP_ITEM, DEV_ITEM, CHUNK_ITEM, DEV_EXTENT and
 * EXTENT_ITEM / METADATA_ITEM are handed to their check_*() function; keyed
 * backref items (TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF,
 * SHARED_DATA_REF) are handed to the matching backref checker; for
 * EXTENT_CSUM items the item size is added to total_csum_bytes.
 *
 * How the per-item results are combined and returned is on lines not
 * visible in this view.
 */
11257 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11259 struct btrfs_fs_info *fs_info = root->fs_info;
11260 struct btrfs_key key;
11263 struct btrfs_extent_data_ref *dref;
11268 btrfs_item_key_to_cpu(eb, &key, slot);
11272 case BTRFS_EXTENT_DATA_KEY:
11273 ret = check_extent_data_item(root, eb, slot);
11276 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11277 ret = check_block_group_item(fs_info, eb, slot);
11280 case BTRFS_DEV_ITEM_KEY:
11281 ret = check_dev_item(fs_info, eb, slot);
11284 case BTRFS_CHUNK_ITEM_KEY:
11285 ret = check_chunk_item(fs_info, eb, slot);
11288 case BTRFS_DEV_EXTENT_KEY:
11289 ret = check_dev_extent_item(fs_info, eb, slot);
11292 case BTRFS_EXTENT_ITEM_KEY:
11293 case BTRFS_METADATA_ITEM_KEY:
11294 ret = check_extent_item(fs_info, eb, slot);
11297 case BTRFS_EXTENT_CSUM_KEY:
11298 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11300 case BTRFS_TREE_BLOCK_REF_KEY:
11301 ret = check_tree_block_backref(fs_info, key.offset,
11305 case BTRFS_EXTENT_DATA_REF_KEY:
11306 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11307 ret = check_extent_data_backref(fs_info,
11308 btrfs_extent_data_ref_root(eb, dref),
11309 btrfs_extent_data_ref_objectid(eb, dref),
11310 btrfs_extent_data_ref_offset(eb, dref),
11312 btrfs_extent_data_ref_count(eb, dref));
11315 case BTRFS_SHARED_BLOCK_REF_KEY:
11316 ret = check_shared_block_backref(fs_info, key.offset,
11320 case BTRFS_SHARED_DATA_REF_KEY:
11321 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while there are items left in the leaf */
11329 if (++slot < btrfs_header_nritems(eb))
11336 * Helper function for later fs/subvol tree check. To determine if a tree
11337 * block should be checked.
11338 * This function ensures that only the direct referencer with the lowest
11339 * rootid checks a fs/subvolume tree block.
11341 * Backref check at extent tree would detect errors like missing subvolume
11342 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether tree block @eb should be checked while walking @root.
 *
 * The extent item of the block is located in the extent tree and its inline
 * backrefs are scanned.  When a TREE_BLOCK_REF whose root id is lower than
 * root->objectid is found, the path is released and the function leaves the
 * loop early (the return value is on a line not visible here, presumably 0
 * meaning "skip" -- TODO confirm).  Keyed backref items are not searched.
 */
11344 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11346 struct btrfs_root *extent_root = root->fs_info->extent_root;
11347 struct btrfs_key key;
11348 struct btrfs_path path;
11349 struct extent_buffer *leaf;
11351 struct btrfs_extent_item *ei;
11357 struct btrfs_extent_inline_ref *iref;
/* Search past any possible item for this bytenr, then step backwards */
11360 btrfs_init_path(&path);
11361 key.objectid = btrfs_header_bytenr(eb);
11362 key.type = BTRFS_METADATA_ITEM_KEY;
11363 key.offset = (u64)-1;
11366 * Any failure in backref resolving means we can't determine
11367 * whom the tree block belongs to.
11368 * So in that case, we need to check that tree block
11370 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11374 ret = btrfs_previous_extent_item(extent_root, &path,
11375 btrfs_header_bytenr(eb));
11379 leaf = path.nodes[0];
11380 slot = path.slots[0];
11381 btrfs_item_key_to_cpu(leaf, &key, slot);
11382 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * otherwise a btrfs_tree_block_info sits between the two.
 */
11384 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11385 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11387 struct btrfs_tree_block_info *info;
11389 info = (struct btrfs_tree_block_info *)(ei + 1);
11390 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs up to the end of the item */
11393 item_size = btrfs_item_size_nr(leaf, slot);
11394 ptr = (unsigned long)iref;
11395 end = (unsigned long)ei + item_size;
11396 while (ptr < end) {
11397 iref = (struct btrfs_extent_inline_ref *)ptr;
11398 type = btrfs_extent_inline_ref_type(leaf, iref);
11399 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11402 * We only check the tree block if current root is
11403 * the lowest referencer of it.
11405 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11406 offset < root->objectid) {
11407 btrfs_release_path(&path);
11411 ptr += btrfs_extent_inline_ref_size(type);
11414 * Normally we should also check keyed tree block ref, but that may be
11415 * very time consuming. Inlined ref should already make us skip a lot
11416 * of refs now. So skip search keyed tree block ref.
11420 btrfs_release_path(&path);
11425 * Traversal function for tree block. We will do:
11426 * 1) Skip shared fs/subvolume tree blocks
11427 * 2) Update related bytes accounting
11428 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * Blocks of fs/subvolume trees for which should_check() returns zero are
 * skipped.  Otherwise the global byte counters are updated, the backref of
 * the block is checked with check_tree_block_ref(), and then either the
 * leaf items are checked with check_leaf_items() or every child block is
 * read with read_tree_block() and traversed.  Children whose key is reported
 * as dropped by is_dropped_key() at root_item.drop_level are handled on a
 * line not visible here (presumably skipped -- TODO confirm).
 *
 * How errors are accumulated and returned is on lines not visible in this
 * view.
 */
11430 static int traverse_tree_block(struct btrfs_root *root,
11431 struct extent_buffer *node)
11433 struct extent_buffer *eb;
11434 struct btrfs_key key;
11435 struct btrfs_key drop_key;
11443 * Skip shared fs/subvolume tree block, in that case they will
11444 * be checked by referencer with lowest rootid
11446 if (is_fstree(root->objectid) && !should_check(root, node))
11449 /* Update bytes accounting */
11450 total_btree_bytes += node->len;
11451 if (fs_root_objectid(btrfs_header_owner(node)))
11452 total_fs_tree_bytes += node->len;
11453 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11454 total_extent_tree_bytes += node->len;
/* Remember a reloc tree block with mixed backref rev but no RELOC flag */
11455 if (!found_old_backref &&
11456 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11457 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11458 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11459 found_old_backref = 1;
11461 /* pre-order traversal, check itself first */
11462 level = btrfs_header_level(node);
11463 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11464 btrfs_header_level(node),
11465 btrfs_header_owner(node));
11469 "check %s failed root %llu bytenr %llu level %d, force continue check",
11470 level ? "node":"leaf", root->objectid,
11471 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf case: account the free space as waste and check every item */
11474 btree_space_waste += btrfs_leaf_free_space(root, node);
11475 ret = check_leaf_items(root, node);
/* Node case: unused key pointer slots are accounted as waste */
11480 nr = btrfs_header_nritems(node);
11481 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11482 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11483 sizeof(struct btrfs_key_ptr);
11485 /* Then check all its children */
11486 for (i = 0; i < nr; i++) {
11487 u64 blocknr = btrfs_node_blockptr(node, i);
11489 btrfs_node_key_to_cpu(node, &key, i);
11490 if (level == root->root_item.drop_level &&
11491 is_dropped_key(&key, &drop_key))
11495 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11496 * to call the function itself.
11498 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11499 if (extent_buffer_uptodate(eb)) {
11500 ret = traverse_tree_block(root, eb);
11503 free_extent_buffer(eb);
11510 * Low memory usage version check_chunks_and_extents.
11512 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11514 struct btrfs_path path;
11515 struct btrfs_key key;
11516 struct btrfs_root *root1;
11517 struct btrfs_root *cur_root;
11521 root1 = root->fs_info->chunk_root;
11522 ret = traverse_tree_block(root1, root1->node);
11525 root1 = root->fs_info->tree_root;
11526 ret = traverse_tree_block(root1, root1->node);
11529 btrfs_init_path(&path);
11530 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11532 key.type = BTRFS_ROOT_ITEM_KEY;
11534 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11536 error("cannot find extent treet in tree_root");
11541 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11542 if (key.type != BTRFS_ROOT_ITEM_KEY)
11544 key.offset = (u64)-1;
11546 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11547 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11550 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11551 if (IS_ERR(cur_root) || !cur_root) {
11552 error("failed to read tree: %lld", key.objectid);
11556 ret = traverse_tree_block(cur_root, cur_root->node);
11559 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11560 btrfs_free_fs_root(cur_root);
11562 ret = btrfs_next_item(root1, &path);
11568 btrfs_release_path(&path);
/*
 * Replace the root node of @root with a freshly initialised tree block.
 *
 * The block is either obtained from btrfs_alloc_free_block() or, on the
 * other branch, an already existing buffer is referenced again; which
 * branch @overwrite selects is on lines not visible here (presumably a
 * non-zero @overwrite reuses the old root node -- TODO confirm).
 *
 * The header of the block is zeroed and refilled (level, bytenr, generation
 * of @trans, mixed backref revision, owner, fsid, chunk tree uuid) and the
 * buffer is marked dirty.  When the new block has the same start as the old
 * root node, the root item generation and level are updated and written
 * with btrfs_update_root().  Finally the old node is released and the root
 * is added to the dirty list.
 */
11572 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11573 struct btrfs_root *root, int overwrite)
11575 struct extent_buffer *c;
11576 struct extent_buffer *old = root->node;
11579 struct btrfs_disk_key disk_key = {0,0,0};
11585 extent_buffer_get(c);
11588 c = btrfs_alloc_free_block(trans, root,
11590 root->root_key.objectid,
11591 &disk_key, level, 0, 0);
11594 extent_buffer_get(c);
/* Rebuild the block header from scratch */
11598 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11599 btrfs_set_header_level(c, level);
11600 btrfs_set_header_bytenr(c, c->start);
11601 btrfs_set_header_generation(c, trans->transid);
11602 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11603 btrfs_set_header_owner(c, root->root_key.objectid);
11605 write_extent_buffer(c, root->fs_info->fsid,
11606 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11608 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11609 btrfs_header_chunk_tree_uuid(c),
11612 btrfs_mark_buffer_dirty(c);
11614 * this case can happen in the following case:
11616 * 1.overwrite previous root.
11618 * 2.reinit reloc data root, this is because we skip pin
11619 * down reloc data tree before which means we can allocate
11620 * same block bytenr here.
11622 if (old->start == c->start) {
11623 btrfs_set_root_generation(&root->root_item,
11625 root->root_item.level = btrfs_header_level(root->node);
11626 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11627 &root->root_key, &root->root_item);
11629 free_extent_buffer(c);
11633 free_extent_buffer(old);
11635 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and, recursively, of the tree blocks
 * reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not processed again.  In a leaf, ROOT_ITEM
 * entries (except the extent root and the reloc roots) have their root block
 * read and pinned recursively with tree_root == 0.  In a node, the child
 * pointers are followed; at level 1 of a tree that is not the tree root the
 * children are pinned by bytenr without being read.
 *
 * @tree_root is non-zero when @eb belongs to the tree root.  The leaf/node
 * dispatch and the return statements are on lines not visible in this view.
 */
11639 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11640 struct extent_buffer *eb, int tree_root)
11642 struct extent_buffer *tmp;
11643 struct btrfs_root_item *ri;
11644 struct btrfs_key key;
11647 int level = btrfs_header_level(eb);
11653 * If we have pinned this block before, don't pin it again.
11654 * This can not only avoid forever loop with broken filesystem
11655 * but also give us some speedups.
11657 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11658 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11661 btrfs_pin_extent(fs_info, eb->start, eb->len);
11663 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11664 nritems = btrfs_header_nritems(eb);
11665 for (i = 0; i < nritems; i++) {
11667 btrfs_item_key_to_cpu(eb, &key, i);
11668 if (key.type != BTRFS_ROOT_ITEM_KEY)
11670 /* Skip the extent root and reloc roots */
11671 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11672 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11673 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11675 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11676 bytenr = btrfs_disk_root_bytenr(eb, ri);
11679 * If at any point we start needing the real root we
11680 * will have to build a stump root for the root we are
11681 * in, but for now this doesn't actually use the root so
11682 * just pass in extent_root.
11684 tmp = read_tree_block(fs_info->extent_root, bytenr,
11686 if (!extent_buffer_uptodate(tmp)) {
11687 fprintf(stderr, "Error reading root block\n");
/* The root of another tree is never the tree root itself, hence 0 */
11690 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11691 free_extent_buffer(tmp);
/* Node case: follow the child pointer stored in slot i */
11695 bytenr = btrfs_node_blockptr(eb, i);
11697 /* If we aren't the tree root don't read the block */
11698 if (level == 1 && !tree_root) {
11699 btrfs_pin_extent(fs_info, bytenr, nodesize);
11703 tmp = read_tree_block(fs_info->extent_root, bytenr,
11705 if (!extent_buffer_uptodate(tmp)) {
11706 fprintf(stderr, "Error reading tree block\n");
11709 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11710 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (tree_root == 0) and
 * then from the tree root (tree_root == 1).  The result of the second call
 * is returned; the handling of the first result is on a line not visible
 * in this view.
 */
11719 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11723 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11727 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits of @fs_info are cleared, then every CHUNK_ITEM of
 * the chunk tree is turned into a block group with btrfs_add_block_group()
 * and its range is marked dirty in fs_info->free_space_cache.  Afterwards
 * the block groups are walked with btrfs_lookup_first_block_group(); what
 * is done with each of them is on lines not visible in this view.
 */
11730 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11732 struct btrfs_block_group_cache *cache;
11733 struct btrfs_path path;
11734 struct extent_buffer *leaf;
11735 struct btrfs_chunk *chunk;
11736 struct btrfs_key key;
11740 btrfs_init_path(&path);
11742 key.type = BTRFS_CHUNK_ITEM_KEY;
11744 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11746 btrfs_release_path(&path);
11751 * We do this in case the block groups were screwed up and had alloc
11752 * bits that aren't actually set on the chunks. This happens with
11753 * restored images every time and could happen in real life I guess.
11755 fs_info->avail_data_alloc_bits = 0;
11756 fs_info->avail_metadata_alloc_bits = 0;
11757 fs_info->avail_system_alloc_bits = 0;
11759 /* First we need to create the in-memory block groups */
11761 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11762 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11764 btrfs_release_path(&path);
11772 leaf = path.nodes[0];
11773 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11774 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11779 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11780 btrfs_add_block_group(fs_info, 0,
11781 btrfs_chunk_type(leaf, chunk),
11782 key.objectid, key.offset,
11783 btrfs_chunk_length(leaf, chunk));
11784 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11785 key.offset + btrfs_chunk_length(leaf, chunk));
11790 cache = btrfs_lookup_first_block_group(fs_info, start);
11794 start = cache->key.objectid + cache->key.offset;
11797 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root.
 *
 * 1) Search for the balance item (BTRFS_BALANCE_OBJECTID,
 *    BALANCE_ITEM) and delete it; one path jumps straight to
 *    reinit_data_reloc (the condition is not visible here).
 * 2) Delete the items with objectid BTRFS_TREE_RELOC_OBJECTID, in batches
 *    of del_nr items starting at del_slot.
 * 3) Read the data reloc tree, record it in @trans, reinitialise its root
 *    with btrfs_fsck_reinit_root() and recreate its root directory with
 *    btrfs_make_root_dir().
 *
 * Error handling and the return statement are on lines not visible in this
 * view.
 */
11801 static int reset_balance(struct btrfs_trans_handle *trans,
11802 struct btrfs_fs_info *fs_info)
11804 struct btrfs_root *root = fs_info->tree_root;
11805 struct btrfs_path path;
11806 struct extent_buffer *leaf;
11807 struct btrfs_key key;
11808 int del_slot, del_nr = 0;
/* Step 1: the balance item */
11812 btrfs_init_path(&path);
11813 key.objectid = BTRFS_BALANCE_OBJECTID;
11814 key.type = BTRFS_BALANCE_ITEM_KEY;
11816 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11821 goto reinit_data_reloc;
11826 ret = btrfs_del_item(trans, root, &path);
11829 btrfs_release_path(&path);
/* Step 2: the reloc tree root items */
11831 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11832 key.type = BTRFS_ROOT_ITEM_KEY;
11834 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11838 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11843 ret = btrfs_del_items(trans, root, &path,
11850 btrfs_release_path(&path);
11853 ret = btrfs_search_slot(trans, root, &key, &path,
11860 leaf = path.nodes[0];
11861 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11862 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11864 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11869 del_slot = path.slots[0];
11878 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11882 btrfs_release_path(&path);
/* Step 3: reinitialise the data reloc tree */
11885 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11886 key.type = BTRFS_ROOT_ITEM_KEY;
11887 key.offset = (u64)-1;
11888 root = btrfs_read_fs_root(fs_info, &key);
11889 if (IS_ERR(root)) {
11890 fprintf(stderr, "Error reading data reloc tree\n");
11891 ret = PTR_ERR(root);
11894 record_root_in_trans(trans, root);
11895 ret = btrfs_fsck_reinit_root(trans, root, 0);
11898 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11900 btrfs_release_path(&path);
/*
 * Recreate the extent tree from scratch inside transaction @trans.
 *
 * Filesystems with the MIXED_GROUPS incompat flag are refused with a
 * message.  Otherwise, in this order:
 *   1) pin_metadata_blocks() pins the metadata that is in use,
 *   2) the block groups are freed and rebuilt with reset_block_groups(),
 *   3) the extent root is reinitialised with btrfs_fsck_reinit_root(),
 *   4) the item of every in-memory block group is inserted into the new
 *      extent tree, followed by btrfs_extent_post_op(),
 *   5) reset_balance() removes a pending balance.
 *
 * Each step prints a message to stderr on failure; the return statements
 * are on lines not visible in this view.
 */
11904 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11905 struct btrfs_fs_info *fs_info)
11911 * The only reason we don't do this is because right now we're just
11912 * walking the trees we find and pinning down their bytes, we don't look
11913 * at any of the leaves. In order to do mixed groups we'd have to check
11914 * the leaves of any fs roots and pin down the bytes for any file
11915 * extents we find. Not hard but why do it if we don't have to?
11917 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11918 fprintf(stderr, "We don't support re-initing the extent tree "
11919 "for mixed block groups yet, please notify a btrfs "
11920 "developer you want to do this so they can add this "
11921 "functionality.\n");
11926 * first we need to walk all of the trees except the extent tree and pin
11927 * down the bytes that are in use so we don't overwrite any existing
11930 ret = pin_metadata_blocks(fs_info);
11932 fprintf(stderr, "error pinning down used bytes\n");
11937 * Need to drop all the block groups since we're going to recreate all
11940 btrfs_free_block_groups(fs_info);
11941 ret = reset_block_groups(fs_info);
11943 fprintf(stderr, "error resetting the block groups\n");
11947 /* Ok we can allocate now, reinit the extent root */
11948 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11950 fprintf(stderr, "extent root initialization failed\n");
11952 * When the transaction code is updated we should end the
11953 * transaction, but for now progs only knows about commit so
11954 * just return an error.
11960 * Now we have all the in-memory block groups setup so we can make
11961 * allocations properly, and the metadata we care about is safe since we
11962 * pinned all of it above.
11965 struct btrfs_block_group_cache *cache;
11967 cache = btrfs_lookup_first_block_group(fs_info, start);
11970 start = cache->key.objectid + cache->key.offset;
11971 ret = btrfs_insert_item(trans, fs_info->extent_root,
11972 &cache->key, &cache->item,
11973 sizeof(cache->item));
11975 fprintf(stderr, "Error adding block group\n");
11978 btrfs_extent_post_op(trans, fs_info->extent_root);
11981 ret = reset_balance(trans, fs_info);
11983 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb.
 *
 * The owner root recorded in the header of @eb is read, a transaction is
 * started on it, and btrfs_search_slot() is run with cow enabled for the
 * first key of @eb, with path.lowest_level set to the level of @eb so the
 * search stops at that block.  The transaction is then committed.
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or the transaction cannot be started.  NOTE(review): the result of
 * btrfs_commit_transaction() is not stored on the visible line; the final
 * return is not visible in this view.
 */
11988 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11990 struct btrfs_path path;
11991 struct btrfs_trans_handle *trans;
11992 struct btrfs_key key;
11995 printf("Recowing metadata block %llu\n", eb->start);
11996 key.objectid = btrfs_header_owner(eb);
11997 key.type = BTRFS_ROOT_ITEM_KEY;
11998 key.offset = (u64)-1;
12000 root = btrfs_read_fs_root(root->fs_info, &key);
12001 if (IS_ERR(root)) {
12002 fprintf(stderr, "Couldn't find owner root %llu\n",
12004 return PTR_ERR(root);
12007 trans = btrfs_start_transaction(root, 1);
12009 return PTR_ERR(trans);
/* Search down to the level of eb only; nodes and leaves store keys differently */
12011 btrfs_init_path(&path);
12012 path.lowest_level = btrfs_header_level(eb);
12013 if (path.lowest_level)
12014 btrfs_node_key_to_cpu(eb, &key, 0);
12016 btrfs_item_key_to_cpu(eb, &key, 0);
12018 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12019 btrfs_commit_transaction(trans, root);
12020 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was found in.
 *
 * The root with id bad->root_id is read, a transaction is started on it,
 * the item with key bad->key is searched for deletion and removed with
 * btrfs_del_item(), and the transaction is committed.
 *
 * Returns PTR_ERR() of the failing call when the root cannot be read or
 * the transaction cannot be started.  The handling of the search result and
 * the final return are on lines not visible in this view.
 */
12024 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12026 struct btrfs_path path;
12027 struct btrfs_trans_handle *trans;
12028 struct btrfs_key key;
12031 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12032 bad->key.type, bad->key.offset);
12033 key.objectid = bad->root_id;
12034 key.type = BTRFS_ROOT_ITEM_KEY;
12035 key.offset = (u64)-1;
12037 root = btrfs_read_fs_root(root->fs_info, &key);
12038 if (IS_ERR(root)) {
12039 fprintf(stderr, "Couldn't find owner root %llu\n",
12041 return PTR_ERR(root);
12044 trans = btrfs_start_transaction(root, 1);
12046 return PTR_ERR(trans);
12048 btrfs_init_path(&path);
12049 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12055 ret = btrfs_del_item(trans, root, &path);
12057 btrfs_commit_transaction(trans, root);
12058 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block copy of @root.
 *
 * Inside a transaction, log_root and log_root_level of
 * root->fs_info->super_copy are set to 0; ret receives the result of
 * btrfs_commit_transaction(), or PTR_ERR(trans) when the transaction could
 * not be started.  The return statement is not visible in this view.
 */
12062 static int zero_log_tree(struct btrfs_root *root)
12064 struct btrfs_trans_handle *trans;
12067 trans = btrfs_start_transaction(root, 1);
12068 if (IS_ERR(trans)) {
12069 ret = PTR_ERR(trans);
12072 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12073 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12074 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert the checksums of the data range starting at @start.
 *
 * The range is processed one sector (csum_root->sectorsize) at a time:
 * each sector is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block() together with start + len and the bytenr of the
 * sector.  @buf is provided by the caller; the callers visible in this file
 * allocate one sector for it.
 *
 * The remaining parameter (len) and the error handling are on lines not
 * visible in this view.
 */
12078 static int populate_csum(struct btrfs_trans_handle *trans,
12079 struct btrfs_root *csum_root, char *buf, u64 start,
12086 while (offset < len) {
12087 sectorsize = csum_root->sectorsize;
12088 ret = read_extent_data(csum_root, buf, start + offset,
12092 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12093 start + offset, buf, sectorsize);
12096 offset += sectorsize;
/*
 * Fill the csum tree with the checksums of the regular file extents of one
 * fs/subvolume tree.
 *
 * A one-sector buffer is allocated, @cur_root is iterated item by item, and
 * for every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG the on-disk
 * range (disk_bytenr, disk_num_bytes) is passed to populate_csum().  An
 * -EEXIST result from populate_csum() is special-cased; the action taken is
 * on a line not visible here (presumably it is tolerated -- TODO confirm).
 *
 * The start key, the buffer release and the return statement are on lines
 * not visible in this view.
 */
12101 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12102 struct btrfs_root *csum_root,
12103 struct btrfs_root *cur_root)
12105 struct btrfs_path path;
12106 struct btrfs_key key;
12107 struct extent_buffer *node;
12108 struct btrfs_file_extent_item *fi;
12115 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12119 btrfs_init_path(&path);
12123 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12126 /* Iterate all regular file extents and fill its csum */
12128 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12130 if (key.type != BTRFS_EXTENT_DATA_KEY)
12132 node = path.nodes[0];
12133 slot = path.slots[0];
12134 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12135 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12137 start = btrfs_file_extent_disk_bytenr(node, fi);
12138 len = btrfs_file_extent_disk_num_bytes(node, fi);
12140 ret = populate_csum(trans, csum_root, buf, start, len);
12141 if (ret == -EEXIST)
12147 * TODO: if next leaf is corrupted, jump to nearest next valid
12150 ret = btrfs_next_item(cur_root, &path);
12160 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree.
 *
 * The ROOT_ITEM entries of the tree root are iterated starting at
 * BTRFS_FS_TREE_OBJECTID; the loop reacts to objectids above
 * BTRFS_LAST_FREE_OBJECTID, to non ROOT_ITEM keys and to roots for which
 * is_fstree() is false (the actions are on lines not visible here).  Every
 * remaining root is read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root().
 *
 * Error handling and the return statement are on lines not visible in this
 * view.
 */
12165 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12166 struct btrfs_root *csum_root)
12168 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12169 struct btrfs_path path;
12170 struct btrfs_root *tree_root = fs_info->tree_root;
12171 struct btrfs_root *cur_root;
12172 struct extent_buffer *node;
12173 struct btrfs_key key;
12177 btrfs_init_path(&path);
12178 key.objectid = BTRFS_FS_TREE_OBJECTID;
12180 key.type = BTRFS_ROOT_ITEM_KEY;
12181 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12190 node = path.nodes[0];
12191 slot = path.slots[0];
12192 btrfs_item_key_to_cpu(node, &key, slot);
12193 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12195 if (key.type != BTRFS_ROOT_ITEM_KEY)
12197 if (!is_fstree(key.objectid))
/* offset (u64)-1 is the lookup key form used with btrfs_read_fs_root() here */
12199 key.offset = (u64)-1;
12201 cur_root = btrfs_read_fs_root(fs_info, &key);
12202 if (IS_ERR(cur_root) || !cur_root) {
12203 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12207 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12212 ret = btrfs_next_item(tree_root, &path);
12222 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree.
 *
 * A one-sector buffer is allocated and the extent tree is iterated leaf by
 * leaf.  For every EXTENT_ITEM that carries BTRFS_EXTENT_FLAG_DATA,
 * populate_csum() is called with the extent start (key.objectid); the
 * length argument is on a line not visible here (presumably key.offset --
 * TODO confirm).
 *
 * Error handling, the buffer release and the return statement are on lines
 * not visible in this view.
 */
12226 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12227 struct btrfs_root *csum_root)
12229 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12230 struct btrfs_path path;
12231 struct btrfs_extent_item *ei;
12232 struct extent_buffer *leaf;
12234 struct btrfs_key key;
12237 btrfs_init_path(&path);
12239 key.type = BTRFS_EXTENT_ITEM_KEY;
12241 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12243 btrfs_release_path(&path);
12247 buf = malloc(csum_root->sectorsize);
12249 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
12254 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12255 ret = btrfs_next_leaf(extent_root, &path);
12263 leaf = path.nodes[0];
12265 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12266 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Only data extents carry checksums */
12271 ei = btrfs_item_ptr(leaf, path.slots[0],
12272 struct btrfs_extent_item);
12273 if (!(btrfs_extent_flags(leaf, ei) &
12274 BTRFS_EXTENT_FLAG_DATA)) {
12279 ret = populate_csum(trans, csum_root, buf, key.objectid,
12286 btrfs_release_path(&path);
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent info, so in that case the
 * extent tree cannot be used as the source of data extents. When
 * search_fs_tree is non-zero the fs/subvolume trees are walked instead;
 * otherwise the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
/*
 * Tear down the global roots_info_cache: its entries are removed one by one,
 * then the cache_tree itself is freed and the global pointer reset to NULL so
 * that build_roots_info_cache() allocates a fresh tree next time.
 */
12308 static void free_roots_info_cache(void)
/* The cache is only allocated on demand, so it may not exist. */
12310 if (!roots_info_cache)
12313 while (!cache_tree_empty(roots_info_cache)) {
12314 struct cache_extent *entry;
12315 struct root_item_info *rii;
12317 entry = first_cache_extent(roots_info_cache);
12320 remove_cache_extent(roots_info_cache, entry);
/* Each cache_extent is embedded in a root_item_info; recover the container. */
12321 rii = container_of(entry, struct root_item_info, cache_extent);
12325 free(roots_info_cache);
12326 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * The cache is keyed by root id (cache_extent.start = root id, size = 1).
 * For every tree block extent whose first inline ref is a TREE_BLOCK_REF, the
 * entry of the referencing root tracks the highest level seen so far, and the
 * bytenr and generation of the block found at that level; node_count is reset
 * to 1 each time a new highest level is recorded.
 *
 * @info: filesystem whose extent_root is scanned
 */
12329 static int build_roots_info_cache(struct btrfs_fs_info *info)
12332 struct btrfs_key key;
12333 struct extent_buffer *leaf;
12334 struct btrfs_path path;
/* Allocate the global cache tree on first use. */
12336 if (!roots_info_cache) {
12337 roots_info_cache = malloc(sizeof(*roots_info_cache));
12338 if (!roots_info_cache)
12340 cache_tree_init(roots_info_cache);
12343 btrfs_init_path(&path);
12345 key.type = BTRFS_EXTENT_ITEM_KEY;
12347 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12350 leaf = path.nodes[0];
12353 struct btrfs_key found_key;
12354 struct btrfs_extent_item *ei;
12355 struct btrfs_extent_inline_ref *iref;
12356 int slot = path.slots[0];
12361 struct cache_extent *entry;
12362 struct root_item_info *rii;
/* End of the current leaf: move on and refresh leaf/slot. */
12364 if (slot >= btrfs_header_nritems(leaf)) {
12365 ret = btrfs_next_leaf(info->extent_root, &path);
12372 leaf = path.nodes[0];
12373 slot = path.slots[0];
12376 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are of interest. */
12378 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12379 found_key.type != BTRFS_METADATA_ITEM_KEY)
12382 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12383 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is filtered out here. */
12385 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12386 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset. Otherwise a btrfs_tree_block_info sits
 * between the extent item and the inline ref and carries the level.
 */
12389 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12390 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12391 level = found_key.offset;
12393 struct btrfs_tree_block_info *binfo;
12395 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12396 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12397 level = btrfs_tree_block_level(leaf, binfo);
12401 * For a root extent, it must be of the following type and the
12402 * first (and only one) iref in the item.
12404 type = btrfs_extent_inline_ref_type(leaf, iref);
12405 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For this ref type the inline ref offset is used as the owning root id. */
12408 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12409 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries start with level (u8)-1, meaning "no node recorded yet". */
12411 rii = malloc(sizeof(struct root_item_info));
12416 rii->cache_extent.start = root_id;
12417 rii->cache_extent.size = 1;
12418 rii->level = (u8)-1;
12419 entry = &rii->cache_extent;
12420 ret = insert_cache_extent(roots_info_cache, entry);
12423 rii = container_of(entry, struct root_item_info,
12427 ASSERT(rii->cache_extent.start == root_id);
12428 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first node seen) replaces the recorded candidate. */
12430 if (level > rii->level || rii->level == (u8)-1) {
12431 rii->level = level;
12432 rii->bytenr = found_key.objectid;
12433 rii->gen = btrfs_extent_generation(leaf, ei);
12434 rii->node_count = 1;
12435 } else if (level == rii->level) {
12443 btrfs_release_path(&path);
/*
 * Compare the root item at the path's current slot with the root node that
 * build_roots_info_cache() recorded for root_key->objectid and, when they
 * disagree and read_only_mode is zero, rewrite the item's bytenr, level and
 * generation in the leaf.
 *
 * @path:           path pointing at the ROOT_ITEM in the tree root
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch without writing
 */
12448 static int maybe_repair_root_item(struct btrfs_path *path,
12449 const struct btrfs_key *root_key,
12450 const int read_only_mode)
12452 const u64 root_id = root_key->objectid;
12453 struct cache_extent *entry;
12454 struct root_item_info *rii;
12455 struct btrfs_root_item ri;
12456 unsigned long offset;
12458 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12461 "Error: could not find extent items for root %llu\n",
12462 root_key->objectid);
12466 rii = container_of(entry, struct root_item_info, cache_extent);
12467 ASSERT(rii->cache_extent.start == root_id);
12468 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate node is reported as an error. */
12470 if (rii->node_count != 1) {
12472 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item. */
12477 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12478 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12480 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12481 btrfs_root_level(&ri) != rii->level ||
12482 btrfs_root_generation(&ri) != rii->gen) {
12485 * If we're in repair mode but our caller told us to not update
12486 * the root item, i.e. just check if it needs to be updated, don't
12487 * print this message, since the caller will call us again shortly
12488 * for the same root item without read only mode (the caller will
12489 * open a transaction first).
12491 if (!(read_only_mode && repair))
12493 "%sroot item for root %llu,"
12494 " current bytenr %llu, current gen %llu, current level %u,"
12495 " new bytenr %llu, new gen %llu, new level %u\n",
12496 (read_only_mode ? "" : "fixing "),
12498 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12499 btrfs_root_level(&ri),
12500 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree gets its own message. */
12502 if (btrfs_root_generation(&ri) > rii->gen) {
12504 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12505 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back over the item in the leaf. */
12509 if (!read_only_mode) {
12510 btrfs_set_root_bytenr(&ri, rii->bytenr);
12511 btrfs_set_root_level(&ri, rii->level);
12512 btrfs_set_root_generation(&ri, rii->gen);
12513 write_extent_buffer(path->nodes[0], &ri,
12514 offset, sizeof(ri));
12524 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12525 * caused read-only snapshots to be corrupted if they were created at a moment
12526 * when the source subvolume/snapshot had orphan items. The issue was that the
12527 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12528 * node instead of the post orphan cleanup root node.
12529 * So this function, and its callees, just detects and fixes those cases. Even
12530 * though the regression was for read-only snapshots, this function applies to
12531 * any snapshot/subvolume root.
12532 * This must be run before any other repair code - not doing it so, makes other
12533 * repair code delete or modify backrefs in the extent tree for example, which
12534 * will result in an inconsistent fs after repairing the root items.
12536 static int repair_root_items(struct btrfs_fs_info *info)
12538 struct btrfs_path path;
12539 struct btrfs_key key;
12540 struct extent_buffer *leaf;
12541 struct btrfs_trans_handle *trans = NULL;
12544 int need_trans = 0;
12546 btrfs_init_path(&path);
12548 ret = build_roots_info_cache(info);
12552 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12553 key.type = BTRFS_ROOT_ITEM_KEY;
12558 * Avoid opening and committing transactions if a leaf doesn't have
12559 * any root items that need to be fixed, so that we avoid rotating
12560 * backup roots unnecessarily.
12563 trans = btrfs_start_transaction(info->tree_root, 1);
12564 if (IS_ERR(trans)) {
12565 ret = PTR_ERR(trans);
12570 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12574 leaf = path.nodes[0];
12577 struct btrfs_key found_key;
12579 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12580 int no_more_keys = find_next_key(&path, &key);
12582 btrfs_release_path(&path);
12584 ret = btrfs_commit_transaction(trans,
12596 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12598 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12600 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12603 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12607 if (!trans && repair) {
12610 btrfs_release_path(&path);
12620 free_roots_info_cache();
12621 btrfs_release_path(&path);
12623 btrfs_commit_transaction(trans, info->tree_root);
12630 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12632 struct btrfs_trans_handle *trans;
12633 struct btrfs_block_group_cache *bg_cache;
12637 /* Clear all free space cache inodes and its extent data */
12639 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12642 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12645 current = bg_cache->key.objectid + bg_cache->key.offset;
12648 /* Don't forget to set cache_generation to -1 */
12649 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12650 if (IS_ERR(trans)) {
12651 error("failed to update super block cache generation");
12652 return PTR_ERR(trans);
12654 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12655 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for `btrfs check`, one string per output line: the synopsis
 * first, then the short and long descriptions, then one entry per option.
 * cmd_check() passes this table to usage() when the command line is invalid.
 */
12660 const char * const cmd_check_usage[] = {
12661 "btrfs check [options] <device>",
12662 "Check structural integrity of a filesystem (unmounted).",
12663 "Check structural integrity of an unmounted filesystem. Verify internal",
12664 "trees' consistency and item connectivity. In the repair mode try to",
12665 "fix the problems found. ",
12666 "WARNING: the repair mode is considered dangerous",
12668 "-s|--super <superblock> use this superblock copy",
12669 "-b|--backup use the first valid backup root copy",
12670 "--repair try to repair the filesystem",
12671 "--readonly run in read-only mode (default)",
12672 "--init-csum-tree create a new CRC tree",
12673 "--init-extent-tree create a new extent tree",
12674 "--mode <MODE> allows choice of memory/IO trade-offs",
12675 " where MODE is one of:",
12676 " original - read inodes and extents to memory (requires",
12677 " more memory, does less IO)",
12678 " lowmem - try to use less memory but read blocks again",
12680 "--check-data-csum verify checksums of data blocks",
12681 "-Q|--qgroup-report print a report on qgroup consistency",
12682 "-E|--subvol-extents <subvolid>",
12683 " print subvolume extents and sharing state",
12684 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12685 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12686 "-p|--progress indicate progress",
12687 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of `btrfs check`.
 *
 * Parses the command line, refuses to run on a mounted device, opens the
 * filesystem named by argv[optind] and then either performs one of the
 * stand-alone actions (clearing the space cache, qgroup report, subvolume
 * extent report) or runs the check passes in order: extents, root items, free
 * space cache/tree, fs roots, csums, root refs and quota groups. The byte
 * counters gathered along the way are printed at the end.
 */
12691 int cmd_check(int argc, char **argv)
12693 struct cache_tree root_cache;
12694 struct btrfs_root *root;
12695 struct btrfs_fs_info *info;
12698 u64 tree_root_bytenr = 0;
12699 u64 chunk_root_bytenr = 0;
12700 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12704 int init_csum_tree = 0;
12706 int clear_space_cache = 0;
12707 int qgroup_report = 0;
12708 int qgroups_repaired = 0;
12709 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above the 8-bit character range. */
12713 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12714 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12715 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12716 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12717 static const struct option long_options[] = {
12718 { "super", required_argument, NULL, 's' },
12719 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12720 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12721 { "init-csum-tree", no_argument, NULL,
12722 GETOPT_VAL_INIT_CSUM },
12723 { "init-extent-tree", no_argument, NULL,
12724 GETOPT_VAL_INIT_EXTENT },
12725 { "check-data-csum", no_argument, NULL,
12726 GETOPT_VAL_CHECK_CSUM },
12727 { "backup", no_argument, NULL, 'b' },
12728 { "subvol-extents", required_argument, NULL, 'E' },
12729 { "qgroup-report", no_argument, NULL, 'Q' },
12730 { "tree-root", required_argument, NULL, 'r' },
12731 { "chunk-root", required_argument, NULL,
12732 GETOPT_VAL_CHUNK_TREE },
12733 { "progress", no_argument, NULL, 'p' },
12734 { "mode", required_argument, NULL,
12736 { "clear-space-cache", required_argument, NULL,
12737 GETOPT_VAL_CLEAR_SPACE_CACHE},
12738 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string below only lists a, s, b, r and p,
 * while the usage text advertises -Q and -E as short forms; confirm whether
 * those are meant to be accepted.
 */
12741 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12745 case 'a': /* ignored */ break;
12747 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* The superblock mirror index is range-checked, then turned into a byte offset. */
12750 num = arg_strtou64(optarg);
12751 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12753 "super mirror should be less than %d",
12754 BTRFS_SUPER_MIRROR_MAX);
12757 bytenr = btrfs_sb_offset(((int)num));
12758 printf("using SB copy %llu, bytenr %llu\n", num,
12759 (unsigned long long)bytenr);
12765 subvolid = arg_strtou64(optarg);
12768 tree_root_bytenr = arg_strtou64(optarg);
12770 case GETOPT_VAL_CHUNK_TREE:
12771 chunk_root_bytenr = arg_strtou64(optarg);
12774 ctx.progress_enabled = true;
12778 usage(cmd_check_usage);
/* Every option that modifies the filesystem also requests a writable open. */
12779 case GETOPT_VAL_REPAIR:
12780 printf("enabling repair mode\n");
12782 ctree_flags |= OPEN_CTREE_WRITES;
12784 case GETOPT_VAL_READONLY:
12787 case GETOPT_VAL_INIT_CSUM:
12788 printf("Creating a new CRC tree\n");
12789 init_csum_tree = 1;
12791 ctree_flags |= OPEN_CTREE_WRITES;
12793 case GETOPT_VAL_INIT_EXTENT:
12794 init_extent_tree = 1;
12795 ctree_flags |= (OPEN_CTREE_WRITES |
12796 OPEN_CTREE_NO_BLOCK_GROUPS);
12799 case GETOPT_VAL_CHECK_CSUM:
12800 check_data_csum = 1;
12802 case GETOPT_VAL_MODE:
12803 check_mode = parse_check_mode(optarg);
12804 if (check_mode == CHECK_MODE_UNKNOWN) {
12805 error("unknown mode: %s", optarg);
/* clear_space_cache records the requested version: 1 for v1, 2 for v2. */
12809 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12810 if (strcmp(optarg, "v1") == 0) {
12811 clear_space_cache = 1;
12812 } else if (strcmp(optarg, "v2") == 0) {
12813 clear_space_cache = 2;
12814 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12817 "invalid argument to --clear-space-cache, must be v1 or v2");
12820 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is expected. */
12825 if (check_argc_exact(argc - optind, 1))
12826 usage(cmd_check_usage);
12828 if (ctx.progress_enabled) {
12829 ctx.tp = TASK_NOTHING;
12830 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12833 /* This check is the only reason for --readonly to exist */
12834 if (readonly && repair) {
12835 error("repair options are not compatible with --readonly");
12840 * Not supported yet
12842 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12843 error("low memory mode doesn't support repair yet");
12848 cache_tree_init(&root_cache);
12850 if((ret = check_mounted(argv[optind])) < 0) {
12851 error("could not check mount status: %s", strerror(-ret));
12855 error("%s is currently mounted, aborting", argv[optind]);
12861 /* only allow partial opening under repair mode */
12863 ctree_flags |= OPEN_CTREE_PARTIAL;
12865 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12866 chunk_root_bytenr, ctree_flags);
12868 error("cannot open file system");
12874 global_info = info;
12875 root = info->fs_root;
/* Stand-alone action: clear the v1 cache, unless the fs uses the free space tree. */
12876 if (clear_space_cache == 1) {
12877 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12879 "free space cache v2 detected, use --clear-space-cache v2");
12883 printf("Clearing free space cache\n");
12884 ret = clear_free_space_cache(info);
12886 error("failed to clear free space cache");
12889 printf("Free space cache cleared\n");
12892 } else if (clear_space_cache == 2) {
12893 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12894 printf("no free space cache v2 to clear\n");
12898 printf("Clear free space cache v2\n");
12899 ret = btrfs_clear_free_space_tree(info);
12901 error("failed to clear free space cache v2: %d", ret);
12904 printf("free space cache v2 cleared\n");
12910 * repair mode will force us to commit transaction which
12911 * will make us fail to load log tree when mounting.
12913 if (repair && btrfs_super_log_root(info->super_copy)) {
12914 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12920 ret = zero_log_tree(root);
12923 error("failed to zero log tree: %d", ret);
12928 uuid_unparse(info->super_copy->fsid, uuidbuf);
12929 if (qgroup_report) {
12930 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12932 ret = qgroup_verify_all(info);
12939 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12940 subvolid, argv[optind], uuidbuf);
12941 ret = print_extent_state(info, subvolid);
12945 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, device and chunk roots must be readable for any check to proceed. */
12947 if (!extent_buffer_uptodate(info->tree_root->node) ||
12948 !extent_buffer_uptodate(info->dev_root->node) ||
12949 !extent_buffer_uptodate(info->chunk_root->node)) {
12950 error("critical roots corrupted, unable to check the filesystem");
/* Tree re-initialization runs inside a single transaction. */
12956 if (init_extent_tree || init_csum_tree) {
12957 struct btrfs_trans_handle *trans;
12959 trans = btrfs_start_transaction(info->extent_root, 0);
12960 if (IS_ERR(trans)) {
12961 error("error starting transaction");
12962 ret = PTR_ERR(trans);
12967 if (init_extent_tree) {
12968 printf("Creating a new extent tree\n");
12969 ret = reinit_extent_tree(trans, info);
12975 if (init_csum_tree) {
12976 printf("Reinitialize checksum tree\n");
12977 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12979 error("checksum tree initialization failed: %d",
12986 ret = fill_csum_tree(trans, info->csum_root,
12990 error("checksum tree refilling failed: %d", ret);
12995 * Ok now we commit and run the normal fsck, which will add
12996 * extent entries for all of the items it finds.
12998 ret = btrfs_commit_transaction(trans, info->extent_root);
13003 if (!extent_buffer_uptodate(info->extent_root->node)) {
13004 error("critical: extent_root, unable to check the filesystem");
13009 if (!extent_buffer_uptodate(info->csum_root->node)) {
13010 error("critical: csum_root, unable to check the filesystem");
/* Pass: extents and chunks, using the implementation selected by --mode. */
13016 if (!ctx.progress_enabled)
13017 fprintf(stderr, "checking extents\n");
13018 if (check_mode == CHECK_MODE_LOWMEM)
13019 ret = check_chunks_and_extents_v2(root);
13021 ret = check_chunks_and_extents(root);
13025 "errors found in extent allocation tree or chunk allocation");
/* Pass: detect (and in repair mode fix) outdated root items. */
13027 ret = repair_root_items(info);
13030 error("failed to repair root items: %s", strerror(-ret));
13034 fprintf(stderr, "Fixed %d roots.\n", ret);
13036 } else if (ret > 0) {
13038 "Found %d roots with an outdated root item.\n",
13041 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass: free space cache, or free space tree when that feature is enabled. */
13047 if (!ctx.progress_enabled) {
13048 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13049 fprintf(stderr, "checking free space tree\n");
13051 fprintf(stderr, "checking free space cache\n");
13053 ret = check_space_cache(root);
13056 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13057 error("errors found in free space tree");
13059 error("errors found in free space cache");
13064 * We used to have to have these hole extents in between our real
13065 * extents so if we don't have this flag set we need to make sure there
13066 * are no gaps in the file extents for inodes, otherwise we can just
13067 * ignore it when this happens.
13069 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13070 if (!ctx.progress_enabled)
13071 fprintf(stderr, "checking fs roots\n");
13072 if (check_mode == CHECK_MODE_LOWMEM)
13073 ret = check_fs_roots_v2(root->fs_info);
13075 ret = check_fs_roots(root, &root_cache);
13078 error("errors found in fs roots");
13082 fprintf(stderr, "checking csums\n");
13083 ret = check_csums(root);
13086 error("errors found in csum tree");
13090 fprintf(stderr, "checking root refs\n");
13091 /* For low memory mode, check_fs_roots_v2 handles root refs */
13092 if (check_mode != CHECK_MODE_LOWMEM) {
13093 ret = check_root_refs(root, &root_cache);
13096 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs. */
13101 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13102 struct extent_buffer *eb;
13104 eb = list_first_entry(&root->fs_info->recow_ebs,
13105 struct extent_buffer, recow);
13106 list_del_init(&eb->recow);
13107 ret = recow_extent_buffer(root, eb);
13110 error("fails to fix transid errors");
/* Drain the list of items that earlier passes queued for deletion. */
13115 while (!list_empty(&delete_items)) {
13116 struct bad_item *bad;
13118 bad = list_first_entry(&delete_items, struct bad_item, list);
13119 list_del_init(&bad->list);
13121 ret = delete_bad_item(root, bad);
13127 if (info->quota_enabled) {
13128 fprintf(stderr, "checking quota groups\n");
13129 ret = qgroup_verify_all(info);
13132 error("failed to check quota groups");
13136 ret = repair_qgroups(info, &qgroups_repaired);
13139 error("failed to repair quota groups");
/* Buffers still on recow_ebs at this point are reported as transid errors. */
13145 if (!list_empty(&root->fs_info->recow_ebs)) {
13146 error("transid errors in file system");
13151 if (found_old_backref) { /*
13152 * there was a disk format change when mixed
13153 * backref was in testing tree. The old format
13154 * existed about one week.
13156 printf("\n * Found old mixed backref format. "
13157 "The old format is not supported! *"
13158 "\n * Please mount the FS in readonly mode, "
13159 "backup data and re-format the FS. *\n\n");
13162 printf("found %llu bytes used, ",
13163 (unsigned long long)bytes_used);
13165 printf("error(s) found\n");
13167 printf("no error found\n");
13168 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13169 printf("total tree bytes: %llu\n",
13170 (unsigned long long)total_btree_bytes);
13171 printf("total fs tree bytes: %llu\n",
13172 (unsigned long long)total_fs_tree_bytes);
13173 printf("total extent tree bytes: %llu\n",
13174 (unsigned long long)total_extent_tree_bytes);
13175 printf("btree space waste bytes: %llu\n",
13176 (unsigned long long)btree_space_waste);
13177 printf("file data blocks allocated: %llu\n referenced %llu\n",
13178 (unsigned long long)data_bytes_allocated,
13179 (unsigned long long)data_bytes_referenced);
13181 free_qgroup_counts();
13182 free_root_recs_tree(&root_cache);
13186 if (ctx.progress_enabled)
13187 task_deinit(ctx.info);