2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/* File-scope state; only the uses visible in this part of the file are annotated. */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() does not raise I_ERR_FILE_EXTENT_DISCOUNT. */
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bits, one flag per bit so that several can be OR-ed together.
 * Bit 0 is not assigned in this group; the first flag starts at bit 1.
 * NOTE(review): the code that sets and tests these bits is not visible in
 * this part of the file -- confirm the users before renumbering anything.
 */
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During the extent scan, it is stored in root->orphan_data_extent.
144 * During the fs tree scan, it is then moved to inode_rec->orphan_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Backref error bits.  They are OR-ed into a backref's errors field (for
 * example by add_inode_backref()) and turned into text by print_ref_error().
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Inode error bits.  They are OR-ed into inode_record::errors and turned
 * into text by print_inode_error().
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are for
 * internal use only.
 *
 * Every flag must own a distinct bit so that several errors can be OR-ed
 * into one value and still be told apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * For kernel scrub workaround.  This used to be (1 << 4), which aliased
 * REFERENCER_MISMATCH; it now takes the first unused bit instead.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper to allow reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This merges overlapping or adjacent holes for copy_file_extent_holes(),
476 * which ensures there won't be contiguous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will split the hole if needed and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk moves up out of the active shared node at level.
 *
 * Visible behaviour: nothing is done here for the root level. Otherwise the
 * levels above are scanned for the next active node (index i, which must be
 * below BTRFS_MAX_LEVEL), the current active slot in wc->nodes is cleared
 * and wc->active_node becomes i. If the new active node is below the root
 * level, or the root item still has references, the old node must hold more
 * than one reference and is spliced into the new active node.
 * NOTE(review): the loop body, the else branch and the return values are
 * elided in this excerpt - confirm against the full file.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how child_root_id relates to parent_root_id using the tree root.
 *
 * Two searches are made in root->fs_info->tree_root:
 *  1. an exact ROOT_REF key (parent_root_id, ROOT_REF, child_root_id);
 *  2. a scan over the ROOT_BACKREF items of child_root_id, stopping at the
 *     first key with a different objectid or type, and looking for one whose
 *     offset equals parent_root_id.
 * When neither search matches, the final return is 0 if has_parent is set
 * and 2 otherwise.
 * NOTE(review): the handling of the first search result, where has_parent
 * is set and the early returns are elided in this excerpt - confirm against
 * the full file.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Record the directory entries packed into the DIR_ITEM/DIR_INDEX item at
 * slot of eb.
 *
 * The current inode record of active_node is marked as having a dir item.
 * For every btrfs_dir_item in the item:
 *  - its name length is added to rec->found_size;
 *  - the name is read into namebuf, using BTRFS_NAME_LEN bytes and the
 *    REF_ERR_NAME_TOO_LONG error when name_len exceeds BTRFS_NAME_LEN;
 *  - a backref is added to inode_cache when the entry points at an inode
 *    item, to root_cache when it points at a root item, and otherwise an
 *    "invalid location" message is printed and the backref is filed under
 *    BTRFS_MULTIPLE_OBJECTIDS in inode_cache.
 * A DIR_INDEX item for which nritems exceeds one is flagged with
 * I_ERR_DUP_DIR_INDEX.
 * NOTE(review): the updates of cur and nritems and the return value are
 * elided in this excerpt; nritems presumably counts the entries seen.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
/* Advance to the next packed entry: header, then name, then data. */
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into the INODE_REF item at slot of eb
 * as a backref in the inode cache of active_node.
 *
 * For each ref the name length and index are read and the name is copied
 * into namebuf; when name_len exceeds BTRFS_NAME_LEN, BTRFS_NAME_LEN bytes
 * are used and REF_ERR_NAME_TOO_LONG is reported. The backref is added for
 * inode key->objectid with key->offset as the directory.
 * NOTE(review): the update of cur and the return value are elided in this
 * excerpt - confirm against the full file.
 */
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref: header followed by the name. */
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into the INODE_EXTREF item at slot
 * of eb as a backref in the inode cache of active_node.
 *
 * Unlike process_inode_ref(), the parent directory is read from the extref
 * itself rather than from key->offset. When name_len exceeds BTRFS_NAME_LEN,
 * BTRFS_NAME_LEN bytes are used and REF_ERR_NAME_TOO_LONG is reported.
 * NOTE(review): the update of cur and the return value are elided in this
 * excerpt - confirm against the full file.
 */
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref: header followed by the name. */
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * [start, start + len) and report the result through found.
 *
 * Visible behaviour: after the initial search in csum_root, an inexact hit
 * inspects the previous slot to see whether it is also a csum item. The
 * scan then walks items (moving to the next leaf at the end of a leaf),
 * stops at the first non-csum key or at a key at or beyond start + len, and
 * for each item derives the end of the covered range as
 *   key.offset + (item size / csum size) * sectorsize
 * to clamp the overlapping size against len.
 * NOTE(review): the accumulation into *found, the adjustments of start and
 * len and the return value are elided in this excerpt - confirm against
 * the full file.
 */
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1683 if (key.offset > start)
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1698 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (at slot of eb) against the current inode
 * record of active_node and sanity-check it.
 *
 * Visible behaviour:
 *  - The record must belong to key->objectid and hold at most one ref.
 *  - The first extent seen initialises extent_start/extent_end; later ones
 *    are compared against extent_end: an earlier offset flags
 *    I_ERR_FILE_EXTENT_OVERLAP, a later one records a hole in rec->holes.
 *  - Inline extents add their inline length to found_size, then the length
 *    is rounded up to the sector size.
 *  - Regular and preallocated extents flag I_ERR_BAD_FILE_EXTENT when
 *    num_bytes is zero or unaligned, when num_bytes + offset exceeds
 *    ram_bytes, or when a preallocated extent carries compression,
 *    encryption or other encoding; num_bytes counts towards found_size only
 *    when disk_bytenr is non-zero.
 *  - rec->extent_end advances to key->offset + num_bytes.
 *  - For extents with a disk_bytenr outside the data reloc tree, checksum
 *    coverage is counted with count_csum_range(); the result feeds
 *    found_csum_item/some_csum_missing for regular extents and
 *    I_ERR_ODD_CSUM_ITEM for preallocated ones.
 * NOTE(review): several conditions (for example the check guarding the
 * inline I_ERR_BAD_FILE_EXTENT and the tests on found) and the return
 * value are elided in this excerpt - confirm against the full file.
 */
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf eb into the inode bookkeeping of the active
 * shared node.
 *
 * A leaf reached while the active node is the root level of a root with
 * zero refs is handled as a special case up front. Otherwise each item key
 * is examined: free-space objects and orphan items get dedicated handling,
 * and whenever the key moves on to a higher inode number the previous
 * record is marked checked, offered to maybe_free_inode_rec() and replaced
 * by the record for the new inode. The item is then dispatched by key type
 * to the dir item, inode ref, inode extref, inode item or file extent
 * handler.
 * NOTE(review): the statements taken for the special cases (presumably
 * return/continue), the switch header and the return value are elided in
 * this excerpt - confirm against the full file.
 */
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1833 BUG_ON(IS_ERR(active_node->current));
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level lookup cache, indexed by tree level: the byte number of the
 * block last looked up on that level, the reference count found for it,
 * and the flag consulted by the walkers to decide whether that block is
 * checked from the current root.
 * NOTE(review): the opening line of the enclosing struct is elided in this
 * excerpt; other code in this file refers to it as struct node_refs.
 */
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
/*
 * Forward declarations: both functions are used by process_one_leaf_v2()
 * before their definitions appear later in the file.
 */
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * Visible behaviour: the leaf is first scanned for the first inode item or
 * the first change of inode number; path->slots[0] can be set to nritems.
 * check_inode_item() is then called, with its result OR-ed into err, and
 * LAST_ITEM in err receives special handling. While the current leaf is
 * still the one the function started on (same start as cur_bytenr) there
 * are more inodes to process. Once the path has moved to another leaf, the
 * levels from the root downwards are compared with the byte numbers cached
 * in nrefs, refreshed through update_nodes_refs() when they differ, and
 * levels that do not need checking lead to buffers below *level being
 * released from the path.
 * NOTE(review): loop structure, gotos and the way err and ret are combined
 * for the return value are elided in this excerpt - confirm against the
 * full file.
 */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1884 int root_level = btrfs_header_level(root->node);
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1902 path->slots[0] = nritems;
1908 err |= check_inode_item(root, path, ext_ref);
1910 if (err & LAST_ITEM)
1913 /* still have inode items in this leaf */
1914 if (cur->start == cur_bytenr)
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1923 for (i = root_level; i >= 0; i--) {
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1933 if (!nrefs->need_check[i]) {
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of node, starting at slot and running to
 * the last item. Each child is requested by its block pointer and pointer
 * generation, using the node size of the root as the block size.
 * NOTE(review): the check performed on level after it is read is elided in
 * this excerpt - confirm against the full file.
 */
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1960 level = btrfs_header_level(node);
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same as the one
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
/*
 * Cross-check a child block against the pointer stored at slot in parent.
 *
 * Three properties are compared and reported on stderr when they differ:
 *  - the key in the parent slot against the first key of the child (item
 *    key for a leaf, node key otherwise), compared bytewise;
 *  - the block pointer in the parent slot against the byte number in the
 *    child header;
 *  - the pointer generation in the parent slot against the generation in
 *    the child header.
 * NOTE(review): the generation message passes the child header generation
 * as "wanted" and the parent pointer generation as "have", which is the
 * reverse of the block-number message - confirm the intended order.
 * NOTE(review): the value returned on a mismatch is elided in this excerpt.
 */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find all of its root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block should be checked from this root, given the
 * ulist of all roots that reference it.
 *
 * A block referenced by a single root is a special case. Otherwise the
 * first entry of the rb-tree of the ulist is compared with root->objectid,
 * so that only one of the referencing roots performs the check.
 * NOTE(review): the return values of each branch are elided in this
 * excerpt, and the claim that the first rb-tree entry is the smallest root
 * id depends on how the ulist orders its nodes - confirm both.
 */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached entry of nrefs for the given level when it does not
 * already describe the block at bytenr.
 *
 * On a cache miss the extent reference count is looked up and stored
 * together with bytenr. The set of roots referencing the block is fetched
 * with btrfs_find_all_roots() and passed to need_check(), whose result is
 * stored in need_check[level]; another visible path stores 1 instead.
 * NOTE(review): the conditions selecting between these paths, the release
 * of the roots ulist and the return value are elided in this excerpt -
 * confirm against the full file.
 */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2072 check = need_check(root, roots);
2074 nrefs->need_check[level] = check;
2076 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing leaves
 * with process_one_leaf() and tracking shared blocks in wc.
 *
 * Visible behaviour:
 *  - The reference count of the starting block is taken from nrefs when its
 *    byte number is cached there, otherwise looked up and cached; the block
 *    can then be entered as a shared node.
 *  - In the loop, a block whose header level disagrees with *level, or
 *    whose slot is past the last item, ends the descent.
 *  - For a node, the child block pointer and generation are read, the child
 *    reference count is resolved through the same nrefs cache, and
 *    enter_shared_node() can cause the child to be skipped (slot advanced).
 *  - The child is taken from the block cache or read from disk after
 *    issuing readahead; an unreadable child is recorded as a corrupt
 *    extent. The child is validated with check_child_node() and
 *    btrfs_check_leaf()/btrfs_check_node() before it becomes the new
 *    path->nodes[*level] with slot 0.
 *  - On exit the slot at *level is set to the item count of that block.
 * NOTE(review): branch conditions, error handling and return values are
 * elided in this excerpt - confirm against the full file.
 */
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 ret = process_one_leaf(root, cur, wc);
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2157 ret = enter_shared_node(root, bytenr, refs,
2160 path->slots[*level]++;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2186 ret = check_child_node(cur, path->slots[*level], next);
2188 free_extent_buffer(next);
2193 if (btrfs_is_leaf(next))
2194 status = btrfs_check_leaf(root, NULL, next);
2196 status = btrfs_check_node(root, NULL, next);
2197 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198 free_extent_buffer(next);
2203 *level = *level - 1;
2204 free_extent_buffer(path->nodes[*level]);
2205 path->nodes[*level] = next;
2206 path->slots[*level] = 0;
2209 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; the same prototype is also declared earlier in this file. */
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214 unsigned int ext_ref);
2217 * Returns >0 Found error, should continue
2218 * Returns <0 Fatal error, must exit the whole check
2219 * Returns 0 No errors found
/*
 * Descend from path->nodes[*level] towards the leaves, validating each
 * block and processing leaves with process_one_leaf_v2().
 *
 * Visible behaviour:
 *  - The nrefs entry for the starting block is refreshed first.
 *  - In the loop, a block whose header level disagrees with *level, or
 *    whose slot is past the last item, ends the descent.
 *  - The current block is validated with btrfs_check_leaf() or
 *    btrfs_check_node(); leaves are handed to process_one_leaf_v2().
 *  - For a node, the nrefs entry of the child is refreshed and a child that
 *    does not need checking is skipped by advancing the slot.
 *  - The child is taken from the block cache or read from disk after
 *    issuing readahead; an unreadable child is recorded as a corrupt
 *    extent. The child is validated with check_child_node() and
 *    btrfs_check_leaf()/btrfs_check_node() before it becomes the new
 *    path->nodes[*level] with slot 0.
 * NOTE(review): branch conditions, error handling and return values are
 * elided in this excerpt - confirm against the full file.
 */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222 int *level, struct node_refs *nrefs, int ext_ref)
2224 enum btrfs_tree_block_status status;
2227 struct extent_buffer *next;
2228 struct extent_buffer *cur;
2232 WARN_ON(*level < 0);
2233 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 ret = update_nodes_refs(root, path->nodes[*level]->start,
2240 while (*level >= 0) {
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243 cur = path->nodes[*level];
2245 if (btrfs_header_level(cur) != *level)
2248 if (path->slots[*level] >= btrfs_header_nritems(cur))
2250 /* Don't forget to check leaf/node validity */
2252 ret = btrfs_check_leaf(root, NULL, cur);
2253 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = process_one_leaf_v2(root, path, nrefs,
2261 ret = btrfs_check_node(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2267 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269 blocksize = root->nodesize;
2271 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2274 if (!nrefs->need_check[*level - 1]) {
2275 path->slots[*level]++;
2279 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root, bytenr, blocksize,
2285 if (!extent_buffer_uptodate(next)) {
2286 struct btrfs_key node_key;
2288 btrfs_node_key_to_cpu(path->nodes[*level],
2290 path->slots[*level]);
2291 btrfs_add_corrupt_extent_record(root->fs_info,
2293 path->nodes[*level]->start,
2294 root->nodesize, *level);
2300 ret = check_child_node(cur, path->slots[*level], next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/*
 * Climb the path from *level looking for a block that still has unvisited
 * slots.
 *
 * For each populated level below BTRFS_MAX_LEVEL - 1, a block with a slot
 * remaining after the current one is a stopping point. Otherwise the buffer
 * at *level is released from the path and, when *level is the active shared
 * node (it may never be above it), leave_shared_node() is called.
 * NOTE(review): the slot/level updates and the return values are elided in
 * this excerpt - confirm against the full file.
 */
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323 struct walk_control *wc, int *level)
2326 struct extent_buffer *leaf;
2328 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329 leaf = path->nodes[i];
2330 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = NULL;
2337 BUG_ON(*level > wc->active_node);
2338 if (*level == wc->active_node)
2339 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climb the path
 * from *level looking for a block with a slot remaining after the current
 * one, releasing the buffer at *level from the path otherwise.
 * NOTE(review): the remaining parameters, the slot/level updates and the
 * return values are elided in this excerpt - confirm against the full file.
 */
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2350 struct extent_buffer *leaf;
2352 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353 leaf = path->nodes[i];
2354 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2359 free_extent_buffer(path->nodes[*level]);
2360 path->nodes[*level] = NULL;
/*
 * Validate the inode record of a root directory.
 *
 * The checks, in order: an inode item was found and no errors are recorded;
 * nlink is 1 and no links were found; at least one backref exists; the
 * first backref has an inode ref, index 0 and the two-byte name ".."; and
 * that backref has neither a dir index nor a dir item.
 * NOTE(review): the statements taken when a check fails and the return
 * values are elided in this excerpt - confirm against the full file.
 */
2367 static int check_root_dir(struct inode_record *rec)
2369 struct inode_backref *backref;
2372 if (!rec->found_inode_item || rec->errors)
2374 if (rec->nlink != 1 || rec->found_link != 0)
2376 if (list_empty(&rec->backrefs))
2378 backref = to_inode_backref(rec->backrefs.next);
2379 if (!backref->found_inode_ref)
2381 if (backref->index != 0 || backref->namelen != 2 ||
2382 memcmp(backref->name, "..", 2))
2384 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino to rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG.
 *
 * The search key uses offset (u64)-1, so the search is not expected to hit
 * an exact item; a slot of zero is handled as a special case, and the key
 * read back from the slot must still belong to rec->ino.
 * NOTE(review): the slot adjustment after the search (presumably stepping
 * back one slot), the error paths and the return value are elided in this
 * excerpt - confirm against the full file.
 */
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root, struct btrfs_path *path,
2393 struct inode_record *rec)
2395 struct btrfs_inode_item *ei;
2396 struct btrfs_key key;
2399 key.objectid = rec->ino;
2400 key.type = BTRFS_INODE_ITEM_KEY;
2401 key.offset = (u64)-1;
2403 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2407 if (!path->slots[0]) {
2414 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415 if (key.objectid != rec->ino) {
2420 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421 struct btrfs_inode_item);
2422 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423 btrfs_mark_buffer_dirty(path->nodes[0]);
2424 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426 root->root_key.objectid);
2428 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino inside the given transaction, release
 * the path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): the condition guarding the flag update and the return
 * value are elided in this excerpt - confirm against the full file.
 */
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433 struct btrfs_root *root,
2434 struct btrfs_path *path,
2435 struct inode_record *rec)
2439 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440 btrfs_release_path(path);
2442 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino to
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG.
 *
 * The inode item is located with btrfs_search_slot() and, per the existing
 * comment, is only modified after an exact match.
 * NOTE(review): the assignment of key.offset, the handling of a failed or
 * inexact search and the return value are elided in this excerpt - confirm
 * against the full file.
 */
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447 struct btrfs_root *root,
2448 struct btrfs_path *path,
2449 struct inode_record *rec)
2451 struct btrfs_inode_item *ei;
2452 struct btrfs_key key;
2455 key.objectid = rec->ino;
2456 key.type = BTRFS_INODE_ITEM_KEY;
2459 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2466 /* Since ret == 0, no need to check anything */
2467 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468 struct btrfs_inode_item);
2469 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470 btrfs_mark_buffer_dirty(path->nodes[0]);
2471 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472 printf("reset nbytes for ino %llu root %llu\n",
2473 rec->ino, root->root_key.objectid);
2475 btrfs_release_path(path);
/*
 * Recreate the DIR_INDEX item described by backref for inode rec.
 *
 * Inside its own transaction an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted and filled in: the
 * location key points at the inode item of rec->ino, the type is derived
 * from rec->imode, the data length is zero and the name comes from the
 * backref. After the commit the backref is marked as having a dir index,
 * and the record of the parent directory is updated: the name length is
 * added to found_size and I_ERR_DIR_ISIZE_WRONG is cleared or set depending
 * on whether found_size now equals isize.
 * NOTE(review): the error handling after the insert and the return value
 * are elided in this excerpt; the commit result is not checked in the
 * visible code.
 */
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480 struct cache_tree *inode_cache,
2481 struct inode_record *rec,
2482 struct inode_backref *backref)
2484 struct btrfs_path path;
2485 struct btrfs_trans_handle *trans;
2486 struct btrfs_dir_item *dir_item;
2487 struct extent_buffer *leaf;
2488 struct btrfs_key key;
2489 struct btrfs_disk_key disk_key;
2490 struct inode_record *dir_rec;
2491 unsigned long name_ptr;
2492 u32 data_size = sizeof(*dir_item) + backref->namelen;
2495 trans = btrfs_start_transaction(root, 1);
2497 return PTR_ERR(trans);
2499 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500 (unsigned long long)rec->ino);
2502 btrfs_init_path(&path);
2503 key.objectid = backref->dir;
2504 key.type = BTRFS_DIR_INDEX_KEY;
2505 key.offset = backref->index;
2506 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2509 leaf = path.nodes[0];
2510 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2512 disk_key.objectid = cpu_to_le64(rec->ino);
2513 disk_key.type = BTRFS_INODE_ITEM_KEY;
2514 disk_key.offset = 0;
2516 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518 btrfs_set_dir_data_len(leaf, dir_item, 0);
2519 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520 name_ptr = (unsigned long)(dir_item + 1);
2521 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522 btrfs_mark_buffer_dirty(leaf);
2523 btrfs_release_path(&path);
2524 btrfs_commit_transaction(trans, root);
2526 backref->found_dir_index = 1;
2527 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528 BUG_ON(IS_ERR(dir_rec));
2531 dir_rec->found_size += backref->namelen;
2532 if (dir_rec->found_size == dir_rec->isize &&
2533 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535 if (dir_rec->found_size != dir_rec->isize)
2536 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by backref, inside its own
 * transaction.
 *
 * The entry is looked up by directory, name and index. One visible path
 * releases the path and commits without deleting anything; otherwise the
 * item is removed with either btrfs_del_item() or
 * btrfs_delete_one_dir_name() before the path is released and the
 * transaction committed.
 * NOTE(review): the conditions choosing between these paths (presumably a
 * failed lookup, and whole-item versus single-name deletion) and the
 * return values are elided in this excerpt - confirm against the full
 * file.
 */
2541 static int delete_dir_index(struct btrfs_root *root,
2542 struct inode_backref *backref)
2544 struct btrfs_trans_handle *trans;
2545 struct btrfs_dir_item *di;
2546 struct btrfs_path path;
2549 trans = btrfs_start_transaction(root, 1);
2551 return PTR_ERR(trans);
2553 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554 (unsigned long long)backref->dir,
2555 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556 (unsigned long long)root->objectid);
2558 btrfs_init_path(&path);
2559 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560 backref->name, backref->namelen,
2561 backref->index, -1);
2564 btrfs_release_path(&path);
2565 btrfs_commit_transaction(trans, root);
2572 ret = btrfs_del_item(trans, root, &path);
2574 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2576 btrfs_release_path(&path);
2577 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what the check has
 * collected, inside its own transaction.
 *
 * The new item starts zeroed and gets the generation of the transaction,
 * nbytes set to rec->found_size and a link count of either 1 or
 * rec->found_link. A record with a dir item becomes a directory (mode
 * S_IFDIR | 0755, size found_size), with a warning when file extents were
 * also seen; otherwise it becomes a regular file (mode S_IFREG | 0755,
 * size extent_end). atime, ctime and mtime are set to the current time and
 * otime to zero.
 * NOTE(review): the condition selecting the link count, the remaining
 * parameter and the return value are elided in this excerpt - confirm
 * against the full file.
 * NOTE(review): the "else if (!rec->found_dir_item)" test is always true
 * when it is reached, so it behaves as a plain else.
 */
2581 static int create_inode_item(struct btrfs_root *root,
2582 struct inode_record *rec,
2585 struct btrfs_trans_handle *trans;
2586 struct btrfs_inode_item inode_item;
2587 time_t now = time(NULL);
2590 trans = btrfs_start_transaction(root, 1);
2591 if (IS_ERR(trans)) {
2592 ret = PTR_ERR(trans);
2596 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597 "be incomplete, please check permissions and content after "
2598 "the fsck completes.\n", (unsigned long long)root->objectid,
2599 (unsigned long long)rec->ino);
2601 memset(&inode_item, 0, sizeof(inode_item));
2602 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2604 btrfs_set_stack_inode_nlink(&inode_item, 1);
2606 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608 if (rec->found_dir_item) {
2609 if (rec->found_file_extent)
2610 fprintf(stderr, "root %llu inode %llu has both a dir "
2611 "item and extents, unsure if it is a dir or a "
2612 "regular file so setting it as a directory\n",
2613 (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2615 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617 } else if (!rec->found_dir_item) {
2618 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2621 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2630 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2632 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of rec and repair inconsistent ones; the delete
 * argument selects between the deleting and the adding/creating repairs.
 *
 * Visible repairs, per backref:
 *  - root directory without an inode item (non-delete pass): recreate the
 *    inode item;
 *  - index 0 of the root directory is left alone;
 *  - a dir index without an inode ref, or with an inode ref but a
 *    REF_ERR_INDEX_UNMATCH error: delete the dir index and drop the
 *    backref from the list;
 *  - dir item and inode ref but no dir index (non-delete pass): add the
 *    missing dir index;
 *  - a backref that has dir item, dir index and inode ref with no errors
 *    is dropped from the list;
 *  - inode ref only (non-delete pass): after check_dir_conflict(), insert
 *    the missing dir index/item pair in its own transaction;
 *  - complete backref without an index mismatch but no inode item
 *    (non-delete pass): recreate the inode item.
 * Returns ret when it is non-zero, otherwise the value of repaired.
 * NOTE(review): the opening of the delete-pass condition, the error paths
 * and the updates of repaired are elided in this excerpt - confirm against
 * the full file.
 * NOTE(review): one condition below tests backref->found_dir_index twice;
 * the repetition is redundant as written - confirm whether another field
 * was intended.
 */
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637 struct inode_record *rec,
2638 struct cache_tree *inode_cache,
2641 struct inode_backref *tmp, *backref;
2642 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2646 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647 if (!delete && rec->ino == root_dirid) {
2648 if (!rec->found_inode_item) {
2649 ret = create_inode_item(root, rec, 1);
2656 /* Index 0 for root dir's are special, don't mess with it */
2657 if (rec->ino == root_dirid && backref->index == 0)
2661 ((backref->found_dir_index && !backref->found_inode_ref) ||
2662 (backref->found_dir_index && backref->found_inode_ref &&
2663 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664 ret = delete_dir_index(root, backref);
2668 list_del(&backref->list);
2672 if (!delete && !backref->found_dir_index &&
2673 backref->found_dir_item && backref->found_inode_ref) {
2674 ret = add_missing_dir_index(root, inode_cache, rec,
2679 if (backref->found_dir_item &&
2680 backref->found_dir_index &&
2681 backref->found_dir_index) {
2682 if (!backref->errors &&
2683 backref->found_inode_ref) {
2684 list_del(&backref->list);
2690 if (!delete && (!backref->found_dir_index &&
2691 !backref->found_dir_item &&
2692 backref->found_inode_ref)) {
2693 struct btrfs_trans_handle *trans;
2694 struct btrfs_key location;
2696 ret = check_dir_conflict(root, backref->name,
2702 * let nlink fixing routine to handle it,
2703 * which can do it better.
2708 location.objectid = rec->ino;
2709 location.type = BTRFS_INODE_ITEM_KEY;
2710 location.offset = 0;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2717 fprintf(stderr, "adding missing dir index/item pair "
2719 (unsigned long long)rec->ino);
2720 ret = btrfs_insert_dir_item(trans, root, backref->name,
2722 backref->dir, &location,
2723 imode_to_type(rec->imode),
2726 btrfs_commit_transaction(trans, root);
2730 if (!delete && (backref->found_inode_ref &&
2731 backref->found_dir_index &&
2732 backref->found_dir_item &&
2733 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734 !rec->found_inode_item)) {
2735 ret = create_inode_item(root, rec, 0);
2742 return ret ? ret : repaired;
2746 * To determine the file type for nlink/inode_item repair
2748 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749 * Return -ENOENT if file type is not found.
/*
 * Work out the BTRFS_FT_* file type of rec and store it in *type.
 *
 * The inode mode is used when an inode item was found; otherwise the type
 * is taken from the first backref that has a dir index or a dir item.
 * NOTE(review): the return statements are elided in this excerpt - confirm
 * the success and not-found return values against the full file.
 */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2753 struct inode_backref *backref;
2755 /* For inode item recovered case */
2756 if (rec->found_inode_item) {
2757 *type = imode_to_type(rec->imode);
2761 list_for_each_entry(backref, &rec->backrefs, list) {
2762 if (backref->found_dir_index || backref->found_dir_item) {
2763 *type = backref->filetype;
/*
 * To determine the file name for nlink repair.
 *
 * Return 0 if the file name is found; name and namelen are set.
 * Return -ENOENT if the file name is not found.
 */
/*
 * Copy a usable name for @rec into @name and its length into @namelen.
 *
 * @rec:     inode record whose backrefs are searched
 * @name:    caller-provided buffer; backref->namelen bytes are copied with
 *           memcpy(), so no NUL terminator is written here
 * @namelen: output, set to the length of the copied name
 *
 * A backref qualifies as a name source when it was seen through any of
 * DIR_INDEX, DIR_ITEM or INODE_REF.
 *
 * NOTE(review): the copy is not bounded by the size of @name; presumably
 * callers pass a BTRFS_NAME_LEN buffer and namelen never exceeds that --
 * confirm.
 */
2776 static int find_file_name(struct inode_record *rec,
2777 char *name, int *namelen)
2779 struct inode_backref *backref;
2781 list_for_each_entry(backref, &rec->backrefs, list) {
2782 if (backref->found_dir_index || backref->found_dir_item ||
2783 backref->found_inode_ref) {
2784 memcpy(name, backref->name, backref->namelen);
2785 *namelen = backref->namelen;
2792 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside @trans.
 *
 * @trans: transaction handle passed to every tree modification
 * @root:  fs/subvolume tree that holds the inode
 * @path:  scratch path used for the inode item lookup; released with
 *         btrfs_release_path() after use
 * @rec:   inode record; found_link is zeroed and incomplete backrefs are
 *         removed from rec->backrefs
 *
 * Steps, in order:
 *  1. btrfs_unlink() every recorded backref, valid or not.
 *  2. Drop from the list each backref that lacks any one of
 *     dir_index / dir_item / inode_ref.
 *  3. Look up the INODE_ITEM and write nlink = 0 into it.
 *  4. btrfs_add_link() every backref still on the list.
 */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794 struct btrfs_root *root,
2795 struct btrfs_path *path,
2796 struct inode_record *rec)
2798 struct inode_backref *backref;
2799 struct inode_backref *tmp;
2800 struct btrfs_key key;
2801 struct btrfs_inode_item *inode_item;
2804 /* We don't believe this either, reset it and iterate backref */
2805 rec->found_link = 0;
2807 /* Remove all backref including the valid ones */
2808 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810 backref->index, backref->name,
2811 backref->namelen, 0);
2815 /* remove invalid backref, so it won't be added back */
2816 if (!(backref->found_dir_index &&
2817 backref->found_dir_item &&
2818 backref->found_inode_ref)) {
2819 list_del(&backref->list);
2826 /* Set nlink to 0 */
2827 key.objectid = rec->ino;
2828 key.type = BTRFS_INODE_ITEM_KEY;
2830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2837 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838 struct btrfs_inode_item);
2839 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840 btrfs_mark_buffer_dirty(path->nodes[0]);
2841 btrfs_release_path(path);
2844 * Add back valid inode_ref/dir_item/dir_index,
2845 * add_link() will handle the nlink inc, so new nlink must be correct
2847 list_for_each_entry(backref, &rec->backrefs, list) {
2848 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849 backref->name, backref->namelen,
2850 backref->filetype, &backref->index, 1);
2855 btrfs_release_path(path);
/*
 * Find the highest objectid in use in @root (going by the function name)
 * and store it through the highest_ino output pointer.
 *
 * The search key is an INODE_ITEM at BTRFS_LAST_FREE_OBJECTID.  The key of
 * the item in the slot just before the position returned by the search is
 * read, and its objectid becomes the result.  The result is afterwards
 * compared against BTRFS_LAST_FREE_OBJECTID.
 *
 * @path is initialised here and released again at the end.
 *
 * NOTE(review): the key is read from slots[0] - 1; confirm the search can
 * never leave slots[0] at 0 on this path.
 */
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860 struct btrfs_root *root,
2861 struct btrfs_path *path,
2864 struct btrfs_key key, found_key;
2867 btrfs_init_path(path);
2868 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2870 key.type = BTRFS_INODE_ITEM_KEY;
2871 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2873 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874 path->slots[0] - 1);
2875 *highest_ino = found_key.objectid;
2878 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2880 btrfs_release_path(path);
/*
 * Repair a wrong link count on @rec inside @trans.
 *
 * A file name and a file type are collected from the backrefs first,
 * because reset_nlink() drops the invalid backrefs afterwards.  When no
 * name can be recovered the inode number printed as decimal text is used;
 * when no type can be recovered BTRFS_FT_REG_FILE is used.
 *
 * If the inode is left with found_link == 0 after reset_nlink(), a
 * "lost+found" directory is created with btrfs_mkdir() and the inode is
 * linked into it.  While btrfs_add_link() reports -EEXIST a suffix is
 * appended to the name (guarded by a length check against the name
 * buffer) and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors at the end and @path
 * is released.
 */
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root,
2886 struct btrfs_path *path,
2887 struct inode_record *rec)
2889 char *dir_name = "lost+found";
2890 char namebuf[BTRFS_NAME_LEN] = {0};
2895 int name_recovered = 0;
2896 int type_recovered = 0;
2900 * Get file name and type first before these invalid inode ref
2901 * are deleted by remove_all_invalid_backref()
2903 name_recovered = !find_file_name(rec, namebuf, &namelen);
2904 type_recovered = !find_file_type(rec, &type);
2906 if (!name_recovered) {
2907 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908 rec->ino, rec->ino);
2909 namelen = count_digits(rec->ino);
2910 sprintf(namebuf, "%llu", rec->ino);
2913 if (!type_recovered) {
2914 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2916 type = BTRFS_FT_REG_FILE;
2920 ret = reset_nlink(trans, root, path, rec);
2923 "Failed to reset nlink for inode %llu: %s\n",
2924 rec->ino, strerror(-ret));
2928 if (rec->found_link == 0) {
2929 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2933 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2937 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938 dir_name, strerror(-ret));
2941 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942 namebuf, namelen, type, NULL, 1);
2944 * Add ".INO" suffix several times to handle case where
2945 * "FILENAME.INO" is already taken by another file.
2947 while (ret == -EEXIST) {
2949 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2951 if (namelen + count_digits(rec->ino) + 1 >
2956 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2958 namelen += count_digits(rec->ino) + 1;
2959 ret = btrfs_add_link(trans, root, rec->ino,
2960 lost_found_ino, namebuf,
2961 namelen, type, NULL, 1);
2965 "Failed to link the inode %llu to %s dir: %s\n",
2966 rec->ino, dir_name, strerror(-ret));
2970 * Just increase the found_link, don't actually add the
2971 * backref. This will make things easier and this inode
2972 * record will be freed after the repair is done.
2973 * So fsck will not report problem about this inode.
2976 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977 namelen, namebuf, dir_name);
2979 printf("Fixed the nlink of inode %llu\n", rec->ino);
2982 * Clear the flag anyway, or we will loop forever for the same inode
2983 * as it will not be removed from the bad inode list and the dead loop
2986 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987 btrfs_release_path(path);
/*
 * Check if there is any normal (reg or prealloc) file extent for the given
 * inode.
 * This is used to determine the file type when neither its dir_index/item
 * nor its inode_item exists.
 *
 * This will *NOT* report errors; if any error happens, the inode is simply
 * considered to have no normal file extent.
 */
/*
 * Scan the EXTENT_DATA items of inode @ino in @root for a file extent
 * whose type is not BTRFS_FILE_EXTENT_INLINE.
 *
 * @root: fs/subvolume tree to search
 * @ino:  objectid of the inode of interest
 *
 * The lookup is read-only: btrfs_search_slot() is called without a
 * transaction handle and with ins_len and cow both 0.  A path local to
 * this function is used and released at the end.
 */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3002 struct btrfs_path path;
3003 struct btrfs_key key;
3004 struct btrfs_key found_key;
3005 struct btrfs_file_extent_item *fi;
3009 btrfs_init_path(&path);
3011 key.type = BTRFS_EXTENT_DATA_KEY;
3014 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the last item: go to the next leaf */
3019 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020 ret = btrfs_next_leaf(root, &path);
3027 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop once the items no longer belong to @ino's file extents */
3029 if (found_key.objectid != ino ||
3030 found_key.type != BTRFS_EXTENT_DATA_KEY)
3032 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033 struct btrfs_file_extent_item);
3034 type = btrfs_file_extent_type(path.nodes[0], fi);
3035 if (type != BTRFS_FILE_EXTENT_INLINE) {
3041 btrfs_release_path(&path);
3045 static u32 btrfs_type_to_imode(u8 type)
3047 static u32 imode_by_btrfs_type[] = {
3048 [BTRFS_FT_REG_FILE] = S_IFREG,
3049 [BTRFS_FT_DIR] = S_IFDIR,
3050 [BTRFS_FT_CHRDEV] = S_IFCHR,
3051 [BTRFS_FT_BLKDEV] = S_IFBLK,
3052 [BTRFS_FT_FIFO] = S_IFIFO,
3053 [BTRFS_FT_SOCK] = S_IFSOCK,
3054 [BTRFS_FT_SYMLINK] = S_IFLNK,
3057 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing INODE_ITEM for @rec inside @trans.
 *
 * The file type comes from find_file_type() when that succeeds.
 * Otherwise it is guessed in this order:
 *  - a file extent was found and find_normal_file_extent() confirms a
 *    non-inline one: BTRFS_FT_REG_FILE
 *  - rec->found_dir_item is set: BTRFS_FT_DIR
 *  - the record has orphan extents: BTRFS_FT_REG_FILE
 *  - anything else: BTRFS_FT_REG_FILE, with a message to the user
 *
 * The new inode is created with btrfs_new_inode() using mode combined
 * with the S_IF* bits from btrfs_type_to_imode().  On the record, imode is
 * updated, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs afterwards.
 */
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061 struct btrfs_root *root,
3062 struct btrfs_path *path,
3063 struct inode_record *rec)
3067 int type_recovered = 0;
3070 printf("Trying to rebuild inode:%llu\n", rec->ino);
3072 type_recovered = !find_file_type(rec, &filetype);
3075 * Try to determine inode type if type not found.
3077 * For found regular file extent, it must be FILE.
3078 * For found dir_item/index, it must be DIR.
3080 * For undetermined one, use FILE as fallback.
3083 * 1. If found backref(inode_index/item is already handled) to it,
3085 * Need new inode-inode ref structure to allow search for that.
3087 if (!type_recovered) {
3088 if (rec->found_file_extent &&
3089 find_normal_file_extent(root, rec->ino)) {
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else if (rec->found_dir_item) {
3094 filetype = BTRFS_FT_DIR;
3095 } else if (!list_empty(&rec->orphan_extents)) {
3097 filetype = BTRFS_FT_REG_FILE;
3099 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3102 filetype = BTRFS_FT_REG_FILE;
3106 ret = btrfs_new_inode(trans, root, rec->ino,
3107 mode | btrfs_type_to_imode(filetype));
3112 * Here inode rebuild is done, we only rebuild the inode item,
3113 * don't repair the nlink(like move to lost+found).
3114 * That is the job of nlink repair.
3116 * We just fill the record and return
3118 rec->found_dir_item = 1;
3119 rec->imode = mode | btrfs_type_to_imode(filetype);
3121 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122 /* Ensure the inode_nlinks repair function will be called */
3123 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach (or discard) every orphan data extent recorded on @rec.
 *
 * For each entry of rec->orphan_extents:
 *  - btrfs_get_extent() probes the range [offset, offset + disk_len) for
 *    an existing file extent; disk_len is used as the length because the
 *    compression state of the orphan is unknown.
 *  - On a conflict the orphan's disk extent is released with
 *    btrfs_free_extent() against the extent root.
 *  - Otherwise btrfs_insert_file_extent() adds a file extent for it,
 *    rec->found_size grows by disk_len, and the matching range is removed
 *    from rec->holes.
 *  - The entry is unlinked from the list.
 *
 * Error bits are cleared as their condition goes away:
 * I_ERR_FILE_NBYTES_WRONG when found_size equals nbytes,
 * I_ERR_FILE_EXTENT_DISCOUNT when no holes remain, and
 * I_ERR_FILE_EXTENT_ORPHAN after the loop.
 */
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129 struct btrfs_root *root,
3130 struct btrfs_path *path,
3131 struct inode_record *rec)
3133 struct orphan_data_extent *orphan;
3134 struct orphan_data_extent *tmp;
3137 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3139 * Check for conflicting file extents
3141 * Here we don't know whether the extents is compressed or not,
3142 * so we can only assume it not compressed nor data offset,
3143 * and use its disk_len as extent length.
3145 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146 orphan->offset, orphan->disk_len, 0);
3147 btrfs_release_path(path);
3152 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153 orphan->disk_bytenr, orphan->disk_len);
3154 ret = btrfs_free_extent(trans,
3155 root->fs_info->extent_root,
3156 orphan->disk_bytenr, orphan->disk_len,
3157 0, root->objectid, orphan->objectid,
3162 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163 orphan->offset, orphan->disk_bytenr,
3164 orphan->disk_len, orphan->disk_len);
3168 /* Update file size info */
3169 rec->found_size += orphan->disk_len;
3170 if (rec->found_size == rec->nbytes)
3171 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3173 /* Update the file extent hole info too */
3174 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3178 if (RB_EMPTY_ROOT(&rec->holes))
3179 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3181 list_del(&orphan->list);
3184 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of @rec with hole extents.
 *
 * The holes recorded in the rec->holes rb-tree are processed one at a
 * time, always taking rb_first(): btrfs_punch_hole() covers
 * [hole->start, hole->start + hole->len) and the range is then removed
 * from the tree with del_file_extent_hole().  Once the tree is empty
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared.
 *
 * A second btrfs_punch_hole() call covers the special case of a file that
 * lost all of its file extents: it punches from offset 0 up to isize
 * rounded up to the sector size.
 */
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root,
3191 struct btrfs_path *path,
3192 struct inode_record *rec)
3194 struct rb_node *node;
3195 struct file_extent_hole *hole;
3199 node = rb_first(&rec->holes);
3203 hole = rb_entry(node, struct file_extent_hole, node);
3204 ret = btrfs_punch_hole(trans, root, rec->ino,
3205 hole->start, hole->len);
3208 ret = del_file_extent_hole(&rec->holes, hole->start,
3212 if (RB_EMPTY_ROOT(&rec->holes))
3213 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree changed above, so restart from its first node */
3214 node = rb_first(&rec->holes);
3216 /* special case for a file losing all its file extent */
3218 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219 round_up(rec->isize, root->sectorsize));
3223 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224 rec->ino, root->objectid);
/*
 * Run every applicable repair for @rec in one transaction on @root.
 *
 * Only the error bits listed in the first test are repairable here; a
 * record with none of them set is not processed further.
 *
 * The repairs run in a fixed order -- missing inode item, orphan extents,
 * discounted extents, dir isize, orphan item, nlink, nbytes -- and each
 * one after the first is attempted only while ret is still 0, so the
 * first failure stops the chain.  rec->errors is re-read before every
 * step, which lets an earlier repair enable a later one (for example the
 * inode item rebuild setting I_ERR_LINK_COUNT_WRONG).
 *
 * The transaction reserves 7 items, sized for the nlink repair as broken
 * down in the comment below.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * used here.
 */
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3231 struct btrfs_trans_handle *trans;
3232 struct btrfs_path path;
3235 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236 I_ERR_NO_ORPHAN_ITEM |
3237 I_ERR_LINK_COUNT_WRONG |
3238 I_ERR_NO_INODE_ITEM |
3239 I_ERR_FILE_EXTENT_ORPHAN |
3240 I_ERR_FILE_EXTENT_DISCOUNT|
3241 I_ERR_FILE_NBYTES_WRONG)))
3245 * For nlink repair, it may create a dir and add link, so
3246 * 2 for parent(256)'s dir_index and dir_item
3247 * 2 for lost+found dir's inode_item and inode_ref
3248 * 1 for the new inode_ref of the file
3249 * 2 for lost+found dir's dir_index and dir_item for the file
3251 trans = btrfs_start_transaction(root, 7);
3253 return PTR_ERR(trans);
3255 btrfs_init_path(&path);
3256 if (rec->errors & I_ERR_NO_INODE_ITEM)
3257 ret = repair_inode_no_item(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263 ret = repair_inode_isize(trans, root, &path, rec);
3264 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267 ret = repair_inode_nlinks(trans, root, &path, rec);
3268 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269 ret = repair_inode_nbytes(trans, root, &path, rec);
3270 btrfs_commit_transaction(trans, root);
3271 btrfs_release_path(&path);
/*
 * Verify (and, when the global repair flag is set, fix) every inode record
 * collected for @root, consuming @inode_cache in the process.
 *
 * @root:        fs/subvolume tree the records belong to
 * @inode_cache: cache tree of ptr_node entries pointing at inode records
 *
 * Visible phases:
 *  1. A root whose root item has zero refs is special-cased up front; a
 *     warning is printed if its cache is not empty.
 *  2. In repair mode the backrefs of all records are repaired in stages
 *     through repair_inode_backrefs(); the ordering constraints are
 *     explained in the comment below.
 *  3. The root directory record is looked up and checked with
 *     check_root_dir(); in repair mode a missing root dir is recreated
 *     with btrfs_make_root_dir() in its own transaction.
 *  4. Every remaining record is removed from the cache and examined:
 *     the root dir and the orphan objectid are freed right away, a
 *     missing-orphan-item error is re-checked against the tree,
 *     I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are derived from the
 *     record, and try_repair_inode() is called.  Records that still carry
 *     errors are printed together with their unresolved backrefs.
 *
 * Return -1 if the error counter is positive, 0 otherwise.
 */
3275 static int check_inode_recs(struct btrfs_root *root,
3276 struct cache_tree *inode_cache)
3278 struct cache_extent *cache;
3279 struct ptr_node *node;
3280 struct inode_record *rec;
3281 struct inode_backref *backref;
3286 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3288 if (btrfs_root_refs(&root->root_item) == 0) {
3289 if (!cache_tree_empty(inode_cache))
3290 fprintf(stderr, "warning line %d\n", __LINE__);
3295 * We need to repair backrefs first because we could change some of the
3296 * errors in the inode recs.
3298 * We also need to go through and delete invalid backrefs first and then
3299 * add the correct ones second. We do this because we may get EEXIST
3300 * when adding back the correct index because we hadn't yet deleted the
3303 * For example, if we were missing a dir index then the directories
3304 * isize would be wrong, so if we fixed the isize to what we thought it
3305 * would be and then fixed the backref we'd still have a invalid fs, so
3306 * we need to add back the dir index and then check to see if the isize
3311 if (stage == 3 && !err)
3314 cache = search_cache_extent(inode_cache, 0);
3315 while (repair && cache) {
3316 node = container_of(cache, struct ptr_node, cache);
3318 cache = next_cache_extent(cache);
3320 /* Need to free everything up and rescan */
3322 remove_cache_extent(inode_cache, &node->cache);
3324 free_inode_rec(rec);
3328 if (list_empty(&rec->backrefs))
3331 ret = repair_inode_backrefs(root, rec, inode_cache,
3345 rec = get_inode_rec(inode_cache, root_dirid, 0);
3346 BUG_ON(IS_ERR(rec));
3348 ret = check_root_dir(rec);
3350 fprintf(stderr, "root %llu root dir %llu error\n",
3351 (unsigned long long)root->root_key.objectid,
3352 (unsigned long long)root_dirid);
3353 print_inode_error(root, rec);
3358 struct btrfs_trans_handle *trans;
3360 trans = btrfs_start_transaction(root, 1);
3361 if (IS_ERR(trans)) {
3362 err = PTR_ERR(trans);
3367 "root %llu missing its root dir, recreating\n",
3368 (unsigned long long)root->objectid);
3370 ret = btrfs_make_root_dir(trans, root, root_dirid);
3373 btrfs_commit_transaction(trans, root);
3377 fprintf(stderr, "root %llu root dir %llu not found\n",
3378 (unsigned long long)root->root_key.objectid,
3379 (unsigned long long)root_dirid);
3383 cache = search_cache_extent(inode_cache, 0);
3386 node = container_of(cache, struct ptr_node, cache);
3388 remove_cache_extent(inode_cache, &node->cache);
3390 if (rec->ino == root_dirid ||
3391 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392 free_inode_rec(rec);
3396 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397 ret = check_orphan_item(root, rec->ino);
3399 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400 if (can_free_inode_rec(rec)) {
3401 free_inode_rec(rec);
3406 if (!rec->found_inode_item)
3407 rec->errors |= I_ERR_NO_INODE_ITEM;
3408 if (rec->found_link != rec->nlink)
3409 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3411 ret = try_repair_inode(root, rec);
3412 if (ret == 0 && can_free_inode_rec(rec)) {
3413 free_inode_rec(rec);
3419 if (!(repair && ret == 0))
3421 print_inode_error(root, rec);
3422 list_for_each_entry(backref, &rec->backrefs, list) {
3423 if (!backref->found_dir_item)
3424 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425 if (!backref->found_dir_index)
3426 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427 if (!backref->found_inode_ref)
3428 backref->errors |= REF_ERR_NO_INODE_REF;
3429 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430 " namelen %u name %s filetype %d errors %x",
3431 (unsigned long long)backref->dir,
3432 (unsigned long long)backref->index,
3433 backref->namelen, backref->name,
3434 backref->filetype, backref->errors);
3435 print_ref_error(backref->errors);
3437 free_inode_rec(rec);
3439 return (error > 0) ? -1 : 0;
/*
 * Look up the root record for objectid in @root_cache, creating it when it
 * does not exist yet.
 *
 * An existing entry is found with lookup_cache_extent() over the range
 * [objectid, objectid + 1).  A new record is zero-allocated, gets an empty
 * backref list and is inserted into the cache keyed by
 * start = objectid, size = 1.
 *
 * Errors are reported as ERR_PTR values: -ENOMEM for the allocation and
 * -EEXIST for the cache insertion, so callers must test with IS_ERR().
 *
 * NOTE(review): confirm that the freshly allocated record is released on
 * the insertion-failure path.
 */
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3445 struct cache_extent *cache;
3446 struct root_record *rec = NULL;
3449 cache = lookup_cache_extent(root_cache, objectid, 1);
3451 rec = container_of(cache, struct root_record, cache);
3453 rec = calloc(1, sizeof(*rec));
3455 return ERR_PTR(-ENOMEM);
3456 rec->objectid = objectid;
3457 INIT_LIST_HEAD(&rec->backrefs);
3458 rec->cache.start = objectid;
3459 rec->cache.size = 1;
3461 ret = insert_cache_extent(root_cache, &rec->cache);
3463 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), creating
 * it when there is none.
 *
 * @rec:      root record that owns the backref list
 * @ref_root: objectid of the referencing root
 * @dir:      directory objectid inside the referencing root
 * @index:    stored on a newly created backref; it is NOT part of the
 *            match, only ref_root, dir, namelen and the name bytes are
 *            compared
 * @name:     name bytes, not necessarily NUL-terminated
 * @namelen:  number of bytes in @name
 *
 * A new backref is zero-allocated with room for the name plus a
 * terminating NUL, filled in, and appended to the tail of rec->backrefs.
 */
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469 u64 ref_root, u64 dir, u64 index,
3470 const char *name, int namelen)
3472 struct root_backref *backref;
3474 list_for_each_entry(backref, &rec->backrefs, list) {
3475 if (backref->ref_root != ref_root || backref->dir != dir ||
3476 backref->namelen != namelen)
3478 if (memcmp(name, backref->name, namelen))
3483 backref = calloc(1, sizeof(*backref) + namelen + 1);
3486 backref->ref_root = ref_root;
3488 backref->index = index;
3489 backref->namelen = namelen;
3490 memcpy(backref->name, name, namelen);
3491 backref->name[namelen] = '\0';
3492 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root record embedded around @cache.
 *
 * The record is recovered with container_of() and its backref list is
 * emptied one entry at a time from the head.
 *
 * The macro line after the function registers this callback for the
 * "root_recs" tree; presumably it generates the free_root_recs_tree()
 * helper used elsewhere in this file -- confirm against the macro
 * definition.
 */
3496 static void free_root_record(struct cache_extent *cache)
3498 struct root_record *rec;
3499 struct root_backref *backref;
3501 rec = container_of(cache, struct root_record, cache);
3502 while (!list_empty(&rec->backrefs)) {
3503 backref = to_root_backref(rec->backrefs.next);
3504 list_del(&backref->list);
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type references root @root_id from
 * (@ref_root, @dir, @name).
 *
 * @root_cache: cache of root records; the record for @root_id is created
 *              on demand
 * @index:      dir index / sequence number carried by the item
 * @item_type:  BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY,
 *              BTRFS_ROOT_REF_KEY or BTRFS_ROOT_BACKREF_KEY
 * @errors:     REF_ERR_* bits already detected by the caller, OR-ed into
 *              the backref
 *
 * For every item type except DIR_ITEM the index is cross-checked: if an
 * index-carrying item was already seen for this backref and the values
 * differ, REF_ERR_INDEX_UNMATCH is set.
 *
 * The matching found_* flag is then set; a second ROOT_REF or
 * ROOT_BACKREF for the same backref is flagged as a duplicate.  A backref
 * becomes reachable once both its forward ref and its dir item have been
 * seen.
 */
3513 static int add_root_backref(struct cache_tree *root_cache,
3514 u64 root_id, u64 ref_root, u64 dir, u64 index,
3515 const char *name, int namelen,
3516 int item_type, int errors)
3518 struct root_record *rec;
3519 struct root_backref *backref;
3521 rec = get_root_rec(root_cache, root_id);
3522 BUG_ON(IS_ERR(rec));
3523 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3526 backref->errors |= errors;
3528 if (item_type != BTRFS_DIR_ITEM_KEY) {
3529 if (backref->found_dir_index || backref->found_back_ref ||
3530 backref->found_forward_ref) {
3531 if (backref->index != index)
3532 backref->errors |= REF_ERR_INDEX_UNMATCH;
3534 backref->index = index;
3538 if (item_type == BTRFS_DIR_ITEM_KEY) {
3539 if (backref->found_forward_ref)
3541 backref->found_dir_item = 1;
3542 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543 backref->found_dir_index = 1;
3544 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545 if (backref->found_forward_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_REF;
3547 else if (backref->found_dir_item)
3549 backref->found_forward_ref = 1;
3550 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551 if (backref->found_back_ref)
3552 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553 backref->found_back_ref = 1;
3558 if (backref->found_forward_ref && backref->found_dir_item)
3559 backref->reachable = 1;
/*
 * Fold the per-tree records in @src_cache into the global root record
 * cache @dst_cache, emptying @src_cache.
 *
 * @root:      tree that was just walked
 * @src_cache: inode records collected for it (ptr_node entries)
 * @dst_cache: cache of root records
 *
 * For the tree-reloc root the source records are simply freed.
 * Otherwise every record is removed from @src_cache, tested with
 * is_child_root(), and each of its backrefs that was seen through a
 * DIR_ITEM and/or DIR_INDEX is forwarded to add_root_backref() with
 * rec->ino as the referenced root and @root's objectid as the referencing
 * root.  A backref with found_inode_ref set is treated as a bug here
 * (BUG_ON).  The inode record is freed afterwards.
 */
3563 static int merge_root_recs(struct btrfs_root *root,
3564 struct cache_tree *src_cache,
3565 struct cache_tree *dst_cache)
3567 struct cache_extent *cache;
3568 struct ptr_node *node;
3569 struct inode_record *rec;
3570 struct inode_backref *backref;
3573 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574 free_inode_recs_tree(src_cache);
3579 cache = search_cache_extent(src_cache, 0);
3582 node = container_of(cache, struct ptr_node, cache);
3584 remove_cache_extent(src_cache, &node->cache);
3587 ret = is_child_root(root, root->objectid, rec->ino);
3593 list_for_each_entry(backref, &rec->backrefs, list) {
3594 BUG_ON(backref->found_inode_ref);
3595 if (backref->found_dir_item)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_ITEM_KEY,
3601 if (backref->found_dir_index)
3602 add_root_backref(dst_cache, rec->ino,
3603 root->root_key.objectid, backref->dir,
3604 backref->index, backref->name,
3605 backref->namelen, BTRFS_DIR_INDEX_KEY,
3609 free_inode_rec(rec);
/*
 * Cross-check the references between subvolume roots recorded in
 * @root_cache and report roots that are unreferenced or whose backrefs
 * are inconsistent.
 *
 * @root:       any root of the filesystem; used to reach fs_info
 * @root_cache: cache of root records built while checking the fs roots
 *
 * Visible phases:
 *  1. Reachability pass: starting from the BTRFS_FS_TREE_OBJECTID record,
 *     backrefs whose referencing root has found_ref > 0 are examined and
 *     reachable flags are withdrawn where appropriate.  Circular
 *     references are not detected (see the fixme below).
 *  2. Report pass: a root with found_ref == 0 inside the free objectid
 *     range is checked for an orphan item in the tree root and, if it has
 *     a root item, reported as "not referenced".  For the other roots
 *     the missing dir item / dir index / root backref / root ref bits are
 *     derived per backref, and unresolved refs are printed.
 *
 * Return 1 if the error counter is positive, 0 otherwise.
 */
3616 static int check_root_refs(struct btrfs_root *root,
3617 struct cache_tree *root_cache)
3619 struct root_record *rec;
3620 struct root_record *ref_root;
3621 struct root_backref *backref;
3622 struct cache_extent *cache;
3628 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629 BUG_ON(IS_ERR(rec));
3632 /* fixme: this can not detect circular references */
3635 cache = search_cache_extent(root_cache, 0);
3639 rec = container_of(cache, struct root_record, cache);
3640 cache = next_cache_extent(cache);
3642 if (rec->found_ref == 0)
3645 list_for_each_entry(backref, &rec->backrefs, list) {
3646 if (!backref->reachable)
3649 ref_root = get_root_rec(root_cache,
3651 BUG_ON(IS_ERR(ref_root));
3652 if (ref_root->found_ref > 0)
3655 backref->reachable = 0;
3657 if (rec->found_ref == 0)
3663 cache = search_cache_extent(root_cache, 0);
3667 rec = container_of(cache, struct root_record, cache);
3668 cache = next_cache_extent(cache);
3670 if (rec->found_ref == 0 &&
3671 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673 ret = check_orphan_item(root->fs_info->tree_root,
3679 * If we don't have a root item then we likely just have
3680 * a dir item in a snapshot for this root but no actual
3681 * ref key or anything so it's meaningless.
3683 if (!rec->found_root_item)
3686 fprintf(stderr, "fs tree %llu not referenced\n",
3687 (unsigned long long)rec->objectid);
3691 if (rec->found_ref > 0 && !rec->found_root_item)
3693 list_for_each_entry(backref, &rec->backrefs, list) {
3694 if (!backref->found_dir_item)
3695 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696 if (!backref->found_dir_index)
3697 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698 if (!backref->found_back_ref)
3699 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700 if (!backref->found_forward_ref)
3701 backref->errors |= REF_ERR_NO_ROOT_REF;
3702 if (backref->reachable && backref->errors)
3709 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710 (unsigned long long)rec->objectid, rec->found_ref,
3711 rec->found_root_item ? "" : "not found");
3713 list_for_each_entry(backref, &rec->backrefs, list) {
3714 if (!backref->reachable)
3716 if (!backref->errors && rec->found_root_item)
3718 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719 " index %llu namelen %u name %s errors %x\n",
3720 (unsigned long long)backref->ref_root,
3721 (unsigned long long)backref->dir,
3722 (unsigned long long)backref->index,
3723 backref->namelen, backref->name,
3725 print_ref_error(backref->errors);
3728 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item and feed it to
 * add_root_backref().
 *
 * @eb:         leaf that holds the item
 * @slot:       slot of the item inside @eb
 * @key:        key of the item; its type selects the direction
 * @root_cache: cache of root records to update
 *
 * The dirid, sequence (index) and name length are read from the
 * btrfs_root_ref structure; the name bytes follow it directly in the
 * leaf.  A name longer than BTRFS_NAME_LEN is truncated to that length
 * and flagged with REF_ERR_NAME_TOO_LONG so the local buffer cannot be
 * overrun.
 *
 * The two key types store the roots in opposite order: for a ROOT_REF
 * the referenced root is key->offset and the referencing root is
 * key->objectid; for the other type the two are swapped.
 */
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732 struct btrfs_key *key,
3733 struct cache_tree *root_cache)
3739 struct btrfs_root_ref *ref;
3740 char namebuf[BTRFS_NAME_LEN];
3743 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3745 dirid = btrfs_root_ref_dirid(eb, ref);
3746 index = btrfs_root_ref_sequence(eb, ref);
3747 name_len = btrfs_root_ref_name_len(eb, ref);
3749 if (name_len <= BTRFS_NAME_LEN) {
3753 len = BTRFS_NAME_LEN;
3754 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the fixed-size root_ref */
3756 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3758 if (key->type == BTRFS_ROOT_REF_KEY) {
3759 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760 index, namebuf, len, key->type, error);
3762 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763 index, namebuf, len, key->type, error);
/*
 * Release callback for one entry of a corrupt-block cache tree: recovers
 * the btrfs_corrupt_block that embeds @cache.
 *
 * The macro line after the function registers this callback for the
 * "corrupt_blocks" tree; presumably it generates the
 * free_corrupt_blocks_tree() helper used elsewhere in this file --
 * confirm against the macro definition.
 */
3768 static void free_corrupt_block(struct cache_extent *cache)
3770 struct btrfs_corrupt_block *corrupt;
3772 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node keys recorded in the corrupt_blocks
 * cache_tree and then rebalance the tree.
 * After the fix, the btree should be writeable.
 */
/*
 * Remove every tree block recorded in @corrupt_blocks from @root, then
 * rebalance the tree, all inside one transaction.
 *
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block entries, each
 *                  carrying the level and first key of a bad block
 *
 * Nothing is done for an empty cache tree.
 *
 * Pass 1 (delete): for each entry the path's lowest_level is set to the
 * block's level and the block's key is searched with ins_len 0 so that no
 * balancing touches a possibly corrupted sibling.  The block pointer
 * found at that level is removed with btrfs_del_ptr() and the block's
 * extent is released with btrfs_free_extent().
 *
 * Pass 2 (balance): the same keys are searched again with ins_len -1,
 * which lets btrfs_search_slot() rebalance the tree on the way down.
 *
 * The transaction is committed and the path released at the end.
 */
3785 static int repair_btree(struct btrfs_root *root,
3786 struct cache_tree *corrupt_blocks)
3788 struct btrfs_trans_handle *trans;
3789 struct btrfs_path path;
3790 struct btrfs_corrupt_block *corrupt;
3791 struct cache_extent *cache;
3792 struct btrfs_key key;
3797 if (cache_tree_empty(corrupt_blocks))
3800 trans = btrfs_start_transaction(root, 1);
3801 if (IS_ERR(trans)) {
3802 ret = PTR_ERR(trans);
3803 fprintf(stderr, "Error starting transaction: %s\n",
3807 btrfs_init_path(&path);
3808 cache = first_cache_extent(corrupt_blocks);
3810 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812 level = corrupt->level;
3813 path.lowest_level = level;
3814 key.objectid = corrupt->key.objectid;
3815 key.type = corrupt->key.type;
3816 key.offset = corrupt->key.offset;
3819 * Here we don't want to do any tree balance, since it may
3820 * cause a balance with corrupted brother leaf/node,
3821 * so ins_len set to 0 here.
3822 * Balance will be done after all corrupt node/leaf is deleted.
3824 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3827 offset = btrfs_node_blockptr(path.nodes[level],
3830 /* Remove the ptr */
3831 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3835 * Remove the corresponding extent
3836 * return value is not concerned.
3838 btrfs_release_path(&path);
3839 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840 0, root->root_key.objectid,
3842 cache = next_cache_extent(cache);
3845 /* Balance the btree using btrfs_search_slot() */
3846 cache = first_cache_extent(corrupt_blocks);
3848 corrupt = container_of(cache, struct btrfs_corrupt_block,
3850 memcpy(&key, &corrupt->key, sizeof(key));
3851 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3854 /* return will always >0 since it won't find the item */
3856 btrfs_release_path(&path);
3857 cache = next_cache_extent(cache);
3860 btrfs_commit_transaction(trans, root);
3861 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it, collect inode and root records,
 * optionally repair corrupted tree blocks, and verify the inode records.
 *
 * @root:       tree to check
 * @root_cache: global cache of root records, updated with what is found
 * @wc:         walk control shared between trees; its per-level node
 *              array is reset here for this tree
 *
 * Visible steps:
 *  - A local corrupt-block cache tree is installed in
 *    fs_info->corrupt_blocks for the duration of the call and removed
 *    again at the end.
 *  - For every root other than the tree-reloc root the root record is
 *    fetched and marked found_root_item when the root item has refs.
 *  - Orphan data extents queued on the root are moved onto the inode
 *    record they belong to, which is flagged I_ERR_FILE_EXTENT_ORPHAN.
 *  - The root block itself is validated with btrfs_check_leaf() or
 *    btrfs_check_node().
 *  - The walk starts at the root block, or -- for a root with no refs and
 *    a non-zero drop_progress -- at the drop_progress key and drop_level,
 *    after that level has been sanity-checked.
 *  - walk_down_tree() / walk_up_tree() traverse the tree.
 *  - Corrupted blocks recorded during the walk are listed and handed to
 *    repair_btree().
 *  - merge_root_recs() and check_inode_recs() process the collected
 *    records.
 */
3865 static int check_fs_root(struct btrfs_root *root,
3866 struct cache_tree *root_cache,
3867 struct walk_control *wc)
3873 struct btrfs_path path;
3874 struct shared_node root_node;
3875 struct root_record *rec;
3876 struct btrfs_root_item *root_item = &root->root_item;
3877 struct cache_tree corrupt_blocks;
3878 struct orphan_data_extent *orphan;
3879 struct orphan_data_extent *tmp;
3880 enum btrfs_tree_block_status status;
3881 struct node_refs nrefs;
3884 * Reuse the corrupt_block cache tree to record corrupted tree block
3886 * Unlike the usage in extent tree check, here we do it in a per
3887 * fs/subvol tree base.
3889 cache_tree_init(&corrupt_blocks);
3890 root->fs_info->corrupt_blocks = &corrupt_blocks;
3892 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893 rec = get_root_rec(root_cache, root->root_key.objectid);
3894 BUG_ON(IS_ERR(rec));
3895 if (btrfs_root_refs(root_item) > 0)
3896 rec->found_root_item = 1;
3899 btrfs_init_path(&path);
3900 memset(&root_node, 0, sizeof(root_node));
3901 cache_tree_init(&root_node.root_cache);
3902 cache_tree_init(&root_node.inode_cache);
3903 memset(&nrefs, 0, sizeof(nrefs));
3905 /* Move the orphan extent record to corresponding inode_record */
3906 list_for_each_entry_safe(orphan, tmp,
3907 &root->orphan_data_extents, list) {
3908 struct inode_record *inode;
3910 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3912 BUG_ON(IS_ERR(inode));
3913 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914 list_move(&orphan->list, &inode->orphan_extents);
3917 level = btrfs_header_level(root->node);
3918 memset(wc->nodes, 0, sizeof(wc->nodes));
3919 wc->nodes[level] = &root_node;
3920 wc->active_node = level;
3921 wc->root_level = level;
3923 /* We may not have checked the root block, lets do that now */
3924 if (btrfs_is_leaf(root->node))
3925 status = btrfs_check_leaf(root, NULL, root->node);
3927 status = btrfs_check_node(root, NULL, root->node);
3928 if (status != BTRFS_TREE_BLOCK_CLEAN)
3931 if (btrfs_root_refs(root_item) > 0 ||
3932 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933 path.nodes[level] = root->node;
3934 extent_buffer_get(root->node);
3935 path.slots[level] = 0;
3937 struct btrfs_key key;
3938 struct btrfs_disk_key found_key;
3940 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941 level = root_item->drop_level;
3942 path.lowest_level = level;
3943 if (level > btrfs_header_level(root->node) ||
3944 level >= BTRFS_MAX_LEVEL) {
3945 error("ignoring invalid drop level: %u", level);
3948 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3951 btrfs_node_key(path.nodes[level], &found_key,
3953 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954 sizeof(found_key)));
3958 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3964 wret = walk_up_tree(root, &path, wc, &level);
3971 btrfs_release_path(&path);
3973 if (!cache_tree_empty(&corrupt_blocks)) {
3974 struct cache_extent *cache;
3975 struct btrfs_corrupt_block *corrupt;
3977 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978 root->root_key.objectid);
3979 cache = first_cache_extent(&corrupt_blocks);
3981 corrupt = container_of(cache,
3982 struct btrfs_corrupt_block,
3984 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985 cache->start, corrupt->level,
3986 corrupt->key.objectid, corrupt->key.type,
3987 corrupt->key.offset);
3988 cache = next_cache_extent(cache);
3991 printf("Try to repair the btree for root %llu\n",
3992 root->root_key.objectid);
3993 ret = repair_btree(root, &corrupt_blocks);
3995 fprintf(stderr, "Failed to repair btree: %s\n",
3998 printf("Btree for root %llu is fixed\n",
3999 root->root_key.objectid);
4003 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4007 if (root_node.current) {
4008 root_node.current->checked = 1;
4009 maybe_free_inode_rec(&root_node.inode_cache,
4013 err = check_inode_recs(root, &root_node.inode_cache);
4017 free_corrupt_blocks_tree(&corrupt_blocks);
4018 root->fs_info->corrupt_blocks = NULL;
4019 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that the fs-root check should
 * visit.
 *
 * The tree-reloc and data-reloc trees are tested for explicitly, ahead of
 * the generic is_fstree() test that handles every other objectid.
 */
4023 static int fs_root_objectid(u64 objectid)
4025 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4028 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree of the filesystem.
 *
 * @root:       any root of the filesystem; used to reach fs_info and the
 *              tree root
 * @root_cache: cache of root records filled in along the way
 *
 * The items of the tree root are iterated leaf by leaf:
 *  - a ROOT_ITEM whose objectid passes fs_root_objectid() is opened and
 *    handed to check_fs_root().  Tree-reloc roots are read without the
 *    root cache and freed again afterwards; all other roots are read
 *    through btrfs_read_fs_root() with the key offset set to (u64)-1.
 *  - ROOT_REF and ROOT_BACKREF items are passed to process_root_ref().
 *
 * When check_fs_root() returns -EAGAIN, or when the tree root's node
 * changed underneath the iteration, the root record cache is dropped and
 * the path released; presumably the scan then restarts -- confirm against
 * the full source.
 *
 * Progress reporting is started at the top when enabled, and the shared
 * walk-control cache is freed at the end.
 */
4031 static int check_fs_roots(struct btrfs_root *root,
4032 struct cache_tree *root_cache)
4034 struct btrfs_path path;
4035 struct btrfs_key key;
4036 struct walk_control wc;
4037 struct extent_buffer *leaf, *tree_node;
4038 struct btrfs_root *tmp_root;
4039 struct btrfs_root *tree_root = root->fs_info->tree_root;
4043 if (ctx.progress_enabled) {
4044 ctx.tp = TASK_FS_ROOTS;
4045 task_start(ctx.info);
4049 * Just in case we made any changes to the extent tree that weren't
4050 * reflected into the free space cache yet.
4053 reset_cached_block_groups(root->fs_info);
4054 memset(&wc, 0, sizeof(wc));
4055 cache_tree_init(&wc.shared);
4056 btrfs_init_path(&path);
4061 key.type = BTRFS_ROOT_ITEM_KEY;
4062 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4067 tree_node = tree_root->node;
4069 if (tree_node != tree_root->node) {
4070 free_root_recs_tree(root_cache);
4071 btrfs_release_path(&path);
4074 leaf = path.nodes[0];
4075 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076 ret = btrfs_next_leaf(tree_root, &path);
4082 leaf = path.nodes[0];
4084 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086 fs_root_objectid(key.objectid)) {
4087 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088 tmp_root = btrfs_read_fs_root_no_cache(
4089 root->fs_info, &key);
4091 key.offset = (u64)-1;
4092 tmp_root = btrfs_read_fs_root(
4093 root->fs_info, &key);
4095 if (IS_ERR(tmp_root)) {
4099 ret = check_fs_root(tmp_root, root_cache, &wc);
4100 if (ret == -EAGAIN) {
4101 free_root_recs_tree(root_cache);
4102 btrfs_release_path(&path);
4107 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108 btrfs_free_fs_root(tmp_root);
4109 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110 key.type == BTRFS_ROOT_BACKREF_KEY) {
4111 process_root_ref(leaf, path.slots[0], &key,
4118 btrfs_release_path(&path);
4120 free_extent_cache_tree(&wc.shared);
4121 if (!cache_tree_empty(&wc.shared))
4122 fprintf(stderr, "warning line %d\n", __LINE__);
4124 task_stop(ctx.info);
4130 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches the
4131 * specified INODE_REF/INODE_EXTREF.
4133 * @root: the root of the fs/file tree
4134 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4135 * @key: the key of the DIR_ITEM/DIR_INDEX
4136 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4137 * distinguish the root dir from a normal dir/file
4138 * @name: the name in the INODE_REF/INODE_EXTREF
4139 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4140 * @mode: the st_mode of INODE_ITEM
4142 * Return 0 if no error occurred.
4143 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4146 * Return DIR_ITEM_MISMATCH if the INODE_REF/INODE_EXTREF and the
4147 * DIR_ITEM/DIR_INDEX do not match for a normal dir/file.
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150 struct btrfs_key *key, u64 index, char *name,
4151 u32 namelen, u32 mode)
4153 struct btrfs_path path;
4154 struct extent_buffer *node;
4155 struct btrfs_dir_item *di;
4156 struct btrfs_key location;
/*
 * NOTE(review): the buffer is exactly BTRFS_NAME_LEN bytes, so a name of
 * maximum length leaves no terminating NUL; the comparison below is bounded
 * by len and does not depend on one.
 */
4157 char namebuf[BTRFS_NAME_LEN] = {0};
/* Look up the DIR_ITEM/DIR_INDEX key in @root. */
4167 btrfs_init_path(&path);
4168 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170 ret = DIR_ITEM_MISSING;
4174 /* Process root dir and goto out*/
4177 ret = ROOT_DIR_ERROR;
4179 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4181 ref_key->type == BTRFS_INODE_REF_KEY ?
4183 ref_key->objectid, ref_key->offset,
4184 key->type == BTRFS_DIR_ITEM_KEY ?
4185 "DIR_ITEM" : "DIR_INDEX");
4193 /* Process normal file/dir */
4195 ret = DIR_ITEM_MISSING;
4197 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4199 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200 ref_key->objectid, ref_key->offset,
4201 key->type == BTRFS_DIR_ITEM_KEY ?
4202 "DIR_ITEM" : "DIR_INDEX",
4203 key->objectid, key->offset, namelen, name,
4204 imode_to_type(mode));
4208 /* Check whether inode_id/filetype/name match */
4209 node = path.nodes[0];
4210 slot = path.slots[0];
4211 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212 total = btrfs_item_size_nr(node, slot);
/*
 * Walk every entry packed in this item; ret is preset to DIR_ITEM_MISMATCH
 * for each entry.
 */
4213 while (cur < total) {
4214 ret = DIR_ITEM_MISMATCH;
4215 name_len = btrfs_dir_name_len(node, di);
4216 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode. */
4218 btrfs_dir_item_key_to_cpu(node, di, &location);
4219 if (location.objectid != ref_key->objectid ||
4220 location.type != BTRFS_INODE_ITEM_KEY ||
4221 location.offset != 0)
/* The file type stored in the entry must agree with the inode mode. */
4224 filetype = btrfs_dir_type(node, di);
4225 if (imode_to_type(mode) != filetype)
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4228 if (name_len <= BTRFS_NAME_LEN) {
4231 len = BTRFS_NAME_LEN;
4232 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4234 key->type == BTRFS_DIR_ITEM_KEY ?
4235 "DIR_ITEM" : "DIR_INDEX",
4236 key->objectid, key->offset, name_len);
/* The name bytes follow the fixed header; compare both length and bytes. */
4238 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: fixed header plus name plus data. */
4245 len = sizeof(*di) + name_len + data_len;
4246 di = (struct btrfs_dir_item *)((char *)di + len);
/* Report the mismatch when ret still says so after the walk. */
4249 if (ret == DIR_ITEM_MISMATCH)
4251 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4253 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254 ref_key->objectid, ref_key->offset,
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, namelen, name,
4258 imode_to_type(mode));
4260 btrfs_release_path(&path);
4265 * Traverse the given INODE_REF and call find_dir_item() to find related
4266 * DIR_ITEM/DIR_INDEX.
4268 * @root: the root of the fs/file tree
4269 * @ref_key: the key of the INODE_REF
4270 * @refs: the count of INODE_REF
4271 * @mode: the st_mode of INODE_ITEM
4273 * Return 0 if no error occurred.
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276 struct extent_buffer *node, int slot, u64 *refs,
4279 struct btrfs_key key;
4280 struct btrfs_inode_ref *ref;
/*
 * NOTE(review): exactly BTRFS_NAME_LEN bytes and declared once for all
 * references in the item, so it is not guaranteed to be NUL-terminated and
 * may keep bytes of a previous, longer name.
 */
4281 char namebuf[BTRFS_NAME_LEN] = {0};
/* An INODE_REF item can pack several references; start at the first one. */
4289 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290 total = btrfs_item_size_nr(node, slot);
4293 /* Update inode ref count */
/* Decode the directory index and the name length of this reference. */
4296 index = btrfs_inode_ref_index(node, ref);
4297 name_len = btrfs_inode_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4298 if (name_len <= BTRFS_NAME_LEN) {
4301 len = BTRFS_NAME_LEN;
4302 warning("root %llu INODE_REF[%llu %llu] name too long",
4303 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the fixed-size header. */
4306 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4308 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by name_len rather than by a fixed
 * length, so a name shorter than two bytes that is a prefix of the expected
 * one also compares equal. The %s argument of the message is on a line that
 * is not visible here; if it is namebuf, see the note on its declaration.
 */
4309 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311 root->objectid, ref_key->objectid, ref_key->offset,
4313 err |= ROOT_DIR_ERROR;
4316 /* Find related DIR_INDEX */
/* The parent directory id is the offset of the INODE_REF key. */
4317 key.objectid = ref_key->offset;
4318 key.type = BTRFS_DIR_INDEX_KEY;
4320 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4323 /* Find related dir_item */
/* DIR_ITEM keys use the hash of the name as their offset. */
4324 key.objectid = ref_key->offset;
4325 key.type = BTRFS_DIR_ITEM_KEY;
4326 key.offset = btrfs_name_hash(namebuf, len);
4327 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next reference packed in the same item. */
4330 len = sizeof(*ref) + name_len;
4331 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4340 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341 * DIR_ITEM/DIR_INDEX.
4343 * @root: the root of the fs/file tree
4344 * @ref_key: the key of the INODE_EXTREF
4345 * @refs: the count of INODE_EXTREF
4346 * @mode: the st_mode of INODE_ITEM
4348 * Return 0 if no error occurred.
4350 static int check_inode_extref(struct btrfs_root *root,
4351 struct btrfs_key *ref_key,
4352 struct extent_buffer *node, int slot, u64 *refs,
4355 struct btrfs_key key;
4356 struct btrfs_inode_extref *extref;
/*
 * NOTE(review): exactly BTRFS_NAME_LEN bytes and declared once for all
 * references in the item, so it is not guaranteed to be NUL-terminated and
 * may keep bytes of a previous, longer name.
 */
4357 char namebuf[BTRFS_NAME_LEN] = {0};
/* An INODE_EXTREF item can pack several references; start at the first one. */
4367 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368 total = btrfs_item_size_nr(node, slot);
4371 /* update inode ref count */
/* Unlike INODE_REF, the parent directory id is stored inside each entry. */
4373 name_len = btrfs_inode_extref_name_len(node, extref);
4374 index = btrfs_inode_extref_index(node, extref);
4375 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4376 if (name_len <= BTRFS_NAME_LEN) {
4379 len = BTRFS_NAME_LEN;
4380 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the fixed-size header. */
4383 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4385 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by name_len, so a name shorter
 * than two bytes that is a prefix of the expected one also compares equal.
 */
4386 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388 root->objectid, ref_key->objectid, ref_key->offset,
4390 err |= ROOT_DIR_ERROR;
4393 /* find related dir_index */
4394 key.objectid = parent;
4395 key.type = BTRFS_DIR_INDEX_KEY;
4397 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4400 /* find related dir_item */
/* DIR_ITEM keys use the hash of the name as their offset. */
4401 key.objectid = parent;
4402 key.type = BTRFS_DIR_ITEM_KEY;
4403 key.offset = btrfs_name_hash(namebuf, len);
4404 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next reference packed in the same item. */
4407 len = sizeof(*extref) + name_len;
4408 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4418 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4419 * the specified DIR_ITEM/DIR_INDEX.
4421 * @root: the root of the fs/file tree
4422 * @key: the key of the INODE_REF/INODE_EXTREF
4423 * @name: the name in the INODE_REF/INODE_EXTREF
4424 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4425 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4427 * @ext_ref: the EXTENDED_IREF feature
4429 * Return 0 if no error occurred.
4430 * Return >0 for error bitmap
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433 char *name, int namelen, u64 index,
4434 unsigned int ext_ref)
4436 struct btrfs_path path;
4437 struct btrfs_inode_ref *ref;
4438 struct btrfs_inode_extref *extref;
4439 struct extent_buffer *node;
4440 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look for a plain INODE_REF item stored under @key. */
4451 btrfs_init_path(&path);
4452 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4454 ret = INODE_REF_MISSING;
4458 node = path.nodes[0];
4459 slot = path.slots[0];
4461 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462 total = btrfs_item_size_nr(node, slot);
4464 /* Iterate all entry of INODE_REF */
4465 while (cur < total) {
/* ret is preset to INODE_REF_MISSING for each entry. */
4466 ret = INODE_REF_MISSING;
4468 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison. */
4470 if (index != (u64)-1 && index != ref_index)
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4473 if (ref_namelen <= BTRFS_NAME_LEN) {
4476 len = BTRFS_NAME_LEN;
4477 warning("root %llu INODE %s[%llu %llu] name too long",
4479 key->type == BTRFS_INODE_REF_KEY ?
4481 key->objectid, key->offset);
/* Copy the name that follows the header and compare length and bytes. */
4483 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4486 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next reference packed in the same item. */
4492 len = sizeof(*ref) + ref_namelen;
4493 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4498 /* Skip if not support EXTENDED_IREF feature */
/*
 * Second pass: look for an INODE_EXTREF item. The path is recycled and @key
 * is rewritten in place (type and offset), which also changes the key object
 * owned by the caller.
 */
4502 btrfs_release_path(&path);
4503 btrfs_init_path(&path);
/*
 * The parent directory id is taken from the offset of the incoming key; the
 * EXTREF key offset is a hash computed from that id and the name.
 */
4505 dir_id = key->offset;
4506 key->type = BTRFS_INODE_EXTREF_KEY;
4507 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4509 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4511 ret = INODE_REF_MISSING;
4515 node = path.nodes[0];
4516 slot = path.slots[0];
4518 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4520 total = btrfs_item_size_nr(node, slot);
4522 /* Iterate all entry of INODE_EXTREF */
4523 while (cur < total) {
/* ret is preset to INODE_REF_MISSING for each entry. */
4524 ret = INODE_REF_MISSING;
4526 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527 ref_index = btrfs_inode_extref_index(node, extref);
4528 parent = btrfs_inode_extref_parent(node, extref);
/* An index of (u64)-1 disables the index comparison. */
4529 if (index != (u64)-1 && index != ref_index)
/* The entry must also belong to the expected parent directory. */
4532 if (parent != dir_id)
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4535 if (ref_namelen <= BTRFS_NAME_LEN) {
4538 len = BTRFS_NAME_LEN;
4539 warning("root %llu INODE %s[%llu %llu] name too long",
4541 key->type == BTRFS_INODE_REF_KEY ?
4543 key->objectid, key->offset);
/* Copy the name that follows the header and compare length and bytes. */
4545 read_extent_buffer(node, ref_namebuf,
4546 (unsigned long)(extref + 1), len);
4548 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next reference packed in the same item. */
4555 len = sizeof(*extref) + ref_namelen;
4556 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4561 btrfs_release_path(&path);
4566 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4569 * @root: the root of the fs/file tree
4570 * @key: the key of the DIR_ITEM/DIR_INDEX
4571 * @size: accumulator; the name length of every entry is added to it
4572 * @ext_ref: the EXTENDED_IREF feature
4574 * Return 0 if no error occurred.
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577 struct extent_buffer *node, int slot, u64 *size,
4578 unsigned int ext_ref)
4580 struct btrfs_dir_item *di;
4581 struct btrfs_inode_item *ii;
4582 struct btrfs_path path;
4583 struct btrfs_key location;
/*
 * NOTE(review): the buffer is exactly BTRFS_NAME_LEN bytes and is reused for
 * every entry of the item without being cleared, yet it is printed with %s
 * in the messages below. A name of maximum length leaves no terminating NUL
 * and a short name after a longer one leaves stale trailing bytes, so the
 * printed file name can be wrong or read past the buffer.
 */
4584 char namebuf[BTRFS_NAME_LEN] = {0};
4597 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598 * ignore index check.
4600 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4602 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603 total = btrfs_item_size_nr(node, slot);
/* Walk every entry packed in this item. */
4605 while (cur < total) {
4606 data_len = btrfs_dir_data_len(node, di);
4608 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610 "DIR_ITEM" : "DIR_INDEX",
4611 key->objectid, key->offset, data_len);
4613 name_len = btrfs_dir_name_len(node, di);
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
4614 if (name_len <= BTRFS_NAME_LEN) {
4617 len = BTRFS_NAME_LEN;
4618 warning("root %llu %s[%llu %llu] name too long",
4620 key->type == BTRFS_DIR_ITEM_KEY ?
4621 "DIR_ITEM" : "DIR_INDEX",
4622 key->objectid, key->offset);
/* The unclamped name length is what gets added to the running size. */
4624 (*size) += name_len;
4626 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627 filetype = btrfs_dir_type(node, di);
/* A fresh path is used for the lookups of each entry and released below. */
4629 btrfs_init_path(&path);
4630 btrfs_dir_item_key_to_cpu(node, di, &location);
4632 /* Ignore related ROOT_ITEM check */
4633 if (location.type == BTRFS_ROOT_ITEM_KEY)
4636 /* Check relative INODE_ITEM(existence/filetype) */
4637 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4639 err |= INODE_ITEM_MISSING;
4640 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643 key->offset, location.objectid, name_len,
/* Compare the type stored in the entry with the mode of the inode found. */
4648 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649 struct btrfs_inode_item);
4650 mode = btrfs_inode_mode(path.nodes[0], ii);
4652 if (imode_to_type(mode) != filetype) {
4653 err |= INODE_ITEM_MISMATCH;
4654 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657 key->offset, name_len, namebuf, filetype);
4660 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: its offset is the directory id. */
4661 location.type = BTRFS_INODE_REF_KEY;
4662 location.offset = key->objectid;
4663 ret = find_inode_ref(root, &location, namebuf, len,
4666 if (ret & INODE_REF_MISSING)
4667 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670 key->offset, name_len, namebuf, filetype);
/* Release the per-entry path and step to the next entry. */
4673 btrfs_release_path(&path);
4674 len = sizeof(*di) + name_len + data_len;
4675 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after its first entry is reported. */
4678 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680 root->objectid, key->objectid, key->offset);
4689 * Check file extent datasum/hole, update the size of the file extents,
4690 * check and update the last offset of the file extent.
4692 * @root: the root of fs/file tree.
4693 * @fkey: the key of the file extent.
4694 * @nodatasum: INODE_NODATASUM feature.
4695 * @size: the sum of all EXTENT_DATA items size for this inode.
4696 * @end: the offset of the last extent.
4698 * Return 0 if no error occurred.
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701 struct extent_buffer *node, int slot,
4702 unsigned int nodatasum, u64 *size, u64 *end)
4704 struct btrfs_file_extent_item *fi;
4707 u64 extent_num_bytes;
4709 u64 csum_found; /* In byte size, sectorsize aligned */
4710 u64 search_start; /* Logical range start we search for csum */
4711 u64 search_len; /* Logical range len we search for csum */
4712 unsigned int extent_type;
4713 unsigned int is_hole;
/* Map the file extent item stored at @slot. */
4718 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4720 /* Check inline extent */
4721 extent_type = btrfs_file_extent_type(node, fi);
4722 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723 struct btrfs_item *e = btrfs_item_nr(slot);
4724 u32 item_inline_len;
/* Fetch both inline length values and the compression type. */
4726 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728 compressed = btrfs_file_extent_compression(node, fi);
/* An inline extent must not be empty. */
4729 if (extent_num_bytes == 0) {
4731 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732 root->objectid, fkey->objectid, fkey->offset);
4733 err |= FILE_EXTENT_ERROR;
/* For uncompressed inline data the two length values must agree. */
4735 if (!compressed && extent_num_bytes != item_inline_len) {
4737 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738 root->objectid, fkey->objectid, fkey->offset,
4739 extent_num_bytes, item_inline_len);
4740 err |= FILE_EXTENT_ERROR;
/* Inline data counts toward the accumulated size. */
4742 *size += extent_num_bytes;
4746 /* Check extent type */
4747 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749 err |= FILE_EXTENT_ERROR;
4750 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751 root->objectid, fkey->objectid, fkey->offset);
4755 /* Check REG_EXTENT/PREALLOC_EXTENT */
4756 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759 extent_offset = btrfs_file_extent_offset(node, fi);
4760 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk start nor a disk length. */
4761 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4764 * Check EXTENT_DATA csum
4766 * For plain (uncompressed) extent, we should only check the range
4767 * we're referring to, as it's possible that part of prealloc extent
4768 * has been written, and has csum:
4770 * |<--- Original large preallocated extent A ---->|
4771 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4774 * For compressed extent, we should check the whole range.
/*
 * NOTE(review): the branch that selects between the two ranges below is not
 * visible in this excerpt; per the explanation above, the referenced range
 * applies to plain extents and the whole on-disk range to compressed ones.
 */
4777 search_start = disk_bytenr + extent_offset;
4778 search_len = extent_num_bytes;
4780 search_start = disk_bytenr;
4781 search_len = disk_num_bytes;
/* Count the csum bytes found for the chosen range. */
4783 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three csum rules: a NODATASUM inode must have none, a regular extent that
 * is not a hole must be fully covered, and a prealloc extent must have none.
 */
4784 if (csum_found > 0 && nodatasum) {
4785 err |= ODD_CSUM_ITEM;
4786 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787 root->objectid, fkey->objectid, fkey->offset);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789 !is_hole && (ret < 0 || csum_found < search_len)) {
4790 err |= CSUM_ITEM_MISSING;
4791 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 csum_found, search_len);
4794 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795 err |= ODD_CSUM_ITEM;
4796 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797 root->objectid, fkey->objectid, fkey->offset, csum_found);
4800 /* Check EXTENT_DATA hole */
/*
 * With no_holes set a hole extent is an error; without it the extent must
 * start exactly at the running end offset.
 */
4801 if (no_holes && is_hole) {
4802 err |= FILE_EXTENT_ERROR;
4803 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804 root->objectid, fkey->objectid, fkey->offset);
4805 } else if (!no_holes && *end != fkey->offset) {
4806 err |= FILE_EXTENT_ERROR;
4807 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808 root->objectid, fkey->objectid, fkey->offset);
/*
 * Advance the running end offset and add to the accumulated size.
 * NOTE(review): a line between these two statements is missing from this
 * excerpt - confirm whether the size update is conditional.
 */
4811 *end += extent_num_bytes;
4813 *size += extent_num_bytes;
4819 * Check INODE_ITEM and related ITEMs (the same inode number)
4820 * 1. check link count
4821 * 2. check inode ref/extref
4822 * 3. check dir item/index
4824 * @ext_ref: the EXTENDED_IREF feature
4826 * Return 0 if no error occurred.
4827 * Return >0 (an error bitmap) if an error was found or the traversal is done.
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830 unsigned int ext_ref)
4832 struct extent_buffer *node;
4833 struct btrfs_inode_item *ii;
4834 struct btrfs_key key;
4843 u64 extent_size = 0;
4845 unsigned int nodatasum;
/* The path points at the item to check; its objectid is the inode number. */
4850 node = path->nodes[0];
4851 slot = path->slots[0];
4853 btrfs_item_key_to_cpu(node, &key, slot);
4854 inode_id = key.objectid;
/* Items keyed by the orphan objectid are stepped over. */
4856 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857 ret = btrfs_next_item(root, path);
/* Cache the inode fields that are verified after the related items are walked. */
4863 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864 isize = btrfs_inode_size(node, ii);
4865 nbytes = btrfs_inode_nbytes(node, ii);
4866 mode = btrfs_inode_mode(node, ii);
4867 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868 nlink = btrfs_inode_nlink(node, ii);
4869 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
/* Visit the items that follow the INODE_ITEM. */
4872 ret = btrfs_next_item(root, path);
4874 /* out will fill 'err' using current statistics */
4876 } else if (ret > 0) {
4881 node = path->nodes[0];
4882 slot = path->slots[0];
4883 btrfs_item_key_to_cpu(node, &key, slot);
/*
 * An item with a different objectid belongs to another inode.
 * NOTE(review): presumably this ends the walk - the statement is not visible.
 */
4884 if (key.objectid != inode_id)
/* Dispatch on the item type; each checker feeds the counters verified below. */
4888 case BTRFS_INODE_REF_KEY:
4889 ret = check_inode_ref(root, &key, node, slot, &refs,
4893 case BTRFS_INODE_EXTREF_KEY:
/* EXTREF items are only expected when the EXTENDED_IREF feature is enabled. */
4894 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896 root->objectid, key.objectid,
4898 ret = check_inode_extref(root, &key, node, slot, &refs,
4902 case BTRFS_DIR_ITEM_KEY:
4903 case BTRFS_DIR_INDEX_KEY:
4905 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906 root->objectid, inode_id,
4907 imode_to_type(mode), key.objectid,
/* check_dir_item() adds the name lengths of the entries to size. */
4910 ret = check_dir_item(root, &key, node, slot, &size,
4914 case BTRFS_EXTENT_DATA_KEY:
4916 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917 root->objectid, inode_id, key.objectid,
/* check_file_extent() accumulates into extent_size. */
4920 ret = check_file_extent(root, &key, node, slot,
4921 nodatasum, &extent_size,
4925 case BTRFS_XATTR_ITEM_KEY:
4928 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929 key.objectid, key.type, key.offset);
4934 /* verify INODE_ITEM nlink/isize/nbytes */
4937 err |= LINK_COUNT_ERROR;
4938 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939 root->objectid, inode_id, nlink);
4943 * Just a warning, as dir inode nbytes is just an
4944 * instructive value.
4946 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948 root->objectid, inode_id, root->nodesize);
/* The directory size must equal the total accumulated by check_dir_item(). */
4951 if (isize != size) {
4953 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954 root->objectid, inode_id, isize, size);
/* The link count must equal the number of refs counted above. */
4957 if (nlink != refs) {
4958 err |= LINK_COUNT_ERROR;
4959 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960 root->objectid, inode_id, nlink, refs);
4961 } else if (!nlink) {
/* Without no_holes, an inode with zero nbytes must still be covered up to isize. */
4965 if (!nbytes && !no_holes && extent_end < isize) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968 root->objectid, inode_id, isize);
/* nbytes must equal the extent size accumulated by check_file_extent(). */
4971 if (nbytes != extent_size) {
4972 err |= NBYTES_ERROR;
4973 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the inode item keyed by BTRFS_FIRST_FREE_OBJECTID in @root.
 *
 * The item is looked up directly; INODE_ITEM_MISSING is recorded with an
 * error message on the path shown below, and check_inode_item() is run on
 * the item otherwise. @ext_ref is forwarded to check_inode_item().
 *
 * NOTE(review): the guards around the missing/found paths and the return
 * statement are not visible in this excerpt.
 */
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4983 struct btrfs_path path;
4984 struct btrfs_key key;
4988 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989 key.type = BTRFS_INODE_ITEM_KEY;
4992 /* For root being dropped, we don't need to check first inode */
4993 if (btrfs_root_refs(&root->root_item) == 0 &&
4994 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4998 btrfs_init_path(&path);
5000 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5005 err |= INODE_ITEM_MISSING;
5006 error("first inode item of root %llu is missing",
/* The item exists: run the full inode check and merge its error bits. */
5010 err |= check_inode_item(root, &path, ext_ref);
5015 btrfs_release_path(&path);
5020 * Iterate all item on the tree and call check_inode_item() to check.
5022 * @root: the root of the tree to be checked.
5023 * @ext_ref: the EXTENDED_IREF feature
5025 * Return 0 if no error found.
5026 * Return <0 for error.
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5030 struct btrfs_path path;
5031 struct node_refs nrefs;
5032 struct btrfs_root_item *root_item = &root->root_item;
5038 * We need to manually check the first inode item(256)
5039 * As the following traversal function will only start from
5040 * the first inode item in the leaf, if inode item(256) is missing
5041 * we will just skip it forever.
5043 ret = check_fs_first_inode(root, ext_ref);
/* Prepare an empty node_refs and a path for the tree walk. */
5047 memset(&nrefs, 0, sizeof(nrefs));
5048 level = btrfs_header_level(root->node);
5049 btrfs_init_path(&path);
/*
 * A root that still has refs, or whose drop_progress objectid is zero, is
 * walked from its root node (an extra reference is taken on that node).
 * Otherwise the walk resumes from the drop_progress key at drop_level.
 */
5051 if (btrfs_root_refs(root_item) > 0 ||
5052 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053 path.nodes[level] = root->node;
5054 path.slots[level] = 0;
5055 extent_buffer_get(root->node);
5057 struct btrfs_key key;
5059 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060 level = root_item->drop_level;
5061 path.lowest_level = level;
5062 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Alternate walking down and back up the tree. */
5069 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5072 /* if ret is negative, walk shall stop */
5078 ret = walk_up_tree_v2(root, &path, &level);
5080 /* Normal exit, reset ret to err */
5087 btrfs_release_path(&path);
5092 * Find the relative ref for root_ref and root_backref.
5094 * @root: the root of the root tree.
5095 * @ref_key: the key of the root ref.
5097 * Return 0 if no error occurred.
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100 struct extent_buffer *node, int slot)
5102 struct btrfs_path path;
5103 struct btrfs_key key;
5104 struct btrfs_root_ref *ref;
5105 struct btrfs_root_ref *backref;
5106 char ref_name[BTRFS_NAME_LEN] = {0};
5107 char backref_name[BTRFS_NAME_LEN] = {0};
5113 u32 backref_namelen;
/* Decode dirid, sequence and name length of the item being checked. */
5118 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119 ref_dirid = btrfs_root_ref_dirid(node, ref);
5120 ref_seq = btrfs_root_ref_sequence(node, ref);
5121 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are clamped to the buffer size with a warning. */
5123 if (ref_namelen <= BTRFS_NAME_LEN) {
5126 len = BTRFS_NAME_LEN;
5127 warning("%s[%llu %llu] ref_name too long",
5128 ref_key->type == BTRFS_ROOT_REF_KEY ?
5129 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the fixed-size header. */
5132 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5134 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset and flips the type: the sum
 * of the two type values minus the current type yields the other one.
 */
5135 key.objectid = ref_key->offset;
5136 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137 key.offset = ref_key->objectid;
5139 btrfs_init_path(&path);
5140 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5142 err |= ROOT_REF_MISSING;
5143 error("%s[%llu %llu] couldn't find relative ref",
5144 ref_key->type == BTRFS_ROOT_REF_KEY ?
5145 "ROOT_REF" : "ROOT_BACKREF",
5146 ref_key->objectid, ref_key->offset);
/* Decode the same fields from the counterpart item. */
5150 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151 struct btrfs_root_ref);
5152 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5156 if (backref_namelen <= BTRFS_NAME_LEN) {
5157 len = backref_namelen;
5159 len = BTRFS_NAME_LEN;
5160 warning("%s[%llu %llu] ref_name too long",
5161 key.type == BTRFS_ROOT_REF_KEY ?
5162 "ROOT_REF" : "ROOT_BACKREF",
5163 key.objectid, key.offset);
5165 read_extent_buffer(path.nodes[0], backref_name,
5166 (unsigned long)(backref + 1), len);
/* Both items must agree on dirid, sequence, name length and name bytes. */
5168 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169 ref_namelen != backref_namelen ||
5170 strncmp(ref_name, backref_name, len)) {
5171 err |= ROOT_REF_MISMATCH;
5172 error("%s[%llu %llu] mismatch relative ref",
5173 ref_key->type == BTRFS_ROOT_REF_KEY ?
5174 "ROOT_REF" : "ROOT_BACKREF",
5175 ref_key->objectid, ref_key->offset);
5178 btrfs_release_path(&path);
5183 * Check all fs/file tree in low_memory mode.
5185 * 1. for fs tree root item, call check_fs_root_v2()
5186 * 2. for fs tree root ref/backref, call check_root_ref()
5188 * Return 0 if no error occurred.
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5192 struct btrfs_root *tree_root = fs_info->tree_root;
5193 struct btrfs_root *cur_root = NULL;
5194 struct btrfs_path path;
5195 struct btrfs_key key;
5196 struct extent_buffer *node;
5197 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature. */
5202 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* Start the scan of the tree root at the ROOT_ITEM of the default fs tree. */
5204 btrfs_init_path(&path);
5205 key.objectid = BTRFS_FS_TREE_OBJECTID;
5207 key.type = BTRFS_ROOT_ITEM_KEY;
5209 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5213 } else if (ret > 0) {
5219 node = path.nodes[0];
5220 slot = path.slots[0];
5221 btrfs_item_key_to_cpu(node, &key, slot);
/* Object ids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range. */
5222 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
/* ROOT_ITEM of a tree accepted by fs_root_objectid(): read and check it. */
5224 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read with the no_cache variant and freed again below;
 * other roots are read through btrfs_read_fs_root() with offset (u64)-1.
 */
5226 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5230 key.offset = (u64)-1;
5231 cur_root = btrfs_read_fs_root(fs_info, &key);
5234 if (IS_ERR(cur_root)) {
5235 error("Fail to read fs/subvol tree: %lld",
5241 ret = check_fs_root_v2(cur_root, ext_ref);
/* Release the root that was read with the no_cache variant above. */
5244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245 btrfs_free_fs_root(cur_root);
5246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247 key.type == BTRFS_ROOT_BACKREF_KEY) {
/* Root ref/backref items are cross-checked against their counterpart. */
5248 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
5252 ret = btrfs_next_item(tree_root, &path);
5262 btrfs_release_path(&path);
/*
 * Cross-check the backrefs attached to @rec.
 *
 * Each backref must have been found in the extent tree, tree backrefs must
 * have been referenced back, and data backrefs must agree with the record on
 * ref count, disk bytenr and byte length. Finally the number of refs found
 * is compared with rec->refs.
 *
 * NOTE(review): the uses of @print_errs and the return statements are not
 * visible in this excerpt; maybe_free_extent_rec() requires a zero return
 * before it frees a record.
 */
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5268 struct list_head *cur = rec->backrefs.next;
5269 struct extent_backref *back;
5270 struct tree_backref *tback;
5271 struct data_backref *dback;
5275 while(cur != &rec->backrefs) {
5276 back = to_extent_backref(cur);
/* The backref was seen in a tree but has no entry in the extent tree. */
5278 if (!back->found_extent_tree) {
5282 if (back->is_data) {
5283 dback = to_data_backref(back);
5284 fprintf(stderr, "Backref %llu %s %llu"
5285 " owner %llu offset %llu num_refs %lu"
5286 " not found in extent tree\n",
5287 (unsigned long long)rec->start,
5288 back->full_backref ?
5290 back->full_backref ?
5291 (unsigned long long)dback->parent:
5292 (unsigned long long)dback->root,
5293 (unsigned long long)dback->owner,
5294 (unsigned long long)dback->offset,
5295 (unsigned long)dback->num_refs);
5297 tback = to_tree_backref(back);
5298 fprintf(stderr, "Backref %llu parent %llu"
5299 " root %llu not found in extent tree\n",
5300 (unsigned long long)rec->start,
5301 (unsigned long long)tback->parent,
5302 (unsigned long long)tback->root);
/* A tree backref that was never referenced back. */
5305 if (!back->is_data && !back->found_ref) {
5309 tback = to_tree_backref(back);
5310 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311 (unsigned long long)rec->start,
5312 back->full_backref ? "parent" : "root",
5313 back->full_backref ?
5314 (unsigned long long)tback->parent :
5315 (unsigned long long)tback->root, back);
/* Data backrefs: compare the per-backref count, bytenr and length. */
5317 if (back->is_data) {
5318 dback = to_data_backref(back);
5319 if (dback->found_ref != dback->num_refs) {
5323 fprintf(stderr, "Incorrect local backref count"
5324 " on %llu %s %llu owner %llu"
5325 " offset %llu found %u wanted %u back %p\n",
5326 (unsigned long long)rec->start,
5327 back->full_backref ?
5329 back->full_backref ?
5330 (unsigned long long)dback->parent:
5331 (unsigned long long)dback->root,
5332 (unsigned long long)dback->owner,
5333 (unsigned long long)dback->offset,
5334 dback->found_ref, dback->num_refs, back);
5336 if (dback->disk_bytenr != rec->start) {
5340 fprintf(stderr, "Backref disk bytenr does not"
5341 " match extent record, bytenr=%llu, "
5342 "ref bytenr=%llu\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)dback->disk_bytenr);
5347 if (dback->bytes != rec->nr) {
5351 fprintf(stderr, "Backref bytes do not match "
5352 "extent backref, bytenr=%llu, ref "
5353 "bytes=%llu, backref bytes=%llu\n",
5354 (unsigned long long)rec->start,
5355 (unsigned long long)rec->nr,
5356 (unsigned long long)dback->bytes);
/*
 * Add this backref to the global count; data backrefs contribute their
 * found_ref value.
 */
5359 if (!back->is_data) {
5362 dback = to_data_backref(back);
5363 found += dback->found_ref;
/* The total found must equal the ref count stored in the record. */
5366 if (found != rec->refs) {
5370 fprintf(stderr, "Incorrect global backref count "
5371 "on %llu found %llu wanted %llu\n",
5372 (unsigned long long)rec->start,
5373 (unsigned long long)found,
5374 (unsigned long long)rec->refs);
/*
 * Empty the backref list of @rec, taking the first entry on each iteration
 * until the list is empty.
 * NOTE(review): the unlink/free statements and the return value are not
 * visible in this excerpt.
 */
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5382 struct extent_backref *back;
5383 struct list_head *cur;
5384 while (!list_empty(&rec->backrefs)) {
5385 cur = rec->backrefs.next;
5386 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: take the first cache entry, unlink it from the
 * tree and release the backrefs of the extent record that embeds it.
 * NOTE(review): the loop header, its exit test and the release of the record
 * itself are not visible in this excerpt.
 */
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5395 struct cache_extent *cache;
5396 struct extent_record *rec;
5399 cache = first_cache_extent(extent_cache);
/* The cache entry is embedded in the extent record. */
5402 rec = container_of(cache, struct extent_record, cache);
5403 remove_cache_extent(extent_cache, cache);
5404 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when its content and owner ref were checked,
 * the extent item ref count equals a non-zero rec->refs, it has no
 * duplicates, all_backpointers_checked() returns zero and none of the
 * bad_full_backref, crossing_stripes or wrong_chunk_type flags is set.
 * NOTE(review): the release of the record itself and the return value are
 * not visible in this excerpt.
 */
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410 struct extent_record *rec)
5412 if (rec->content_checked && rec->owner_ref_checked &&
5413 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415 !rec->bad_full_backref && !rec->crossing_stripes &&
5416 !rec->wrong_chunk_type) {
/* Unlink from the cache tree, free the backrefs and leave any list. */
5417 remove_cache_extent(extent_cache, &rec->cache);
5418 free_all_extent_backrefs(rec);
5419 list_del_init(&rec->list);
/*
 * Check that the owner stored in @buf's header is backed by a reference.
 *
 * Step 1: walk rec->backrefs looking for a tree backref whose root equals
 * btrfs_header_owner(buf); the found_ref and full_backref flags are tested
 * first (presumably to skip unsuitable entries - confirm).
 * Step 2: otherwise read the owner's root, search it for the first key of
 * @buf with path.lowest_level set to level + 1, and compare the block
 * pointer in the resulting parent slot against buf->start.
 *
 * Returns 0 when a reference was found and 1 when it was not.
 */
5425 static int check_owner_ref(struct btrfs_root *root,
5426 struct extent_record *rec,
5427 struct extent_buffer *buf)
5429 struct extent_backref *node;
5430 struct tree_backref *back;
5431 struct btrfs_root *ref_root;
5432 struct btrfs_key key;
5433 struct btrfs_path path;
5434 struct extent_buffer *parent;
5439 list_for_each_entry(node, &rec->backrefs, list) {
5442 if (!node->found_ref)
5444 if (node->full_backref)
5446 back = to_tree_backref(node);
5447 if (btrfs_header_owner(buf) == back->root)
/* a root block is not expected to reach the fs-tree search below */
5450 BUG_ON(rec->is_root);
5452 /* try to find the block by search corresponding fs tree */
5453 key.objectid = btrfs_header_owner(buf);
5454 key.type = BTRFS_ROOT_ITEM_KEY;
5455 key.offset = (u64)-1;
5457 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458 if (IS_ERR(ref_root))
5461 level = btrfs_header_level(buf);
/* the search key is the first key of the block: item key or node key */
5463 btrfs_item_key_to_cpu(buf, &key, 0);
5465 btrfs_node_key_to_cpu(buf, &key, 0);
5467 btrfs_init_path(&path);
/* stop the search one level above @buf so the parent slot can be read */
5468 path.lowest_level = level + 1;
5469 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5473 parent = path.nodes[level + 1];
5474 if (parent && buf->start == btrfs_node_blockptr(parent,
5475 path.slots[level + 1]))
5478 btrfs_release_path(&path);
5479 return found ? 0 : 1;
/*
 * Scan rec->backrefs for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * The full_backref flag is tested before the root comparison.
 * NOTE(review): presumably full backrefs are skipped and a non-zero value is
 * returned on a match - confirm.
 */
5482 static int is_extent_tree_record(struct extent_record *rec)
5484 struct list_head *cur = rec->backrefs.next;
5485 struct extent_backref *node;
5486 struct tree_backref *back;
5489 while(cur != &rec->backrefs) {
5490 node = to_extent_backref(cur);
5494 back = to_tree_backref(node);
5495 if (node->full_backref)
5497 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * Looks up the extent record covering (start, len) in @extent_cache and
 * tests it with is_extent_tree_record(). The record's parent_key is
 * converted to CPU byte order and handed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): presumably returns early when no record is found or when
 * the record does not belong to the extent tree - confirm.
 */
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505 struct cache_tree *extent_cache,
5508 struct extent_record *rec;
5509 struct cache_extent *cache;
5510 struct btrfs_key key;
5512 cache = lookup_cache_extent(extent_cache, start, len);
5516 rec = container_of(cache, struct extent_record, cache);
5517 if (!is_extent_tree_record(rec))
5520 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (header level != 0): the two btrfs_key_ptr entries are read out and
 * written back crosswise; the node's first key is then pushed to the parents
 * with btrfs_fixup_low_keys().
 * Leaf: both items' data are copied into temporary buffers and written to
 * each other's offsets, the offset/size fields of the two item headers are
 * exchanged, and the keys are rewritten via btrfs_set_item_key_unsafe().
 *
 * NOTE(review): item1_data is allocated with item1_size but written back with
 * item2_size (and vice versa). If the two items differ in size this appears
 * to read past the smaller buffer - confirm.
 */
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525 struct extent_buffer *buf, int slot)
5527 if (btrfs_header_level(buf)) {
5528 struct btrfs_key_ptr ptr1, ptr2;
/* fetch both key pointers, then store them in swapped positions */
5530 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531 sizeof(struct btrfs_key_ptr));
5532 read_extent_buffer(buf, &ptr2,
5533 btrfs_node_key_ptr_offset(slot + 1),
5534 sizeof(struct btrfs_key_ptr));
5535 write_extent_buffer(buf, &ptr1,
5536 btrfs_node_key_ptr_offset(slot + 1),
5537 sizeof(struct btrfs_key_ptr));
5538 write_extent_buffer(buf, &ptr2,
5539 btrfs_node_key_ptr_offset(slot),
5540 sizeof(struct btrfs_key_ptr));
5542 struct btrfs_disk_key key;
/* propagate the (possibly new) first key of this node upwards */
5543 btrfs_node_key(buf, &key, 0);
5544 btrfs_fixup_low_keys(root, path, &key,
5545 btrfs_header_level(buf) + 1);
5548 struct btrfs_item *item1, *item2;
5549 struct btrfs_key k1, k2;
5550 char *item1_data, *item2_data;
5551 u32 item1_offset, item2_offset, item1_size, item2_size;
/* capture headers, keys, offsets and sizes before anything is modified */
5553 item1 = btrfs_item_nr(slot);
5554 item2 = btrfs_item_nr(slot + 1);
5555 btrfs_item_key_to_cpu(buf, &k1, slot);
5556 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557 item1_offset = btrfs_item_offset(buf, item1);
5558 item2_offset = btrfs_item_offset(buf, item2);
5559 item1_size = btrfs_item_size(buf, item1);
5560 item2_size = btrfs_item_size(buf, item2);
5562 item1_data = malloc(item1_size);
5565 item2_data = malloc(item2_size);
5571 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5574 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* exchange the offset and size fields of the two item headers */
5579 btrfs_set_item_offset(buf, item1, item2_offset);
5580 btrfs_set_item_offset(buf, item2, item1_offset);
5581 btrfs_set_item_size(buf, item1, item2_size);
5582 btrfs_set_item_size(buf, item2, item1_size);
/* path->slots[0] selects which item btrfs_set_item_key_unsafe() rewrites */
5584 path->slots[0] = slot;
5585 btrfs_set_item_key_unsafe(root, path, &k2);
5586 path->slots[0] = slot + 1;
5587 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair mis-ordered keys in the block at path->nodes[path->lowest_level].
 *
 * Visits every adjacent pair (i, i + 1), reading the keys with the node or
 * item accessor, and compares them with btrfs_comp_cpu_keys(). Pairs are
 * exchanged with swap_values() and the buffer is marked dirty.
 * NOTE(review): presumably pairs that already compare < 0 are skipped and a
 * swap_values() failure aborts the loop - confirm.
 */
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5594 struct extent_buffer *buf;
5595 struct btrfs_key k1, k2;
5597 int level = path->lowest_level;
5600 buf = path->nodes[level];
5601 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5603 btrfs_node_key_to_cpu(buf, &k1, i);
5604 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, i);
5607 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5609 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5611 ret = swap_values(root, path, buf, i);
5614 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted; the type test guards the deletion
 * (presumably any other type makes the function bail out - confirm).
 * The item headers after @slot are moved down by one entry and the header
 * item count is decremented; only the header array is moved here.
 * The key of item 0 is then pushed to the parents with
 * btrfs_fixup_low_keys() (presumably only when slot 0 was removed - confirm)
 * and the buffer is marked dirty.
 */
5620 static int delete_bogus_item(struct btrfs_root *root,
5621 struct btrfs_path *path,
5622 struct extent_buffer *buf, int slot)
5624 struct btrfs_key key;
5625 int nritems = btrfs_header_nritems(buf);
5627 btrfs_item_key_to_cpu(buf, &key, slot);
5629 /* These are all the keys we can deal with missing. */
5630 if (key.type != BTRFS_DIR_INDEX_KEY &&
5631 key.type != BTRFS_EXTENT_ITEM_KEY &&
5632 key.type != BTRFS_METADATA_ITEM_KEY &&
5633 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5637 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638 (unsigned long long)key.objectid, key.type,
5639 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array */
5640 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641 btrfs_item_nr_offset(slot + 1),
5642 sizeof(struct btrfs_item) *
5643 (nritems - slot - 1));
5644 btrfs_set_header_nritems(buf, nritems - 1);
5646 struct btrfs_disk_key disk_key;
5648 btrfs_item_key(buf, &disk_key, 0);
5649 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5651 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets inside the leaf at path->nodes[0].
 *
 * For every item the end of its data is compared with the expected position:
 * BTRFS_LEAF_DATA_SIZE(root) for item 0, the offset of the previous item for
 * all others. When the end lies beyond the expected position,
 * delete_bogus_item() is tried and an error is printed if the item cannot be
 * fixed. When the end lies before it, the gap is computed as "shift", the
 * item data is moved by that many bytes and the item offset is updated.
 */
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5657 struct extent_buffer *buf;
5661 /* We should only get this for leaves */
5662 BUG_ON(path->lowest_level);
5663 buf = path->nodes[0];
5665 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666 unsigned int shift = 0, offset;
/* item 0 must end exactly at the end of the leaf data area */
5668 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669 BTRFS_LEAF_DATA_SIZE(root)) {
5670 if (btrfs_item_end_nr(buf, i) >
5671 BTRFS_LEAF_DATA_SIZE(root)) {
5672 ret = delete_bogus_item(root, path, buf, i);
5675 fprintf(stderr, "item is off the end of the "
5676 "leaf, can't fix\n");
5680 shift = BTRFS_LEAF_DATA_SIZE(root) -
5681 btrfs_item_end_nr(buf, i);
/* every later item must end where the previous item's data starts */
5682 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683 btrfs_item_offset_nr(buf, i - 1)) {
5684 if (btrfs_item_end_nr(buf, i) >
5685 btrfs_item_offset_nr(buf, i - 1)) {
5686 ret = delete_bogus_item(root, path, buf, i);
5689 fprintf(stderr, "items overlap, can't fix\n");
5693 shift = btrfs_item_offset_nr(buf, i - 1) -
5694 btrfs_item_end_nr(buf, i);
5699 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700 i, shift, (unsigned long long)buf->start);
5701 offset = btrfs_item_offset_nr(buf, i);
/* move the item data towards the end of the leaf by "shift" bytes */
5702 memmove_extent_buffer(buf,
5703 btrfs_leaf_data(buf) + offset + shift,
5704 btrfs_leaf_data(buf) + offset,
5705 btrfs_item_size_nr(buf, i));
5706 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5708 btrfs_mark_buffer_dirty(buf);
5712 * We may have moved things, in which case we want to exit so we don't
5713 * write those changes out. Once we have proper abort functionality in
5714 * progs this can be changed to something nicer.
5721 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5722 * then just return -EIO.
/*
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled here.
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each of them the root is read, a transaction
 * is started, and the block is located with btrfs_search_slot() using
 * cow == 1, lowest_level set to the block's level and skip_check_block set.
 * The fix routine matching @status is then run and the transaction is
 * committed.
 */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725 struct extent_buffer *buf,
5726 enum btrfs_tree_block_status status)
5728 struct btrfs_trans_handle *trans;
5729 struct ulist *roots;
5730 struct ulist_node *node;
5731 struct btrfs_root *search_root;
5732 struct btrfs_path path;
5733 struct ulist_iterator iter;
5734 struct btrfs_key root_key, key;
5737 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5741 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5745 btrfs_init_path(&path);
5746 ULIST_ITER_INIT(&iter);
5747 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to load */
5748 root_key.objectid = node->val;
5749 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750 root_key.offset = (u64)-1;
5752 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5759 trans = btrfs_start_transaction(search_root, 0);
5760 if (IS_ERR(trans)) {
5761 ret = PTR_ERR(trans);
/* search down to the bad block's own level, without validating it */
5765 path.lowest_level = btrfs_header_level(buf);
5766 path.skip_check_block = 1;
5767 if (path.lowest_level)
5768 btrfs_node_key_to_cpu(buf, &key, 0);
5770 btrfs_item_key_to_cpu(buf, &key, 0);
5771 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5774 btrfs_commit_transaction(trans, search_root);
5777 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778 ret = fix_key_order(search_root, &path);
5779 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780 ret = fix_item_offset(search_root, &path);
5782 btrfs_commit_transaction(trans, search_root);
5785 btrfs_release_path(&path);
5786 btrfs_commit_transaction(trans, search_root);
5789 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering (buf->start, buf->len) is looked up in @extent_cache
 * and receives the block's generation, the objectid of its first key (when
 * the block has items) and its level. The block is checked with
 * btrfs_check_leaf() or btrfs_check_node() against rec->parent_key; if it is
 * not clean, try_to_fix_bad_block() is attempted and "bad block" is printed
 * when that does not yield BTRFS_TREE_BLOCK_CLEAN.
 * content_checked is then set; owner_ref_checked is set directly for
 * BTRFS_BLOCK_FLAG_FULL_BACKREF blocks, otherwise after check_owner_ref()
 * (presumably only when it succeeds - confirm). Finally the record is
 * offered to maybe_free_extent_rec().
 */
5793 static int check_block(struct btrfs_root *root,
5794 struct cache_tree *extent_cache,
5795 struct extent_buffer *buf, u64 flags)
5797 struct extent_record *rec;
5798 struct cache_extent *cache;
5799 struct btrfs_key key;
5800 enum btrfs_tree_block_status status;
5804 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5807 rec = container_of(cache, struct extent_record, cache);
5808 rec->generation = btrfs_header_generation(buf);
5810 level = btrfs_header_level(buf);
5811 if (btrfs_header_nritems(buf) > 0) {
5814 btrfs_item_key_to_cpu(buf, &key, 0);
5816 btrfs_node_key_to_cpu(buf, &key, 0);
5818 rec->info_objectid = key.objectid;
5820 rec->info_level = level;
5822 if (btrfs_is_leaf(buf))
5823 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5825 status = btrfs_check_node(root, &rec->parent_key, buf);
5827 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5829 status = try_to_fix_bad_block(root, buf, status);
5830 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5832 fprintf(stderr, "bad block %llu\n",
5833 (unsigned long long)buf->start);
5836 * Signal to callers we need to start the scan over
5837 * again since we'll have cowed blocks.
5842 rec->content_checked = 1;
5843 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844 rec->owner_ref_checked = 1;
5846 ret = check_owner_ref(root, rec, buf);
5848 rec->owner_ref_checked = 1;
5852 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two comparisons are made per entry: back->parent against @parent, guarded
 * by a test for non-full backrefs, and back->root against @root, guarded by
 * a test for full backrefs.
 * NOTE(review): presumably the parent comparison is used when @parent is
 * non-zero and the root comparison otherwise, with the guards skipping
 * entries of the other kind - confirm.
 */
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857 u64 parent, u64 root)
5859 struct list_head *cur = rec->backrefs.next;
5860 struct extent_backref *node;
5861 struct tree_backref *back;
5863 while(cur != &rec->backrefs) {
5864 node = to_extent_backref(cur);
5868 back = to_tree_backref(node);
5870 if (!node->full_backref)
5872 if (parent == back->parent)
5875 if (node->full_backref)
5877 if (back->root == root)
/*
 * Allocate a tree_backref and append it to rec->backrefs.
 *
 * The embedded extent_backref node is zeroed. One variant stores @parent and
 * marks the node as a full backref; the other marks it as a non-full backref
 * (presumably storing @root, chosen when @parent is zero - confirm).
 */
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885 u64 parent, u64 root)
5887 struct tree_backref *ref = malloc(sizeof(*ref));
/* only the embedded node is cleared, not the whole structure */
5891 memset(&ref->node, 0, sizeof(ref->node));
5893 ref->parent = parent;
5894 ref->node.full_backref = 1;
5897 ref->node.full_backref = 0;
5899 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * Entries are compared either by @parent, or by the (@root, @owner, @offset)
 * triple. For a triple match where both found_ref and the entry's found_ref
 * flag are set, the entry's bytes and disk_bytenr are additionally compared
 * with @bytes and @disk_bytenr.
 * NOTE(review): presumably an entry failing that last comparison is skipped
 * so the caller allocates a separate backref - confirm.
 */
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905 u64 parent, u64 root,
5906 u64 owner, u64 offset,
5908 u64 disk_bytenr, u64 bytes)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct data_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_data_backref(node);
5921 if (!node->full_backref)
5923 if (parent == back->parent)
5926 if (node->full_backref)
5928 if (back->root == root && back->owner == owner &&
5929 back->offset == offset) {
5930 if (found_ref && node->found_ref &&
5931 (back->bytes != bytes ||
5932 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged as data. One variant stores
 * @parent and marks a full backref; the other stores @offset and marks a
 * non-full backref. ref->bytes is set to max_size, and rec->max_size is
 * raised when max_size is larger.
 */
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942 u64 parent, u64 root,
5943 u64 owner, u64 offset,
5946 struct data_backref *ref = malloc(sizeof(*ref));
5950 memset(&ref->node, 0, sizeof(ref->node));
5951 ref->node.is_data = 1;
5954 ref->parent = parent;
5957 ref->node.full_backref = 1;
5961 ref->offset = offset;
5962 ref->node.full_backref = 0;
5964 ref->bytes = max_size;
5967 list_add_tail(&ref->node.list, &rec->backrefs);
/* keep the record's max_size as the largest size seen */
5968 if (max_size > rec->max_size)
5969 rec->max_size = max_size;
5973 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start via
 * btrfs_lookup_first_block_group() has flags that do not fit the extent:
 *  - non-metadata extents need BTRFS_BLOCK_GROUP_DATA;
 *  - metadata extents need SYSTEM or METADATA, and when a backref exists the
 *    first one decides which: a data backref is always wrong, a tree backref
 *    rooted at BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, any other
 *    METADATA.
 */
5974 static void check_extent_type(struct extent_record *rec)
5976 struct btrfs_block_group_cache *bg_cache;
5978 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5982 /* data extent, check chunk directly*/
5983 if (!rec->metadata) {
5984 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985 rec->wrong_chunk_type = 1;
5989 /* metadata extent, check the obvious case first */
5990 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991 BTRFS_BLOCK_GROUP_METADATA))) {
5992 rec->wrong_chunk_type = 1;
5997 * Check SYSTEM extent, as it's also marked as metadata, we can only
5998 * make sure it's a SYSTEM extent by its backref
6000 if (!list_empty(&rec->backrefs)) {
6001 struct extent_backref *node;
6002 struct tree_backref *tback;
/* only the first backref on the list is inspected */
6005 node = to_extent_backref(rec->backrefs.next);
6006 if (node->is_data) {
6007 /* tree block shouldn't have data backref */
6008 rec->wrong_chunk_type = 1;
6011 tback = container_of(node, struct tree_backref, node);
6013 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6016 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017 if (!(bg_cache->flags & bg_type))
6018 rec->wrong_chunk_type = 1;
6023 * Allocate a new extent record, fill default values from @tmpl and insert into
6024 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025 * the cache, otherwise it fails.
/*
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. rec->nr is the larger of tmpl->nr and
 * tmpl->max_size, while the cache extent size is tmpl->nr. The duplicate
 * counter and the bad_full_backref / crossing_stripes / wrong_chunk_type
 * flags start at 0, flag_block_full_backref at FLAG_UNSET.
 *
 * After insertion rec->nr is added to the global bytes_used counter,
 * crossing_stripes is computed with check_crossing_stripes() using the tree
 * root's nodesize (presumably for metadata only - confirm) and the chunk
 * type is verified with check_extent_type().
 */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028 struct extent_record *tmpl)
6030 struct extent_record *rec;
6033 rec = malloc(sizeof(*rec));
6036 rec->start = tmpl->start;
6037 rec->max_size = tmpl->max_size;
6038 rec->nr = max(tmpl->nr, tmpl->max_size);
6039 rec->found_rec = tmpl->found_rec;
6040 rec->content_checked = tmpl->content_checked;
6041 rec->owner_ref_checked = tmpl->owner_ref_checked;
6042 rec->num_duplicates = 0;
6043 rec->metadata = tmpl->metadata;
6044 rec->flag_block_full_backref = FLAG_UNSET;
6045 rec->bad_full_backref = 0;
6046 rec->crossing_stripes = 0;
6047 rec->wrong_chunk_type = 0;
6048 rec->is_root = tmpl->is_root;
6049 rec->refs = tmpl->refs;
6050 rec->extent_item_refs = tmpl->extent_item_refs;
6051 rec->parent_generation = tmpl->parent_generation;
6052 INIT_LIST_HEAD(&rec->backrefs);
6053 INIT_LIST_HEAD(&rec->dups);
6054 INIT_LIST_HEAD(&rec->list);
6055 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* the cache extent uses tmpl->nr, not the max() stored in rec->nr */
6056 rec->cache.start = tmpl->start;
6057 rec->cache.size = tmpl->nr;
6058 ret = insert_cache_extent(extent_cache, &rec->cache);
6063 bytes_used += rec->nr;
6066 rec->crossing_stripes = check_crossing_stripes(global_info,
6067 rec->start, global_info->tree_root->nodesize);
6068 check_extent_type(rec);
6073 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6075 * - refs - if found, increase refs
6076 * - is_root - if found, set
6077 * - content_checked - if found, set
6078 * - owner_ref_checked - if found, set
6080 * If not found, create a new one, initialize and insert.
/*
 * Looks up (tmpl->start, tmpl->nr) in @extent_cache.
 *
 * Existing record: nr may be reset from the template; when the template
 * carries found_rec and either starts elsewhere or the record already has
 * found_rec, the record is queued on duplicate_extents (if not queued yet)
 * and a new entry holding the template's start, max_size, metadata and
 * extent_item_refs is added to rec->dups. extent_item_refs is taken from the
 * template for non-duplicates, with a message printed when the record
 * already had a value. The checked flags, parent_key, parent_generation and
 * max_size are merged, then crossing_stripes and the chunk type are
 * re-evaluated and the record is offered to maybe_free_extent_rec().
 *
 * No existing record: falls through to add_extent_rec_nolookup().
 */
6082 static int add_extent_rec(struct cache_tree *extent_cache,
6083 struct extent_record *tmpl)
6085 struct extent_record *rec;
6086 struct cache_extent *cache;
6090 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6092 rec = container_of(cache, struct extent_record, cache);
6096 rec->nr = max(tmpl->nr, tmpl->max_size);
6099 * We need to make sure to reset nr to whatever the extent
6100 * record says was the real size, this way we can compare it to
6103 if (tmpl->found_rec) {
6104 if (tmpl->start != rec->start || rec->found_rec) {
6105 struct extent_record *tmp;
/* queue the record for duplicate handling only once */
6108 if (list_empty(&rec->list))
6109 list_add_tail(&rec->list,
6110 &duplicate_extents);
6113 * We have to do this song and dance in case we
6114 * find an extent record that falls inside of
6115 * our current extent record but does not have
6116 * the same objectid.
6118 tmp = malloc(sizeof(*tmp));
6121 tmp->start = tmpl->start;
6122 tmp->max_size = tmpl->max_size;
6125 tmp->metadata = tmpl->metadata;
6126 tmp->extent_item_refs = tmpl->extent_item_refs;
6127 INIT_LIST_HEAD(&tmp->list);
6128 list_add_tail(&tmp->list, &rec->dups);
6129 rec->num_duplicates++;
6136 if (tmpl->extent_item_refs && !dup) {
6137 if (rec->extent_item_refs) {
6138 fprintf(stderr, "block %llu rec "
6139 "extent_item_refs %llu, passed %llu\n",
6140 (unsigned long long)tmpl->start,
6141 (unsigned long long)
6142 rec->extent_item_refs,
6143 (unsigned long long)tmpl->extent_item_refs);
6145 rec->extent_item_refs = tmpl->extent_item_refs;
6149 if (tmpl->content_checked)
6150 rec->content_checked = 1;
6151 if (tmpl->owner_ref_checked)
6152 rec->owner_ref_checked = 1;
6153 memcpy(&rec->parent_key, &tmpl->parent_key,
6154 sizeof(tmpl->parent_key));
6155 if (tmpl->parent_generation)
6156 rec->parent_generation = tmpl->parent_generation;
6157 if (rec->max_size < tmpl->max_size)
6158 rec->max_size = tmpl->max_size;
6161 * A metadata extent can't cross stripe_len boundary, otherwise
6162 * kernel scrub won't be able to handle it.
6163 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6167 rec->crossing_stripes = check_crossing_stripes(
6168 global_info, rec->start,
6169 global_info->tree_root->nodesize);
6170 check_extent_type(rec);
6171 maybe_free_extent_rec(extent_cache, rec);
6175 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * The extent record is looked up by @bytenr; a zeroed template starting at
 * @bytenr can be inserted with add_extent_rec_nolookup() followed by a second
 * lookup (presumably when the first lookup finds nothing - confirm).
 * The matching backref is found or allocated, and either its found_ref or
 * its found_extent_tree flag is set; a message is printed when the flag was
 * already set. Which flag is used presumably depends on @found_ref -
 * confirm. Finishes with check_extent_type() and maybe_free_extent_rec().
 */
6180 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6181 u64 parent, u64 root, int found_ref)
6183 struct extent_record *rec;
6184 struct tree_backref *back;
6185 struct cache_extent *cache;
6188 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190 struct extent_record tmpl;
6192 memset(&tmpl, 0, sizeof(tmpl));
6193 tmpl.start = bytenr;
6197 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6201 /* really a bug in cache_extent implement now */
6202 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6207 rec = container_of(cache, struct extent_record, cache);
/* the record found must start exactly at @bytenr */
6208 if (rec->start != bytenr) {
6210 * Several cause, from unaligned bytenr to over lapping extents
6215 back = find_tree_backref(rec, parent, root);
6217 back = alloc_tree_backref(rec, parent, root);
6223 if (back->node.found_ref) {
6224 fprintf(stderr, "Extent back ref already exists "
6225 "for %llu parent %llu root %llu \n",
6226 (unsigned long long)bytenr,
6227 (unsigned long long)parent,
6228 (unsigned long long)root);
6230 back->node.found_ref = 1;
6232 if (back->node.found_extent_tree) {
6233 fprintf(stderr, "Extent back ref already exists "
6234 "for %llu parent %llu root %llu \n",
6235 (unsigned long long)bytenr,
6236 (unsigned long long)parent,
6237 (unsigned long long)root);
6239 back->node.found_extent_tree = 1;
6241 check_extent_type(rec);
6242 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up by @bytenr (a template with start and
 * max_size can be inserted first) and its max_size is raised to @max_size
 * when that is larger. The backref is found or allocated and then updated in
 * one of two ways:
 *  - num_refs must be 1; found_ref is set and counted, bytes and disk_bytenr
 *    are overwritten, and the record is marked content/owner checked;
 *  - or num_refs is stored and found_extent_tree is set, with a message
 *    printed when it was already set.
 * Which way applies presumably depends on @found_ref - confirm.
 * Ends by offering the record to maybe_free_extent_rec().
 */
6246 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6247 u64 parent, u64 root, u64 owner, u64 offset,
6248 u32 num_refs, int found_ref, u64 max_size)
6250 struct extent_record *rec;
6251 struct data_backref *back;
6252 struct cache_extent *cache;
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257 struct extent_record tmpl;
6259 memset(&tmpl, 0, sizeof(tmpl));
6260 tmpl.start = bytenr;
6262 tmpl.max_size = max_size;
6264 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6268 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6273 rec = container_of(cache, struct extent_record, cache);
6274 if (rec->max_size < max_size)
6275 rec->max_size = max_size;
6278 * If found_ref is set then max_size is the real size and must match the
6279 * existing refs. So if we have already found a ref then we need to
6280 * make sure that this ref matches the existing one, otherwise we need
6281 * to add a new backref so we can notice that the backrefs don't match
6282 * and we need to figure out who is telling the truth. This is to
6283 * account for that awful fsync bug I introduced where we'd end up with
6284 * a btrfs_file_extent_item that would have its length include multiple
6285 * prealloc extents or point inside of a prealloc extent.
6287 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6290 back = alloc_data_backref(rec, parent, root, owner, offset,
6296 BUG_ON(num_refs != 1);
/* a backref that was already found must agree on the size */
6297 if (back->node.found_ref)
6298 BUG_ON(back->bytes != max_size);
6299 back->node.found_ref = 1;
6300 back->found_ref += 1;
6301 back->bytes = max_size;
6302 back->disk_bytenr = bytenr;
6304 rec->content_checked = 1;
6305 rec->owner_ref_checked = 1;
6307 if (back->node.found_extent_tree) {
6308 fprintf(stderr, "Extent back ref already exists "
6309 "for %llu parent %llu root %llu "
6310 "owner %llu offset %llu num_refs %lu\n",
6311 (unsigned long long)bytenr,
6312 (unsigned long long)parent,
6313 (unsigned long long)root,
6314 (unsigned long long)owner,
6315 (unsigned long long)offset,
6316 (unsigned long)num_refs);
6318 back->num_refs = num_refs;
6319 back->node.found_extent_tree = 1;
6321 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for processing.
 *
 * The range is first added to @seen and then to @pending; the result of the
 * second insertion is not checked.
 * NOTE(review): presumably a failure to add to @seen (e.g. range already
 * present) returns early without touching @pending - confirm.
 */
6325 static int add_pending(struct cache_tree *pending,
6326 struct cache_tree *seen, u64 bytenr, u32 size)
6329 ret = add_cache_extent(seen, bytenr, size);
6332 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits.
 *
 * Sources are consulted in this order: the first entry of @reada, then
 * @nodes searched from (last - 32768) with a fallback to the start of the
 * tree, then @pending. Entries are copied into @bits until the tree is
 * exhausted or @bits_nr entries have been stored. When more than 8 slots
 * remain free, further entries from @pending that start within 32768 bytes
 * after the end of the previously taken range are appended.
 * NOTE(review): presumably the number of filled entries is returned -
 * confirm.
 * NOTE(review): node_start is "unsigned long" while @last is u64; where
 * unsigned long is 32 bits wide the value would be truncated - confirm
 * whether that matters.
 */
6336 static int pick_next_pending(struct cache_tree *pending,
6337 struct cache_tree *reada,
6338 struct cache_tree *nodes,
6339 u64 last, struct block_info *bits, int bits_nr,
6342 unsigned long node_start = last;
6343 struct cache_extent *cache;
6346 cache = search_cache_extent(reada, 0);
6348 bits[0].start = cache->start;
6349 bits[0].size = cache->size;
/* back up by 32768 bytes before searching the node tree */
6354 if (node_start > 32768)
6355 node_start -= 32768;
6357 cache = search_cache_extent(nodes, node_start);
6359 cache = search_cache_extent(nodes, 0);
6362 cache = search_cache_extent(pending, 0);
6367 bits[ret].start = cache->start;
6368 bits[ret].size = cache->size;
6369 cache = next_cache_extent(cache);
6371 } while (cache && ret < bits_nr);
6377 bits[ret].start = cache->start;
6378 bits[ret].size = cache->size;
6379 cache = next_cache_extent(cache);
6381 } while (cache && ret < bits_nr);
6383 if (bits_nr - ret > 8) {
6384 u64 lookup = bits[0].start + bits[0].size;
6385 struct cache_extent *next;
6386 next = search_cache_extent(pending, lookup);
/* a gap larger than 32768 bytes is tested for here */
6388 if (next->start - lookup > 32768)
6390 bits[ret].start = next->start;
6391 bits[ret].size = next->size;
6392 lookup = next->start + next->size;
6396 next = next_cache_extent(next);
/*
 * Per-entry release callback for the chunk cache tree.
 *
 * Maps @cache back to its chunk_record and unlinks the record's list and
 * dextents list heads.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6404 static void free_chunk_record(struct cache_extent *cache)
6406 struct chunk_record *rec;
6408 rec = container_of(cache, struct chunk_record, cache);
6409 list_del_init(&rec->list);
6410 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by passing free_chunk_record as the
 * per-entry callback to cache_tree_free_extents().
 */
6414 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6416 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node release callback for the device rb-tree: maps @node back to its
 * enclosing device_record.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6419 static void free_device_record(struct rb_node *node)
6421 struct device_record *rec;
6423 rec = container_of(node, struct device_record, node);
/*
 * Instantiate FREE_RB_BASED_TREE for "device_cache" with free_device_record
 * as the per-node callback.
 * NOTE(review): the macro is defined outside this file section; presumably
 * it emits the teardown function for the device cache - confirm the
 * generated name against the macro definition.
 */
6427 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: insert its cache extent into tree->tree and append
 * the record to tree->block_groups.
 * NOTE(review): presumably a failed cache insertion returns before the list
 * append - confirm.
 */
6429 int insert_block_group_record(struct block_group_tree *tree,
6430 struct block_group_record *bg_rec)
6434 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6438 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry release callback for the block group tree: maps @cache back to
 * its block_group_record and unlinks it from its list.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6442 static void free_block_group_record(struct cache_extent *cache)
6444 struct block_group_record *rec;
6446 rec = container_of(cache, struct block_group_record, cache);
6447 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree by passing free_block_group_record as
 * the per-entry callback to cache_tree_free_extents().
 */
6451 void free_block_group_tree(struct block_group_tree *tree)
6453 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree.
 *
 * The cache extent is inserted with insert_cache_extent2() and the record is
 * then appended to both orphan lists, no_chunk_orphans and
 * no_device_orphans.
 * NOTE(review): presumably a failed insertion returns before the list
 * appends - confirm.
 */
6456 int insert_device_extent_record(struct device_extent_tree *tree,
6457 struct device_extent_record *de_rec)
6462 * Device extent is a bit different from the other extents, because
6463 * the extents which belong to the different devices may have the
6464 * same start and size, so we need use the special extent cache
6465 * search/insert functions.
6467 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6471 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6472 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry release callback for the device extent tree.
 *
 * Maps @cache back to its device_extent_record and unlinks the record from
 * chunk_list and device_list when it is still linked on them.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6476 static void free_device_extent_record(struct cache_extent *cache)
6478 struct device_extent_record *rec;
6480 rec = container_of(cache, struct device_extent_record, cache);
6481 if (!list_empty(&rec->chunk_list))
6482 list_del_init(&rec->chunk_list);
6483 if (!list_empty(&rec->device_list))
6484 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree by passing free_device_extent_record as
 * the per-entry callback to cache_tree_free_extents().
 */
6488 void free_device_extent_tree(struct device_extent_tree *tree)
6490 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6493 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turns a v0 extent ref item at @slot of @leaf into a backref: the item key's
 * objectid is the extent bytenr and its offset is passed as the parent. A ref
 * objectid below BTRFS_FIRST_FREE_OBJECTID is handled by add_tree_backref();
 * the other call is add_data_backref() with the v0 ref count (presumably the
 * else branch - confirm).
 */
6494 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6495 struct extent_buffer *leaf, int slot)
6497 struct btrfs_extent_ref_v0 *ref0;
6498 struct btrfs_key key;
6501 btrfs_item_key_to_cpu(leaf, &key, slot);
6502 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6503 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6504 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6507 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6508 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the item's stripes
 * (btrfs_chunk_record_size(num_stripes)); "memory allocation failed" is
 * printed when the allocation fails. The cache extent spans
 * (key->offset, chunk length). The key fields, header generation and the
 * chunk's owner, stripe_len, type, io_width, io_align, sector_size,
 * num_stripes and sub_stripes are copied, followed by devid, offset and
 * device uuid of every stripe.
 */
6514 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6515 struct btrfs_key *key,
6518 struct btrfs_chunk *ptr;
6519 struct chunk_record *rec;
6522 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6523 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6525 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6527 fprintf(stderr, "memory allocation failed\n");
6531 INIT_LIST_HEAD(&rec->list);
6532 INIT_LIST_HEAD(&rec->dextents);
6535 rec->cache.start = key->offset;
6536 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6538 rec->generation = btrfs_header_generation(leaf);
6540 rec->objectid = key->objectid;
6541 rec->type = key->type;
6542 rec->offset = key->offset;
/* length mirrors the cache extent size set from the chunk item above */
6544 rec->length = rec->cache.size;
6545 rec->owner = btrfs_chunk_owner(leaf, ptr);
6546 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6547 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6548 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6549 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6550 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6551 rec->num_stripes = num_stripes;
6552 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6554 for (i = 0; i < rec->num_stripes; ++i) {
6555 rec->stripes[i].devid =
6556 btrfs_stripe_devid_nr(leaf, ptr, i);
6557 rec->stripes[i].offset =
6558 btrfs_stripe_offset_nr(leaf, ptr, i);
6559 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6560 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported with its offset and length. A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; "Chunk[...] existed."
 * is printed when the insertion fails.
 * NOTE(review): the result of btrfs_new_chunk_record() is dereferenced
 * without a visible NULL check - confirm that it cannot return NULL.
 */
6567 static int process_chunk_item(struct cache_tree *chunk_cache,
6568 struct btrfs_key *key, struct extent_buffer *eb,
6571 struct chunk_record *rec;
6572 struct btrfs_chunk *chunk;
6575 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6577 * Do extra check for this chunk item,
6579 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6580 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6581 * and owner<->key_type check.
6583 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6586 error("chunk(%llu, %llu) is not valid, ignore it",
6587 key->offset, btrfs_chunk_length(eb, chunk));
6590 rec = btrfs_new_chunk_record(eb, key, slot);
6591 ret = insert_cache_extent(chunk_cache, &rec->cache);
6593 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6594 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * Copies the key fields and header generation, then devid, total bytes and
 * bytes used from the item. "Device[...] existed." is printed when the
 * insertion fails.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * then from btrfs_device_id(); the second value wins, so the first store
 * looks redundant - confirm.
 * NOTE(review): the record comes from malloc(), so fields not assigned here
 * are left uninitialised - confirm that all fields used later are set.
 */
6601 static int process_device_item(struct rb_root *dev_cache,
6602 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6604 struct btrfs_dev_item *ptr;
6605 struct device_record *rec;
6608 ptr = btrfs_item_ptr(eb,
6609 slot, struct btrfs_dev_item);
6611 rec = malloc(sizeof(*rec));
6613 fprintf(stderr, "memory allocation failed\n");
6617 rec->devid = key->offset;
6618 rec->generation = btrfs_header_generation(eb);
6620 rec->objectid = key->objectid;
6621 rec->type = key->type;
6622 rec->offset = key->offset;
6624 rec->devid = btrfs_device_id(eb, ptr);
6625 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6626 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6628 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6630 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated ("memory allocation failed" is printed on
 * failure). Its cache extent spans (key->objectid, key->offset); the key
 * fields, the leaf's header generation and the item's flags are copied and
 * the list head is initialised.
 */
6637 struct block_group_record *
6638 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6641 struct btrfs_block_group_item *ptr;
6642 struct block_group_record *rec;
6644 rec = calloc(1, sizeof(*rec));
6646 fprintf(stderr, "memory allocation failed\n");
/* for block group keys, objectid is the start and offset the size */
6650 rec->cache.start = key->objectid;
6651 rec->cache.size = key->offset;
6653 rec->generation = btrfs_header_generation(leaf);
6655 rec->objectid = key->objectid;
6656 rec->type = key->type;
6657 rec->offset = key->offset;
6659 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6660 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6662 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache; "Block Group[...] existed." is printed when the
 * insertion fails.
 * NOTE(review): the result of btrfs_new_block_group_record() is used without
 * a visible NULL check - confirm that it cannot return NULL.
 */
6667 static int process_block_group_item(struct block_group_tree *block_group_cache,
6668 struct btrfs_key *key,
6669 struct extent_buffer *eb, int slot)
6671 struct block_group_record *rec;
6674 rec = btrfs_new_block_group_record(eb, key, slot);
6675 ret = insert_block_group_record(block_group_cache, rec);
6677 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6678 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is zero-allocated ("memory allocation failed" is printed on
 * failure). The cache extent is keyed by (key->objectid, key->offset) and
 * sized by the extent length read from the item. The key fields, the leaf's
 * header generation and the chunk objectid and chunk offset of the item are
 * copied, and both list heads are initialised.
 */
6685 struct device_extent_record *
6686 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6687 struct btrfs_key *key, int slot)
6689 struct device_extent_record *rec;
6690 struct btrfs_dev_extent *ptr;
6692 rec = calloc(1, sizeof(*rec));
6694 fprintf(stderr, "memory allocation failed\n");
/* cache.objectid additionally carries the key objectid for this tree */
6698 rec->cache.objectid = key->objectid;
6699 rec->cache.start = key->offset;
6701 rec->generation = btrfs_header_generation(leaf);
6703 rec->objectid = key->objectid;
6704 rec->type = key->type;
6705 rec->offset = key->offset;
6707 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6708 rec->chunk_objecteid =
6709 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6711 btrfs_dev_extent_chunk_offset(leaf, ptr);
6712 rec->length = btrfs_dev_extent_length(leaf, ptr);
6713 rec->cache.size = rec->length;
6715 INIT_LIST_HEAD(&rec->chunk_list);
6716 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; "Device extent[...] existed." is printed when the
 * insertion fails.
 * NOTE(review): the result of btrfs_new_device_extent_record() is used
 * without a visible NULL check - confirm that it cannot return NULL.
 */
6722 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6723 struct btrfs_key *key, struct extent_buffer *eb,
6726 struct device_extent_record *rec;
6729 rec = btrfs_new_device_extent_record(eb, key, slot);
6730 ret = insert_device_extent_record(dev_extent_cache, rec);
6733 "Device extent[%llu, %llu, %llu] existed.\n",
6734 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item at @slot of @eb, together with every backref stored
 * inline in it, into @extent_cache.
 *
 * The key objectid is the extent start.  The extent length comes from
 * root->nodesize for METADATA_ITEM keys; key.offset is the other source.
 * Three sanity checks report through error(): a start that is not aligned
 * to the sector size, a metadata length different from the node size, and a
 * data length that is not aligned to the sector size.
 *
 * Items smaller than struct btrfs_extent_item are treated as v0 extent
 * items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
6741 static int process_extent_item(struct btrfs_root *root,
6742 struct cache_tree *extent_cache,
6743 struct extent_buffer *eb, int slot)
6745 struct btrfs_extent_item *ei;
6746 struct btrfs_extent_inline_ref *iref;
6747 struct btrfs_extent_data_ref *dref;
6748 struct btrfs_shared_data_ref *sref;
6749 struct btrfs_key key;
6750 struct extent_record tmpl;
6755 u32 item_size = btrfs_item_size_nr(eb, slot);
6761 btrfs_item_key_to_cpu(eb, &key, slot);
6763 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6765 num_bytes = root->nodesize;
6767 num_bytes = key.offset;
6770 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6771 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6772 key.objectid, root->sectorsize);
/* Item too small for the current format: legacy v0 layout. */
6775 if (item_size < sizeof(*ei)) {
6776 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6777 struct btrfs_extent_item_v0 *ei0;
6778 BUG_ON(item_size != sizeof(*ei0));
6779 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6780 refs = btrfs_extent_refs_v0(eb, ei0);
6784 memset(&tmpl, 0, sizeof(tmpl));
6785 tmpl.start = key.objectid;
6786 tmpl.nr = num_bytes;
6787 tmpl.extent_item_refs = refs;
6788 tmpl.metadata = metadata;
6790 tmpl.max_size = num_bytes;
6792 return add_extent_rec(extent_cache, &tmpl);
/* Current-format extent item. */
6795 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6796 refs = btrfs_extent_refs(eb, ei);
6797 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6801 if (metadata && num_bytes != root->nodesize) {
6802 error("ignore invalid metadata extent, length %llu does not equal to %u",
6803 num_bytes, root->nodesize);
6806 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6807 error("ignore invalid data extent, length %llu is not aligned to %u",
6808 num_bytes, root->sectorsize);
6812 memset(&tmpl, 0, sizeof(tmpl));
6813 tmpl.start = key.objectid;
6814 tmpl.nr = num_bytes;
6815 tmpl.extent_item_refs = refs;
6816 tmpl.metadata = metadata;
6818 tmpl.max_size = num_bytes;
6819 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs begin right after the extent item, or after the tree block
 * info when a tree block is stored under an EXTENT_ITEM key.
 */
6821 ptr = (unsigned long)(ei + 1);
6822 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6823 key.type == BTRFS_EXTENT_ITEM_KEY)
6824 ptr += sizeof(struct btrfs_tree_block_info);
6826 end = (unsigned long)ei + item_size;
/* Decode one inline ref and dispatch on its type. */
6828 iref = (struct btrfs_extent_inline_ref *)ptr;
6829 type = btrfs_extent_inline_ref_type(eb, iref);
6830 offset = btrfs_extent_inline_ref_offset(eb, iref);
6832 case BTRFS_TREE_BLOCK_REF_KEY:
6833 ret = add_tree_backref(extent_cache, key.objectid,
6837 "add_tree_backref failed (extent items tree block): %s",
6840 case BTRFS_SHARED_BLOCK_REF_KEY:
6841 ret = add_tree_backref(extent_cache, key.objectid,
6845 "add_tree_backref failed (extent items shared block): %s",
6848 case BTRFS_EXTENT_DATA_REF_KEY:
6849 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6850 add_data_backref(extent_cache, key.objectid, 0,
6851 btrfs_extent_data_ref_root(eb, dref),
6852 btrfs_extent_data_ref_objectid(eb,
6854 btrfs_extent_data_ref_offset(eb, dref),
6855 btrfs_extent_data_ref_count(eb, dref),
6858 case BTRFS_SHARED_DATA_REF_KEY:
6859 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6860 add_data_backref(extent_cache, key.objectid, offset,
6862 btrfs_shared_data_ref_count(eb, sref),
6866 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6867 key.objectid, key.type, num_bytes);
/* Step over this inline ref; its size depends on the ref type. */
6870 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * present in its free space ctl as an entry with exactly that offset and
 * length, then unlink that entry.
 *
 * Before the lookup, every super block mirror is mapped back to logical
 * addresses with btrfs_rmap_block() and the stripes found are carved out of
 * the range.  When a stripe sits in the middle of the range, the left part
 * is checked by a recursive call and the loop carries on with the right
 * part.  A missing entry or a mismatch in offset or length is reported on
 * stderr.
 */
6877 static int check_cache_range(struct btrfs_root *root,
6878 struct btrfs_block_group_cache *cache,
6879 u64 offset, u64 bytes)
6881 struct btrfs_free_space *entry;
/* Map each super block mirror into this block group. */
6887 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6888 bytenr = btrfs_sb_offset(i);
6889 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6890 cache->key.objectid, bytenr, 0,
6891 &logical, &nr, &stripe_len);
6896 if (logical[nr] + stripe_len <= offset)
6898 if (offset + bytes <= logical[nr])
6900 if (logical[nr] == offset) {
6901 if (stripe_len >= bytes) {
6905 bytes -= stripe_len;
6906 offset += stripe_len;
6907 } else if (logical[nr] < offset) {
6908 if (logical[nr] + stripe_len >=
6913 bytes = (offset + bytes) -
6914 (logical[nr] + stripe_len);
6915 offset = logical[nr] + stripe_len;
6918 * Could be tricky, the super may land in the
6919 * middle of the area we're checking. First
6920 * check the easiest case, it's at the end.
6922 if (logical[nr] + stripe_len >=
6924 bytes = logical[nr] - offset;
6928 /* Check the left side */
6929 ret = check_cache_range(root, cache,
6931 logical[nr] - offset);
6937 /* Now we continue with the right side */
6938 bytes = (offset + bytes) -
6939 (logical[nr] + stripe_len);
6940 offset = logical[nr] + stripe_len;
/* Whatever is left of the range must match one free space entry. */
6947 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6949 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6950 offset, offset+bytes);
6954 if (entry->offset != offset) {
6955 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6960 if (entry->bytes != bytes) {
6961 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6962 bytes, entry->bytes, offset);
/* Remove the verified entry from the free space ctl. */
6966 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the free space loaded for block group @cache with the extent tree.
 *
 * The extent root is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) until a key at or past the end of the block
 * group.  Only EXTENT_ITEM and METADATA_ITEM keys are considered.  Every
 * gap between the end of the previous extent (last) and the start of the
 * next one is handed to check_cache_range(), and so is the tail of the
 * block group behind the last extent.  Entries still present in the free
 * space rbtree afterwards are reported on stderr.
 */
6971 static int verify_space_cache(struct btrfs_root *root,
6972 struct btrfs_block_group_cache *cache)
6974 struct btrfs_path path;
6975 struct extent_buffer *leaf;
6976 struct btrfs_key key;
6980 root = root->fs_info->extent_root;
6982 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6984 btrfs_init_path(&path);
6985 key.objectid = last;
6987 key.type = BTRFS_EXTENT_ITEM_KEY;
6988 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6993 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6994 ret = btrfs_next_leaf(root, &path);
7002 leaf = path.nodes[0];
7003 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7004 if (key.objectid >= cache->key.offset + cache->key.objectid)
7006 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7007 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at last: no gap, only advance last. */
7012 if (last == key.objectid) {
7013 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7014 last = key.objectid + key.offset;
7016 last = key.objectid + root->nodesize;
/* Gap between last and this extent: it has to be free space. */
7021 ret = check_cache_range(root, cache, last,
7022 key.objectid - last);
7025 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7026 last = key.objectid + key.offset;
7028 last = key.objectid + root->nodesize;
/* Tail of the block group behind the last extent seen. */
7032 if (last < cache->key.objectid + cache->key.offset)
7033 ret = check_cache_range(root, cache, last,
7034 cache->key.objectid +
7035 cache->key.offset - last);
7038 btrfs_release_path(&path);
7041 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7042 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache, or the free space tree, of the block groups.
 *
 * When the super block cache generation is not -1 and differs from the
 * super block generation, a message states that the space cache will be
 * invalidated.  Block groups are looked up with
 * btrfs_lookup_first_block_group(), beginning at BTRFS_SUPER_INFO_OFFSET +
 * BTRFS_SUPER_INFO_SIZE and continuing from the end of each block group
 * found.  For each one the free space ctl is initialised if it is missing,
 * free space is loaded from the free space tree (FREE_SPACE_TREE compat_ro
 * bit, with super stripes excluded around the load) or from the space
 * cache, and the result is passed to verify_space_cache().
 *
 * Progress reporting is started and stopped through ctx when enabled.
 * Returns -EINVAL when the error counter is non-zero, 0 otherwise.
 */
7050 static int check_space_cache(struct btrfs_root *root)
7052 struct btrfs_block_group_cache *cache;
7053 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7057 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7058 btrfs_super_generation(root->fs_info->super_copy) !=
7059 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7060 printf("cache and super generation don't match, space cache "
7061 "will be invalidated\n");
7065 if (ctx.progress_enabled) {
7066 ctx.tp = TASK_FREE_SPACE;
7067 task_start(ctx.info);
7071 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup resumes at the end of this block group. */
7075 start = cache->key.objectid + cache->key.offset;
7076 if (!cache->free_space_ctl) {
7077 if (btrfs_init_free_space_ctl(cache,
7078 root->sectorsize)) {
7083 btrfs_remove_free_space_cache(cache);
7086 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7087 ret = exclude_super_stripes(root, cache);
7089 fprintf(stderr, "could not exclude super stripes: %s\n",
7094 ret = load_free_space_tree(root->fs_info, cache);
7095 free_excluded_extents(root, cache);
7097 fprintf(stderr, "could not load free space tree: %s\n",
7104 ret = load_free_space_cache(root->fs_info, cache);
7109 ret = verify_space_cache(root, cache);
7111 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7112 cache->key.objectid);
7117 task_stop(ctx.info);
7119 return error ? -EINVAL : 0;
/*
 * Read the data of the extent [@bytenr, @bytenr + @num_bytes) and verify it,
 * one sector at a time, against the checksums stored in @eb starting at
 * @leaf_offset.
 *
 * @num_bytes is tested against the sector size up front and a buffer of
 * @num_bytes is allocated for the data.  For every sector the checksum is
 * computed with btrfs_csum_data() and btrfs_csum_final() and compared with
 * the csum_size bytes read from the leaf.  A mismatch is reported on stderr
 * together with the mirror number, and the number of copies is looked up to
 * decide whether a further mirror exists (mirror < num_copies - 1).
 */
7122 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7123 u64 num_bytes, unsigned long leaf_offset,
7124 struct extent_buffer *eb) {
7127 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7129 unsigned long csum_offset;
7133 u64 data_checked = 0;
7139 if (num_bytes % root->sectorsize)
7142 data = malloc(num_bytes);
7146 while (offset < num_bytes) {
7149 read_len = num_bytes - offset;
7150 /* read as much space once a time */
7151 ret = read_extent_data(root, data + offset,
7152 bytenr + offset, &read_len, mirror);
7156 /* verify every 4k data's checksum */
7157 while (data_checked < read_len) {
7159 tmp = offset + data_checked;
7161 csum = btrfs_csum_data((char *)data + tmp,
7162 csum, root->sectorsize);
7163 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sector, indexed by sector number in the extent. */
7165 csum_offset = leaf_offset +
7166 tmp / root->sectorsize * csum_size;
7167 read_extent_buffer(eb, (char *)&csum_expected,
7168 csum_offset, csum_size);
7169 /* try another mirror */
7170 if (csum != csum_expected) {
7171 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7172 mirror, bytenr + tmp,
7173 csum, csum_expected);
7174 num_copies = btrfs_num_copies(
7175 &root->fs_info->mapping_tree,
7177 if (mirror < num_copies - 1) {
7182 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr and num_bytes long is
 * covered by EXTENT_ITEM records of the extent tree.
 *
 * The extent root is searched for (bytenr, EXTENT_ITEM, (u64)-1).  The path
 * is then stepped back while the key type is greater than EXTENT_ITEM, and
 * walked forward from there.  Each EXTENT_ITEM that overlaps the range
 * shrinks it:
 *  - an extent starting at bytenr, or before it, cuts the front;
 *  - an extent reaching the end of the range cuts the back;
 *  - an extent in the middle makes the right-hand section be checked by a
 *    recursive call while the left-hand section stays in this loop.
 * If bytes remain and no error was recorded, the uncovered range is
 * reported on stderr.
 */
7191 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7194 struct btrfs_path path;
7195 struct extent_buffer *leaf;
7196 struct btrfs_key key;
7199 btrfs_init_path(&path);
7200 key.objectid = bytenr;
7201 key.type = BTRFS_EXTENT_ITEM_KEY;
7202 key.offset = (u64)-1;
7205 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7208 fprintf(stderr, "Error looking up extent record %d\n", ret);
7209 btrfs_release_path(&path);
7212 if (path.slots[0] > 0) {
7215 ret = btrfs_prev_leaf(root, &path);
7218 } else if (ret > 0) {
7225 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7228 * Block group items come before extent items if they have the same
7229 * bytenr, so walk back one more just in case. Dear future traveller,
7230 * first congrats on mastering time travel. Now if it's not too much
7231 * trouble could you go back to 2006 and tell Chris to make the
7232 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7233 * EXTENT_ITEM_KEY please?
7235 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7236 if (path.slots[0] > 0) {
7239 ret = btrfs_prev_leaf(root, &path);
7242 } else if (ret > 0) {
7247 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk over the items that may overlap the range. */
7251 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7252 ret = btrfs_next_leaf(root, &path);
7254 fprintf(stderr, "Error going to next leaf "
7256 btrfs_release_path(&path);
7262 leaf = path.nodes[0];
7263 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7264 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7268 if (key.objectid + key.offset < bytenr) {
7272 if (key.objectid > bytenr + num_bytes)
7275 if (key.objectid == bytenr) {
7276 if (key.offset >= num_bytes) {
7280 num_bytes -= key.offset;
7281 bytenr += key.offset;
7282 } else if (key.objectid < bytenr) {
7283 if (key.objectid + key.offset >= bytenr + num_bytes) {
7287 num_bytes = (bytenr + num_bytes) -
7288 (key.objectid + key.offset);
7289 bytenr = key.objectid + key.offset;
7291 if (key.objectid + key.offset < bytenr + num_bytes) {
7292 u64 new_start = key.objectid + key.offset;
7293 u64 new_bytes = bytenr + num_bytes - new_start;
7296 * Weird case, the extent is in the middle of
7297 * our range, we'll have to search one side
7298 * and then the other. Not sure if this happens
7299 * in real life, but no harm in coding it up
7300 * anyway just in case.
7302 btrfs_release_path(&path);
7303 ret = check_extent_exists(root, new_start,
7306 fprintf(stderr, "Right section didn't "
7310 num_bytes = key.objectid - bytenr;
7313 num_bytes = key.objectid - bytenr;
/* Anything still uncovered here has no extent record. */
7320 if (num_bytes && !ret) {
7321 fprintf(stderr, "There are no extents for csum range "
7322 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7326 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree and check that the data they
 * cover is backed by extent records.
 *
 * A csum root whose node is not uptodate is reported as missing.  For each
 * item, data_len is (item size / csum_size) * sectorsize.  Unless
 * check_data_csum is clear (in which case the code jumps to
 * skip_csum_check), the data itself is verified with check_extent_csums().
 * Consecutive items are merged into one range (offset, num_bytes); when the
 * next item does not start at offset + num_bytes, the accumulated range is
 * passed to check_extent_exists() and a failure is reported on stderr.
 */
7330 static int check_csums(struct btrfs_root *root)
7332 struct btrfs_path path;
7333 struct extent_buffer *leaf;
7334 struct btrfs_key key;
7335 u64 offset = 0, num_bytes = 0;
7336 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7340 unsigned long leaf_offset;
7342 root = root->fs_info->csum_root;
7343 if (!extent_buffer_uptodate(root->node)) {
7344 fprintf(stderr, "No valid csum tree found\n");
7348 btrfs_init_path(&path);
7349 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7350 key.type = BTRFS_EXTENT_CSUM_KEY;
7352 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7354 fprintf(stderr, "Error searching csum tree %d\n", ret);
7355 btrfs_release_path(&path);
7359 if (ret > 0 && path.slots[0])
7364 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7365 ret = btrfs_next_leaf(root, &path);
7367 fprintf(stderr, "Error going to next leaf "
7374 leaf = path.nodes[0];
7376 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7377 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums packed in this item. */
7382 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7383 csum_size) * root->sectorsize;
7384 if (!check_data_csum)
7385 goto skip_csum_check;
7386 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7387 ret = check_extent_csums(root, key.offset, data_len,
7393 offset = key.offset;
/* Discontinuity: verify the range gathered so far, then restart it. */
7394 } else if (key.offset != offset + num_bytes) {
7395 ret = check_extent_exists(root, offset, num_bytes);
7397 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7398 "there is no extent record\n",
7399 offset, offset+num_bytes);
7402 offset = key.offset;
7405 num_bytes += data_len;
7409 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset, testing at each level whether the field of @key is smaller.
 *
 * NOTE(review): presumably returns non-zero when @key sorts strictly before
 * @drop_key and zero otherwise -- confirm against the return statements.
 */
7413 static int is_dropped_key(struct btrfs_key *key,
7414 struct btrfs_key *drop_key) {
7415 if (key->objectid < drop_key->objectid)
7417 else if (key->objectid == drop_key->objectid) {
7418 if (key->type < drop_key->type)
7420 else if (key->type == drop_key->type) {
7421 if (key->offset < drop_key->offset)
7429 * Here are the rules for FULL_BACKREF.
7431 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7432 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7434 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7435 * if it happened after the relocation occurred since we'll have dropped the
7436 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7437 * have no real way to know for sure.
7439 * We process the blocks one root at a time, and we start from the lowest root
7440 * objectid and go to the highest. So we can just lookup the owner backref for
7441 * the record and if we don't find it then we know it doesn't exist and we have
7444 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7445 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7446 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * reached through the root described by @ri.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * tests made, in order, are: root objectid below BTRFS_FIRST_FREE_OBJECTID,
 * @buf being the root node (buf->start == ri->bytenr), the RELOC header
 * flag, header owner equal to the root objectid, and a tree backref lookup
 * for the header owner.  When the flag applies it is ORed into *flags.  In
 * both outcomes the record is marked bad_full_backref if a previously
 * stored flag_block_full_backref value disagrees with the result.
 */
7448 static int calc_extent_flag(struct cache_tree *extent_cache,
7449 struct extent_buffer *buf,
7450 struct root_item_record *ri,
7453 struct extent_record *rec;
7454 struct cache_extent *cache;
7455 struct tree_backref *tback;
7458 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7459 /* we have added this extent before */
7463 rec = container_of(cache, struct extent_record, cache);
7466 * Except file/reloc tree, we can not have
7469 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7474 if (buf->start == ri->bytenr)
7477 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7480 owner = btrfs_header_owner(buf);
7481 if (owner == ri->objectid)
7484 tback = find_tree_backref(rec, 0, owner);
/* Result is a normal backref: a stored value other than 0 is a conflict. */
7489 if (rec->flag_block_full_backref != FLAG_UNSET &&
7490 rec->flag_block_full_backref != 0)
7491 rec->bad_full_backref = 1;
/* Result is a full backref: a stored value other than 1 is a conflict. */
7494 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7495 if (rec->flag_block_full_backref != FLAG_UNSET &&
7496 rec->flag_block_full_backref != 1)
7497 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic on stderr naming a key type that was found in
 * a root where it does not belong.  The key type is rendered by
 * print_key_type() and the root by print_objectid().
 */
7501 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7503 fprintf(stderr, "Invalid key type(");
7504 print_key_type(stderr, 0, key_type);
7505 fprintf(stderr, ") found in root(");
7506 print_objectid(stderr, rootid, 0);
7507 fprintf(stderr, ")\n");
7511 * Check if the key is valid with its extent buffer.
7513 * This is a early check in case invalid key exists in a extent buffer
7514 * This is not comprehensive yet, but should prevent wrong key/item passed
7517 static int check_type_with_root(u64 rootid, u8 key_type)
7520 /* Only valid in chunk tree */
7521 case BTRFS_DEV_ITEM_KEY:
7522 case BTRFS_CHUNK_ITEM_KEY:
7523 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7526 /* valid in csum and log tree */
7527 case BTRFS_CSUM_TREE_OBJECTID:
7528 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7532 case BTRFS_EXTENT_ITEM_KEY:
7533 case BTRFS_METADATA_ITEM_KEY:
7534 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7535 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7538 case BTRFS_ROOT_ITEM_KEY:
7539 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7542 case BTRFS_DEV_EXTENT_KEY:
7543 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7549 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block taken from the pending work and feed everything it
 * contains into the check caches.
 *
 * pick_next_pending() fills @bits with a batch of blocks.  Blocks of the
 * batch are added to @reada and passed to readahead_tree_block().  The
 * first block of the batch is then removed from @pending, @reada and
 * @nodes, read with read_tree_block() and its FULL_BACKREF state is
 * established, either from btrfs_lookup_extent_info() with
 * calc_extent_flag() as fallback, or from calc_extent_flag() alone when
 * init_extent_tree is set.  After check_block():
 *
 *  - for a leaf, each item key is validated against the leaf owner with
 *    check_type_with_root() and then dispatched on its type to the
 *    matching process_*() or add_*_backref() helper;
 *  - for a node, each child pointer gets an extent record and a tree
 *    backref and is queued in @nodes or @pending.
 *
 * The global byte counters are updated before the buffer is released.
 */
7553 static int run_next_block(struct btrfs_root *root,
7554 struct block_info *bits,
7557 struct cache_tree *pending,
7558 struct cache_tree *seen,
7559 struct cache_tree *reada,
7560 struct cache_tree *nodes,
7561 struct cache_tree *extent_cache,
7562 struct cache_tree *chunk_cache,
7563 struct rb_root *dev_cache,
7564 struct block_group_tree *block_group_cache,
7565 struct device_extent_tree *dev_extent_cache,
7566 struct root_item_record *ri)
7568 struct extent_buffer *buf;
7569 struct extent_record *rec = NULL;
7580 struct btrfs_key key;
7581 struct cache_extent *cache;
7584 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7585 bits_nr, &reada_bits);
/* Track the batch in @reada and start readahead for it. */
7590 for(i = 0; i < nritems; i++) {
7591 ret = add_cache_extent(reada, bits[i].start,
7596 /* fixme, get the parent transid */
7597 readahead_tree_block(root, bits[i].start,
/* Only the first block of the batch is processed by this call. */
7601 *last = bits[0].start;
7602 bytenr = bits[0].start;
7603 size = bits[0].size;
/* Take the block out of every work queue it may sit in. */
7605 cache = lookup_cache_extent(pending, bytenr, size);
7607 remove_cache_extent(pending, cache);
7610 cache = lookup_cache_extent(reada, bytenr, size);
7612 remove_cache_extent(reada, cache);
7615 cache = lookup_cache_extent(nodes, bytenr, size);
7617 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the expected parent generation. */
7620 cache = lookup_cache_extent(extent_cache, bytenr, size);
7622 rec = container_of(cache, struct extent_record, cache);
7623 gen = rec->parent_generation;
7626 /* fixme, get the real parent transid */
7627 buf = read_tree_block(root, bytenr, size, gen);
7628 if (!extent_buffer_uptodate(buf)) {
7629 record_bad_block_io(root->fs_info,
7630 extent_cache, bytenr, size);
7634 nritems = btrfs_header_nritems(buf);
/* Establish the FULL_BACKREF flag for this block. */
7637 if (!init_extent_tree) {
7638 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7639 btrfs_header_level(buf), 1, NULL,
7642 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7644 fprintf(stderr, "Couldn't calc extent flags\n");
7645 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7650 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7652 fprintf(stderr, "Couldn't calc extent flags\n");
7653 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/*
 * NOTE(review): rec starts out as NULL and is only assigned from the
 * extent_cache lookup above, yet it is dereferenced in the statements
 * below -- confirm it cannot still be NULL when they run.
 */
7657 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7659 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7660 ri->objectid == btrfs_header_owner(buf)) {
7662 * Ok we got to this block from it's original owner and
7663 * we have FULL_BACKREF set. Relocation can leave
7664 * converted blocks over so this is altogether possible,
7665 * however it's not possible if the generation > the
7666 * last snapshot, so check for this case.
7668 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7669 btrfs_header_generation(buf) > ri->last_snapshot) {
7670 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7671 rec->bad_full_backref = 1;
7676 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7677 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7678 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7679 rec->bad_full_backref = 1;
/* Remember the decision on the extent record. */
7683 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7684 rec->flag_block_full_backref = 1;
7688 rec->flag_block_full_backref = 0;
7690 owner = btrfs_header_owner(buf);
7693 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account free space, then dispatch every item on its key type. */
7697 if (btrfs_is_leaf(buf)) {
7698 btree_space_waste += btrfs_leaf_free_space(root, buf);
7699 for (i = 0; i < nritems; i++) {
7700 struct btrfs_file_extent_item *fi;
7701 btrfs_item_key_to_cpu(buf, &key, i);
7703 * Check key type against the leaf owner.
7704 * Could filter quite a lot of early error if
7707 if (check_type_with_root(btrfs_header_owner(buf),
7709 fprintf(stderr, "ignoring invalid key\n");
7712 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7713 process_extent_item(root, extent_cache, buf,
7717 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7718 process_extent_item(root, extent_cache, buf,
7722 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7724 btrfs_item_size_nr(buf, i);
7727 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7728 process_chunk_item(chunk_cache, &key, buf, i);
7731 if (key.type == BTRFS_DEV_ITEM_KEY) {
7732 process_device_item(dev_cache, &key, buf, i);
7735 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7736 process_block_group_item(block_group_cache,
7740 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7741 process_device_extent_item(dev_extent_cache,
7746 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7747 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7748 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items. */
7755 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7756 ret = add_tree_backref(extent_cache,
7757 key.objectid, 0, key.offset, 0);
7760 "add_tree_backref failed (leaf tree block): %s",
7764 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7765 ret = add_tree_backref(extent_cache,
7766 key.objectid, key.offset, 0, 0);
7769 "add_tree_backref failed (leaf shared block): %s",
7773 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7774 struct btrfs_extent_data_ref *ref;
7775 ref = btrfs_item_ptr(buf, i,
7776 struct btrfs_extent_data_ref);
7777 add_data_backref(extent_cache,
7779 btrfs_extent_data_ref_root(buf, ref),
7780 btrfs_extent_data_ref_objectid(buf,
7782 btrfs_extent_data_ref_offset(buf, ref),
7783 btrfs_extent_data_ref_count(buf, ref),
7784 0, root->sectorsize);
7787 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7788 struct btrfs_shared_data_ref *ref;
7789 ref = btrfs_item_ptr(buf, i,
7790 struct btrfs_shared_data_ref);
7791 add_data_backref(extent_cache,
7792 key.objectid, key.offset, 0, 0, 0,
7793 btrfs_shared_data_ref_count(buf, ref),
7794 0, root->sectorsize);
/*
 * Orphan item: a bad_item carrying the key and the owner root is appended
 * to delete_items (subject to the objectid test below).
 */
7797 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7798 struct bad_item *bad;
7800 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7804 bad = malloc(sizeof(struct bad_item));
7807 INIT_LIST_HEAD(&bad->list);
7808 memcpy(&bad->key, &key,
7809 sizeof(struct btrfs_key));
7810 bad->root_id = owner;
7811 list_add_tail(&bad->list, &delete_items);
/* What follows concerns file extent items only. */
7814 if (key.type != BTRFS_EXTENT_DATA_KEY)
7816 fi = btrfs_item_ptr(buf, i,
7817 struct btrfs_file_extent_item);
7818 if (btrfs_file_extent_type(buf, fi) ==
7819 BTRFS_FILE_EXTENT_INLINE)
7821 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
/* Account the data bytes, then record the backref on the disk extent. */
7824 data_bytes_allocated +=
7825 btrfs_file_extent_disk_num_bytes(buf, fi);
7826 if (data_bytes_allocated < root->sectorsize) {
7829 data_bytes_referenced +=
7830 btrfs_file_extent_num_bytes(buf, fi);
7831 add_data_backref(extent_cache,
7832 btrfs_file_extent_disk_bytenr(buf, fi),
7833 parent, owner, key.objectid, key.offset -
7834 btrfs_file_extent_offset(buf, fi), 1, 1,
7835 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block pointer. */
7839 struct btrfs_key first_key;
7841 first_key.objectid = 0;
7844 btrfs_item_key_to_cpu(buf, &first_key, 0);
7845 level = btrfs_header_level(buf);
7846 for (i = 0; i < nritems; i++) {
7847 struct extent_record tmpl;
7849 ptr = btrfs_node_blockptr(buf, i);
7850 size = root->nodesize;
7851 btrfs_node_key_to_cpu(buf, &key, i);
/* At the drop level, test the child key against the root drop_key. */
7853 if ((level == ri->drop_level)
7854 && is_dropped_key(&key, &ri->drop_key)) {
7859 memset(&tmpl, 0, sizeof(tmpl));
7860 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7861 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7866 tmpl.max_size = size;
7867 ret = add_extent_rec(extent_cache, &tmpl);
7871 ret = add_tree_backref(extent_cache, ptr, parent,
7875 "add_tree_backref failed (non-leaf block): %s",
7881 add_pending(nodes, seen, ptr, size);
7883 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots count as wasted btree space. */
7886 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7887 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting, by owner of the block. */
7889 total_btree_bytes += buf->len;
7890 if (fs_root_objectid(btrfs_header_owner(buf)))
7891 total_fs_tree_bytes += buf->len;
7892 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7893 total_extent_tree_bytes += buf->len;
7894 if (!found_old_backref &&
7895 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7896 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7897 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7898 found_old_backref = 1;
7900 free_extent_buffer(buf);
/*
 * Seed the block walk with the root node @buf of the tree @objectid.
 *
 * The block is queued with add_pending(), in @nodes when its level is above
 * zero and in @pending otherwise.  An extent record starting at buf->start
 * with max_size buf->len is added to @extent_cache.  A tree backref is then
 * added: for the reloc tree, or for a block whose backref revision is older
 * than BTRFS_MIXED_BACKREF_REV, buf->start is passed as the third argument
 * of add_tree_backref(); in the other case 0 and @objectid are passed.
 */
7904 static int add_root_to_pending(struct extent_buffer *buf,
7905 struct cache_tree *extent_cache,
7906 struct cache_tree *pending,
7907 struct cache_tree *seen,
7908 struct cache_tree *nodes,
7911 struct extent_record tmpl;
7914 if (btrfs_header_level(buf) > 0)
7915 add_pending(nodes, seen, buf->start, buf->len);
7917 add_pending(pending, seen, buf->start, buf->len);
7919 memset(&tmpl, 0, sizeof(tmpl));
7920 tmpl.start = buf->start;
7925 tmpl.max_size = buf->len;
7926 add_extent_rec(extent_cache, &tmpl);
7928 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7929 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7930 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7933 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
/*
 * free_extent_hook: keep the fsck extent cache
 * (root->fs_info->fsck_extent_cache) in step with references dropped while
 * repairing.
 *
 * The extent record covering @bytenr and @num_bytes is looked up first.
 * is_data is set when @owner >= BTRFS_FIRST_FREE_OBJECTID.
 *
 * Data backref code: the backref is located with find_data_backref();
 * refs_to_drop is subtracted from its found_ref and num_refs counters and
 * from the refs and extent_item_refs of the record; the found_ref and
 * found_extent_tree bits are cleared once the matching counter reaches 0.
 *
 * Tree backref code: the backref is located with find_tree_backref(); its
 * found_ref and found_extent_tree bits are cleared and extent_item_refs is
 * decremented when non-zero.
 *
 * In both cases a backref with found_extent_tree clear and found_ref set is
 * unlinked from its list, and maybe_free_extent_rec() is called on the
 * record at the end.
 */
7938 /* as we fix the tree, we might be deleting blocks that
7939 * we're tracking for repair. This hook makes sure we
7940 * remove any backrefs for blocks as we are fixing them.
7942 static int free_extent_hook(struct btrfs_trans_handle *trans,
7943 struct btrfs_root *root,
7944 u64 bytenr, u64 num_bytes, u64 parent,
7945 u64 root_objectid, u64 owner, u64 offset,
7948 struct extent_record *rec;
7949 struct cache_extent *cache;
7951 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7953 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7954 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7958 rec = container_of(cache, struct extent_record, cache);
7960 struct data_backref *back;
7961 back = find_data_backref(rec, parent, root_objectid, owner,
7962 offset, 1, bytenr, num_bytes);
7965 if (back->node.found_ref) {
7966 back->found_ref -= refs_to_drop;
7968 rec->refs -= refs_to_drop;
7970 if (back->node.found_extent_tree) {
7971 back->num_refs -= refs_to_drop;
7972 if (rec->extent_item_refs)
7973 rec->extent_item_refs -= refs_to_drop;
7975 if (back->found_ref == 0)
7976 back->node.found_ref = 0;
7977 if (back->num_refs == 0)
7978 back->node.found_extent_tree = 0;
7980 if (!back->node.found_extent_tree && back->node.found_ref) {
7981 list_del(&back->node.list);
7985 struct tree_backref *back;
7986 back = find_tree_backref(rec, parent, root_objectid);
7989 if (back->node.found_ref) {
7992 back->node.found_ref = 0;
7994 if (back->node.found_extent_tree) {
7995 if (rec->extent_item_refs)
7996 rec->extent_item_refs--;
7997 back->node.found_extent_tree = 0;
7999 if (!back->node.found_extent_tree && back->node.found_ref) {
8000 list_del(&back->node.list);
8004 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items whose objectid is bytenr.
 *
 * The extent root is searched starting from a key with objectid bytenr and
 * offset (u64)-1.  An item is removed with btrfs_del_item(), and logged on
 * stderr, when its key type is one of EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 * SHARED_DATA_REF.  For any other key type the search key is moved just
 * below the key that was found.  After an EXTENT_ITEM or METADATA_ITEM is
 * removed, btrfs_update_block_group() is called for found_key.offset bytes
 * (EXTENT_ITEM) or root->nodesize bytes (METADATA_ITEM).
 */
8009 static int delete_extent_records(struct btrfs_trans_handle *trans,
8010 struct btrfs_root *root,
8011 struct btrfs_path *path,
8014 struct btrfs_key key;
8015 struct btrfs_key found_key;
8016 struct extent_buffer *leaf;
8021 key.objectid = bytenr;
8023 key.offset = (u64)-1;
8026 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8033 if (path->slots[0] == 0)
8039 leaf = path->nodes[0];
8040 slot = path->slots[0];
8042 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8043 if (found_key.objectid != bytenr)
/* Not an extent or backref item: step the search key below it. */
8046 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8047 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8048 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8049 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8050 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8051 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8052 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8053 btrfs_release_path(path);
8054 if (found_key.type == 0) {
8055 if (found_key.offset == 0)
8057 key.offset = found_key.offset - 1;
8058 key.type = found_key.type;
8060 key.type = found_key.type - 1;
8061 key.offset = (u64)-1;
8065 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8066 found_key.objectid, found_key.type, found_key.offset);
8068 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8071 btrfs_release_path(path);
/* Removing the extent item itself also changes block group accounting. */
8073 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8074 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8075 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8076 found_key.offset : root->nodesize;
8078 ret = btrfs_update_block_group(trans, root, bytenr,
8085 btrfs_release_path(path);
8090 * for a single backref, this will allocate a new extent
8091 * and add the backref to it.
8093 static int record_extent(struct btrfs_trans_handle *trans,
8094 struct btrfs_fs_info *info,
8095 struct btrfs_path *path,
8096 struct extent_record *rec,
8097 struct extent_backref *back,
8098 int allocated, u64 flags)
8101 struct btrfs_root *extent_root = info->extent_root;
8102 struct extent_buffer *leaf;
8103 struct btrfs_key ins_key;
8104 struct btrfs_extent_item *ei;
8105 struct data_backref *dback;
8106 struct btrfs_tree_block_info *bi;
8109 rec->max_size = max_t(u64, rec->max_size,
8110 info->extent_root->nodesize);
8113 u32 item_size = sizeof(*ei);
8116 item_size += sizeof(*bi);
8118 ins_key.objectid = rec->start;
8119 ins_key.offset = rec->max_size;
8120 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8122 ret = btrfs_insert_empty_item(trans, extent_root, path,
8123 &ins_key, item_size);
8127 leaf = path->nodes[0];
8128 ei = btrfs_item_ptr(leaf, path->slots[0],
8129 struct btrfs_extent_item);
8131 btrfs_set_extent_refs(leaf, ei, 0);
8132 btrfs_set_extent_generation(leaf, ei, rec->generation);
8134 if (back->is_data) {
8135 btrfs_set_extent_flags(leaf, ei,
8136 BTRFS_EXTENT_FLAG_DATA);
8138 struct btrfs_disk_key copy_key;;
8140 bi = (struct btrfs_tree_block_info *)(ei + 1);
8141 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8144 btrfs_set_disk_key_objectid(©_key,
8145 rec->info_objectid);
8146 btrfs_set_disk_key_type(©_key, 0);
8147 btrfs_set_disk_key_offset(©_key, 0);
8149 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8150 btrfs_set_tree_block_key(leaf, bi, ©_key);
8152 btrfs_set_extent_flags(leaf, ei,
8153 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8156 btrfs_mark_buffer_dirty(leaf);
8157 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8158 rec->max_size, 1, 0);
8161 btrfs_release_path(path);
8164 if (back->is_data) {
8168 dback = to_data_backref(back);
8169 if (back->full_backref)
8170 parent = dback->parent;
8174 for (i = 0; i < dback->found_ref; i++) {
8175 /* if parent != 0, we're doing a full backref
8176 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8177 * just makes the backref allocator create a data
8180 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8181 rec->start, rec->max_size,
8185 BTRFS_FIRST_FREE_OBJECTID :
8191 fprintf(stderr, "adding new data backref"
8192 " on %llu %s %llu owner %llu"
8193 " offset %llu found %d\n",
8194 (unsigned long long)rec->start,
8195 back->full_backref ?
8197 back->full_backref ?
8198 (unsigned long long)parent :
8199 (unsigned long long)dback->root,
8200 (unsigned long long)dback->owner,
8201 (unsigned long long)dback->offset,
8205 struct tree_backref *tback;
8207 tback = to_tree_backref(back);
8208 if (back->full_backref)
8209 parent = tback->parent;
8213 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8214 rec->start, rec->max_size,
8215 parent, tback->root, 0, 0);
8216 fprintf(stderr, "adding new tree backref on "
8217 "start %llu len %llu parent %llu root %llu\n",
8218 rec->start, rec->max_size, parent, tback->root);
8221 btrfs_release_path(path);
/*
 * Scan the list @entries for an extent_entry whose bytenr and bytes fields
 * both equal the arguments.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when no
 * entry matches -- confirm against the return statements.
 */
8225 static struct extent_entry *find_entry(struct list_head *entries,
8226 u64 bytenr, u64 bytes)
8228 struct extent_entry *entry = NULL;
8230 list_for_each_entry(entry, entries, list) {
8231 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the most trustworthy extent_entry of the list @entries by
 * comparing the count fields.
 *
 * best tracks the current candidate and prev the entry visited before.
 * An entry whose broken value equals its count is not trusted.  A tie on
 * count, between best and the current entry or between an unbroken prev
 * and the current entry, means no decision can be made yet and the scan
 * goes on.  With no candidate yet, the larger count of prev and the
 * current entry wins; with one, a strictly larger count is tested for.
 */
8238 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8240 struct extent_entry *entry, *best = NULL, *prev = NULL;
8242 list_for_each_entry(entry, entries, list) {
8244 * If there are as many broken entries as entries then we know
8245 * not to trust this particular entry.
8247 if (entry->broken == entry->count)
8251 * Special case, when there are only two entries and 'best' is
8261 * If our current entry == best then we can't be sure our best
8262 * is really the best, so we need to keep searching.
8264 if (best && best->count == entry->count) {
8270 /* Prev == entry, not good enough, have to keep searching */
8271 if (!prev->broken && prev->count == entry->count)
8275 best = (prev->count > entry->count) ? prev : entry;
8276 else if (best->count < entry->count)
/*
 * repair_ref() - rewrite one file extent item so it agrees with 'entry'.
 *
 * @info:  filesystem the reference lives in
 * @path:  caller-provided path, released again before returning
 * @dback: data backref naming the root, owner inode and file offset of the
 *         file extent, plus the disk_bytenr/bytes that item currently has
 * @entry: the (bytenr, bytes) range that was chosen as the correct one
 *
 * Visible flow:
 *  1. Read the fs root identified by dback->root.
 *  2. Search read-only for (dback->owner, EXTENT_DATA, dback->offset) and
 *     inspect file extent items of that inode, looking for the one whose
 *     disk bytenr and disk num bytes equal dback's values.
 *  3. Start a transaction and search again for the found key with cow set.
 *  4. Adjust disk_bytenr/offset depending on whether the reference starts
 *     at, after, or before entry->bytenr, then set disk_num_bytes to
 *     entry->bytes.  ram_bytes is only rewritten for uncompressed extents.
 *  5. Mark the leaf dirty and commit the transaction.
 *
 * A compressed extent whose disk_bytenr differs from entry->bytenr, a
 * reference that runs past the end of the entry and a reference that lies
 * before its start are not repaired; a message asking for a btrfs-image is
 * printed instead.
 *
 * Return: PTR_ERR(trans) if the transaction cannot be started, otherwise
 * 'ret' when it is non-zero, else the result of the commit.
 */
8284 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8285 struct data_backref *dback, struct extent_entry *entry)
8287 struct btrfs_trans_handle *trans;
8288 struct btrfs_root *root;
8289 struct btrfs_file_extent_item *fi;
8290 struct extent_buffer *leaf;
8291 struct btrfs_key key;
8295 key.objectid = dback->root;
8296 key.type = BTRFS_ROOT_ITEM_KEY;
8297 key.offset = (u64)-1;
8298 root = btrfs_read_fs_root(info, &key);
8300 fprintf(stderr, "Couldn't find root for our ref\n");
8305 * The backref points to the original offset of the extent if it was
8306 * split, so we need to search down to the offset we have and then walk
8307 * forward until we find the backref we're looking for.
8309 key.objectid = dback->owner;
8310 key.type = BTRFS_EXTENT_DATA_KEY;
8311 key.offset = dback->offset;
8312 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8314 fprintf(stderr, "Error looking up ref %d\n", ret);
8319 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8320 ret = btrfs_next_leaf(root, path);
8322 fprintf(stderr, "Couldn't find our ref, next\n");
8326 leaf = path->nodes[0];
8327 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8328 if (key.objectid != dback->owner ||
8329 key.type != BTRFS_EXTENT_DATA_KEY) {
8330 fprintf(stderr, "Couldn't find our ref, search\n");
8333 fi = btrfs_item_ptr(leaf, path->slots[0],
8334 struct btrfs_file_extent_item);
8335 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8336 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8338 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8343 btrfs_release_path(path);
8345 trans = btrfs_start_transaction(root, 1);
8347 return PTR_ERR(trans);
8350 * Ok we have the key of the file extent we want to fix, now we can cow
8351 * down to the thing and fix it.
8353 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8355 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8356 key.objectid, key.type, key.offset, ret);
8360 fprintf(stderr, "Well that's odd, we just found this key "
8361 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8366 leaf = path->nodes[0];
8367 fi = btrfs_item_ptr(leaf, path->slots[0],
8368 struct btrfs_file_extent_item);
8370 if (btrfs_file_extent_compression(leaf, fi) &&
8371 dback->disk_bytenr != entry->bytenr) {
8372 fprintf(stderr, "Ref doesn't match the record start and is "
8373 "compressed, please take a btrfs-image of this file "
8374 "system and send it to a btrfs developer so they can "
8375 "complete this functionality for bytenr %Lu\n",
8376 dback->disk_bytenr);
8381 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8382 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8383 } else if (dback->disk_bytenr > entry->bytenr) {
8384 u64 off_diff, offset;
8386 off_diff = dback->disk_bytenr - entry->bytenr;
8387 offset = btrfs_file_extent_offset(leaf, fi);
8388 if (dback->disk_bytenr + offset +
8389 btrfs_file_extent_num_bytes(leaf, fi) >
8390 entry->bytenr + entry->bytes) {
8391 fprintf(stderr, "Ref is past the entry end, please "
8392 "take a btrfs-image of this file system and "
8393 "send it to a btrfs developer, ref %Lu\n",
8394 dback->disk_bytenr);
8399 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8400 btrfs_set_file_extent_offset(leaf, fi, offset);
8401 } else if (dback->disk_bytenr < entry->bytenr) {
8404 offset = btrfs_file_extent_offset(leaf, fi);
8405 if (dback->disk_bytenr + offset < entry->bytenr) {
8406 fprintf(stderr, "Ref is before the entry start, please"
8407 " take a btrfs-image of this file system and "
8408 "send it to a btrfs developer, ref %Lu\n",
8409 dback->disk_bytenr);
8414 offset += dback->disk_bytenr;
8415 offset -= entry->bytenr;
8416 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8417 btrfs_set_file_extent_offset(leaf, fi, offset);
8420 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8423 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8424 * only do this if we aren't using compression, otherwise it's a
8427 if (!btrfs_file_extent_compression(leaf, fi))
8428 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8430 printf("ram bytes may be wrong?\n");
8431 btrfs_mark_buffer_dirty(leaf);
8433 err = btrfs_commit_transaction(trans, root);
8434 btrfs_release_path(path);
8435 return ret ? ret : err;
/*
 * verify_backrefs() - make the data backrefs of an extent record agree on
 * one (bytenr, bytes) range and repair the file extents that disagree.
 *
 * @info: filesystem being checked
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose ->backrefs list is examined
 *
 * Only backrefs that are data, are not full backrefs and have a non-zero
 * found_ref take part.  Each distinct (disk_bytenr, bytes) pair seen gets a
 * heap-allocated extent_entry on the local 'entries' list.  A backref that
 * differs from (rec->start, rec->nr) or is marked broken is also tested for,
 * so that a single consistent-looking entry is not trusted blindly.
 *
 * If at most one entry exists and nothing mismatched, there is nothing to
 * fix.  Otherwise find_most_right_entry() votes; when that is not enough
 * the extent record's own (start, nr) is looked up and, if absent, added as
 * an extra entry before voting again.  Unresolvable splits and a winner
 * whose bytenr differs from rec->start are reported on stderr.
 *
 * Once a winner exists, repair_ref() is called for every participating
 * backref whose (disk_bytenr, bytes) differs from it.  All temporary
 * entries are unlinked from 'entries' before returning.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * The comment near the end says the caller has to drop its cache and
 * rescan after refs were changed - confirm how that is signalled.
 */
8438 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8439 struct extent_record *rec)
8441 struct extent_backref *back;
8442 struct data_backref *dback;
8443 struct extent_entry *entry, *best = NULL;
8446 int broken_entries = 0;
8451 * Metadata is easy and the backrefs should always agree on bytenr and
8452 * size, if not we've got bigger issues.
8457 list_for_each_entry(back, &rec->backrefs, list) {
8458 if (back->full_backref || !back->is_data)
8461 dback = to_data_backref(back);
8464 * We only pay attention to backrefs that we found a real
8467 if (dback->found_ref == 0)
8471 * For now we only catch when the bytes don't match, not the
8472 * bytenr. We can easily do this at the same time, but I want
8473 * to have a fs image to test on before we just add repair
8474 * functionality willy-nilly so we know we won't screw up the
8478 entry = find_entry(&entries, dback->disk_bytenr,
8481 entry = malloc(sizeof(struct extent_entry));
8486 memset(entry, 0, sizeof(*entry));
8487 entry->bytenr = dback->disk_bytenr;
8488 entry->bytes = dback->bytes;
8489 list_add_tail(&entry->list, &entries);
8494 * If we only have on entry we may think the entries agree when
8495 * in reality they don't so we have to do some extra checking.
8497 if (dback->disk_bytenr != rec->start ||
8498 dback->bytes != rec->nr || back->broken)
8509 /* Yay all the backrefs agree, carry on good sir */
8510 if (nr_entries <= 1 && !mismatch)
8513 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8514 "%Lu\n", rec->start);
8517 * First we want to see if the backrefs can agree amongst themselves who
8518 * is right, so figure out which one of the entries has the highest
8521 best = find_most_right_entry(&entries);
8524 * Ok so we may have an even split between what the backrefs think, so
8525 * this is where we use the extent ref to see what it thinks.
8528 entry = find_entry(&entries, rec->start, rec->nr);
8529 if (!entry && (!broken_entries || !rec->found_rec)) {
8530 fprintf(stderr, "Backrefs don't agree with each other "
8531 "and extent record doesn't agree with anybody,"
8532 " so we can't fix bytenr %Lu bytes %Lu\n",
8533 rec->start, rec->nr);
8536 } else if (!entry) {
8538 * Ok our backrefs were broken, we'll assume this is the
8539 * correct value and add an entry for this range.
8541 entry = malloc(sizeof(struct extent_entry));
8546 memset(entry, 0, sizeof(*entry));
8547 entry->bytenr = rec->start;
8548 entry->bytes = rec->nr;
8549 list_add_tail(&entry->list, &entries);
8553 best = find_most_right_entry(&entries);
8555 fprintf(stderr, "Backrefs and extent record evenly "
8556 "split on who is right, this is going to "
8557 "require user input to fix bytenr %Lu bytes "
8558 "%Lu\n", rec->start, rec->nr);
8565 * I don't think this can happen currently as we'll abort() if we catch
8566 * this case higher up, but in case somebody removes that we still can't
8567 * deal with it properly here yet, so just bail out of that's the case.
8569 if (best->bytenr != rec->start) {
8570 fprintf(stderr, "Extent start and backref starts don't match, "
8571 "please use btrfs-image on this file system and send "
8572 "it to a btrfs developer so they can make fsck fix "
8573 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8574 rec->start, rec->nr);
8580 * Ok great we all agreed on an extent record, let's go find the real
8581 * references and fix up the ones that don't match.
8583 list_for_each_entry(back, &rec->backrefs, list) {
8584 if (back->full_backref || !back->is_data)
8587 dback = to_data_backref(back);
8590 * Still ignoring backrefs that don't have a real ref attached
8593 if (dback->found_ref == 0)
8596 if (dback->bytes == best->bytes &&
8597 dback->disk_bytenr == best->bytenr)
8600 ret = repair_ref(info, path, dback, best);
8606 * Ok we messed with the actual refs, which means we need to drop our
8607 * entire cache and go back and rescan. I know this is a huge pain and
8608 * adds a lot of extra work, but it's the only way to be safe. Once all
8609 * the backrefs agree we may not need to do anything to the extent
8614 while (!list_empty(&entries)) {
8615 entry = list_entry(entries.next, struct extent_entry, list);
8616 list_del_init(&entry->list);
/*
 * process_duplicates() - decide whether a record on the duplicate list
 * really has several extent items, or only mismatching backrefs.
 *
 * @extent_cache: cache tree holding all extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * Nothing is rearranged when rec was backed by a real extent item
 * (rec->found_rec) or has more than one duplicate.  Otherwise rec is
 * removed from the cache and its first duplicate ('good') takes its place:
 * good's list heads and check flags are reset, it inherits rec->refs and
 * rec's backrefs, and it is inserted into the cache under its own
 * start/nr range.
 *
 * Before the insert, every cached record overlapping 'good' is absorbed:
 *  - one that has found_rec set or carries duplicates is moved, together
 *    with its own duplicates, onto good->dups, and 'good' is queued on
 *    duplicate_extents if it is not on a list yet;
 *  - any other one just contributes its refs and backrefs to 'good'.
 *
 * Return: the last visible statement yields 0 when 'good' ended up with
 * duplicates and 1 otherwise.
 * NOTE(review): the value of the early return and the handling of an
 * insert failure are not visible in this excerpt - confirm.
 */
8622 static int process_duplicates(struct cache_tree *extent_cache,
8623 struct extent_record *rec)
8625 struct extent_record *good, *tmp;
8626 struct cache_extent *cache;
8630 * If we found a extent record for this extent then return, or if we
8631 * have more than one duplicate we are likely going to need to delete
8634 if (rec->found_rec || rec->num_duplicates > 1)
8637 /* Shouldn't happen but just in case */
8638 BUG_ON(!rec->num_duplicates);
8641 * So this happens if we end up with a backref that doesn't match the
8642 * actual extent entry. So either the backref is bad or the extent
8643 * entry is bad. Either way we want to have the extent_record actually
8644 * reflect what we found in the extent_tree, so we need to take the
8645 * duplicate out and use that as the extent_record since the only way we
8646 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8648 remove_cache_extent(extent_cache, &rec->cache);
8650 good = to_extent_record(rec->dups.next);
8651 list_del_init(&good->list);
8652 INIT_LIST_HEAD(&good->backrefs);
8653 INIT_LIST_HEAD(&good->dups);
8654 good->cache.start = good->start;
8655 good->cache.size = good->nr;
8656 good->content_checked = 0;
8657 good->owner_ref_checked = 0;
8658 good->num_duplicates = 0;
8659 good->refs = rec->refs;
8660 list_splice_init(&rec->backrefs, &good->backrefs);
8662 cache = lookup_cache_extent(extent_cache, good->start,
8666 tmp = container_of(cache, struct extent_record, cache);
8669 * If we find another overlapping extent and it's found_rec is
8670 * set then it's a duplicate and we need to try and delete
8673 if (tmp->found_rec || tmp->num_duplicates > 0) {
8674 if (list_empty(&good->list))
8675 list_add_tail(&good->list,
8676 &duplicate_extents);
8677 good->num_duplicates += tmp->num_duplicates + 1;
8678 list_splice_init(&tmp->dups, &good->dups);
8679 list_del_init(&tmp->list);
8680 list_add_tail(&tmp->list, &good->dups);
8681 remove_cache_extent(extent_cache, &tmp->cache);
8686 * Ok we have another non extent item backed extent rec, so lets
8687 * just add it to this extent and carry on like we did above.
8689 good->refs += tmp->refs;
8690 list_splice_init(&tmp->backrefs, &good->backrefs);
8691 remove_cache_extent(extent_cache, &tmp->cache);
8694 ret = insert_cache_extent(extent_cache, &good->cache);
8697 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - remove redundant EXTENT_ITEMs that overlap
 * the record which covers all of them.
 *
 * @root: any root of the filesystem; the extent root is taken from
 *        root->fs_info
 * @rec:  record whose ->dups list holds the overlapping duplicates
 *
 * The records on rec->dups are compared by start and length to find the
 * one that covers all others ('good').  Overlaps that are not fully
 * contained in each other are reported and not handled.  rec and its
 * duplicates are then collected on a local delete list, and inside one
 * transaction on the extent root each collected record that was backed by
 * an extent item (found_rec != 0) is looked up by
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted.  A metadata record on
 * that list is unexpected and only produces a message.
 *
 * Afterwards both the delete list and rec->dups are emptied, and
 * rec->num_duplicates is cleared when nothing failed and nothing was
 * deleted.
 *
 * Return: 'ret' when it is non-zero, otherwise nr_del.
 * NOTE(review): how nr_del is counted and which records are freed is not
 * visible in this excerpt - confirm.
 */
8700 static int delete_duplicate_records(struct btrfs_root *root,
8701 struct extent_record *rec)
8703 struct btrfs_trans_handle *trans;
8704 LIST_HEAD(delete_list);
8705 struct btrfs_path path;
8706 struct extent_record *tmp, *good, *n;
8709 struct btrfs_key key;
8711 btrfs_init_path(&path);
8714 /* Find the record that covers all of the duplicates. */
8715 list_for_each_entry(tmp, &rec->dups, list) {
8716 if (good->start < tmp->start)
8718 if (good->nr > tmp->nr)
8721 if (tmp->start + tmp->nr < good->start + good->nr) {
8722 fprintf(stderr, "Ok we have overlapping extents that "
8723 "aren't completely covered by each other, this "
8724 "is going to require more careful thought. "
8725 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8726 tmp->start, tmp->nr, good->start, good->nr);
8733 list_add_tail(&rec->list, &delete_list);
8735 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8738 list_move_tail(&tmp->list, &delete_list);
8741 root = root->fs_info->extent_root;
8742 trans = btrfs_start_transaction(root, 1);
8743 if (IS_ERR(trans)) {
8744 ret = PTR_ERR(trans);
8748 list_for_each_entry(tmp, &delete_list, list) {
8749 if (tmp->found_rec == 0)
8751 key.objectid = tmp->start;
8752 key.type = BTRFS_EXTENT_ITEM_KEY;
8753 key.offset = tmp->nr;
8755 /* Shouldn't happen but just in case */
8756 if (tmp->metadata) {
8757 fprintf(stderr, "Well this shouldn't happen, extent "
8758 "record overlaps but is metadata? "
8759 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8763 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8769 ret = btrfs_del_item(trans, root, &path);
8772 btrfs_release_path(&path);
8775 err = btrfs_commit_transaction(trans, root);
8779 while (!list_empty(&delete_list)) {
8780 tmp = to_extent_record(delete_list.next);
8781 list_del_init(&tmp->list);
8787 while (!list_empty(&rec->dups)) {
8788 tmp = to_extent_record(rec->dups.next);
8789 list_del_init(&tmp->list);
8793 btrfs_release_path(&path);
8795 if (!ret && !nr_del)
8796 rec->num_duplicates = 0;
8798 return ret ? ret : nr_del;
/*
 * find_possible_backrefs() - look up file extents for data backrefs that
 * were never matched during the tree scan.
 *
 * @info:         filesystem being checked
 * @path:         scratch path; released after every lookup
 * @extent_cache: cache tree of all extent records
 * @rec:          record whose unmatched backrefs are examined
 *
 * For every backref that is data, is not a full backref and has
 * found_ref == 0:
 *  - the owning fs root is read; a missing root (-ENOENT) skips the
 *    backref, any other error is returned;
 *  - the file extent at (owner, EXTENT_DATA, offset) is searched for and
 *    its disk bytenr / disk num bytes are read;
 *  - the extent cache is probed at that bytenr, because a backref whose
 *    bytenr already has its own extent item must not be attached here;
 *  - otherwise the backref is updated in place: found_ref is incremented
 *    and disk_bytenr/bytes take the values from the file extent, and the
 *    backref is flagged so verify_backrefs() does not trust those values.
 *
 * NOTE(review): the checks on the search result, the cache-hit condition
 * and the final return are not visible in this excerpt - confirm.
 */
8801 static int find_possible_backrefs(struct btrfs_fs_info *info,
8802 struct btrfs_path *path,
8803 struct cache_tree *extent_cache,
8804 struct extent_record *rec)
8806 struct btrfs_root *root;
8807 struct extent_backref *back;
8808 struct data_backref *dback;
8809 struct cache_extent *cache;
8810 struct btrfs_file_extent_item *fi;
8811 struct btrfs_key key;
8815 list_for_each_entry(back, &rec->backrefs, list) {
8816 /* Don't care about full backrefs (poor unloved backrefs) */
8817 if (back->full_backref || !back->is_data)
8820 dback = to_data_backref(back);
8822 /* We found this one, we don't need to do a lookup */
8823 if (dback->found_ref)
8826 key.objectid = dback->root;
8827 key.type = BTRFS_ROOT_ITEM_KEY;
8828 key.offset = (u64)-1;
8830 root = btrfs_read_fs_root(info, &key);
8832 /* No root, definitely a bad ref, skip */
8833 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8835 /* Other err, exit */
8837 return PTR_ERR(root);
8839 key.objectid = dback->owner;
8840 key.type = BTRFS_EXTENT_DATA_KEY;
8841 key.offset = dback->offset;
8842 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8844 btrfs_release_path(path);
8847 /* Didn't find it, we can carry on */
8852 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8853 struct btrfs_file_extent_item);
8854 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8855 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8856 btrfs_release_path(path);
8857 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8859 struct extent_record *tmp;
8860 tmp = container_of(cache, struct extent_record, cache);
8863 * If we found an extent record for the bytenr for this
8864 * particular backref then we can't add it to our
8865 * current extent record. We only want to add backrefs
8866 * that don't have a corresponding extent item in the
8867 * extent tree since they likely belong to this record
8868 * and we need to fix it if it doesn't match bytenrs.
8874 dback->found_ref += 1;
8875 dback->disk_bytenr = bytenr;
8876 dback->bytes = bytes;
8879 * Set this so the verify backref code knows not to trust the
8880 * values in this backref.
8889 * Record orphan data ref into corresponding root.
8891 * Return 0 if the extent item contains a data ref and it was recorded.
8892 * Return 1 if the extent item contains no useful data ref.
8893 * In that case, it may contain only shared_dataref or metadata backref
8894 * or the file extent exists(this should be handled by the extent bytenr
8896 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents() - remember data backrefs of 'rec' that the
 * extent tree knows about but for which no file extent was found.
 *
 * @fs_info: filesystem being checked
 * @rec:     extent record whose ->backrefs list is examined
 *
 * Only backrefs that are data, are not full backrefs, were found in the
 * extent tree and have found_ref == 0 are considered.  For each of them the
 * owning fs root is read (backrefs whose root cannot be read are skipped)
 * and (owner, EXTENT_DATA, offset) is searched for in that root.  Depending
 * on the search result (see the comment inside the loop) an
 * orphan_data_extent describing the root, inode, file offset and the
 * on-disk range of 'rec' is allocated and queued on the root.
 *
 * Each orphan is linked onto dest_root->orphan_data_extents.  list_add()
 * takes (new, head): the orphan's ->list is the new node and the root's
 * list is the head.  Passing them the other way round re-links the head
 * onto the fresh orphan only, which drops every orphan recorded earlier for
 * the same root.
 *
 * Return: the last visible statement yields 0 when at least one orphan was
 * recorded and 1 when none was.
 * NOTE(review): the error paths (e.g. allocation failure) are not visible in
 * this excerpt - confirm against the full file.
 */
8898 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8899 struct extent_record *rec)
8901 struct btrfs_key key;
8902 struct btrfs_root *dest_root;
8903 struct extent_backref *back;
8904 struct data_backref *dback;
8905 struct orphan_data_extent *orphan;
8906 struct btrfs_path path;
8907 int recorded_data_ref = 0;
8912 btrfs_init_path(&path);
8913 list_for_each_entry(back, &rec->backrefs, list) {
8914 if (back->full_backref || !back->is_data ||
8915 !back->found_extent_tree)
8917 dback = to_data_backref(back);
8918 if (dback->found_ref)
8920 key.objectid = dback->root;
8921 key.type = BTRFS_ROOT_ITEM_KEY;
8922 key.offset = (u64)-1;
8924 dest_root = btrfs_read_fs_root(fs_info, &key);
8926 /* For non-exist root we just skip it */
8927 if (IS_ERR(dest_root) || !dest_root)
8930 key.objectid = dback->owner;
8931 key.type = BTRFS_EXTENT_DATA_KEY;
8932 key.offset = dback->offset;
8934 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8935 btrfs_release_path(&path);
8937 * For ret < 0, it's OK since the fs-tree may be corrupted,
8938 * we need to record it for inode/file extent rebuild.
8939 * For ret > 0, we record it only for file extent rebuild.
8940 * For ret == 0, the file extent exists but only bytenr
8941 * mismatch, let the original bytenr fix routine to handle,
8947 orphan = malloc(sizeof(*orphan));
8952 INIT_LIST_HEAD(&orphan->list);
8953 orphan->root = dback->root;
8954 orphan->objectid = dback->owner;
8955 orphan->offset = dback->offset;
8956 orphan->disk_bytenr = rec->cache.start;
8957 orphan->disk_len = rec->cache.size;
8958 list_add(&orphan->list, &dest_root->orphan_data_extents);
8959 recorded_data_ref = 1;
8962 btrfs_release_path(&path);
8964 return !recorded_data_ref;
8970 * when an incorrect extent item is found, this will delete
8971 * all of the existing entries for it and recreate them
8972 * based on what the tree scan found.
/*
 * fixup_extent_refs() - rebuild the extent tree items of one record from
 * the backrefs that the tree scan actually found.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of all extent records
 * @rec:          the record whose extent item / backrefs are wrong
 *
 * Visible steps:
 *  - 'flags' gets BTRFS_BLOCK_FLAG_FULL_BACKREF when the record is flagged
 *    as a full-backref block;
 *  - for a data record whose counted refs differ from the refs stored in
 *    the extent item, find_possible_backrefs() first tries to attach
 *    backrefs that were never matched;
 *  - step one: verify_backrefs() makes the backrefs agree;
 *  - step two: inside a transaction on the extent root, all existing
 *    records for the extent are deleted via delete_extent_records();
 *  - the corrupt-block cache is consulted, since references must not be
 *    re-added to a corrupt block;
 *  - step three: record_extent() is called for every backref with
 *    found_ref set, clearing rec->bad_full_backref on the way;
 *  - the transaction is committed and a success message is printed.
 *
 * NOTE(review): error checks, loop advancement and the return value are
 * not visible in this excerpt - confirm against the full file.
 */
8974 static int fixup_extent_refs(struct btrfs_fs_info *info,
8975 struct cache_tree *extent_cache,
8976 struct extent_record *rec)
8978 struct btrfs_trans_handle *trans = NULL;
8980 struct btrfs_path path;
8981 struct list_head *cur = rec->backrefs.next;
8982 struct cache_extent *cache;
8983 struct extent_backref *back;
8987 if (rec->flag_block_full_backref)
8988 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8990 btrfs_init_path(&path);
8991 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8993 * Sometimes the backrefs themselves are so broken they don't
8994 * get attached to any meaningful rec, so first go back and
8995 * check any of our backrefs that we couldn't find and throw
8996 * them into the list if we find the backref so that
8997 * verify_backrefs can figure out what to do.
8999 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9004 /* step one, make sure all of the backrefs agree */
9005 ret = verify_backrefs(info, &path, rec);
9009 trans = btrfs_start_transaction(info->extent_root, 1);
9010 if (IS_ERR(trans)) {
9011 ret = PTR_ERR(trans);
9015 /* step two, delete all the existing records */
9016 ret = delete_extent_records(trans, info->extent_root, &path,
9022 /* was this block corrupt? If so, don't add references to it */
9023 cache = lookup_cache_extent(info->corrupt_blocks,
9024 rec->start, rec->max_size);
9030 /* step three, recreate all the refs we did find */
9031 while(cur != &rec->backrefs) {
9032 back = to_extent_backref(cur);
9036 * if we didn't find any references, don't create a
9039 if (!back->found_ref)
9042 rec->bad_full_backref = 0;
9043 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9051 int err = btrfs_commit_transaction(trans, info->extent_root);
9057 fprintf(stderr, "Repaired extent references for %llu\n",
9058 (unsigned long long)rec->start);
9060 btrfs_release_path(&path);
/*
 * fixup_extent_flags() - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the
 * extent item of 'rec' so it matches rec->flag_block_full_backref.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record whose on-disk flags are wrong
 *
 * The item is looked up in the extent root with cow enabled, using
 * (start, BTRFS_METADATA_ITEM_KEY, info_level) for metadata records and
 * (start, BTRFS_EXTENT_ITEM_KEY, max_size) otherwise.  On both visible
 * failure paths the path is released and the transaction is still
 * committed.  On success the flags are rewritten, the leaf is marked
 * dirty and the transaction is committed.
 *
 * Return: PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the remaining return statements are not visible in this
 * excerpt - confirm against the full file.
 */
9064 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9065 struct extent_record *rec)
9067 struct btrfs_trans_handle *trans;
9068 struct btrfs_root *root = fs_info->extent_root;
9069 struct btrfs_path path;
9070 struct btrfs_extent_item *ei;
9071 struct btrfs_key key;
9075 key.objectid = rec->start;
9076 if (rec->metadata) {
9077 key.type = BTRFS_METADATA_ITEM_KEY;
9078 key.offset = rec->info_level;
9080 key.type = BTRFS_EXTENT_ITEM_KEY;
9081 key.offset = rec->max_size;
9084 trans = btrfs_start_transaction(root, 0);
9086 return PTR_ERR(trans);
9088 btrfs_init_path(&path);
9089 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9091 btrfs_release_path(&path);
9092 btrfs_commit_transaction(trans, root);
9095 fprintf(stderr, "Didn't find extent for %llu\n",
9096 (unsigned long long)rec->start);
9097 btrfs_release_path(&path);
9098 btrfs_commit_transaction(trans, root);
9102 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9103 struct btrfs_extent_item);
9104 flags = btrfs_extent_flags(path.nodes[0], ei);
9105 if (rec->flag_block_full_backref) {
9106 fprintf(stderr, "setting full backref on %llu\n",
9107 (unsigned long long)key.objectid);
9108 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9110 fprintf(stderr, "clearing full backref on %llu\n",
9111 (unsigned long long)key.objectid);
9112 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9114 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9115 btrfs_mark_buffer_dirty(path.nodes[0]);
9116 btrfs_release_path(&path);
9117 ret = btrfs_commit_transaction(trans, root);
9119 fprintf(stderr, "Repaired extent flags for %llu\n",
9120 (unsigned long long)rec->start);
9125 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block() - drop the pointer to one corrupt block from its parent
 * node in the extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the corrupt block (key, level, cache.start)
 *
 * The search is told to stop one level above the corrupt block
 * (lowest_level = corrupt->level + 1) and runs with cow enabled.  The slot
 * the search landed on is tried first; if its block pointer is not
 * corrupt->cache.start, every slot of that node is scanned.  Once the
 * pointer is located it is removed with btrfs_del_ptr().
 *
 * NOTE(review): the not-found handling around the extent root node check
 * and all return statements are not visible in this excerpt - confirm.
 */
9126 static int prune_one_block(struct btrfs_trans_handle *trans,
9127 struct btrfs_fs_info *info,
9128 struct btrfs_corrupt_block *corrupt)
9131 struct btrfs_path path;
9132 struct extent_buffer *eb;
9136 int level = corrupt->level + 1;
9138 btrfs_init_path(&path);
9140 /* we want to stop at the parent to our busted block */
9141 path.lowest_level = level;
9143 ret = btrfs_search_slot(trans, info->extent_root,
9144 &corrupt->key, &path, -1, 1);
9149 eb = path.nodes[level];
9156 * hopefully the search gave us the block we want to prune,
9157 * lets try that first
9159 slot = path.slots[level];
9160 found = btrfs_node_blockptr(eb, slot);
9161 if (found == corrupt->cache.start)
9164 nritems = btrfs_header_nritems(eb);
9166 /* the search failed, lets scan this node and hope we find it */
9167 for (slot = 0; slot < nritems; slot++) {
9168 found = btrfs_node_blockptr(eb, slot);
9169 if (found == corrupt->cache.start)
9173 * we couldn't find the bad block. TODO, search all the nodes for pointers
9176 if (eb == info->extent_root->node) {
9181 btrfs_release_path(&path);
9186 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9187 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9190 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks() - run prune_one_block() on the entries of
 * info->corrupt_blocks and remove each handled entry from that cache.
 *
 * @info: filesystem being repaired
 *
 * A transaction on the extent root is started for the work and committed
 * at the end.  The result of prune_one_block() is not checked here.
 *
 * Return: PTR_ERR(trans) if the transaction cannot be started; the result
 * of the commit on the last visible return.
 * NOTE(review): the loop construct and the path taken when the cache is
 * empty are not visible in this excerpt - confirm.
 */
9194 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9196 struct btrfs_trans_handle *trans = NULL;
9197 struct cache_extent *cache;
9198 struct btrfs_corrupt_block *corrupt;
9201 cache = search_cache_extent(info->corrupt_blocks, 0);
9205 trans = btrfs_start_transaction(info->extent_root, 1);
9207 return PTR_ERR(trans);
9209 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9210 prune_one_block(trans, info, corrupt);
9211 remove_cache_extent(info->corrupt_blocks, cache);
9214 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups() - clear cached free-space state.
 *
 * @fs_info: filesystem whose caches are reset
 *
 * Ranges marked EXTENT_DIRTY in fs_info->free_space_cache are located with
 * find_first_extent_bit() and cleared.  Afterwards the block groups are
 * visited in order: each lookup starts at 'start', which is then advanced
 * to the end of the block group found (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs and what is done to each block group
 * are not visible in this excerpt - confirm against the full file.
 */
9218 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9220 struct btrfs_block_group_cache *cache;
9225 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9226 &start, &end, EXTENT_DIRTY);
9229 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9234 cache = btrfs_lookup_first_block_group(fs_info, start);
9239 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs() - report, and in repair mode fix, every problem
 * recorded on the extent records in 'extent_cache'.
 *
 * @root:         any root of the filesystem (fs_info is taken from it)
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Repair preparation: the range of every extent record and of every
 * corrupt block is marked dirty in fs_info->excluded_extents so that the
 * allocator does not hand out space from problem extents; corrupt blocks
 * are pruned and the cached block groups are reset.
 *
 * Duplicates: while repairing, records queued on duplicate_extents are run
 * through process_duplicates() and, when needed,
 * delete_duplicate_records() before anything else.
 *
 * Main pass, per record (each problem prints a message to stderr):
 *  - multiple extent items for the same extent;
 *  - ref count mismatch between extent item and found refs, in which case
 *    record_orphan_data_extents() is also called;
 *  - backpointer mismatch reported by all_backpointers_checked();
 *  - failed owner ref check;
 *  - when repairing and a fix is wanted, fixup_extent_refs() is run;
 *  - a bad full backref flag, handled via fixup_extent_flags();
 *  - metadata crossing a stripe boundary and chunk type mismatch, which
 *    are only reported.
 * The record is then removed from the cache, its backrefs are freed and,
 * under the visible condition, its range is cleared again from
 * excluded_extents.
 *
 * Epilogue: a result other than 0 or -EAGAIN prints an abort message.  On
 * the visible repair path block accounting is fixed inside a new
 * transaction on the extent root, which is then committed.
 *
 * NOTE(review): the 'repair' conditions guarding several of these steps,
 * the loop constructs and the return values are not visible in this
 * excerpt - confirm against the full file.
 */
9243 static int check_extent_refs(struct btrfs_root *root,
9244 struct cache_tree *extent_cache)
9246 struct extent_record *rec;
9247 struct cache_extent *cache;
9253 * if we're doing a repair, we have to make sure
9254 * we don't allocate from the problem extents.
9255 * In the worst case, this will be all the
9258 cache = search_cache_extent(extent_cache, 0);
9260 rec = container_of(cache, struct extent_record, cache);
9261 set_extent_dirty(root->fs_info->excluded_extents,
9263 rec->start + rec->max_size - 1);
9264 cache = next_cache_extent(cache);
9267 /* pin down all the corrupted blocks too */
9268 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9270 set_extent_dirty(root->fs_info->excluded_extents,
9272 cache->start + cache->size - 1);
9273 cache = next_cache_extent(cache);
9275 prune_corrupt_blocks(root->fs_info);
9276 reset_cached_block_groups(root->fs_info);
9279 reset_cached_block_groups(root->fs_info);
9282 * We need to delete any duplicate entries we find first otherwise we
9283 * could mess up the extent tree when we have backrefs that actually
9284 * belong to a different extent item and not the weird duplicate one.
9286 while (repair && !list_empty(&duplicate_extents)) {
9287 rec = to_extent_record(duplicate_extents.next);
9288 list_del_init(&rec->list);
9290 /* Sometimes we can find a backref before we find an actual
9291 * extent, so we need to process it a little bit to see if there
9292 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9293 * if this is a backref screwup. If we need to delete stuff
9294 * process_duplicates() will return 0, otherwise it will return
9297 if (process_duplicates(extent_cache, rec))
9299 ret = delete_duplicate_records(root, rec);
9303 * delete_duplicate_records will return the number of entries
9304 * deleted, so if it's greater than 0 then we know we actually
9305 * did something and we need to remove.
9318 cache = search_cache_extent(extent_cache, 0);
9321 rec = container_of(cache, struct extent_record, cache);
9322 if (rec->num_duplicates) {
9323 fprintf(stderr, "extent item %llu has multiple extent "
9324 "items\n", (unsigned long long)rec->start);
9328 if (rec->refs != rec->extent_item_refs) {
9329 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9330 (unsigned long long)rec->start,
9331 (unsigned long long)rec->nr);
9332 fprintf(stderr, "extent item %llu, found %llu\n",
9333 (unsigned long long)rec->extent_item_refs,
9334 (unsigned long long)rec->refs);
9335 ret = record_orphan_data_extents(root->fs_info, rec);
9341 if (all_backpointers_checked(rec, 1)) {
9342 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9343 (unsigned long long)rec->start,
9344 (unsigned long long)rec->nr);
9348 if (!rec->owner_ref_checked) {
9349 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9350 (unsigned long long)rec->start,
9351 (unsigned long long)rec->nr);
9356 if (repair && fix) {
9357 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9363 if (rec->bad_full_backref) {
9364 fprintf(stderr, "bad full backref, on [%llu]\n",
9365 (unsigned long long)rec->start);
9367 ret = fixup_extent_flags(root->fs_info, rec);
9375 * Although it's not a extent ref's problem, we reuse this
9376 * routine for error reporting.
9377 * No repair function yet.
9379 if (rec->crossing_stripes) {
9381 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9382 rec->start, rec->start + rec->max_size);
9386 if (rec->wrong_chunk_type) {
9388 "bad extent [%llu, %llu), type mismatch with chunk\n",
9389 rec->start, rec->start + rec->max_size);
9393 remove_cache_extent(extent_cache, cache);
9394 free_all_extent_backrefs(rec);
9395 if (!init_extent_tree && repair && (!cur_err || fix))
9396 clear_extent_dirty(root->fs_info->excluded_extents,
9398 rec->start + rec->max_size - 1);
9403 if (ret && ret != -EAGAIN) {
9404 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9407 struct btrfs_trans_handle *trans;
9409 root = root->fs_info->extent_root;
9410 trans = btrfs_start_transaction(root, 1);
9411 if (IS_ERR(trans)) {
9412 ret = PTR_ERR(trans);
9416 btrfs_fix_block_accounting(trans, root);
9417 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length() - derive the per-device stripe length of a chunk.
 *
 * @type:        chunk type flags (BTRFS_BLOCK_GROUP_* profile bits)
 * @length:      logical length of the chunk
 * @num_stripes: number of stripes the chunk has
 *
 * Per profile, as computed below:
 *   RAID0   length / num_stripes
 *   RAID10  length * 2 / num_stripes
 *   RAID5   length / (num_stripes - 1)
 *   RAID6   length / (num_stripes - 2)
 *   other   length
 *
 * NOTE(review): nothing here guards the divisors.  num_stripes == 0 for
 * RAID0/RAID10, == 1 for RAID5 or == 2 for RAID6 would divide by zero -
 * confirm that every caller validates the chunk before calling this.
 * NOTE(review): the final return is not visible in this excerpt;
 * presumably stripe_size is returned - confirm.
 */
9426 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9430 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9431 stripe_size = length;
9432 stripe_size /= num_stripes;
9433 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9434 stripe_size = length * 2;
9435 stripe_size /= num_stripes;
9436 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9437 stripe_size = length;
9438 stripe_size /= (num_stripes - 1);
9439 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9440 stripe_size = length;
9441 stripe_size /= (num_stripes - 2);
9443 stripe_size = length;
9449 * Check the chunk with its block group/dev list ref:
9450 * Return 0 if all refs seem valid.
9451 * Return 1 if only part of the refs seem valid and a later check is needed to
9452 * rebuild refs, e.g. a missing block group that requires searching the extent tree.
9453 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs() - cross-check one chunk against its block group item
 * and against the device extent of each of its stripes.
 *
 * @chunk_rec:         the chunk being checked
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 *
 * Block group: looked up in block_group_cache.  The block group's offset
 * must equal the chunk length, its objectid the chunk offset, and a flags
 * comparison is part of the same test.  A matching record is taken off its
 * list and remembered in chunk_rec->bg_rec; mismatches and a missing block
 * group have their own diagnostic format strings.
 *
 * Device extents: the expected per-device length comes from
 * calc_stripe_length().  For every stripe the device extent at
 * (devid, offset, length) is looked up and must agree on devid, offset,
 * chunk offset and length.  A matching extent is moved onto
 * chunk_rec->dextents; mismatching or missing extents have their own
 * diagnostic format strings.
 *
 * NOTE(review): the fourth parameter, the conditions under which the
 * diagnostics are emitted, the role of 'metadump_v2' and the return
 * statements are not visible in this excerpt - confirm against the full
 * file.
 */
9455 static int check_chunk_refs(struct chunk_record *chunk_rec,
9456 struct block_group_tree *block_group_cache,
9457 struct device_extent_tree *dev_extent_cache,
9460 struct cache_extent *block_group_item;
9461 struct block_group_record *block_group_rec;
9462 struct cache_extent *dev_extent_item;
9463 struct device_extent_record *dev_extent_rec;
9467 int metadump_v2 = 0;
9471 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9474 if (block_group_item) {
9475 block_group_rec = container_of(block_group_item,
9476 struct block_group_record,
9478 if (chunk_rec->length != block_group_rec->offset ||
9479 chunk_rec->offset != block_group_rec->objectid ||
9481 chunk_rec->type_flags != block_group_rec->flags)) {
9484 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9485 chunk_rec->objectid,
9490 chunk_rec->type_flags,
9491 block_group_rec->objectid,
9492 block_group_rec->type,
9493 block_group_rec->offset,
9494 block_group_rec->offset,
9495 block_group_rec->objectid,
9496 block_group_rec->flags);
9499 list_del_init(&block_group_rec->list);
9500 chunk_rec->bg_rec = block_group_rec;
9505 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9506 chunk_rec->objectid,
9511 chunk_rec->type_flags);
9518 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9519 chunk_rec->num_stripes);
9520 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9521 devid = chunk_rec->stripes[i].devid;
9522 offset = chunk_rec->stripes[i].offset;
9523 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9524 devid, offset, length);
9525 if (dev_extent_item) {
9526 dev_extent_rec = container_of(dev_extent_item,
9527 struct device_extent_record,
9529 if (dev_extent_rec->objectid != devid ||
9530 dev_extent_rec->offset != offset ||
9531 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9532 dev_extent_rec->length != length) {
9535 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9536 chunk_rec->objectid,
9539 chunk_rec->stripes[i].devid,
9540 chunk_rec->stripes[i].offset,
9541 dev_extent_rec->objectid,
9542 dev_extent_rec->offset,
9543 dev_extent_rec->length);
9546 list_move(&dev_extent_rec->chunk_list,
9547 &chunk_rec->dextents);
9552 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9553 chunk_rec->objectid,
9556 chunk_rec->stripes[i].devid,
9557 chunk_rec->stripes[i].offset);
9564 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks() - run check_chunk_refs() on every chunk and sort the
 * chunks into result lists.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 * @good:    optional list that receives chunks whose check returned 0
 * @bad:     list that receives chunks on the remaining visible branch
 * @rebuild: optional list that receives chunks whose check returned > 0
 * @silent:  passed through to check_chunk_refs()
 *
 * After all chunks were visited, two leftover lists are walked, each with
 * its own diagnostic format string: block_group_cache->block_groups
 * ("didn't find the relative chunk") and
 * dev_extent_cache->no_chunk_orphans (same wording for device extents).
 *
 * NOTE(review): the condition guarding the 'bad' list, the handling of
 * 'silent' around the messages and the return value are not visible in
 * this excerpt - confirm against the full file.
 */
9565 int check_chunks(struct cache_tree *chunk_cache,
9566 struct block_group_tree *block_group_cache,
9567 struct device_extent_tree *dev_extent_cache,
9568 struct list_head *good, struct list_head *bad,
9569 struct list_head *rebuild, int silent)
9571 struct cache_extent *chunk_item;
9572 struct chunk_record *chunk_rec;
9573 struct block_group_record *bg_rec;
9574 struct device_extent_record *dext_rec;
9578 chunk_item = first_cache_extent(chunk_cache);
9579 while (chunk_item) {
9580 chunk_rec = container_of(chunk_item, struct chunk_record,
9582 err = check_chunk_refs(chunk_rec, block_group_cache,
9583 dev_extent_cache, silent);
9586 if (err == 0 && good)
9587 list_add_tail(&chunk_rec->list, good);
9588 if (err > 0 && rebuild)
9589 list_add_tail(&chunk_rec->list, rebuild);
9591 list_add_tail(&chunk_rec->list, bad);
9592 chunk_item = next_cache_extent(chunk_item);
9595 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9598 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9606 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9610 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the device extent records belonging to @dev_rec and
 * compare the sum with dev_rec->byte_used.
 *
 * The walk starts at the record returned by search_cache_extent2() for
 * (dev_rec->devid, 0) in dext_cache->tree and advances with
 * next_cache_extent().  Each counted record is also unlinked from its
 * device_list.  A difference between the sum and byte_used is reported
 * together with the objectid, type and offset of the device record.
 *
 * NOTE(review): the statement taken when a record objectid differs from
 * dev_rec->devid, and the return values, are not visible in this view.
 */
9621 static int check_device_used(struct device_record *dev_rec,
9622 struct device_extent_tree *dext_cache)
9624 struct cache_extent *cache;
9625 struct device_extent_record *dev_extent_rec;
9628 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9630 dev_extent_rec = container_of(cache,
9631 struct device_extent_record,
9633 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the record from device_list and account its length. */
9636 list_del_init(&dev_extent_rec->device_list);
9637 total_byte += dev_extent_rec->length;
9638 cache = next_cache_extent(cache);
9641 if (total_byte != dev_rec->byte_used) {
9643 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9644 total_byte, dev_rec->byte_used, dev_rec->objectid,
9645 dev_rec->type, dev_rec->offset);
9652 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device_record stored in the @dev_cache
 * rb-tree (rb_first() / rb_next() traversal), then report each device
 * extent linked on dev_extent_cache->no_device_orphans as having no device.
 *
 * NOTE(review): how err from check_device_used() and the orphan reports are
 * folded into the return value is not visible in this view.
 */
9653 static int check_devices(struct rb_root *dev_cache,
9654 struct device_extent_tree *dev_extent_cache)
9656 struct rb_node *dev_node;
9657 struct device_record *dev_rec;
9658 struct device_extent_record *dext_rec;
9662 dev_node = rb_first(dev_cache);
9664 dev_rec = container_of(dev_node, struct device_record, node);
9665 err = check_device_used(dev_rec, dev_extent_cache);
9669 dev_node = rb_next(dev_node);
/* Report the device extents on the no_device_orphans list. */
9671 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9674 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9675 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level and @level_size
 * are stored verbatim in the new record.  @drop_key is copied into the
 * record with memcpy(); callers in this file pass NULL for it when queueing
 * onto the normal_trees list.
 *
 * NOTE(review): the check of the malloc() result and the guard in front of
 * the memcpy() are not visible in this view; confirm that both pointers are
 * tested before use.
 */
9682 static int add_root_item_to_list(struct list_head *head,
9683 u64 objectid, u64 bytenr, u64 last_snapshot,
9684 u8 level, u8 drop_level,
9685 int level_size, struct btrfs_key *drop_key)
9688 struct root_item_record *ri_rec;
9689 ri_rec = malloc(sizeof(*ri_rec));
9692 ri_rec->bytenr = bytenr;
9693 ri_rec->objectid = objectid;
9694 ri_rec->level = level;
9695 ri_rec->level_size = level_size;
9696 ri_rec->drop_level = drop_level;
9697 ri_rec->last_snapshot = last_snapshot;
9699 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9700 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: repeatedly take the first root_item_record and unlink it
 * with list_del_init() until list_empty() reports true.
 *
 * NOTE(review): the release of the record memory is not visible in this
 * view; confirm each record is freed after being unlinked.
 */
9705 static void free_root_item_list(struct list_head *list)
9707 struct root_item_record *ri_rec;
9709 while (!list_empty(list)) {
9710 ri_rec = list_first_entry(list, struct root_item_record,
9712 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record the root block is read with read_tree_block() using
 * rec->bytenr and rec->level_size, registered through add_root_to_pending()
 * under rec->objectid, and run_next_block() is then invoked with that
 * record.  The buffer is released and the record is unlinked from @list
 * afterwards.  After the list loop run_next_block() is invoked again with a
 * NULL record.
 *
 * The cache trees and @bits / bits_nr are passed through unchanged to
 * run_next_block().
 *
 * NOTE(review): the loop conditions around the run_next_block() calls and
 * the error and return paths are not visible in this view.
 */
9717 static int deal_root_from_list(struct list_head *list,
9718 struct btrfs_root *root,
9719 struct block_info *bits,
9721 struct cache_tree *pending,
9722 struct cache_tree *seen,
9723 struct cache_tree *reada,
9724 struct cache_tree *nodes,
9725 struct cache_tree *extent_cache,
9726 struct cache_tree *chunk_cache,
9727 struct rb_root *dev_cache,
9728 struct block_group_tree *block_group_cache,
9729 struct device_extent_tree *dev_extent_cache)
9734 while (!list_empty(list)) {
9735 struct root_item_record *rec;
9736 struct extent_buffer *buf;
/* Always work on the head of the list; it is unlinked further down. */
9737 rec = list_entry(list->next,
9738 struct root_item_record, list);
/* Read the root block recorded for this tree. */
9740 buf = read_tree_block(root->fs_info->tree_root,
9741 rec->bytenr, rec->level_size, 0);
9742 if (!extent_buffer_uptodate(buf)) {
9743 free_extent_buffer(buf);
9747 ret = add_root_to_pending(buf, extent_cache, pending,
9748 seen, nodes, rec->objectid);
9752 * To rebuild extent tree, we need deal with snapshot
9753 * one by one, otherwise we deal with node firstly which
9754 * can maximize readahead.
9757 ret = run_next_block(root, bits, bits_nr, &last,
9758 pending, seen, reada, nodes,
9759 extent_cache, chunk_cache,
9760 dev_cache, block_group_cache,
9761 dev_extent_cache, rec);
9765 free_extent_buffer(buf);
9766 list_del(&rec->list);
/* Further processing without an associated root_item_record. */
9772 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9773 reada, nodes, extent_cache, chunk_cache,
9774 dev_cache, block_group_cache,
9775 dev_extent_cache, NULL);
/*
 * Collect chunk, block group, device, device extent and extent records for
 * the filesystem of @root and run the chunk, extent-ref and device checks
 * on them.
 *
 * Steps visible in this function:
 *  1. Initialise the local cache trees and lists and publish
 *     excluded_extents, fsck_extent_cache, free_extent_hook and
 *     corrupt_blocks through root->fs_info.
 *  2. Allocate the block_info array and, when progress reporting is
 *     enabled, start the TASK_EXTENTS task.
 *  3. Queue the tree root and the chunk root on normal_trees.
 *  4. Scan the tree root for ROOT_ITEM keys.  A root item whose
 *     drop_progress objectid is 0 is queued on normal_trees; the other
 *     visible branch converts the drop key and queues the item on
 *     dropping_trees (presumably the else branch).
 *  5. Hand normal_trees and then dropping_trees to deal_root_from_list().
 *  6. Run check_chunks(), check_extent_refs() and check_devices().
 *  7. Stop the progress task, clear the fs_info pointers set in step 1 and
 *     free the caches and lists.
 *
 * NOTE(review): labels, return statements and the tests applied to each ret
 * value are not visible in this view.  The two cleanup sequences at the end
 * look like separate exit and restart paths (see the -EAGAIN remark in the
 * body); confirm against the complete function.
 */
9785 static int check_chunks_and_extents(struct btrfs_root *root)
9787 struct rb_root dev_cache;
9788 struct cache_tree chunk_cache;
9789 struct block_group_tree block_group_cache;
9790 struct device_extent_tree dev_extent_cache;
9791 struct cache_tree extent_cache;
9792 struct cache_tree seen;
9793 struct cache_tree pending;
9794 struct cache_tree reada;
9795 struct cache_tree nodes;
9796 struct extent_io_tree excluded_extents;
9797 struct cache_tree corrupt_blocks;
9798 struct btrfs_path path;
9799 struct btrfs_key key;
9800 struct btrfs_key found_key;
9802 struct block_info *bits;
9804 struct extent_buffer *leaf;
9806 struct btrfs_root_item ri;
9807 struct list_head dropping_trees;
9808 struct list_head normal_trees;
9809 struct btrfs_root *root1;
/* Start from empty caches and lists. */
9814 dev_cache = RB_ROOT;
9815 cache_tree_init(&chunk_cache);
9816 block_group_tree_init(&block_group_cache);
9817 device_extent_tree_init(&dev_extent_cache);
9819 cache_tree_init(&extent_cache);
9820 cache_tree_init(&seen);
9821 cache_tree_init(&pending);
9822 cache_tree_init(&nodes);
9823 cache_tree_init(&reada);
9824 cache_tree_init(&corrupt_blocks);
9825 extent_io_tree_init(&excluded_extents);
9826 INIT_LIST_HEAD(&dropping_trees);
9827 INIT_LIST_HEAD(&normal_trees);
/* Publish the local trees and the free-extent hook through fs_info. */
9830 root->fs_info->excluded_extents = &excluded_extents;
9831 root->fs_info->fsck_extent_cache = &extent_cache;
9832 root->fs_info->free_extent_hook = free_extent_hook;
9833 root->fs_info->corrupt_blocks = &corrupt_blocks;
9837 bits = malloc(bits_nr * sizeof(struct block_info));
9843 if (ctx.progress_enabled) {
9844 ctx.tp = TASK_EXTENTS;
9845 task_start(ctx.info);
/* Queue the tree root on normal_trees. */
9849 root1 = root->fs_info->tree_root;
9850 level = btrfs_header_level(root1->node);
9851 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9852 root1->node->start, 0, level, 0,
9853 root1->nodesize, NULL);
/* Queue the chunk root on normal_trees. */
9856 root1 = root->fs_info->chunk_root;
9857 level = btrfs_header_level(root1->node);
9858 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9859 root1->node->start, 0, level, 0,
9860 root1->nodesize, NULL);
/* Scan the tree root for ROOT_ITEM keys. */
9863 btrfs_init_path(&path);
9866 key.type = BTRFS_ROOT_ITEM_KEY;
9867 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9872 leaf = path.nodes[0];
9873 slot = path.slots[0];
9874 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9875 ret = btrfs_next_leaf(root, &path);
9878 leaf = path.nodes[0];
9879 slot = path.slots[0];
9881 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9882 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9883 unsigned long offset;
/* Copy the on-disk root item into ri before inspecting it. */
9886 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9887 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9888 last_snapshot = btrfs_root_last_snapshot(&ri);
9889 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9890 level = btrfs_root_level(&ri);
9891 level_size = root->nodesize;
9892 ret = add_root_item_to_list(&normal_trees,
9894 btrfs_root_bytenr(&ri),
9895 last_snapshot, level,
9896 0, level_size, NULL);
9900 level = btrfs_root_level(&ri);
9901 level_size = root->nodesize;
9902 objectid = found_key.objectid;
9903 btrfs_disk_key_to_cpu(&found_key,
9905 ret = add_root_item_to_list(&dropping_trees,
9907 btrfs_root_bytenr(&ri),
9908 last_snapshot, level,
9910 level_size, &found_key);
9917 btrfs_release_path(&path);
9920 * check_block can return -EAGAIN if it fixes something, please keep
9921 * this in mind when dealing with return values from these functions, if
9922 * we get -EAGAIN we want to fall through and restart the loop.
9924 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9925 &seen, &reada, &nodes, &extent_cache,
9926 &chunk_cache, &dev_cache, &block_group_cache,
9933 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9934 &pending, &seen, &reada, &nodes,
9935 &extent_cache, &chunk_cache, &dev_cache,
9936 &block_group_cache, &dev_extent_cache);
/* Run the chunk, extent-ref and device checks on the collected caches. */
9943 ret = check_chunks(&chunk_cache, &block_group_cache,
9944 &dev_extent_cache, NULL, NULL, NULL, 0);
9951 ret = check_extent_refs(root, &extent_cache);
9958 ret = check_devices(&dev_cache, &dev_extent_cache);
9963 task_stop(ctx.info);
/* First cleanup sequence: clear the fs_info pointers, free the caches. */
9965 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9966 extent_io_tree_cleanup(&excluded_extents);
9967 root->fs_info->fsck_extent_cache = NULL;
9968 root->fs_info->free_extent_hook = NULL;
9969 root->fs_info->corrupt_blocks = NULL;
9970 root->fs_info->excluded_extents = NULL;
9973 free_chunk_cache_tree(&chunk_cache);
9974 free_device_cache_tree(&dev_cache);
9975 free_block_group_tree(&block_group_cache);
9976 free_device_extent_tree(&dev_extent_cache);
9977 free_extent_cache_tree(&seen);
9978 free_extent_cache_tree(&pending);
9979 free_extent_cache_tree(&reada);
9980 free_extent_cache_tree(&nodes);
9981 free_root_item_list(&normal_trees);
9982 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence.  Unlike the first one it also calls
 * free_extent_record_cache() and leaves the fs_info pointers in place.
 */
9985 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9986 free_extent_cache_tree(&seen);
9987 free_extent_cache_tree(&pending);
9988 free_extent_cache_tree(&reada);
9989 free_extent_cache_tree(&nodes);
9990 free_chunk_cache_tree(&chunk_cache);
9991 free_block_group_tree(&block_group_cache);
9992 free_device_cache_tree(&dev_cache);
9993 free_device_extent_tree(&dev_extent_cache);
9994 free_extent_record_cache(&extent_cache);
9995 free_root_item_list(&normal_trees);
9996 free_root_item_list(&dropping_trees);
9997 extent_io_tree_cleanup(&excluded_extents);
10002 * Check backrefs of a tree block given by @bytenr or @eb.
10004 * @root: the root containing the @bytenr or @eb
10005 * @eb: tree block extent buffer, can be NULL
10006 * @bytenr: bytenr of the tree block to search
10007 * @level: tree level of the tree block
10008 * @owner: owner of the tree block
10010 * Return >0 for any error found and output error message
10011 * Return 0 for no error found
/*
 * Implementation outline:
 *  - Look up the extent item for @bytenr in the extent tree, using a
 *    METADATA_ITEM key when the SKINNY_METADATA incompat flag is set and an
 *    EXTENT_ITEM key on the other visible line, then step back with
 *    btrfs_previous_extent_item().
 *  - Compare the TREE_BLOCK flag, the generation in the @eb header, @level
 *    and (for owners that are not fs trees) a ref count of 1 against the
 *    extent item, reporting BACKREF_MISMATCH.
 *  - Walk the inline refs for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or a SHARED_BLOCK_REF whose parent passes a
 *    recursive check at @level + 1.
 *  - Search the keyed TREE_BLOCK_REF item (bytenr, root->objectid) as a
 *    fallback; the lost-backref message is printed only when @eb is set.
 *
 * NOTE(review): the mismatch branches assign err = BACKREF_MISMATCH while
 * the missing-backref branches use err |= BACKREF_MISSING; confirm that
 * overwriting err there is intended.
 * NOTE(review): the condition that guards the header comparisons (which
 * dereference @eb) is not visible in this view.
 */
10013 static int check_tree_block_ref(struct btrfs_root *root,
10014 struct extent_buffer *eb, u64 bytenr,
10015 int level, u64 owner)
10017 struct btrfs_key key;
10018 struct btrfs_root *extent_root = root->fs_info->extent_root;
10019 struct btrfs_path path;
10020 struct btrfs_extent_item *ei;
10021 struct btrfs_extent_inline_ref *iref;
10022 struct extent_buffer *leaf;
10028 u32 nodesize = root->nodesize;
10031 int tree_reloc_root = 0;
/* Remember when @bytenr is the root node of a tree reloc root. */
10036 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10037 btrfs_header_bytenr(root->node) == bytenr)
10038 tree_reloc_root = 1;
10040 btrfs_init_path(&path);
10041 key.objectid = bytenr;
10042 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10043 key.type = BTRFS_METADATA_ITEM_KEY;
10045 key.type = BTRFS_EXTENT_ITEM_KEY;
10046 key.offset = (u64)-1;
10048 /* Search for the backref in extent tree */
10049 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10051 err |= BACKREF_MISSING;
10054 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10056 err |= BACKREF_MISSING;
10060 leaf = path.nodes[0];
10061 slot = path.slots[0];
10062 btrfs_item_key_to_cpu(leaf, &key, slot);
10064 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keys carry the level in key.offset; the other layout stores
 * it in a btrfs_tree_block_info placed in front of the inline refs.
 */
10066 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10067 skinny_level = (int)key.offset;
10068 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10070 struct btrfs_tree_block_info *info;
10072 info = (struct btrfs_tree_block_info *)(ei + 1);
10073 skinny_level = btrfs_tree_block_level(leaf, info);
10074 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10081 if (!(btrfs_extent_flags(leaf, ei) &
10082 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10084 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10085 key.objectid, nodesize,
10086 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10087 err = BACKREF_MISMATCH;
10089 header_gen = btrfs_header_generation(eb);
10090 extent_gen = btrfs_extent_generation(leaf, ei);
10091 if (header_gen != extent_gen) {
10093 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10094 key.objectid, nodesize, header_gen,
10096 err = BACKREF_MISMATCH;
10098 if (level != skinny_level) {
10100 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10101 key.objectid, nodesize, level, skinny_level);
10102 err = BACKREF_MISMATCH;
10104 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10106 "extent[%llu %u] is referred by other roots than %llu",
10107 key.objectid, nodesize, root->objectid);
10108 err = BACKREF_MISMATCH;
10113 * Iterate the extent/metadata item to find the exact backref
10115 item_size = btrfs_item_size_nr(leaf, slot);
10116 ptr = (unsigned long)iref;
10117 end = (unsigned long)ei + item_size;
10118 while (ptr < end) {
10119 iref = (struct btrfs_extent_inline_ref *)ptr;
10120 type = btrfs_extent_inline_ref_type(leaf, iref);
10121 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10123 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10124 (offset == root->objectid || offset == owner)) {
10126 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10128 * Backref of tree reloc root points to itself, no need
10129 * to check backref any more.
10131 if (tree_reloc_root)
10134 /* Check if the backref points to valid referencer */
10135 found_ref = !check_tree_block_ref(root, NULL,
10136 offset, level + 1, owner);
10141 ptr += btrfs_extent_inline_ref_size(type);
10145 * Inlined extent item doesn't have what we need, check
10146 * TREE_BLOCK_REF_KEY
10149 btrfs_release_path(&path);
10150 key.objectid = bytenr;
10151 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10152 key.offset = root->objectid;
10154 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10159 err |= BACKREF_MISSING;
10161 btrfs_release_path(&path);
10162 if (eb && (err & BACKREF_MISSING))
10163 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10164 bytenr, nodesize, owner, level);
10169 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10171 * Return >0 any error found and output error message
10172 * Return 0 for no error found
/*
 * @root: tree that owns the leaf @eb
 * @eb:   leaf holding the EXTENT_DATA item
 * @slot: slot of the item inside @eb
 *
 * Implementation outline:
 *  - Inline extents and extents with disk_bytenr 0 are not checked.
 *  - disk_num_bytes and num_bytes are tested against root->sectorsize
 *    (BYTES_UNALIGNED on failure) and added to data_bytes_allocated and
 *    data_bytes_referenced.  NOTE(review): whether the accounting happens
 *    only in the aligned case is not visible in this view.
 *  - The EXTENT_ITEM (disk_bytenr, disk_num_bytes) is looked up in the
 *    extent tree and must carry BTRFS_EXTENT_FLAG_DATA.
 *  - A data backref is searched in three places: the inline refs
 *    (EXTENT_DATA_REF with a matching root, or SHARED_DATA_REF validated
 *    through check_tree_block_ref()), a keyed EXTENT_DATA_REF item whose
 *    offset is hash_extent_data_ref(), and a keyed SHARED_DATA_REF item
 *    whose offset is eb->start.
 *  - BACKREF_MISSING is set and reported when found_dbackref stays 0.
 */
10174 static int check_extent_data_item(struct btrfs_root *root,
10175 struct extent_buffer *eb, int slot)
10177 struct btrfs_file_extent_item *fi;
10178 struct btrfs_path path;
10179 struct btrfs_root *extent_root = root->fs_info->extent_root;
10180 struct btrfs_key fi_key;
10181 struct btrfs_key dbref_key;
10182 struct extent_buffer *leaf;
10183 struct btrfs_extent_item *ei;
10184 struct btrfs_extent_inline_ref *iref;
10185 struct btrfs_extent_data_ref *dref;
10188 u64 disk_num_bytes;
10189 u64 extent_num_bytes;
10196 int found_dbackref = 0;
10200 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10201 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10203 /* Nothing to check for hole and inline data extents */
10204 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10205 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10208 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10209 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10210 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10212 /* Check unaligned disk_num_bytes and num_bytes */
10213 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10215 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10216 fi_key.objectid, fi_key.offset, disk_num_bytes,
10218 err |= BYTES_UNALIGNED;
10220 data_bytes_allocated += disk_num_bytes;
10222 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10224 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10225 fi_key.objectid, fi_key.offset, extent_num_bytes,
10227 err |= BYTES_UNALIGNED;
10229 data_bytes_referenced += extent_num_bytes;
10231 owner = btrfs_header_owner(eb);
10233 /* Check the extent item of the file extent in extent tree */
10234 btrfs_init_path(&path);
10235 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10236 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10237 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10239 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10243 leaf = path.nodes[0];
10244 slot = path.slots[0];
10245 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10247 extent_flags = btrfs_extent_flags(leaf, ei);
10249 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10251 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10252 disk_bytenr, disk_num_bytes,
10253 BTRFS_EXTENT_FLAG_DATA);
10254 err |= BACKREF_MISMATCH;
10257 /* Check data backref inside that extent item */
10258 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10259 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10260 ptr = (unsigned long)iref;
10261 end = (unsigned long)ei + item_size;
10262 while (ptr < end) {
10263 iref = (struct btrfs_extent_inline_ref *)ptr;
10264 type = btrfs_extent_inline_ref_type(leaf, iref);
10265 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10267 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10268 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10269 if (ref_root == owner || ref_root == root->objectid)
10270 found_dbackref = 1;
10271 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10272 found_dbackref = !check_tree_block_ref(root, NULL,
10273 btrfs_extent_inline_ref_offset(leaf, iref),
10277 if (found_dbackref)
10279 ptr += btrfs_extent_inline_ref_size(type);
10282 if (!found_dbackref) {
10283 btrfs_release_path(&path);
10285 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10286 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10287 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10288 dbref_key.offset = hash_extent_data_ref(root->objectid,
10289 fi_key.objectid, fi_key.offset);
10291 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10292 &dbref_key, &path, 0, 0);
10294 found_dbackref = 1;
10298 btrfs_release_path(&path);
10301 * Neither inlined nor EXTENT_DATA_REF found, try
10302 * SHARED_DATA_REF as last chance.
10304 dbref_key.objectid = disk_bytenr;
10305 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10306 dbref_key.offset = eb->start;
10308 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10309 &dbref_key, &path, 0, 0);
10311 found_dbackref = 1;
/* Record a missing backref when no lookup set found_dbackref. */
10317 if (!found_dbackref)
10318 err |= BACKREF_MISSING;
10319 btrfs_release_path(&path);
10320 if (err & BACKREF_MISSING) {
10321 error("data extent[%llu %llu] backref lost",
10322 disk_bytenr, disk_num_bytes);
10328 * Get real tree block level for the case like shared block
10329 * Return >= 0 as tree level
10330 * Return <0 for error
/*
 * @fs_info: filesystem whose extent root is searched
 * @bytenr:  start of the tree block
 *
 * Two sources are consulted.  The extent tree gives backref_level
 * (key.offset for a METADATA_ITEM key, btrfs_tree_block_info on the other
 * visible branch) together with the extent generation.  The block is then
 * read with that generation as transid and header_level is taken from its
 * header.  header_level is returned after the two have been compared.
 *
 * NOTE(review): the error codes returned when the item is missing, is not a
 * tree block, cannot be read or when the two levels differ are not visible
 * in this view.
 */
10332 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10334 struct extent_buffer *eb;
10335 struct btrfs_path path;
10336 struct btrfs_key key;
10337 struct btrfs_extent_item *ei;
10340 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10345 /* Search extent tree for extent generation and level */
10346 key.objectid = bytenr;
10347 key.type = BTRFS_METADATA_ITEM_KEY;
10348 key.offset = (u64)-1;
10350 btrfs_init_path(&path);
10351 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10354 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10362 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10363 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10364 struct btrfs_extent_item);
/* Only extent items flagged as tree blocks are of interest here. */
10365 flags = btrfs_extent_flags(path.nodes[0], ei);
10366 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10371 /* Get transid for later read_tree_block() check */
10372 transid = btrfs_extent_generation(path.nodes[0], ei);
10374 /* Get backref level as one source */
10375 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10376 backref_level = key.offset;
10378 struct btrfs_tree_block_info *info;
10380 info = (struct btrfs_tree_block_info *)(ei + 1);
10381 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10383 btrfs_release_path(&path);
10385 /* Get level from tree block as an alternative source */
10386 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10387 if (!extent_buffer_uptodate(eb)) {
10388 free_extent_buffer(eb);
10391 header_level = btrfs_header_level(eb);
10392 free_extent_buffer(eb);
10394 if (header_level != backref_level)
10396 return header_level;
10399 btrfs_release_path(&path);
10404 * Check if a tree block backref is valid (points to a valid tree block)
10405 * if level == -1, level will be resolved
10406 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  start of the referenced tree block
 * @level:   expected level, resolved via query_tree_block_level() when the
 *           caller does not know it
 *
 * Implementation outline:
 *  - Read the root (root_id, ROOT_ITEM, -1) with btrfs_read_fs_root().
 *  - Read the block at @bytenr and take its first key (node key or item
 *    key; the test choosing between the two is not visible here).
 *  - Search that key in the root with path.lowest_level = level and verify
 *    that path.nodes[level] has the expected bytenr and level.
 * REFERENCER_MISSING and REFERENCER_MISMATCH bits are accumulated in err,
 * and a lost-referencer error is printed when REFERENCER_MISSING is set.
 *
 * NOTE(review): a block with no items at level 0 is released early; the
 * jump taken on that path is not visible in this view.
 */
10408 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10409 u64 bytenr, int level)
10411 struct btrfs_root *root;
10412 struct btrfs_key key;
10413 struct btrfs_path path;
10414 struct extent_buffer *eb;
10415 struct extent_buffer *node;
10416 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10420 /* Query level for level == -1 special case */
10422 level = query_tree_block_level(fs_info, bytenr);
10424 err |= REFERENCER_MISSING;
10428 key.objectid = root_id;
10429 key.type = BTRFS_ROOT_ITEM_KEY;
10430 key.offset = (u64)-1;
10432 root = btrfs_read_fs_root(fs_info, &key);
10433 if (IS_ERR(root)) {
10434 err |= REFERENCER_MISSING;
10438 /* Read out the tree block to get item/node key */
10439 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10440 if (!extent_buffer_uptodate(eb)) {
10441 err |= REFERENCER_MISSING;
10442 free_extent_buffer(eb);
10446 /* Empty tree, no need to check key */
10447 if (!btrfs_header_nritems(eb) && !level) {
10448 free_extent_buffer(eb);
10453 btrfs_node_key_to_cpu(eb, &key, 0);
10455 btrfs_item_key_to_cpu(eb, &key, 0);
10457 free_extent_buffer(eb);
10459 btrfs_init_path(&path);
10460 path.lowest_level = level;
10461 /* Search with the first key, to ensure we can reach it */
10462 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10464 err |= REFERENCER_MISSING;
/* The node found at the requested level must be the block itself. */
10468 node = path.nodes[level];
10469 if (btrfs_header_bytenr(node) != bytenr) {
10471 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10472 bytenr, nodesize, bytenr,
10473 btrfs_header_bytenr(node));
10474 err |= REFERENCER_MISMATCH;
10476 if (btrfs_header_level(node) != level) {
10478 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10479 bytenr, nodesize, level,
10480 btrfs_header_level(node));
10481 err |= REFERENCER_MISMATCH;
10485 btrfs_release_path(&path);
10487 if (err & REFERENCER_MISSING) {
10489 error("extent [%llu %d] lost referencer (owner: %llu)",
10490 bytenr, nodesize, root_id);
10493 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10494 bytenr, nodesize, root_id, level);
10501 * Check if tree block @eb is tree reloc root.
10502 * Return 0 if it's not or any problem happens
10503 * Return 1 if it's a tree reloc root
/*
 * @fs_info: filesystem being checked
 * @eb:      tree block to test
 *
 * The reloc root is read without caching through
 * btrfs_read_fs_root_no_cache() using the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb).  The bytenr in the
 * header of @eb is then compared with the bytenr of the root node of that
 * reloc root, and the root is released with btrfs_free_fs_root().
 */
10505 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10506 struct extent_buffer *eb)
10508 struct btrfs_root *tree_reloc_root;
10509 struct btrfs_key key;
10510 u64 bytenr = btrfs_header_bytenr(eb);
10511 u64 owner = btrfs_header_owner(eb);
10514 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10515 key.offset = owner;
10516 key.type = BTRFS_ROOT_ITEM_KEY;
10518 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10519 if (IS_ERR(tree_reloc_root))
10522 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10524 btrfs_free_fs_root(tree_reloc_root);
10529 * Check referencer for shared block backref
10530 * If level == -1, this function will resolve the level.
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named as parent by the backref
 * @bytenr:  start of the shared tree block
 * @level:   level of the shared block, resolved through
 *           query_tree_block_level() when unknown
 *
 * The parent block is read and, once its header level has been compared
 * with @level + 1, its node pointers are scanned for one equal to @bytenr.
 * When @parent equals @bytenr the block is tested with
 * is_tree_reloc_root() instead.  If no parent is found an error is printed
 * and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the jumps taken on the early-exit conditions and the value
 * returned on success are not visible in this view.
 */
10532 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10533 u64 parent, u64 bytenr, int level)
10535 struct extent_buffer *eb;
10536 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10538 int found_parent = 0;
10541 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10542 if (!extent_buffer_uptodate(eb))
10546 level = query_tree_block_level(fs_info, bytenr);
10550 /* It's possible it's a tree reloc root */
10551 if (parent == bytenr) {
10552 if (is_tree_reloc_root(fs_info, eb))
10557 if (level + 1 != btrfs_header_level(eb))
/* Look for a node pointer in the parent that targets @bytenr. */
10560 nr = btrfs_header_nritems(eb);
10561 for (i = 0; i < nr; i++) {
10562 if (bytenr == btrfs_node_blockptr(eb, i)) {
10568 free_extent_buffer(eb);
10569 if (!found_parent) {
10571 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10572 bytenr, nodesize, parent, level);
10573 return REFERENCER_MISSING;
10579 * Check referencer for normal (inlined) data ref
10580 * If len == 0, it will be resolved by searching in extent tree
/*
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the root named by the data backref
 * @objectid: inode number named by the data backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * Implementation outline:
 *  - The EXTENT_ITEM for @bytenr is looked up in the extent tree first;
 *    per the note above this resolves @len when it is 0 (the assignment is
 *    not visible in this view).
 *  - The root @root_id is read and its EXTENT_DATA items for @objectid are
 *    walked, starting at @offset, or at 0 when the top bit of @offset is
 *    set.
 *  - Items whose disk_bytenr, disk_num_bytes and computed backref offset
 *    match are presumably counted in found_count (the increment is not
 *    visible here).
 *  - found_count is compared with @count; a difference is reported and
 *    REFERENCER_MISSING is returned.
 */
10582 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10583 u64 root_id, u64 objectid, u64 offset,
10584 u64 bytenr, u64 len, u32 count)
10586 struct btrfs_root *root;
10587 struct btrfs_root *extent_root = fs_info->extent_root;
10588 struct btrfs_key key;
10589 struct btrfs_path path;
10590 struct extent_buffer *leaf;
10591 struct btrfs_file_extent_item *fi;
10592 u32 found_count = 0;
10597 key.objectid = bytenr;
10598 key.type = BTRFS_EXTENT_ITEM_KEY;
10599 key.offset = (u64)-1;
10601 btrfs_init_path(&path);
10602 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10605 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10608 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10609 if (key.objectid != bytenr ||
10610 key.type != BTRFS_EXTENT_ITEM_KEY)
10613 btrfs_release_path(&path);
/* Switch from the extent tree to the root named by the backref. */
10615 key.objectid = root_id;
10616 key.type = BTRFS_ROOT_ITEM_KEY;
10617 key.offset = (u64)-1;
10618 btrfs_init_path(&path);
10620 root = btrfs_read_fs_root(fs_info, &key);
10624 key.objectid = objectid;
10625 key.type = BTRFS_EXTENT_DATA_KEY;
10627 * It can be nasty as data backref offset is
10628 * file offset - file extent offset, which is smaller or
10629 * equal to original backref offset. The only special case is
10630 * overflow. So we need to special check and do further search.
10632 key.offset = offset & (1ULL << 63) ? 0 : offset;
10634 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10639 * Search afterwards to get correct one
10640 * NOTE: As we must do a comprehensive check on the data backref to
10641 * make sure the dref count also matches, we must iterate all file
10642 * extents for that inode.
10645 leaf = path.nodes[0];
10646 slot = path.slots[0];
10648 if (slot >= btrfs_header_nritems(leaf))
10650 btrfs_item_key_to_cpu(leaf, &key, slot);
10651 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10653 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10655 * Except normal disk bytenr and disk num bytes, we still
10656 * need to do extra check on dbackref offset as
10657 * dbackref offset = file_offset - file_extent_offset
10659 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10660 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10661 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10666 ret = btrfs_next_item(root, &path);
10671 btrfs_release_path(&path);
10672 if (found_count != count) {
10674 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10675 bytenr, len, root_id, objectid, offset, count, found_count);
10676 return REFERENCER_MISSING;
10682 * Check if the referencer of a shared data backref exists
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf named as parent by the backref
 * @bytenr:  start of the data extent
 *
 * The parent leaf is read and its items are scanned.  Only EXTENT_DATA
 * items that are not inline extents are considered, and the extent counts
 * as referenced when one of them has a disk_bytenr equal to @bytenr.
 * Otherwise an error is printed and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the statements inside the match branch and the value
 * returned on success are not visible in this view.
 */
10684 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10685 u64 parent, u64 bytenr)
10687 struct extent_buffer *eb;
10688 struct btrfs_key key;
10689 struct btrfs_file_extent_item *fi;
10690 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10692 int found_parent = 0;
10695 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10696 if (!extent_buffer_uptodate(eb))
10699 nr = btrfs_header_nritems(eb);
10700 for (i = 0; i < nr; i++) {
10701 btrfs_item_key_to_cpu(eb, &key, i);
10702 if (key.type != BTRFS_EXTENT_DATA_KEY)
10705 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10706 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10709 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10716 free_extent_buffer(eb);
10717 if (!found_parent) {
10718 error("shared extent %llu referencer lost (parent: %llu)",
10720 return REFERENCER_MISSING;
/*
 * check_extent_item - validate one EXTENT_ITEM / METADATA_ITEM
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * Implementation outline:
 *  - The extent size is added to the global bytes_used (key.offset for an
 *    EXTENT_ITEM key, nodesize on the other visible line).
 *  - Items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - Tree block extents are tested with check_crossing_stripes().
 *  - The level comes from btrfs_tree_block_info for old style metadata
 *    items and from key.offset on the other visible branch.
 *  - Every inline ref is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); any other type sets UNKNOWN_TYPE.
 *
 * NOTE(review): how ret from the helpers is merged into err, the loop
 * construct and the return statement are not visible in this view.
 */
10726 * This function will check a given extent item, including its backref and
10727 * itself (like crossing stripe boundary and type)
10729 * Since we don't use extent_record anymore, introduce new error bit
10731 static int check_extent_item(struct btrfs_fs_info *fs_info,
10732 struct extent_buffer *eb, int slot)
10734 struct btrfs_extent_item *ei;
10735 struct btrfs_extent_inline_ref *iref;
10736 struct btrfs_extent_data_ref *dref;
10740 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10741 u32 item_size = btrfs_item_size_nr(eb, slot);
10746 struct btrfs_key key;
10750 btrfs_item_key_to_cpu(eb, &key, slot);
10751 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10752 bytes_used += key.offset;
10754 bytes_used += nodesize;
10756 if (item_size < sizeof(*ei)) {
10758 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10759 * old thing when on disk format is still un-determined.
10760 * No need to care about it anymore
10762 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10766 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10767 flags = btrfs_extent_flags(eb, ei);
10769 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10771 if (metadata && check_crossing_stripes(global_info, key.objectid,
10773 error("bad metadata [%llu, %llu) crossing stripe boundary",
10774 key.objectid, key.objectid + nodesize);
10775 err |= CROSSING_STRIPE_BOUNDARY;
/* The data following the extent item starts right after its fixed part. */
10778 ptr = (unsigned long)(ei + 1);
10780 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10781 /* Old EXTENT_ITEM metadata */
10782 struct btrfs_tree_block_info *info;
10784 info = (struct btrfs_tree_block_info *)ptr;
10785 level = btrfs_tree_block_level(eb, info);
10786 ptr += sizeof(struct btrfs_tree_block_info);
10788 /* New METADATA_ITEM */
10789 level = key.offset;
10791 end = (unsigned long)ei + item_size;
10794 /* Reached extent item end normally */
10798 /* Beyond extent item end, wrong item size */
10800 err |= ITEM_SIZE_MISMATCH;
10801 error("extent item at bytenr %llu slot %d has wrong size",
10806 /* Now check every backref in this extent item */
10807 iref = (struct btrfs_extent_inline_ref *)ptr;
10808 type = btrfs_extent_inline_ref_type(eb, iref);
10809 offset = btrfs_extent_inline_ref_offset(eb, iref);
10811 case BTRFS_TREE_BLOCK_REF_KEY:
10812 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10816 case BTRFS_SHARED_BLOCK_REF_KEY:
10817 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10821 case BTRFS_EXTENT_DATA_REF_KEY:
10822 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10823 ret = check_extent_data_backref(fs_info,
10824 btrfs_extent_data_ref_root(eb, dref),
10825 btrfs_extent_data_ref_objectid(eb, dref),
10826 btrfs_extent_data_ref_offset(eb, dref),
10827 key.objectid, key.offset,
10828 btrfs_extent_data_ref_count(eb, dref));
10831 case BTRFS_SHARED_DATA_REF_KEY:
10832 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10836 error("extent[%llu %d %llu] has unknown ref type: %d",
10837 key.objectid, key.type, key.offset, type);
10838 err |= UNKNOWN_TYPE;
/* Step over the inline ref that was just examined. */
10842 ptr += btrfs_extent_inline_ref_size(type);
10850 * Check if a dev extent item is referred correctly by its chunk
/*
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the DEV_EXTENT item
 * @slot:    slot of the item inside @eb
 *
 * The chunk key (chunk_objectid, CHUNK_ITEM, chunk_offset) is built from
 * the dev extent and searched in the chunk root.  The chunk length is
 * compared with the dev extent length, and the chunk stripes are scanned
 * for one whose devid and offset equal the objectid and offset of the dev
 * extent key.  When no matching chunk stripe is found an error is printed
 * and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the dev extent length is compared with the whole chunk
 * length here, while check_chunk_refs() in this file compares dev extent
 * lengths with calc_stripe_length(type, length, num_stripes).  Confirm that
 * this comparison is intended for chunks with more than one data stripe.
 * NOTE(review): the assignment of l, the jumps on failure and the value
 * returned on success are not visible in this view.
 */
10852 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10853 struct extent_buffer *eb, int slot)
10855 struct btrfs_root *chunk_root = fs_info->chunk_root;
10856 struct btrfs_dev_extent *ptr;
10857 struct btrfs_path path;
10858 struct btrfs_key chunk_key;
10859 struct btrfs_key devext_key;
10860 struct btrfs_chunk *chunk;
10861 struct extent_buffer *l;
10865 int found_chunk = 0;
10868 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10869 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10870 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to. */
10872 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10873 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10874 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10876 btrfs_init_path(&path);
10877 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10882 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10883 if (btrfs_chunk_length(l, chunk) != length)
/* Look for a stripe that points back at this device and offset. */
10886 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10887 for (i = 0; i < num_stripes; i++) {
10888 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10889 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10891 if (devid == devext_key.objectid &&
10892 offset == devext_key.offset) {
10898 btrfs_release_path(&path);
10899 if (!found_chunk) {
10901 "device extent[%llu, %llu, %llu] did not find the related chunk",
10902 devext_key.objectid, devext_key.offset, length);
10903 return REFERENCER_MISSING;
10909 * Check if the used space is correct with the dev item
/*
 * check_dev_item - compare a dev item's bytes_used with its dev extents.
 *
 * @fs_info: filesystem info; supplies dev_root for the lookup
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Reads the device id and bytes_used from the dev item, then walks the
 * BTRFS_DEV_EXTENT_KEY items of that device in the dev tree and sums their
 * lengths into total.
 *
 * Returns REFERENCER_MISSING on the error path that follows the initial
 * search (the condition guarding it is not visible in this listing) and
 * ACCOUNTING_MISMATCH when used != total.
 * NOTE(review): in the mismatch message the key is re-read from @eb but
 * the arguments printed for dev[...] are BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id rather than key.objectid, key.type and
 * key.offset as the earlier message does - looks unintended, confirm.
 */
10911 static int check_dev_item(struct btrfs_fs_info *fs_info,
10912 struct extent_buffer *eb, int slot)
10914 struct btrfs_root *dev_root = fs_info->dev_root;
10915 struct btrfs_dev_item *dev_item;
10916 struct btrfs_path path;
10917 struct btrfs_key key;
10918 struct btrfs_dev_extent *ptr;
10924 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10925 dev_id = btrfs_device_id(eb, dev_item);
10926 used = btrfs_device_bytes_used(eb, dev_item);
10928 key.objectid = dev_id;
10929 key.type = BTRFS_DEV_EXTENT_KEY;
10932 btrfs_init_path(&path);
10933 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Error path: reload the dev item key so the message names the dev item. */
10935 btrfs_item_key_to_cpu(eb, &key, slot);
10936 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10937 key.objectid, key.type, key.offset);
10938 btrfs_release_path(&path);
10939 return REFERENCER_MISSING;
10942 /* Iterate dev_extents to calculate the used space of a device */
10944 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10947 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10948 if (key.objectid > dev_id)
10950 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10953 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10954 struct btrfs_dev_extent);
10955 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10957 ret = btrfs_next_item(dev_root, &path);
10961 btrfs_release_path(&path);
10963 if (used != total) {
10964 btrfs_item_key_to_cpu(eb, &key, slot);
10966 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10967 total, used, BTRFS_ROOT_TREE_OBJECTID,
10968 BTRFS_DEV_EXTENT_KEY, dev_id);
10969 return ACCOUNTING_MISMATCH;
10975 * Check a block group item with its referencer (chunk) and its used space
10976 * with extent/metadata item
/*
 * check_block_group_item - cross-check a block group item.
 *
 * @fs_info: filesystem info; supplies extent_root, chunk_root and super_copy
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Two independent checks are made:
 *  1) The chunk tree is searched for (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *     BTRFS_CHUNK_ITEM_KEY, bg_key.objectid). A failed lookup sets
 *     REFERENCER_MISSING, a chunk length mismatch sets REFERENCER_MISMATCH.
 *  2) The extent tree is walked from the block group bytenr up to
 *     bg_key.objectid + bg_key.offset. EXTENT_ITEM / METADATA_ITEM keys are
 *     accounted into total, and each extent's DATA / TREE_BLOCK flag is
 *     compared with the block group flags (CHUNK_TYPE_MISMATCH on conflict).
 *     A final total != used sets ACCOUNTING_MISMATCH.
 *
 * Error bits are OR-ed into err.
 * NOTE(review): the return statement, and the amount added to total for
 * METADATA_ITEM keys, are on lines not visible in this listing - confirm.
 */
10978 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10979 struct extent_buffer *eb, int slot)
10981 struct btrfs_root *extent_root = fs_info->extent_root;
10982 struct btrfs_root *chunk_root = fs_info->chunk_root;
10983 struct btrfs_block_group_item *bi;
10984 struct btrfs_block_group_item bg_item;
10985 struct btrfs_path path;
10986 struct btrfs_key bg_key;
10987 struct btrfs_key chunk_key;
10988 struct btrfs_key extent_key;
10989 struct btrfs_chunk *chunk;
10990 struct extent_buffer *leaf;
10991 struct btrfs_extent_item *ei;
10992 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11000 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11001 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so the accessors can read it. */
11002 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11003 used = btrfs_block_group_used(&bg_item);
11004 bg_flags = btrfs_block_group_flags(&bg_item);
11006 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11007 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11008 chunk_key.offset = bg_key.objectid;
11010 btrfs_init_path(&path);
11011 /* Search for the referencer chunk */
11012 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11015 "block group[%llu %llu] did not find the related chunk item",
11016 bg_key.objectid, bg_key.offset);
11017 err |= REFERENCER_MISSING;
11019 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11020 struct btrfs_chunk);
11021 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11024 "block group[%llu %llu] related chunk item length does not match",
11025 bg_key.objectid, bg_key.offset);
11026 err |= REFERENCER_MISMATCH;
11029 btrfs_release_path(&path);
11031 /* Search from the block group bytenr */
11032 extent_key.objectid = bg_key.objectid;
11033 extent_key.type = 0;
11034 extent_key.offset = 0;
11036 btrfs_init_path(&path);
11037 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11041 /* Iterate extent tree to account used space */
11043 leaf = path.nodes[0];
11045 /* Search slot can point to the last item beyond leaf nritems */
11046 if (path.slots[0] >= btrfs_header_nritems(leaf))
11049 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11050 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11053 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11054 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11056 if (extent_key.objectid < bg_key.objectid)
11059 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11062 total += extent_key.offset;
11064 ei = btrfs_item_ptr(leaf, path.slots[0],
11065 struct btrfs_extent_item);
11066 flags = btrfs_extent_flags(leaf, ei);
/* A data extent needs a DATA block group; a tree block needs SYSTEM or METADATA. */
11067 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11068 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11070 "bad extent[%llu, %llu) type mismatch with chunk",
11071 extent_key.objectid,
11072 extent_key.objectid + extent_key.offset);
11073 err |= CHUNK_TYPE_MISMATCH;
11075 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11076 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11077 BTRFS_BLOCK_GROUP_METADATA))) {
11079 "bad extent[%llu, %llu) type mismatch with chunk",
11080 extent_key.objectid,
11081 extent_key.objectid + nodesize);
11082 err |= CHUNK_TYPE_MISMATCH;
11086 ret = btrfs_next_item(extent_root, &path);
11092 btrfs_release_path(&path);
11094 if (total != used) {
11096 "block group[%llu %llu] used %llu but extent items used %llu",
11097 bg_key.objectid, bg_key.offset, used, total);
11098 err |= ACCOUNTING_MISMATCH;
11104 * Check a chunk item.
11105 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - validate a chunk item and the items that refer to it.
 *
 * @fs_info: filesystem info; supplies extent_root, dev_root and super_copy
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Checks performed, each OR-ing a bit into err:
 *  - chunk length aligned to the super block sectorsize (BYTES_UNALIGNED)
 *  - at least one bit of BTRFS_BLOCK_GROUP_TYPE_MASK set (UNKNOWN_TYPE)
 *  - at most one profile bit set, tested with profile & (profile - 1)
 *    (UNKNOWN_TYPE)
 *  - a block group item keyed (chunk_key.offset, BLOCK_GROUP_ITEM, length)
 *    exists in the extent tree and its flags equal the chunk type
 *    (REFERENCER_MISSING for both cases)
 *  - every stripe has a dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    whose chunk objectid, chunk offset and length match (BACKREF_MISSING)
 *
 * NOTE(review): the stripe error message prints chunk_key.objectid while
 * every other message in this function prints chunk_key.offset as the
 * chunk start - probably should be chunk_key.offset, confirm.
 * NOTE(review): a block group flags mismatch is reported with
 * REFERENCER_MISSING although the item was found; check_block_group_item
 * uses REFERENCER_MISMATCH for its analogous length mismatch - confirm.
 * NOTE(review): the return statement is not visible in this listing.
 */
11107 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11108 struct extent_buffer *eb, int slot)
11110 struct btrfs_root *extent_root = fs_info->extent_root;
11111 struct btrfs_root *dev_root = fs_info->dev_root;
11112 struct btrfs_path path;
11113 struct btrfs_key chunk_key;
11114 struct btrfs_key bg_key;
11115 struct btrfs_key devext_key;
11116 struct btrfs_chunk *chunk;
11117 struct extent_buffer *leaf;
11118 struct btrfs_block_group_item *bi;
11119 struct btrfs_block_group_item bg_item;
11120 struct btrfs_dev_extent *ptr;
11121 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11133 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11134 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11135 length = btrfs_chunk_length(eb, chunk);
11136 chunk_end = chunk_key.offset + length;
11137 if (!IS_ALIGNED(length, sectorsize)) {
11138 error("chunk[%llu %llu) not aligned to %u",
11139 chunk_key.offset, chunk_end, sectorsize);
11140 err |= BYTES_UNALIGNED;
11144 type = btrfs_chunk_type(eb, chunk);
11145 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11146 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11147 error("chunk[%llu %llu) has no chunk type",
11148 chunk_key.offset, chunk_end);
11149 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one profile bit is set. */
11151 if (profile && (profile & (profile - 1))) {
11152 error("chunk[%llu %llu) multiple profiles detected: %llx",
11153 chunk_key.offset, chunk_end, profile);
11154 err |= UNKNOWN_TYPE;
11157 bg_key.objectid = chunk_key.offset;
11158 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11159 bg_key.offset = length;
11161 btrfs_init_path(&path);
11162 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11165 "chunk[%llu %llu) did not find the related block group item",
11166 chunk_key.offset, chunk_end);
11167 err |= REFERENCER_MISSING;
11169 leaf = path.nodes[0];
11170 bi = btrfs_item_ptr(leaf, path.slots[0],
11171 struct btrfs_block_group_item);
11172 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11174 if (btrfs_block_group_flags(&bg_item) != type) {
11176 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11177 chunk_key.offset, chunk_end, type,
11178 btrfs_block_group_flags(&bg_item));
11179 err |= REFERENCER_MISSING;
11183 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* The path is released and re-initialised at the start of every stripe lookup. */
11184 for (i = 0; i < num_stripes; i++) {
11185 btrfs_release_path(&path);
11186 btrfs_init_path(&path);
11187 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11188 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11189 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11191 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11194 goto not_match_dev;
11196 leaf = path.nodes[0];
11197 ptr = btrfs_item_ptr(leaf, path.slots[0],
11198 struct btrfs_dev_extent);
11199 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11200 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11201 if (objectid != chunk_key.objectid ||
11202 offset != chunk_key.offset ||
11203 btrfs_dev_extent_length(leaf, ptr) != length)
11204 goto not_match_dev;
11207 err |= BACKREF_MISSING;
11209 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11210 chunk_key.objectid, chunk_end, i);
11213 btrfs_release_path(&path);
11219 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - route every item of a leaf to its type-specific check.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf to walk
 *
 * Each item key is read with btrfs_item_key_to_cpu() and dispatched on its
 * type: extent data, block group, dev item, chunk, dev extent,
 * extent/metadata item and the four keyed backref types each call their
 * checker. BTRFS_EXTENT_CSUM_KEY items only add their item size to
 * total_csum_bytes.
 * The walk continues while ++slot < btrfs_header_nritems(eb).
 * NOTE(review): how the per-item results are accumulated and what is
 * returned is on lines not visible in this listing - confirm.
 */
11221 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11223 struct btrfs_fs_info *fs_info = root->fs_info;
11224 struct btrfs_key key;
11227 struct btrfs_extent_data_ref *dref;
11232 btrfs_item_key_to_cpu(eb, &key, slot);
11236 case BTRFS_EXTENT_DATA_KEY:
11237 ret = check_extent_data_item(root, eb, slot);
11240 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11241 ret = check_block_group_item(fs_info, eb, slot);
11244 case BTRFS_DEV_ITEM_KEY:
11245 ret = check_dev_item(fs_info, eb, slot);
11248 case BTRFS_CHUNK_ITEM_KEY:
11249 ret = check_chunk_item(fs_info, eb, slot);
11252 case BTRFS_DEV_EXTENT_KEY:
11253 ret = check_dev_extent_item(fs_info, eb, slot);
11256 case BTRFS_EXTENT_ITEM_KEY:
11257 case BTRFS_METADATA_ITEM_KEY:
11258 ret = check_extent_item(fs_info, eb, slot);
/* Csum items are only accounted here, no checker is called for them. */
11261 case BTRFS_EXTENT_CSUM_KEY:
11262 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11264 case BTRFS_TREE_BLOCK_REF_KEY:
11265 ret = check_tree_block_backref(fs_info, key.offset,
11269 case BTRFS_EXTENT_DATA_REF_KEY:
11270 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11271 ret = check_extent_data_backref(fs_info,
11272 btrfs_extent_data_ref_root(eb, dref),
11273 btrfs_extent_data_ref_objectid(eb, dref),
11274 btrfs_extent_data_ref_offset(eb, dref),
11276 btrfs_extent_data_ref_count(eb, dref));
11279 case BTRFS_SHARED_BLOCK_REF_KEY:
11280 ret = check_shared_block_backref(fs_info, key.offset,
11284 case BTRFS_SHARED_DATA_REF_KEY:
11285 ret = check_shared_data_backref(fs_info, key.offset,
11293 if (++slot < btrfs_header_nritems(eb))
11300 * Helper function for later fs/subvol tree check. To determine if a tree
11301 * block should be checked.
11302 * This function ensures that only the direct referencer with the lowest
11303 * rootid checks a fs/subvolume tree block.
11305 * Backref check at extent tree would detect errors like missing subvolume
11306 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that checks tree block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * Looks up the extent item of @eb in the extent tree (search key
 * (bytenr, BTRFS_METADATA_ITEM_KEY, -1), then btrfs_previous_extent_item())
 * and walks its inline refs. The inline refs start directly after the
 * extent item for METADATA_ITEM keys and after the btrfs_tree_block_info
 * otherwise. If a BTRFS_TREE_BLOCK_REF_KEY ref with offset <
 * root->objectid is found, the path is released and the walk stops early.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * The caller skips the block when the result is zero, so presumably the
 * early exit returns 0 and all other paths return non-zero - confirm.
 */
11308 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11310 struct btrfs_root *extent_root = root->fs_info->extent_root;
11311 struct btrfs_key key;
11312 struct btrfs_path path;
11313 struct extent_buffer *leaf;
11315 struct btrfs_extent_item *ei;
11321 struct btrfs_extent_inline_ref *iref;
11324 btrfs_init_path(&path);
11325 key.objectid = btrfs_header_bytenr(eb);
11326 key.type = BTRFS_METADATA_ITEM_KEY;
11327 key.offset = (u64)-1;
11330 * Any failure in backref resolving means we can't determine
11331 * whom the tree block belongs to.
11332 * So in that case, we need to check that tree block
11334 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11338 ret = btrfs_previous_extent_item(extent_root, &path,
11339 btrfs_header_bytenr(eb));
11343 leaf = path.nodes[0];
11344 slot = path.slots[0];
11345 btrfs_item_key_to_cpu(leaf, &key, slot);
11346 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Locate the first inline ref; its position depends on the item key type. */
11348 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11349 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11351 struct btrfs_tree_block_info *info;
11353 info = (struct btrfs_tree_block_info *)(ei + 1);
11354 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11357 item_size = btrfs_item_size_nr(leaf, slot);
11358 ptr = (unsigned long)iref;
11359 end = (unsigned long)ei + item_size;
11360 while (ptr < end) {
11361 iref = (struct btrfs_extent_inline_ref *)ptr;
11362 type = btrfs_extent_inline_ref_type(leaf, iref);
11363 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11366 * We only check the tree block if current root is
11367 * the lowest referencer of it.
11369 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11370 offset < root->objectid) {
11371 btrfs_release_path(&path);
11375 ptr += btrfs_extent_inline_ref_size(type);
11378 * Normally we should also check keyed tree block ref, but that may be
11379 * very time consuming. Inlined ref should already make us skip a lot
11380 * of refs now. So skip search keyed tree block ref.
11384 btrfs_release_path(&path);
11389 * Traversal function for tree block. We will do:
11390 * 1) Skip shared fs/subvolume tree blocks
11391 * 2) Update related bytes accounting
11392 * 3) Pre-order traversal
/*
 * traverse_tree_block - check one tree block, then recurse into its children.
 *
 * @root: root whose tree is being walked
 * @node: tree block to check
 *
 * For fs trees (is_fstree()) the block is first filtered by should_check().
 * The block length is added to total_btree_bytes, and to
 * total_fs_tree_bytes / total_extent_tree_bytes depending on the header
 * owner. found_old_backref is set for a block owned by
 * BTRFS_TREE_RELOC_OBJECTID with mixed backref revision but without the
 * BTRFS_HEADER_FLAG_RELOC flag.
 * The block's own backref is verified with check_tree_block_ref(). Leaves
 * are handed to check_leaf_items(); for nodes every child pointer is read
 * with read_tree_block() and traversed recursively when the buffer is
 * up to date. Child keys are tested with is_dropped_key() when level equals
 * root_item.drop_level.
 * NOTE(review): the action taken for dropped keys, the error accumulation
 * and the return value are on lines not visible in this listing - confirm.
 */
11394 static int traverse_tree_block(struct btrfs_root *root,
11395 struct extent_buffer *node)
11397 struct extent_buffer *eb;
11398 struct btrfs_key key;
11399 struct btrfs_key drop_key;
11407 * Skip shared fs/subvolume tree block, in that case they will
11408 * be checked by referencer with lowest rootid
11410 if (is_fstree(root->objectid) && !should_check(root, node))
11413 /* Update bytes accounting */
11414 total_btree_bytes += node->len;
11415 if (fs_root_objectid(btrfs_header_owner(node)))
11416 total_fs_tree_bytes += node->len;
11417 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11418 total_extent_tree_bytes += node->len;
11419 if (!found_old_backref &&
11420 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11421 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11422 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11423 found_old_backref = 1;
11425 /* pre-order traversal, check itself first */
11426 level = btrfs_header_level(node);
11427 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11428 btrfs_header_level(node),
11429 btrfs_header_owner(node));
11433 "check %s failed root %llu bytenr %llu level %d, force continue check",
11434 level ? "node":"leaf", root->objectid,
11435 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the unused space of the leaf, then check its items. */
11438 btree_space_waste += btrfs_leaf_free_space(root, node);
11439 ret = check_leaf_items(root, node);
/* Node: account the unused key pointer slots as wasted space. */
11444 nr = btrfs_header_nritems(node);
11445 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11446 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11447 sizeof(struct btrfs_key_ptr);
11449 /* Then check all its children */
11450 for (i = 0; i < nr; i++) {
11451 u64 blocknr = btrfs_node_blockptr(node, i);
11453 btrfs_node_key_to_cpu(node, &key, i);
11454 if (level == root->root_item.drop_level &&
11455 is_dropped_key(&key, &drop_key))
11459 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11460 * to call the function itself.
11462 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11463 if (extent_buffer_uptodate(eb)) {
11464 ret = traverse_tree_block(root, eb);
11467 free_extent_buffer(eb);
11474 * Low memory usage version of check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - traverse every tree with traverse_tree_block().
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Order of work as visible here:
 *  1) traverse the chunk tree
 *  2) traverse the tree root
 *  3) search the tree root starting at BTRFS_EXTENT_TREE_OBJECTID /
 *     BTRFS_ROOT_ITEM_KEY and, for each root item reached through
 *     btrfs_next_item(), read that root and traverse it. Roots with
 *     objectid BTRFS_TREE_RELOC_OBJECTID are read with
 *     btrfs_read_fs_root_no_cache() and freed with btrfs_free_fs_root()
 *     after the traversal; all others go through btrfs_read_fs_root().
 *
 * NOTE(review): error accumulation and the return value are not visible.
 * NOTE(review): the message "cannot find extent treet in tree_root" has a
 * typo ("treet"), and "%lld" is used to print key.objectid - confirm
 * whether "%llu" is intended, as used by other messages in this file.
 */
11476 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11478 struct btrfs_path path;
11479 struct btrfs_key key;
11480 struct btrfs_root *root1;
11481 struct btrfs_root *cur_root;
11485 root1 = root->fs_info->chunk_root;
11486 ret = traverse_tree_block(root1, root1->node);
11489 root1 = root->fs_info->tree_root;
11490 ret = traverse_tree_block(root1, root1->node);
11493 btrfs_init_path(&path);
11494 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11496 key.type = BTRFS_ROOT_ITEM_KEY;
11498 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11500 error("cannot find extent treet in tree_root");
11505 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11506 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* The key read from the leaf is reused, with offset -1, to read the root. */
11508 key.offset = (u64)-1;
11510 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11511 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11514 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11515 if (IS_ERR(cur_root) || !cur_root) {
11516 error("failed to read tree: %lld", key.objectid);
11520 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached above, so they are freed here. */
11523 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11524 btrfs_free_fs_root(cur_root);
11526 ret = btrfs_next_item(root1, &path);
11532 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a fresh, empty root block.
 *
 * @trans:     running transaction; its transid becomes the block generation
 * @root:      root to reinitialise
 * @overwrite: NOTE(review): the branch that tests this flag is not visible;
 *             presumably it selects reusing the old root block instead of
 *             allocating one with btrfs_alloc_free_block() - confirm
 *
 * The chosen buffer gets its header zeroed and then filled in: level,
 * bytenr, generation, BTRFS_MIXED_BACKREF_REV, owner, fsid and chunk tree
 * uuid. It is then marked dirty. If the new buffer starts at the same
 * bytenr as the old root node, the root item generation and level are
 * refreshed and written with btrfs_update_root(). The old node is released
 * with free_extent_buffer() and the root is added to the dirty list.
 * NOTE(review): the assignment of root->node and the return statements are
 * on lines not visible in this listing.
 */
11536 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11537 struct btrfs_root *root, int overwrite)
11539 struct extent_buffer *c;
11540 struct extent_buffer *old = root->node;
11543 struct btrfs_disk_key disk_key = {0,0,0};
11549 extent_buffer_get(c);
11552 c = btrfs_alloc_free_block(trans, root,
11554 root->root_key.objectid,
11555 &disk_key, level, 0, 0);
11558 extent_buffer_get(c);
/* Wipe the header before writing the new header fields. */
11562 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11563 btrfs_set_header_level(c, level);
11564 btrfs_set_header_bytenr(c, c->start);
11565 btrfs_set_header_generation(c, trans->transid);
11566 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11567 btrfs_set_header_owner(c, root->root_key.objectid);
11569 write_extent_buffer(c, root->fs_info->fsid,
11570 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11572 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11573 btrfs_header_chunk_tree_uuid(c),
11576 btrfs_mark_buffer_dirty(c);
11578 * this case can happen in the following case:
11580 * 1.overwrite previous root.
11582 * 2.reinit reloc data root, this is because we skip pin
11583 * down reloc data tree before which means we can allocate
11584 * same block bytenr here.
11586 if (old->start == c->start) {
11587 btrfs_set_root_generation(&root->root_item,
11589 root->root_item.level = btrfs_header_level(root->node);
11590 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11591 &root->root_key, &root->root_item);
11593 free_extent_buffer(c);
11597 free_extent_buffer(old);
11599 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - recursively pin a tree block and everything below it.
 *
 * @fs_info:   filesystem info; owns pinned_extents and extent_root
 * @eb:        tree block to pin
 * @tree_root: non-zero when @eb belongs to the tree root; passed unchanged
 *             to the recursion on child nodes, and 0 is passed when
 *             descending into a root referenced by a root item
 *
 * A block already marked EXTENT_DIRTY in pinned_extents is not pinned again.
 * Otherwise the block is pinned with btrfs_pin_extent() and its entries are
 * walked:
 *  - item entries: only BTRFS_ROOT_ITEM_KEY items are considered, excluding
 *    the extent tree, tree reloc and data reloc roots. The referenced root
 *    block is read and pinned recursively.
 *  - node pointers: when level == 1 and @tree_root is 0 the child is pinned
 *    by bytenr/nodesize without being read; otherwise it is read and pinned
 *    recursively.
 * "Error reading root block" / "Error reading tree block" is printed when a
 * read buffer is not up to date.
 * NOTE(review): the leaf/node branch condition and all return statements
 * are on lines not visible in this listing - confirm.
 */
11603 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11604 struct extent_buffer *eb, int tree_root)
11606 struct extent_buffer *tmp;
11607 struct btrfs_root_item *ri;
11608 struct btrfs_key key;
11611 int level = btrfs_header_level(eb);
11617 * If we have pinned this block before, don't pin it again.
11618 * This can not only avoid forever loop with broken filesystem
11619 * but also give us some speedups.
11621 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11622 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11625 btrfs_pin_extent(fs_info, eb->start, eb->len);
11627 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11628 nritems = btrfs_header_nritems(eb);
11629 for (i = 0; i < nritems; i++) {
11631 btrfs_item_key_to_cpu(eb, &key, i);
11632 if (key.type != BTRFS_ROOT_ITEM_KEY)
11634 /* Skip the extent root and reloc roots */
11635 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11636 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11637 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11639 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11640 bytenr = btrfs_disk_root_bytenr(eb, ri);
11643 * If at any point we start needing the real root we
11644 * will have to build a stump root for the root we are
11645 * in, but for now this doesn't actually use the root so
11646 * just pass in extent_root.
11648 tmp = read_tree_block(fs_info->extent_root, bytenr,
11650 if (!extent_buffer_uptodate(tmp)) {
11651 fprintf(stderr, "Error reading root block\n");
11654 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11655 free_extent_buffer(tmp);
11659 bytenr = btrfs_node_blockptr(eb, i);
11661 /* If we aren't the tree root don't read the block */
11662 if (level == 1 && !tree_root) {
11663 btrfs_pin_extent(fs_info, bytenr, nodesize);
11667 tmp = read_tree_block(fs_info->extent_root, bytenr,
11669 if (!extent_buffer_uptodate(tmp)) {
11670 fprintf(stderr, "Error reading tree block\n");
11673 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11674 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the blocks of the chunk tree and the tree root.
 *
 * @fs_info: filesystem info supplying chunk_root and tree_root
 *
 * Calls pin_down_tree_blocks() on the chunk root node with tree_root = 0,
 * then returns the result of pinning the tree root node with tree_root = 1.
 * NOTE(review): the check of the first call's result is not visible here.
 */
11683 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11687 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11691 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from chunk items.
 *
 * @fs_info: filesystem info; its avail_*_alloc_bits are cleared and its
 *           block group and free_space_cache state is repopulated
 *
 * Searches the chunk tree for BTRFS_CHUNK_ITEM_KEY items and, for each
 * chunk item found, calls btrfs_add_block_group() with the chunk type,
 * key and length, and marks [key.offset, key.offset + chunk length] dirty
 * in fs_info->free_space_cache. Leaves are advanced with btrfs_next_leaf().
 * Afterwards the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing start past each one.
 * NOTE(review): the work done per block group in that final loop, the
 * loop exits and the return values are on lines not visible here.
 */
11694 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11696 struct btrfs_block_group_cache *cache;
11697 struct btrfs_path path;
11698 struct extent_buffer *leaf;
11699 struct btrfs_chunk *chunk;
11700 struct btrfs_key key;
11704 btrfs_init_path(&path);
11706 key.type = BTRFS_CHUNK_ITEM_KEY;
11708 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11710 btrfs_release_path(&path);
11715 * We do this in case the block groups were screwed up and had alloc
11716 * bits that aren't actually set on the chunks. This happens with
11717 * restored images every time and could happen in real life I guess.
11719 fs_info->avail_data_alloc_bits = 0;
11720 fs_info->avail_metadata_alloc_bits = 0;
11721 fs_info->avail_system_alloc_bits = 0;
11723 /* First we need to create the in-memory block groups */
11725 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11726 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11728 btrfs_release_path(&path);
11736 leaf = path.nodes[0];
11737 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11738 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11743 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11744 btrfs_add_block_group(fs_info, 0,
11745 btrfs_chunk_type(leaf, chunk),
11746 key.objectid, key.offset,
11747 btrfs_chunk_length(leaf, chunk));
11748 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11749 key.offset + btrfs_chunk_length(leaf, chunk));
11754 cache = btrfs_lookup_first_block_group(fs_info, start);
11758 start = cache->key.objectid + cache->key.offset;
11761 btrfs_release_path(&path);
/*
 * reset_balance - drop pending balance state and reinit the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem info; tree_root is modified
 *
 * Steps visible in this listing:
 *  1) search the tree root for (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) and delete the item with btrfs_del_item();
 *     one path jumps straight to reinit_data_reloc
 *  2) search from (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY) and
 *     delete runs of items whose objectid is BTRFS_TREE_RELOC_OBJECTID with
 *     btrfs_del_items(), re-searching as needed
 *  3) read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *     transaction, reinitialise it with btrfs_fsck_reinit_root(.., 0) and
 *     create its root directory with btrfs_make_root_dir()
 *
 * NOTE(review): most branch conditions, the loop structure and the return
 * value are on lines not visible in this listing - confirm.
 */
11765 static int reset_balance(struct btrfs_trans_handle *trans,
11766 struct btrfs_fs_info *fs_info)
11768 struct btrfs_root *root = fs_info->tree_root;
11769 struct btrfs_path path;
11770 struct extent_buffer *leaf;
11771 struct btrfs_key key;
11772 int del_slot, del_nr = 0;
11776 btrfs_init_path(&path);
11777 key.objectid = BTRFS_BALANCE_OBJECTID;
11778 key.type = BTRFS_BALANCE_ITEM_KEY;
11780 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11785 goto reinit_data_reloc;
11790 ret = btrfs_del_item(trans, root, &path);
11793 btrfs_release_path(&path);
11795 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11796 key.type = BTRFS_ROOT_ITEM_KEY;
11798 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11802 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11807 ret = btrfs_del_items(trans, root, &path,
11814 btrfs_release_path(&path);
11817 ret = btrfs_search_slot(trans, root, &key, &path,
11824 leaf = path.nodes[0];
11825 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11826 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11828 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11833 del_slot = path.slots[0];
11842 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11846 btrfs_release_path(&path);
/* From here on, root refers to the data reloc tree, not the tree root. */
11849 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11850 key.type = BTRFS_ROOT_ITEM_KEY;
11851 key.offset = (u64)-1;
11852 root = btrfs_read_fs_root(fs_info, &key);
11853 if (IS_ERR(root)) {
11854 fprintf(stderr, "Error reading data reloc tree\n");
11855 ret = PTR_ERR(root);
11858 record_root_in_trans(trans, root);
11859 ret = btrfs_fsck_reinit_root(trans, root, 0);
11862 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11864 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild an empty extent tree with block group items.
 *
 * @trans:   running transaction
 * @fs_info: filesystem info
 *
 * Sequence visible in this listing:
 *  1) filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message on stderr
 *  2) pin_metadata_blocks() pins the metadata in use
 *  3) btrfs_free_block_groups() then reset_block_groups() recreate the
 *     in-memory block groups
 *  4) btrfs_fsck_reinit_root() reinitialises the extent root
 *  5) every in-memory block group found through
 *     btrfs_lookup_first_block_group() has its item inserted into the
 *     extent root, followed by btrfs_extent_post_op()
 *  6) reset_balance() clears pending balance state
 *
 * Each failing step prints its own message to stderr.
 * NOTE(review): the error codes returned and the loop exit condition are
 * on lines not visible in this listing.
 */
11868 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11869 struct btrfs_fs_info *fs_info)
11875 * The only reason we don't do this is because right now we're just
11876 * walking the trees we find and pinning down their bytes, we don't look
11877 * at any of the leaves. In order to do mixed groups we'd have to check
11878 * the leaves of any fs roots and pin down the bytes for any file
11879 * extents we find. Not hard but why do it if we don't have to?
11881 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11882 fprintf(stderr, "We don't support re-initing the extent tree "
11883 "for mixed block groups yet, please notify a btrfs "
11884 "developer you want to do this so they can add this "
11885 "functionality.\n");
11890 * first we need to walk all of the trees except the extent tree and pin
11891 * down the bytes that are in use so we don't overwrite any existing
11894 ret = pin_metadata_blocks(fs_info);
11896 fprintf(stderr, "error pinning down used bytes\n");
11901 * Need to drop all the block groups since we're going to recreate all
11904 btrfs_free_block_groups(fs_info);
11905 ret = reset_block_groups(fs_info);
11907 fprintf(stderr, "error resetting the block groups\n");
11911 /* Ok we can allocate now, reinit the extent root */
11912 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11914 fprintf(stderr, "extent root initialization failed\n");
11916 * When the transaction code is updated we should end the
11917 * transaction, but for now progs only knows about commit so
11918 * just return an error.
11924 * Now we have all the in-memory block groups setup so we can make
11925 * allocations properly, and the metadata we care about is safe since we
11926 * pinned all of it above.
11929 struct btrfs_block_group_cache *cache;
11931 cache = btrfs_lookup_first_block_group(fs_info, start);
11934 start = cache->key.objectid + cache->key.offset;
11935 ret = btrfs_insert_item(trans, fs_info->extent_root,
11936 &cache->key, &cache->item,
11937 sizeof(cache->item));
11939 fprintf(stderr, "Error adding block group\n");
11942 btrfs_extent_post_op(trans, fs_info->extent_root);
11945 ret = reset_balance(trans, fs_info);
11947 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a copy-on-write of metadata block @eb.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * Reads the root named by the block's header owner, starts a transaction
 * and performs a COW search (cow = 1) for the first key of @eb with
 * path.lowest_level set to the block's level, then commits.
 *
 * Returns PTR_ERR() when the owner root cannot be read or the transaction
 * cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is discarded in
 * the visible statement, and the final return is not visible - confirm.
 */
11952 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11954 struct btrfs_path path;
11955 struct btrfs_trans_handle *trans;
11956 struct btrfs_key key;
11959 printf("Recowing metadata block %llu\n", eb->start);
11960 key.objectid = btrfs_header_owner(eb);
11961 key.type = BTRFS_ROOT_ITEM_KEY;
11962 key.offset = (u64)-1;
11964 root = btrfs_read_fs_root(root->fs_info, &key);
11965 if (IS_ERR(root)) {
11966 fprintf(stderr, "Couldn't find owner root %llu\n",
11968 return PTR_ERR(root);
11971 trans = btrfs_start_transaction(root, 1);
11973 return PTR_ERR(trans);
11975 btrfs_init_path(&path);
/* Stop the search at the level of eb; use the first key stored in eb. */
11976 path.lowest_level = btrfs_header_level(eb);
11977 if (path.lowest_level)
11978 btrfs_node_key_to_cpu(eb, &key, 0);
11980 btrfs_item_key_to_cpu(eb, &key, 0);
11982 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11983 btrfs_commit_transaction(trans, root);
11984 btrfs_release_path(&path);
/*
 * delete_bad_item - remove one recorded bad item from its owning tree.
 *
 * @root: any root of the filesystem; replaced by the root bad->root_id
 * @bad:  record holding the owning root id and the key of the item
 *
 * Prints the key being deleted, reads the owning root, starts a
 * transaction, searches for bad->key with ins_len = -1 and cow = 1,
 * deletes the item with btrfs_del_item() and commits.
 *
 * Returns PTR_ERR() when the owner root cannot be read or the transaction
 * cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is discarded in
 * the visible statement; the handling of the search result and the final
 * return are not visible - confirm.
 */
11988 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11990 struct btrfs_path path;
11991 struct btrfs_trans_handle *trans;
11992 struct btrfs_key key;
11995 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11996 bad->key.type, bad->key.offset);
11997 key.objectid = bad->root_id;
11998 key.type = BTRFS_ROOT_ITEM_KEY;
11999 key.offset = (u64)-1;
12001 root = btrfs_read_fs_root(root->fs_info, &key);
12002 if (IS_ERR(root)) {
12003 fprintf(stderr, "Couldn't find owner root %llu\n",
12005 return PTR_ERR(root);
12008 trans = btrfs_start_transaction(root, 1);
12010 return PTR_ERR(trans);
12012 btrfs_init_path(&path);
12013 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12019 ret = btrfs_del_item(trans, root, &path);
12021 btrfs_commit_transaction(trans, root);
12022 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Starts a transaction, sets the super block copy's log_root and
 * log_root_level to 0 and commits; ret receives PTR_ERR(trans) on a failed
 * start, otherwise the commit result.
 * NOTE(review): the return statement is not visible in this listing.
 */
12026 static int zero_log_tree(struct btrfs_root *root)
12028 struct btrfs_trans_handle *trans;
12031 trans = btrfs_start_transaction(root, 1);
12032 if (IS_ERR(trans)) {
12033 ret = PTR_ERR(trans);
12036 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12037 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12038 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: csum tree root; its sectorsize is the step size
 * @buf:       scratch buffer the data is read into
 * @start:     first byte of the range
 * (the remaining parameter is on a line not visible in this listing; the
 * body uses a value named len as the range length)
 *
 * For every sectorsize step below len, reads the data at start + offset
 * with read_extent_data() and records its checksum with
 * btrfs_csum_file_block(), passing start + len as the third argument.
 * NOTE(review): the error checks after both calls and the return value
 * are not visible in this listing.
 */
12042 static int populate_csum(struct btrfs_trans_handle *trans,
12043 struct btrfs_root *csum_root, char *buf, u64 start,
12050 while (offset < len) {
12051 sectorsize = csum_root->sectorsize;
12052 ret = read_extent_data(csum_root, buf, start + offset,
12056 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12057 start + offset, buf, sectorsize);
12060 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the regular extents of one tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose file extents are walked
 *
 * Allocates a buffer of one csum_root sector, walks the items of @cur_root
 * with btrfs_next_item() and, for each BTRFS_EXTENT_DATA_KEY item of type
 * BTRFS_FILE_EXTENT_REG, calls populate_csum() on the extent's disk bytenr
 * and disk length. A -EEXIST result from populate_csum() is tested
 * separately.
 * NOTE(review): the initial search key, what is done on -EEXIST, the
 * freeing of buf and the return value are on lines not visible here.
 */
12065 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12066 struct btrfs_root *csum_root,
12067 struct btrfs_root *cur_root)
12069 struct btrfs_path path;
12070 struct btrfs_key key;
12071 struct extent_buffer *node;
12072 struct btrfs_file_extent_item *fi;
12079 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12083 btrfs_init_path(&path);
12087 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12090 /* Iterate all regular file extents and fill its csum */
12092 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12094 if (key.type != BTRFS_EXTENT_DATA_KEY)
12096 node = path.nodes[0];
12097 slot = path.slots[0];
12098 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12099 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, hence disk_bytenr / disk_num_bytes. */
12101 start = btrfs_file_extent_disk_bytenr(node, fi);
12102 len = btrfs_file_extent_disk_num_bytes(node, fi);
12104 ret = populate_csum(trans, csum_root, buf, start, len);
12105 if (ret == -EEXIST)
12111 * TODO: if next leaf is corrupted, jump to nearest next valid
12114 ret = btrfs_next_item(cur_root, &path);
12124 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by walking every fs/subvol tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums; also supplies fs_info
 *
 * Searches the tree root from (BTRFS_FS_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY)
 * and walks the following items with btrfs_next_item(). Keys are tested
 * against BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree();
 * for each accepted root item the root is read with btrfs_read_fs_root()
 * and passed to fill_csum_tree_from_one_fs_root().
 * "Fail to read fs/subvol tree" is printed when the root cannot be read.
 * NOTE(review): which of the three tests end the walk and which merely
 * skip the item, plus the return value, are on lines not visible here.
 */
12129 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12130 struct btrfs_root *csum_root)
12132 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12133 struct btrfs_path path;
12134 struct btrfs_root *tree_root = fs_info->tree_root;
12135 struct btrfs_root *cur_root;
12136 struct extent_buffer *node;
12137 struct btrfs_key key;
12141 btrfs_init_path(&path);
12142 key.objectid = BTRFS_FS_TREE_OBJECTID;
12144 key.type = BTRFS_ROOT_ITEM_KEY;
12145 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12154 node = path.nodes[0];
12155 slot = path.slots[0];
12156 btrfs_item_key_to_cpu(node, &key, slot);
12157 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12159 if (key.type != BTRFS_ROOT_ITEM_KEY)
12161 if (!is_fstree(key.objectid))
12163 key.offset = (u64)-1;
12165 cur_root = btrfs_read_fs_root(fs_info, &key);
12166 if (IS_ERR(cur_root) || !cur_root) {
12167 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12171 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12176 ret = btrfs_next_item(tree_root, &path);
12186 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - rebuild checksums from extent tree data extents.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums; also supplies the
 *             extent root via csum_root->fs_info
 *
 * Searches the extent tree for BTRFS_EXTENT_ITEM_KEY items, allocates a
 * one-sector buffer and walks the leaves with btrfs_next_leaf(). Items that
 * are not EXTENT_ITEM keys, or whose extent flags lack
 * BTRFS_EXTENT_FLAG_DATA, take a separate branch; for the others
 * populate_csum() is called with key.objectid as the start address.
 * NOTE(review): the bodies of the skip branches, the slot advance, the
 * freeing of buf and the return value are on lines not visible here.
 */
12190 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12191 struct btrfs_root *csum_root)
12193 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12194 struct btrfs_path path;
12195 struct btrfs_extent_item *ei;
12196 struct extent_buffer *leaf;
12198 struct btrfs_key key;
12201 btrfs_init_path(&path);
12203 key.type = BTRFS_EXTENT_ITEM_KEY;
12205 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12207 btrfs_release_path(&path);
12211 buf = malloc(csum_root->sectorsize);
12213 btrfs_release_path(&path);
12218 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12219 ret = btrfs_next_leaf(extent_root, &path);
12227 leaf = path.nodes[0];
12229 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12230 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12235 ei = btrfs_item_ptr(leaf, path.slots[0],
12236 struct btrfs_extent_item);
12237 if (!(btrfs_extent_flags(leaf, ei) &
12238 BTRFS_EXTENT_FLAG_DATA)) {
12243 ret = populate_csum(trans, csum_root, buf, key.objectid,
12250 btrfs_release_path(&path);
12256 * Recalculate the csum and put it into the csum tree.
12258 * Extent tree init will wipe out all the extent info, so in that case we
12259 * can't rely on the extent tree and must use the fs trees. If search_fs_tree is set, we
12260 * will use fs/subvol trees to init the csum tree.
/*
 * Select the source used to refill the csum tree.
 *
 * @trans:          transaction handle, forwarded to the chosen helper
 * @csum_root:      csum tree root, forwarded to the chosen helper
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); otherwise
 *                  fill_csum_tree_from_extent() is used
 *
 * Returns whatever the chosen helper returns.
 */
12262 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12263 struct btrfs_root *csum_root,
12264 int search_fs_tree)
12266 if (search_fs_tree)
12267 return fill_csum_tree_from_fs(trans, csum_root);
12269 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: remove its entries one by one, then
 * free the tree itself and reset the global pointer to NULL.
 */
12272 static void free_roots_info_cache(void)
/* Checked first because the cache may not have been allocated. */
12274 if (!roots_info_cache)
12277 while (!cache_tree_empty(roots_info_cache)) {
12278 struct cache_extent *entry;
12279 struct root_item_info *rii;
12281 entry = first_cache_extent(roots_info_cache);
12284 remove_cache_extent(roots_info_cache, entry);
/*
 * Recover the record that embeds this cache_extent.
 * NOTE(review): the statement that releases rii is not visible in this listing.
 */
12285 rii = container_of(entry, struct root_item_info, cache_extent);
12289 free(roots_info_cache);
12290 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * Walks EXTENT_ITEM / METADATA_ITEM keys of info->extent_root. For an item
 * whose first inline ref has type BTRFS_TREE_BLOCK_REF_KEY, the ref's offset
 * is used as the root id, and a per-root record (struct root_item_info) is
 * created or updated with the highest level seen so far plus the bytenr and
 * generation of that extent.
 *
 * NOTE(review): several statements (declarations of ret/type/flags/root_id/
 * level, the loop construct, skip/goto targets and the return) fall on lines
 * that are absent from this listing.
 */
12293 static int build_roots_info_cache(struct btrfs_fs_info *info)
12296 struct btrfs_key key;
12297 struct extent_buffer *leaf;
12298 struct btrfs_path path;
/* Allocate and initialise the global cache tree on first use. */
12300 if (!roots_info_cache) {
12301 roots_info_cache = malloc(sizeof(*roots_info_cache));
12302 if (!roots_info_cache)
12304 cache_tree_init(roots_info_cache);
12307 btrfs_init_path(&path);
12309 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Search without a transaction handle (NULL) and with 0 for the last two arguments. */
12311 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12314 leaf = path.nodes[0];
12317 struct btrfs_key found_key;
12318 struct btrfs_extent_item *ei;
12319 struct btrfs_extent_inline_ref *iref;
12320 int slot = path.slots[0];
12325 struct cache_extent *entry;
12326 struct root_item_info *rii;
/* Slot is past the end of this leaf: advance and refresh the local leaf/slot copies. */
12328 if (slot >= btrfs_header_nritems(leaf)) {
12329 ret = btrfs_next_leaf(info->extent_root, &path);
12336 leaf = path.nodes[0];
12337 slot = path.slots[0];
12340 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are of interest. */
12342 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12343 found_key.type != BTRFS_METADATA_ITEM_KEY)
12346 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12347 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is tested for here; presumably skipped. */
12349 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12350 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset.
 */
12353 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12354 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12355 level = found_key.offset;
/*
 * Otherwise (branch header not visible): a btrfs_tree_block_info follows the
 * extent item, the inline ref follows that, and the level is read from it.
 */
12357 struct btrfs_tree_block_info *binfo;
12359 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12360 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12361 level = btrfs_tree_block_level(leaf, binfo);
12365 * For a root extent, it must be of the following type and the
12366 * first (and only one) iref in the item.
12368 type = btrfs_extent_inline_ref_type(leaf, iref);
12369 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the root id, which is the cache lookup key (size 1). */
12372 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12373 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New record: level starts at the (u8)-1 sentinel, meaning "no level recorded yet". */
12375 rii = malloc(sizeof(struct root_item_info));
12380 rii->cache_extent.start = root_id;
12381 rii->cache_extent.size = 1;
12382 rii->level = (u8)-1;
12383 entry = &rii->cache_extent;
12384 ret = insert_cache_extent(roots_info_cache, entry);
/* Existing record: recover it from the embedded cache_extent. */
12387 rii = container_of(entry, struct root_item_info,
12391 ASSERT(rii->cache_extent.start == root_id);
12392 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level than recorded (or the first extent for this root) replaces
 * the record and restarts node_count at 1.
 */
12394 if (level > rii->level || rii->level == (u8)-1) {
12395 rii->level = level;
12396 rii->bytenr = found_key.objectid;
12397 rii->gen = btrfs_extent_generation(leaf, ei);
12398 rii->node_count = 1;
/* Same level as recorded: the branch body is not visible; presumably it bumps node_count. */
12399 } else if (level == rii->level) {
12407 btrfs_release_path(&path);
/*
 * Compare the root item at the current slot of @path with the information
 * cached for that root in roots_info_cache and, if allowed, fix it in place.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of the root item; its objectid is the root id
 * @read_only_mode: when non-zero the item is only inspected, never rewritten
 *
 * NOTE(review): the return statements are on lines absent from this listing,
 * so the exact return values cannot be stated from here.
 */
12412 static int maybe_repair_root_item(struct btrfs_path *path,
12413 const struct btrfs_key *root_key,
12414 const int read_only_mode)
12416 const u64 root_id = root_key->objectid;
12417 struct cache_extent *entry;
12418 struct root_item_info *rii;
12419 struct btrfs_root_item ri;
12420 unsigned long offset;
/* Look up the cached record for this root id. */
12422 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12425 "Error: could not find extent items for root %llu\n",
12426 root_key->objectid);
12430 rii = container_of(entry, struct root_item_info, cache_extent);
12431 ASSERT(rii->cache_extent.start == root_id);
12432 ASSERT(rii->cache_extent.size == 1);
/* A node_count other than 1 is reported as "could not find btree root extent". */
12434 if (rii->node_count != 1) {
12436 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf into the local ri. */
12441 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12442 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any mismatch in bytenr, level or generation means the root item is outdated. */
12444 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12445 btrfs_root_level(&ri) != rii->level ||
12446 btrfs_root_generation(&ri) != rii->gen) {
12449 * If we're in repair mode but our caller told us to not update
12450 * the root item, i.e. just check if it needs to be updated, don't
12451 * print this message, since the caller will call us again shortly
12452 * for the same root item without read only mode (the caller will
12453 * open a transaction first).
12455 if (!(read_only_mode && repair))
12457 "%sroot item for root %llu,"
12458 " current bytenr %llu, current gen %llu, current level %u,"
12459 " new bytenr %llu, new gen %llu, new level %u\n",
12460 (read_only_mode ? "" : "fixing "),
12462 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12463 btrfs_root_level(&ri),
12464 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported separately. */
12466 if (btrfs_root_generation(&ri) > rii->gen) {
12468 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12469 root_id, btrfs_root_generation(&ri), rii->gen);
/* Writable mode: patch the three fields in ri and write it back into the leaf. */
12473 if (!read_only_mode) {
12474 btrfs_set_root_bytenr(&ri, rii->bytenr);
12475 btrfs_set_root_level(&ri, rii->level);
12476 btrfs_set_root_generation(&ri, rii->gen);
12477 write_extent_buffer(path->nodes[0], &ri,
12478 offset, sizeof(ri));
12488 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12489 * caused read-only snapshots to be corrupted if they were created at a moment
12490 * when the source subvolume/snapshot had orphan items. The issue was that the
12491 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12492 * node instead of the post orphan cleanup root node.
12493 * So this function, and its callees, just detects and fixes those cases. Even
12494 * though the regression was for read-only snapshots, this function applies to
12495 * any snapshot/subvolume root.
12496 * This must be run before any other repair code - not doing so makes other
12497 * repair code delete or modify backrefs in the extent tree, for example, which
12498 * will result in an inconsistent fs after repairing the root items.
/*
 * Scan the root items of info->tree_root and pass each one to
 * maybe_repair_root_item().
 *
 * The roots info cache is built first and freed before returning. A
 * transaction handle is only non-NULL once a fix is needed; whether it is
 * NULL decides the read-only flag passed to maybe_repair_root_item().
 *
 * NOTE(review): the loop construct, labels, counters and return statements
 * are on lines absent from this listing.
 */
12500 static int repair_root_items(struct btrfs_fs_info *info)
12502 struct btrfs_path path;
12503 struct btrfs_key key;
12504 struct extent_buffer *leaf;
12505 struct btrfs_trans_handle *trans = NULL;
12508 int need_trans = 0;
12510 btrfs_init_path(&path);
12512 ret = build_roots_info_cache(info);
/* Start the scan at the first root item with objectid BTRFS_FIRST_FREE_OBJECTID. */
12516 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12517 key.type = BTRFS_ROOT_ITEM_KEY;
12522 * Avoid opening and committing transactions if a leaf doesn't have
12523 * any root items that need to be fixed, so that we avoid rotating
12524 * backup roots unnecessarily.
12527 trans = btrfs_start_transaction(info->tree_root, 1);
12528 if (IS_ERR(trans)) {
12529 ret = PTR_ERR(trans);
/* The search is given the current transaction handle, which may be NULL. */
12534 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12538 leaf = path.nodes[0];
12541 struct btrfs_key found_key;
/* End of leaf: compute the next key to search from, then drop the path. */
12543 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12544 int no_more_keys = find_next_key(&path, &key);
12546 btrfs_release_path(&path);
12548 ret = btrfs_commit_transaction(trans,
12560 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Non-root-item keys and the tree-reloc root are tested for; presumably skipped. */
12562 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12564 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Read-only mode (1) when no transaction is open, writable mode (0) otherwise. */
12567 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* No transaction yet but repair is enabled: the path is released here (rest of branch not visible). */
12571 if (!trans && repair) {
12574 btrfs_release_path(&path);
12584 free_roots_info_cache();
12585 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not captured on this line. */
12587 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups returned by successive
 * btrfs_lookup_first_block_group() calls, then set the superblock's cache
 * generation to (u64)-1 inside a transaction.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the loop construct, the declarations of current/ret and the
 * remaining return paths are on lines absent from this listing.
 */
12594 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12596 struct btrfs_trans_handle *trans;
12597 struct btrfs_block_group_cache *bg_cache;
12601 /* Clear all free space cache inodes and their extent data */
12603 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12606 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup from the end of this block group (start + length). */
12609 current = bg_cache->key.objectid + bg_cache->key.offset;
12612 /* Don't forget to set cache_generation to -1 */
12613 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12614 if (IS_ERR(trans)) {
12615 error("failed to update super block cache generation");
12616 return PTR_ERR(trans);
12618 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not captured on this line. */
12619 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check"; cmd_check() passes this array to usage().
 * The first entries are the synopsis and descriptions, followed by one entry
 * per option line.
 */
12624 const char * const cmd_check_usage[] = {
12625 "btrfs check [options] <device>",
12626 "Check structural integrity of a filesystem (unmounted).",
12627 "Check structural integrity of an unmounted filesystem. Verify internal",
12628 "trees' consistency and item connectivity. In the repair mode try to",
12629 "fix the problems found. ",
12630 "WARNING: the repair mode is considered dangerous",
12632 "-s|--super <superblock> use this superblock copy",
12633 "-b|--backup use the first valid backup root copy",
12634 "--repair try to repair the filesystem",
12635 "--readonly run in read-only mode (default)",
12636 "--init-csum-tree create a new CRC tree",
12637 "--init-extent-tree create a new extent tree",
12638 "--mode <MODE> allows choice of memory/IO trade-offs",
12639 " where MODE is one of:",
12640 " original - read inodes and extents to memory (requires",
12641 " more memory, does less IO)",
12642 " lowmem - try to use less memory but read blocks again",
12644 "--check-data-csum verify checksums of data blocks",
12645 "-Q|--qgroup-report print a report on qgroup consistency",
12646 "-E|--subvol-extents <subvolid>",
12647 " print subvolume extents and sharing state",
12648 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12649 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12650 "-p|--progress indicate progress",
12651 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument, optionally handles the space-cache clearing, qgroup
 * report, subvolume extent report and extent/csum tree re-initialisation
 * requests, and then runs the extent, root item, free space, fs root, csum,
 * root ref and quota group checks before printing summary counters.
 *
 * NOTE(review): many short lines (declarations, gotos, labels, returns, the
 * closing lines) are absent from this listing, so exit paths and return
 * values cannot be stated from here.
 */
12655 int cmd_check(int argc, char **argv)
12657 struct cache_tree root_cache;
12658 struct btrfs_root *root;
12659 struct btrfs_fs_info *info;
12662 u64 tree_root_bytenr = 0;
12663 u64 chunk_root_bytenr = 0;
12664 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12668 int init_csum_tree = 0;
12670 int clear_space_cache = 0;
12671 int qgroup_report = 0;
12672 int qgroups_repaired = 0;
12673 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for options that only have a long form, numbered from 257 upwards. */
12677 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12678 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12679 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12680 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12681 static const struct option long_options[] = {
12682 { "super", required_argument, NULL, 's' },
12683 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12684 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12685 { "init-csum-tree", no_argument, NULL,
12686 GETOPT_VAL_INIT_CSUM },
12687 { "init-extent-tree", no_argument, NULL,
12688 GETOPT_VAL_INIT_EXTENT },
12689 { "check-data-csum", no_argument, NULL,
12690 GETOPT_VAL_CHECK_CSUM },
12691 { "backup", no_argument, NULL, 'b' },
12692 { "subvol-extents", required_argument, NULL, 'E' },
12693 { "qgroup-report", no_argument, NULL, 'Q' },
12694 { "tree-root", required_argument, NULL, 'r' },
12695 { "chunk-root", required_argument, NULL,
12696 GETOPT_VAL_CHUNK_TREE },
12697 { "progress", no_argument, NULL, 'p' },
12698 { "mode", required_argument, NULL,
12700 { "clear-space-cache", required_argument, NULL,
12701 GETOPT_VAL_CLEAR_SPACE_CACHE},
12702 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string below contains no 'Q' or 'E',
 * although cmd_check_usage advertises -Q and -E; only the long forms
 * --qgroup-report and --subvol-extents map to those codes here.
 */
12705 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12709 case 'a': /* ignored */ break;
12711 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock mirror index: must be below BTRFS_SUPER_MIRROR_MAX. */
12714 num = arg_strtou64(optarg);
12715 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12717 "super mirror should be less than %d",
12718 BTRFS_SUPER_MIRROR_MAX);
12721 bytenr = btrfs_sb_offset(((int)num));
12722 printf("using SB copy %llu, bytenr %llu\n", num,
12723 (unsigned long long)bytenr);
12729 subvolid = arg_strtou64(optarg);
12732 tree_root_bytenr = arg_strtou64(optarg);
12734 case GETOPT_VAL_CHUNK_TREE:
12735 chunk_root_bytenr = arg_strtou64(optarg);
12738 ctx.progress_enabled = true;
12742 usage(cmd_check_usage);
/* Repair, tree re-initialisation and cache clearing all open the filesystem for writes. */
12743 case GETOPT_VAL_REPAIR:
12744 printf("enabling repair mode\n");
12746 ctree_flags |= OPEN_CTREE_WRITES;
12748 case GETOPT_VAL_READONLY:
12751 case GETOPT_VAL_INIT_CSUM:
12752 printf("Creating a new CRC tree\n");
12753 init_csum_tree = 1;
12755 ctree_flags |= OPEN_CTREE_WRITES;
12757 case GETOPT_VAL_INIT_EXTENT:
12758 init_extent_tree = 1;
12759 ctree_flags |= (OPEN_CTREE_WRITES |
12760 OPEN_CTREE_NO_BLOCK_GROUPS);
12763 case GETOPT_VAL_CHECK_CSUM:
12764 check_data_csum = 1;
12766 case GETOPT_VAL_MODE:
12767 check_mode = parse_check_mode(optarg);
12768 if (check_mode == CHECK_MODE_UNKNOWN) {
12769 error("unknown mode: %s", optarg);
/* clear_space_cache: 1 for "v1", 2 for "v2"; any other argument is reported as invalid. */
12773 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12774 if (strcmp(optarg, "v1") == 0) {
12775 clear_space_cache = 1;
12776 } else if (strcmp(optarg, "v2") == 0) {
12777 clear_space_cache = 2;
12778 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12781 "invalid argument to --clear-space-cache, must be v1 or v2");
12784 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is expected. */
12789 if (check_argc_exact(argc - optind, 1))
12790 usage(cmd_check_usage);
/* Set up the progress reporting task only when -p/--progress was given. */
12792 if (ctx.progress_enabled) {
12793 ctx.tp = TASK_NOTHING;
12794 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12797 /* This check is the only reason for --readonly to exist */
12798 if (readonly && repair) {
12799 error("repair options are not compatible with --readonly");
12804 * Not supported yet
12806 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12807 error("low memory mode doesn't support repair yet");
12812 cache_tree_init(&root_cache);
/* Errors are reported when the mount status cannot be determined or the device is mounted. */
12814 if((ret = check_mounted(argv[optind])) < 0) {
12815 error("could not check mount status: %s", strerror(-ret));
12819 error("%s is currently mounted, aborting", argv[optind]);
12825 /* only allow partial opening under repair mode */
12827 ctree_flags |= OPEN_CTREE_PARTIAL;
12829 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12830 chunk_root_bytenr, ctree_flags);
12832 error("cannot open file system");
12838 global_info = info;
12839 root = info->fs_root;
/* v1 clearing when the filesystem has the FREE_SPACE_TREE compat_ro bit: points the user at v2. */
12840 if (clear_space_cache == 1) {
12841 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12843 "free space cache v2 detected, use --clear-space-cache v2");
12847 printf("Clearing free space cache\n");
12848 ret = clear_free_space_cache(info);
12850 error("failed to clear free space cache");
12853 printf("Free space cache cleared\n");
12856 } else if (clear_space_cache == 2) {
12857 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12858 printf("no free space cache v2 to clear\n");
12862 printf("Clear free space cache v2\n");
12863 ret = btrfs_clear_free_space_tree(info);
12865 error("failed to clear free space cache v2: %d", ret);
12868 printf("free space cache v2 cleared\n");
12874 * repair mode will force us to commit transaction which
12875 * will make us fail to load log tree when mounting.
12877 if (repair && btrfs_super_log_root(info->super_copy)) {
12878 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12884 ret = zero_log_tree(root);
12887 error("failed to zero log tree: %d", ret);
/* Render the filesystem UUID once; it is printed by each of the report headers below. */
12892 uuid_unparse(info->super_copy->fsid, uuidbuf);
12893 if (qgroup_report) {
12894 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12896 ret = qgroup_verify_all(info);
12903 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12904 subvolid, argv[optind], uuidbuf);
12905 ret = print_extent_state(info, subvolid);
12909 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, dev and chunk roots must all be readable before any check is attempted. */
12911 if (!extent_buffer_uptodate(info->tree_root->node) ||
12912 !extent_buffer_uptodate(info->dev_root->node) ||
12913 !extent_buffer_uptodate(info->chunk_root->node)) {
12914 error("critical roots corrupted, unable to check the filesystem");
/* Extent/csum tree re-initialisation is done inside a single transaction. */
12920 if (init_extent_tree || init_csum_tree) {
12921 struct btrfs_trans_handle *trans;
12923 trans = btrfs_start_transaction(info->extent_root, 0);
12924 if (IS_ERR(trans)) {
12925 error("error starting transaction");
12926 ret = PTR_ERR(trans);
12931 if (init_extent_tree) {
12932 printf("Creating a new extent tree\n");
12933 ret = reinit_extent_tree(trans, info);
12939 if (init_csum_tree) {
12940 printf("Reinitialize checksum tree\n");
12941 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12943 error("checksum tree initialization failed: %d",
12950 ret = fill_csum_tree(trans, info->csum_root,
12954 error("checksum tree refilling failed: %d", ret);
12959 * Ok now we commit and run the normal fsck, which will add
12960 * extent entries for all of the items it finds.
12962 ret = btrfs_commit_transaction(trans, info->extent_root);
12967 if (!extent_buffer_uptodate(info->extent_root->node)) {
12968 error("critical: extent_root, unable to check the filesystem");
12973 if (!extent_buffer_uptodate(info->csum_root->node)) {
12974 error("critical: csum_root, unable to check the filesystem");
/* The stage banner is printed to stderr only when the progress task is disabled. */
12980 if (!ctx.progress_enabled)
12981 fprintf(stderr, "checking extents\n");
/* Low-memory mode uses the _v2 checker. */
12982 if (check_mode == CHECK_MODE_LOWMEM)
12983 ret = check_chunks_and_extents_v2(root);
12985 ret = check_chunks_and_extents(root);
12989 "errors found in extent allocation tree or chunk allocation");
/* A positive result is reported below as a count of roots (fixed or outdated). */
12991 ret = repair_root_items(info);
12994 error("failed to repair root items: %s", strerror(-ret));
12998 fprintf(stderr, "Fixed %d roots.\n", ret);
13000 } else if (ret > 0) {
13002 "Found %d roots with an outdated root item.\n",
13005 "Please run a filesystem check with the option --repair to fix them.\n");
13011 if (!ctx.progress_enabled) {
13012 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13013 fprintf(stderr, "checking free space tree\n");
13015 fprintf(stderr, "checking free space cache\n");
13017 ret = check_space_cache(root);
13020 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13021 error("errors found in free space tree");
13023 error("errors found in free space cache");
13028 * We used to have to have these hole extents in between our real
13029 * extents so if we don't have this flag set we need to make sure there
13030 * are no gaps in the file extents for inodes, otherwise we can just
13031 * ignore it when this happens.
13033 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13034 if (!ctx.progress_enabled)
13035 fprintf(stderr, "checking fs roots\n");
13036 if (check_mode == CHECK_MODE_LOWMEM)
13037 ret = check_fs_roots_v2(root->fs_info);
13039 ret = check_fs_roots(root, &root_cache);
13042 error("errors found in fs roots");
13046 fprintf(stderr, "checking csums\n");
13047 ret = check_csums(root);
13050 error("errors found in csum tree");
13054 fprintf(stderr, "checking root refs\n");
13055 /* For low memory mode, check_fs_roots_v2 handles root refs */
13056 if (check_mode != CHECK_MODE_LOWMEM) {
13057 ret = check_root_refs(root, &root_cache);
13060 error("errors found in root refs");
/* In repair mode, drain the recow_ebs list, passing each buffer to recow_extent_buffer(). */
13065 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13066 struct extent_buffer *eb;
13068 eb = list_first_entry(&root->fs_info->recow_ebs,
13069 struct extent_buffer, recow);
13070 list_del_init(&eb->recow);
13071 ret = recow_extent_buffer(root, eb);
13074 error("fails to fix transid errors");
/* Drain the delete_items list, passing each entry to delete_bad_item(). */
13079 while (!list_empty(&delete_items)) {
13080 struct bad_item *bad;
13082 bad = list_first_entry(&delete_items, struct bad_item, list);
13083 list_del_init(&bad->list);
13085 ret = delete_bad_item(root, bad);
13091 if (info->quota_enabled) {
13092 fprintf(stderr, "checking quota groups\n");
13093 ret = qgroup_verify_all(info);
13096 error("failed to check quota groups");
13100 ret = repair_qgroups(info, &qgroups_repaired);
13103 error("failed to repair quota groups");
/* Buffers still on recow_ebs at this point are reported as transid errors. */
13109 if (!list_empty(&root->fs_info->recow_ebs)) {
13110 error("transid errors in file system");
13115 if (found_old_backref) { /*
13116 * there was a disk format change when mixed
13117 * backref was in testing tree. The old format
13118 * existed about one week.
13120 printf("\n * Found old mixed backref format. "
13121 "The old format is not supported! *"
13122 "\n * Please mount the FS in readonly mode, "
13123 "backup data and re-format the FS. *\n\n");
13126 printf("found %llu bytes used, ",
13127 (unsigned long long)bytes_used);
13129 printf("error(s) found\n");
13131 printf("no error found\n");
13132 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13133 printf("total tree bytes: %llu\n",
13134 (unsigned long long)total_btree_bytes);
13135 printf("total fs tree bytes: %llu\n",
13136 (unsigned long long)total_fs_tree_bytes);
13137 printf("total extent tree bytes: %llu\n",
13138 (unsigned long long)total_extent_tree_bytes);
13139 printf("btree space waste bytes: %llu\n",
13140 (unsigned long long)btree_space_waste);
13141 printf("file data blocks allocated: %llu\n referenced %llu\n",
13142 (unsigned long long)data_bytes_allocated,
13143 (unsigned long long)data_bytes_referenced);
13145 free_qgroup_counts();
13146 free_root_recs_tree(&root_cache);
13150 if (ctx.progress_enabled)
13151 task_deinit(ctx.info);