2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Error bits for a single back reference.  They are OR-ed into
 * inode_backref::errors and turned into text by print_ref_error().
 * NOTE(review): the *_ROOT_* bits are presumably used for root_backref
 * records - confirm against the callers.
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Error bits for a single inode.  They are OR-ed into inode_record::errors
 * and turned into text by print_inode_error().
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Each condition owns exactly one bit so that a returned mask can be decoded
 * unambiguously.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH, which made the two indistinguishable; it now uses the
 * first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper that allows old code to be reused */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exist.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* For reloc root errors, print the objectid of the corresponding fs root */
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), called when the walk leaves the active
 * shared node at @level: the active slot is cleared, a level above @level
 * becomes the new active level, and the departing node may be spliced into
 * the node found there.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
/* Special case: the node being left is the one at the root level */
1388 if (level == wc->root_level)
/* Scan the levels above @level for the next active level */
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan must have stopped before running off the level array */
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the departing node and switch the active level to i */
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
/*
 * Below the root level, or for a root whose root item still has refs,
 * the departing node must hold more than one reference and its records
 * are merged into the new active node.
 */
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Work out how root @child_root_id relates to @parent_root_id by searching
 * the tree root, first for the ROOT_REF item keyed
 * (parent_root_id, child_root_id) and then for the ROOT_BACKREF items of
 * @child_root_id. The final return yields 0 when some parent was seen and
 * 2 when none was.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Direct lookup: ROOT_REF item from the parent to the child */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Fall back to the ROOT_BACKREF items stored under the child */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next one */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are no longer backrefs of the child end the scan */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF keys the offset is compared against the parent id */
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Record every name stored in the DIR_ITEM/DIR_INDEX item at @slot of @eb.
 * Each entry is turned into a backref via add_inode_backref(), filed in the
 * inode cache or the root cache of @active_node depending on the type of
 * the key the entry points to. The directory record itself is
 * active_node->current.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
/* One item can pack several entries; walk them until the item size is used up */
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
/* Accumulate the name lengths seen for this directory in found_size */
1514 rec->found_size += name_len;
/*
 * A name that crosses the item boundary or exceeds BTRFS_NAME_LEN is
 * flagged; the length actually read is clamped to what is left in the
 * item and to BTRFS_NAME_LEN.
 */
1515 if (cur + sizeof(*di) + name_len > total ||
1516 name_len > BTRFS_NAME_LEN) {
1517 error = REF_ERR_NAME_TOO_LONG;
1519 if (cur + sizeof(*di) > total)
1521 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size btrfs_dir_item header */
1528 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Entry points to an inode: backref goes into the inode cache */
1530 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531 add_inode_backref(inode_cache, location.objectid,
1532 key->objectid, key->offset, namebuf,
1533 len, filetype, key->type, error);
/* Entry points to a root item: backref goes into the root cache */
1534 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535 add_inode_backref(root_cache, location.objectid,
1536 key->objectid, key->offset,
1537 namebuf, len, filetype,
/* Any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS */
1540 fprintf(stderr, "invalid location in dir item %u\n",
1542 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543 key->objectid, key->offset, namebuf,
1544 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry */
1547 len = sizeof(*di) + name_len + data_len;
1548 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item is flagged as duplicated when nritems exceeds one */
1551 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in the INODE_REF item at @slot of @eb as a
 * backref in the inode cache of @active_node. The backref is keyed by
 * key->objectid with key->offset passed as the directory argument.
 */
1557 static int process_inode_ref(struct extent_buffer *eb,
1558 int slot, struct btrfs_key *key,
1559 struct shared_node *active_node)
1567 struct cache_tree *inode_cache;
1568 struct btrfs_inode_ref *ref;
1569 char namebuf[BTRFS_NAME_LEN];
1571 inode_cache = &active_node->inode_cache;
/* One item can pack several refs; walk them until the item size is used up */
1573 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574 total = btrfs_item_size_nr(eb, slot);
1575 while (cur < total) {
1576 name_len = btrfs_inode_ref_name_len(eb, ref);
1577 index = btrfs_inode_ref_index(eb, ref);
1579 /* inode_ref + namelen should not cross item boundary */
1580 if (cur + sizeof(*ref) + name_len > total ||
1581 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits into what is left of the item */
1582 if (total < cur + sizeof(*ref))
1585 /* Still try to read out the remaining part */
1586 len = min_t(u32, total - cur - sizeof(*ref),
1588 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size btrfs_inode_ref header */
1594 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595 add_inode_backref(inode_cache, key->objectid, key->offset,
1596 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref */
1598 len = sizeof(*ref) + name_len;
1599 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name stored in the INODE_EXTREF item at @slot of @eb as a
 * backref in the inode cache of @active_node. Unlike a plain INODE_REF, the
 * parent directory is read from the extref entry itself.
 *
 * NOTE(review): the only visible length check compares name_len against
 * BTRFS_NAME_LEN; no check against the item boundary (as done in
 * process_inode_ref) is visible here - confirm whether one is needed.
 */
1605 static int process_inode_extref(struct extent_buffer *eb,
1606 int slot, struct btrfs_key *key,
1607 struct shared_node *active_node)
1616 struct cache_tree *inode_cache;
1617 struct btrfs_inode_extref *extref;
1618 char namebuf[BTRFS_NAME_LEN];
1620 inode_cache = &active_node->inode_cache;
/* One item can pack several extrefs; walk them until the item size is used up */
1622 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623 total = btrfs_item_size_nr(eb, slot);
1624 while (cur < total) {
1625 name_len = btrfs_inode_extref_name_len(eb, extref);
1626 index = btrfs_inode_extref_index(eb, extref);
1627 parent = btrfs_inode_extref_parent(eb, extref);
/* Over-long names are truncated to BTRFS_NAME_LEN and flagged */
1628 if (name_len <= BTRFS_NAME_LEN) {
1632 len = BTRFS_NAME_LEN;
1633 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size btrfs_inode_extref header */
1635 read_extent_buffer(eb, namebuf,
1636 (unsigned long)(extref + 1), len);
1637 add_inode_backref(inode_cache, key->objectid, parent,
1638 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref */
1640 len = sizeof(*extref) + name_len;
1641 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len) and report the covered amount through @found.
 * Coverage of an item is derived from its size: every csum_size bytes of
 * item data stand for one sector.
 */
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649 u64 len, u64 *found)
1651 struct btrfs_key key;
1652 struct btrfs_path path;
1653 struct extent_buffer *leaf;
1658 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660 btrfs_init_path(&path);
1662 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664 key.type = BTRFS_EXTENT_CSUM_KEY;
1666 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the item in the previous slot may still be a csum item
 * whose range reaches into ours, so inspect its key as well.
 */
1670 if (ret > 0 && path.slots[0] > 0) {
1671 leaf = path.nodes[0];
1672 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674 key.type == BTRFS_EXTENT_CSUM_KEY)
1679 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next one */
1680 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1686 leaf = path.nodes[0];
1689 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Anything that is not a csum item ends the scan */
1690 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691 key.type != BTRFS_EXTENT_CSUM_KEY)
1694 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts at or after the end of the requested range */
1695 if (key.offset >= start + len)
/* Item starts inside the requested range, after @start */
1698 if (key.offset > start)
/* End of the byte range covered by the checksums in this item */
1701 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703 if (csum_end > start) {
/* Overlap of this item with what is left of the requested range */
1704 size = min(csum_end - start, len);
1713 btrfs_release_path(&path);
/*
 * Account the EXTENT_DATA item at @slot of @eb in the inode record
 * active_node->current: tracks the extent range seen so far (flagging
 * overlaps and recording holes), sanity checks the extent fields, sums up
 * found_size and compares the data checksums found on disk with what the
 * extent type calls for.
 */
1719 static int process_file_extent(struct btrfs_root *root,
1720 struct extent_buffer *eb,
1721 int slot, struct btrfs_key *key,
1722 struct shared_node *active_node)
1724 struct inode_record *rec;
1725 struct btrfs_file_extent_item *fi;
1727 u64 disk_bytenr = 0;
1728 u64 extent_offset = 0;
/* Low-bit mask used for sector alignment checks and rounding */
1729 u64 mask = root->sectorsize - 1;
1733 rec = active_node->current;
1734 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735 rec->found_file_extent = 1;
/* extent_start of (u64)-1 marks a record that has seen no extent yet */
1737 if (rec->extent_start == (u64)-1) {
1738 rec->extent_start = key->offset;
1739 rec->extent_end = key->offset;
/*
 * This extent begins before the previous one ended: overlap.
 * It begins after the previous one ended: remember the gap as a hole.
 */
1742 if (rec->extent_end > key->offset)
1743 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744 else if (rec->extent_end < key->offset) {
1745 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746 key->offset - rec->extent_end);
1751 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline extent: count the inline length, then round it up to a full sector */
1754 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1757 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1758 rec->found_size += num_bytes;
1759 num_bytes = (num_bytes + mask) & ~mask;
1760 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1765 if (num_bytes == 0 || (num_bytes & mask))
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced part must fit into the uncompressed extent size */
1767 if (num_bytes + extent_offset >
1768 btrfs_file_extent_ram_bytes(eb, fi))
1769 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding */
1770 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771 (btrfs_file_extent_compression(eb, fi) ||
1772 btrfs_file_extent_encryption(eb, fi) ||
1773 btrfs_file_extent_other_encoding(eb, fi)))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr count towards found_size */
1775 if (disk_bytenr > 0)
1776 rec->found_size += num_bytes;
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780 rec->extent_end = key->offset + num_bytes;
1783 * The data reloc tree will copy full extents into its inode and then
1784 * copy the corresponding csums. Because the extent it copied could be
1785 * a preallocated extent that hasn't been written to yet there may be no
1786 * csums to copy, ergo we won't have csums for our file extent. This is
1787 * ok so just don't bother checking csums if the inode belongs to the
1790 if (disk_bytenr > 0 &&
1791 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are looked up by their on-disk length */
1793 if (btrfs_file_extent_compression(eb, fi))
1794 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1796 disk_bytenr += extent_offset;
1798 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extent: note csum presence and any shortfall in coverage */
1801 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1803 rec->found_csum_item = 1;
1804 if (found < num_bytes)
1805 rec->some_csum_missing = 1;
/* Preallocated extent: the visible branch flags an odd csum item */
1806 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process all items of leaf @eb for the original-mode check: items are
 * grouped by inode number into inode records held in the inode cache of the
 * active shared node, and each item is dispatched to the handler for its
 * key type.
 */
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815 struct walk_control *wc)
1817 struct btrfs_key key;
1821 struct cache_tree *inode_cache;
1822 struct shared_node *active_node;
/* Special case: active node at the root level of a root with zero refs */
1824 if (wc->root_level == wc->active_node &&
1825 btrfs_root_refs(&root->root_item) == 0)
1828 active_node = wc->nodes[wc->active_node];
1829 inode_cache = &active_node->inode_cache;
1830 nritems = btrfs_header_nritems(eb);
1831 for (i = 0; i < nritems; i++) {
1832 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get special treatment */
1834 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1836 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a higher inode number: mark the previous record as
 * checked, let it be freed if possible, and fetch the record for the
 * new inode.
 */
1839 if (active_node->current == NULL ||
1840 active_node->current->ino < key.objectid) {
1841 if (active_node->current) {
1842 active_node->current->checked = 1;
1843 maybe_free_inode_rec(inode_cache,
1844 active_node->current);
1846 active_node->current = get_inode_rec(inode_cache,
1848 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type */
1851 case BTRFS_DIR_ITEM_KEY:
1852 case BTRFS_DIR_INDEX_KEY:
1853 ret = process_dir_item(eb, i, &key, active_node);
1855 case BTRFS_INODE_REF_KEY:
1856 ret = process_inode_ref(eb, i, &key, active_node);
1858 case BTRFS_INODE_EXTREF_KEY:
1859 ret = process_inode_extref(eb, i, &key, active_node);
1861 case BTRFS_INODE_ITEM_KEY:
1862 ret = process_inode_item(eb, i, &key, active_node);
1864 case BTRFS_EXTENT_DATA_KEY:
1865 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping for the tree walk, indexed by tree level:
 * the bytenr of the block last looked up, the reference count found for it,
 * and whether that block has to be checked in the current root.
 */
1876 u64 bytenr[BTRFS_MAX_LEVEL];
1877 u64 refs[BTRFS_MAX_LEVEL];
1878 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are called by process_one_leaf_v2() before their definitions */
1881 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1882 struct node_refs *nrefs, u64 level);
1883 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1884 unsigned int ext_ref);
1887 * Returns >0 Found error, not fatal, should continue
1888 * Returns <0 Fatal error, must exit the whole check
1889 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] by repeatedly
 * calling check_inode_item(). Errors are collected as a bitmap in err.
 * When checking moves the path to a different leaf, the cached node refs in
 * @nrefs are refreshed from the root level downwards.
 */
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892 struct node_refs *nrefs, int *level, int ext_ref)
1894 struct extent_buffer *cur = path->nodes[0];
1895 struct btrfs_key key;
1899 int root_level = btrfs_header_level(root->node);
1901 int ret = 0; /* Final return value */
1902 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on so that a leaf switch can be detected */
1904 cur_bytenr = cur->start;
1906 /* skip to first inode item or the first inode number change */
1907 nritems = btrfs_header_nritems(cur);
1908 for (i = 0; i < nritems; i++) {
1909 btrfs_item_key_to_cpu(cur, &key, i);
1911 first_ino = key.objectid;
1912 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913 (first_ino && first_ino != key.objectid))
1917 path->slots[0] = nritems;
1923 err |= check_inode_item(root, path, ext_ref);
/* check_inode_item() signals the end of the tree through the LAST_ITEM bit */
1925 if (err & LAST_ITEM)
1928 /* still have inode items in this leaf */
1929 if (cur->start == cur_bytenr)
1933 * we have switched to another leaf, above nodes may
1934 * have changed, here walk down the path, if a node
1935 * or leaf is shared, check whether we can skip this
1938 for (i = root_level; i >= 0; i--) {
/* Block on this level is the one already cached in @nrefs */
1939 if (path->nodes[i]->start == nrefs->bytenr[i])
1942 ret = update_nodes_refs(root,
1943 path->nodes[i]->start,
1948 if (!nrefs->need_check[i]) {
/* Drop the references held on the path nodes below *level */
1954 for (i = 0; i < *level; i++) {
1955 free_extent_buffer(path->nodes[i]);
1956 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting with the child
 * referenced at @slot and continuing to the last pointer in the node.
 */
1965 static void reada_walk_down(struct btrfs_root *root,
1966 struct extent_buffer *node, int slot)
1975 level = btrfs_header_level(node);
1979 nritems = btrfs_header_nritems(node);
1980 blocksize = root->nodesize;
1981 for (i = slot; i < nritems; i++) {
/* Block address and expected generation of child i */
1982 bytenr = btrfs_node_blockptr(node, i);
1983 ptr_gen = btrfs_node_ptr_generation(node, i);
1984 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1989 * Check the child node/leaf by the following condition:
1990 * 1. the first item key of the node/leaf should be the same as the one
1992 * 2. block in parent node should match the child node/leaf.
1993 * 3. generation of parent node and child's header should be consistent.
1995 * Or the child node/leaf pointed by the key in parent is not valid.
1997 * We hope to check leaf owner too, but since subvol may share leaves,
1998 * which makes leaf owner check not so strong, key check should be
1999 * sufficient enough for that case.
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002 struct extent_buffer *child)
2004 struct btrfs_key parent_key;
2005 struct btrfs_key child_key;
2008 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009 if (btrfs_header_level(child) == 0)
2010 btrfs_item_key_to_cpu(child, &child_key, 0);
2012 btrfs_node_key_to_cpu(child, &child_key, 0);
2014 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018 parent_key.objectid, parent_key.type, parent_key.offset,
2019 child_key.objectid, child_key.type, child_key.offset);
2021 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2023 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024 btrfs_node_blockptr(parent, slot),
2025 btrfs_header_bytenr(child));
2027 if (btrfs_node_ptr_generation(parent, slot) !=
2028 btrfs_header_generation(child)) {
2030 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_header_generation(child),
2032 btrfs_node_ptr_generation(parent, slot));
2038 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2039 * in every fs or file tree check. Here we find all of its root ids, and only check
2040 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots in @roots has to be
 * checked while walking @root. The decision is based on the number of
 * entries in @roots and on comparing the id of @root with the value of the
 * first entry in the ulist's rb-tree.
 */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2044 struct rb_node *node;
2045 struct ulist_node *u;
/* Special case: exactly one root references the block */
2047 if (roots->nnodes == 1)
/* Leftmost rb-tree entry of the ulist */
2050 node = rb_first(&roots->root);
2051 u = rb_entry(node, struct ulist_node, rb_node);
2053 * current root id is not smallest, we skip it and let it be checked
2054 * in the fs or file tree who hash the smallest root id.
2056 if (root->objectid != u->val)
2063 * For a tree node or leaf, we record its reference count, so that if we process
2064 * this node or leaf again later, we don't need to compute its reference count again.
/*
 * Refresh the cached information in @nrefs for tree level @level when the
 * block at @bytenr differs from the one cached there: the reference count
 * is looked up in the extent tree and need_check[level] is set either from
 * need_check() on the roots that reference the block or to 1.
 */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067 struct node_refs *nrefs, u64 level)
2071 struct ulist *roots;
/* Nothing is recomputed when @bytenr is already the cached block */
2073 if (nrefs->bytenr[level] != bytenr) {
2074 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075 level, 1, &refs, NULL);
2079 nrefs->bytenr[level] = bytenr;
2080 nrefs->refs[level] = refs;
/* Collect the roots referencing the block and let need_check() decide */
2082 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2087 check = need_check(root, roots);
2089 nrefs->need_check[level] = check;
2091 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting at path->nodes[*level], walk down towards
 * the leaves. Reference counts are cached per level in @nrefs, leaves are
 * handed to process_one_leaf(), and every child block is read, compared
 * against its parent pointer and sanity checked before the path steps into
 * it.
 */
2098 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2099 struct walk_control *wc, int *level,
2100 struct node_refs *nrefs)
2102 enum btrfs_tree_block_status status;
2105 struct extent_buffer *next;
2106 struct extent_buffer *cur;
2111 WARN_ON(*level < 0);
2112 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block: from the cache, else from the extent tree */
2114 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2115 refs = nrefs->refs[*level];
2118 ret = btrfs_lookup_extent_info(NULL, root,
2119 path->nodes[*level]->start,
2120 *level, 1, &refs, NULL);
2125 nrefs->bytenr[*level] = path->nodes[*level]->start;
2126 nrefs->refs[*level] = refs;
2130 ret = enter_shared_node(root, path->nodes[*level]->start,
2138 while (*level >= 0) {
2139 WARN_ON(*level < 0);
2140 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2141 cur = path->nodes[*level];
/* The level stored in the block header must agree with the walk */
2143 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
2146 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch address and expected generation of the next child */
2154 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2155 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2156 blocksize = root->nodesize;
/* Reference count of the child: from the cache, else from the extent tree */
2158 if (bytenr == nrefs->bytenr[*level - 1]) {
2159 refs = nrefs->refs[*level - 1];
2161 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2162 *level - 1, 1, &refs, NULL);
2166 nrefs->bytenr[*level - 1] = bytenr;
2167 nrefs->refs[*level - 1] = refs;
2172 ret = enter_shared_node(root, bytenr, refs,
2175 path->slots[*level]++;
/* Use the cached buffer when it is up to date, else read it (with readahead) */
2180 next = btrfs_find_tree_block(root, bytenr, blocksize);
2181 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2182 free_extent_buffer(next);
2183 reada_walk_down(root, cur, path->slots[*level]);
2184 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as a corrupt extent */
2186 if (!extent_buffer_uptodate(next)) {
2187 struct btrfs_key node_key;
2189 btrfs_node_key_to_cpu(path->nodes[*level],
2191 path->slots[*level]);
2192 btrfs_add_corrupt_extent_record(root->fs_info,
2194 path->nodes[*level]->start,
2195 root->nodesize, *level);
/* The child must match the key, block and generation of its parent pointer */
2201 ret = check_child_node(cur, path->slots[*level], next);
2203 free_extent_buffer(next);
2208 if (btrfs_is_leaf(next))
2209 status = btrfs_check_leaf(root, NULL, next);
2211 status = btrfs_check_node(root, NULL, next);
2212 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2213 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below */
2218 *level = *level - 1;
2219 free_extent_buffer(path->nodes[*level]);
2220 path->nodes[*level] = next;
2221 path->slots[*level] = 0;
/* Mark the block at *level as fully consumed */
2224 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration of check_inode_item(); this repeats the prototype given earlier in the file */
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229 unsigned int ext_ref);
2232 * Returns >0 Found error, should continue
2233 * Returns <0 Fatal error, must exit the whole check
2234 * Returns 0 No errors found
/*
 * Descent used together with process_one_leaf_v2(): starting at
 * path->nodes[*level], walk down towards the leaves. Every block is
 * validated with btrfs_check_leaf()/btrfs_check_node(), children whose
 * need_check flag in @nrefs is clear are skipped, and the remaining
 * children are read and compared against their parent pointer before the
 * path steps into them.
 *
 * NOTE(review): unlike walk_down_tree(), no free_extent_buffer(next) is
 * visible after a failing check_child_node() - confirm that next is not
 * leaked on that path.
 */
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237 int *level, struct node_refs *nrefs, int ext_ref)
2239 enum btrfs_tree_block_status status;
2242 struct extent_buffer *next;
2243 struct extent_buffer *cur;
2247 WARN_ON(*level < 0);
2248 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Bring the cached refs for the starting block up to date */
2250 ret = update_nodes_refs(root, path->nodes[*level]->start,
2255 while (*level >= 0) {
2256 WARN_ON(*level < 0);
2257 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258 cur = path->nodes[*level];
/* The level stored in the block header must agree with the walk */
2260 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been visited */
2263 if (path->slots[*level] >= btrfs_header_nritems(cur))
2265 /* Don't forget to check leaf/node validity */
2267 ret = btrfs_check_leaf(root, NULL, cur);
2268 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272 ret = process_one_leaf_v2(root, path, nrefs,
2276 ret = btrfs_check_node(root, NULL, cur);
2277 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: fetch address and expected generation of the next child */
2282 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284 blocksize = root->nodesize;
2286 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not have to be checked in this root: move to the next slot */
2289 if (!nrefs->need_check[*level - 1]) {
2290 path->slots[*level]++;
/* Use the cached buffer when it is up to date, else read it (with readahead) */
2294 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296 free_extent_buffer(next);
2297 reada_walk_down(root, cur, path->slots[*level]);
2298 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as a corrupt extent */
2300 if (!extent_buffer_uptodate(next)) {
2301 struct btrfs_key node_key;
2303 btrfs_node_key_to_cpu(path->nodes[*level],
2305 path->slots[*level]);
2306 btrfs_add_corrupt_extent_record(root->fs_info,
2308 path->nodes[*level]->start,
2309 root->nodesize, *level);
/* The child must match the key, block and generation of its parent pointer */
2315 ret = check_child_node(cur, path->slots[*level], next);
2319 if (btrfs_is_leaf(next))
2320 status = btrfs_check_leaf(root, NULL, next);
2322 status = btrfs_check_node(root, NULL, next);
2323 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below */
2329 *level = *level - 1;
2330 free_extent_buffer(path->nodes[*level]);
2331 path->nodes[*level] = next;
2332 path->slots[*level] = 0;
/*
 * Original-mode ascent: starting at *level, look for the lowest level whose
 * block still has an unvisited slot. Exhausted blocks are released from the
 * path, and the active shared node is left when the walk moves above it.
 */
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338 struct walk_control *wc, int *level)
2341 struct extent_buffer *leaf;
2343 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344 leaf = path->nodes[i];
/* This block still has a slot after the current one */
2345 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path */
2350 free_extent_buffer(path->nodes[*level]);
2351 path->nodes[*level] = NULL;
/* The walk never climbs above the active shared node without leaving it */
2352 BUG_ON(*level > wc->active_node);
2353 if (*level == wc->active_node)
2354 leave_shared_node(root, wc, *level);
/*
 * Ascent counterpart of walk_down_tree_v2(): starting at *level, look for
 * the lowest level whose block still has an unvisited slot, releasing
 * exhausted blocks from the path on the way up. No shared-node bookkeeping
 * is done here.
 */
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2365 struct extent_buffer *leaf;
2367 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2368 leaf = path->nodes[i];
/* This block still has a slot after the current one */
2369 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Block exhausted: drop it from the path */
2374 free_extent_buffer(path->nodes[*level]);
2375 path->nodes[*level] = NULL;
/*
 * Sanity check the inode record of a root directory. The visible tests
 * look at: presence of an error-free inode item, nlink == 1 with no links
 * found, a non-empty backref list, and a first backref that is an inode ref
 * named ".." with index 0 and without dir index or dir item.
 */
2382 static int check_root_dir(struct inode_record *rec)
2384 struct inode_backref *backref;
2387 if (!rec->found_inode_item || rec->errors)
2389 if (rec->nlink != 1 || rec->found_link != 0)
2391 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined */
2393 backref = to_inode_backref(rec->backrefs.next);
2394 if (!backref->found_inode_ref)
2396 if (backref->index != 0 || backref->namelen != 2 ||
2397 memcmp(backref->name, "..", 2))
2399 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite the size field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record. The path
 * is released before returning.
 */
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407 struct btrfs_root *root, struct btrfs_path *path,
2408 struct inode_record *rec)
2410 struct btrfs_inode_item *ei;
2411 struct btrfs_key key;
/* Search with the largest possible offset for the inode item key */
2414 key.objectid = rec->ino;
2415 key.type = BTRFS_INODE_ITEM_KEY;
2416 key.offset = (u64)-1;
2418 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 is handled as a special case */
2422 if (!path->slots[0]) {
/* The item found must belong to the inode being repaired */
2429 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2430 if (key.objectid != rec->ino) {
2435 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436 struct btrfs_inode_item);
2437 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438 btrfs_mark_buffer_dirty(path->nodes[0]);
2439 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2440 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441 root->root_key.objectid);
2443 btrfs_release_path(path);
/*
 * Repair helper: add an orphan item for rec->ino to @root, release the path
 * and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448 struct btrfs_root *root,
2449 struct btrfs_path *path,
2450 struct inode_record *rec)
2454 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455 btrfs_release_path(path);
2457 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the nbytes field of the inode item of rec->ino
 * with rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record. The
 * path is released before returning.
 */
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462 struct btrfs_root *root,
2463 struct btrfs_path *path,
2464 struct inode_record *rec)
2466 struct btrfs_inode_item *ei;
2467 struct btrfs_key key;
2470 key.objectid = rec->ino;
2471 key.type = BTRFS_INODE_ITEM_KEY;
2474 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2481 /* Since ret == 0, no need to check anything */
2482 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483 struct btrfs_inode_item);
2484 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485 btrfs_mark_buffer_dirty(path->nodes[0]);
2486 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487 printf("reset nbytes for ino %llu root %llu\n",
2488 rec->ino, root->root_key.objectid);
2490 btrfs_release_path(path);
/*
 * Repair helper: insert the DIR_INDEX item that is missing for @backref of
 * inode @rec, inside a transaction of its own, and then bring the cached
 * record of the parent directory in line with the new item.
 */
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495 struct cache_tree *inode_cache,
2496 struct inode_record *rec,
2497 struct inode_backref *backref)
2499 struct btrfs_path path;
2500 struct btrfs_trans_handle *trans;
2501 struct btrfs_dir_item *dir_item;
2502 struct extent_buffer *leaf;
2503 struct btrfs_key key;
2504 struct btrfs_disk_key disk_key;
2505 struct inode_record *dir_rec;
2506 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name */
2507 u32 data_size = sizeof(*dir_item) + backref->namelen;
2510 trans = btrfs_start_transaction(root, 1);
2512 return PTR_ERR(trans);
2514 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515 (unsigned long long)rec->ino);
/* The new item is keyed (parent dir, DIR_INDEX, index) */
2517 btrfs_init_path(&path);
2518 key.objectid = backref->dir;
2519 key.type = BTRFS_DIR_INDEX_KEY;
2520 key.offset = backref->index;
2521 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2524 leaf = path.nodes[0];
2525 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry points at the inode item of rec->ino */
2527 disk_key.objectid = cpu_to_le64(rec->ino);
2528 disk_key.type = BTRFS_INODE_ITEM_KEY;
2529 disk_key.offset = 0;
2531 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533 btrfs_set_dir_data_len(leaf, dir_item, 0);
2534 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly behind the dir item header */
2535 name_ptr = (unsigned long)(dir_item + 1);
2536 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537 btrfs_mark_buffer_dirty(leaf);
2538 btrfs_release_path(&path);
2539 btrfs_commit_transaction(trans, root);
/* Reflect the repair in the in-memory backref and directory record */
2541 backref->found_dir_index = 1;
2542 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543 BUG_ON(IS_ERR(dir_rec));
/* The new name adds to the directory size; re-evaluate the isize error flag */
2546 dir_rec->found_size += backref->namelen;
2547 if (dir_rec->found_size == dir_rec->isize &&
2548 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550 if (dir_rec->found_size != dir_rec->isize)
2551 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove the DIR_INDEX entry described by @backref from
 * @root inside a transaction of its own. The entry is located with
 * btrfs_lookup_dir_index() and removed either as a whole item or as one
 * name within an item.
 */
2556 static int delete_dir_index(struct btrfs_root *root,
2557 struct inode_backref *backref)
2559 struct btrfs_trans_handle *trans;
2560 struct btrfs_dir_item *di;
2561 struct btrfs_path path;
2564 trans = btrfs_start_transaction(root, 1);
2566 return PTR_ERR(trans);
2568 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569 (unsigned long long)backref->dir,
2570 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571 (unsigned long long)root->objectid);
/* The final -1 argument requests a lookup for deletion */
2573 btrfs_init_path(&path);
2574 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575 backref->name, backref->namelen,
2576 backref->index, -1);
/* Early exit path: release the path and commit without deleting anything */
2579 btrfs_release_path(&path);
2580 btrfs_commit_transaction(trans, root);
/* Delete the whole item, or only this name out of the item */
2587 ret = btrfs_del_item(trans, root, &path);
2589 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2591 btrfs_release_path(&path);
2592 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a missing inode item for @rec in @root inside a
 * transaction of its own. The item is built on the stack from what the
 * check has learned about the inode (link count, size, whether dir items
 * or file extents were seen); timestamps are set to the current time.
 *
 * NOTE(review): the "else if (!rec->found_dir_item)" branch follows
 * "if (rec->found_dir_item)", so its condition looks redundant - a plain
 * else would appear to be equivalent; confirm.
 */
2596 static int create_inode_item(struct btrfs_root *root,
2597 struct inode_record *rec,
2600 struct btrfs_trans_handle *trans;
2601 struct btrfs_inode_item inode_item;
2602 time_t now = time(NULL);
2605 trans = btrfs_start_transaction(root, 1);
2606 if (IS_ERR(trans)) {
2607 ret = PTR_ERR(trans);
2611 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612 "be incomplete, please check permissions and content after "
2613 "the fsck completes.\n", (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
/* Start from an all-zero item stamped with the current transaction id */
2616 memset(&inode_item, 0, sizeof(inode_item));
2617 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Link count: either fixed at 1 or the number of links found */
2619 btrfs_set_stack_inode_nlink(&inode_item, 1);
2621 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Dir items seen: recreate as a directory, otherwise as a regular file */
2623 if (rec->found_dir_item) {
2624 if (rec->found_file_extent)
2625 fprintf(stderr, "root %llu inode %llu has both a dir "
2626 "item and extents, unsure if it is a dir or a "
2627 "regular file so setting it as a directory\n",
2628 (unsigned long long)root->objectid,
2629 (unsigned long long)rec->ino);
2630 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2632 } else if (!rec->found_dir_item) {
2633 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* atime, ctime and mtime are set to now; otime is set to zero */
2636 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2645 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2647 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of inode @rec and repair inconsistencies between dir
 * items, dir indexes and inode refs. The delete flag selects between the
 * pass that removes bad dir indexes and the pass that adds missing items.
 * Returns an error code when a step failed, otherwise the value of
 * repaired.
 */
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652 struct inode_record *rec,
2653 struct cache_tree *inode_cache,
2656 struct inode_backref *tmp, *backref;
2657 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Safe iteration: entries may be unlinked from the list while walking it */
2661 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate the item first */
2662 if (!delete && rec->ino == root_dirid) {
2663 if (!rec->found_inode_item) {
2664 ret = create_inode_item(root, rec, 1);
2671 /* Index 0 for root dir's are special, don't mess with it */
2672 if (rec->ino == root_dirid && backref->index == 0)
2676 ((backref->found_dir_index && !backref->found_inode_ref) ||
2677 (backref->found_dir_index && backref->found_inode_ref &&
2678 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
/* Dir index without inode ref, or with a mismatching index: remove it */
2679 ret = delete_dir_index(root, backref);
2683 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it */
2688 if (!delete && !backref->found_dir_index &&
2689 backref->found_dir_item && backref->found_inode_ref) {
2690 ret = add_missing_dir_index(root, inode_cache, rec,
/* A complete, error-free backref needs no further tracking */
2695 if (backref->found_dir_item &&
2696 backref->found_dir_index) {
2697 if (!backref->errors &&
2698 backref->found_inode_ref) {
2699 list_del(&backref->list);
/* Only the inode ref exists: add the dir index/item pair unless the name conflicts */
2706 if (!delete && (!backref->found_dir_index &&
2707 !backref->found_dir_item &&
2708 backref->found_inode_ref)) {
2709 struct btrfs_trans_handle *trans;
2710 struct btrfs_key location;
2712 ret = check_dir_conflict(root, backref->name,
2718 * let nlink fixing routine to handle it,
2719 * which can do it better.
2724 location.objectid = rec->ino;
2725 location.type = BTRFS_INODE_ITEM_KEY;
2726 location.offset = 0;
2728 trans = btrfs_start_transaction(root, 1);
2729 if (IS_ERR(trans)) {
2730 ret = PTR_ERR(trans);
2733 fprintf(stderr, "adding missing dir index/item pair "
2735 (unsigned long long)rec->ino);
2736 ret = btrfs_insert_dir_item(trans, root, backref->name,
2738 backref->dir, &location,
2739 imode_to_type(rec->imode),
2742 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing: recreate it */
2746 if (!delete && (backref->found_inode_ref &&
2747 backref->found_dir_index &&
2748 backref->found_dir_item &&
2749 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750 !rec->found_inode_item)) {
2751 ret = create_inode_item(root, rec, 0);
2758 return ret ? ret : repaired;
2762 * To determine the file type for nlink/inode_item repair
2764 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* file type of @rec and store it in @type. The
 * mode from the inode item is used when one was found; otherwise the type
 * is taken from a backref that has a dir index or dir item.
 */
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2769 struct inode_backref *backref;
2771 /* For inode item recovered case */
2772 if (rec->found_inode_item) {
2773 *type = imode_to_type(rec->imode);
/* Fall back to the file type recorded in the directory entries */
2777 list_for_each_entry(backref, &rec->backrefs, list) {
2778 if (backref->found_dir_index || backref->found_dir_item) {
2779 *type = backref->filetype;
2787 * To determine the file name for nlink repair
2789 * Return 0 if file name is found, set name and namelen.
2790 * Return -ENOENT if file name is not found.
2792 static int find_file_name(struct inode_record *rec,
2793 char *name, int *namelen)
2795 struct inode_backref *backref;
2797 list_for_each_entry(backref, &rec->backrefs, list) {
2798 if (backref->found_dir_index || backref->found_dir_item ||
2799 backref->found_inode_ref) {
2800 memcpy(name, backref->name, backref->namelen);
2801 *namelen = backref->namelen;
2808 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from its backrefs.
 *
 * Every recorded backref is unlinked with btrfs_unlink(); backrefs that do
 * not have all of dir index, dir item and inode ref are dropped from
 * rec->backrefs.  The on-disk nlink is then set to 0 and the remaining
 * backrefs are re-added with btrfs_add_link().
 *
 * @path is released before returning.
 */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810 struct btrfs_root *root,
2811 struct btrfs_path *path,
2812 struct inode_record *rec)
2814 struct inode_backref *backref;
2815 struct inode_backref *tmp;
2816 struct btrfs_key key;
2817 struct btrfs_inode_item *inode_item;
2820 /* We don't believe this either, reset it and iterate backref */
2821 rec->found_link = 0;
2823 /* Remove all backref including the valid ones */
2824 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826 backref->index, backref->name,
2827 backref->namelen, 0);
2831 /* remove invalid backref, so it won't be added back */
2832 if (!(backref->found_dir_index &&
2833 backref->found_dir_item &&
2834 backref->found_inode_ref)) {
2835 list_del(&backref->list);
2842 /* Set nlink to 0 */
2843 key.objectid = rec->ino;
2844 key.type = BTRFS_INODE_ITEM_KEY;
/* cow=1: the inode item is modified in place below */
2846 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2853 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854 struct btrfs_inode_item);
2855 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856 btrfs_mark_buffer_dirty(path->nodes[0]);
2857 btrfs_release_path(path);
2860 * Add back valid inode_ref/dir_item/dir_index,
2861 * add_link() will handle the nlink inc, so new nlink must be correct
2863 list_for_each_entry(backref, &rec->backrefs, list) {
2864 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865 backref->name, backref->namelen,
2866 backref->filetype, &backref->index, 1);
2871 btrfs_release_path(path);
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876 struct btrfs_root *root,
2877 struct btrfs_path *path,
2880 struct btrfs_key key, found_key;
2883 btrfs_init_path(path);
2884 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2886 key.type = BTRFS_INODE_ITEM_KEY;
2887 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2889 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890 path->slots[0] - 1);
2891 *highest_ino = found_key.objectid;
2894 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2896 btrfs_release_path(path);
/*
 * Repair a wrong link count of @rec.
 *
 * A file name and type are fetched first (falling back to the inode number
 * as name and to BTRFS_FT_REG_FILE as type), then reset_nlink() rebuilds the
 * links.  If no link is left (rec->found_link == 0) a "lost+found" directory
 * is created and the inode is linked into it; on -EEXIST the name gets the
 * inode number appended as suffix and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors and @path is released
 * at the end.
 */
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901 struct btrfs_root *root,
2902 struct btrfs_path *path,
2903 struct inode_record *rec)
2905 char *dir_name = "lost+found";
2906 char namebuf[BTRFS_NAME_LEN] = {0};
2911 int name_recovered = 0;
2912 int type_recovered = 0;
2916 * Get file name and type first before these invalid inode ref
2917 * are deleted by remove_all_invalid_backref()
2919 name_recovered = !find_file_name(rec, namebuf, &namelen);
2920 type_recovered = !find_file_type(rec, &type);
2922 if (!name_recovered) {
2923 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924 rec->ino, rec->ino);
2925 namelen = count_digits(rec->ino);
2926 sprintf(namebuf, "%llu", rec->ino);
2929 if (!type_recovered) {
2930 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2932 type = BTRFS_FT_REG_FILE;
2936 ret = reset_nlink(trans, root, path, rec);
2939 "Failed to reset nlink for inode %llu: %s\n",
2940 rec->ino, strerror(-ret));
/* No valid link survived: park the inode under lost+found */
2944 if (rec->found_link == 0) {
2945 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2949 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2953 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954 dir_name, strerror(-ret));
2957 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958 namebuf, namelen, type, NULL, 1);
2960 * Add ".INO" suffix several times to handle case where
2961 * "FILENAME.INO" is already taken by another file.
2963 while (ret == -EEXIST) {
2965 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2967 if (namelen + count_digits(rec->ino) + 1 >
2972 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2974 namelen += count_digits(rec->ino) + 1;
2975 ret = btrfs_add_link(trans, root, rec->ino,
2976 lost_found_ino, namebuf,
2977 namelen, type, NULL, 1);
2981 "Failed to link the inode %llu to %s dir: %s\n",
2982 rec->ino, dir_name, strerror(-ret));
2986 * Just increase the found_link, don't actually add the
2987 * backref. This will make things easier and this inode
2988 * record will be freed after the repair is done.
2989 * So fsck will not report problem about this inode.
2992 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993 namelen, namebuf, dir_name);
2995 printf("Fixed the nlink of inode %llu\n", rec->ino);
2998 * Clear the flag anyway, or we will loop forever for the same inode
2999 * as it will not be removed from the bad inode list and the dead loop
3002 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003 btrfs_release_path(path);
3008 * Check if there is any normal(reg or prealloc) file extent for given
3010 * This is used to determine the file type when neither its dir_index/item or
3011 * inode_item exists.
3013 * This will *NOT* report error, if any error happens, just consider it does
3014 * not have any normal file extent.
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3018 struct btrfs_path path;
3019 struct btrfs_key key;
3020 struct btrfs_key found_key;
3021 struct btrfs_file_extent_item *fi;
3025 btrfs_init_path(&path);
3027 key.type = BTRFS_EXTENT_DATA_KEY;
3030 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3035 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036 ret = btrfs_next_leaf(root, &path);
3043 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3045 if (found_key.objectid != ino ||
3046 found_key.type != BTRFS_EXTENT_DATA_KEY)
3048 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049 struct btrfs_file_extent_item);
3050 type = btrfs_file_extent_type(path.nodes[0], fi);
3051 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057 btrfs_release_path(&path);
3061 static u32 btrfs_type_to_imode(u8 type)
3063 static u32 imode_by_btrfs_type[] = {
3064 [BTRFS_FT_REG_FILE] = S_IFREG,
3065 [BTRFS_FT_DIR] = S_IFDIR,
3066 [BTRFS_FT_CHRDEV] = S_IFCHR,
3067 [BTRFS_FT_BLKDEV] = S_IFBLK,
3068 [BTRFS_FT_FIFO] = S_IFIFO,
3069 [BTRFS_FT_SOCK] = S_IFSOCK,
3070 [BTRFS_FT_SYMLINK] = S_IFLNK,
3073 return imode_by_btrfs_type[(type)];
/*
 * Recreate the missing inode item of @rec.
 *
 * The file type comes from find_file_type() when possible; otherwise it is
 * guessed from the presence of normal file extents, dir items or orphan
 * extents, with a regular file as the last fallback.  The item is created
 * with btrfs_new_inode().
 *
 * Afterwards I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs for this inode.
 */
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077 struct btrfs_root *root,
3078 struct btrfs_path *path,
3079 struct inode_record *rec)
3083 int type_recovered = 0;
3086 printf("Trying to rebuild inode:%llu\n", rec->ino);
3088 type_recovered = !find_file_type(rec, &filetype);
3091 * Try to determine inode type if type not found.
3093 * For found regular file extent, it must be FILE.
3094 * For found dir_item/index, it must be DIR.
3096 * For undetermined one, use FILE as fallback.
3099 * 1. If found backref(inode_index/item is already handled) to it,
3101 * Need new inode-inode ref structure to allow search for that.
3103 if (!type_recovered) {
3104 if (rec->found_file_extent &&
3105 find_normal_file_extent(root, rec->ino)) {
3107 filetype = BTRFS_FT_REG_FILE;
3108 } else if (rec->found_dir_item) {
3110 filetype = BTRFS_FT_DIR;
3111 } else if (!list_empty(&rec->orphan_extents)) {
3113 filetype = BTRFS_FT_REG_FILE;
3115 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3118 filetype = BTRFS_FT_REG_FILE;
3122 ret = btrfs_new_inode(trans, root, rec->ino,
3123 mode | btrfs_type_to_imode(filetype));
3128 * Here inode rebuild is done, we only rebuild the inode item,
3129 * don't repair the nlink(like move to lost+found).
3130 * That is the job of nlink repair.
3132 * We just fill the record and return
/* NOTE(review): this sets found_dir_item, not found_inode_item - confirm intended */
3134 rec->found_dir_item = 1;
3135 rec->imode = mode | btrfs_type_to_imode(filetype);
3137 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138 /* Ensure the inode_nlinks repair function will be called */
3139 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Handle the orphan data extents attached to @rec.
 *
 * For each entry of rec->orphan_extents the range is probed with
 * btrfs_get_extent().  A conflicting orphan is released with
 * btrfs_free_extent(); otherwise it is inserted as a file extent with
 * btrfs_insert_file_extent() and the record's found_size and hole tree are
 * updated.  Each processed entry is removed from the list and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root,
3146 struct btrfs_path *path,
3147 struct inode_record *rec)
3149 struct orphan_data_extent *orphan;
3150 struct orphan_data_extent *tmp;
3153 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3155 * Check for conflicting file extents
3157 * Here we don't know whether the extents is compressed or not,
3158 * so we can only assume it not compressed nor data offset,
3159 * and use its disk_len as extent length.
3161 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162 orphan->offset, orphan->disk_len, 0);
3163 btrfs_release_path(path);
3168 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169 orphan->disk_bytenr, orphan->disk_len);
3170 ret = btrfs_free_extent(trans,
3171 root->fs_info->extent_root,
3172 orphan->disk_bytenr, orphan->disk_len,
3173 0, root->objectid, orphan->objectid,
3178 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179 orphan->offset, orphan->disk_bytenr,
3180 orphan->disk_len, orphan->disk_len);
3184 /* Update file size info */
3185 rec->found_size += orphan->disk_len;
3186 if (rec->found_size == rec->nbytes)
3187 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3189 /* Update the file extent hole info too */
3190 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3194 if (RB_EMPTY_ROOT(&rec->holes))
3195 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3197 list_del(&orphan->list);
3200 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes.
 *
 * Each hole is punched with btrfs_punch_hole() and then removed from the
 * hole tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 * A file that lost all its file extents gets one hole covering
 * [0, round_up(isize, sectorsize)).
 */
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206 struct btrfs_root *root,
3207 struct btrfs_path *path,
3208 struct inode_record *rec)
3210 struct rb_node *node;
3211 struct file_extent_hole *hole;
3215 node = rb_first(&rec->holes);
3219 hole = rb_entry(node, struct file_extent_hole, node);
3220 ret = btrfs_punch_hole(trans, root, rec->ino,
3221 hole->start, hole->len);
3224 ret = del_file_extent_hole(&rec->holes, hole->start,
3228 if (RB_EMPTY_ROOT(&rec->holes))
3229 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first node: the tree was just modified */
3230 node = rb_first(&rec->holes);
3232 /* special case for a file losing all its file extent */
3234 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235 round_up(rec->isize, root->sectorsize));
3239 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for @rec inside one transaction.
 *
 * Only records carrying at least one of the repairable error bits listed
 * below are handled.  The repairs run in a fixed order and each later one
 * is skipped once an earlier one returned non-zero.  The transaction is
 * committed regardless of the repair result.
 */
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3247 struct btrfs_trans_handle *trans;
3248 struct btrfs_path path;
3251 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252 I_ERR_NO_ORPHAN_ITEM |
3253 I_ERR_LINK_COUNT_WRONG |
3254 I_ERR_NO_INODE_ITEM |
3255 I_ERR_FILE_EXTENT_ORPHAN |
3256 I_ERR_FILE_EXTENT_DISCOUNT|
3257 I_ERR_FILE_NBYTES_WRONG)))
3261 * For nlink repair, it may create a dir and add link, so
3262 * 2 for parent(256)'s dir_index and dir_item
3263 * 2 for lost+found dir's inode_item and inode_ref
3264 * 1 for the new inode_ref of the file
3265 * 2 for lost+found dir's dir_index and dir_item for the file
3267 trans = btrfs_start_transaction(root, 7);
3269 return PTR_ERR(trans);
3271 btrfs_init_path(&path);
/* The inode item is rebuilt first */
3272 if (rec->errors & I_ERR_NO_INODE_ITEM)
3273 ret = repair_inode_no_item(trans, root, &path, rec);
3274 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279 ret = repair_inode_isize(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283 ret = repair_inode_nlinks(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285 ret = repair_inode_nbytes(trans, root, &path, rec);
3286 btrfs_commit_transaction(trans, root);
3287 btrfs_release_path(&path);
/*
 * Check all inode records collected for @root in @inode_cache.
 *
 * In repair mode the backrefs of every record are repaired first, in
 * several stages.  Then the root directory record is verified (and
 * recreated with btrfs_make_root_dir() when missing), and finally every
 * remaining record is removed from the cache, checked, optionally repaired
 * with try_repair_inode(), reported and freed.
 *
 * Return 0 when no error was counted, -1 otherwise.
 */
3291 static int check_inode_recs(struct btrfs_root *root,
3292 struct cache_tree *inode_cache)
3294 struct cache_extent *cache;
3295 struct ptr_node *node;
3296 struct inode_record *rec;
3297 struct inode_backref *backref;
3302 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is not checked; leftover records only trigger a warning */
3304 if (btrfs_root_refs(&root->root_item) == 0) {
3305 if (!cache_tree_empty(inode_cache))
3306 fprintf(stderr, "warning line %d\n", __LINE__);
3311 * We need to repair backrefs first because we could change some of the
3312 * errors in the inode recs.
3314 * We also need to go through and delete invalid backrefs first and then
3315 * add the correct ones second. We do this because we may get EEXIST
3316 * when adding back the correct index because we hadn't yet deleted the
3319 * For example, if we were missing a dir index then the directories
3320 * isize would be wrong, so if we fixed the isize to what we thought it
3321 * would be and then fixed the backref we'd still have a invalid fs, so
3322 * we need to add back the dir index and then check to see if the isize
3327 if (stage == 3 && !err)
3330 cache = search_cache_extent(inode_cache, 0);
3331 while (repair && cache) {
3332 node = container_of(cache, struct ptr_node, cache);
3334 cache = next_cache_extent(cache);
3336 /* Need to free everything up and rescan */
3338 remove_cache_extent(inode_cache, &node->cache);
3340 free_inode_rec(rec);
3344 if (list_empty(&rec->backrefs))
3347 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory handling */
3361 rec = get_inode_rec(inode_cache, root_dirid, 0);
3362 BUG_ON(IS_ERR(rec));
3364 ret = check_root_dir(rec);
3366 fprintf(stderr, "root %llu root dir %llu error\n",
3367 (unsigned long long)root->root_key.objectid,
3368 (unsigned long long)root_dirid);
3369 print_inode_error(root, rec);
3374 struct btrfs_trans_handle *trans;
3376 trans = btrfs_start_transaction(root, 1);
3377 if (IS_ERR(trans)) {
3378 err = PTR_ERR(trans);
3383 "root %llu missing its root dir, recreating\n",
3384 (unsigned long long)root->objectid);
3386 ret = btrfs_make_root_dir(trans, root, root_dirid);
3389 btrfs_commit_transaction(trans, root);
3393 fprintf(stderr, "root %llu root dir %llu not found\n",
3394 (unsigned long long)root->root_key.objectid,
3395 (unsigned long long)root_dirid);
/* Drain the cache: every record is removed, examined and freed */
3399 cache = search_cache_extent(inode_cache, 0);
3402 node = container_of(cache, struct ptr_node, cache);
3404 remove_cache_extent(inode_cache, &node->cache);
3406 if (rec->ino == root_dirid ||
3407 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3408 free_inode_rec(rec);
3412 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3413 ret = check_orphan_item(root, rec->ino);
3415 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3416 if (can_free_inode_rec(rec)) {
3417 free_inode_rec(rec);
3422 if (!rec->found_inode_item)
3423 rec->errors |= I_ERR_NO_INODE_ITEM;
3424 if (rec->found_link != rec->nlink)
3425 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3427 ret = try_repair_inode(root, rec);
3428 if (ret == 0 && can_free_inode_rec(rec)) {
3429 free_inode_rec(rec);
3435 if (!(repair && ret == 0))
3437 print_inode_error(root, rec);
3438 list_for_each_entry(backref, &rec->backrefs, list) {
3439 if (!backref->found_dir_item)
3440 backref->errors |= REF_ERR_NO_DIR_ITEM;
3441 if (!backref->found_dir_index)
3442 backref->errors |= REF_ERR_NO_DIR_INDEX;
3443 if (!backref->found_inode_ref)
3444 backref->errors |= REF_ERR_NO_INODE_REF;
3445 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3446 " namelen %u name %s filetype %d errors %x",
3447 (unsigned long long)backref->dir,
3448 (unsigned long long)backref->index,
3449 backref->namelen, backref->name,
3450 backref->filetype, backref->errors);
3451 print_ref_error(backref->errors);
3453 free_inode_rec(rec);
3455 return (error > 0) ? -1 : 0;
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3461 struct cache_extent *cache;
3462 struct root_record *rec = NULL;
3465 cache = lookup_cache_extent(root_cache, objectid, 1);
3467 rec = container_of(cache, struct root_record, cache);
3469 rec = calloc(1, sizeof(*rec));
3471 return ERR_PTR(-ENOMEM);
3472 rec->objectid = objectid;
3473 INIT_LIST_HEAD(&rec->backrefs);
3474 rec->cache.start = objectid;
3475 rec->cache.size = 1;
3477 ret = insert_cache_extent(root_cache, &rec->cache);
3479 return ERR_PTR(-EEXIST);
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485 u64 ref_root, u64 dir, u64 index,
3486 const char *name, int namelen)
3488 struct root_backref *backref;
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (backref->ref_root != ref_root || backref->dir != dir ||
3492 backref->namelen != namelen)
3494 if (memcmp(name, backref->name, namelen))
3499 backref = calloc(1, sizeof(*backref) + namelen + 1);
3502 backref->ref_root = ref_root;
3504 backref->index = index;
3505 backref->namelen = namelen;
3506 memcpy(backref->name, name, namelen);
3507 backref->name[namelen] = '\0';
3508 list_add_tail(&backref->list, &rec->backrefs);
3512 static void free_root_record(struct cache_extent *cache)
3514 struct root_record *rec;
3515 struct root_backref *backref;
3517 rec = container_of(cache, struct root_record, cache);
3518 while (!list_empty(&rec->backrefs)) {
3519 backref = to_root_backref(rec->backrefs.next);
3520 list_del(&backref->list);
3527 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one observed reference item for root @root_id.
 *
 * @item_type tells which kind of item was seen (dir item, dir index, root
 * ref or root backref); the matching found_* flag of the backref is set and
 * @errors is OR-ed into its error mask.  For item types other than dir
 * item the index is compared with an index recorded earlier and
 * REF_ERR_INDEX_UNMATCH is flagged on mismatch.  A backref with both a
 * forward ref and a dir item is marked reachable.
 */
3529 static int add_root_backref(struct cache_tree *root_cache,
3530 u64 root_id, u64 ref_root, u64 dir, u64 index,
3531 const char *name, int namelen,
3532 int item_type, int errors)
3534 struct root_record *rec;
3535 struct root_backref *backref;
3537 rec = get_root_rec(root_cache, root_id);
3538 BUG_ON(IS_ERR(rec));
3539 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3542 backref->errors |= errors;
3544 if (item_type != BTRFS_DIR_ITEM_KEY) {
3545 if (backref->found_dir_index || backref->found_back_ref ||
3546 backref->found_forward_ref) {
3547 if (backref->index != index)
3548 backref->errors |= REF_ERR_INDEX_UNMATCH;
3550 backref->index = index;
3554 if (item_type == BTRFS_DIR_ITEM_KEY) {
3555 if (backref->found_forward_ref)
3557 backref->found_dir_item = 1;
3558 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559 backref->found_dir_index = 1;
3560 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561 if (backref->found_forward_ref)
3562 backref->errors |= REF_ERR_DUP_ROOT_REF;
3563 else if (backref->found_dir_item)
3565 backref->found_forward_ref = 1;
3566 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567 if (backref->found_back_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569 backref->found_back_ref = 1;
3574 if (backref->found_forward_ref && backref->found_dir_item)
3575 backref->reachable = 1;
/*
 * Convert the inode records gathered in @src_cache while walking @root
 * into root backrefs stored in @dst_cache.
 *
 * For the tree reloc root the source records are simply freed.  Otherwise
 * every record is removed from @src_cache, checked with is_child_root(),
 * and its dir item / dir index backrefs are passed to add_root_backref()
 * before the record is freed.
 */
3579 static int merge_root_recs(struct btrfs_root *root,
3580 struct cache_tree *src_cache,
3581 struct cache_tree *dst_cache)
3583 struct cache_extent *cache;
3584 struct ptr_node *node;
3585 struct inode_record *rec;
3586 struct inode_backref *backref;
3589 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590 free_inode_recs_tree(src_cache);
3595 cache = search_cache_extent(src_cache, 0);
3598 node = container_of(cache, struct ptr_node, cache);
3600 remove_cache_extent(src_cache, &node->cache);
3603 ret = is_child_root(root, root->objectid, rec->ino);
3609 list_for_each_entry(backref, &rec->backrefs, list) {
/* An inode ref on one of these records is treated as a fatal inconsistency */
3610 BUG_ON(backref->found_inode_ref);
3611 if (backref->found_dir_item)
3612 add_root_backref(dst_cache, rec->ino,
3613 root->root_key.objectid, backref->dir,
3614 backref->index, backref->name,
3615 backref->namelen, BTRFS_DIR_ITEM_KEY,
3617 if (backref->found_dir_index)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_INDEX_KEY,
3625 free_inode_rec(rec);
/*
 * Verify the references between subvolume roots recorded in @root_cache.
 *
 * The first pass walks the records and clears the reachable flag of
 * backrefs in relation to the found_ref count of the referencing root.
 * The second pass reports unreferenced fs trees (after consulting
 * check_orphan_item()) and prints every reachable backref that carries
 * errors.
 *
 * Return 1 when at least one error was counted, 0 otherwise.
 */
3632 static int check_root_refs(struct btrfs_root *root,
3633 struct cache_tree *root_cache)
3635 struct root_record *rec;
3636 struct root_record *ref_root;
3637 struct root_backref *backref;
3638 struct cache_extent *cache;
3644 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645 BUG_ON(IS_ERR(rec));
3648 /* fixme: this can not detect circular references */
3651 cache = search_cache_extent(root_cache, 0);
3655 rec = container_of(cache, struct root_record, cache);
3656 cache = next_cache_extent(cache);
3658 if (rec->found_ref == 0)
3661 list_for_each_entry(backref, &rec->backrefs, list) {
3662 if (!backref->reachable)
3665 ref_root = get_root_rec(root_cache,
3667 BUG_ON(IS_ERR(ref_root));
3668 if (ref_root->found_ref > 0)
3671 backref->reachable = 0;
3673 if (rec->found_ref == 0)
/* Reporting pass */
3679 cache = search_cache_extent(root_cache, 0);
3683 rec = container_of(cache, struct root_record, cache);
3684 cache = next_cache_extent(cache);
3686 if (rec->found_ref == 0 &&
3687 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689 ret = check_orphan_item(root->fs_info->tree_root,
3695 * If we don't have a root item then we likely just have
3696 * a dir item in a snapshot for this root but no actual
3697 * ref key or anything so it's meaningless.
3699 if (!rec->found_root_item)
3702 fprintf(stderr, "fs tree %llu not referenced\n",
3703 (unsigned long long)rec->objectid);
3707 if (rec->found_ref > 0 && !rec->found_root_item)
3709 list_for_each_entry(backref, &rec->backrefs, list) {
3710 if (!backref->found_dir_item)
3711 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712 if (!backref->found_dir_index)
3713 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714 if (!backref->found_back_ref)
3715 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716 if (!backref->found_forward_ref)
3717 backref->errors |= REF_ERR_NO_ROOT_REF;
3718 if (backref->reachable && backref->errors)
3725 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726 (unsigned long long)rec->objectid, rec->found_ref,
3727 rec->found_root_item ? "" : "not found");
3729 list_for_each_entry(backref, &rec->backrefs, list) {
3730 if (!backref->reachable)
3732 if (!backref->errors && rec->found_root_item)
3734 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735 " index %llu namelen %u name %s errors %x\n",
3736 (unsigned long long)backref->ref_root,
3737 (unsigned long long)backref->dir,
3738 (unsigned long long)backref->index,
3739 backref->namelen, backref->name,
3741 print_ref_error(backref->errors);
3744 return errors > 0 ? 1 : 0;
/*
 * Parse the ROOT_REF / ROOT_BACKREF item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * A name longer than BTRFS_NAME_LEN is read truncated to BTRFS_NAME_LEN
 * and flagged with REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the child
 * root is key->offset and the referencing root is key->objectid; for the
 * other key type the two are swapped.
 */
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748 struct btrfs_key *key,
3749 struct cache_tree *root_cache)
3755 struct btrfs_root_ref *ref;
3756 char namebuf[BTRFS_NAME_LEN];
3759 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3761 dirid = btrfs_root_ref_dirid(eb, ref);
3762 index = btrfs_root_ref_sequence(eb, ref);
3763 name_len = btrfs_root_ref_name_len(eb, ref);
3765 if (name_len <= BTRFS_NAME_LEN) {
3769 len = BTRFS_NAME_LEN;
3770 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are read from directly behind the btrfs_root_ref structure */
3772 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3774 if (key->type == BTRFS_ROOT_REF_KEY) {
3775 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776 index, namebuf, len, key->type, error);
3778 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779 index, namebuf, len, key->type, error);
3784 static void free_corrupt_block(struct cache_extent *cache)
3786 struct btrfs_corrupt_block *corrupt;
3788 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3792 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3795 * Repair the btree of the given root.
3797 * The fix is to remove the node key in corrupt_blocks cache_tree.
3798 * and rebalance the tree.
3799 * After the fix, the btree should be writeable.
/*
 * @root:           tree to repair
 * @corrupt_blocks: cache tree of btrfs_corrupt_block entries, each giving
 *                  the level and key of a corrupted tree block
 *
 * Works in two passes inside one transaction: first the pointer to every
 * corrupted block is deleted with btrfs_del_ptr() and its extent freed,
 * then each key is searched again so that btrfs_search_slot() can balance
 * the tree.  Nothing is done when @corrupt_blocks is empty.
 */
3801 static int repair_btree(struct btrfs_root *root,
3802 struct cache_tree *corrupt_blocks)
3804 struct btrfs_trans_handle *trans;
3805 struct btrfs_path path;
3806 struct btrfs_corrupt_block *corrupt;
3807 struct cache_extent *cache;
3808 struct btrfs_key key;
3813 if (cache_tree_empty(corrupt_blocks))
3816 trans = btrfs_start_transaction(root, 1);
3817 if (IS_ERR(trans)) {
3818 ret = PTR_ERR(trans);
3819 fprintf(stderr, "Error starting transaction: %s\n",
3823 btrfs_init_path(&path);
3824 cache = first_cache_extent(corrupt_blocks);
3826 corrupt = container_of(cache, struct btrfs_corrupt_block,
3828 level = corrupt->level;
/* Stop the search at the level of the corrupted block's parent pointer */
3829 path.lowest_level = level;
3830 key.objectid = corrupt->key.objectid;
3831 key.type = corrupt->key.type;
3832 key.offset = corrupt->key.offset;
3835 * Here we don't want to do any tree balance, since it may
3836 * cause a balance with corrupted brother leaf/node,
3837 * so ins_len set to 0 here.
3838 * Balance will be done after all corrupt node/leaf is deleted.
3840 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3843 offset = btrfs_node_blockptr(path.nodes[level],
3846 /* Remove the ptr */
3847 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3851 * Remove the corresponding extent
3852 * return value is not concerned.
3854 btrfs_release_path(&path);
3855 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856 0, root->root_key.objectid,
3858 cache = next_cache_extent(cache);
3861 /* Balance the btree using btrfs_search_slot() */
3862 cache = first_cache_extent(corrupt_blocks);
3864 corrupt = container_of(cache, struct btrfs_corrupt_block,
3866 memcpy(&key, &corrupt->key, sizeof(key));
3867 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3870 /* return will always >0 since it won't find the item */
3872 btrfs_release_path(&path);
3873 cache = next_cache_extent(cache);
3876 btrfs_commit_transaction(trans, root);
3877 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * A per-root corrupt_blocks cache is installed in fs_info for the duration
 * of the check.  Orphan data extents are moved to their inode records, the
 * root block is verified, and the tree is walked with walk_down_tree() /
 * walk_up_tree(), starting either at the root node or at the recorded
 * drop_progress key.  Corrupted tree blocks found on the way are listed
 * and handed to repair_btree().  Finally the collected root and inode
 * records are processed by merge_root_recs() and check_inode_recs(), and
 * the corrupt block cache and orphan extent list are released.
 */
3881 static int check_fs_root(struct btrfs_root *root,
3882 struct cache_tree *root_cache,
3883 struct walk_control *wc)
3889 struct btrfs_path path;
3890 struct shared_node root_node;
3891 struct root_record *rec;
3892 struct btrfs_root_item *root_item = &root->root_item;
3893 struct cache_tree corrupt_blocks;
3894 struct orphan_data_extent *orphan;
3895 struct orphan_data_extent *tmp;
3896 enum btrfs_tree_block_status status;
3897 struct node_refs nrefs;
3900 * Reuse the corrupt_block cache tree to record corrupted tree block
3902 * Unlike the usage in extent tree check, here we do it in a per
3903 * fs/subvol tree base.
3905 cache_tree_init(&corrupt_blocks);
3906 root->fs_info->corrupt_blocks = &corrupt_blocks;
3908 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909 rec = get_root_rec(root_cache, root->root_key.objectid);
3910 BUG_ON(IS_ERR(rec));
3911 if (btrfs_root_refs(root_item) > 0)
3912 rec->found_root_item = 1;
3915 btrfs_init_path(&path);
3916 memset(&root_node, 0, sizeof(root_node));
3917 cache_tree_init(&root_node.root_cache);
3918 cache_tree_init(&root_node.inode_cache);
3919 memset(&nrefs, 0, sizeof(nrefs));
3921 /* Move the orphan extent record to corresponding inode_record */
3922 list_for_each_entry_safe(orphan, tmp,
3923 &root->orphan_data_extents, list) {
3924 struct inode_record *inode;
3926 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3928 BUG_ON(IS_ERR(inode));
3929 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930 list_move(&orphan->list, &inode->orphan_extents);
/* Prepare the walk control to start at the root's level */
3933 level = btrfs_header_level(root->node);
3934 memset(wc->nodes, 0, sizeof(wc->nodes));
3935 wc->nodes[level] = &root_node;
3936 wc->active_node = level;
3937 wc->root_level = level;
3939 /* We may not have checked the root block, lets do that now */
3940 if (btrfs_is_leaf(root->node))
3941 status = btrfs_check_leaf(root, NULL, root->node);
3943 status = btrfs_check_node(root, NULL, root->node);
3944 if (status != BTRFS_TREE_BLOCK_CLEAN)
3947 if (btrfs_root_refs(root_item) > 0 ||
3948 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949 path.nodes[level] = root->node;
3950 extent_buffer_get(root->node);
3951 path.slots[level] = 0;
3953 struct btrfs_key key;
3954 struct btrfs_disk_key found_key;
3956 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957 level = root_item->drop_level;
3958 path.lowest_level = level;
3959 if (level > btrfs_header_level(root->node) ||
3960 level >= BTRFS_MAX_LEVEL) {
3961 error("ignoring invalid drop level: %u", level);
3964 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3967 btrfs_node_key(path.nodes[level], &found_key,
3969 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970 sizeof(found_key)));
3974 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980 wret = walk_up_tree(root, &path, wc, &level);
3987 btrfs_release_path(&path);
3989 if (!cache_tree_empty(&corrupt_blocks)) {
3990 struct cache_extent *cache;
3991 struct btrfs_corrupt_block *corrupt;
3993 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994 root->root_key.objectid);
3995 cache = first_cache_extent(&corrupt_blocks);
3997 corrupt = container_of(cache,
3998 struct btrfs_corrupt_block,
4000 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001 cache->start, corrupt->level,
4002 corrupt->key.objectid, corrupt->key.type,
4003 corrupt->key.offset);
4004 cache = next_cache_extent(cache);
4007 printf("Try to repair the btree for root %llu\n",
4008 root->root_key.objectid);
4009 ret = repair_btree(root, &corrupt_blocks);
4011 fprintf(stderr, "Failed to repair btree: %s\n",
4014 printf("Btree for root %llu is fixed\n",
4015 root->root_key.objectid);
4019 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4023 if (root_node.current) {
4024 root_node.current->checked = 1;
4025 maybe_free_inode_rec(&root_node.inode_cache,
4029 err = check_inode_recs(root, &root_node.inode_cache);
/* Detach the per-root corrupt block cache again */
4033 free_corrupt_blocks_tree(&corrupt_blocks);
4034 root->fs_info->corrupt_blocks = NULL;
4035 free_orphan_data_extents(&root->orphan_data_extents);
4039 static int fs_root_objectid(u64 objectid)
4041 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4044 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree referenced from the tree root.
 *
 * The tree root is scanned item by item: each ROOT_ITEM whose objectid
 * passes fs_root_objectid() is read and checked with check_fs_root(), and
 * each ROOT_REF / ROOT_BACKREF item is recorded through process_root_ref().
 * When the tree root node changes during the scan, or check_fs_root()
 * returns -EAGAIN, the collected root records are dropped and the path is
 * released.
 *
 * Tree reloc roots are read uncached and freed again right after the check.
 */
4047 static int check_fs_roots(struct btrfs_root *root,
4048 struct cache_tree *root_cache)
4050 struct btrfs_path path;
4051 struct btrfs_key key;
4052 struct walk_control wc;
4053 struct extent_buffer *leaf, *tree_node;
4054 struct btrfs_root *tmp_root;
4055 struct btrfs_root *tree_root = root->fs_info->tree_root;
4059 if (ctx.progress_enabled) {
4060 ctx.tp = TASK_FS_ROOTS;
4061 task_start(ctx.info);
4065 * Just in case we made any changes to the extent tree that weren't
4066 * reflected into the free space cache yet.
4069 reset_cached_block_groups(root->fs_info);
4070 memset(&wc, 0, sizeof(wc));
4071 cache_tree_init(&wc.shared);
4072 btrfs_init_path(&path);
4077 key.type = BTRFS_ROOT_ITEM_KEY;
4078 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so that a later change can be detected */
4083 tree_node = tree_root->node;
4085 if (tree_node != tree_root->node) {
4086 free_root_recs_tree(root_cache);
4087 btrfs_release_path(&path);
4090 leaf = path.nodes[0];
4091 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092 ret = btrfs_next_leaf(tree_root, &path);
4098 leaf = path.nodes[0];
4100 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102 fs_root_objectid(key.objectid)) {
4103 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104 tmp_root = btrfs_read_fs_root_no_cache(
4105 root->fs_info, &key);
4107 key.offset = (u64)-1;
4108 tmp_root = btrfs_read_fs_root(
4109 root->fs_info, &key);
4111 if (IS_ERR(tmp_root)) {
4115 ret = check_fs_root(tmp_root, root_cache, &wc);
4116 if (ret == -EAGAIN) {
4117 free_root_recs_tree(root_cache);
4118 btrfs_release_path(&path);
4123 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124 btrfs_free_fs_root(tmp_root);
4125 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126 key.type == BTRFS_ROOT_BACKREF_KEY) {
4127 process_root_ref(leaf, path.slots[0], &key,
4134 btrfs_release_path(&path);
4136 free_extent_cache_tree(&wc.shared);
4137 if (!cache_tree_empty(&wc.shared))
4138 fprintf(stderr, "warning line %d\n", __LINE__);
4140 task_stop(ctx.info);
4146 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147 * INODE_REF/INODE_EXTREF match.
4149 * @root: the root of the fs/file tree
4150 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4151 * @key: the key of the DIR_ITEM/DIR_INDEX
4152 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4153 * distinguish root_dir from a normal dir/file
4154 * @name: the name in the INODE_REF/INODE_EXTREF
4155 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4156 * @mode: the st_mode of INODE_ITEM
4158 * Return 0 if no error occurred.
4159 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4162 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163 * do not match for normal dir/file.
/*
 * Search @root for the DIR_ITEM/DIR_INDEX item at @key and look through the
 * directory entries packed in it for one that points at the inode of
 * @ref_key (INODE_ITEM key type, offset 0), has the file type derived from
 * @mode and carries exactly @name / @namelen.
 * The result codes are listed in the comment block preceding this function.
 */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166 struct btrfs_key *key, u64 index, char *name,
4167 u32 namelen, u32 mode)
4169 struct btrfs_path path;
4170 struct extent_buffer *node;
4171 struct btrfs_dir_item *di;
4172 struct btrfs_key location;
4173 char namebuf[BTRFS_NAME_LEN] = {0};
4183 btrfs_init_path(&path);
4184 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4186 ret = DIR_ITEM_MISSING;
4190 /* Process root dir and goto out*/
4193 ret = ROOT_DIR_ERROR;
4195 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4197 ref_key->type == BTRFS_INODE_REF_KEY ?
4199 ref_key->objectid, ref_key->offset,
4200 key->type == BTRFS_DIR_ITEM_KEY ?
4201 "DIR_ITEM" : "DIR_INDEX");
4209 /* Process normal file/dir */
4211 ret = DIR_ITEM_MISSING;
4213 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4215 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216 ref_key->objectid, ref_key->offset,
4217 key->type == BTRFS_DIR_ITEM_KEY ?
4218 "DIR_ITEM" : "DIR_INDEX",
4219 key->objectid, key->offset, namelen, name,
4220 imode_to_type(mode));
4224 /* Check whether inode_id/filetype/name match */
4225 node = path.nodes[0];
4226 slot = path.slots[0];
4227 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228 total = btrfs_item_size_nr(node, slot);
/*
 * One item can hold several btrfs_dir_item entries back to back; they are
 * examined while cur is below the item size. ret is reset to
 * DIR_ITEM_MISMATCH for each entry examined.
 */
4229 while (cur < total) {
4230 ret = DIR_ITEM_MISMATCH;
4231 name_len = btrfs_dir_name_len(node, di);
4232 data_len = btrfs_dir_data_len(node, di);
/* The entry must reference the INODE_ITEM of the inode being checked. */
4234 btrfs_dir_item_key_to_cpu(node, di, &location);
4235 if (location.objectid != ref_key->objectid ||
4236 location.type != BTRFS_INODE_ITEM_KEY ||
4237 location.offset != 0)
4240 filetype = btrfs_dir_type(node, di);
4241 if (imode_to_type(mode) != filetype)
/*
 * Guard against a name that runs past the item or exceeds BTRFS_NAME_LEN:
 * a warning is printed, an entry whose header does not even fit is handled
 * separately, and otherwise the length to copy is clamped with min_t().
 */
4244 if (cur + sizeof(*di) + name_len > total ||
4245 name_len > BTRFS_NAME_LEN) {
4246 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4248 key->type == BTRFS_DIR_ITEM_KEY ?
4249 "DIR_ITEM" : "DIR_INDEX",
4250 key->objectid, key->offset, name_len);
4252 if (cur + sizeof(*di) > total)
4254 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes are stored immediately after the entry header. */
4260 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry. */
4267 len = sizeof(*di) + name_len + data_len;
4268 di = (struct btrfs_dir_item *)((char *)di + len);
4271 if (ret == DIR_ITEM_MISMATCH)
4273 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4275 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276 ref_key->objectid, ref_key->offset,
4277 key->type == BTRFS_DIR_ITEM_KEY ?
4278 "DIR_ITEM" : "DIR_INDEX",
4279 key->objectid, key->offset, namelen, name,
4280 imode_to_type(mode));
4282 btrfs_release_path(&path);
4287 * Traverse the given INODE_REF and call find_dir_item() to find related
4288 * DIR_ITEM/DIR_INDEX.
4290 * @root: the root of the fs/file tree
4291 * @ref_key: the key of the INODE_REF
4292 * @refs: the count of INODE_REF
4293 * @mode: the st_mode of INODE_ITEM
4295 * Return 0 if no error occurred.
/*
 * Walk the names packed in one INODE_REF item and, for each of them, look
 * up the matching DIR_INDEX and DIR_ITEM through find_dir_item().
 * The directory that is searched is ref_key->offset.
 */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298 struct extent_buffer *node, int slot, u64 *refs,
4301 struct btrfs_key key;
4302 struct btrfs_inode_ref *ref;
4303 char namebuf[BTRFS_NAME_LEN] = {0};
4311 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312 total = btrfs_item_size_nr(node, slot);
4315 /* Update inode ref count */
4318 index = btrfs_inode_ref_index(node, ref);
4319 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name that overruns the item or BTRFS_NAME_LEN triggers a warning and
 * the copy length is clamped to what fits in both the item and namebuf.
 */
4320 if (cur + sizeof(*ref) + name_len > total ||
4321 name_len > BTRFS_NAME_LEN) {
4322 warning("root %llu INODE_REF[%llu %llu] name too long",
4323 root->objectid, ref_key->objectid, ref_key->offset);
4325 if (total < cur + sizeof(*ref))
4327 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the btrfs_inode_ref header directly. */
4332 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4334 /* Check root dir ref name */
/*
 * NOTE(review): the comparison below is bounded by the raw on-disk name_len
 * while namebuf was filled with len bytes and the lookups further down use
 * len. With name_len below 2 a single dot compares equal to the dot-dot
 * name. Confirm whether len plus an exact length test was intended.
 */
4335 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337 root->objectid, ref_key->objectid, ref_key->offset,
4339 err |= ROOT_DIR_ERROR;
4342 /* Find related DIR_INDEX */
4343 key.objectid = ref_key->offset;
4344 key.type = BTRFS_DIR_INDEX_KEY;
4346 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4349 /* Find related dir_item */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4352 key.offset = btrfs_name_hash(namebuf, len);
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next reference packed in the same item. */
4356 len = sizeof(*ref) + name_len;
4357 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4367 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368 * DIR_ITEM/DIR_INDEX.
4370 * @root: the root of the fs/file tree
4371 * @ref_key: the key of the INODE_EXTREF
4372 * @refs: the count of INODE_EXTREF
4373 * @mode: the st_mode of INODE_ITEM
4375 * Return 0 if no error occurred.
/*
 * Walk the names packed in one INODE_EXTREF item and, for each of them,
 * look up the matching DIR_INDEX and DIR_ITEM through find_dir_item().
 * The directory that is searched is the parent id stored in each extref.
 */
4377 static int check_inode_extref(struct btrfs_root *root,
4378 struct btrfs_key *ref_key,
4379 struct extent_buffer *node, int slot, u64 *refs,
4382 struct btrfs_key key;
4383 struct btrfs_inode_extref *extref;
4384 char namebuf[BTRFS_NAME_LEN] = {0};
4394 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395 total = btrfs_item_size_nr(node, slot);
4398 /* update inode ref count */
4400 name_len = btrfs_inode_extref_name_len(node, extref);
4401 index = btrfs_inode_extref_index(node, extref);
4402 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are warned about and copied truncated.
 * NOTE(review): unlike the INODE_REF path, no comparison against the
 * remaining item size (total) is visible here; confirm that a name running
 * past the item cannot reach read_extent_buffer().
 */
4403 if (name_len <= BTRFS_NAME_LEN) {
4406 len = BTRFS_NAME_LEN;
4407 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the btrfs_inode_extref header directly. */
4410 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4412 /* Check root dir ref name */
/*
 * NOTE(review): bounded by the raw name_len rather than the clamped len
 * used for the copy above; confirm this is intended.
 */
4413 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415 root->objectid, ref_key->objectid, ref_key->offset,
4417 err |= ROOT_DIR_ERROR;
4420 /* find related dir_index */
4421 key.objectid = parent;
4422 key.type = BTRFS_DIR_INDEX_KEY;
4424 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4427 /* find related dir_item */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4430 key.offset = btrfs_name_hash(namebuf, len);
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next extended reference packed in the same item. */
4434 len = sizeof(*extref) + name_len;
4435 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4445 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446 * DIR_ITEM/DIR_INDEX match.
4448 * @root: the root of the fs/file tree
4449 * @key: the key of the INODE_REF/INODE_EXTREF
4450 * @name: the name in the INODE_REF/INODE_EXTREF
4451 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4452 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4454 * @ext_ref: the EXTENDED_IREF feature
4456 * Return 0 if no error occurred.
4457 * Return >0 for error bitmap
/*
 * Look for a back reference matching @name/@namelen (and @index unless it
 * is (u64)-1) first in the INODE_REF item at @key and then in the
 * INODE_EXTREF item keyed by btrfs_extref_hash() of the same name.
 *
 * Side effect: before the INODE_EXTREF lookup key->type and key->offset are
 * overwritten in place, so on that path @key no longer holds the INODE_REF
 * key that was passed in.
 */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460 char *name, int namelen, u64 index,
4461 unsigned int ext_ref)
4463 struct btrfs_path path;
4464 struct btrfs_inode_ref *ref;
4465 struct btrfs_inode_extref *extref;
4466 struct extent_buffer *node;
4467 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4478 btrfs_init_path(&path);
4479 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4481 ret = INODE_REF_MISSING;
4485 node = path.nodes[0];
4486 slot = path.slots[0];
4488 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489 total = btrfs_item_size_nr(node, slot);
4491 /* Iterate all entry of INODE_REF */
4492 while (cur < total) {
/* Assume a miss for this entry until index and name both agree. */
4493 ret = INODE_REF_MISSING;
4495 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the caller does not care about the index. */
4497 if (index != (u64)-1 && index != ref_index)
/*
 * A name that overruns the item or BTRFS_NAME_LEN triggers a warning and
 * the copy length is clamped with min_t().
 */
4500 if (cur + sizeof(*ref) + ref_namelen > total ||
4501 ref_namelen > BTRFS_NAME_LEN) {
4502 warning("root %llu INODE %s[%llu %llu] name too long",
4504 key->type == BTRFS_INODE_REF_KEY ?
4506 key->objectid, key->offset);
4508 if (cur + sizeof(*ref) > total)
4510 len = min_t(u32, total - cur - sizeof(*ref),
4516 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4519 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next reference packed in the same item. */
4525 len = sizeof(*ref) + ref_namelen;
4526 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4531 /* Skip if not support EXTENDED_IREF feature */
4535 btrfs_release_path(&path);
4536 btrfs_init_path(&path);
/*
 * The incoming key->offset is kept in dir_id (it is compared with the
 * extref parent below) before the key is rewritten for the EXTREF lookup.
 */
4538 dir_id = key->offset;
4539 key->type = BTRFS_INODE_EXTREF_KEY;
4540 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4542 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4544 ret = INODE_REF_MISSING;
4548 node = path.nodes[0];
4549 slot = path.slots[0];
4551 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4553 total = btrfs_item_size_nr(node, slot);
4555 /* Iterate all entry of INODE_EXTREF */
4556 while (cur < total) {
4557 ret = INODE_REF_MISSING;
4559 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560 ref_index = btrfs_inode_extref_index(node, extref);
4561 parent = btrfs_inode_extref_parent(node, extref);
4562 if (index != (u64)-1 && index != ref_index)
/* Entries that belong to another directory are not candidates. */
4565 if (parent != dir_id)
/*
 * NOTE(review): only the BTRFS_NAME_LEN limit is visible here, no bound
 * against the item size; key->type was set to the EXTREF type above, so the
 * REF/EXTREF selector in the warning always sees the EXTREF type.
 */
4568 if (ref_namelen <= BTRFS_NAME_LEN) {
4571 len = BTRFS_NAME_LEN;
4572 warning("root %llu INODE %s[%llu %llu] name too long",
4574 key->type == BTRFS_INODE_REF_KEY ?
4576 key->objectid, key->offset);
4578 read_extent_buffer(node, ref_namebuf,
4579 (unsigned long)(extref + 1), len);
4581 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next extended reference packed in the same item. */
4588 len = sizeof(*extref) + ref_namelen;
4589 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4594 btrfs_release_path(&path);
4599 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4602 * @root: the root of the fs/file tree
4603 * @key: the key of the DIR_ITEM/DIR_INDEX
4604 * @size: the st_size of the INODE_ITEM
4605 * @ext_ref: the EXTENDED_IREF feature
4607 * Return 0 if no error occurred.
/*
 * Validate every directory entry packed in the DIR_ITEM/DIR_INDEX item at
 * @node/@slot: the INODE_ITEM it points at must exist and agree on the file
 * type, and a matching INODE_REF/INODE_EXTREF must be found through
 * find_inode_ref(). The name length of each entry is added to *size.
 */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610 struct extent_buffer *node, int slot, u64 *size,
4611 unsigned int ext_ref)
4613 struct btrfs_dir_item *di;
4614 struct btrfs_inode_item *ii;
4615 struct btrfs_path path;
4616 struct btrfs_key location;
4617 char namebuf[BTRFS_NAME_LEN] = {0};
4630 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631 * ignore index check.
4633 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4635 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636 total = btrfs_item_size_nr(node, slot);
4638 while (cur < total) {
4639 data_len = btrfs_dir_data_len(node, di);
4641 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643 "DIR_ITEM" : "DIR_INDEX",
4644 key->objectid, key->offset, data_len);
4646 name_len = btrfs_dir_name_len(node, di);
/*
 * A name that overruns the item or BTRFS_NAME_LEN triggers a warning and
 * the copy length is clamped with min_t().
 */
4647 if (cur + sizeof(*di) + name_len > total ||
4648 name_len > BTRFS_NAME_LEN) {
4649 warning("root %llu %s[%llu %llu] name too long",
4651 key->type == BTRFS_DIR_ITEM_KEY ?
4652 "DIR_ITEM" : "DIR_INDEX",
4653 key->objectid, key->offset);
4655 if (cur + sizeof(*di) > total)
4657 len = min_t(u32, total - cur - sizeof(*di),
/*
 * NOTE(review): the untrimmed on-disk name_len is accumulated here even
 * when the name was flagged as too long above; confirm that is intended.
 */
4662 (*size) += name_len;
4664 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665 filetype = btrfs_dir_type(node, di);
4667 btrfs_init_path(&path);
4668 btrfs_dir_item_key_to_cpu(node, di, &location);
4670 /* Ignore related ROOT_ITEM check */
4671 if (location.type == BTRFS_ROOT_ITEM_KEY)
4674 /* Check relative INODE_ITEM(existence/filetype) */
4675 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
/*
 * NOTE(review): this message prints the file type with %x while the two
 * sibling messages below use %d.
 */
4677 err |= INODE_ITEM_MISSING;
4678 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681 key->offset, location.objectid, name_len,
4686 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687 struct btrfs_inode_item);
4688 mode = btrfs_inode_mode(path.nodes[0], ii);
4690 if (imode_to_type(mode) != filetype) {
4691 err |= INODE_ITEM_MISMATCH;
4692 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695 key->offset, name_len, namebuf, filetype);
4698 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * location is reused as the INODE_REF key: its objectid is still the inode
 * the entry points at, its offset becomes the directory being checked.
 */
4699 location.type = BTRFS_INODE_REF_KEY;
4700 location.offset = key->objectid;
4701 ret = find_inode_ref(root, &location, namebuf, len,
4704 if (ret & INODE_REF_MISSING)
4705 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708 key->offset, name_len, namebuf, filetype);
4711 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry. */
4712 len = sizeof(*di) + name_len + data_len;
4713 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after one entry is reported. */
4716 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718 root->objectid, key->objectid, key->offset);
4727 * Check file extent datasum/hole, update the size of the file extents,
4728 * check and update the last offset of the file extent.
4730 * @root: the root of fs/file tree.
4731 * @fkey: the key of the file extent.
4732 * @nodatasum: INODE_NODATASUM feature.
4733 * @size: the sum of all EXTENT_DATA items size for this inode.
4734 * @end: the offset of the last extent.
4736 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item: inline extents are validated for size,
 * regular and prealloc extents for type, checksum coverage and continuity
 * with the previous extent. The extent length is added to both *size and
 * *end on the visible inline and non-inline paths.
 */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739 struct extent_buffer *node, int slot,
4740 unsigned int nodatasum, u64 *size, u64 *end)
4742 struct btrfs_file_extent_item *fi;
4745 u64 extent_num_bytes;
4747 u64 csum_found; /* In byte size, sectorsize aligned */
4748 u64 search_start; /* Logical range start we search for csum */
4749 u64 search_len; /* Logical range len we search for csum */
4750 unsigned int extent_type;
4751 unsigned int is_hole;
4756 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4758 /* Check inline extent */
4759 extent_type = btrfs_file_extent_type(node, fi);
4760 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761 struct btrfs_item *e = btrfs_item_nr(slot);
4762 u32 item_inline_len;
4764 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766 compressed = btrfs_file_extent_compression(node, fi);
4767 if (extent_num_bytes == 0) {
4769 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770 root->objectid, fkey->objectid, fkey->offset);
4771 err |= FILE_EXTENT_ERROR;
/*
 * For an uncompressed inline extent the length recorded in the extent must
 * equal the inline payload length derived from the item.
 */
4773 if (!compressed && extent_num_bytes != item_inline_len) {
4775 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776 root->objectid, fkey->objectid, fkey->offset,
4777 extent_num_bytes, item_inline_len);
4778 err |= FILE_EXTENT_ERROR;
4780 *end += extent_num_bytes;
4781 *size += extent_num_bytes;
4785 /* Check extent type */
4786 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4787 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4788 err |= FILE_EXTENT_ERROR;
4789 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4790 root->objectid, fkey->objectid, fkey->offset);
4794 /* Check REG_EXTENT/PREALLOC_EXTENT */
4795 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4796 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4797 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4798 extent_offset = btrfs_file_extent_offset(node, fi);
4799 compressed = btrfs_file_extent_compression(node, fi);
/* A zero disk address together with a zero disk length marks a hole. */
4800 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4803 * Check EXTENT_DATA csum
4805 * For plain (uncompressed) extent, we should only check the range
4806 * we're referring to, as it's possible that part of prealloc extent
4807 * has been written, and has csum:
4809 * |<--- Original large preallocated extent A ---->|
4810 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4813 * For compressed extent, we should check the whole range.
4816 search_start = disk_bytenr + extent_offset;
4817 search_len = extent_num_bytes;
4819 search_start = disk_bytenr;
4820 search_len = disk_num_bytes;
/*
 * NOTE(review): csum_found is tested in the first branch below before ret
 * is looked at; confirm count_csum_range() always stores a value through
 * its last argument, also when it fails.
 */
4822 ret = count_csum_range(root, search_start, search_len, &csum_found);
4823 if (csum_found > 0 && nodatasum) {
4824 err |= ODD_CSUM_ITEM;
4825 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4826 root->objectid, fkey->objectid, fkey->offset);
4827 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4828 !is_hole && (ret < 0 || csum_found < search_len)) {
4829 err |= CSUM_ITEM_MISSING;
4830 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4831 root->objectid, fkey->objectid, fkey->offset,
4832 csum_found, search_len);
4833 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4834 err |= ODD_CSUM_ITEM;
4835 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4836 root->objectid, fkey->objectid, fkey->offset, csum_found);
4839 /* Check EXTENT_DATA hole */
/*
 * *end is the file offset where the previous extent stopped. Unless the
 * no_holes flag is set, this extent must start exactly there.
 */
4840 if (!no_holes && *end != fkey->offset) {
4841 err |= FILE_EXTENT_ERROR;
4842 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4843 root->objectid, fkey->objectid, fkey->offset);
4846 *end += extent_num_bytes;
4848 *size += extent_num_bytes;
4854 * Check INODE_ITEM and related ITEMs (the same inode number)
4855 * 1. check link count
4856 * 2. check inode ref/extref
4857 * 3. check dir item/index
4859 * @ext_ref: the EXTENDED_IREF feature
4861 * Return 0 if no error occurred.
4862 * Return >0 (error bitmap) if an error was found or the traversal is done
/*
 * Check the INODE_ITEM at @path and then every following item that carries
 * the same objectid, dispatching on the key type to check_inode_ref(),
 * check_inode_extref(), check_dir_item() and check_file_extent().
 * The counters gathered by those helpers (refs, size, extent_size) are
 * finally compared with nlink, isize and nbytes read from the inode item.
 * @path is advanced with btrfs_next_item() as items are consumed.
 */
4864 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4865 unsigned int ext_ref)
4867 struct extent_buffer *node;
4868 struct btrfs_inode_item *ii;
4869 struct btrfs_key key;
4878 u64 extent_size = 0;
4880 unsigned int nodatasum;
4885 node = path->nodes[0];
4886 slot = path->slots[0];
4888 btrfs_item_key_to_cpu(node, &key, slot);
4889 inode_id = key.objectid;
/* The orphan objectid is not a real inode: only step past it. */
4891 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4892 ret = btrfs_next_item(root, path);
/* Snapshot the inode fields that are verified at the end. */
4898 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4899 isize = btrfs_inode_size(node, ii);
4900 nbytes = btrfs_inode_nbytes(node, ii);
4901 mode = btrfs_inode_mode(node, ii);
4902 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4903 nlink = btrfs_inode_nlink(node, ii);
4904 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4907 ret = btrfs_next_item(root, path);
4909 /* out will fill 'err' using current statistics */
4911 } else if (ret > 0) {
4916 node = path->nodes[0];
4917 slot = path->slots[0];
4918 btrfs_item_key_to_cpu(node, &key, slot);
/* Items of this inode end where the objectid changes. */
4919 if (key.objectid != inode_id)
4923 case BTRFS_INODE_REF_KEY:
4924 ret = check_inode_ref(root, &key, node, slot, &refs,
4928 case BTRFS_INODE_EXTREF_KEY:
4929 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4930 warning("root %llu EXTREF[%llu %llu] isn't supported",
4931 root->objectid, key.objectid,
4933 ret = check_inode_extref(root, &key, node, slot, &refs,
/*
 * NOTE(review): both DIR_ITEM and DIR_INDEX keys land here but the warning
 * text below always says DIR_INDEX.
 */
4937 case BTRFS_DIR_ITEM_KEY:
4938 case BTRFS_DIR_INDEX_KEY:
4940 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4941 root->objectid, inode_id,
4942 imode_to_type(mode), key.objectid,
4945 ret = check_dir_item(root, &key, node, slot, &size,
4949 case BTRFS_EXTENT_DATA_KEY:
4951 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4952 root->objectid, inode_id, key.objectid,
4955 ret = check_file_extent(root, &key, node, slot,
4956 nodatasum, &extent_size,
4960 case BTRFS_XATTR_ITEM_KEY:
4963 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4964 key.objectid, key.type, key.offset);
4969 /* verify INODE_ITEM nlink/isize/nbytes */
4972 err |= LINK_COUNT_ERROR;
4973 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4974 root->objectid, inode_id, nlink);
4978 * Just a warning, as dir inode nbytes is just an
4979 * instructive value.
4981 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4982 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4983 root->objectid, inode_id, root->nodesize);
/* For a directory, size holds the name lengths summed by check_dir_item(). */
4986 if (isize != size) {
4988 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4989 root->objectid, inode_id, isize, size);
/* refs is the count gathered from the INODE_REF/INODE_EXTREF items. */
4992 if (nlink != refs) {
4993 err |= LINK_COUNT_ERROR;
4994 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4995 root->objectid, inode_id, nlink, refs);
4996 } else if (!nlink) {
5000 if (!nbytes && !no_holes && extent_end < isize) {
5001 err |= NBYTES_ERROR;
5002 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5003 root->objectid, inode_id, isize);
/* extent_size is the total accumulated by check_file_extent(). */
5006 if (nbytes != extent_size) {
5007 err |= NBYTES_ERROR;
5008 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5009 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID in @root
 * and run check_inode_item() on it. A missing item is reported and recorded
 * as INODE_ITEM_MISSING. Roots with zero refs whose drop_progress objectid
 * has reached a threshold (its right-hand side is not visible here) are
 * treated as being dropped and skip this check.
 */
5016 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5018 struct btrfs_path path;
5019 struct btrfs_key key;
5023 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5024 key.type = BTRFS_INODE_ITEM_KEY;
5027 /* For root being dropped, we don't need to check first inode */
5028 if (btrfs_root_refs(&root->root_item) == 0 &&
5029 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5033 btrfs_init_path(&path);
5035 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5040 err |= INODE_ITEM_MISSING;
5041 error("first inode item of root %llu is missing",
/* The error bits reported for the first inode are merged into err. */
5045 err |= check_inode_item(root, &path, ext_ref);
5050 btrfs_release_path(&path);
5055 * Iterate all item on the tree and call check_inode_item() to check.
5057 * @root: the root of the tree to be checked.
5058 * @ext_ref: the EXTENDED_IREF feature
5060 * Return 0 if no error found.
5061 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first the first inode item through
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2() on a local path.
 */
5063 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5065 struct btrfs_path path;
5066 struct node_refs nrefs;
5067 struct btrfs_root_item *root_item = &root->root_item;
5073 * We need to manually check the first inode item(256)
5074 * As the following traversal function will only start from
5075 * the first inode item in the leaf, if inode item(256) is missing
5076 * we will just skip it forever.
5078 ret = check_fs_first_inode(root, ext_ref);
5082 memset(&nrefs, 0, sizeof(nrefs));
5083 level = btrfs_header_level(root->node);
5084 btrfs_init_path(&path);
/*
 * A root that is still referenced, or whose drop_progress objectid is zero,
 * is walked from its root node (an extra reference is taken on the node
 * because the path now points at it). Otherwise the walk resumes at the
 * recorded drop_progress key and drop_level.
 */
5086 if (btrfs_root_refs(root_item) > 0 ||
5087 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5088 path.nodes[level] = root->node;
5089 path.slots[level] = 0;
5090 extent_buffer_get(root->node);
5092 struct btrfs_key key;
5094 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5095 level = root_item->drop_level;
5096 path.lowest_level = level;
5097 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5104 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5107 /* if ret is negative, walk shall stop */
5113 ret = walk_up_tree_v2(root, &path, &level);
5115 /* Normal exit, reset ret to err */
5122 btrfs_release_path(&path);
5127 * Find the relative ref for root_ref and root_backref.
5129 * @root: the root of the root tree.
5130 * @ref_key: the key of the root ref.
5132 * Return 0 if no error occurred.
/*
 * Given a ROOT_REF or ROOT_BACKREF item at @node/@slot, look up the item of
 * the opposite type with objectid and offset swapped and verify that both
 * agree on dirid, sequence, name length and name.
 * Missing counterparts set ROOT_REF_MISSING, disagreeing ones
 * ROOT_REF_MISMATCH.
 */
5134 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5135 struct extent_buffer *node, int slot)
5137 struct btrfs_path path;
5138 struct btrfs_key key;
5139 struct btrfs_root_ref *ref;
5140 struct btrfs_root_ref *backref;
5141 char ref_name[BTRFS_NAME_LEN] = {0};
5142 char backref_name[BTRFS_NAME_LEN] = {0};
5148 u32 backref_namelen;
5153 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5154 ref_dirid = btrfs_root_ref_dirid(node, ref);
5155 ref_seq = btrfs_root_ref_sequence(node, ref);
5156 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are warned about and read truncated. */
5158 if (ref_namelen <= BTRFS_NAME_LEN) {
5161 len = BTRFS_NAME_LEN;
5162 warning("%s[%llu %llu] ref_name too long",
5163 ref_key->type == BTRFS_ROOT_REF_KEY ?
5164 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the btrfs_root_ref header directly. */
5167 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5169 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset. Subtracting the current
 * type from the sum of both type constants yields the other type.
 */
5170 key.objectid = ref_key->offset;
5171 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5172 key.offset = ref_key->objectid;
5174 btrfs_init_path(&path);
5175 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5177 err |= ROOT_REF_MISSING;
5178 error("%s[%llu %llu] couldn't find relative ref",
5179 ref_key->type == BTRFS_ROOT_REF_KEY ?
5180 "ROOT_REF" : "ROOT_BACKREF",
5181 ref_key->objectid, ref_key->offset);
5185 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5186 struct btrfs_root_ref);
5187 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5188 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5189 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5191 if (backref_namelen <= BTRFS_NAME_LEN) {
5192 len = backref_namelen;
5194 len = BTRFS_NAME_LEN;
5195 warning("%s[%llu %llu] ref_name too long",
5196 key.type == BTRFS_ROOT_REF_KEY ?
5197 "ROOT_REF" : "ROOT_BACKREF",
5198 key.objectid, key.offset);
5200 read_extent_buffer(path.nodes[0], backref_name,
5201 (unsigned long)(backref + 1), len);
/*
 * The raw name lengths are compared first, so the strncmp() bound (the
 * clamped counterpart length) only matters when both lengths are equal.
 */
5203 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5204 ref_namelen != backref_namelen ||
5205 strncmp(ref_name, backref_name, len)) {
5206 err |= ROOT_REF_MISMATCH;
5207 error("%s[%llu %llu] mismatch relative ref",
5208 ref_key->type == BTRFS_ROOT_REF_KEY ?
5209 "ROOT_REF" : "ROOT_BACKREF",
5210 ref_key->objectid, ref_key->offset);
5213 btrfs_release_path(&path);
5218 * Check all fs/file tree in low_memory mode.
5220 * 1. for fs tree root item, call check_fs_root_v2()
5221 * 2. for fs tree root ref/backref, call check_root_ref()
5223 * Return 0 if no error occurred.
/*
 * Walk the tree root starting at the BTRFS_FS_TREE_OBJECTID ROOT_ITEM key
 * and, item by item, run check_fs_root_v2() on fs/subvolume ROOT_ITEMs and
 * check_root_ref() on ROOT_REF/ROOT_BACKREF items.
 * The EXTENDED_IREF incompat flag is sampled once and forwarded to the
 * per-root check.
 */
5225 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5227 struct btrfs_root *tree_root = fs_info->tree_root;
5228 struct btrfs_root *cur_root = NULL;
5229 struct btrfs_path path;
5230 struct btrfs_key key;
5231 struct extent_buffer *node;
5232 unsigned int ext_ref;
5237 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5239 btrfs_init_path(&path);
5240 key.objectid = BTRFS_FS_TREE_OBJECTID;
5242 key.type = BTRFS_ROOT_ITEM_KEY;
5244 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5248 } else if (ret > 0) {
5254 node = path.nodes[0];
5255 slot = path.slots[0];
5256 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys beyond BTRFS_LAST_FREE_OBJECTID are tested for here. */
5257 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5259 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5260 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read uncached and freed explicitly after the
 * check; every other root is read through btrfs_read_fs_root() with
 * key.offset = (u64)-1.
 */
5261 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5262 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5265 key.offset = (u64)-1;
5266 cur_root = btrfs_read_fs_root(fs_info, &key);
5269 if (IS_ERR(cur_root)) {
5270 error("Fail to read fs/subvol tree: %lld",
5276 ret = check_fs_root_v2(cur_root, ext_ref);
5279 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5280 btrfs_free_fs_root(cur_root);
5281 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5282 key.type == BTRFS_ROOT_BACKREF_KEY) {
5283 ret = check_root_ref(tree_root, &key, node, slot);
5287 ret = btrfs_next_item(tree_root, &path);
5297 btrfs_release_path(&path);
/*
 * Cross-check every backref attached to @rec: each must have been seen in
 * the extent tree, tree backrefs must have been referenced, data backrefs
 * must agree with the record on ref count, disk bytenr and byte length, and
 * a running total (found) must equal rec->refs.
 * Diagnostics go to stderr.
 * NOTE(review): the statements between each test and its fprintf() are not
 * visible here - presumably they set the error result and honour
 * @print_errs; confirm.
 */
5301 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5303 struct list_head *cur = rec->backrefs.next;
5304 struct extent_backref *back;
5305 struct tree_backref *tback;
5306 struct data_backref *dback;
5310 while(cur != &rec->backrefs) {
5311 back = to_extent_backref(cur);
/* Backref was discovered by walking trees but has no extent tree entry. */
5313 if (!back->found_extent_tree) {
5317 if (back->is_data) {
5318 dback = to_data_backref(back);
5319 fprintf(stderr, "Backref %llu %s %llu"
5320 " owner %llu offset %llu num_refs %lu"
5321 " not found in extent tree\n",
5322 (unsigned long long)rec->start,
5323 back->full_backref ?
5325 back->full_backref ?
5326 (unsigned long long)dback->parent:
5327 (unsigned long long)dback->root,
5328 (unsigned long long)dback->owner,
5329 (unsigned long long)dback->offset,
5330 (unsigned long)dback->num_refs);
5332 tback = to_tree_backref(back);
5333 fprintf(stderr, "Backref %llu parent %llu"
5334 " root %llu not found in extent tree\n",
5335 (unsigned long long)rec->start,
5336 (unsigned long long)tback->parent,
5337 (unsigned long long)tback->root);
/*
 * Tree backref that nothing referenced.
 * NOTE(review): back is passed to %p without a cast to a void pointer here
 * and in the local backref count message further down.
 */
5340 if (!back->is_data && !back->found_ref) {
5344 tback = to_tree_backref(back);
5345 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5346 (unsigned long long)rec->start,
5347 back->full_backref ? "parent" : "root",
5348 back->full_backref ?
5349 (unsigned long long)tback->parent :
5350 (unsigned long long)tback->root, back);
/* Data backrefs carry their own counts and geometry to compare. */
5352 if (back->is_data) {
5353 dback = to_data_backref(back);
5354 if (dback->found_ref != dback->num_refs) {
5358 fprintf(stderr, "Incorrect local backref count"
5359 " on %llu %s %llu owner %llu"
5360 " offset %llu found %u wanted %u back %p\n",
5361 (unsigned long long)rec->start,
5362 back->full_backref ?
5364 back->full_backref ?
5365 (unsigned long long)dback->parent:
5366 (unsigned long long)dback->root,
5367 (unsigned long long)dback->owner,
5368 (unsigned long long)dback->offset,
5369 dback->found_ref, dback->num_refs, back);
5371 if (dback->disk_bytenr != rec->start) {
5375 fprintf(stderr, "Backref disk bytenr does not"
5376 " match extent record, bytenr=%llu, "
5377 "ref bytenr=%llu\n",
5378 (unsigned long long)rec->start,
5379 (unsigned long long)dback->disk_bytenr);
5382 if (dback->bytes != rec->nr) {
5386 fprintf(stderr, "Backref bytes do not match "
5387 "extent backref, bytenr=%llu, ref "
5388 "bytes=%llu, backref bytes=%llu\n",
5389 (unsigned long long)rec->start,
5390 (unsigned long long)rec->nr,
5391 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs add their found_ref. */
5394 if (!back->is_data) {
5397 dback = to_data_backref(back);
5398 found += dback->found_ref;
5401 if (found != rec->refs) {
5405 fprintf(stderr, "Incorrect global backref count "
5406 "on %llu found %llu wanted %llu\n",
5407 (unsigned long long)rec->start,
5408 (unsigned long long)found,
5409 (unsigned long long)rec->refs);
/*
 * Drain the backref list of @rec: the loop keeps taking the first list
 * entry and converting it to its extent_backref until the list is empty.
 * NOTE(review): the unlink and release of each entry are not visible here;
 * confirm.
 */
5415 static int free_all_extent_backrefs(struct extent_record *rec)
5417 struct extent_backref *back;
5418 struct list_head *cur;
5419 while (!list_empty(&rec->backrefs)) {
5420 cur = rec->backrefs.next;
5421 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in @extent_cache: take the first
 * cached extent, recover the extent_record that embeds it, unlink it from
 * the cache tree and drop all backrefs hanging off the record.
 */
5428 static void free_extent_record_cache(struct cache_tree *extent_cache)
5430 struct cache_extent *cache;
5431 struct extent_record *rec;
5434 cache = first_cache_extent(extent_cache);
/* the cache_extent is embedded in the record as member 'cache' */
5437 rec = container_of(cache, struct extent_record, cache);
5438 remove_cache_extent(extent_cache, cache);
5439 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when all of the following hold: content and
 * owner ref have been checked, the extent item ref count equals the counted
 * refs and is non-zero, there are no duplicates, all_backpointers_checked()
 * returns 0, and none of the bad_full_backref / crossing_stripes /
 * wrong_chunk_type flags is set.
 */
5444 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5445 struct extent_record *rec)
5447 if (rec->content_checked && rec->owner_ref_checked &&
5448 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5449 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5450 !rec->bad_full_backref && !rec->crossing_stripes &&
5451 !rec->wrong_chunk_type) {
/* fully verified: unlink from the cache tree, the backrefs and rec->list */
5452 remove_cache_extent(extent_cache, &rec->cache);
5453 free_all_extent_backrefs(rec);
5454 list_del_init(&rec->list);
/*
 * Decide whether tree block @buf is referenced by the tree named in its
 * header owner field.
 *
 * Two strategies are visible: first the backrefs already attached to @rec
 * are scanned for a tree backref whose root equals btrfs_header_owner(buf);
 * then the owner root is read and searched one level above @buf, comparing
 * the parent's block pointer with buf->start.
 *
 * The final return yields 0 when the reference was found and 1 otherwise.
 */
5460 static int check_owner_ref(struct btrfs_root *root,
5461 struct extent_record *rec,
5462 struct extent_buffer *buf)
5464 struct extent_backref *node;
5465 struct tree_backref *back;
5466 struct btrfs_root *ref_root;
5467 struct btrfs_key key;
5468 struct btrfs_path path;
5469 struct extent_buffer *parent;
5474 list_for_each_entry(node, &rec->backrefs, list) {
/* filters on the found_ref and full_backref bits before comparing roots */
5477 if (!node->found_ref)
5479 if (node->full_backref)
5481 back = to_tree_backref(node);
5482 if (btrfs_header_owner(buf) == back->root)
5485 BUG_ON(rec->is_root);
5487 /* try to find the block by search corresponding fs tree */
5488 key.objectid = btrfs_header_owner(buf);
5489 key.type = BTRFS_ROOT_ITEM_KEY;
5490 key.offset = (u64)-1;
5492 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5493 if (IS_ERR(ref_root))
5496 level = btrfs_header_level(buf);
/* the block's first key becomes the search key (item and node variants) */
5498 btrfs_item_key_to_cpu(buf, &key, 0);
5500 btrfs_node_key_to_cpu(buf, &key, 0);
5502 btrfs_init_path(&path);
/* stop the search one level above @buf so the parent node is in the path */
5503 path.lowest_level = level + 1;
5504 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5508 parent = path.nodes[level + 1];
/* the owner tree references @buf if the parent slot points at buf->start */
5509 if (parent && buf->start == btrfs_node_blockptr(parent,
5510 path.slots[level + 1]))
5513 btrfs_release_path(&path);
5514 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec looking at each one as a tree backref and test
 * whether its root is BTRFS_EXTENT_TREE_OBJECTID. The full_backref bit of
 * each node is consulted before the root comparison.
 */
5517 static int is_extent_tree_record(struct extent_record *rec)
5519 struct list_head *cur = rec->backrefs.next;
5520 struct extent_backref *node;
5521 struct tree_backref *back;
/* the list head itself marks the end of the circular list */
5524 while(cur != &rec->backrefs) {
5525 node = to_extent_backref(cur);
5529 back = to_tree_backref(node);
5530 if (node->full_backref)
5532 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register the range [start, start + len) as a corrupt extent record.
 *
 * The range is looked up in @extent_cache, the matching extent_record is
 * tested with is_extent_tree_record(), and the record's parent key
 * (converted to CPU byte order) is handed to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 */
5539 static int record_bad_block_io(struct btrfs_fs_info *info,
5540 struct cache_tree *extent_cache,
5543 struct extent_record *rec;
5544 struct cache_extent *cache;
5545 struct btrfs_key key;
5547 cache = lookup_cache_extent(extent_cache, start, len);
5551 rec = container_of(cache, struct extent_record, cache);
5552 if (!is_extent_tree_record(rec))
5555 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5556 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * For a node (non-zero header level) the two btrfs_key_ptr entries are
 * swapped in place and btrfs_fixup_low_keys() is called with the node's
 * first key. For a leaf the two items' data, offsets, sizes and keys are
 * exchanged.
 */
5559 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5560 struct extent_buffer *buf, int slot)
5562 if (btrfs_header_level(buf)) {
5563 struct btrfs_key_ptr ptr1, ptr2;
/* read both key pointers, then write them back crosswise */
5565 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5566 sizeof(struct btrfs_key_ptr));
5567 read_extent_buffer(buf, &ptr2,
5568 btrfs_node_key_ptr_offset(slot + 1),
5569 sizeof(struct btrfs_key_ptr));
5570 write_extent_buffer(buf, &ptr1,
5571 btrfs_node_key_ptr_offset(slot + 1),
5572 sizeof(struct btrfs_key_ptr));
5573 write_extent_buffer(buf, &ptr2,
5574 btrfs_node_key_ptr_offset(slot),
5575 sizeof(struct btrfs_key_ptr));
5577 struct btrfs_disk_key key;
/* propagate the node's (possibly new) first key to the levels above */
5578 btrfs_node_key(buf, &key, 0);
5579 btrfs_fixup_low_keys(root, path, &key,
5580 btrfs_header_level(buf) + 1);
5583 struct btrfs_item *item1, *item2;
5584 struct btrfs_key k1, k2;
5585 char *item1_data, *item2_data;
5586 u32 item1_offset, item2_offset, item1_size, item2_size;
5588 item1 = btrfs_item_nr(slot);
5589 item2 = btrfs_item_nr(slot + 1);
5590 btrfs_item_key_to_cpu(buf, &k1, slot);
5591 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5592 item1_offset = btrfs_item_offset(buf, item1);
5593 item2_offset = btrfs_item_offset(buf, item2);
5594 item1_size = btrfs_item_size(buf, item1);
5595 item2_size = btrfs_item_size(buf, item2);
/* temporary copies of both payloads, each sized to its own item */
5597 item1_data = malloc(item1_size);
5600 item2_data = malloc(item2_size);
5606 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5607 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but item2_size bytes are
 * copied from it (and vice versa below); confirm this is safe when the two
 * items differ in size.
 */
5609 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5610 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* exchange the offset/size bookkeeping of the two item headers */
5614 btrfs_set_item_offset(buf, item1, item2_offset);
5615 btrfs_set_item_offset(buf, item2, item1_offset);
5616 btrfs_set_item_size(buf, item1, item2_size);
5617 btrfs_set_item_size(buf, item2, item1_size);
/* finally exchange the keys; path->slots[0] selects the item being rekeyed */
5619 path->slots[0] = slot;
5620 btrfs_set_item_key_unsafe(root, path, &k2);
5621 path->slots[0] = slot + 1;
5622 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key ordering of the block at path->lowest_level.
 *
 * Adjacent keys (i, i + 1) are compared with btrfs_comp_cpu_keys(); pairs
 * that are not in strictly ascending order are handed to swap_values(), and
 * the buffer is marked dirty.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1; confirm the
 * block can never have zero items here, since the subtraction would wrap if
 * the item count is unsigned.
 */
5627 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5629 struct extent_buffer *buf;
5630 struct btrfs_key k1, k2;
5632 int level = path->lowest_level;
5635 buf = path->nodes[level];
5636 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* node keys and item keys are read with their respective accessors */
5638 btrfs_node_key_to_cpu(buf, &k1, i);
5639 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5641 btrfs_item_key_to_cpu(buf, &k1, i);
5642 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already ordered */
5644 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5646 ret = swap_values(root, path, buf, i);
5649 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only a fixed set of key types is handled (see the type check below). The
 * item headers following @slot are moved down by one position, the header
 * item count is decremented, btrfs_fixup_low_keys() is called with the
 * leaf's first key, and the buffer is marked dirty.
 */
5655 static int delete_bogus_item(struct btrfs_root *root,
5656 struct btrfs_path *path,
5657 struct extent_buffer *buf, int slot)
5659 struct btrfs_key key;
5660 int nritems = btrfs_header_nritems(buf);
5662 btrfs_item_key_to_cpu(buf, &key, slot);
5664 /* These are all the keys we can deal with missing. */
5665 if (key.type != BTRFS_DIR_INDEX_KEY &&
5666 key.type != BTRFS_EXTENT_ITEM_KEY &&
5667 key.type != BTRFS_METADATA_ITEM_KEY &&
5668 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5669 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5672 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5673 (unsigned long long)key.objectid, key.type,
5674 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array: headers slot+1.. move to slot.. */
5675 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5676 btrfs_item_nr_offset(slot + 1),
5677 sizeof(struct btrfs_item) *
5678 (nritems - slot - 1));
5679 btrfs_set_header_nritems(buf, nritems - 1);
5681 struct btrfs_disk_key disk_key;
/* push the leaf's first key up into the parent levels */
5683 btrfs_item_key(buf, &disk_key, 0);
5684 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5686 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end exactly at BTRFS_LEAF_DATA_SIZE(root); every
 * later item is expected to end where the previous item's data starts. An
 * item that ends beyond its limit is passed to delete_bogus_item(); an item
 * that ends short of it is moved up by the size of the gap.
 */
5690 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5692 struct extent_buffer *buf;
5696 /* We should only get this for leaves */
5697 BUG_ON(path->lowest_level);
5698 buf = path->nodes[0];
5700 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5701 unsigned int shift = 0, offset;
/* first item: compare its end against the end of the leaf data area */
5703 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5704 BTRFS_LEAF_DATA_SIZE(root)) {
5705 if (btrfs_item_end_nr(buf, i) >
5706 BTRFS_LEAF_DATA_SIZE(root)) {
5707 ret = delete_bogus_item(root, path, buf, i);
5710 fprintf(stderr, "item is off the end of the "
5711 "leaf, can't fix\n");
5715 shift = BTRFS_LEAF_DATA_SIZE(root) -
5716 btrfs_item_end_nr(buf, i);
/* later items: compare the end against the previous item's data offset */
5717 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5718 btrfs_item_offset_nr(buf, i - 1)) {
5719 if (btrfs_item_end_nr(buf, i) >
5720 btrfs_item_offset_nr(buf, i - 1)) {
5721 ret = delete_bogus_item(root, path, buf, i);
5724 fprintf(stderr, "items overlap, can't fix\n");
5728 shift = btrfs_item_offset_nr(buf, i - 1) -
5729 btrfs_item_end_nr(buf, i);
5734 printf("Shifting item nr %d by %u bytes in block %llu\n",
5735 i, shift, (unsigned long long)buf->start);
5736 offset = btrfs_item_offset_nr(buf, i);
/* move the item's payload up by 'shift' bytes inside the leaf data area */
5737 memmove_extent_buffer(buf,
5738 btrfs_leaf_data(buf) + offset + shift,
5739 btrfs_leaf_data(buf) + offset,
5740 btrfs_item_size_nr(buf, i));
5741 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5743 btrfs_mark_buffer_dirty(buf);
5747 * We may have moved things, in which case we want to exit so we don't
5748 * write those changes out. Once we have proper abort functionality in
5749 * progs this can be changed to something nicer.
5756 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5757 * then just return -EIO.
/*
 * Try to repair tree block @buf for the two statuses handled here:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each of them the root is read, a transaction
 * is started, the block is located with btrfs_search_slot() at the block's
 * own level, and fix_key_order() or fix_item_offset() is applied before the
 * transaction is committed.
 */
5759 static int try_to_fix_bad_block(struct btrfs_root *root,
5760 struct extent_buffer *buf,
5761 enum btrfs_tree_block_status status)
5763 struct btrfs_trans_handle *trans;
5764 struct ulist *roots;
5765 struct ulist_node *node;
5766 struct btrfs_root *search_root;
5767 struct btrfs_path path;
5768 struct ulist_iterator iter;
5769 struct btrfs_key root_key, key;
5772 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5773 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5776 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5780 btrfs_init_path(&path);
5781 ULIST_ITER_INIT(&iter);
5782 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to open */
5783 root_key.objectid = node->val;
5784 root_key.type = BTRFS_ROOT_ITEM_KEY;
5785 root_key.offset = (u64)-1;
5787 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5794 trans = btrfs_start_transaction(search_root, 0);
5795 if (IS_ERR(trans)) {
5796 ret = PTR_ERR(trans);
/* search down to the level of @buf; block checking is skipped on the way */
5800 path.lowest_level = btrfs_header_level(buf);
5801 path.skip_check_block = 1;
5802 if (path.lowest_level)
5803 btrfs_node_key_to_cpu(buf, &key, 0);
5805 btrfs_item_key_to_cpu(buf, &key, 0);
/* last argument is 1 here - presumably the cow flag; confirm */
5806 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5809 btrfs_commit_transaction(trans, search_root);
/* dispatch to the repair routine matching the reported status */
5812 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5813 ret = fix_key_order(search_root, &path);
5814 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5815 ret = fix_item_offset(search_root, &path);
5817 btrfs_commit_transaction(trans, search_root);
5820 btrfs_release_path(&path);
5821 btrfs_commit_transaction(trans, search_root);
5824 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record is looked up by [buf->start, buf->len) and updated with the
 * header generation, the objectid of the block's first key and the block
 * level. The block is checked with btrfs_check_leaf() or btrfs_check_node();
 * a non-clean status is passed to try_to_fix_bad_block(). The record's
 * content_checked / owner_ref_checked bits are set here, and
 * maybe_free_extent_rec() is called at the end.
 */
5828 static int check_block(struct btrfs_root *root,
5829 struct cache_tree *extent_cache,
5830 struct extent_buffer *buf, u64 flags)
5832 struct extent_record *rec;
5833 struct cache_extent *cache;
5834 struct btrfs_key key;
5835 enum btrfs_tree_block_status status;
5839 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5842 rec = container_of(cache, struct extent_record, cache);
5843 rec->generation = btrfs_header_generation(buf);
5845 level = btrfs_header_level(buf);
5846 if (btrfs_header_nritems(buf) > 0) {
/* remember the objectid of the first key (item or node variant) */
5849 btrfs_item_key_to_cpu(buf, &key, 0);
5851 btrfs_node_key_to_cpu(buf, &key, 0);
5853 rec->info_objectid = key.objectid;
5855 rec->info_level = level;
5857 if (btrfs_is_leaf(buf))
5858 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5860 status = btrfs_check_node(root, &rec->parent_key, buf);
5862 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() is declared to return int, yet its
 * result is stored in the enum status and compared against
 * BTRFS_TREE_BLOCK_CLEAN; confirm the two value spaces agree.
 */
5864 status = try_to_fix_bad_block(root, buf, status);
5865 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5867 fprintf(stderr, "bad block %llu\n",
5868 (unsigned long long)buf->start);
5871 * Signal to callers we need to start the scan over
5872 * again since we'll have cowed blocks.
5877 rec->content_checked = 1;
/* with the FULL_BACKREF flag set the owner ref is marked checked directly */
5878 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5879 rec->owner_ref_checked = 1;
5881 ret = check_owner_ref(root, rec, buf);
5883 rec->owner_ref_checked = 1;
5887 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Two match rules are visible: a comparison of @parent with back->parent
 * guarded by a test on !node->full_backref, and a comparison of @root with
 * back->root guarded by a test on node->full_backref.
 */
5891 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5892 u64 parent, u64 root)
5894 struct list_head *cur = rec->backrefs.next;
5895 struct extent_backref *node;
5896 struct tree_backref *back;
5898 while(cur != &rec->backrefs) {
5899 node = to_extent_backref(cur);
5903 back = to_tree_backref(node);
/* parent-keyed lookup */
5905 if (!node->full_backref)
5907 if (parent == back->parent)
/* root-keyed lookup */
5910 if (node->full_backref)
5912 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded extent_backref node is zeroed. Two initialisations are
 * visible: one stores @parent and sets full_backref to 1, the other sets
 * full_backref to 0.
 */
5919 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5920 u64 parent, u64 root)
5922 struct tree_backref *ref = malloc(sizeof(*ref));
/* only the embedded node is cleared, not the whole tree_backref */
5926 memset(&ref->node, 0, sizeof(ref->node));
5928 ref->parent = parent;
5929 ref->node.full_backref = 1;
5932 ref->node.full_backref = 0;
5934 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref.
 *
 * One match rule compares @parent with back->parent; the other compares
 * (@root, @owner, @offset) with the stored triple. For the triple match,
 * when both @found_ref and node->found_ref are set, the stored bytes and
 * disk_bytenr are additionally compared against @bytes and @disk_bytenr.
 */
5939 static struct data_backref *find_data_backref(struct extent_record *rec,
5940 u64 parent, u64 root,
5941 u64 owner, u64 offset,
5943 u64 disk_bytenr, u64 bytes)
5945 struct list_head *cur = rec->backrefs.next;
5946 struct extent_backref *node;
5947 struct data_backref *back;
5949 while(cur != &rec->backrefs) {
5950 node = to_extent_backref(cur);
5954 back = to_data_backref(node);
/* parent-keyed lookup */
5956 if (!node->full_backref)
5958 if (parent == back->parent)
/* (root, owner, offset)-keyed lookup */
5961 if (node->full_backref)
5963 if (back->root == root && back->owner == owner &&
5964 back->offset == offset) {
/* an already-found ref with a different size or bytenr is treated specially */
5965 if (found_ref && node->found_ref &&
5966 (back->bytes != bytes ||
5967 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged is_data. Two initialisations are
 * visible: one stores @parent with full_backref = 1, the other stores
 * @offset with full_backref = 0. The backref's byte count is set to
 * max_size, and rec->max_size is raised if max_size is larger.
 */
5976 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5977 u64 parent, u64 root,
5978 u64 owner, u64 offset,
5981 struct data_backref *ref = malloc(sizeof(*ref));
/* only the embedded node is cleared, not the whole data_backref */
5985 memset(&ref->node, 0, sizeof(ref->node));
5986 ref->node.is_data = 1;
5989 ref->parent = parent;
5992 ref->node.full_backref = 1;
5996 ref->offset = offset;
5997 ref->node.full_backref = 0;
5999 ref->bytes = max_size;
6002 list_add_tail(&ref->node.list, &rec->backrefs);
/* keep the record's max_size as the largest size seen so far */
6003 if (max_size > rec->max_size)
6004 rec->max_size = max_size;
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start
 * (via global_info) has flags that do not fit the kind of extent @rec
 * describes.
 */
6008 /* Check if the type of extent matches with its chunk */
6009 static void check_extent_type(struct extent_record *rec)
6011 struct btrfs_block_group_cache *bg_cache;
6013 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6017 /* data extent, check chunk directly*/
6018 if (!rec->metadata) {
6019 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6020 rec->wrong_chunk_type = 1;
6024 /* metadata extent, check the obvious case first */
6025 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6026 BTRFS_BLOCK_GROUP_METADATA))) {
6027 rec->wrong_chunk_type = 1;
6032 * Check SYSTEM extent, as it's also marked as metadata, we can only
6033 * make sure it's a SYSTEM extent by its backref
6035 if (!list_empty(&rec->backrefs)) {
6036 struct extent_backref *node;
6037 struct tree_backref *tback;
/* only the first backref on the list is inspected */
6040 node = to_extent_backref(rec->backrefs.next);
6041 if (node->is_data) {
6042 /* tree block shouldn't have data backref */
6043 rec->wrong_chunk_type = 1;
6046 tback = container_of(node, struct tree_backref, node);
/* chunk tree blocks must be in a SYSTEM group, all others in METADATA */
6048 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6049 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6051 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6052 if (!(bg_cache->flags & bg_type))
6053 rec->wrong_chunk_type = 1;
6058 * Allocate a new extent record, fill default values from @tmpl and insert into
6059 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6060 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. rec->nr is the larger of tmpl->nr and
 * tmpl->max_size. The duplicate counter and the bad_full_backref /
 * crossing_stripes / wrong_chunk_type flags start at 0. The global
 * bytes_used counter is increased by rec->nr.
 */
6062 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6063 struct extent_record *tmpl)
6065 struct extent_record *rec;
/* a template without a size is a caller bug */
6068 BUG_ON(tmpl->max_size == 0);
6069 rec = malloc(sizeof(*rec));
6072 rec->start = tmpl->start;
6073 rec->max_size = tmpl->max_size;
6074 rec->nr = max(tmpl->nr, tmpl->max_size);
6075 rec->found_rec = tmpl->found_rec;
6076 rec->content_checked = tmpl->content_checked;
6077 rec->owner_ref_checked = tmpl->owner_ref_checked;
6078 rec->num_duplicates = 0;
6079 rec->metadata = tmpl->metadata;
6080 rec->flag_block_full_backref = FLAG_UNSET;
6081 rec->bad_full_backref = 0;
6082 rec->crossing_stripes = 0;
6083 rec->wrong_chunk_type = 0;
6084 rec->is_root = tmpl->is_root;
6085 rec->refs = tmpl->refs;
6086 rec->extent_item_refs = tmpl->extent_item_refs;
6087 rec->parent_generation = tmpl->parent_generation;
6088 INIT_LIST_HEAD(&rec->backrefs);
6089 INIT_LIST_HEAD(&rec->dups);
6090 INIT_LIST_HEAD(&rec->list);
6091 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* note: the cache entry uses tmpl->nr as its size, not rec->nr */
6092 rec->cache.start = tmpl->start;
6093 rec->cache.size = tmpl->nr;
6094 ret = insert_cache_extent(extent_cache, &rec->cache);
6099 bytes_used += rec->nr;
/* the stripe-crossing check is made with the tree root's nodesize */
6102 rec->crossing_stripes = check_crossing_stripes(global_info,
6103 rec->start, global_info->tree_root->nodesize);
6104 check_extent_type(rec);
6109 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6111 * - refs - if found, increase refs
6112 * - is_root - if found, set
6113 * - content_checked - if found, set
6114 * - owner_ref_checked - if found, set
6116 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr) in
 * @extent_cache, or create a new record through add_extent_rec_nolookup().
 *
 * On an existing record this updates nr, extent_item_refs, the checked
 * bits, parent_key, parent_generation and max_size from @tmpl. A template
 * flagged found_rec that starts elsewhere, or that hits a record already
 * flagged found_rec, is queued on rec->dups and the record is put on the
 * global duplicate_extents list.
 */
6118 static int add_extent_rec(struct cache_tree *extent_cache,
6119 struct extent_record *tmpl)
6121 struct extent_record *rec;
6122 struct cache_extent *cache;
6126 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6128 rec = container_of(cache, struct extent_record, cache);
6132 rec->nr = max(tmpl->nr, tmpl->max_size);
6135 * We need to make sure to reset nr to whatever the extent
6136 * record says was the real size, this way we can compare it to
6139 if (tmpl->found_rec) {
6140 if (tmpl->start != rec->start || rec->found_rec) {
6141 struct extent_record *tmp;
/* queue the record on the global list only once */
6144 if (list_empty(&rec->list))
6145 list_add_tail(&rec->list,
6146 &duplicate_extents);
6149 * We have to do this song and dance in case we
6150 * find an extent record that falls inside of
6151 * our current extent record but does not have
6152 * the same objectid.
6154 tmp = malloc(sizeof(*tmp));
6157 tmp->start = tmpl->start;
6158 tmp->max_size = tmpl->max_size;
6161 tmp->metadata = tmpl->metadata;
6162 tmp->extent_item_refs = tmpl->extent_item_refs;
6163 INIT_LIST_HEAD(&tmp->list);
6164 list_add_tail(&tmp->list, &rec->dups);
6165 rec->num_duplicates++;
6172 if (tmpl->extent_item_refs && !dup) {
/* a second non-zero ref count for the same record is reported on stderr */
6173 if (rec->extent_item_refs) {
6174 fprintf(stderr, "block %llu rec "
6175 "extent_item_refs %llu, passed %llu\n",
6176 (unsigned long long)tmpl->start,
6177 (unsigned long long)
6178 rec->extent_item_refs,
6179 (unsigned long long)tmpl->extent_item_refs);
6181 rec->extent_item_refs = tmpl->extent_item_refs;
/* the checked bits are only ever set from the template, never cleared */
6185 if (tmpl->content_checked)
6186 rec->content_checked = 1;
6187 if (tmpl->owner_ref_checked)
6188 rec->owner_ref_checked = 1;
6189 memcpy(&rec->parent_key, &tmpl->parent_key,
6190 sizeof(tmpl->parent_key));
6191 if (tmpl->parent_generation)
6192 rec->parent_generation = tmpl->parent_generation;
6193 if (rec->max_size < tmpl->max_size)
6194 rec->max_size = tmpl->max_size;
6197 * A metadata extent can't cross stripe_len boundary, otherwise
6198 * kernel scrub won't be able to handle it.
6199 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6203 rec->crossing_stripes = check_crossing_stripes(
6204 global_info, rec->start,
6205 global_info->tree_root->nodesize);
6206 check_extent_type(rec);
6207 maybe_free_extent_rec(extent_cache, rec);
/* the not-found path: build a fresh record from the template */
6211 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache, with a creation path
 * through add_extent_rec_nolookup() starting from a zeroed template. The
 * matching backref is searched with find_tree_backref() and allocated with
 * alloc_tree_backref(). Either found_ref or found_extent_tree is then set
 * on the backref; if the bit was already set, a message is printed on
 * stderr. Finally check_extent_type() and maybe_free_extent_rec() run.
 */
6216 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6217 u64 parent, u64 root, int found_ref)
6219 struct extent_record *rec;
6220 struct tree_backref *back;
6221 struct cache_extent *cache;
/* a one-byte range at bytenr is enough to hit the covering record */
6224 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6226 struct extent_record tmpl;
6228 memset(&tmpl, 0, sizeof(tmpl));
6229 tmpl.start = bytenr;
6234 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6238 /* really a bug in cache_extent implement now */
6239 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6244 rec = container_of(cache, struct extent_record, cache);
6245 if (rec->start != bytenr) {
6247 * Several cause, from unaligned bytenr to over lapping extents
6252 back = find_tree_backref(rec, parent, root);
6254 back = alloc_tree_backref(rec, parent, root);
/* the reference was seen while walking the trees */
6260 if (back->node.found_ref) {
6261 fprintf(stderr, "Extent back ref already exists "
6262 "for %llu parent %llu root %llu \n",
6263 (unsigned long long)bytenr,
6264 (unsigned long long)parent,
6265 (unsigned long long)root);
6267 back->node.found_ref = 1;
/* the reference was seen in the extent tree */
6269 if (back->node.found_extent_tree) {
6270 fprintf(stderr, "Extent back ref already exists "
6271 "for %llu parent %llu root %llu \n",
6272 (unsigned long long)bytenr,
6273 (unsigned long long)parent,
6274 (unsigned long long)root);
6276 back->node.found_extent_tree = 1;
6278 check_extent_type(rec);
6279 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache, with a creation path
 * through add_extent_rec_nolookup(), and its max_size is raised to
 * @max_size if needed. The backref is searched with find_data_backref()
 * and allocated with alloc_data_backref(). One update path counts a found
 * reference (found_ref, bytes, disk_bytenr, and the record's checked
 * bits); the other stores @num_refs and sets found_extent_tree.
 */
6283 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6284 u64 parent, u64 root, u64 owner, u64 offset,
6285 u32 num_refs, int found_ref, u64 max_size)
6287 struct extent_record *rec;
6288 struct data_backref *back;
6289 struct cache_extent *cache;
6292 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6294 struct extent_record tmpl;
6296 memset(&tmpl, 0, sizeof(tmpl));
6297 tmpl.start = bytenr;
6299 tmpl.max_size = max_size;
6301 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6305 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6310 rec = container_of(cache, struct extent_record, cache);
6311 if (rec->max_size < max_size)
6312 rec->max_size = max_size;
6315 * If found_ref is set then max_size is the real size and must match the
6316 * existing refs. So if we have already found a ref then we need to
6317 * make sure that this ref matches the existing one, otherwise we need
6318 * to add a new backref so we can notice that the backrefs don't match
6319 * and we need to figure out who is telling the truth. This is to
6320 * account for that awful fsync bug I introduced where we'd end up with
6321 * a btrfs_file_extent_item that would have its length include multiple
6322 * prealloc extents or point inside of a prealloc extent.
6324 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6327 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found references are expected to arrive one at a time */
6333 BUG_ON(num_refs != 1);
6334 if (back->node.found_ref)
6335 BUG_ON(back->bytes != max_size);
6336 back->node.found_ref = 1;
6337 back->found_ref += 1;
6338 back->bytes = max_size;
6339 back->disk_bytenr = bytenr;
6341 rec->content_checked = 1;
6342 rec->owner_ref_checked = 1;
/* extent-tree side: a second sighting of the same backref is reported */
6344 if (back->node.found_extent_tree) {
6345 fprintf(stderr, "Extent back ref already exists "
6346 "for %llu parent %llu root %llu "
6347 "owner %llu offset %llu num_refs %lu\n",
6348 (unsigned long long)bytenr,
6349 (unsigned long long)parent,
6350 (unsigned long long)root,
6351 (unsigned long long)owner,
6352 (unsigned long long)offset,
6353 (unsigned long)num_refs);
6355 back->num_refs = num_refs;
6356 back->node.found_extent_tree = 1;
6358 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range [bytenr, bytenr + size) to the @seen tree and to the
 * @pending tree. The result of the insertion into @seen is kept in ret;
 * the result of the insertion into @pending is not checked.
 */
6362 static int add_pending(struct cache_tree *pending,
6363 struct cache_tree *seen, u64 bytenr, u32 size)
6366 ret = add_cache_extent(seen, bytenr, size);
6369 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process.
 *
 * Sources, in the order they are consulted here: the @reada tree (first
 * entry only), the @nodes tree searched from slightly before @last with a
 * fallback to its beginning, and the @pending tree. When more than 8 slots
 * remain free, further @pending extents are appended as long as each one
 * starts within 32768 bytes of the end of the previous range.
 */
6373 static int pick_next_pending(struct cache_tree *pending,
6374 struct cache_tree *reada,
6375 struct cache_tree *nodes,
6376 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): 'last' is u64 but is stored in an unsigned long here; on
 * platforms where unsigned long is 32 bits wide the value is truncated.
 */
6379 unsigned long node_start = last;
6380 struct cache_extent *cache;
6383 cache = search_cache_extent(reada, 0);
6385 bits[0].start = cache->start;
6386 bits[0].size = cache->size;
/* start the node search up to 32768 bytes before 'last' */
6391 if (node_start > 32768)
6392 node_start -= 32768;
6394 cache = search_cache_extent(nodes, node_start);
6396 cache = search_cache_extent(nodes, 0);
6399 cache = search_cache_extent(pending, 0);
/* copy consecutive extents until the tree or the bits array is exhausted */
6404 bits[ret].start = cache->start;
6405 bits[ret].size = cache->size;
6406 cache = next_cache_extent(cache);
6408 } while (cache && ret < bits_nr);
6414 bits[ret].start = cache->start;
6415 bits[ret].size = cache->size;
6416 cache = next_cache_extent(cache);
6418 } while (cache && ret < bits_nr);
6420 if (bits_nr - ret > 8) {
6421 u64 lookup = bits[0].start + bits[0].size;
6422 struct cache_extent *next;
6423 next = search_cache_extent(pending, lookup);
/* a gap larger than 32768 bytes is checked for here */
6425 if (next->start - lookup > 32768)
6427 bits[ret].start = next->start;
6428 bits[ret].size = next->size;
6429 lookup = next->start + next->size;
6433 next = next_cache_extent(next);
/*
 * Release callback for a chunk_record stored in a cache tree: recovers the
 * record from its embedded cache_extent and unlinks it from the 'list' and
 * 'dextents' lists.
 */
6441 static void free_chunk_record(struct cache_extent *cache)
6443 struct chunk_record *rec;
6445 rec = container_of(cache, struct chunk_record, cache);
6446 list_del_init(&rec->list);
6447 list_del_init(&rec->dextents);
/*
 * Release every extent in @chunk_cache, using free_chunk_record() as the
 * per-entry callback.
 */
6451 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6453 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for a device_record stored in an rb-tree: recovers the
 * record from its embedded rb_node.
 */
6456 static void free_device_record(struct rb_node *node)
6458 struct device_record *rec;
6460 rec = container_of(node, struct device_record, node);
/*
 * Macro invoked with the name 'device_cache' and free_device_record as the
 * per-node callback - presumably it generates the tree-freeing helper for
 * the device cache; confirm against the macro definition.
 */
6464 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block group tree: its cache_extent goes into
 * tree->tree and the record is appended to the tree->block_groups list.
 */
6466 int insert_block_group_record(struct block_group_tree *tree,
6467 struct block_group_record *bg_rec)
6471 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6475 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for a block_group_record stored in a cache tree:
 * recovers the record from its embedded cache_extent and unlinks it from
 * its list.
 */
6479 static void free_block_group_record(struct cache_extent *cache)
6481 struct block_group_record *rec;
6483 rec = container_of(cache, struct block_group_record, cache);
6484 list_del_init(&rec->list);
/*
 * Release every extent in tree->tree, using free_block_group_record() as
 * the per-entry callback.
 */
6488 void free_block_group_tree(struct block_group_tree *tree)
6490 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2()
 * and append it to both orphan lists of @tree (no_chunk_orphans and
 * no_device_orphans).
 */
6493 int insert_device_extent_record(struct device_extent_tree *tree,
6494 struct device_extent_record *de_rec)
6499 * Device extent is a bit different from the other extents, because
6500 * the extents which belong to the different devices may have the
6501 * same start and size, so we need use the special extent cache
6502 * search/insert functions.
6504 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
/* a fresh record is linked on both orphan lists */
6508 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6509 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for a device_extent_record stored in a cache tree:
 * recovers the record from its embedded cache_extent and unlinks it from
 * the chunk and device lists if it is still on them.
 */
6513 static void free_device_extent_record(struct cache_extent *cache)
6515 struct device_extent_record *rec;
6517 rec = container_of(cache, struct device_extent_record, cache);
6518 if (!list_empty(&rec->chunk_list))
6519 list_del_init(&rec->chunk_list);
6520 if (!list_empty(&rec->device_list))
6521 list_del_init(&rec->device_list);
/*
 * Release every extent in tree->tree, using free_device_extent_record() as
 * the per-entry callback.
 */
6525 void free_device_extent_tree(struct device_extent_tree *tree)
6527 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6530 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf (only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is tested for
 * first and leads to add_tree_backref(); the other call is
 * add_data_backref(). In both calls key.objectid is passed as bytenr and
 * key.offset as parent.
 */
6531 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6532 struct extent_buffer *leaf, int slot)
6534 struct btrfs_extent_ref_v0 *ref0;
6535 struct btrfs_key key;
6538 btrfs_item_key_to_cpu(leaf, &key, slot);
6539 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6540 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6541 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
/* data ref: root, owner and offset are 0; the v0 ref count is num_refs */
6544 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6545 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the chunk's stripes. It is
 * filled from @key (objectid, type, offset; offset is also the cache
 * start), from the leaf header generation and from the chunk item fields.
 * Each stripe's devid, offset and device UUID are copied as well. An
 * allocation failure is reported on stderr.
 */
6551 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6552 struct btrfs_key *key,
6555 struct btrfs_chunk *ptr;
6556 struct chunk_record *rec;
6559 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6560 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* the record size depends on the number of stripes */
6562 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6564 fprintf(stderr, "memory allocation failed\n");
6568 INIT_LIST_HEAD(&rec->list);
6569 INIT_LIST_HEAD(&rec->dextents);
/* the chunk is indexed in the cache by [key->offset, chunk length) */
6572 rec->cache.start = key->offset;
6573 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6575 rec->generation = btrfs_header_generation(leaf);
6577 rec->objectid = key->objectid;
6578 rec->type = key->type;
6579 rec->offset = key->offset;
6581 rec->length = rec->cache.size;
6582 rec->owner = btrfs_chunk_owner(leaf, ptr);
6583 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6584 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6585 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6586 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6587 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6588 rec->num_stripes = num_stripes;
6589 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* copy the per-stripe device id, offset and device UUID */
6591 for (i = 0; i < rec->num_stripes; ++i) {
6592 rec->stripes[i].devid =
6593 btrfs_stripe_devid_nr(leaf, ptr, i);
6594 rec->stripes[i].offset =
6595 btrfs_stripe_offset_nr(leaf, ptr, i);
6596 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6597 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid(); an invalid chunk is
 * reported with error(). A chunk_record is built with
 * btrfs_new_chunk_record() and inserted into the cache; a failed insertion
 * is reported as an already existing chunk.
 */
6604 static int process_chunk_item(struct cache_tree *chunk_cache,
6605 struct btrfs_key *key, struct extent_buffer *eb,
6608 struct chunk_record *rec;
6609 struct btrfs_chunk *chunk;
6612 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6614 * Do extra check for this chunk item,
6616 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6617 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6618 * and owner<->key_type check.
6620 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6623 error("chunk(%llu, %llu) is not valid, ignore it",
6624 key->offset, btrfs_chunk_length(eb, chunk));
6627 rec = btrfs_new_chunk_record(eb, key, slot);
6628 ret = insert_cache_extent(chunk_cache, &rec->cache);
6630 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6631 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare. A failed
 * insertion is reported as an already existing device.
 */
6638 static int process_device_item(struct rb_root *dev_cache,
6639 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6641 struct btrfs_dev_item *ptr;
6642 struct device_record *rec;
6645 ptr = btrfs_item_ptr(eb,
6646 slot, struct btrfs_dev_item);
6648 rec = malloc(sizeof(*rec));
6650 fprintf(stderr, "memory allocation failed\n");
6654 rec->devid = key->offset;
6655 rec->generation = btrfs_header_generation(eb);
6657 rec->objectid = key->objectid;
6658 rec->type = key->type;
6659 rec->offset = key->offset;
/*
 * NOTE(review): devid was already set from key->offset above and is
 * overwritten here with the id stored in the item.
 */
6661 rec->devid = btrfs_device_id(eb, ptr);
6662 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6663 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6665 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6667 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated. The cache range is taken from @key:
 * objectid is the start and offset is the size. The key fields, the leaf
 * header generation and the item's flags are stored as well. An allocation
 * failure is reported on stderr.
 */
6674 struct block_group_record *
6675 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6678 struct btrfs_block_group_item *ptr;
6679 struct block_group_record *rec;
6681 rec = calloc(1, sizeof(*rec));
6683 fprintf(stderr, "memory allocation failed\n");
6687 rec->cache.start = key->objectid;
6688 rec->cache.size = key->offset;
6690 rec->generation = btrfs_header_generation(leaf);
6692 rec->objectid = key->objectid;
6693 rec->type = key->type;
6694 rec->offset = key->offset;
6696 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6697 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6699 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache. A failed insertion is reported as an already
 * existing block group.
 */
6704 static int process_block_group_item(struct block_group_tree *block_group_cache,
6705 struct btrfs_key *key,
6706 struct extent_buffer *eb, int slot)
6708 struct block_group_record *rec;
6711 rec = btrfs_new_block_group_record(eb, key, slot);
6712 ret = insert_block_group_record(block_group_cache, rec);
6714 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6715 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is zero-allocated. The cache entry is keyed by key->objectid
 * (stored as cache.objectid) and key->offset (cache.start), with the extent
 * length as its size. The key fields, the leaf header generation and the
 * owning chunk's objectid and offset are stored as well. An allocation
 * failure is reported on stderr.
 */
6722 struct device_extent_record *
6723 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6724 struct btrfs_key *key, int slot)
6726 struct device_extent_record *rec;
6727 struct btrfs_dev_extent *ptr;
6729 rec = calloc(1, sizeof(*rec));
6731 fprintf(stderr, "memory allocation failed\n");
6735 rec->cache.objectid = key->objectid;
6736 rec->cache.start = key->offset;
6738 rec->generation = btrfs_header_generation(leaf);
6740 rec->objectid = key->objectid;
6741 rec->type = key->type;
6742 rec->offset = key->offset;
6744 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
/* 'chunk_objecteid' is the field's actual (misspelled) name */
6745 rec->chunk_objecteid =
6746 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6748 btrfs_dev_extent_chunk_offset(leaf, ptr);
6749 rec->length = btrfs_dev_extent_length(leaf, ptr);
6750 rec->cache.size = rec->length;
6752 INIT_LIST_HEAD(&rec->chunk_list);
6753 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache. A failed insertion is reported as an already
 * existing device extent.
 */
6759 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6760 struct btrfs_key *key, struct extent_buffer *eb,
6763 struct device_extent_record *rec;
6766 rec = btrfs_new_device_extent_record(eb, key, slot);
6767 ret = insert_device_extent_record(dev_extent_cache, rec);
6770 "Device extent[%llu, %llu, %llu] existed.\n",
6771 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM or METADATA_ITEM at @slot of @eb into @extent_cache.
 *
 * The extent length is root->nodesize for BTRFS_METADATA_ITEM_KEY and
 * key.offset otherwise.  An extent_record template (start, nr,
 * extent_item_refs, metadata, max_size) is passed to add_extent_rec(), and
 * every inline reference stored after the extent item is turned into a
 * tree or data backref through add_tree_backref() or add_data_backref().
 *
 * Items with an unaligned bytenr, a metadata length different from
 * root->nodesize, or a data length not aligned to root->sectorsize are
 * reported through error(), whose messages describe them as ignored.
 */
6778 static int process_extent_item(struct btrfs_root *root,
6779 struct cache_tree *extent_cache,
6780 struct extent_buffer *eb, int slot)
6782 struct btrfs_extent_item *ei;
6783 struct btrfs_extent_inline_ref *iref;
6784 struct btrfs_extent_data_ref *dref;
6785 struct btrfs_shared_data_ref *sref;
6786 struct btrfs_key key;
6787 struct extent_record tmpl;
6792 u32 item_size = btrfs_item_size_nr(eb, slot);
6798 btrfs_item_key_to_cpu(eb, &key, slot);
6800 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6802 num_bytes = root->nodesize;
6804 num_bytes = key.offset;
6807 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6808 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6809 key.objectid, root->sectorsize);
/*
 * An item smaller than struct btrfs_extent_item is only understood as the
 * v0 layout, and only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
6812 if (item_size < sizeof(*ei)) {
6813 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6814 struct btrfs_extent_item_v0 *ei0;
6815 BUG_ON(item_size != sizeof(*ei0));
6816 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6817 refs = btrfs_extent_refs_v0(eb, ei0);
6821 memset(&tmpl, 0, sizeof(tmpl));
6822 tmpl.start = key.objectid;
6823 tmpl.nr = num_bytes;
6824 tmpl.extent_item_refs = refs;
6825 tmpl.metadata = metadata;
6827 tmpl.max_size = num_bytes;
6829 return add_extent_rec(extent_cache, &tmpl);
6832 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6833 refs = btrfs_extent_refs(eb, ei);
6834 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6838 if (metadata && num_bytes != root->nodesize) {
6839 error("ignore invalid metadata extent, length %llu does not equal to %u",
6840 num_bytes, root->nodesize);
6843 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6844 error("ignore invalid data extent, length %llu is not aligned to %u",
6845 num_bytes, root->sectorsize);
6849 memset(&tmpl, 0, sizeof(tmpl));
6850 tmpl.start = key.objectid;
6851 tmpl.nr = num_bytes;
6852 tmpl.extent_item_refs = refs;
6853 tmpl.metadata = metadata;
6855 tmpl.max_size = num_bytes;
6856 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  A tree block stored
 * under EXTENT_ITEM_KEY also carries a btrfs_tree_block_info, which is
 * skipped first.
 */
6858 ptr = (unsigned long)(ei + 1);
6859 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6860 key.type == BTRFS_EXTENT_ITEM_KEY)
6861 ptr += sizeof(struct btrfs_tree_block_info);
6863 end = (unsigned long)ei + item_size;
6865 iref = (struct btrfs_extent_inline_ref *)ptr;
6866 type = btrfs_extent_inline_ref_type(eb, iref);
6867 offset = btrfs_extent_inline_ref_offset(eb, iref);
6869 case BTRFS_TREE_BLOCK_REF_KEY:
6870 ret = add_tree_backref(extent_cache, key.objectid,
6874 "add_tree_backref failed (extent items tree block): %s",
6877 case BTRFS_SHARED_BLOCK_REF_KEY:
6878 ret = add_tree_backref(extent_cache, key.objectid,
6882 "add_tree_backref failed (extent items shared block): %s",
6885 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload begins at the offset field of the inline ref. */
6886 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6887 add_data_backref(extent_cache, key.objectid, 0,
6888 btrfs_extent_data_ref_root(eb, dref),
6889 btrfs_extent_data_ref_objectid(eb,
6891 btrfs_extent_data_ref_offset(eb, dref),
6892 btrfs_extent_data_ref_count(eb, dref),
6895 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the complete inline ref structure. */
6896 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6897 add_data_backref(extent_cache, key.objectid, offset,
6899 btrfs_shared_data_ref_count(eb, sref),
6903 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6904 key.objectid, key.type, num_bytes);
6907 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one free space entry, then drop that entry.
 *
 * Pieces of the range overlapped by superblock mirror stripes are trimmed
 * off first, recursing for the left part when a stripe lands in the middle
 * of the range.  Whatever remains is looked up with
 * btrfs_find_free_space(); a missing entry, or one whose offset or size
 * differs, is reported on stderr.  A matching entry is removed with
 * unlink_free_space().
 */
6914 static int check_cache_range(struct btrfs_root *root,
6915 struct btrfs_block_group_cache *cache,
6916 u64 offset, u64 bytes)
6918 struct btrfs_free_space *entry;
/*
 * For each superblock mirror, ask btrfs_rmap_block() for the logical
 * addresses and stripe length in this block group that correspond to the
 * mirror offset (presumably a reverse mapping -- TODO confirm).
 */
6924 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6925 bytenr = btrfs_sb_offset(i);
6926 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6927 cache->key.objectid, bytenr, 0,
6928 &logical, &nr, &stripe_len);
6933 if (logical[nr] + stripe_len <= offset)
6935 if (offset + bytes <= logical[nr])
6937 if (logical[nr] == offset) {
6938 if (stripe_len >= bytes) {
6942 bytes -= stripe_len;
6943 offset += stripe_len;
6944 } else if (logical[nr] < offset) {
6945 if (logical[nr] + stripe_len >=
6950 bytes = (offset + bytes) -
6951 (logical[nr] + stripe_len);
6952 offset = logical[nr] + stripe_len;
6955 * Could be tricky, the super may land in the
6956 * middle of the area we're checking. First
6957 * check the easiest case, it's at the end.
6959 if (logical[nr] + stripe_len >=
6961 bytes = logical[nr] - offset;
6965 /* Check the left side */
6966 ret = check_cache_range(root, cache,
6968 logical[nr] - offset);
6974 /* Now we continue with the right side */
6975 bytes = (offset + bytes) -
6976 (logical[nr] + stripe_len);
6977 offset = logical[nr] + stripe_len;
/* The trimmed range must match a single free space entry exactly. */
6984 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6986 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6987 offset, offset+bytes);
6991 if (entry->offset != offset) {
6992 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6997 if (entry->bytes != bytes) {
6998 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6999 bytes, entry->bytes, offset);
7003 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * Walks EXTENT_ITEM and METADATA_ITEM keys of the extent root from the
 * start of the block group (but not below BTRFS_SUPER_INFO_OFFSET) up to
 * its end.  Every gap between the end of the previous extent, tracked in
 * last, and the next extent is handed to check_cache_range(), as is the
 * tail between the last extent and the end of the block group.  Free space
 * entries still left in the cache afterwards are reported on stderr.
 */
7008 static int verify_space_cache(struct btrfs_root *root,
7009 struct btrfs_block_group_cache *cache)
7011 struct btrfs_path path;
7012 struct extent_buffer *leaf;
7013 struct btrfs_key key;
/* All lookups below use the extent root, whatever root was passed in. */
7017 root = root->fs_info->extent_root;
7019 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7021 btrfs_init_path(&path);
7022 key.objectid = last;
7024 key.type = BTRFS_EXTENT_ITEM_KEY;
7025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7030 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7031 ret = btrfs_next_leaf(root, &path);
7039 leaf = path.nodes[0];
7040 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7041 if (key.objectid >= cache->key.offset + cache->key.objectid)
7043 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7044 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * No gap before this extent: only advance last past it.  EXTENT_ITEM keys
 * carry the length in key.offset; the other accepted key type uses
 * root->nodesize.
 */
7049 if (last == key.objectid) {
7050 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7051 last = key.objectid + key.offset;
7053 last = key.objectid + root->nodesize;
7058 ret = check_cache_range(root, cache, last,
7059 key.objectid - last);
7062 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7063 last = key.objectid + key.offset;
7065 last = key.objectid + root->nodesize;
7069 if (last < cache->key.objectid + cache->key.offset)
7070 ret = check_cache_range(root, cache, last,
7071 cache->key.objectid +
7072 cache->key.offset - last);
7075 btrfs_release_path(&path);
7078 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7079 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * When the superblock cache generation is set (not -1ULL) but differs from
 * the superblock generation, a message states that the space cache will be
 * invalidated.  Block groups are looked up starting just past the primary
 * superblock; for each one free_space_ctl is initialised if missing,
 * btrfs_remove_free_space_cache() is called (presumably to discard an
 * earlier load -- TODO confirm), and the free space is loaded and passed to
 * verify_space_cache().
 *
 * Returns -EINVAL when the local error indicator is non-zero, 0 otherwise.
 */
7087 static int check_space_cache(struct btrfs_root *root)
7089 struct btrfs_block_group_cache *cache;
7090 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7094 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7095 btrfs_super_generation(root->fs_info->super_copy) !=
7096 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7097 printf("cache and super generation don't match, space cache "
7098 "will be invalidated\n");
7102 if (ctx.progress_enabled) {
7103 ctx.tp = TASK_FREE_SPACE;
7104 task_start(ctx.info);
7108 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right after the end of this block group. */
7112 start = cache->key.objectid + cache->key.offset;
7113 if (!cache->free_space_ctl) {
7114 if (btrfs_init_free_space_ctl(cache,
7115 root->sectorsize)) {
7120 btrfs_remove_free_space_cache(cache);
/*
 * With the FREE_SPACE_TREE compat_ro feature the free space comes from the
 * free space tree, with the superblock stripes excluded around the load.
 */
7123 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7124 ret = exclude_super_stripes(root, cache);
7126 fprintf(stderr, "could not exclude super stripes: %s\n",
7131 ret = load_free_space_tree(root->fs_info, cache);
7132 free_excluded_extents(root, cache);
7134 fprintf(stderr, "could not load free space tree: %s\n",
7141 ret = load_free_space_cache(root->fs_info, cache);
7146 ret = verify_space_cache(root, cache);
7148 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7149 cache->key.objectid);
7154 task_stop(ctx.info);
7156 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data at logical address @bytenr and compare the
 * checksum of every root->sectorsize block with the checksums stored in
 * @eb starting at byte offset @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of root->sectorsize before any
 * work is done.  A mismatch is reported on stderr together with the mirror
 * number, and the number of copies is looked up so that another mirror can
 * be tried.
 */
7159 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7160 u64 num_bytes, unsigned long leaf_offset,
7161 struct extent_buffer *eb) {
7164 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7166 unsigned long csum_offset;
7170 u64 data_checked = 0;
7176 if (num_bytes % root->sectorsize)
7179 data = malloc(num_bytes);
7183 while (offset < num_bytes) {
7186 read_len = num_bytes - offset;
7187 /* ask for everything that is still missing in a single read */
7188 ret = read_extent_data(root, data + offset,
7189 bytenr + offset, &read_len, mirror);
7193 /* verify the checksum of every sectorsize block that was read */
7194 while (data_checked < read_len) {
7196 tmp = offset + data_checked;
7198 csum = btrfs_csum_data((char *)data + tmp,
7199 csum, root->sectorsize);
7200 btrfs_csum_final(csum, (u8 *)&csum);
7202 csum_offset = leaf_offset +
7203 tmp / root->sectorsize * csum_size;
7204 read_extent_buffer(eb, (char *)&csum_expected,
7205 csum_offset, csum_size);
7206 /* try another mirror */
7207 if (csum != csum_expected) {
7208 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7209 mirror, bytenr + tmp,
7210 csum, csum_expected);
7211 num_copies = btrfs_num_copies(
7212 &root->fs_info->mapping_tree,
7214 if (mirror < num_copies - 1) {
7219 data_checked += root->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * preceding extent item, then walks forward trimming the part of the range
 * covered by each extent item found.  If an extent sits in the middle of
 * the range, the right part is checked by a recursive call.  Bytes left
 * uncovered are reported on stderr as a csum range without extents.
 */
7228 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7231 struct btrfs_path path;
7232 struct extent_buffer *leaf;
7233 struct btrfs_key key;
7236 btrfs_init_path(&path);
/* Offset -1 sorts after every extent item that starts at bytenr. */
7237 key.objectid = bytenr;
7238 key.type = BTRFS_EXTENT_ITEM_KEY;
7239 key.offset = (u64)-1;
7242 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7245 fprintf(stderr, "Error looking up extent record %d\n", ret);
7246 btrfs_release_path(&path);
7249 if (path.slots[0] > 0) {
7252 ret = btrfs_prev_leaf(root, &path);
7255 } else if (ret > 0) {
7262 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7265 * Block group items come before extent items if they have the same
7266 * bytenr, so walk back one more just in case. Dear future traveller,
7267 * first congrats on mastering time travel. Now if it's not too much
7268 * trouble could you go back to 2006 and tell Chris to make the
7269 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7270 * EXTENT_ITEM_KEY please?
7272 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7273 if (path.slots[0] > 0) {
7276 ret = btrfs_prev_leaf(root, &path);
7279 } else if (ret > 0) {
7284 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7288 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7289 ret = btrfs_next_leaf(root, &path);
7291 fprintf(stderr, "Error going to next leaf "
7293 btrfs_release_path(&path);
7299 leaf = path.nodes[0];
7300 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7301 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7305 if (key.objectid + key.offset < bytenr) {
7309 if (key.objectid > bytenr + num_bytes)
7312 if (key.objectid == bytenr) {
7313 if (key.offset >= num_bytes) {
7317 num_bytes -= key.offset;
7318 bytenr += key.offset;
7319 } else if (key.objectid < bytenr) {
7320 if (key.objectid + key.offset >= bytenr + num_bytes) {
7324 num_bytes = (bytenr + num_bytes) -
7325 (key.objectid + key.offset);
7326 bytenr = key.objectid + key.offset;
7328 if (key.objectid + key.offset < bytenr + num_bytes) {
7329 u64 new_start = key.objectid + key.offset;
7330 u64 new_bytes = bytenr + num_bytes - new_start;
7333 * Weird case, the extent is in the middle of
7334 * our range, we'll have to search one side
7335 * and then the other. Not sure if this happens
7336 * in real life, but no harm in coding it up
7337 * anyway just in case.
7339 btrfs_release_path(&path);
7340 ret = check_extent_exists(root, new_start,
7343 fprintf(stderr, "Right section didn't "
7347 num_bytes = key.objectid - bytenr;
7350 num_bytes = key.objectid - bytenr;
7357 if (num_bytes && !ret) {
7358 fprintf(stderr, "There are no extents for csum range "
7359 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7363 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify each run of contiguous checksums.
 *
 * Every EXTENT_CSUM item covers (item size / csum size) * sectorsize bytes
 * of data starting at key.offset.  When check_data_csum is set the data
 * itself is verified with check_extent_csums().  Contiguous items are
 * merged into one [offset, offset + num_bytes) run; when the next item does
 * not continue the run, the finished run is passed to
 * check_extent_exists() and a message is printed if no extent record backs
 * it.
 */
7367 static int check_csums(struct btrfs_root *root)
7369 struct btrfs_path path;
7370 struct extent_buffer *leaf;
7371 struct btrfs_key key;
7372 u64 offset = 0, num_bytes = 0;
7373 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7377 unsigned long leaf_offset;
/* Everything below operates on the csum root of the filesystem. */
7379 root = root->fs_info->csum_root;
7380 if (!extent_buffer_uptodate(root->node)) {
7381 fprintf(stderr, "No valid csum tree found\n");
7385 btrfs_init_path(&path);
7386 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7387 key.type = BTRFS_EXTENT_CSUM_KEY;
7389 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7391 fprintf(stderr, "Error searching csum tree %d\n", ret);
7392 btrfs_release_path(&path);
7396 if (ret > 0 && path.slots[0])
7401 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7402 ret = btrfs_next_leaf(root, &path);
7404 fprintf(stderr, "Error going to next leaf "
7411 leaf = path.nodes[0];
7413 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7414 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One sectorsize block of data per checksum stored in the item. */
7419 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7420 csum_size) * root->sectorsize;
7421 if (!check_data_csum)
7422 goto skip_csum_check;
7423 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7424 ret = check_extent_csums(root, key.offset, data_len,
7430 offset = key.offset;
7431 } else if (key.offset != offset + num_bytes) {
7432 ret = check_extent_exists(root, offset, num_bytes);
7434 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7435 "there is no extent record\n",
7436 offset, offset+num_bytes);
7439 offset = key.offset;
7442 num_bytes += data_len;
7446 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in objectid, type, offset
 * order, using strict less-than at each level.  Judging by the name, a key
 * that sorts before @drop_key counts as already dropped -- TODO confirm
 * against the return statements.
 */
7450 static int is_dropped_key(struct btrfs_key *key,
7451 struct btrfs_key *drop_key) {
7452 if (key->objectid < drop_key->objectid)
7454 else if (key->objectid == drop_key->objectid) {
7455 if (key->type < drop_key->type)
7457 else if (key->type == drop_key->type) {
7458 if (key->offset < drop_key->offset)
7466 * Here are the rules for FULL_BACKREF.
7468 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6469 * 2) If btrfs_header_owner(buf) no longer points to buf then we have FULL_BACKREF set.
7471 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7472 * if it happened after the relocation occurred since we'll have dropped the
7473 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7474 * have no real way to know for sure.
7476 * We process the blocks one root at a time, and we start from the lowest root
7477 * objectid and go to the highest. So we can just lookup the owner backref for
6478 * the record and if we don't find it then we know it doesn't exist and we have a FULL BACKREF.
7481 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7482 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7483 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to tree block @buf
 * as reached from the root described by @ri.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * checks visible below consider, in order: roots below
 * BTRFS_FIRST_FREE_OBJECTID, the root node itself (buf->start equal to
 * ri->bytenr), BTRFS_HEADER_FLAG_RELOC, the header owner matching
 * ri->objectid, and an existing tree backref for the owner.
 * rec->bad_full_backref is set when the computed answer disagrees with a
 * previously recorded rec->flag_block_full_backref.
 */
7485 static int calc_extent_flag(struct cache_tree *extent_cache,
7486 struct extent_buffer *buf,
7487 struct root_item_record *ri,
7490 struct extent_record *rec;
7491 struct cache_extent *cache;
7492 struct tree_backref *tback;
7495 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7496 /* we have added this extent before */
7500 rec = container_of(cache, struct extent_record, cache);
7503 * Except file/reloc tree, we can not have
7506 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7511 if (buf->start == ri->bytenr)
7514 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7517 owner = btrfs_header_owner(buf);
7518 if (owner == ri->objectid)
7521 tback = find_tree_backref(rec, 0, owner);
7526 if (rec->flag_block_full_backref != FLAG_UNSET &&
7527 rec->flag_block_full_backref != 0)
7528 rec->bad_full_backref = 1;
7531 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7532 if (rec->flag_block_full_backref != FLAG_UNSET &&
7533 rec->flag_block_full_backref != 1)
7534 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr saying that a key of type @key_type
 * was found in the root @rootid, using print_key_type() and
 * print_objectid() to render both values.
 */
7538 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7540 fprintf(stderr, "Invalid key type(");
7541 print_key_type(stderr, 0, key_type);
7542 fprintf(stderr, ") found in root(");
7543 print_objectid(stderr, rootid, 0);
7544 fprintf(stderr, ")\n");
7548 * Check if the key is valid with its extent buffer.
7550 * This is an early check in case an invalid key exists in an extent buffer.
7551 * This is not comprehensive yet, but should prevent a wrong key/item from being passed further.
/*
 * Reject key types that are only valid in specific trees when they show up
 * in a different root.  A mismatch is reported through
 * report_mismatch_key_root().
 *
 * NOTE(review): the label BTRFS_CSUM_TREE_OBJECTID is, by its name, a tree
 * objectid rather than a key type, unlike every other case label here --
 * confirm whether BTRFS_EXTENT_CSUM_KEY was intended.
 */
7554 static int check_type_with_root(u64 rootid, u8 key_type)
7557 /* Only valid in chunk tree */
7558 case BTRFS_DEV_ITEM_KEY:
7559 case BTRFS_CHUNK_ITEM_KEY:
7560 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7563 /* valid in csum and log tree */
7564 case BTRFS_CSUM_TREE_OBJECTID:
7565 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7569 case BTRFS_EXTENT_ITEM_KEY:
7570 case BTRFS_METADATA_ITEM_KEY:
7571 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7572 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7575 case BTRFS_ROOT_ITEM_KEY:
7576 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7579 case BTRFS_DEV_EXTENT_KEY:
7580 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7586 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and account for everything it
 * contains.
 *
 * pick_next_pending() fills @bits; every picked block is queued for
 * readahead and the first one is processed: it is removed from @pending,
 * @reada and @nodes, read with read_tree_block() and checked with
 * check_block().  Its FULL_BACKREF state is taken from the extent tree
 * unless init_extent_tree is set, with calc_extent_flag() as the other
 * source.
 *
 * For a leaf, each item is dispatched on its key type into the matching
 * cache (extent items, chunks, devices, block groups, device extents,
 * backrefs, orphan items, file extents).  Otherwise each child pointer gets
 * an extent record and a tree backref and is queued through add_pending().
 * Global byte counters are updated before the buffer is released.
 */
7590 static int run_next_block(struct btrfs_root *root,
7591 struct block_info *bits,
7594 struct cache_tree *pending,
7595 struct cache_tree *seen,
7596 struct cache_tree *reada,
7597 struct cache_tree *nodes,
7598 struct cache_tree *extent_cache,
7599 struct cache_tree *chunk_cache,
7600 struct rb_root *dev_cache,
7601 struct block_group_tree *block_group_cache,
7602 struct device_extent_tree *dev_extent_cache,
7603 struct root_item_record *ri)
7605 struct extent_buffer *buf;
7606 struct extent_record *rec = NULL;
7617 struct btrfs_key key;
7618 struct cache_extent *cache;
7621 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7622 bits_nr, &reada_bits);
7627 for(i = 0; i < nritems; i++) {
7628 ret = add_cache_extent(reada, bits[i].start,
7633 /* fixme, get the parent transid */
7634 readahead_tree_block(root, bits[i].start,
/* Only the first picked block is processed by this call. */
7638 *last = bits[0].start;
7639 bytenr = bits[0].start;
7640 size = bits[0].size;
/* The block is handled now, so it is looked up in every queue for removal. */
7642 cache = lookup_cache_extent(pending, bytenr, size);
7644 remove_cache_extent(pending, cache);
7647 cache = lookup_cache_extent(reada, bytenr, size);
7649 remove_cache_extent(reada, cache);
7652 cache = lookup_cache_extent(nodes, bytenr, size);
7654 remove_cache_extent(nodes, cache);
7657 cache = lookup_cache_extent(extent_cache, bytenr, size);
7659 rec = container_of(cache, struct extent_record, cache);
7660 gen = rec->parent_generation;
7663 /* fixme, get the real parent transid */
7664 buf = read_tree_block(root, bytenr, size, gen);
7665 if (!extent_buffer_uptodate(buf)) {
7666 record_bad_block_io(root->fs_info,
7667 extent_cache, bytenr, size);
7671 nritems = btrfs_header_nritems(buf);
7674 if (!init_extent_tree) {
7675 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7676 btrfs_header_level(buf), 1, NULL,
7679 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7681 fprintf(stderr, "Couldn't calc extent flags\n");
7682 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7687 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7689 fprintf(stderr, "Couldn't calc extent flags\n");
7690 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7694 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7696 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7697 ri->objectid == btrfs_header_owner(buf)) {
7699 * Ok we got to this block from it's original owner and
7700 * we have FULL_BACKREF set. Relocation can leave
7701 * converted blocks over so this is altogether possible,
7702 * however it's not possible if the generation > the
7703 * last snapshot, so check for this case.
7705 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7706 btrfs_header_generation(buf) > ri->last_snapshot) {
7707 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7708 rec->bad_full_backref = 1;
7713 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7714 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7715 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716 rec->bad_full_backref = 1;
7720 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7721 rec->flag_block_full_backref = 1;
7725 rec->flag_block_full_backref = 0;
7727 owner = btrfs_header_owner(buf);
7730 ret = check_block(root, extent_cache, buf, flags);
7734 if (btrfs_is_leaf(buf)) {
7735 btree_space_waste += btrfs_leaf_free_space(root, buf);
7736 for (i = 0; i < nritems; i++) {
7737 struct btrfs_file_extent_item *fi;
7738 btrfs_item_key_to_cpu(buf, &key, i);
7740 * Check key type against the leaf owner.
7741 * Could filter quite a lot of early error if
7744 if (check_type_with_root(btrfs_header_owner(buf),
7746 fprintf(stderr, "ignoring invalid key\n");
7749 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7750 process_extent_item(root, extent_cache, buf,
7754 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7755 process_extent_item(root, extent_cache, buf,
7759 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7761 btrfs_item_size_nr(buf, i);
7764 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7765 process_chunk_item(chunk_cache, &key, buf, i);
7768 if (key.type == BTRFS_DEV_ITEM_KEY) {
7769 process_device_item(dev_cache, &key, buf, i);
7772 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7773 process_block_group_item(block_group_cache,
7777 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7778 process_device_extent_item(dev_extent_cache,
7783 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7784 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7785 process_extent_ref_v0(extent_cache, buf, i);
7792 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7793 ret = add_tree_backref(extent_cache,
7794 key.objectid, 0, key.offset, 0);
7797 "add_tree_backref failed (leaf tree block): %s",
7801 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7802 ret = add_tree_backref(extent_cache,
7803 key.objectid, key.offset, 0, 0);
7806 "add_tree_backref failed (leaf shared block): %s",
7810 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7811 struct btrfs_extent_data_ref *ref;
7812 ref = btrfs_item_ptr(buf, i,
7813 struct btrfs_extent_data_ref);
7814 add_data_backref(extent_cache,
7816 btrfs_extent_data_ref_root(buf, ref),
7817 btrfs_extent_data_ref_objectid(buf,
7819 btrfs_extent_data_ref_offset(buf, ref),
7820 btrfs_extent_data_ref_count(buf, ref),
7821 0, root->sectorsize);
7824 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7825 struct btrfs_shared_data_ref *ref;
7826 ref = btrfs_item_ptr(buf, i,
7827 struct btrfs_shared_data_ref);
7828 add_data_backref(extent_cache,
7829 key.objectid, key.offset, 0, 0, 0,
7830 btrfs_shared_data_ref_count(buf, ref),
7831 0, root->sectorsize);
7834 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7835 struct bad_item *bad;
7837 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7841 bad = malloc(sizeof(struct bad_item));
7844 INIT_LIST_HEAD(&bad->list);
7845 memcpy(&bad->key, &key,
7846 sizeof(struct btrfs_key));
7847 bad->root_id = owner;
7848 list_add_tail(&bad->list, &delete_items);
7851 if (key.type != BTRFS_EXTENT_DATA_KEY)
7853 fi = btrfs_item_ptr(buf, i,
7854 struct btrfs_file_extent_item);
7855 if (btrfs_file_extent_type(buf, fi) ==
7856 BTRFS_FILE_EXTENT_INLINE)
7858 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7861 data_bytes_allocated +=
7862 btrfs_file_extent_disk_num_bytes(buf, fi);
7863 if (data_bytes_allocated < root->sectorsize) {
7866 data_bytes_referenced +=
7867 btrfs_file_extent_num_bytes(buf, fi);
7868 add_data_backref(extent_cache,
7869 btrfs_file_extent_disk_bytenr(buf, fi),
7870 parent, owner, key.objectid, key.offset -
7871 btrfs_file_extent_offset(buf, fi), 1, 1,
7872 btrfs_file_extent_disk_num_bytes(buf, fi));
7876 struct btrfs_key first_key;
7878 first_key.objectid = 0;
7881 btrfs_item_key_to_cpu(buf, &first_key, 0);
7882 level = btrfs_header_level(buf);
7883 for (i = 0; i < nritems; i++) {
7884 struct extent_record tmpl;
7886 ptr = btrfs_node_blockptr(buf, i);
7887 size = root->nodesize;
7888 btrfs_node_key_to_cpu(buf, &key, i);
7890 if ((level == ri->drop_level)
7891 && is_dropped_key(&key, &ri->drop_key)) {
7896 memset(&tmpl, 0, sizeof(tmpl));
7897 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7898 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7903 tmpl.max_size = size;
7904 ret = add_extent_rec(extent_cache, &tmpl);
7908 ret = add_tree_backref(extent_cache, ptr, parent,
7912 "add_tree_backref failed (non-leaf block): %s",
7918 add_pending(nodes, seen, ptr, size);
7920 add_pending(pending, seen, ptr, size);
7923 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7924 nritems) * sizeof(struct btrfs_key_ptr);
7926 total_btree_bytes += buf->len;
7927 if (fs_root_objectid(btrfs_header_owner(buf)))
7928 total_fs_tree_bytes += buf->len;
7929 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7930 total_extent_tree_bytes += buf->len;
7931 if (!found_old_backref &&
7932 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7933 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7934 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7935 found_old_backref = 1;
7937 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for scanning.
 *
 * The block goes to @nodes when its header level is above 0 and to
 * @pending otherwise.  An extent record for it is added to @extent_cache
 * and a tree backref is recorded: with buf->start passed as the parent for
 * the reloc tree or for blocks older than BTRFS_MIXED_BACKREF_REV, and with
 * objectid passed as the root in every other case.
 */
7941 static int add_root_to_pending(struct extent_buffer *buf,
7942 struct cache_tree *extent_cache,
7943 struct cache_tree *pending,
7944 struct cache_tree *seen,
7945 struct cache_tree *nodes,
7948 struct extent_record tmpl;
7951 if (btrfs_header_level(buf) > 0)
7952 add_pending(nodes, seen, buf->start, buf->len);
7954 add_pending(pending, seen, buf->start, buf->len);
7956 memset(&tmpl, 0, sizeof(tmpl));
7957 tmpl.start = buf->start;
7962 tmpl.max_size = buf->len;
7963 add_extent_rec(extent_cache, &tmpl);
7965 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7966 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7967 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7970 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7975 /* as we fix the tree, we might be deleting blocks that
7976 * we're tracking for repair. This hook makes sure we
7977 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in step with an extent reference being
 * dropped.
 *
 * Looks up the extent record covering [@bytenr, @bytenr + @num_bytes) in
 * root->fs_info->fsck_extent_cache.  An @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID marks the reference as a data reference; the
 * matching data or tree backref then has its found_ref and
 * found_extent_tree state and the record reference counts reduced.
 * maybe_free_extent_rec() is finally given the chance to release the
 * record.
 */
7979 static int free_extent_hook(struct btrfs_trans_handle *trans,
7980 struct btrfs_root *root,
7981 u64 bytenr, u64 num_bytes, u64 parent,
7982 u64 root_objectid, u64 owner, u64 offset,
7985 struct extent_record *rec;
7986 struct cache_extent *cache;
7988 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7990 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7991 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7995 rec = container_of(cache, struct extent_record, cache);
7997 struct data_backref *back;
7998 back = find_data_backref(rec, parent, root_objectid, owner,
7999 offset, 1, bytenr, num_bytes);
8002 if (back->node.found_ref) {
8003 back->found_ref -= refs_to_drop;
8005 rec->refs -= refs_to_drop;
8007 if (back->node.found_extent_tree) {
8008 back->num_refs -= refs_to_drop;
8009 if (rec->extent_item_refs)
8010 rec->extent_item_refs -= refs_to_drop;
/* Clear the summary bits once the corresponding counters reach zero. */
8012 if (back->found_ref == 0)
8013 back->node.found_ref = 0;
8014 if (back->num_refs == 0)
8015 back->node.found_extent_tree = 0;
8017 if (!back->node.found_extent_tree && back->node.found_ref) {
8018 list_del(&back->node.list);
8022 struct tree_backref *back;
8023 back = find_tree_backref(rec, parent, root_objectid);
8026 if (back->node.found_ref) {
8029 back->node.found_ref = 0;
8031 if (back->node.found_extent_tree) {
8032 if (rec->extent_item_refs)
8033 rec->extent_item_refs--;
8034 back->node.found_extent_tree = 0;
8036 if (!back->node.found_extent_tree && back->node.found_ref) {
8037 list_del(&back->node.list);
8041 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items keyed on the given bytenr that describe an
 * extent or one of its backrefs.
 *
 * The search key uses offset (u64)-1 and the slot before the search
 * position is examined.  Items of other types are skipped by lowering the
 * search key (offset first when the type is 0, otherwise the type);
 * matching items are removed with btrfs_del_item() after a message on
 * stderr.  When an EXTENT_ITEM or METADATA_ITEM is removed,
 * btrfs_update_block_group() is called for its length: the key offset for
 * EXTENT_ITEM, root->nodesize for METADATA_ITEM.
 */
8046 static int delete_extent_records(struct btrfs_trans_handle *trans,
8047 struct btrfs_root *root,
8048 struct btrfs_path *path,
8051 struct btrfs_key key;
8052 struct btrfs_key found_key;
8053 struct extent_buffer *leaf;
8058 key.objectid = bytenr;
8060 key.offset = (u64)-1;
8063 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8070 if (path->slots[0] == 0)
8076 leaf = path->nodes[0];
8077 slot = path->slots[0];
8079 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8080 if (found_key.objectid != bytenr)
8083 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8084 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8085 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8086 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8087 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8088 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8089 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8090 btrfs_release_path(path);
8091 if (found_key.type == 0) {
8092 if (found_key.offset == 0)
8094 key.offset = found_key.offset - 1;
8095 key.type = found_key.type;
8097 key.type = found_key.type - 1;
8098 key.offset = (u64)-1;
8102 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8103 found_key.objectid, found_key.type, found_key.offset);
8105 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8108 btrfs_release_path(path);
8110 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8111 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8112 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8113 found_key.offset : root->nodesize;
8115 ret = btrfs_update_block_group(trans, root, bytenr,
8122 btrfs_release_path(path);
8127 * for a single backref, this will allocate a new extent
8128 * and add the backref to it.
/*
 * Write the extent described by @rec, plus the reference described by
 * @back, into the extent tree.
 *
 * An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with a ref
 * count of 0 and rec->generation (presumably only when @allocated is zero
 * -- TODO confirm).  Data extents get BTRFS_EXTENT_FLAG_DATA; tree blocks
 * get a btrfs_tree_block_info holding rec->info_level and a disk key built
 * from rec->info_objectid, and BTRFS_EXTENT_FLAG_TREE_BLOCK ored with
 * @flags.  The block group accounting is updated for the new extent.
 *
 * The reference itself is then added with btrfs_inc_extent_ref(): once per
 * found_ref for a data backref, once for a tree backref, using the backref
 * parent when full_backref is set.
 */
8130 static int record_extent(struct btrfs_trans_handle *trans,
8131 struct btrfs_fs_info *info,
8132 struct btrfs_path *path,
8133 struct extent_record *rec,
8134 struct extent_backref *back,
8135 int allocated, u64 flags)
8138 struct btrfs_root *extent_root = info->extent_root;
8139 struct extent_buffer *leaf;
8140 struct btrfs_key ins_key;
8141 struct btrfs_extent_item *ei;
8142 struct data_backref *dback;
8143 struct btrfs_tree_block_info *bi;
8146 rec->max_size = max_t(u64, rec->max_size,
8147 info->extent_root->nodesize);
8150 u32 item_size = sizeof(*ei);
8153 item_size += sizeof(*bi);
8155 ins_key.objectid = rec->start;
8156 ins_key.offset = rec->max_size;
8157 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8159 ret = btrfs_insert_empty_item(trans, extent_root, path,
8160 &ins_key, item_size);
8164 leaf = path->nodes[0];
8165 ei = btrfs_item_ptr(leaf, path->slots[0],
8166 struct btrfs_extent_item);
8168 btrfs_set_extent_refs(leaf, ei, 0);
8169 btrfs_set_extent_generation(leaf, ei, rec->generation);
8171 if (back->is_data) {
8172 btrfs_set_extent_flags(leaf, ei,
8173 BTRFS_EXTENT_FLAG_DATA);
8175 struct btrfs_disk_key copy_key;
8177 bi = (struct btrfs_tree_block_info *)(ei + 1);
8178 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8181 btrfs_set_disk_key_objectid(&copy_key,
8182 rec->info_objectid);
8183 btrfs_set_disk_key_type(&copy_key, 0);
8184 btrfs_set_disk_key_offset(&copy_key, 0);
8186 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8187 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8189 btrfs_set_extent_flags(leaf, ei,
8190 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8193 btrfs_mark_buffer_dirty(leaf);
8194 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8195 rec->max_size, 1, 0);
8198 btrfs_release_path(path);
8201 if (back->is_data) {
8205 dback = to_data_backref(back);
8206 if (back->full_backref)
8207 parent = dback->parent;
8211 for (i = 0; i < dback->found_ref; i++) {
8212 /* if parent != 0, we're doing a full backref
8213 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8214 * just makes the backref allocator create a data
8217 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8218 rec->start, rec->max_size,
8222 BTRFS_FIRST_FREE_OBJECTID :
8228 fprintf(stderr, "adding new data backref"
8229 " on %llu %s %llu owner %llu"
8230 " offset %llu found %d\n",
8231 (unsigned long long)rec->start,
8232 back->full_backref ?
8234 back->full_backref ?
8235 (unsigned long long)parent :
8236 (unsigned long long)dback->root,
8237 (unsigned long long)dback->owner,
8238 (unsigned long long)dback->offset,
8242 struct tree_backref *tback;
8244 tback = to_tree_backref(back);
8245 if (back->full_backref)
8246 parent = tback->parent;
8250 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8251 rec->start, rec->max_size,
8252 parent, tback->root, 0, 0);
8253 fprintf(stderr, "adding new tree backref on "
8254 "start %llu len %llu parent %llu root %llu\n",
8255 rec->start, rec->max_size, parent, tback->root);
8258 btrfs_release_path(path);
8262 static struct extent_entry *find_entry(struct list_head *entries,
8263 u64 bytenr, u64 bytes)
8265 struct extent_entry *entry = NULL;
8267 list_for_each_entry(entry, entries, list) {
8268 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick, from the extent_entry list @entries, the entry that most backrefs
 * agree on, judged by comparing each entry's 'count'. Entries whose 'broken'
 * value equals their 'count' are not trusted.
 *
 * NOTE(review): this listing omits some short lines of the body (the
 * statements under several of the conditions and the final return), so the
 * tie handling and the returned value should be confirmed against the full
 * source.
 */
8275 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8277 struct extent_entry *entry, *best = NULL, *prev = NULL;
8279 list_for_each_entry(entry, entries, list) {
8281 * If there are as many broken entries as entries then we know
8282 * not to trust this particular entry.
8284 if (entry->broken == entry->count)
8288 * Special case, when there are only two entries and 'best' is
8298 * If our current entry == best then we can't be sure our best
8299 * is really the best, so we need to keep searching.
8301 if (best && best->count == entry->count) {
8307 /* Prev == entry, not good enough, have to keep searching */
8308 if (!prev->broken && prev->count == entry->count)
8312 best = (prev->count > entry->count) ? prev : entry;
8313 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the extent range the
 * backrefs settled on.
 *
 * @info:  filesystem being repaired
 * @path:  caller-provided path used for the lookups; released before the
 *         final return
 * @dback: data backref naming the root, owner and file offset of the file
 *         extent item to fix
 * @entry: the bytenr/bytes pair the item should end up describing
 *
 * The item is found by searching the root dback->root from
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and comparing each
 * item's disk bytenr and disk_num_bytes with the values cached in @dback.
 * A transaction is then started, the search is repeated with COW enabled, and
 * the item's disk bytenr, offset and disk_num_bytes are adjusted to match
 * @entry (ram_bytes as well when the extent is not compressed). A compressed
 * extent whose start differs from entry->bytenr is reported as a case that is
 * not implemented yet.
 *
 * The final return is ret when it is non-zero, otherwise the result of
 * btrfs_commit_transaction().
 */
8321 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8322 struct data_backref *dback, struct extent_entry *entry)
8324 struct btrfs_trans_handle *trans;
8325 struct btrfs_root *root;
8326 struct btrfs_file_extent_item *fi;
8327 struct extent_buffer *leaf;
8328 struct btrfs_key key;
8332 key.objectid = dback->root;
8333 key.type = BTRFS_ROOT_ITEM_KEY;
8334 key.offset = (u64)-1;
8335 root = btrfs_read_fs_root(info, &key);
8337 fprintf(stderr, "Couldn't find root for our ref\n");
8342 * The backref points to the original offset of the extent if it was
8343 * split, so we need to search down to the offset we have and then walk
8344 * forward until we find the backref we're looking for.
8346 key.objectid = dback->owner;
8347 key.type = BTRFS_EXTENT_DATA_KEY;
8348 key.offset = dback->offset;
8349 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8351 fprintf(stderr, "Error looking up ref %d\n", ret);
8356 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8357 ret = btrfs_next_leaf(root, path);
8359 fprintf(stderr, "Couldn't find our ref, next\n");
8363 leaf = path->nodes[0];
8364 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8365 if (key.objectid != dback->owner ||
8366 key.type != BTRFS_EXTENT_DATA_KEY) {
8367 fprintf(stderr, "Couldn't find our ref, search\n");
8370 fi = btrfs_item_ptr(leaf, path->slots[0],
8371 struct btrfs_file_extent_item);
8372 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8373 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8375 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8380 btrfs_release_path(path);
8382 trans = btrfs_start_transaction(root, 1);
8384 return PTR_ERR(trans);
8387 * Ok we have the key of the file extent we want to fix, now we can cow
8388 * down to the thing and fix it.
8390 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8392 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8393 key.objectid, key.type, key.offset, ret);
8397 fprintf(stderr, "Well that's odd, we just found this key "
8398 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8403 leaf = path->nodes[0];
8404 fi = btrfs_item_ptr(leaf, path->slots[0],
8405 struct btrfs_file_extent_item);
8407 if (btrfs_file_extent_compression(leaf, fi) &&
8408 dback->disk_bytenr != entry->bytenr) {
8409 fprintf(stderr, "Ref doesn't match the record start and is "
8410 "compressed, please take a btrfs-image of this file "
8411 "system and send it to a btrfs developer so they can "
8412 "complete this functionality for bytenr %Lu\n",
8413 dback->disk_bytenr);
8418 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8419 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8420 } else if (dback->disk_bytenr > entry->bytenr) {
8421 u64 off_diff, offset;
8423 off_diff = dback->disk_bytenr - entry->bytenr;
8424 offset = btrfs_file_extent_offset(leaf, fi);
8425 if (dback->disk_bytenr + offset +
8426 btrfs_file_extent_num_bytes(leaf, fi) >
8427 entry->bytenr + entry->bytes) {
8428 fprintf(stderr, "Ref is past the entry end, please "
8429 "take a btrfs-image of this file system and "
8430 "send it to a btrfs developer, ref %Lu\n",
8431 dback->disk_bytenr);
8436 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8437 btrfs_set_file_extent_offset(leaf, fi, offset);
8438 } else if (dback->disk_bytenr < entry->bytenr) {
8441 offset = btrfs_file_extent_offset(leaf, fi);
8442 if (dback->disk_bytenr + offset < entry->bytenr) {
8443 fprintf(stderr, "Ref is before the entry start, please"
8444 " take a btrfs-image of this file system and "
8445 "send it to a btrfs developer, ref %Lu\n",
8446 dback->disk_bytenr);
8451 offset += dback->disk_bytenr;
8452 offset -= entry->bytenr;
8453 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8454 btrfs_set_file_extent_offset(leaf, fi, offset);
8457 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8460 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8461 * only do this if we aren't using compression, otherwise it's a
8464 if (!btrfs_file_extent_compression(leaf, fi))
8465 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8467 printf("ram bytes may be wrong?\n");
8468 btrfs_mark_buffer_dirty(leaf);
8470 err = btrfs_commit_transaction(trans, root);
8471 btrfs_release_path(path);
8472 return ret ? ret : err;
/*
 * Check that the data backrefs of @rec agree on the extent's bytenr and
 * length, and try to repair the file extent items that disagree.
 *
 * Only non-full data backrefs with a found ref are considered. Every distinct
 * (disk_bytenr, bytes) pair they carry is collected as an extent_entry on a
 * local list; a backref that differs from (rec->start, rec->nr) or is marked
 * broken counts as a mismatch. find_most_right_entry() then chooses the pair
 * to trust, and the extent record itself (rec->start, rec->nr) is consulted
 * when the backrefs cannot decide on their own. Backrefs that differ from the
 * chosen entry are handed to repair_ref().
 *
 * The case where the chosen entry does not start at rec->start is reported
 * and not repaired. The local entry list is emptied before the function ends.
 */
8475 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8476 struct extent_record *rec)
8478 struct extent_backref *back;
8479 struct data_backref *dback;
8480 struct extent_entry *entry, *best = NULL;
8483 int broken_entries = 0;
8488 * Metadata is easy and the backrefs should always agree on bytenr and
8489 * size, if not we've got bigger issues.
8494 list_for_each_entry(back, &rec->backrefs, list) {
8495 if (back->full_backref || !back->is_data)
8498 dback = to_data_backref(back);
8501 * We only pay attention to backrefs that we found a real
8504 if (dback->found_ref == 0)
8508 * For now we only catch when the bytes don't match, not the
8509 * bytenr. We can easily do this at the same time, but I want
8510 * to have a fs image to test on before we just add repair
8511 * functionality willy-nilly so we know we won't screw up the
8515 entry = find_entry(&entries, dback->disk_bytenr,
8518 entry = malloc(sizeof(struct extent_entry));
8523 memset(entry, 0, sizeof(*entry));
8524 entry->bytenr = dback->disk_bytenr;
8525 entry->bytes = dback->bytes;
8526 list_add_tail(&entry->list, &entries);
8531 * If we only have on entry we may think the entries agree when
8532 * in reality they don't so we have to do some extra checking.
8534 if (dback->disk_bytenr != rec->start ||
8535 dback->bytes != rec->nr || back->broken)
8546 /* Yay all the backrefs agree, carry on good sir */
8547 if (nr_entries <= 1 && !mismatch)
8550 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8551 "%Lu\n", rec->start);
8554 * First we want to see if the backrefs can agree amongst themselves who
8555 * is right, so figure out which one of the entries has the highest
8558 best = find_most_right_entry(&entries);
8561 * Ok so we may have an even split between what the backrefs think, so
8562 * this is where we use the extent ref to see what it thinks.
8565 entry = find_entry(&entries, rec->start, rec->nr);
8566 if (!entry && (!broken_entries || !rec->found_rec)) {
8567 fprintf(stderr, "Backrefs don't agree with each other "
8568 "and extent record doesn't agree with anybody,"
8569 " so we can't fix bytenr %Lu bytes %Lu\n",
8570 rec->start, rec->nr);
8573 } else if (!entry) {
8575 * Ok our backrefs were broken, we'll assume this is the
8576 * correct value and add an entry for this range.
8578 entry = malloc(sizeof(struct extent_entry));
8583 memset(entry, 0, sizeof(*entry));
8584 entry->bytenr = rec->start;
8585 entry->bytes = rec->nr;
8586 list_add_tail(&entry->list, &entries);
8590 best = find_most_right_entry(&entries);
8592 fprintf(stderr, "Backrefs and extent record evenly "
8593 "split on who is right, this is going to "
8594 "require user input to fix bytenr %Lu bytes "
8595 "%Lu\n", rec->start, rec->nr);
8602 * I don't think this can happen currently as we'll abort() if we catch
8603 * this case higher up, but in case somebody removes that we still can't
8604 * deal with it properly here yet, so just bail out of that's the case.
8606 if (best->bytenr != rec->start) {
8607 fprintf(stderr, "Extent start and backref starts don't match, "
8608 "please use btrfs-image on this file system and send "
8609 "it to a btrfs developer so they can make fsck fix "
8610 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8611 rec->start, rec->nr);
8617 * Ok great we all agreed on an extent record, let's go find the real
8618 * references and fix up the ones that don't match.
8620 list_for_each_entry(back, &rec->backrefs, list) {
8621 if (back->full_backref || !back->is_data)
8624 dback = to_data_backref(back);
8627 * Still ignoring backrefs that don't have a real ref attached
8630 if (dback->found_ref == 0)
8633 if (dback->bytes == best->bytes &&
8634 dback->disk_bytenr == best->bytenr)
8637 ret = repair_ref(info, path, dback, best);
8643 * Ok we messed with the actual refs, which means we need to drop our
8644 * entire cache and go back and rescan. I know this is a huge pain and
8645 * adds a lot of extra work, but it's the only way to be safe. Once all
8646 * the backrefs agree we may not need to do anything to the extent
8651 while (!list_empty(&entries)) {
8652 entry = list_entry(entries.next, struct extent_entry, list);
8653 list_del_init(&entry->list);
/*
 * Replace @rec, which has no extent item of its own (found_rec is clear) and
 * exactly one duplicate, with that duplicate in @extent_cache.
 *
 * The duplicate ("good") takes over @rec's refs and backrefs and is then
 * merged with every other record in the cache that overlaps its range:
 * records backed by an extent item (or carrying duplicates themselves) become
 * duplicates of "good", while records without an extent item only contribute
 * their refs and backrefs. Finally "good" is inserted into @extent_cache.
 *
 * The final return is 0 when "good" still has duplicates and 1 when it has
 * none.
 * NOTE(review): the value returned by the early exit for
 * found_rec / num_duplicates > 1 is not part of this listing.
 */
8659 static int process_duplicates(struct cache_tree *extent_cache,
8660 struct extent_record *rec)
8662 struct extent_record *good, *tmp;
8663 struct cache_extent *cache;
8667 * If we found a extent record for this extent then return, or if we
8668 * have more than one duplicate we are likely going to need to delete
8671 if (rec->found_rec || rec->num_duplicates > 1)
8674 /* Shouldn't happen but just in case */
8675 BUG_ON(!rec->num_duplicates);
8678 * So this happens if we end up with a backref that doesn't match the
8679 * actual extent entry. So either the backref is bad or the extent
8680 * entry is bad. Either way we want to have the extent_record actually
8681 * reflect what we found in the extent_tree, so we need to take the
8682 * duplicate out and use that as the extent_record since the only way we
8683 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8685 remove_cache_extent(extent_cache, &rec->cache);
8687 good = to_extent_record(rec->dups.next);
8688 list_del_init(&good->list);
8689 INIT_LIST_HEAD(&good->backrefs);
8690 INIT_LIST_HEAD(&good->dups);
8691 good->cache.start = good->start;
8692 good->cache.size = good->nr;
8693 good->content_checked = 0;
8694 good->owner_ref_checked = 0;
8695 good->num_duplicates = 0;
8696 good->refs = rec->refs;
8697 list_splice_init(&rec->backrefs, &good->backrefs);
8699 cache = lookup_cache_extent(extent_cache, good->start,
8703 tmp = container_of(cache, struct extent_record, cache);
8706 * If we find another overlapping extent and it's found_rec is
8707 * set then it's a duplicate and we need to try and delete
8710 if (tmp->found_rec || tmp->num_duplicates > 0) {
8711 if (list_empty(&good->list))
8712 list_add_tail(&good->list,
8713 &duplicate_extents);
8714 good->num_duplicates += tmp->num_duplicates + 1;
8715 list_splice_init(&tmp->dups, &good->dups);
8716 list_del_init(&tmp->list);
8717 list_add_tail(&tmp->list, &good->dups);
8718 remove_cache_extent(extent_cache, &tmp->cache);
8723 * Ok we have another non extent item backed extent rec, so lets
8724 * just add it to this extent and carry on like we did above.
8726 good->refs += tmp->refs;
8727 list_splice_init(&tmp->backrefs, &good->backrefs);
8728 remove_cache_extent(extent_cache, &tmp->cache);
8731 ret = insert_cache_extent(extent_cache, &good->cache);
8734 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant extent items among @rec and its duplicates.
 *
 * The loop over rec->dups looks for the record that covers all the others;
 * overlapping records that do not fully cover each other are reported as a
 * case needing more thought. Records to drop are collected on a local delete
 * list, and for each listed record with found_rec set the matching
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) item is searched for in the extent tree
 * and removed with btrfs_del_item(), all inside one transaction. Both the
 * delete list and rec->dups are emptied before returning.
 *
 * The final return is ret when it is non-zero, otherwise nr_del.
 * NOTE(review): presumably nr_del counts the items deleted; the line that
 * updates it is not part of this listing.
 */
8737 static int delete_duplicate_records(struct btrfs_root *root,
8738 struct extent_record *rec)
8740 struct btrfs_trans_handle *trans;
8741 LIST_HEAD(delete_list);
8742 struct btrfs_path path;
8743 struct extent_record *tmp, *good, *n;
8746 struct btrfs_key key;
8748 btrfs_init_path(&path);
8751 /* Find the record that covers all of the duplicates. */
8752 list_for_each_entry(tmp, &rec->dups, list) {
8753 if (good->start < tmp->start)
8755 if (good->nr > tmp->nr)
8758 if (tmp->start + tmp->nr < good->start + good->nr) {
8759 fprintf(stderr, "Ok we have overlapping extents that "
8760 "aren't completely covered by each other, this "
8761 "is going to require more careful thought. "
8762 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8763 tmp->start, tmp->nr, good->start, good->nr);
8770 list_add_tail(&rec->list, &delete_list);
8772 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8775 list_move_tail(&tmp->list, &delete_list);
8778 root = root->fs_info->extent_root;
8779 trans = btrfs_start_transaction(root, 1);
8780 if (IS_ERR(trans)) {
8781 ret = PTR_ERR(trans);
8785 list_for_each_entry(tmp, &delete_list, list) {
8786 if (tmp->found_rec == 0)
8788 key.objectid = tmp->start;
8789 key.type = BTRFS_EXTENT_ITEM_KEY;
8790 key.offset = tmp->nr;
8792 /* Shouldn't happen but just in case */
8793 if (tmp->metadata) {
8794 fprintf(stderr, "Well this shouldn't happen, extent "
8795 "record overlaps but is metadata? "
8796 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8800 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8806 ret = btrfs_del_item(trans, root, &path);
8809 btrfs_release_path(&path);
8812 err = btrfs_commit_transaction(trans, root);
8816 while (!list_empty(&delete_list)) {
8817 tmp = to_extent_record(delete_list.next);
8818 list_del_init(&tmp->list);
8824 while (!list_empty(&rec->dups)) {
8825 tmp = to_extent_record(rec->dups.next);
8826 list_del_init(&tmp->list);
8830 btrfs_release_path(&path);
8832 if (!ret && !nr_del)
8833 rec->num_duplicates = 0;
8835 return ret ? ret : nr_del;
/*
 * Try to attach real file extent information to the data backrefs of @rec
 * that have no found ref yet.
 *
 * For each such non-full data backref the root dback->root is read and the
 * key (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched for in
 * it. The disk bytenr and disk_num_bytes of the file extent item are then
 * copied into the backref and found_ref is incremented. A bytenr that maps to
 * a record in @extent_cache is examined first, because backrefs whose bytenr
 * already has its own extent record must not be added to @rec (see the
 * comment in the body).
 *
 * A root that does not exist (-ENOENT) makes the backref be skipped; any
 * other error from reading the root is returned.
 */
8838 static int find_possible_backrefs(struct btrfs_fs_info *info,
8839 struct btrfs_path *path,
8840 struct cache_tree *extent_cache,
8841 struct extent_record *rec)
8843 struct btrfs_root *root;
8844 struct extent_backref *back;
8845 struct data_backref *dback;
8846 struct cache_extent *cache;
8847 struct btrfs_file_extent_item *fi;
8848 struct btrfs_key key;
8852 list_for_each_entry(back, &rec->backrefs, list) {
8853 /* Don't care about full backrefs (poor unloved backrefs) */
8854 if (back->full_backref || !back->is_data)
8857 dback = to_data_backref(back);
8859 /* We found this one, we don't need to do a lookup */
8860 if (dback->found_ref)
8863 key.objectid = dback->root;
8864 key.type = BTRFS_ROOT_ITEM_KEY;
8865 key.offset = (u64)-1;
8867 root = btrfs_read_fs_root(info, &key);
8869 /* No root, definitely a bad ref, skip */
8870 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8872 /* Other err, exit */
8874 return PTR_ERR(root);
8876 key.objectid = dback->owner;
8877 key.type = BTRFS_EXTENT_DATA_KEY;
8878 key.offset = dback->offset;
8879 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8881 btrfs_release_path(path);
8884 /* Didn't find it, we can carry on */
8889 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8890 struct btrfs_file_extent_item);
8891 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8892 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8893 btrfs_release_path(path);
8894 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8896 struct extent_record *tmp;
8897 tmp = container_of(cache, struct extent_record, cache);
8900 * If we found an extent record for the bytenr for this
8901 * particular backref then we can't add it to our
8902 * current extent record. We only want to add backrefs
8903 * that don't have a corresponding extent item in the
8904 * extent tree since they likely belong to this record
8905 * and we need to fix it if it doesn't match bytenrs.
8911 dback->found_ref += 1;
8912 dback->disk_bytenr = bytenr;
8913 dback->bytes = bytes;
8916 * Set this so the verify backref code knows not to trust the
8917 * values in this backref.
8926 * Record orphan data ref into corresponding root.
8928 * Return 0 if the extent item contains data ref and recorded.
8929 * Return 1 if the extent item contains no useful data ref
8930 * On that case, it may contains only shared_dataref or metadata backref
8931 * or the file extent exists(this should be handled by the extent bytenr
8933 * Return <0 if something goes wrong.
8935 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8936 struct extent_record *rec)
8938 struct btrfs_key key;
8939 struct btrfs_root *dest_root;
8940 struct extent_backref *back;
8941 struct data_backref *dback;
8942 struct orphan_data_extent *orphan;
8943 struct btrfs_path path;
8944 int recorded_data_ref = 0;
8949 btrfs_init_path(&path);
8950 list_for_each_entry(back, &rec->backrefs, list) {
8951 if (back->full_backref || !back->is_data ||
8952 !back->found_extent_tree)
8954 dback = to_data_backref(back);
8955 if (dback->found_ref)
8957 key.objectid = dback->root;
8958 key.type = BTRFS_ROOT_ITEM_KEY;
8959 key.offset = (u64)-1;
8961 dest_root = btrfs_read_fs_root(fs_info, &key);
8963 /* For non-exist root we just skip it */
8964 if (IS_ERR(dest_root) || !dest_root)
8967 key.objectid = dback->owner;
8968 key.type = BTRFS_EXTENT_DATA_KEY;
8969 key.offset = dback->offset;
8971 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8972 btrfs_release_path(&path);
8974 * For ret < 0, it's OK since the fs-tree may be corrupted,
8975 * we need to record it for inode/file extent rebuild.
8976 * For ret > 0, we record it only for file extent rebuild.
8977 * For ret == 0, the file extent exists but only bytenr
8978 * mismatch, let the original bytenr fix routine to handle,
8984 orphan = malloc(sizeof(*orphan));
8989 INIT_LIST_HEAD(&orphan->list);
8990 orphan->root = dback->root;
8991 orphan->objectid = dback->owner;
8992 orphan->offset = dback->offset;
8993 orphan->disk_bytenr = rec->cache.start;
8994 orphan->disk_len = rec->cache.size;
8995 list_add(&dest_root->orphan_data_extents, &orphan->list);
8996 recorded_data_ref = 1;
8999 btrfs_release_path(&path);
9001 return !recorded_data_ref;
9007 * when an incorrect extent item is found, this will delete
9008 * all of the existing entries for it and recreate them
9009 * based on what the tree scan found.
/*
 * Steps visible in the body, in order:
 *  - for a data record whose refs differ from extent_item_refs, run
 *    find_possible_backrefs() and then verify_backrefs() so that the backrefs
 *    agree before anything is rewritten;
 *  - start a transaction on the extent root and call delete_extent_records()
 *    for the record;
 *  - look the range up in info->corrupt_blocks (the comment says references
 *    are not re-added to a corrupt block);
 *  - walk rec->backrefs and call record_extent() for the backrefs that have
 *    found_ref set, clearing rec->bad_full_backref on the way;
 *  - commit the transaction and print "Repaired extent references".
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 */
9011 static int fixup_extent_refs(struct btrfs_fs_info *info,
9012 struct cache_tree *extent_cache,
9013 struct extent_record *rec)
9015 struct btrfs_trans_handle *trans = NULL;
9017 struct btrfs_path path;
9018 struct list_head *cur = rec->backrefs.next;
9019 struct cache_extent *cache;
9020 struct extent_backref *back;
9024 if (rec->flag_block_full_backref)
9025 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9027 btrfs_init_path(&path);
9028 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9030 * Sometimes the backrefs themselves are so broken they don't
9031 * get attached to any meaningful rec, so first go back and
9032 * check any of our backrefs that we couldn't find and throw
9033 * them into the list if we find the backref so that
9034 * verify_backrefs can figure out what to do.
9036 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9041 /* step one, make sure all of the backrefs agree */
9042 ret = verify_backrefs(info, &path, rec);
9046 trans = btrfs_start_transaction(info->extent_root, 1);
9047 if (IS_ERR(trans)) {
9048 ret = PTR_ERR(trans);
9052 /* step two, delete all the existing records */
9053 ret = delete_extent_records(trans, info->extent_root, &path,
9059 /* was this block corrupt? If so, don't add references to it */
9060 cache = lookup_cache_extent(info->corrupt_blocks,
9061 rec->start, rec->max_size);
9067 /* step three, recreate all the refs we did find */
9068 while(cur != &rec->backrefs) {
9069 back = to_extent_backref(cur);
9073 * if we didn't find any references, don't create a
9076 if (!back->found_ref)
9079 rec->bad_full_backref = 0;
9080 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9088 int err = btrfs_commit_transaction(trans, info->extent_root);
9094 fprintf(stderr, "Repaired extent references for %llu\n",
9095 (unsigned long long)rec->start);
9097 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent item of @rec so
 * that it agrees with rec->flag_block_full_backref.
 *
 * The item is searched for in the extent root with COW enabled, keyed as
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for metadata records
 * and as (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise. The
 * updated flags are written back, the leaf is marked dirty and the
 * transaction is committed. On the two failure paths visible here the
 * transaction is still committed but the commit result is not checked.
 */
9101 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9102 struct extent_record *rec)
9104 struct btrfs_trans_handle *trans;
9105 struct btrfs_root *root = fs_info->extent_root;
9106 struct btrfs_path path;
9107 struct btrfs_extent_item *ei;
9108 struct btrfs_key key;
9112 key.objectid = rec->start;
9113 if (rec->metadata) {
9114 key.type = BTRFS_METADATA_ITEM_KEY;
9115 key.offset = rec->info_level;
9117 key.type = BTRFS_EXTENT_ITEM_KEY;
9118 key.offset = rec->max_size;
9121 trans = btrfs_start_transaction(root, 0);
9123 return PTR_ERR(trans);
9125 btrfs_init_path(&path);
9126 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9128 btrfs_release_path(&path);
9129 btrfs_commit_transaction(trans, root);
9132 fprintf(stderr, "Didn't find extent for %llu\n",
9133 (unsigned long long)rec->start);
9134 btrfs_release_path(&path);
9135 btrfs_commit_transaction(trans, root);
9139 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9140 struct btrfs_extent_item);
9141 flags = btrfs_extent_flags(path.nodes[0], ei);
9142 if (rec->flag_block_full_backref) {
9143 fprintf(stderr, "setting full backref on %llu\n",
9144 (unsigned long long)key.objectid);
9145 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9147 fprintf(stderr, "clearing full backref on %llu\n",
9148 (unsigned long long)key.objectid);
9149 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9151 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9152 btrfs_mark_buffer_dirty(path.nodes[0]);
9153 btrfs_release_path(&path);
9154 ret = btrfs_commit_transaction(trans, root);
9156 fprintf(stderr, "Repaired extent flags for %llu\n",
9157 (unsigned long long)rec->start);
9162 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to the corrupt block @corrupt from its parent node in
 * the extent tree.
 *
 * The search for corrupt->key is told to stop one level above the corrupt
 * block (path.lowest_level = corrupt->level + 1). The slot the search ended
 * on is tried first; if its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned for it. The pointer that was found is removed
 * with btrfs_del_ptr().
 */
9163 static int prune_one_block(struct btrfs_trans_handle *trans,
9164 struct btrfs_fs_info *info,
9165 struct btrfs_corrupt_block *corrupt)
9168 struct btrfs_path path;
9169 struct extent_buffer *eb;
9173 int level = corrupt->level + 1;
9175 btrfs_init_path(&path);
9177 /* we want to stop at the parent to our busted block */
9178 path.lowest_level = level;
9180 ret = btrfs_search_slot(trans, info->extent_root,
9181 &corrupt->key, &path, -1, 1);
9186 eb = path.nodes[level];
9193 * hopefully the search gave us the block we want to prune,
9194 * lets try that first
9196 slot = path.slots[level];
9197 found = btrfs_node_blockptr(eb, slot);
9198 if (found == corrupt->cache.start)
9201 nritems = btrfs_header_nritems(eb);
9203 /* the search failed, lets scan this node and hope we find it */
9204 for (slot = 0; slot < nritems; slot++) {
9205 found = btrfs_node_blockptr(eb, slot);
9206 if (found == corrupt->cache.start)
9210 * we couldn't find the bad block. TODO, search all the nodes for pointers
9213 if (eb == info->extent_root->node) {
9218 btrfs_release_path(&path);
9223 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9224 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9227 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * The entry found by searching the cache from offset 0 is passed to
 * prune_one_block() and then removed from the cache; a transaction on the
 * extent root is started for this work and the result of committing it is
 * returned.
 * NOTE(review): the return value of prune_one_block() is not checked, so a
 * block that could not be pruned is still dropped from the cache.
 */
9231 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9233 struct btrfs_trans_handle *trans = NULL;
9234 struct cache_extent *cache;
9235 struct btrfs_corrupt_block *corrupt;
9238 cache = search_cache_extent(info->corrupt_blocks, 0);
9242 trans = btrfs_start_transaction(info->extent_root, 1);
9244 return PTR_ERR(trans);
9246 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9247 prune_one_block(trans, info, corrupt);
9248 remove_cache_extent(info->corrupt_blocks, cache);
9251 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges recorded in fs_info->free_space_cache, then
 * step through the block groups in address order, advancing past each one
 * using its key (objectid + offset).
 * NOTE(review): the statement applied to each block group is missing from
 * this listing; going by the function name it presumably resets the group's
 * cached state - confirm against the full source.
 */
9255 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9257 struct btrfs_block_group_cache *cache;
9262 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9263 &start, &end, EXTENT_DIRTY);
9266 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9271 cache = btrfs_lookup_first_block_group(fs_info, start);
9276 start = cache->key.objectid + cache->key.offset;
/*
 * Go through the extent records in @extent_cache, report the inconsistencies
 * found and, when 'repair' is set, try to fix them.
 *
 * Problems reported per record: multiple extent items for one extent, a ref
 * count that differs from the extent item's, backpointer mismatches, a failed
 * owner ref check, a bad full backref flag, metadata crossing a stripe
 * boundary and an extent whose type does not match its chunk. Reference
 * problems are repaired through fixup_extent_refs() and bad full backref
 * flags through fixup_extent_flags(); the last two problems are report-only.
 *
 * Before the main pass the ranges of all records and of all corrupt blocks
 * are marked in fs_info->excluded_extents (the comment in the body says this
 * keeps repair from allocating from problem extents), corrupt blocks are
 * pruned, and the records queued on duplicate_extents are handled with
 * process_duplicates() and delete_duplicate_records(). Each record is removed
 * from the cache once it has been looked at. After a repair run the block
 * accounting is fixed up in a final transaction.
 */
9280 static int check_extent_refs(struct btrfs_root *root,
9281 struct cache_tree *extent_cache)
9283 struct extent_record *rec;
9284 struct cache_extent *cache;
9290 * if we're doing a repair, we have to make sure
9291 * we don't allocate from the problem extents.
9292 * In the worst case, this will be all the
9295 cache = search_cache_extent(extent_cache, 0);
9297 rec = container_of(cache, struct extent_record, cache);
9298 set_extent_dirty(root->fs_info->excluded_extents,
9300 rec->start + rec->max_size - 1);
9301 cache = next_cache_extent(cache);
9304 /* pin down all the corrupted blocks too */
9305 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9307 set_extent_dirty(root->fs_info->excluded_extents,
9309 cache->start + cache->size - 1);
9310 cache = next_cache_extent(cache);
9312 prune_corrupt_blocks(root->fs_info);
9313 reset_cached_block_groups(root->fs_info);
9316 reset_cached_block_groups(root->fs_info);
9319 * We need to delete any duplicate entries we find first otherwise we
9320 * could mess up the extent tree when we have backrefs that actually
9321 * belong to a different extent item and not the weird duplicate one.
9323 while (repair && !list_empty(&duplicate_extents)) {
9324 rec = to_extent_record(duplicate_extents.next);
9325 list_del_init(&rec->list);
9327 /* Sometimes we can find a backref before we find an actual
9328 * extent, so we need to process it a little bit to see if there
9329 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9330 * if this is a backref screwup. If we need to delete stuff
9331 * process_duplicates() will return 0, otherwise it will return
9334 if (process_duplicates(extent_cache, rec))
9336 ret = delete_duplicate_records(root, rec);
9340 * delete_duplicate_records will return the number of entries
9341 * deleted, so if it's greater than 0 then we know we actually
9342 * did something and we need to remove.
9355 cache = search_cache_extent(extent_cache, 0);
9358 rec = container_of(cache, struct extent_record, cache);
9359 if (rec->num_duplicates) {
9360 fprintf(stderr, "extent item %llu has multiple extent "
9361 "items\n", (unsigned long long)rec->start);
9365 if (rec->refs != rec->extent_item_refs) {
9366 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9367 (unsigned long long)rec->start,
9368 (unsigned long long)rec->nr);
9369 fprintf(stderr, "extent item %llu, found %llu\n",
9370 (unsigned long long)rec->extent_item_refs,
9371 (unsigned long long)rec->refs);
9372 ret = record_orphan_data_extents(root->fs_info, rec);
9378 if (all_backpointers_checked(rec, 1)) {
9379 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9380 (unsigned long long)rec->start,
9381 (unsigned long long)rec->nr);
9385 if (!rec->owner_ref_checked) {
9386 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9387 (unsigned long long)rec->start,
9388 (unsigned long long)rec->nr);
9393 if (repair && fix) {
9394 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9400 if (rec->bad_full_backref) {
9401 fprintf(stderr, "bad full backref, on [%llu]\n",
9402 (unsigned long long)rec->start);
9404 ret = fixup_extent_flags(root->fs_info, rec);
9412 * Although it's not a extent ref's problem, we reuse this
9413 * routine for error reporting.
9414 * No repair function yet.
9416 if (rec->crossing_stripes) {
9418 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9419 rec->start, rec->start + rec->max_size);
9423 if (rec->wrong_chunk_type) {
9425 "bad extent [%llu, %llu), type mismatch with chunk\n",
9426 rec->start, rec->start + rec->max_size);
9430 remove_cache_extent(extent_cache, cache);
9431 free_all_extent_backrefs(rec);
9432 if (!init_extent_tree && repair && (!cur_err || fix))
9433 clear_extent_dirty(root->fs_info->excluded_extents,
9435 rec->start + rec->max_size - 1);
9440 if (ret && ret != -EAGAIN) {
9441 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9444 struct btrfs_trans_handle *trans;
9446 root = root->fs_info->extent_root;
9447 trans = btrfs_start_transaction(root, 1);
9448 if (IS_ERR(trans)) {
9449 ret = PTR_ERR(trans);
9453 btrfs_fix_block_accounting(trans, root);
9454 ret = btrfs_commit_transaction(trans, root);
9463 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9467 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9468 stripe_size = length;
9469 stripe_size /= num_stripes;
9470 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9471 stripe_size = length * 2;
9472 stripe_size /= num_stripes;
9473 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9474 stripe_size = length;
9475 stripe_size /= (num_stripes - 1);
9476 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9477 stripe_size = length;
9478 stripe_size /= (num_stripes - 2);
9480 stripe_size = length;
9486 * Check the chunk with its block group/dev list ref:
9487 * Return 0 if all refs seems valid.
9488 * Return 1 if part of refs seems valid, need later check for rebuild ref
9489 * like missing block group and needs to search extent tree to rebuild them.
9490 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * @chunk_rec:         chunk record to verify
 * @block_group_cache: block group records, searched through its 'tree'
 * @dev_extent_cache:  device extent records, searched through its 'tree' by
 *                     (devid, offset, stripe length) for each stripe
 *
 * Block group check: the record found for the chunk must have an offset equal
 * to the chunk length and an objectid equal to the chunk offset (the flags
 * are compared as well). A matching record is taken off its list and stored
 * in chunk_rec->bg_rec.
 *
 * Device extent check: the expected stripe length comes from
 * calc_stripe_length(); for every stripe the device extent must match on
 * devid, offset, chunk offset and length. Matching extents are moved onto
 * chunk_rec->dextents.
 *
 * The format strings in the body are the messages used for mismatching and
 * missing records.
 */
9492 static int check_chunk_refs(struct chunk_record *chunk_rec,
9493 struct block_group_tree *block_group_cache,
9494 struct device_extent_tree *dev_extent_cache,
9497 struct cache_extent *block_group_item;
9498 struct block_group_record *block_group_rec;
9499 struct cache_extent *dev_extent_item;
9500 struct device_extent_record *dev_extent_rec;
9504 int metadump_v2 = 0;
9508 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9511 if (block_group_item) {
9512 block_group_rec = container_of(block_group_item,
9513 struct block_group_record,
9515 if (chunk_rec->length != block_group_rec->offset ||
9516 chunk_rec->offset != block_group_rec->objectid ||
9518 chunk_rec->type_flags != block_group_rec->flags)) {
9521 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9522 chunk_rec->objectid,
9527 chunk_rec->type_flags,
9528 block_group_rec->objectid,
9529 block_group_rec->type,
9530 block_group_rec->offset,
9531 block_group_rec->offset,
9532 block_group_rec->objectid,
9533 block_group_rec->flags);
9536 list_del_init(&block_group_rec->list);
9537 chunk_rec->bg_rec = block_group_rec;
9542 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9543 chunk_rec->objectid,
9548 chunk_rec->type_flags);
9555 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9556 chunk_rec->num_stripes);
9557 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9558 devid = chunk_rec->stripes[i].devid;
9559 offset = chunk_rec->stripes[i].offset;
9560 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9561 devid, offset, length);
9562 if (dev_extent_item) {
9563 dev_extent_rec = container_of(dev_extent_item,
9564 struct device_extent_record,
9566 if (dev_extent_rec->objectid != devid ||
9567 dev_extent_rec->offset != offset ||
9568 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9569 dev_extent_rec->length != length) {
9572 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9573 chunk_rec->objectid,
9576 chunk_rec->stripes[i].devid,
9577 chunk_rec->stripes[i].offset,
9578 dev_extent_rec->objectid,
9579 dev_extent_rec->offset,
9580 dev_extent_rec->length);
9583 list_move(&dev_extent_rec->chunk_list,
9584 &chunk_rec->dextents);
9589 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9590 chunk_rec->objectid,
9593 chunk_rec->stripes[i].devid,
9594 chunk_rec->stripes[i].offset);
9601 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort the
 * records onto the caller's lists according to the result.
 *
 * @chunk_cache:       chunk records to check
 * @block_group_cache: block group records handed to check_chunk_refs()
 * @dev_extent_cache:  device extent records handed to check_chunk_refs()
 * @good:              receives chunks whose check returned 0; may be NULL
 * @bad:               receives the chunks that failed the check
 * @rebuild:           receives chunks whose check returned a positive value;
 *                     may be NULL
 * @silent:            passed through to check_chunk_refs()
 *
 * Afterwards the block group records still on
 * block_group_cache->block_groups and the device extents on
 * dev_extent_cache->no_chunk_orphans are walked; the format strings describe
 * them as entries that did not find their chunk.
 */
9602 int check_chunks(struct cache_tree *chunk_cache,
9603 struct block_group_tree *block_group_cache,
9604 struct device_extent_tree *dev_extent_cache,
9605 struct list_head *good, struct list_head *bad,
9606 struct list_head *rebuild, int silent)
9608 struct cache_extent *chunk_item;
9609 struct chunk_record *chunk_rec;
9610 struct block_group_record *bg_rec;
9611 struct device_extent_record *dext_rec;
9615 chunk_item = first_cache_extent(chunk_cache);
9616 while (chunk_item) {
9617 chunk_rec = container_of(chunk_item, struct chunk_record,
9619 err = check_chunk_refs(chunk_rec, block_group_cache,
9620 dev_extent_cache, silent);
9623 if (err == 0 && good)
9624 list_add_tail(&chunk_rec->list, good);
9625 if (err > 0 && rebuild)
9626 list_add_tail(&chunk_rec->list, rebuild);
9628 list_add_tail(&chunk_rec->list, bad);
9629 chunk_item = next_cache_extent(chunk_item);
9632 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9635 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9643 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9647 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the cached dev extents that belong to one device and
 * compare the total with the byte_used value stored in the device record.
 *
 * @dev_rec:    device record; devid selects the extents, byte_used is the
 *              value the total is compared against
 * @dext_cache: device extent cache that is searched and iterated
 *
 * NOTE(review): the loop header, the action taken when an extent of another
 * device is reached and the return statements are not visible in this
 * listing; confirm against the full source.
 */
9658 static int check_device_used(struct device_record *dev_rec,
9659 struct device_extent_tree *dext_cache)
9661 struct cache_extent *cache;
9662 struct device_extent_record *dev_extent_rec;
/* Position on the cache entry found for (devid, 0) and iterate from there. */
9665 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9667 dev_extent_rec = container_of(cache,
9668 struct device_extent_record,
/* An entry with a different objectid belongs to another device. */
9670 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and add it to the running total. */
9673 list_del_init(&dev_extent_rec->device_list);
9674 total_byte += dev_extent_rec->length;
9675 cache = next_cache_extent(cache);
/* A total that differs from byte_used is reported with the message below. */
9678 if (total_byte != dev_rec->byte_used) {
9680 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9681 total_byte, dev_rec->byte_used, dev_rec->objectid,
9682 dev_rec->type, dev_rec->offset);
9689 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() for every device record in @dev_cache, then walk
 * the no_device_orphans list of @dev_extent_cache, for which a message about
 * a device extent without a device is defined.
 *
 * @dev_cache:        rb-tree of struct device_record (linked through the
 *                    node member)
 * @dev_extent_cache: device extent cache, handed to check_device_used()
 *
 * NOTE(review): the loop header, the handling of err and the return
 * statement are not visible in this listing; confirm against the full
 * source.
 */
9690 static int check_devices(struct rb_root *dev_cache,
9691 struct device_extent_tree *dev_extent_cache)
9693 struct rb_node *dev_node;
9694 struct device_record *dev_rec;
9695 struct device_extent_record *dext_rec;
/* Visit the devices in rb-tree order. */
9699 dev_node = rb_first(dev_cache);
9701 dev_rec = container_of(dev_node, struct device_record, node);
9702 err = check_device_used(dev_rec, dev_extent_cache);
9706 dev_node = rb_next(dev_node);
/*
 * check_device_used() unlinks every extent it accounts from its device_list,
 * so presumably only extents without a device remain on this list.
 */
9708 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9711 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9712 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in the objectid member of the record
 * @bytenr:        stored in the bytenr member of the record
 * @last_snapshot: stored in the last_snapshot member of the record
 * @level:         stored in the level member of the record
 * @drop_level:    stored in the drop_level member of the record
 * @level_size:    stored in the level_size member of the record
 * @drop_key:      copied by value into the drop_key member of the record
 *
 * NOTE(review): callers in this file pass NULL for @drop_key; the guard for
 * that case, the check of the malloc() result and the return statements are
 * not visible in this listing; confirm against the full source.
 */
9719 static int add_root_item_to_list(struct list_head *head,
9720 u64 objectid, u64 bytenr, u64 last_snapshot,
9721 u8 level, u8 drop_level,
9722 int level_size, struct btrfs_key *drop_key)
9725 struct root_item_record *ri_rec;
9726 ri_rec = malloc(sizeof(*ri_rec));
9729 ri_rec->bytenr = bytenr;
9730 ri_rec->objectid = objectid;
9731 ri_rec->level = level;
9732 ri_rec->level_size = level_size;
9733 ri_rec->drop_level = drop_level;
9734 ri_rec->last_snapshot = last_snapshot;
/* The key is copied, so the record does not keep a pointer to @drop_key. */
9736 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9737 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list of root_item_record entries: repeatedly take the first entry
 * and unlink it until the list is empty.
 *
 * NOTE(review): the statement that releases the unlinked record is not
 * visible in this listing; presumably it is freed there. Confirm against the
 * full source.
 */
9742 static void free_root_item_list(struct list_head *list)
9744 struct root_item_record *ri_rec;
9746 while (!list_empty(list)) {
9747 ri_rec = list_first_entry(list, struct root_item_record,
9749 list_del_init(&ri_rec->list);
/*
 * Drain @list of root_item_record entries: read the root block of each
 * record, hand it to add_root_to_pending() and process blocks with
 * run_next_block().
 *
 * @list: list of struct root_item_record; each processed record is unlinked
 *        with list_del()
 * @root: supplies root->fs_info->tree_root for read_tree_block() and is
 *        passed to run_next_block()
 * @bits, @pending, @seen, @reada, @nodes, @extent_cache, @chunk_cache,
 * @dev_cache, @block_group_cache, @dev_extent_cache: working state that is
 *        handed through unchanged to add_root_to_pending() and
 *        run_next_block()
 *
 * NOTE(review): one parameter line (bits_nr is used below), the error
 * handling after each call, the condition that selects the per-record
 * run_next_block() call, the loop around the final run_next_block() call and
 * the return statement are not visible in this listing; confirm against the
 * full source.
 */
9754 static int deal_root_from_list(struct list_head *list,
9755 struct btrfs_root *root,
9756 struct block_info *bits,
9758 struct cache_tree *pending,
9759 struct cache_tree *seen,
9760 struct cache_tree *reada,
9761 struct cache_tree *nodes,
9762 struct cache_tree *extent_cache,
9763 struct cache_tree *chunk_cache,
9764 struct rb_root *dev_cache,
9765 struct block_group_tree *block_group_cache,
9766 struct device_extent_tree *dev_extent_cache)
9771 while (!list_empty(list)) {
9772 struct root_item_record *rec;
9773 struct extent_buffer *buf;
/* Always work on the head of the list; it is unlinked further down. */
9774 rec = list_entry(list->next,
9775 struct root_item_record, list);
/* Read the root node of this tree; a stale buffer is released again. */
9777 buf = read_tree_block(root->fs_info->tree_root,
9778 rec->bytenr, rec->level_size, 0);
9779 if (!extent_buffer_uptodate(buf)) {
9780 free_extent_buffer(buf);
/* Hand the root block over under the objectid stored in the record. */
9784 ret = add_root_to_pending(buf, extent_cache, pending,
9785 seen, nodes, rec->objectid);
9789 * To rebuild extent tree, we need deal with snapshot
9790 * one by one, otherwise we deal with node firstly which
9791 * can maximize readahead.
9794 ret = run_next_block(root, bits, bits_nr, &last,
9795 pending, seen, reada, nodes,
9796 extent_cache, chunk_cache,
9797 dev_cache, block_group_cache,
9798 dev_extent_cache, rec);
/* Done with this record: drop the buffer reference and unlink the record. */
9802 free_extent_buffer(buf);
9803 list_del(&rec->list);
/* Same call as above, but without a root_item_record (NULL). */
9809 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9810 reada, nodes, extent_cache, chunk_cache,
9811 dev_cache, block_group_cache,
9812 dev_extent_cache, NULL);
/*
 * Driver of the extent and chunk checks.
 *
 * Visible steps, in order:
 *  1. initialise the local caches, trees and the two root lists;
 *  2. publish excluded_extents, fsck_extent_cache, free_extent_hook and
 *     corrupt_blocks through root->fs_info;
 *  3. allocate the bits array and, if progress reporting is enabled, start
 *     the task with ctx.tp set to TASK_EXTENTS;
 *  4. queue the tree root and the chunk root on normal_trees;
 *  5. walk the ROOT_ITEM entries of the tree root and queue each tree on
 *     normal_trees or dropping_trees;
 *  6. process both lists with deal_root_from_list();
 *  7. run check_chunks(), check_extent_refs() and check_devices();
 *  8. stop the task, clear the fs_info pointers and free all local state.
 *
 * @root: any root of the filesystem; only root->fs_info, root->nodesize and
 *        the btrfs_next_leaf() call use it directly
 *
 * NOTE(review): many short lines (labels, error branches, loop headers, the
 * return statement, several declarations) are not visible in this listing;
 * confirm control flow and return value against the full source.
 */
9822 static int check_chunks_and_extents(struct btrfs_root *root)
9824 struct rb_root dev_cache;
9825 struct cache_tree chunk_cache;
9826 struct block_group_tree block_group_cache;
9827 struct device_extent_tree dev_extent_cache;
9828 struct cache_tree extent_cache;
9829 struct cache_tree seen;
9830 struct cache_tree pending;
9831 struct cache_tree reada;
9832 struct cache_tree nodes;
9833 struct extent_io_tree excluded_extents;
9834 struct cache_tree corrupt_blocks;
9835 struct btrfs_path path;
9836 struct btrfs_key key;
9837 struct btrfs_key found_key;
9839 struct block_info *bits;
9841 struct extent_buffer *leaf;
9843 struct btrfs_root_item ri;
9844 struct list_head dropping_trees;
9845 struct list_head normal_trees;
9846 struct btrfs_root *root1;
/* Start from empty caches, trees and lists. */
9851 dev_cache = RB_ROOT;
9852 cache_tree_init(&chunk_cache);
9853 block_group_tree_init(&block_group_cache);
9854 device_extent_tree_init(&dev_extent_cache);
9856 cache_tree_init(&extent_cache);
9857 cache_tree_init(&seen);
9858 cache_tree_init(&pending);
9859 cache_tree_init(&nodes);
9860 cache_tree_init(&reada);
9861 cache_tree_init(&corrupt_blocks);
9862 extent_io_tree_init(&excluded_extents);
9863 INIT_LIST_HEAD(&dropping_trees);
9864 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the local state through fs_info. These four pointers refer to
 * stack objects and are reset to NULL in the teardown further down.
 */
9867 root->fs_info->excluded_extents = &excluded_extents;
9868 root->fs_info->fsck_extent_cache = &extent_cache;
9869 root->fs_info->free_extent_hook = free_extent_hook;
9870 root->fs_info->corrupt_blocks = &corrupt_blocks;
9874 bits = malloc(bits_nr * sizeof(struct block_info));
9880 if (ctx.progress_enabled) {
9881 ctx.tp = TASK_EXTENTS;
9882 task_start(ctx.info);
/* The tree root and the chunk root are queued first, as normal trees. */
9886 root1 = root->fs_info->tree_root;
9887 level = btrfs_header_level(root1->node);
9888 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9889 root1->node->start, 0, level, 0,
9890 root1->nodesize, NULL);
9893 root1 = root->fs_info->chunk_root;
9894 level = btrfs_header_level(root1->node);
9895 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9896 root1->node->start, 0, level, 0,
9897 root1->nodesize, NULL);
/* Walk the ROOT_ITEM entries of the tree root and queue every tree found. */
9900 btrfs_init_path(&path);
9903 key.type = BTRFS_ROOT_ITEM_KEY;
9904 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9909 leaf = path.nodes[0];
9910 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9911 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9912 ret = btrfs_next_leaf(root, &path);
9915 leaf = path.nodes[0];
9916 slot = path.slots[0];
9918 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9919 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9920 unsigned long offset;
9923 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9924 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9925 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A zero drop_progress objectid sends the tree to normal_trees; any other
 * value sends it to dropping_trees together with a drop key (presumably a
 * tree whose deletion was interrupted; confirm).
 */
9926 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9927 level = btrfs_root_level(&ri);
9928 level_size = root->nodesize;
9929 ret = add_root_item_to_list(&normal_trees,
9931 btrfs_root_bytenr(&ri),
9932 last_snapshot, level,
9933 0, level_size, NULL);
9937 level = btrfs_root_level(&ri);
9938 level_size = root->nodesize;
9939 objectid = found_key.objectid;
9940 btrfs_disk_key_to_cpu(&found_key,
9942 ret = add_root_item_to_list(&dropping_trees,
9944 btrfs_root_bytenr(&ri),
9945 last_snapshot, level,
9947 level_size, &found_key);
9954 btrfs_release_path(&path);
9957 * check_block can return -EAGAIN if it fixes something, please keep
9958 * this in mind when dealing with return values from these functions, if
9959 * we get -EAGAIN we want to fall through and restart the loop.
/* Process the normal trees first, then the trees queued as dropping. */
9961 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9962 &seen, &reada, &nodes, &extent_cache,
9963 &chunk_cache, &dev_cache, &block_group_cache,
9970 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9971 &pending, &seen, &reada, &nodes,
9972 &extent_cache, &chunk_cache, &dev_cache,
9973 &block_group_cache, &dev_extent_cache);
/* With every tree walked, run the cross checks on the collected records. */
9980 ret = check_chunks(&chunk_cache, &block_group_cache,
9981 &dev_extent_cache, NULL, NULL, NULL, 0);
9988 ret = check_extent_refs(root, &extent_cache);
9995 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: stop the task, unhook fs_info and free the local state. */
10000 task_stop(ctx.info);
10002 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10003 extent_io_tree_cleanup(&excluded_extents);
10004 root->fs_info->fsck_extent_cache = NULL;
10005 root->fs_info->free_extent_hook = NULL;
10006 root->fs_info->corrupt_blocks = NULL;
10007 root->fs_info->excluded_extents = NULL;
10010 free_chunk_cache_tree(&chunk_cache);
10011 free_device_cache_tree(&dev_cache);
10012 free_block_group_tree(&block_group_cache);
10013 free_device_extent_tree(&dev_extent_cache);
10014 free_extent_cache_tree(&seen);
10015 free_extent_cache_tree(&pending);
10016 free_extent_cache_tree(&reada);
10017 free_extent_cache_tree(&nodes);
10018 free_root_item_list(&normal_trees);
10019 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): second teardown sequence. Unlike the one above it also frees
 * the extent record cache and does not reset the fs_info pointers; presumably
 * this is the restart path for the -EAGAIN case described earlier. Confirm
 * the label and the jump target in the full source.
 */
10022 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10023 free_extent_cache_tree(&seen);
10024 free_extent_cache_tree(&pending);
10025 free_extent_cache_tree(&reada);
10026 free_extent_cache_tree(&nodes);
10027 free_chunk_cache_tree(&chunk_cache);
10028 free_block_group_tree(&block_group_cache);
10029 free_device_cache_tree(&dev_cache);
10030 free_device_extent_tree(&dev_extent_cache);
10031 free_extent_record_cache(&extent_cache);
10032 free_root_item_list(&normal_trees);
10033 free_root_item_list(&dropping_trees);
10034 extent_io_tree_cleanup(&excluded_extents);
10039 * Check backrefs of a tree block given by @bytenr or @eb.
10041 * @root: the root containing the @bytenr or @eb
10042 * @eb: tree block extent buffer, can be NULL
10043 * @bytenr: bytenr of the tree block to search
10044 * @level: tree level of the tree block
10045 * @owner: owner of the tree block
10047 * Return >0 for any error found and output error message
10048 * Return 0 for no error found
/*
 * Look up the extent tree entry of a tree block and verify that it carries a
 * backref matching this root or owner.
 *
 * Visible steps:
 *  - search the extent tree for the item at @bytenr (METADATA_ITEM when the
 *    SKINNY_METADATA incompat flag is set, EXTENT_ITEM otherwise);
 *  - compare extent flags, generation, level and ref count with the block;
 *    mismatches set err to BACKREF_MISMATCH;
 *  - scan the inline refs for a TREE_BLOCK_REF with offset equal to
 *    root->objectid or @owner, or a SHARED_BLOCK_REF whose parent is itself
 *    validated by a recursive call with @eb set to NULL and level + 1;
 *  - fall back to a keyed TREE_BLOCK_REF item search;
 *  - a failed lookup sets BACKREF_MISSING, which is reported through error()
 *    only when @eb is not NULL.
 *
 * NOTE(review): several branch conditions, the found_ref bookkeeping and the
 * return statement are not visible in this listing; in particular the guard
 * around the checks that read @eb must be confirmed, since the recursive call
 * passes NULL for @eb.
 */
10050 static int check_tree_block_ref(struct btrfs_root *root,
10051 struct extent_buffer *eb, u64 bytenr,
10052 int level, u64 owner)
10054 struct btrfs_key key;
10055 struct btrfs_root *extent_root = root->fs_info->extent_root;
10056 struct btrfs_path path;
10057 struct btrfs_extent_item *ei;
10058 struct btrfs_extent_inline_ref *iref;
10059 struct extent_buffer *leaf;
10065 u32 nodesize = root->nodesize;
10068 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10073 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10074 btrfs_header_bytenr(root->node) == bytenr)
10075 tree_reloc_root = 1;
10077 btrfs_init_path(&path);
10078 key.objectid = bytenr;
10079 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10080 key.type = BTRFS_METADATA_ITEM_KEY;
10082 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Largest possible offset, so the search lands after any item for @bytenr. */
10083 key.offset = (u64)-1;
10085 /* Search for the backref in extent tree */
10086 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10088 err |= BACKREF_MISSING;
10091 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10093 err |= BACKREF_MISSING;
10097 leaf = path.nodes[0];
10098 slot = path.slots[0];
10099 btrfs_item_key_to_cpu(leaf, &key, slot);
10101 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keeps the level in key.offset and the inline refs follow the
 * extent item directly; the other layout has a btrfs_tree_block_info between
 * the extent item and the inline refs.
 */
10103 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10104 skinny_level = (int)key.offset;
10105 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10107 struct btrfs_tree_block_info *info;
10109 info = (struct btrfs_tree_block_info *)(ei + 1);
10110 skinny_level = btrfs_tree_block_level(leaf, info);
10111 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Consistency checks between the extent item and the tree block. */
10118 if (!(btrfs_extent_flags(leaf, ei) &
10119 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10121 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10122 key.objectid, nodesize,
10123 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10124 err = BACKREF_MISMATCH;
10126 header_gen = btrfs_header_generation(eb);
10127 extent_gen = btrfs_extent_generation(leaf, ei);
10128 if (header_gen != extent_gen) {
10130 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10131 key.objectid, nodesize, header_gen,
10133 err = BACKREF_MISMATCH;
10135 if (level != skinny_level) {
10137 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10138 key.objectid, nodesize, level, skinny_level);
10139 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is expected to have exactly one ref. */
10141 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10143 "extent[%llu %u] is referred by other roots than %llu",
10144 key.objectid, nodesize, root->objectid);
10145 err = BACKREF_MISMATCH;
10150 * Iterate the extent/metadata item to find the exact backref
10152 item_size = btrfs_item_size_nr(leaf, slot);
10153 ptr = (unsigned long)iref;
10154 end = (unsigned long)ei + item_size;
10155 while (ptr < end) {
10156 iref = (struct btrfs_extent_inline_ref *)ptr;
10157 type = btrfs_extent_inline_ref_type(leaf, iref);
10158 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10160 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10161 (offset == root->objectid || offset == owner)) {
10163 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10165 * Backref of tree reloc root points to itself, no need
10166 * to check backref any more.
10168 if (tree_reloc_root)
10171 /* Check if the backref points to valid referencer */
10172 found_ref = !check_tree_block_ref(root, NULL,
10173 offset, level + 1, owner);
/* Advance by the on-disk size of this inline ref type. */
10178 ptr += btrfs_extent_inline_ref_size(type);
10182 * Inlined extent item doesn't have what we need, check
10183 * TREE_BLOCK_REF_KEY
10186 btrfs_release_path(&path);
10187 key.objectid = bytenr;
10188 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10189 key.offset = root->objectid;
10191 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10196 err |= BACKREF_MISSING;
10198 btrfs_release_path(&path);
10199 if (eb && (err & BACKREF_MISSING))
10200 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10201 bytenr, nodesize, owner, level);
10206 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10208 * Return >0 any error found and output error message
10209 * Return 0 for no error found
/*
 * Check one EXTENT_DATA item: alignment of its byte counts and the presence
 * of a matching data backref in the extent tree.
 *
 * @root: tree that contains the file extent; root->objectid and
 *        root->sectorsize are used for matching and alignment checks
 * @eb:   leaf holding the item
 * @slot: slot of the item in @eb (the variable is reused further down for
 *        the slot in the extent tree leaf)
 *
 * Side effects visible here: disk_num_bytes is added to the file-scope
 * data_bytes_allocated counter and num_bytes to data_bytes_referenced.
 *
 * Backref lookup order: inline refs of the EXTENT_ITEM, then a keyed
 * EXTENT_DATA_REF item, then a keyed SHARED_DATA_REF item whose offset is
 * eb->start. If none is found BACKREF_MISSING is set and reported.
 *
 * NOTE(review): the early return for holes and inline extents, the handling
 * of search results, some arguments of the recursive helper call and the
 * return statement are not visible in this listing; confirm against the full
 * source.
 */
10211 static int check_extent_data_item(struct btrfs_root *root,
10212 struct extent_buffer *eb, int slot)
10214 struct btrfs_file_extent_item *fi;
10215 struct btrfs_path path;
10216 struct btrfs_root *extent_root = root->fs_info->extent_root;
10217 struct btrfs_key fi_key;
10218 struct btrfs_key dbref_key;
10219 struct extent_buffer *leaf;
10220 struct btrfs_extent_item *ei;
10221 struct btrfs_extent_inline_ref *iref;
10222 struct btrfs_extent_data_ref *dref;
10225 u64 disk_num_bytes;
10226 u64 extent_num_bytes;
10233 int found_dbackref = 0;
10237 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10238 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10240 /* Nothing to check for hole and inline data extents */
10241 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10242 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10245 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10246 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10247 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10249 /* Check unaligned disk_num_bytes and num_bytes */
10250 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10252 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10253 fi_key.objectid, fi_key.offset, disk_num_bytes,
10255 err |= BYTES_UNALIGNED;
/* Global statistics: bytes allocated on disk for data extents. */
10257 data_bytes_allocated += disk_num_bytes;
10259 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10261 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10262 fi_key.objectid, fi_key.offset, extent_num_bytes,
10264 err |= BYTES_UNALIGNED;
/* Global statistics: bytes referenced by file extents. */
10266 data_bytes_referenced += extent_num_bytes;
10268 owner = btrfs_header_owner(eb);
10270 /* Check the extent item of the file extent in extent tree */
10271 btrfs_init_path(&path);
/* Exact key of the data extent: (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
10272 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10273 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10274 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10276 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10280 leaf = path.nodes[0];
10281 slot = path.slots[0];
10282 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10284 extent_flags = btrfs_extent_flags(leaf, ei);
10286 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10288 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10289 disk_bytenr, disk_num_bytes,
10290 BTRFS_EXTENT_FLAG_DATA);
10291 err |= BACKREF_MISMATCH;
10294 /* Check data backref inside that extent item */
10295 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10296 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10297 ptr = (unsigned long)iref;
10298 end = (unsigned long)ei + item_size;
10299 while (ptr < end) {
10300 iref = (struct btrfs_extent_inline_ref *)ptr;
10301 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For data refs the payload starts at the offset member of the inline ref. */
10302 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10304 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10305 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* Accept a ref rooted at the leaf owner or at this root. */
10306 if (ref_root == owner || ref_root == root->objectid)
10307 found_dbackref = 1;
10308 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* A shared ref counts when its parent tree block has a valid backref. */
10309 found_dbackref = !check_tree_block_ref(root, NULL,
10310 btrfs_extent_inline_ref_offset(leaf, iref),
10314 if (found_dbackref)
10316 ptr += btrfs_extent_inline_ref_size(type);
10319 if (!found_dbackref) {
10320 btrfs_release_path(&path);
10322 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10323 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10324 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs use a hash of (root, inode objectid, file offset). */
10325 dbref_key.offset = hash_extent_data_ref(root->objectid,
10326 fi_key.objectid, fi_key.offset);
10328 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10329 &dbref_key, &path, 0, 0);
10331 found_dbackref = 1;
10335 btrfs_release_path(&path);
10338 * Neither inlined nor EXTENT_DATA_REF found, try
10339 * SHARED_DATA_REF as last chance.
10341 dbref_key.objectid = disk_bytenr;
10342 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10343 dbref_key.offset = eb->start;
10345 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10346 &dbref_key, &path, 0, 0);
10348 found_dbackref = 1;
10354 if (!found_dbackref)
10355 err |= BACKREF_MISSING;
10356 btrfs_release_path(&path);
10357 if (err & BACKREF_MISSING) {
10358 error("data extent[%llu %llu] backref lost",
10359 disk_bytenr, disk_num_bytes);
10365 * Get real tree block level for the case like shared block
10366 * Return >= 0 as tree level
10367 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * return it only when both agree.
 *
 * Source 1 is the extent tree: key.offset of a METADATA_ITEM, or the level
 * stored in the btrfs_tree_block_info that follows the extent item.
 * Source 2 is the header of the tree block itself, read with the generation
 * taken from the extent item.
 *
 * @fs_info: filesystem handle; provides the extent root and the nodesize
 *           from the super block copy
 * @bytenr:  logical address of the tree block
 *
 * NOTE(review): the error branches (failed searches, missing TREE_BLOCK flag,
 * unreadable block, level mismatch) and their return values are not visible
 * in this listing; confirm against the full source.
 */
10369 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10371 struct extent_buffer *eb;
10372 struct btrfs_path path;
10373 struct btrfs_key key;
10374 struct btrfs_extent_item *ei;
10377 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10382 /* Search extent tree for extent generation and level */
10383 key.objectid = bytenr;
10384 key.type = BTRFS_METADATA_ITEM_KEY;
/* Largest possible offset, then step back to the item for @bytenr. */
10385 key.offset = (u64)-1;
10387 btrfs_init_path(&path);
10388 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10391 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10399 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10400 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10401 struct btrfs_extent_item);
10402 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extents flagged as tree blocks have a level. */
10403 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10408 /* Get transid for later read_tree_block() check */
10409 transid = btrfs_extent_generation(path.nodes[0], ei);
10411 /* Get backref level as one source */
10412 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10413 backref_level = key.offset;
10415 struct btrfs_tree_block_info *info;
10417 info = (struct btrfs_tree_block_info *)(ei + 1);
10418 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10420 btrfs_release_path(&path);
10422 /* Get level from tree block as an alternative source */
10423 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10424 if (!extent_buffer_uptodate(eb)) {
10425 free_extent_buffer(eb);
10428 header_level = btrfs_header_level(eb);
10429 free_extent_buffer(eb);
/* The two sources must agree before the level is returned. */
10431 if (header_level != backref_level)
10433 return header_level;
10436 btrfs_release_path(&path);
10441 * Check if a tree block backref is valid (points to a valid tree block)
10442 * if level == -1, level will be resolved
10443 * Return >0 for any error found and print error message
/*
 * Verify that the tree block at @bytenr can be reached from the root
 * @root_id at the expected level.
 *
 * Visible steps: resolve the level through query_tree_block_level() when
 * needed, read the root, read the block to obtain its first key, search the
 * root for that key with path.lowest_level set to @level, and compare the
 * bytenr and level of the node found at that level with the expected values.
 *
 * @fs_info: filesystem handle
 * @root_id: objectid of the root that is expected to reference the block
 * @bytenr:  logical address of the tree block
 * @level:   expected level of the block
 *
 * Error bits set here: REFERENCER_MISSING and REFERENCER_MISMATCH.
 *
 * NOTE(review): the condition selecting the level query, the choice between
 * node key and item key, the branch choosing between the two final messages
 * and the return statement are not visible in this listing; confirm against
 * the full source.
 */
10445 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10446 u64 bytenr, int level)
10448 struct btrfs_root *root;
10449 struct btrfs_key key;
10450 struct btrfs_path path;
10451 struct extent_buffer *eb;
10452 struct extent_buffer *node;
10453 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10457 /* Query level for level == -1 special case */
10459 level = query_tree_block_level(fs_info, bytenr);
10461 err |= REFERENCER_MISSING;
/* Look up the referencing root by (root_id, ROOT_ITEM, -1). */
10465 key.objectid = root_id;
10466 key.type = BTRFS_ROOT_ITEM_KEY;
10467 key.offset = (u64)-1;
10469 root = btrfs_read_fs_root(fs_info, &key);
10470 if (IS_ERR(root)) {
10471 err |= REFERENCER_MISSING;
10475 /* Read out the tree block to get item/node key */
10476 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10477 if (!extent_buffer_uptodate(eb)) {
10478 err |= REFERENCER_MISSING;
10479 free_extent_buffer(eb);
10483 /* Empty tree, no need to check key */
10484 if (!btrfs_header_nritems(eb) && !level) {
10485 free_extent_buffer(eb);
/* Take the first key of the block, from a node pointer or from an item. */
10490 btrfs_node_key_to_cpu(eb, &key, 0);
10492 btrfs_item_key_to_cpu(eb, &key, 0);
10494 free_extent_buffer(eb);
10496 btrfs_init_path(&path);
/* Stop the search at @level so path.nodes[level] is the block on that path. */
10497 path.lowest_level = level;
10498 /* Search with the first key, to ensure we can reach it */
10499 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10501 err |= REFERENCER_MISSING;
10505 node = path.nodes[level];
10506 if (btrfs_header_bytenr(node) != bytenr) {
10508 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10509 bytenr, nodesize, bytenr,
10510 btrfs_header_bytenr(node));
10511 err |= REFERENCER_MISMATCH;
10513 if (btrfs_header_level(node) != level) {
10515 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10516 bytenr, nodesize, level,
10517 btrfs_header_level(node));
10518 err |= REFERENCER_MISMATCH;
10522 btrfs_release_path(&path);
/* Two message variants exist: without and with the level. */
10524 if (err & REFERENCER_MISSING) {
10526 error("extent [%llu %d] lost referencer (owner: %llu)",
10527 bytenr, nodesize, root_id);
10530 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10531 bytenr, nodesize, root_id, level);
10538 * Check if tree block @eb is tree reloc root.
10539 * Return 0 if it's not or any problem happens
10540 * Return 1 if it's a tree reloc root
/*
 * Test whether tree block @eb is the root node of the tree reloc root that
 * belongs to the owner recorded in the header of @eb.
 *
 * The reloc root is looked up with the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner) through the no-cache reader
 * and released again with btrfs_free_fs_root(); the test itself compares the
 * bytenr of @eb with the bytenr of the node of that root.
 *
 * NOTE(review): the result variable and the return statements are not
 * visible in this listing; confirm against the full source.
 */
10542 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10543 struct extent_buffer *eb)
10545 struct btrfs_root *tree_reloc_root;
10546 struct btrfs_key key;
10547 u64 bytenr = btrfs_header_bytenr(eb);
10548 u64 owner = btrfs_header_owner(eb);
10551 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10552 key.offset = owner;
10553 key.type = BTRFS_ROOT_ITEM_KEY;
10555 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10556 if (IS_ERR(tree_reloc_root))
10559 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root was read without caching, so it is released here. */
10561 btrfs_free_fs_root(tree_reloc_root);
10566 * Check referencer for shared block backref
10567 * If level == -1, this function will resolve the level.
/*
 * Verify a shared block backref: the tree block at @parent must contain a
 * node pointer to @bytenr and sit exactly one level above it.
 *
 * @fs_info: filesystem handle
 * @parent:  logical address of the referencing (parent) tree block
 * @bytenr:  logical address of the referenced tree block
 * @level:   level of the referenced block; query_tree_block_level() is used
 *           to resolve it in the special case
 *
 * Returns REFERENCER_MISSING after printing a message when no parent pointer
 * was found.
 *
 * NOTE(review): the condition selecting the level query, the branch bodies
 * (including what is done when @parent equals @bytenr and the block is a
 * tree reloc root), the found_parent update and the success return are not
 * visible in this listing; confirm against the full source.
 */
10569 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10570 u64 parent, u64 bytenr, int level)
10572 struct extent_buffer *eb;
10573 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10575 int found_parent = 0;
10578 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10579 if (!extent_buffer_uptodate(eb))
10583 level = query_tree_block_level(fs_info, bytenr);
10587 /* It's possible it's a tree reloc root */
10588 if (parent == bytenr) {
10589 if (is_tree_reloc_root(fs_info, eb))
/* The parent must be exactly one level above the referenced block. */
10594 if (level + 1 != btrfs_header_level(eb))
/* Scan every node pointer of the parent for @bytenr. */
10597 nr = btrfs_header_nritems(eb);
10598 for (i = 0; i < nr; i++) {
10599 if (bytenr == btrfs_node_blockptr(eb, i)) {
10605 free_extent_buffer(eb);
10606 if (!found_parent) {
10608 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10609 bytenr, nodesize, parent, level);
10610 return REFERENCER_MISSING;
10616 * Check referencer for normal (inlined) data ref
10617 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a keyed or inline EXTENT_DATA_REF: count the file extents of inode
 * @objectid in root @root_id that point at the data extent and compare the
 * number found with @count.
 *
 * @fs_info:  filesystem handle
 * @root_id:  objectid of the root that owns the referencing file
 * @objectid: inode number of the referencing file
 * @offset:   backref offset, compared with
 *            (file offset - file extent offset) of each candidate
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk num bytes of the data extent; the first extent tree search
 *            below belongs to the case where it has to be resolved
 * @count:    reference count recorded in the backref
 *
 * Returns REFERENCER_MISSING after printing a message when the counts
 * differ.
 *
 * NOTE(review): the condition around the initial extent tree search, the
 * error branches, the loop header, the found_count update and the success
 * return are not visible in this listing; confirm against the full source.
 */
10619 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10620 u64 root_id, u64 objectid, u64 offset,
10621 u64 bytenr, u64 len, u32 count)
10623 struct btrfs_root *root;
10624 struct btrfs_root *extent_root = fs_info->extent_root;
10625 struct btrfs_key key;
10626 struct btrfs_path path;
10627 struct extent_buffer *leaf;
10628 struct btrfs_file_extent_item *fi;
10629 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree. */
10634 key.objectid = bytenr;
10635 key.type = BTRFS_EXTENT_ITEM_KEY;
10636 key.offset = (u64)-1;
10638 btrfs_init_path(&path);
10639 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10642 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10645 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10646 if (key.objectid != bytenr ||
10647 key.type != BTRFS_EXTENT_ITEM_KEY)
10650 btrfs_release_path(&path);
/* Switch to the referencing root. */
10652 key.objectid = root_id;
10653 key.type = BTRFS_ROOT_ITEM_KEY;
10654 key.offset = (u64)-1;
10655 btrfs_init_path(&path);
10657 root = btrfs_read_fs_root(fs_info, &key);
10661 key.objectid = objectid;
10662 key.type = BTRFS_EXTENT_DATA_KEY;
10664 * It can be nasty as data backref offset is
10665 * file offset - file extent offset, which is smaller or
10666 * equal to original backref offset. The only special case is
10667 * overflow. So we need to special check and do further search.
/* A set top bit marks the overflow case: start the scan from offset 0. */
10669 key.offset = offset & (1ULL << 63) ? 0 : offset;
10671 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10676 * Search afterwards to get correct one
10677 * NOTE: As we must do a comprehensive check on the data backref to
10678 * make sure the dref count also matches, we must iterate all file
10679 * extents for that inode.
10682 leaf = path.nodes[0];
10683 slot = path.slots[0];
10685 if (slot >= btrfs_header_nritems(leaf))
10687 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Any other inode or item type is not a candidate. */
10688 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10690 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10692 * Except normal disk bytenr and disk num bytes, we still
10693 * need to do extra check on dbackref offset as
10694 * dbackref offset = file_offset - file_extent_offset
10696 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10697 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10698 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10703 ret = btrfs_next_item(root, &path);
10708 btrfs_release_path(&path);
10709 if (found_count != count) {
10711 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10712 bytenr, len, root_id, objectid, offset, count, found_count);
10713 return REFERENCER_MISSING;
10719 * Check if the referencer of a shared data backref exists
/*
 * Verify a shared data backref: the leaf at @parent must contain a regular
 * (not inline) EXTENT_DATA item whose disk bytenr equals @bytenr.
 *
 * @fs_info: filesystem handle; provides the nodesize from the super copy
 * @parent:  logical address of the leaf that should reference the extent
 * @bytenr:  disk bytenr of the data extent
 *
 * Returns REFERENCER_MISSING after printing a message when no such item is
 * found.
 *
 * NOTE(review): the branch taken for an unreadable parent block, the
 * statements executed on a match and the success return are not visible in
 * this listing; confirm against the full source.
 */
10721 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10722 u64 parent, u64 bytenr)
10724 struct extent_buffer *eb;
10725 struct btrfs_key key;
10726 struct btrfs_file_extent_item *fi;
10727 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10729 int found_parent = 0;
10732 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10733 if (!extent_buffer_uptodate(eb))
/* Scan every item of the parent leaf. */
10736 nr = btrfs_header_nritems(eb);
10737 for (i = 0; i < nr; i++) {
10738 btrfs_item_key_to_cpu(eb, &key, i);
10739 if (key.type != BTRFS_EXTENT_DATA_KEY)
10742 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are filtered out by the type test below. */
10743 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10746 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10753 free_extent_buffer(eb);
10754 if (!found_parent) {
10755 error("shared extent %llu referencer lost (parent: %llu)",
10757 return REFERENCER_MISSING;
10763 * This function will check a given extent item, including its backref and
10764 * itself (like crossing stripe boundary and type)
10766 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM of the extent tree together with
 * all of its inline backrefs.
 *
 * @fs_info: filesystem handle; provides the nodesize from the super copy
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item in @eb
 *
 * Visible steps:
 *  - add the extent size to the file-scope bytes_used counter (key.offset
 *    for EXTENT_ITEM keys, nodesize on the other visible path);
 *  - reject items smaller than struct btrfs_extent_item with the
 *    COMPAT_EXTENT_TREE_V0 message;
 *  - for tree blocks, run check_crossing_stripes() and set
 *    CROSSING_STRIPE_BOUNDARY on failure;
 *  - take the level from the btrfs_tree_block_info (EXTENT_ITEM metadata) or
 *    from key.offset (METADATA_ITEM);
 *  - walk the inline refs up to the item end and dispatch on the ref type:
 *    TREE_BLOCK_REF to check_tree_block_backref(), SHARED_BLOCK_REF to
 *    check_shared_block_backref(), EXTENT_DATA_REF to
 *    check_extent_data_backref(), SHARED_DATA_REF to
 *    check_shared_data_backref(); any other type sets UNKNOWN_TYPE;
 *  - a walk that runs past the item end sets ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): the loop or label structure of the inline ref walk, the
 * accumulation of ret into err, several call arguments and the return
 * statement are not visible in this listing; confirm against the full
 * source.
 */
10768 static int check_extent_item(struct btrfs_fs_info *fs_info,
10769 struct extent_buffer *eb, int slot)
10771 struct btrfs_extent_item *ei;
10772 struct btrfs_extent_inline_ref *iref;
10773 struct btrfs_extent_data_ref *dref;
10777 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10778 u32 item_size = btrfs_item_size_nr(eb, slot);
10783 struct btrfs_key key;
/* Account the extent in the global bytes_used statistic. */
10787 btrfs_item_key_to_cpu(eb, &key, slot);
10788 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10789 bytes_used += key.offset;
10791 bytes_used += nodesize;
10793 if (item_size < sizeof(*ei)) {
10795 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10796 * old thing when on disk format is still un-determined.
10797 * No need to care about it anymore
10799 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10803 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10804 flags = btrfs_extent_flags(eb, ei);
10806 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10808 if (metadata && check_crossing_stripes(global_info, key.objectid,
10810 error("bad metadata [%llu, %llu) crossing stripe boundary",
10811 key.objectid, key.objectid + nodesize);
10812 err |= CROSSING_STRIPE_BOUNDARY;
10815 ptr = (unsigned long)(ei + 1);
10817 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10818 /* Old EXTENT_ITEM metadata */
10819 struct btrfs_tree_block_info *info;
10821 info = (struct btrfs_tree_block_info *)ptr;
10822 level = btrfs_tree_block_level(eb, info);
10823 ptr += sizeof(struct btrfs_tree_block_info);
10825 /* New METADATA_ITEM */
10826 level = key.offset;
10828 end = (unsigned long)ei + item_size;
10831 /* Reached extent item end normally */
10835 /* Beyond extent item end, wrong item size */
10837 err |= ITEM_SIZE_MISMATCH;
10838 error("extent item at bytenr %llu slot %d has wrong size",
10843 /* Now check every backref in this extent item */
10844 iref = (struct btrfs_extent_inline_ref *)ptr;
10845 type = btrfs_extent_inline_ref_type(eb, iref);
10846 offset = btrfs_extent_inline_ref_offset(eb, iref);
10848 case BTRFS_TREE_BLOCK_REF_KEY:
10849 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10853 case BTRFS_SHARED_BLOCK_REF_KEY:
10854 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10858 case BTRFS_EXTENT_DATA_REF_KEY:
10859 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10860 ret = check_extent_data_backref(fs_info,
10861 btrfs_extent_data_ref_root(eb, dref),
10862 btrfs_extent_data_ref_objectid(eb, dref),
10863 btrfs_extent_data_ref_offset(eb, dref),
10864 key.objectid, key.offset,
10865 btrfs_extent_data_ref_count(eb, dref));
10868 case BTRFS_SHARED_DATA_REF_KEY:
10869 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10873 error("extent[%llu %d %llu] has unknown ref type: %d",
10874 key.objectid, key.type, key.offset, type);
10875 err |= UNKNOWN_TYPE;
10879 ptr += btrfs_extent_inline_ref_size(type);
10887 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that a DEV_EXTENT item is referenced by its chunk: the chunk named
 * in the dev extent must exist in the chunk tree, have the same length, and
 * list a stripe whose devid and offset equal the key of the dev extent.
 *
 * @fs_info: filesystem handle; provides the chunk root
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the item in @eb
 *
 * Returns REFERENCER_MISSING after printing a message when no matching chunk
 * stripe is found.
 *
 * NOTE(review): the handling of a failed chunk search, the assignment of the
 * leaf used for the chunk item, the statements executed on a stripe match
 * and the success return are not visible in this listing; confirm against
 * the full source.
 */
10889 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10890 struct extent_buffer *eb, int slot)
10892 struct btrfs_root *chunk_root = fs_info->chunk_root;
10893 struct btrfs_dev_extent *ptr;
10894 struct btrfs_path path;
10895 struct btrfs_key chunk_key;
10896 struct btrfs_key devext_key;
10897 struct btrfs_chunk *chunk;
10898 struct extent_buffer *l;
10902 int found_chunk = 0;
10905 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10906 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10907 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the chunk objectid and offset in the dev extent. */
10909 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10910 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10911 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10913 btrfs_init_path(&path);
10914 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10919 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* The chunk length is compared with the dev extent length as read above. */
10920 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that sits on this device at this offset. */
10923 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10924 for (i = 0; i < num_stripes; i++) {
10925 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10926 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10928 if (devid == devext_key.objectid &&
10929 offset == devext_key.offset) {
10935 btrfs_release_path(&path);
10936 if (!found_chunk) {
10938 "device extent[%llu, %llu, %llu] did not find the related chunk",
10939 devext_key.objectid, devext_key.offset, length);
10940 return REFERENCER_MISSING;
10946 * Check if the used space is correct with the dev item
/*
 * Verify the bytes-used field of the dev item at @slot of leaf @eb.
 *
 * All BTRFS_DEV_EXTENT_KEY items of the device are iterated in
 * fs_info->dev_root and their lengths are summed into total, which is then
 * compared with the bytes-used value stored in the dev item.
 *
 * Returns REFERENCER_MISSING when the dev extent lookup reports failure and
 * ACCOUNTING_MISMATCH when the sum differs from bytes-used.
 */
10948 static int check_dev_item(struct btrfs_fs_info *fs_info,
10949 struct extent_buffer *eb, int slot)
10951 struct btrfs_root *dev_root = fs_info->dev_root;
10952 struct btrfs_dev_item *dev_item;
10953 struct btrfs_path path;
10954 struct btrfs_key key;
10955 struct btrfs_dev_extent *ptr;
10961 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10962 dev_id = btrfs_device_id(eb, dev_item);
10963 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by the id of the device they live on */
10965 key.objectid = dev_id;
10966 key.type = BTRFS_DEV_EXTENT_KEY;
10969 btrfs_init_path(&path);
10970 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the key of the dev item itself so the message names it */
10972 btrfs_item_key_to_cpu(eb, &key, slot);
10973 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10974 key.objectid, key.type, key.offset);
10975 btrfs_release_path(&path);
10976 return REFERENCER_MISSING;
10979 /* Iterate dev_extents to calculate the used space of a device */
10981 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10984 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Keys are sorted, so a larger objectid means this device is done */
10985 if (key.objectid > dev_id)
10987 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10990 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10991 struct btrfs_dev_extent);
10992 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10994 ret = btrfs_next_item(dev_root, &path);
10998 btrfs_release_path(&path);
/*
 * NOTE(review): key is reloaded from the dev item just below, yet the
 * message prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * for the dev[...] triple instead of key.objectid, key.type and key.offset
 * as the earlier message in this function does - confirm which is intended.
 */
11000 if (used != total) {
11001 btrfs_item_key_to_cpu(eb, &key, slot);
11003 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11004 total, used, BTRFS_ROOT_TREE_OBJECTID,
11005 BTRFS_DEV_EXTENT_KEY, dev_id);
11006 return ACCOUNTING_MISMATCH;
11012 * Check a block group item with its referencer (chunk) and its used space
11013 * with extent/metadata item
/*
 * Verify the block group item at @slot of leaf @eb.
 *
 * Two things are checked:
 *  1) The chunk tree must hold a chunk item keyed
 *     (BTRFS_FIRST_CHUNK_TREE_OBJECTID, BTRFS_CHUNK_ITEM_KEY, block group
 *     start); a failed lookup adds REFERENCER_MISSING and a chunk length
 *     mismatch adds REFERENCER_MISMATCH.
 *  2) The extent tree is walked from the block group start up to
 *     start + length.  EXTENT_ITEM and METADATA_ITEM sizes are summed into
 *     total and the extent flags (data / tree block) are compared with the
 *     block group flags; a disagreement adds CHUNK_TYPE_MISMATCH.  A final
 *     total different from the used value of the item adds
 *     ACCOUNTING_MISMATCH.
 *
 * Error bits are accumulated in err.
 */
11015 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11016 struct extent_buffer *eb, int slot)
11018 struct btrfs_root *extent_root = fs_info->extent_root;
11019 struct btrfs_root *chunk_root = fs_info->chunk_root;
11020 struct btrfs_block_group_item *bi;
11021 struct btrfs_block_group_item bg_item;
11022 struct btrfs_path path;
11023 struct btrfs_key bg_key;
11024 struct btrfs_key chunk_key;
11025 struct btrfs_key extent_key;
11026 struct btrfs_chunk *chunk;
11027 struct extent_buffer *leaf;
11028 struct btrfs_extent_item *ei;
11029 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11037 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11038 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
11039 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11040 used = btrfs_block_group_used(&bg_item);
11041 bg_flags = btrfs_block_group_flags(&bg_item);
11043 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11044 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11045 chunk_key.offset = bg_key.objectid;
11047 btrfs_init_path(&path);
11048 /* Search for the referencer chunk */
11049 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11052 "block group[%llu %llu] did not find the related chunk item",
11053 bg_key.objectid, bg_key.offset);
11054 err |= REFERENCER_MISSING;
11056 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11057 struct btrfs_chunk);
11058 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11061 "block group[%llu %llu] related chunk item length does not match",
11062 bg_key.objectid, bg_key.offset);
11063 err |= REFERENCER_MISMATCH;
11066 btrfs_release_path(&path);
11068 /* Search from the block group bytenr */
11069 extent_key.objectid = bg_key.objectid;
11070 extent_key.type = 0;
11071 extent_key.offset = 0;
11073 btrfs_init_path(&path);
11074 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11078 /* Iterate extent tree to account used space */
11080 leaf = path.nodes[0];
11082 /* Search slot can point to the last item beyond leaf nritems */
11083 if (path.slots[0] >= btrfs_header_nritems(leaf))
11086 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of this block group */
11087 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11090 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11091 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11093 if (extent_key.objectid < bg_key.objectid)
11096 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11099 total += extent_key.offset;
11101 ei = btrfs_item_ptr(leaf, path.slots[0],
11102 struct btrfs_extent_item);
11103 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group */
11104 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11105 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11107 "bad extent[%llu, %llu) type mismatch with chunk",
11108 extent_key.objectid,
11109 extent_key.objectid + extent_key.offset);
11110 err |= CHUNK_TYPE_MISMATCH;
/* Tree blocks need a SYSTEM or METADATA block group */
11112 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11113 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11114 BTRFS_BLOCK_GROUP_METADATA))) {
11116 "bad extent[%llu, %llu) type mismatch with chunk",
11117 extent_key.objectid,
11118 extent_key.objectid + nodesize);
11119 err |= CHUNK_TYPE_MISMATCH;
11123 ret = btrfs_next_item(extent_root, &path);
11129 btrfs_release_path(&path);
11131 if (total != used) {
11133 "block group[%llu %llu] used %llu but extent items used %llu",
11134 bg_key.objectid, bg_key.offset, used, total);
11135 err |= ACCOUNTING_MISMATCH;
11141 * Check a chunk item.
11142 * Including checking all referred dev_extents and block group
/*
 * Verify the chunk item at @slot of leaf @eb together with the items that
 * must refer back to it.
 *
 * Checks performed, each adding a bit to err on failure:
 *  - chunk length aligned to the sector size (BYTES_UNALIGNED)
 *  - at least one block group type bit set (UNKNOWN_TYPE)
 *  - at most one profile bit set (UNKNOWN_TYPE)
 *  - a block group item keyed (chunk start, BTRFS_BLOCK_GROUP_ITEM_KEY,
 *    chunk length) exists in the extent tree and carries the same flags
 *  - every stripe has a dev extent keyed (stripe devid,
 *    BTRFS_DEV_EXTENT_KEY, stripe offset) in the dev tree whose chunk
 *    objectid, chunk offset and length match this chunk (BACKREF_MISSING)
 */
11144 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11145 struct extent_buffer *eb, int slot)
11147 struct btrfs_root *extent_root = fs_info->extent_root;
11148 struct btrfs_root *dev_root = fs_info->dev_root;
11149 struct btrfs_path path;
11150 struct btrfs_key chunk_key;
11151 struct btrfs_key bg_key;
11152 struct btrfs_key devext_key;
11153 struct btrfs_chunk *chunk;
11154 struct extent_buffer *leaf;
11155 struct btrfs_block_group_item *bi;
11156 struct btrfs_block_group_item bg_item;
11157 struct btrfs_dev_extent *ptr;
11158 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11170 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11171 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11172 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11173 chunk_end = chunk_key.offset + length;
11174 if (!IS_ALIGNED(length, sectorsize)) {
11175 error("chunk[%llu %llu) not aligned to %u",
11176 chunk_key.offset, chunk_end, sectorsize);
11177 err |= BYTES_UNALIGNED;
11181 type = btrfs_chunk_type(eb, chunk);
11182 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11183 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11184 error("chunk[%llu %llu) has no chunk type",
11185 chunk_key.offset, chunk_end);
11186 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11188 if (profile && (profile & (profile - 1))) {
11189 error("chunk[%llu %llu) multiple profiles detected: %llx",
11190 chunk_key.offset, chunk_end, profile);
11191 err |= UNKNOWN_TYPE;
/* Key of the block group item that must mirror this chunk */
11194 bg_key.objectid = chunk_key.offset;
11195 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11196 bg_key.offset = length;
11198 btrfs_init_path(&path);
11199 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11202 "chunk[%llu %llu) did not find the related block group item",
11203 chunk_key.offset, chunk_end);
11204 err |= REFERENCER_MISSING;
11206 leaf = path.nodes[0];
11207 bi = btrfs_item_ptr(leaf, path.slots[0],
11208 struct btrfs_block_group_item);
11209 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING,
 * while check_block_group_item() reports a length mismatch with
 * REFERENCER_MISMATCH - confirm whether REFERENCER_MISMATCH was meant here.
 */
11211 if (btrfs_block_group_flags(&bg_item) != type) {
11213 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11214 chunk_key.offset, chunk_end, type,
11215 btrfs_block_group_flags(&bg_item));
11216 err |= REFERENCER_MISSING;
11220 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* Every stripe must be backed by a matching dev extent */
11221 for (i = 0; i < num_stripes; i++) {
11222 btrfs_release_path(&path);
11223 btrfs_init_path(&path);
11224 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11225 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11226 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11228 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11231 goto not_match_dev;
11233 leaf = path.nodes[0];
11234 ptr = btrfs_item_ptr(leaf, path.slots[0],
11235 struct btrfs_dev_extent);
11236 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11237 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11238 if (objectid != chunk_key.objectid ||
11239 offset != chunk_key.offset ||
11240 btrfs_dev_extent_length(leaf, ptr) != length)
11241 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the range
 * start, whereas every other message in this function prints
 * chunk_key.offset - confirm which value is intended.
 */
11244 err |= BACKREF_MISSING;
11246 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11247 chunk_key.objectid, chunk_end, i);
11250 btrfs_release_path(&path);
11256 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and run the checker that matches each item
 * key type:
 *   EXTENT_DATA                 check_extent_data_item()
 *   BLOCK_GROUP_ITEM            check_block_group_item()
 *   DEV_ITEM                    check_dev_item()
 *   CHUNK_ITEM                  check_chunk_item()
 *   DEV_EXTENT                  check_dev_extent_item()
 *   EXTENT_ITEM, METADATA_ITEM  check_extent_item()
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                               the corresponding keyed backref checker
 * EXTENT_CSUM items are not verified; their item size is only added to
 * total_csum_bytes.
 *
 * The walk continues while the incremented slot is below the item count of
 * the leaf.
 */
11258 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11260 struct btrfs_fs_info *fs_info = root->fs_info;
11261 struct btrfs_key key;
11264 struct btrfs_extent_data_ref *dref;
11269 btrfs_item_key_to_cpu(eb, &key, slot);
11273 case BTRFS_EXTENT_DATA_KEY:
11274 ret = check_extent_data_item(root, eb, slot);
11277 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11278 ret = check_block_group_item(fs_info, eb, slot);
11281 case BTRFS_DEV_ITEM_KEY:
11282 ret = check_dev_item(fs_info, eb, slot);
11285 case BTRFS_CHUNK_ITEM_KEY:
11286 ret = check_chunk_item(fs_info, eb, slot);
11289 case BTRFS_DEV_EXTENT_KEY:
11290 ret = check_dev_extent_item(fs_info, eb, slot);
11293 case BTRFS_EXTENT_ITEM_KEY:
11294 case BTRFS_METADATA_ITEM_KEY:
11295 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items only contribute to the csum byte accounting */
11298 case BTRFS_EXTENT_CSUM_KEY:
11299 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items, checked like their inline variants */
11301 case BTRFS_TREE_BLOCK_REF_KEY:
11302 ret = check_tree_block_backref(fs_info, key.offset,
11306 case BTRFS_EXTENT_DATA_REF_KEY:
11307 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11308 ret = check_extent_data_backref(fs_info,
11309 btrfs_extent_data_ref_root(eb, dref),
11310 btrfs_extent_data_ref_objectid(eb, dref),
11311 btrfs_extent_data_ref_offset(eb, dref),
11313 btrfs_extent_data_ref_count(eb, dref));
11316 case BTRFS_SHARED_BLOCK_REF_KEY:
11317 ret = check_shared_block_backref(fs_info, key.offset,
11321 case BTRFS_SHARED_DATA_REF_KEY:
11322 ret = check_shared_data_backref(fs_info, key.offset,
11330 if (++slot < btrfs_header_nritems(eb))
11337 * Helper function for later fs/subvol tree check. To determine if a tree
11338 * block should be checked.
11339 * This function ensures that only the direct referencer with the lowest
11340 * rootid checks a fs/subvolume tree block.
11342 * Backref check at extent tree would detect errors like missing subvolume
11343 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the root that should check tree block @eb.
 *
 * The extent item of @eb is located in the extent tree by searching with
 * (bytenr of @eb, BTRFS_METADATA_ITEM_KEY, (u64)-1) and then stepping back
 * with btrfs_previous_extent_item().  The inline refs of that item are
 * walked; for a METADATA_ITEM they start right after the extent item, for
 * an EXTENT_ITEM they start after the tree block info that follows it.
 *
 * When an inline TREE_BLOCK_REF names a root id lower than root->objectid
 * the path is released and the walk stops early.  Presumably that is the
 * do-not-check answer, since the caller skips the block when this function
 * returns 0 - TODO confirm against the return statements.
 */
11345 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11347 struct btrfs_root *extent_root = root->fs_info->extent_root;
11348 struct btrfs_key key;
11349 struct btrfs_path path;
11350 struct extent_buffer *leaf;
11352 struct btrfs_extent_item *ei;
11358 struct btrfs_extent_inline_ref *iref;
11361 btrfs_init_path(&path);
11362 key.objectid = btrfs_header_bytenr(eb);
11363 key.type = BTRFS_METADATA_ITEM_KEY;
11364 key.offset = (u64)-1;
11367 * Any failure in backref resolving means we can't determine
11368 * whom the tree block belongs to.
11369 * So in that case, we need to check that tree block
11371 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11375 ret = btrfs_previous_extent_item(extent_root, &path,
11376 btrfs_header_bytenr(eb));
11380 leaf = path.nodes[0];
11381 slot = path.slots[0];
11382 btrfs_item_key_to_cpu(leaf, &key, slot);
11383 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11385 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11386 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11388 struct btrfs_tree_block_info *info;
11390 info = (struct btrfs_tree_block_info *)(ei + 1);
11391 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11394 item_size = btrfs_item_size_nr(leaf, slot);
11395 ptr = (unsigned long)iref;
11396 end = (unsigned long)ei + item_size;
11397 while (ptr < end) {
11398 iref = (struct btrfs_extent_inline_ref *)ptr;
11399 type = btrfs_extent_inline_ref_type(leaf, iref);
11400 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11403 * We only check the tree block if current root is
11404 * the lowest referencer of it.
11406 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11407 offset < root->objectid) {
11408 btrfs_release_path(&path);
11412 ptr += btrfs_extent_inline_ref_size(type);
11415 * Normally we should also check keyed tree block ref, but that may be
11416 * very time consuming. Inlined ref should already make us skip a lot
11417 * of refs now. So skip search keyed tree block ref.
11421 btrfs_release_path(&path);
11426 * Traversal function for tree block. We will do:
11427 * 1) Skip shared fs/subvolume tree blocks
11428 * 2) Update related bytes accounting
11429 * 3) Pre-order traversal
/*
 * Recursively check tree block @node, which belongs to @root, and all the
 * blocks below it.
 *
 * Order of work:
 *  - For fs/subvolume trees, should_check() decides whether this root is
 *    the one that checks the block.
 *  - Global byte counters (total_btree_bytes, total_fs_tree_bytes,
 *    total_extent_tree_bytes) are updated from node->len and
 *    found_old_backref is latched for old-style reloc tree blocks.
 *  - The block itself is checked with check_tree_block_ref(); a failure is
 *    reported but the traversal continues.
 *  - A leaf has its free space added to btree_space_waste and its items
 *    checked by check_leaf_items().
 *  - A node has its unused key pointer slots added to btree_space_waste,
 *    then each child is read with read_tree_block() and traversed
 *    recursively if the buffer is up to date.  Children whose key is
 *    covered by the drop progress of the root at the drop level are tested
 *    with is_dropped_key() first.
 */
11431 static int traverse_tree_block(struct btrfs_root *root,
11432 struct extent_buffer *node)
11434 struct extent_buffer *eb;
11435 struct btrfs_key key;
11436 struct btrfs_key drop_key;
11444 * Skip shared fs/subvolume tree block, in that case they will
11445 * be checked by referencer with lowest rootid
11447 if (is_fstree(root->objectid) && !should_check(root, node))
11450 /* Update bytes accounting */
11451 total_btree_bytes += node->len;
11452 if (fs_root_objectid(btrfs_header_owner(node)))
11453 total_fs_tree_bytes += node->len;
11454 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11455 total_extent_tree_bytes += node->len;
11456 if (!found_old_backref &&
11457 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11458 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11459 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11460 found_old_backref = 1;
11462 /* pre-order traversal, check itself first */
11463 level = btrfs_header_level(node);
11464 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11465 btrfs_header_level(node),
11466 btrfs_header_owner(node));
11470 "check %s failed root %llu bytenr %llu level %d, force continue check",
11471 level ? "node":"leaf", root->objectid,
11472 btrfs_header_bytenr(node), btrfs_header_level(node));
11475 btree_space_waste += btrfs_leaf_free_space(root, node);
11476 ret = check_leaf_items(root, node);
11481 nr = btrfs_header_nritems(node);
11482 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11483 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11484 sizeof(struct btrfs_key_ptr);
11486 /* Then check all its children */
11487 for (i = 0; i < nr; i++) {
11488 u64 blocknr = btrfs_node_blockptr(node, i);
11490 btrfs_node_key_to_cpu(node, &key, i);
11491 if (level == root->root_item.drop_level &&
11492 is_dropped_key(&key, &drop_key))
11496 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11497 * to call the function itself.
11499 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11500 if (extent_buffer_uptodate(eb)) {
11501 ret = traverse_tree_block(root, eb);
11504 free_extent_buffer(eb);
11511 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory variant of the chunk and extent check, driven by
 * traverse_tree_block().
 *
 * The chunk tree and the tree root are traversed first.  Then the tree
 * root is searched starting at the BTRFS_EXTENT_TREE_OBJECTID root item
 * and every BTRFS_ROOT_ITEM_KEY found from there on is read and traversed.
 * Reloc tree roots are read with btrfs_read_fs_root_no_cache() and freed
 * with btrfs_free_fs_root() after the traversal; all other roots go
 * through btrfs_read_fs_root().
 *
 * NOTE(review): the not-found message misspells tree as treet, and the
 * failed-to-read message formats key.objectid with %lld although other
 * messages in this file print key objectids with %llu.  Both are runtime
 * strings and are left unchanged here.
 */
11513 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11515 struct btrfs_path path;
11516 struct btrfs_key key;
11517 struct btrfs_root *root1;
11518 struct btrfs_root *cur_root;
/* Chunk tree first, then the tree root itself */
11522 root1 = root->fs_info->chunk_root;
11523 ret = traverse_tree_block(root1, root1->node);
11526 root1 = root->fs_info->tree_root;
11527 ret = traverse_tree_block(root1, root1->node);
11530 btrfs_init_path(&path);
11531 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11533 key.type = BTRFS_ROOT_ITEM_KEY;
11535 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11537 error("cannot find extent treet in tree_root");
11542 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11543 if (key.type != BTRFS_ROOT_ITEM_KEY)
11545 key.offset = (u64)-1;
11547 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11548 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11551 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11552 if (IS_ERR(cur_root) || !cur_root) {
11553 error("failed to read tree: %lld", key.objectid);
11557 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached above, so they are freed here */
11560 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11561 btrfs_free_fs_root(cur_root);
11563 ret = btrfs_next_item(root1, &path);
11569 btrfs_release_path(&path);
/*
 * Give @root a fresh root block inside transaction @trans.
 *
 * The new block c is either taken with extent_buffer_get() or allocated
 * with btrfs_alloc_free_block() (presumably selected by @overwrite - TODO
 * confirm).  Its header is zeroed and refilled with level, bytenr,
 * generation (trans->transid), backref revision and owner, followed by the
 * filesystem fsid and the chunk tree uuid, and the buffer is marked dirty.
 *
 * When the new block starts at the same bytenr as the old root node, the
 * root item generation and level are refreshed and written back with
 * btrfs_update_root().  The old node reference is dropped and the root is
 * put on the dirty list.
 */
11573 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11574 struct btrfs_root *root, int overwrite)
11576 struct extent_buffer *c;
11577 struct extent_buffer *old = root->node;
11580 struct btrfs_disk_key disk_key = {0,0,0};
11586 extent_buffer_get(c);
11589 c = btrfs_alloc_free_block(trans, root,
11591 root->root_key.objectid,
11592 &disk_key, level, 0, 0);
11595 extent_buffer_get(c);
/* Start from an all-zero header and fill in the mandatory fields */
11599 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11600 btrfs_set_header_level(c, level);
11601 btrfs_set_header_bytenr(c, c->start);
11602 btrfs_set_header_generation(c, trans->transid);
11603 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11604 btrfs_set_header_owner(c, root->root_key.objectid);
11606 write_extent_buffer(c, root->fs_info->fsid,
11607 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11609 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11610 btrfs_header_chunk_tree_uuid(c),
11613 btrfs_mark_buffer_dirty(c);
11615 * this case can happen in the following case:
11617 * 1.overwrite previous root.
11619 * 2.reinit reloc data root, this is because we skip pin
11620 * down reloc data tree before which means we can allocate
11621 * same block bytenr here.
11623 if (old->start == c->start) {
11624 btrfs_set_root_generation(&root->root_item,
11626 root->root_item.level = btrfs_header_level(root->node);
11627 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11628 &root->root_key, &root->root_item);
11630 free_extent_buffer(c);
11634 free_extent_buffer(old);
11636 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is detected with test_range_bit() before any
 * pinning is done; otherwise the block is pinned with btrfs_pin_extent()
 * and its entries are walked:
 *  - Entries read as items: only BTRFS_ROOT_ITEM_KEY items are followed,
 *    skipping the extent root, the tree reloc roots and the data reloc
 *    root.  The root block named by the root item is read and pinned
 *    recursively with tree_root = 0.
 *  - Entries read as node pointers: at level 1 of a tree other than the
 *    tree root the child is pinned by bytenr and nodesize without being
 *    read; otherwise the child is read and pinned recursively with the
 *    same @tree_root value.
 *
 * @tree_root is non-zero while walking the tree root, presumably so that
 * its leaves are read and the root items in them can be followed - TODO
 * confirm.
 */
11640 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11641 struct extent_buffer *eb, int tree_root)
11643 struct extent_buffer *tmp;
11644 struct btrfs_root_item *ri;
11645 struct btrfs_key key;
11648 int level = btrfs_header_level(eb);
11654 * If we have pinned this block before, don't pin it again.
11655 * This can not only avoid forever loop with broken filesystem
11656 * but also give us some speedups.
11658 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11659 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11662 btrfs_pin_extent(fs_info, eb->start, eb->len);
11664 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11665 nritems = btrfs_header_nritems(eb);
11666 for (i = 0; i < nritems; i++) {
11668 btrfs_item_key_to_cpu(eb, &key, i);
11669 if (key.type != BTRFS_ROOT_ITEM_KEY)
11671 /* Skip the extent root and reloc roots */
11672 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11673 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11674 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11676 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11677 bytenr = btrfs_disk_root_bytenr(eb, ri);
11680 * If at any point we start needing the real root we
11681 * will have to build a stump root for the root we are
11682 * in, but for now this doesn't actually use the root so
11683 * just pass in extent_root.
11685 tmp = read_tree_block(fs_info->extent_root, bytenr,
11687 if (!extent_buffer_uptodate(tmp)) {
11688 fprintf(stderr, "Error reading root block\n");
11691 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11692 free_extent_buffer(tmp);
11696 bytenr = btrfs_node_blockptr(eb, i);
11698 /* If we aren't the tree root don't read the block */
11699 if (level == 1 && !tree_root) {
11700 btrfs_pin_extent(fs_info, bytenr, nodesize);
11704 tmp = read_tree_block(fs_info->extent_root, bytenr,
11706 if (!extent_buffer_uptodate(tmp)) {
11707 fprintf(stderr, "Error reading tree block\n");
11710 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11711 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (tree_root = 0) and then
 * from the tree root (tree_root = 1).  The result of the tree root walk is
 * returned directly.
 */
11720 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11724 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11728 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_data/metadata/system_alloc_bits of @fs_info are cleared, then
 * the chunk tree is walked leaf by leaf.  For every BTRFS_CHUNK_ITEM_KEY
 * item a block group is created with btrfs_add_block_group() using the
 * chunk type, key and length, and the range starting at the chunk offset
 * is marked dirty in fs_info->free_space_cache.
 *
 * Afterwards the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing start past each one.
 */
11731 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11733 struct btrfs_block_group_cache *cache;
11734 struct btrfs_path path;
11735 struct extent_buffer *leaf;
11736 struct btrfs_chunk *chunk;
11737 struct btrfs_key key;
11741 btrfs_init_path(&path);
11743 key.type = BTRFS_CHUNK_ITEM_KEY;
11745 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11747 btrfs_release_path(&path);
11752 * We do this in case the block groups were screwed up and had alloc
11753 * bits that aren't actually set on the chunks. This happens with
11754 * restored images every time and could happen in real life I guess.
11756 fs_info->avail_data_alloc_bits = 0;
11757 fs_info->avail_metadata_alloc_bits = 0;
11758 fs_info->avail_system_alloc_bits = 0;
11760 /* First we need to create the in-memory block groups */
11762 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11763 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11765 btrfs_release_path(&path);
11773 leaf = path.nodes[0];
11774 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11775 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11780 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11781 btrfs_add_block_group(fs_info, 0,
11782 btrfs_chunk_type(leaf, chunk),
11783 key.objectid, key.offset,
11784 btrfs_chunk_length(leaf, chunk));
11785 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11786 key.offset + btrfs_chunk_length(leaf, chunk));
11791 cache = btrfs_lookup_first_block_group(fs_info, start);
11795 start = cache->key.objectid + cache->key.offset;
11798 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and
 * re-initialize the data reloc tree, all inside @trans.
 *
 * Steps visible in the code:
 *  1) Search for the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item
 *     and delete it with btrfs_del_item().
 *  2) Search for BTRFS_TREE_RELOC_OBJECTID root items and delete them with
 *     btrfs_del_items(), collecting a run of slots in del_slot / del_nr
 *     before each call.
 *  3) Read the data reloc root, record it in the transaction, reinit it
 *     with btrfs_fsck_reinit_root() and recreate its root directory with
 *     btrfs_make_root_dir().
 */
11802 static int reset_balance(struct btrfs_trans_handle *trans,
11803 struct btrfs_fs_info *fs_info)
11805 struct btrfs_root *root = fs_info->tree_root;
11806 struct btrfs_path path;
11807 struct extent_buffer *leaf;
11808 struct btrfs_key key;
11809 int del_slot, del_nr = 0;
11813 btrfs_init_path(&path);
11814 key.objectid = BTRFS_BALANCE_OBJECTID;
11815 key.type = BTRFS_BALANCE_ITEM_KEY;
11817 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11822 goto reinit_data_reloc;
11827 ret = btrfs_del_item(trans, root, &path);
11830 btrfs_release_path(&path);
/* Now drop the tree reloc root items */
11832 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11833 key.type = BTRFS_ROOT_ITEM_KEY;
11835 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11839 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11844 ret = btrfs_del_items(trans, root, &path,
11851 btrfs_release_path(&path);
11854 ret = btrfs_search_slot(trans, root, &key, &path,
11861 leaf = path.nodes[0];
11862 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11863 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11865 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11870 del_slot = path.slots[0];
11879 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11883 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
11886 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11887 key.type = BTRFS_ROOT_ITEM_KEY;
11888 key.offset = (u64)-1;
11889 root = btrfs_read_fs_root(fs_info, &key);
11890 if (IS_ERR(root)) {
11891 fprintf(stderr, "Error reading data reloc tree\n");
11892 ret = PTR_ERR(root);
11895 record_root_in_trans(trans, root);
11896 ret = btrfs_fsck_reinit_root(trans, root, 0);
11899 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11901 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Sequence visible in the code:
 *  1) Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message.
 *  2) pin_metadata_blocks() pins the metadata that is currently in use.
 *  3) The in-memory block groups are freed and recreated from the chunk
 *     tree by reset_block_groups().
 *  4) The extent root is re-initialized with btrfs_fsck_reinit_root().
 *  5) For every in-memory block group, its item is inserted into the new
 *     extent root, followed by btrfs_extent_post_op().
 *  6) reset_balance() clears any pending balance state.
 *
 * Each failing step prints a message to stderr.
 */
11905 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11906 struct btrfs_fs_info *fs_info)
11912 * The only reason we don't do this is because right now we're just
11913 * walking the trees we find and pinning down their bytes, we don't look
11914 * at any of the leaves. In order to do mixed groups we'd have to check
11915 * the leaves of any fs roots and pin down the bytes for any file
11916 * extents we find. Not hard but why do it if we don't have to?
11918 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11919 fprintf(stderr, "We don't support re-initing the extent tree "
11920 "for mixed block groups yet, please notify a btrfs "
11921 "developer you want to do this so they can add this "
11922 "functionality.\n");
11927 * first we need to walk all of the trees except the extent tree and pin
11928 * down the bytes that are in use so we don't overwrite any existing
11931 ret = pin_metadata_blocks(fs_info);
11933 fprintf(stderr, "error pinning down used bytes\n");
11938 * Need to drop all the block groups since we're going to recreate all
11941 btrfs_free_block_groups(fs_info);
11942 ret = reset_block_groups(fs_info);
11944 fprintf(stderr, "error resetting the block groups\n");
11948 /* Ok we can allocate now, reinit the extent root */
11949 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11951 fprintf(stderr, "extent root initialization failed\n");
11953 * When the transaction code is updated we should end the
11954 * transaction, but for now progs only knows about commit so
11955 * just return an error.
11961 * Now we have all the in-memory block groups setup so we can make
11962 * allocations properly, and the metadata we care about is safe since we
11963 * pinned all of it above.
11966 struct btrfs_block_group_cache *cache;
11968 cache = btrfs_lookup_first_block_group(fs_info, start);
11971 start = cache->key.objectid + cache->key.offset;
11972 ret = btrfs_insert_item(trans, fs_info->extent_root,
11973 &cache->key, &cache->item,
11974 sizeof(cache->item));
11976 fprintf(stderr, "Error adding block group\n");
11979 btrfs_extent_post_op(trans, fs_info->extent_root);
11982 ret = reset_balance(trans, fs_info);
11984 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb.
 *
 * The owner root recorded in the header of @eb is read (replacing the
 * @root argument), a transaction is started on it, and the tree is
 * searched for the first key of @eb with cow = 1 and path.lowest_level set
 * to the level of @eb.  The transaction is then committed and the path
 * released.
 *
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here - confirm that ignoring a commit failure is intended.
 */
11989 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11991 struct btrfs_path path;
11992 struct btrfs_trans_handle *trans;
11993 struct btrfs_key key;
11996 printf("Recowing metadata block %llu\n", eb->start);
11997 key.objectid = btrfs_header_owner(eb);
11998 key.type = BTRFS_ROOT_ITEM_KEY;
11999 key.offset = (u64)-1;
12001 root = btrfs_read_fs_root(root->fs_info, &key);
12002 if (IS_ERR(root)) {
12003 fprintf(stderr, "Couldn't find owner root %llu\n",
12005 return PTR_ERR(root);
12008 trans = btrfs_start_transaction(root, 1);
12010 return PTR_ERR(trans);
12012 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself gets cowed */
12013 path.lowest_level = btrfs_header_level(eb);
12014 if (path.lowest_level)
12015 btrfs_node_key_to_cpu(eb, &key, 0);
12017 btrfs_item_key_to_cpu(eb, &key, 0);
12019 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12020 btrfs_commit_transaction(trans, root);
12021 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was found in.
 *
 * The root identified by bad->root_id is read (replacing the @root
 * argument), a transaction is started on it, the item is located with
 * btrfs_search_slot() using bad->key and removed with btrfs_del_item().
 * The transaction is then committed and the path released.
 *
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured here - confirm that ignoring a commit failure is intended.
 */
12025 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12027 struct btrfs_path path;
12028 struct btrfs_trans_handle *trans;
12029 struct btrfs_key key;
12032 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12033 bad->key.type, bad->key.offset);
12034 key.objectid = bad->root_id;
12035 key.type = BTRFS_ROOT_ITEM_KEY;
12036 key.offset = (u64)-1;
12038 root = btrfs_read_fs_root(root->fs_info, &key);
12039 if (IS_ERR(root)) {
12040 fprintf(stderr, "Couldn't find owner root %llu\n",
12042 return PTR_ERR(root);
12045 trans = btrfs_start_transaction(root, 1);
12047 return PTR_ERR(trans);
12049 btrfs_init_path(&path);
12050 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12056 ret = btrfs_del_item(trans, root, &path);
12058 btrfs_commit_transaction(trans, root);
12059 btrfs_release_path(&path);
/*
 * Clear the log tree reference stored in the superblock.
 *
 * Inside a new transaction on @root, the log root bytenr and the log root
 * level in fs_info->super_copy are both set to 0, and the transaction is
 * committed.  ret holds PTR_ERR() of the handle when the transaction cannot
 * be started, otherwise the result of the commit.
 */
12063 static int zero_log_tree(struct btrfs_root *root)
12065 struct btrfs_trans_handle *trans;
12068 trans = btrfs_start_transaction(root, 1);
12069 if (IS_ERR(trans)) {
12070 ret = PTR_ERR(trans);
12073 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12074 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12075 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start.
 *
 * The range is processed one sector (csum_root->sectorsize) at a time while
 * offset is below len: each sector is read into @buf with
 * read_extent_data() and handed to btrfs_csum_file_block(), which receives
 * start + len, the bytenr of the sector, the buffer and the sector size.
 *
 * @buf is supplied by the caller; callers in this file allocate it with
 * one sector of space.
 */
12079 static int populate_csum(struct btrfs_trans_handle *trans,
12080 struct btrfs_root *csum_root, char *buf, u64 start,
12087 while (offset < len) {
12088 sectorsize = csum_root->sectorsize;
12089 ret = read_extent_data(csum_root, buf, start + offset,
12093 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12094 start + offset, buf, sectorsize);
12097 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the regular file extents referenced
 * by one fs/subvolume tree, @cur_root.
 *
 * A one-sector buffer is allocated, then the items of @cur_root are
 * iterated with btrfs_next_item().  Items are tested for
 * BTRFS_EXTENT_DATA_KEY and for file extent type BTRFS_FILE_EXTENT_REG;
 * for a regular extent the on-disk range (disk_bytenr, disk_num_bytes) is
 * passed to populate_csum().  A result of -EEXIST from populate_csum() is
 * tested for explicitly, presumably because an extent shared by several
 * files may already have its checksums - TODO confirm.
 */
12102 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12103 struct btrfs_root *csum_root,
12104 struct btrfs_root *cur_root)
12106 struct btrfs_path path;
12107 struct btrfs_key key;
12108 struct extent_buffer *node;
12109 struct btrfs_file_extent_item *fi;
12116 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12120 btrfs_init_path(&path);
12124 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12127 /* Iterate all regular file extents and fill its csum */
12129 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12131 if (key.type != BTRFS_EXTENT_DATA_KEY)
12133 node = path.nodes[0];
12134 slot = path.slots[0];
12135 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12136 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12138 start = btrfs_file_extent_disk_bytenr(node, fi);
12139 len = btrfs_file_extent_disk_num_bytes(node, fi);
12141 ret = populate_csum(trans, csum_root, buf, start, len);
12142 if (ret == -EEXIST)
12148 * TODO: if next leaf is corrupted, jump to nearest next valid
12151 ret = btrfs_next_item(cur_root, &path);
12161 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking every fs/subvolume tree.
 *
 * The tree root is searched starting at the BTRFS_FS_TREE_OBJECTID root
 * item and iterated with btrfs_next_item().  The walk tests for objectids
 * above BTRFS_LAST_FREE_OBJECTID, for BTRFS_ROOT_ITEM_KEY items and for
 * is_fstree() objectids.  Each selected root is read with
 * btrfs_read_fs_root() and handed to fill_csum_tree_from_one_fs_root().
 */
12166 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12167 struct btrfs_root *csum_root)
12169 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12170 struct btrfs_path path;
12171 struct btrfs_root *tree_root = fs_info->tree_root;
12172 struct btrfs_root *cur_root;
12173 struct extent_buffer *node;
12174 struct btrfs_key key;
12178 btrfs_init_path(&path);
12179 key.objectid = BTRFS_FS_TREE_OBJECTID;
12181 key.type = BTRFS_ROOT_ITEM_KEY;
12182 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12191 node = path.nodes[0];
12192 slot = path.slots[0];
12193 btrfs_item_key_to_cpu(node, &key, slot);
12194 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12196 if (key.type != BTRFS_ROOT_ITEM_KEY)
12198 if (!is_fstree(key.objectid))
/* Offset -1 is the lookup form used for btrfs_read_fs_root() in this file */
12200 key.offset = (u64)-1;
12202 cur_root = btrfs_read_fs_root(fs_info, &key);
12203 if (IS_ERR(cur_root) || !cur_root) {
12204 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12208 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12213 ret = btrfs_next_item(tree_root, &path);
12223 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree.
 *
 * The extent tree is searched with a BTRFS_EXTENT_ITEM_KEY key and walked
 * leaf by leaf with btrfs_next_leaf().  Items are tested for
 * BTRFS_EXTENT_ITEM_KEY and for the BTRFS_EXTENT_FLAG_DATA flag; for a data
 * extent populate_csum() is called with key.objectid as the start of the
 * range.  A one-sector buffer is allocated for populate_csum() to read
 * into.
 */
12227 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12228 struct btrfs_root *csum_root)
12230 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12231 struct btrfs_path path;
12232 struct btrfs_extent_item *ei;
12233 struct extent_buffer *leaf;
12235 struct btrfs_key key;
12238 btrfs_init_path(&path);
12240 key.type = BTRFS_EXTENT_ITEM_KEY;
12242 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12244 btrfs_release_path(&path);
12248 buf = malloc(csum_root->sectorsize);
12250 btrfs_release_path(&path);
12255 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12256 ret = btrfs_next_leaf(extent_root, &path);
12264 leaf = path.nodes[0];
12266 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12267 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12272 ei = btrfs_item_ptr(leaf, path.slots[0],
12273 struct btrfs_extent_item);
/* The extent flags are tested for BTRFS_EXTENT_FLAG_DATA before csumming */
12274 if (!(btrfs_extent_flags(leaf, ei) &
12275 BTRFS_EXTENT_FLAG_DATA)) {
12280 ret = populate_csum(trans, csum_root, buf, key.objectid,
12287 btrfs_release_path(&path);
12293 * Recalculate the csum and put it into the csum tree.
12295 * Extent tree init will wipe out all the extent info, so in that case, we
12296 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12297 * will use fs/subvol trees to init the csum tree.
12299 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12300 struct btrfs_root *csum_root,
12301 int search_fs_tree)
/* Non-zero search_fs_tree: rebuild from the fs/subvolume trees. */
12303 if (search_fs_tree)
12304 return fill_csum_tree_from_fs(trans, csum_root);
/* Otherwise rebuild from the extent tree. */
12306 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: remove every entry from the tree,
 * free the tree itself and reset the global pointer to NULL.
 */
12309 static void free_roots_info_cache(void)
/* Nothing was ever allocated. */
12311 if (!roots_info_cache)
/* Drain the tree one entry at a time. */
12314 while (!cache_tree_empty(roots_info_cache)) {
12315 struct cache_extent *entry;
12316 struct root_item_info *rii;
12318 entry = first_cache_extent(roots_info_cache);
12321 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent. */
12322 rii = container_of(entry, struct root_item_info, cache_extent);
/* Clearing the pointer lets build_roots_info_cache() allocate a fresh tree. */
12326 free(roots_info_cache);
12327 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, information about the
 * candidate root node of that root in the global roots_info_cache.
 *
 * Entries are keyed by root id (cache_extent.start == root id, size == 1)
 * and track the highest tree block level seen together with that block's
 * bytenr and generation, plus node_count.
 *
 * @info: filesystem whose extent root is scanned
 */
12330 static int build_roots_info_cache(struct btrfs_fs_info *info)
12333 struct btrfs_key key;
12334 struct extent_buffer *leaf;
12335 struct btrfs_path path;
/* Allocate and initialise the global cache on first use. */
12337 if (!roots_info_cache) {
12338 roots_info_cache = malloc(sizeof(*roots_info_cache));
12339 if (!roots_info_cache)
12341 cache_tree_init(roots_info_cache);
12344 btrfs_init_path(&path);
12346 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Lookup only: no transaction handle is passed to the search. */
12348 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12351 leaf = path.nodes[0];
12354 struct btrfs_key found_key;
12355 struct btrfs_extent_item *ei;
12356 struct btrfs_extent_inline_ref *iref;
12357 int slot = path.slots[0];
12362 struct cache_extent *entry;
12363 struct root_item_info *rii;
/* Past the last item of this leaf: continue with the next leaf. */
12365 if (slot >= btrfs_header_nritems(leaf)) {
12366 ret = btrfs_next_leaf(info->extent_root, &path);
12373 leaf = path.nodes[0];
12374 slot = path.slots[0];
12377 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are considered. */
12379 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12380 found_key.type != BTRFS_METADATA_ITEM_KEY)
12383 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12384 flags = btrfs_extent_flags(leaf, ei);
/* EXTENT_ITEM entries lacking the TREE_BLOCK flag are filtered by this test. */
12386 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12387 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref directly
 * follows the extent item. In the other layout a btrfs_tree_block_info
 * sits between the two and carries the level.
 */
12390 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12391 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12392 level = found_key.offset;
12394 struct btrfs_tree_block_info *binfo;
12396 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12397 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12398 level = btrfs_tree_block_level(leaf, binfo);
12402 * For a root extent, it must be of the following type and the
12403 * first (and only one) iref in the item.
12405 type = btrfs_extent_inline_ref_type(leaf, iref);
12406 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the root id and as the cache key. */
12409 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12410 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry: level (u8)-1 serves as the "nothing recorded yet" marker tested below. */
12412 rii = malloc(sizeof(struct root_item_info));
12417 rii->cache_extent.start = root_id;
12418 rii->cache_extent.size = 1;
12419 rii->level = (u8)-1;
12420 entry = &rii->cache_extent;
12421 ret = insert_cache_extent(roots_info_cache, entry);
12424 rii = container_of(entry, struct root_item_info,
12428 ASSERT(rii->cache_extent.start == root_id);
12429 ASSERT(rii->cache_extent.size == 1);
/* A higher level than recorded so far (or the first block seen) replaces the candidate. */
12431 if (level > rii->level || rii->level == (u8)-1) {
12432 rii->level = level;
12433 rii->bytenr = found_key.objectid;
12434 rii->gen = btrfs_extent_generation(leaf, ei);
12435 rii->node_count = 1;
/* NOTE(review): same-level case; maybe_repair_root_item() rejects node_count != 1, so this branch presumably bumps node_count - confirm. */
12436 } else if (level == rii->level) {
12444 btrfs_release_path(&path);
/*
 * Compare the root item at the current slot of @path with the root node
 * information recorded in roots_info_cache for the same root id. When
 * bytenr, level or generation differ, a message is printed and, unless
 * @read_only_mode is set, the three fields are rewritten in the leaf.
 *
 * @path:           path whose nodes[0]/slots[0] point at the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only check, zero to also update the item
 *
 * NOTE(review): confirm the return convention against repair_root_items(),
 * which stores the result in ret.
 */
12449 static int maybe_repair_root_item(struct btrfs_path *path,
12450 const struct btrfs_key *root_key,
12451 const int read_only_mode)
12453 const u64 root_id = root_key->objectid;
12454 struct cache_extent *entry;
12455 struct root_item_info *rii;
12456 struct btrfs_root_item ri;
12457 unsigned long offset;
/* Fetch what build_roots_info_cache() recorded for this root id. */
12459 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12462 "Error: could not find extent items for root %llu\n",
12463 root_key->objectid);
12467 rii = container_of(entry, struct root_item_info, cache_extent);
12468 ASSERT(rii->cache_extent.start == root_id);
12469 ASSERT(rii->cache_extent.size == 1);
/* The scan must have ended with a node_count of exactly 1 for this root. */
12471 if (rii->node_count != 1) {
12473 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf into the local struct. */
12478 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12479 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any difference in bytenr, level or generation counts as a mismatch. */
12481 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12482 btrfs_root_level(&ri) != rii->level ||
12483 btrfs_root_generation(&ri) != rii->gen) {
12486 * If we're in repair mode but our caller told us to not update
12487 * the root item, i.e. just check if it needs to be updated, don't
12488 * print this message, since the caller will call us again shortly
12489 * for the same root item without read only mode (the caller will
12490 * open a transaction first).
12492 if (!(read_only_mode && repair))
12494 "%sroot item for root %llu,"
12495 " current bytenr %llu, current gen %llu, current level %u,"
12496 " new bytenr %llu, new gen %llu, new level %u\n",
12497 (read_only_mode ? "" : "fixing "),
12499 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12500 btrfs_root_level(&ri),
12501 rii->bytenr, rii->gen, rii->level);
/* A root item whose generation is newer than the found root node is reported separately. */
12503 if (btrfs_root_generation(&ri) > rii->gen) {
12505 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12506 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected fields back into the leaf at the same offset. */
12510 if (!read_only_mode) {
12511 btrfs_set_root_bytenr(&ri, rii->bytenr);
12512 btrfs_set_root_level(&ri, rii->level);
12513 btrfs_set_root_generation(&ri, rii->gen);
12514 write_extent_buffer(path->nodes[0], &ri,
12515 offset, sizeof(ri));
12525 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12526 * caused read-only snapshots to be corrupted if they were created at a moment
12527 * when the source subvolume/snapshot had orphan items. The issue was that the
12528 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12529 * node instead of the post orphan cleanup root node.
12530 * So this function, and its callees, just detects and fixes those cases. Even
12531 * though the regression was for read-only snapshots, this function applies to
12532 * any snapshot/subvolume root.
12533 * This must be run before any other repair code - not doing so makes other
12534 * repair code delete or modify backrefs in the extent tree for example, which
12535 * will result in an inconsistent fs after repairing the root items.
12537 static int repair_root_items(struct btrfs_fs_info *info)
12539 struct btrfs_path path;
12540 struct btrfs_key key;
12541 struct extent_buffer *leaf;
12542 struct btrfs_trans_handle *trans = NULL;
12545 int need_trans = 0;
12547 btrfs_init_path(&path);
/* Collect the per-root root node information from the extent tree first. */
12549 ret = build_roots_info_cache(info);
/* The walk over the tree root starts at this key. */
12553 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12554 key.type = BTRFS_ROOT_ITEM_KEY;
12559 * Avoid opening and committing transactions if a leaf doesn't have
12560 * any root items that need to be fixed, so that we avoid rotating
12561 * backup roots unnecessarily.
12564 trans = btrfs_start_transaction(info->tree_root, 1);
12565 if (IS_ERR(trans)) {
12566 ret = PTR_ERR(trans);
12571 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12575 leaf = path.nodes[0];
12578 struct btrfs_key found_key;
/* End of the current leaf: find_next_key() supplies the key to resume from. */
12580 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12581 int no_more_keys = find_next_key(&path, &key);
12583 btrfs_release_path(&path);
12585 ret = btrfs_commit_transaction(trans,
12597 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Filter by item type and by the tree reloc objectid. */
12599 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12601 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* With no open transaction the helper is called in read-only (check only) mode. */
12604 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair mode but no transaction is open yet. */
12608 if (!trans && repair) {
12611 btrfs_release_path(&path);
12621 free_roots_info_cache();
12622 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not stored or checked on this line. */
12624 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Call btrfs_clear_free_space_cache() for the block groups found by
 * repeated btrfs_lookup_first_block_group() lookups, then set the
 * superblock cache generation to (u64)-1 inside a transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
12631 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12633 struct btrfs_trans_handle *trans;
12634 struct btrfs_block_group_cache *bg_cache;
12638 /* Clear all free space cache inodes and its extent data */
12640 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12643 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* The next lookup starts at the end of this block group (objectid + offset). */
12646 current = bg_cache->key.objectid + bg_cache->key.offset;
12649 /* Don't forget to set cache_generation to -1 */
12650 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12651 if (IS_ERR(trans)) {
12652 error("failed to update super block cache generation");
12653 return PTR_ERR(trans);
12655 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not stored or checked on this line. */
12656 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage and help text for "btrfs check". cmd_check() passes this array to
 * usage() when the command line is invalid.
 */
12661 const char * const cmd_check_usage[] = {
12662 "btrfs check [options] <device>",
12663 "Check structural integrity of a filesystem (unmounted).",
12664 "Check structural integrity of an unmounted filesystem. Verify internal",
12665 "trees' consistency and item connectivity. In the repair mode try to",
12666 "fix the problems found. ",
12667 "WARNING: the repair mode is considered dangerous",
12669 "-s|--super <superblock> use this superblock copy",
12670 "-b|--backup use the first valid backup root copy",
12671 "--repair try to repair the filesystem",
12672 "--readonly run in read-only mode (default)",
12673 "--init-csum-tree create a new CRC tree",
12674 "--init-extent-tree create a new extent tree",
12675 "--mode <MODE> allows choice of memory/IO trade-offs",
12676 " where MODE is one of:",
12677 " original - read inodes and extents to memory (requires",
12678 " more memory, does less IO)",
12679 " lowmem - try to use less memory but read blocks again",
12681 "--check-data-csum verify checksums of data blocks",
12682 "-Q|--qgroup-report print a report on qgroup consistency",
12683 "-E|--subvol-extents <subvolid>",
12684 " print subvolume extents and sharing state",
12685 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12686 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12687 "-p|--progress indicate progress",
12688 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, refuses to run on a mounted device, opens the
 * filesystem and then, depending on the options, clears the space cache,
 * re-initialises the extent and/or checksum tree, or runs the checks in
 * this order: extents, root items, free space cache/tree, fs roots, csums,
 * root refs and quota groups. A summary of the collected byte counters is
 * printed afterwards.
 *
 * @argc, @argv: command line; exactly one non-option argument (the device)
 *               is accepted
 */
12692 int cmd_check(int argc, char **argv)
12694 struct cache_tree root_cache;
12695 struct btrfs_root *root;
12696 struct btrfs_fs_info *info;
12699 u64 tree_root_bytenr = 0;
12700 u64 chunk_root_bytenr = 0;
12701 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12705 int init_csum_tree = 0;
12707 int clear_space_cache = 0;
12708 int qgroup_report = 0;
12709 int qgroups_repaired = 0;
12710 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for long-only options; they start at 257. */
12714 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12715 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12716 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12717 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12718 static const struct option long_options[] = {
12719 { "super", required_argument, NULL, 's' },
12720 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12721 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12722 { "init-csum-tree", no_argument, NULL,
12723 GETOPT_VAL_INIT_CSUM },
12724 { "init-extent-tree", no_argument, NULL,
12725 GETOPT_VAL_INIT_EXTENT },
12726 { "check-data-csum", no_argument, NULL,
12727 GETOPT_VAL_CHECK_CSUM },
12728 { "backup", no_argument, NULL, 'b' },
12729 { "subvol-extents", required_argument, NULL, 'E' },
12730 { "qgroup-report", no_argument, NULL, 'Q' },
12731 { "tree-root", required_argument, NULL, 'r' },
12732 { "chunk-root", required_argument, NULL,
12733 GETOPT_VAL_CHUNK_TREE },
12734 { "progress", no_argument, NULL, 'p' },
12735 { "mode", required_argument, NULL,
12737 { "clear-space-cache", required_argument, NULL,
12738 GETOPT_VAL_CLEAR_SPACE_CACHE},
12739 { NULL, 0, NULL, 0}
/* Option parsing: short options "as:br:p" plus the long options above. */
12742 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12746 case 'a': /* ignored */ break;
12748 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12751 num = arg_strtou64(optarg);
12752 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12754 "super mirror should be less than %d",
12755 BTRFS_SUPER_MIRROR_MAX);
12758 bytenr = btrfs_sb_offset(((int)num));
12759 printf("using SB copy %llu, bytenr %llu\n", num,
12760 (unsigned long long)bytenr);
12766 subvolid = arg_strtou64(optarg);
12769 tree_root_bytenr = arg_strtou64(optarg);
12771 case GETOPT_VAL_CHUNK_TREE:
12772 chunk_root_bytenr = arg_strtou64(optarg);
12775 ctx.progress_enabled = true;
12779 usage(cmd_check_usage);
12780 case GETOPT_VAL_REPAIR:
12781 printf("enabling repair mode\n");
12783 ctree_flags |= OPEN_CTREE_WRITES;
12785 case GETOPT_VAL_READONLY:
12788 case GETOPT_VAL_INIT_CSUM:
12789 printf("Creating a new CRC tree\n");
12790 init_csum_tree = 1;
12792 ctree_flags |= OPEN_CTREE_WRITES;
12794 case GETOPT_VAL_INIT_EXTENT:
12795 init_extent_tree = 1;
12796 ctree_flags |= (OPEN_CTREE_WRITES |
12797 OPEN_CTREE_NO_BLOCK_GROUPS);
12800 case GETOPT_VAL_CHECK_CSUM:
12801 check_data_csum = 1;
12803 case GETOPT_VAL_MODE:
12804 check_mode = parse_check_mode(optarg);
12805 if (check_mode == CHECK_MODE_UNKNOWN) {
12806 error("unknown mode: %s", optarg);
12810 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12811 if (strcmp(optarg, "v1") == 0) {
12812 clear_space_cache = 1;
12813 } else if (strcmp(optarg, "v2") == 0) {
12814 clear_space_cache = 2;
12815 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12818 "invalid argument to --clear-space-cache, must be v1 or v2");
12821 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) must remain. */
12826 if (check_argc_exact(argc - optind, 1))
12827 usage(cmd_check_usage);
12829 if (ctx.progress_enabled) {
12830 ctx.tp = TASK_NOTHING;
12831 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12834 /* This check is the only reason for --readonly to exist */
12835 if (readonly && repair) {
12836 error("repair options are not compatible with --readonly");
12841 * Not supported yet
12843 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12844 error("low memory mode doesn't support repair yet");
12849 cache_tree_init(&root_cache);
12851 if((ret = check_mounted(argv[optind])) < 0) {
12852 error("could not check mount status: %s", strerror(-ret));
12856 error("%s is currently mounted, aborting", argv[optind]);
12862 /* only allow partial opening under repair mode */
12864 ctree_flags |= OPEN_CTREE_PARTIAL;
12866 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12867 chunk_root_bytenr, ctree_flags);
12869 error("cannot open file system");
12875 global_info = info;
12876 root = info->fs_root;
/* --clear-space-cache: 1 selects clear_free_space_cache(), 2 selects btrfs_clear_free_space_tree(). */
12877 if (clear_space_cache == 1) {
12878 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12880 "free space cache v2 detected, use --clear-space-cache v2");
12884 printf("Clearing free space cache\n");
12885 ret = clear_free_space_cache(info);
12887 error("failed to clear free space cache");
12890 printf("Free space cache cleared\n");
12893 } else if (clear_space_cache == 2) {
12894 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12895 printf("no free space cache v2 to clear\n");
12899 printf("Clear free space cache v2\n");
12900 ret = btrfs_clear_free_space_tree(info);
12902 error("failed to clear free space cache v2: %d", ret);
12905 printf("free space cache v2 cleared\n");
12911 * repair mode will force us to commit transaction which
12912 * will make us fail to load log tree when mounting.
12914 if (repair && btrfs_super_log_root(info->super_copy)) {
12915 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12921 ret = zero_log_tree(root);
12924 error("failed to zero log tree: %d", ret);
12929 uuid_unparse(info->super_copy->fsid, uuidbuf);
12930 if (qgroup_report) {
12931 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12933 ret = qgroup_verify_all(info);
12940 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12941 subvolid, argv[optind], uuidbuf);
12942 ret = print_extent_state(info, subvolid);
12946 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, device and chunk root nodes must all be up to date to go on. */
12948 if (!extent_buffer_uptodate(info->tree_root->node) ||
12949 !extent_buffer_uptodate(info->dev_root->node) ||
12950 !extent_buffer_uptodate(info->chunk_root->node)) {
12951 error("critical roots corrupted, unable to check the filesystem");
/* --init-extent-tree / --init-csum-tree: the rebuild runs inside a transaction started on the extent root. */
12957 if (init_extent_tree || init_csum_tree) {
12958 struct btrfs_trans_handle *trans;
12960 trans = btrfs_start_transaction(info->extent_root, 0);
12961 if (IS_ERR(trans)) {
12962 error("error starting transaction");
12963 ret = PTR_ERR(trans);
12968 if (init_extent_tree) {
12969 printf("Creating a new extent tree\n");
12970 ret = reinit_extent_tree(trans, info);
12976 if (init_csum_tree) {
12977 printf("Reinitialize checksum tree\n");
12978 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12980 error("checksum tree initialization failed: %d",
12987 ret = fill_csum_tree(trans, info->csum_root,
12991 error("checksum tree refilling failed: %d", ret);
12996 * Ok now we commit and run the normal fsck, which will add
12997 * extent entries for all of the items it finds.
12999 ret = btrfs_commit_transaction(trans, info->extent_root);
13004 if (!extent_buffer_uptodate(info->extent_root->node)) {
13005 error("critical: extent_root, unable to check the filesystem");
13010 if (!extent_buffer_uptodate(info->csum_root->node)) {
13011 error("critical: csum_root, unable to check the filesystem");
13017 if (!ctx.progress_enabled)
13018 fprintf(stderr, "checking extents\n");
13019 if (check_mode == CHECK_MODE_LOWMEM)
13020 ret = check_chunks_and_extents_v2(root);
13022 ret = check_chunks_and_extents(root);
13026 "errors found in extent allocation tree or chunk allocation");
/* Root items: a positive result is reported as the number of roots fixed or found outdated. */
13028 ret = repair_root_items(info);
13031 error("failed to repair root items: %s", strerror(-ret));
13035 fprintf(stderr, "Fixed %d roots.\n", ret);
13037 } else if (ret > 0) {
13039 "Found %d roots with an outdated root item.\n",
13042 "Please run a filesystem check with the option --repair to fix them.\n");
13048 if (!ctx.progress_enabled) {
13049 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13050 fprintf(stderr, "checking free space tree\n");
13052 fprintf(stderr, "checking free space cache\n");
13054 ret = check_space_cache(root);
13057 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13058 error("errors found in free space tree");
13060 error("errors found in free space cache");
13065 * We used to have to have these hole extents in between our real
13066 * extents so if we don't have this flag set we need to make sure there
13067 * are no gaps in the file extents for inodes, otherwise we can just
13068 * ignore it when this happens.
13070 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13071 if (!ctx.progress_enabled)
13072 fprintf(stderr, "checking fs roots\n");
13073 if (check_mode == CHECK_MODE_LOWMEM)
13074 ret = check_fs_roots_v2(root->fs_info);
13076 ret = check_fs_roots(root, &root_cache);
13079 error("errors found in fs roots");
13083 fprintf(stderr, "checking csums\n");
13084 ret = check_csums(root);
13087 error("errors found in csum tree");
13091 fprintf(stderr, "checking root refs\n");
13092 /* For low memory mode, check_fs_roots_v2 handles root refs */
13093 if (check_mode != CHECK_MODE_LOWMEM) {
13094 ret = check_root_refs(root, &root_cache);
13097 error("errors found in root refs");
/* In repair mode, pass every extent buffer queued on recow_ebs to recow_extent_buffer(). */
13102 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13103 struct extent_buffer *eb;
13105 eb = list_first_entry(&root->fs_info->recow_ebs,
13106 struct extent_buffer, recow);
13107 list_del_init(&eb->recow);
13108 ret = recow_extent_buffer(root, eb);
13111 error("fails to fix transid errors");
/* Unlink each entry queued on delete_items and pass it to delete_bad_item(). */
13116 while (!list_empty(&delete_items)) {
13117 struct bad_item *bad;
13119 bad = list_first_entry(&delete_items, struct bad_item, list);
13120 list_del_init(&bad->list);
13122 ret = delete_bad_item(root, bad);
13128 if (info->quota_enabled) {
13129 fprintf(stderr, "checking quota groups\n");
13130 ret = qgroup_verify_all(info);
13133 error("failed to check quota groups");
13137 ret = repair_qgroups(info, &qgroups_repaired);
13140 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs at this point are reported as transid errors. */
13146 if (!list_empty(&root->fs_info->recow_ebs)) {
13147 error("transid errors in file system");
13152 if (found_old_backref) { /*
13153 * there was a disk format change when mixed
13154 * backref was in testing tree. The old format
13155 * existed about one week.
13157 printf("\n * Found old mixed backref format. "
13158 "The old format is not supported! *"
13159 "\n * Please mount the FS in readonly mode, "
13160 "backup data and re-format the FS. *\n\n");
13163 printf("found %llu bytes used, ",
13164 (unsigned long long)bytes_used);
13166 printf("error(s) found\n");
13168 printf("no error found\n");
13169 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13170 printf("total tree bytes: %llu\n",
13171 (unsigned long long)total_btree_bytes);
13172 printf("total fs tree bytes: %llu\n",
13173 (unsigned long long)total_fs_tree_bytes);
13174 printf("total extent tree bytes: %llu\n",
13175 (unsigned long long)total_extent_tree_bytes);
13176 printf("btree space waste bytes: %llu\n",
13177 (unsigned long long)btree_space_waste);
13178 printf("file data blocks allocated: %llu\n referenced %llu\n",
13179 (unsigned long long)data_bytes_allocated,
13180 (unsigned long long)data_bytes_referenced);
13182 free_qgroup_counts();
13183 free_root_recs_tree(&root_cache);
13187 if (ctx.progress_enabled)
13188 task_deinit(ctx.info);