2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state for the checker. Every counter and flag below starts
 * at zero and every pointer at NULL (statics without an initializer are
 * zero-initialized); the two LIST_HEADs are list heads owned by this file.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips its file extent gap check. */
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
/* NOTE(review): presumably the context handed to print_status_check() - confirm. */
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check modes; parse_check_mode() maps user-supplied names onto these.
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL.
 */
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Currently selected mode; starts out as the default (original) mode. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common backref header. It is embedded as the `node` member of both
 * struct data_backref and struct tree_backref and is chained through
 * `list`. All state is kept in one-bit flags.
 */
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
/* Map a list_head embedded in an extent_backref (its `list` member) back to the backref. */
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
/* Recover the enclosing data_backref from its embedded extent_backref (`node`). */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
/* Recover the enclosing tree_backref from its embedded extent_backref (`node`). */
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 is distinct from 0 and 1 and still fits that 2-bit field. */
169 enum { FLAG_UNSET = 2 };
/*
 * Bookkeeping record for one extent. It carries three list heads
 * (`backrefs`, `dups`, `list`), an embedded cache_extent, the parent key
 * and generation, a reference count from the extent item, and a set of
 * status bit-fields.
 */
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
/* to_extent_record() converts a pointer to this member back to the record */
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
/* two bits wide so it can hold 0, 1 or FLAG_UNSET (2) */
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
/* Map a list_head that is the `list` member of an extent_record back to the record. */
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 * The found_* bits record which item kinds have been seen for this name;
 * add_inode_backref() sets them.
 */
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
/* Map a list_head embedded in an inode_backref (its `list` member) back to the backref. */
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Per-backref error bits. They are OR-ed into the `errors` member of a
 * backref (see add_inode_backref()) and decoded by print_ref_error().
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
/*
 * Everything collected about one inode while scanning a tree: the list
 * of names pointing at it, status bit-fields, the rb-tree of file extent
 * holes and the list of orphan data extents.
 */
253 struct inode_record {
/* list of struct inode_backref */
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole, see add_file_extent_hole() */
275 struct rb_root holes;
/* list of struct orphan_data_extent */
276 struct list_head orphan_extents;
/*
 * Per-inode error bits. They are OR-ed into inode_record::errors and
 * decoded by print_inode_error().
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root (subvolume), chained through `list`. All state
 * is kept in one-bit flags.
 */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
/* Map a list_head embedded in a root_backref (its `list` member) back to the backref. */
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low-memory mode extent checks.
 *
 * Every flag must own a distinct bit so that several errors can be OR-ed
 * into one mask and still be tested individually.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * This used to be (1 << 4) as well, which made it indistinguishable from
 * REFERENCER_MISMATCH. It now uses the first free bit; all other values
 * are unchanged.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Progress reporter. `p` is a struct task_ctx *. It prints the label for
 * the current task position followed by one spinner character chosen
 * from work_indicator by `count % 4`; the trailing '\r' leaves the cursor
 * at the start of the line so the next update overwrites it. Updates are
 * paced through task_period_start()/task_period_wait() with a period
 * argument of 1000 (annotated "1s").
 * NOTE(review): the void *(*)(void *) signature suggests this runs as a
 * thread body - confirm against the caller.
 */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* labels are indexed by priv->tp */
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
/* TASK_NOTHING is the terminating enumerator and has no label */
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * "lowmem"             -> CHECK_MODE_LOWMEM
 * "orig" or "original" -> CHECK_MODE_ORIGINAL
 * anything else        -> CHECK_MODE_UNKNOWN
 *
 * Matching uses strcmp(), so it is exact and case-sensitive; `str` must
 * not be NULL.
 */
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper so that older code can keep working with a single "first gap" value */
/*
 * Looks at the left-most hole in `holes` (rb_first()); an empty tree is
 * handled separately. Callers compare the result against an inode size
 * and against the result for another tree.
 * NOTE(review): presumably returns the start offset of the first hole
 * and a "no gap" sentinel for an empty tree - confirm.
 */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback used when inserting file_extent_hole nodes into the
 * rb-tree. Holes are ordered by `start` first; for equal starts the
 * lengths decide. add_file_extent_hole() relies on this never reporting
 * two nodes as equal.
 */
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This merges holes on behalf of copy_file_extent_holes(),
476 * which ensures there won't be adjacent or overlapping holes.
/*
 * Allocate a new hole node, insert it into `holes` ordered by
 * compare_hole(), then fold neighbouring holes into it:
 *  - the predecessor, if it touches or overlaps the new hole;
 *  - each successor that the (possibly grown) hole touches or overlaps.
 * Both tests use >=, so directly adjacent ranges are merged too.
 * Callers pass the hole's start and length after `holes`.
 */
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
/*
 * New length = (end of new hole) - (start of prev).
 * NOTE(review): if prev extends past the end of the new hole, this
 * shrinks the covered range to the new hole's end - confirm callers
 * never add a hole that is fully contained in an existing one.
 */
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
/* only grow when next ends at or after the current end */
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search(). `data` is a struct file_extent_hole
 * used as the search key; `node` is the tree node being examined. The
 * key's start offset is tested against the node's half-open range
 * [start, start + len).
 */
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
/* first use of `hole`: the search key */
529 hole = (struct file_extent_hole *)data;
/* second use of `hole`: the tree node under comparison */
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much stricter than add.
/*
 * Remove the range [start, start + len) from the hole tree.
 *
 * The hole containing `start` is located with rb_search() and
 * compare_hole_range(). The range is checked against the end of that
 * hole. The hole is then erased and whatever remains in front of or
 * behind the removed range is re-inserted through add_file_extent_hole().
 */
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
/* stack copy used only as the search key */
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
/* the range to delete must not run past the end of the hole found */
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
/* remainder in front of the deleted range */
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
/* remainder behind the deleted range */
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to `dst`, walking the source in
 * order with rb_first()/rb_next(). Each hole goes through
 * add_file_extent_hole(), so it is merged with whatever `dst` already
 * holds. The source tree is only read.
 */
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
/*
 * Empty the hole tree by repeatedly taking the left-most node and
 * erasing it until rb_first() finds nothing.
 * NOTE(review): each erased hole is presumably freed as well - confirm.
 */
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
/* restart from the new left-most node */
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Mark `root` as participating in transaction `trans`.
 *
 * This only acts the first time a root is seen in a given transaction
 * (last_trans differs from the transaction id). It then enables dirty
 * tracking, stamps the transaction id, and records the current root node
 * as commit_root, taking a reference on it via extent_buffer_get().
 */
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into the matching
 * BTRFS_FT_* directory entry type.
 *
 * The type bits (imode & S_IFMT) are shifted down by S_SHIFT and used as
 * an index into a static lookup table. Table slots without an explicit
 * initializer are zero.
 */
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree ordering callback for struct device_record nodes: records are
 * ordered by their devid.
 */
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
/*
 * Make a private deep copy of an inode record.
 *
 * The record itself is copied with memcpy(). Its list heads and hole
 * tree are then re-initialised and refilled with copies of every backref
 * (including the trailing name bytes), every orphan data extent and
 * every file extent hole of the original.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself
 * cannot be allocated. The tail of the function tears down a partially
 * built copy.
 */
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy copied pointers into the original's lists; reset them */
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* a backref is followed in memory by its name plus a NUL byte */
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* cleanup: walk the holes copied so far */
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
/* cleanup: drop the backrefs copied so far */
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
/*
 * NOTE(review): this list holds struct orphan_data_extent entries but is
 * walked with struct inode_backref cursors. That only works while `list`
 * sits at the same offset in both structs - consider dedicated cursors.
 */
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
/*
 * Print the orphan data extents on `orphan_extents` to stdout: one
 * heading line naming the tree, then one line per extent with its inode,
 * file offset, disk bytenr and disk length. An empty list is handled
 * before the heading is printed.
 */
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a one-line, human-readable summary of the I_ERR_* bits set on an
 * inode record to stderr. For some errors it adds detail: orphan extents
 * for I_ERR_FILE_EXTENT_ORPHAN, and the list of holes for
 * I_ERR_FILE_EXTENT_DISCOUNT.
 *
 * For a relocation tree the line is prefixed with "reloc" and the root
 * id shown is root_key.offset instead of root_key.objectid.
 */
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
/* the raw error mask is shown in hex, then decoded bit by bit */
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
/*
 * NOTE(review): the summary line above uses the (reloc-adjusted)
 * root_objectid, but this call passes root->objectid - confirm which id
 * is intended here.
 */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
/* alternative report: one hole from 0 to isize rounded up to the sector size */
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
/*
 * Decode a REF_ERR_* bit mask to stderr. Every set bit appends a
 * ", <description>" fragment, and a newline is always written at the
 * end. This is a continuation of a line the caller has already started.
 */
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
/*
 * Look up the inode record for `ino` in `inode_cache`, creating it when
 * it does not exist yet.
 *
 * Existing record: when the caller intends to modify it (`mod`) and it
 * is shared (refs > 1), the cache entry is switched to a private clone
 * made by clone_inode_rec().
 *
 * New record: allocated zeroed, with extent_start set to (u64)-1 and
 * empty backref/orphan lists and hole tree. It is then wrapped in a
 * ptr_node covering [ino, ino + 1) and inserted into the cache.
 *
 * Returns the record, or an ERR_PTR(): -ENOMEM on allocation failure,
 * -EEXIST on the path that follows insert_cache_extent().
 */
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet", see merge_inode_recs() */
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
/* the free-inode objectid gets special treatment */
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
/*
 * Drain a list of orphan_data_extent entries: take the first entry and
 * unlink it, until the list is empty.
 * NOTE(review): each unlinked entry is presumably freed as well - confirm.
 */
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink every backref from rec->backrefs,
 * then release the orphan extent list and the hole tree.
 * NOTE(review): any reference-count check and the final free of `rec`
 * are presumed - confirm.
 */
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
/*
 * Test whether an inode record has nothing left to report: no errors,
 * fully checked, inode item seen, link count matching the number of
 * links found, and no backrefs pending.
 */
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalise what is known about an inode and drop its record from the
 * cache when nothing is wrong with it.
 *
 * 1. With the inode item present, each backref that has both a dir item
 *    and a dir index is validated against the inode's file type; clean,
 *    fully matched backrefs are unlinked from the record.
 * 2. Once the record is checked and not being merged, type-specific
 *    consistency errors are recorded in rec->errors (directory size,
 *    unexpected items, nbytes, uncovered file ranges, checksum state).
 * 3. If can_free_inode_rec() agrees, the record is removed from
 *    `inode_cache` and released.
 */
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
/* without an inode item the file type is unknown */
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* a backref is done once it is error free, has its inode ref and the link count adds up */
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* linked files must be covered by extents up to isize unless no_holes is set */
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* a record that is still shared must never get here */
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
/*
 * Search `root` for an orphan item belonging to inode `ino`.
 *
 * The search key has objectid BTRFS_ORPHAN_OBJECTID and type
 * BTRFS_ORPHAN_ITEM_KEY. The lookup is read-only (no transaction,
 * ins_len and cow both 0) and the path is released before returning.
 * NOTE(review): the key offset is presumably `ino` - confirm.
 */
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
/*
 * Record the contents of an INODE_ITEM found at `slot` of leaf `eb` in
 * the active node's current inode record.
 *
 * A second inode item for the same inode is flagged with
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are
 * copied from the item, and the NODATASUM inode flag is checked. An
 * inode with nlink == 0 is provisionally flagged I_ERR_NO_ORPHAN_ITEM.
 * The record is then handed to maybe_free_inode_rec().
 */
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
/* the current record must be for this inode and must not be shared */
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of `rec` that matches (dir, name, namelen), creating
 * and appending a new one when none matches.
 *
 * A new backref is allocated with room for the name plus a terminating
 * NUL; only the fixed-size part is zeroed before the name is copied in.
 */
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
/* this objectid gets special treatment in the search */
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type `itemtype` names inode `ino` as `name`
 * inside directory `dir`.
 *
 * The inode record is fetched for modification and the matching backref
 * is found or created. `errors` is OR-ed into the backref, then, by item
 * type:
 *  - DIR_INDEX: flags duplicates, an index differing from an already
 *    seen inode ref, and a file type differing from an already seen dir
 *    item; stores index and filetype.
 *  - DIR_ITEM: flags duplicates and a file type differing from an
 *    already seen dir index; stores filetype.
 *  - INODE_REF / INODE_EXTREF: flags duplicates and an index differing
 *    from an already seen dir index; stores index and the ref type.
 * Finally the record is passed to maybe_free_inode_rec().
 */
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
/* remember whether this was a plain ref or an extref */
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about `src` into `dst`, both describing the same
 * inode.
 *
 * Backrefs are replayed through add_inode_backref() against `dst_cache`,
 * once per item kind that was seen. The found_* flags are OR-ed in,
 * holes are copied when src's first gap lies before dst's, link and size
 * counters are summed, the extent ranges are combined (flagging overlap
 * or recording the gap as a hole), error bits are merged, and the inode
 * item fields are taken from `src` unless `dst` already has them, in
 * which case I_ERR_DUP_INODE_ITEM is set.
 */
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
/* dir items carry no index, so 0 is passed */
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
/* inode refs carry no file type, so 0 is passed */
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count links were already added to dst by the backref replay above */
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
/* (u64)-1 means no extent was recorded */
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
/* the uncovered range between the two extents becomes a hole */
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected under `src_node` into `dst_node`.
 *
 * One reference on `src_node` is dropped first. The root_cache is
 * processed, then the inode_cache: each entry is re-inserted into the
 * corresponding destination cache. When the destination already has a
 * record for that inode (-EEXIST), the two are merged with
 * merge_inode_recs(), the merged record is marked checked and the source
 * record is freed.
 *
 * Afterwards dst_node->current is advanced to the inode that was current
 * in the source when that inode number is higher, finalising the
 * previous current record.
 */
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
/* step to the next entry before this one may be removed */
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
/* the merged record may be freed below, so do not keep it as current */
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
/* second pass: switch from the root caches to the inode caches */
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for an inode cache tree: converts the
 * cache_extent back to its ptr_node and releases the inode record via
 * free_inode_rec().
 * NOTE(review): `rec` is presumably taken from node->data and the
 * ptr_node itself freed afterwards - confirm.
 */
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node registered for the tree block at `bytenr` in
 * the `shared` cache tree (a lookup of size 1).
 * NOTE(review): presumably returns NULL when nothing is registered - confirm.
 */
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
/*
 * Register a new, zero-initialised shared_node for the tree block at
 * `bytenr`. It covers [bytenr, bytenr + 1) in the `shared` cache tree
 * and starts with empty root and inode caches.
 * NOTE(review): `refs` is presumably stored in node->refs - confirm.
 */
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk reaches a shared block at `level`.
 *
 * First visit (no shared_node for `bytenr` yet): one is created and
 * becomes the active node at this level.
 *
 * Later visit: when the walk is at the root level of a root with zero
 * references, one reference on the node is dropped and, on reaching
 * zero, its caches are freed and it is unregistered. Otherwise its
 * records are spliced into the currently active node and it is
 * unregistered once its reference count reaches zero.
 */
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
/* already the active level: special-cased before anything else */
1347 if (level == wc->active_node)
/* shared nodes are only entered below the currently active level */
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
/* splice_shared_node() drops one reference on `node` */
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the tree walk leaves the shared block at `level`.
 *
 * A higher level is searched for (starting at level + 1) to become the
 * new active node, and the old active slot is cleared. If the new active
 * level is below the root level, or the root still has references, the
 * records gathered in the old node are spliced into the new active node.
 * Leaving at the root level itself is special-cased up front.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* the search above must have stopped at a valid level */
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
/* the node must still be referenced elsewhere after the splice drops one ref */
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* First try the direct (parent, ROOT_REF, child) item in the tree root. */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Then scan the child's ROOT_BACKREF items for one naming the parent. */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Ran off the end of the child's backref items. */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF items the key offset is compared with the parent id. */
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
/* No match: 0 if some other parent was seen, 2 if there was none. */
1478 return has_parent ? 0 : 2;
/*
 * Parse every btrfs_dir_item packed into the item at @slot of @eb (key @key,
 * a DIR_ITEM or DIR_INDEX per the caller) and record one inode backref per
 * entry in the caches of @active_node. Also marks the current inode record
 * as having a dir item and accumulates name lengths into its found_size.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
/*
 * The name would cross the item boundary or exceeds BTRFS_NAME_LEN: flag
 * the entry and clamp how much of the name is copied out.
 */
1515 if (cur + sizeof(*di) + name_len > total ||
1516 name_len > BTRFS_NAME_LEN) {
1517 error = REF_ERR_NAME_TOO_LONG;
1519 if (cur + sizeof(*di) > total)
1521 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir item header directly. */
1528 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Route the backref by what the entry points at: an inode or a root item. */
1530 if (location.type == BTRFS_INODE_ITEM_KEY) {
1531 add_inode_backref(inode_cache, location.objectid,
1532 key->objectid, key->offset, namebuf,
1533 len, filetype, key->type, error);
1534 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1535 add_inode_backref(root_cache, location.objectid,
1536 key->objectid, key->offset,
1537 namebuf, len, filetype,
/* Any other location type is reported and filed under a placeholder id. */
1540 fprintf(stderr, "invalid location in dir item %u\n",
1542 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1543 key->objectid, key->offset, namebuf,
1544 len, filetype, key->type, error);
/* Step to the next packed entry: header + name + data. */
1547 len = sizeof(*di) + name_len + data_len;
1548 di = (struct btrfs_dir_item *)((char *)di + len);
/* nritems is presumably the number of entries parsed above -- confirm. */
1551 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1552 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Parse every btrfs_inode_ref packed into the item at @slot of @eb and
 * record a backref for each in the inode cache of @active_node. The backref
 * is filed for inode key->objectid with key->offset as the directory.
 */
1557 static int process_inode_ref(struct extent_buffer *eb,
1558 int slot, struct btrfs_key *key,
1559 struct shared_node *active_node)
1567 struct cache_tree *inode_cache;
1568 struct btrfs_inode_ref *ref;
1569 char namebuf[BTRFS_NAME_LEN];
1571 inode_cache = &active_node->inode_cache;
1573 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1574 total = btrfs_item_size_nr(eb, slot);
1575 while (cur < total) {
1576 name_len = btrfs_inode_ref_name_len(eb, ref);
1577 index = btrfs_inode_ref_index(eb, ref);
1579 /* inode_ref + namelen should not cross item boundary */
1580 if (cur + sizeof(*ref) + name_len > total ||
1581 name_len > BTRFS_NAME_LEN) {
/* Not even the ref header fits in what is left of the item. */
1582 if (total < cur + sizeof(*ref))
1585 /* Still try to read out the remaining part */
1586 len = min_t(u32, total - cur - sizeof(*ref),
1588 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the ref header directly. */
1594 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1595 add_inode_backref(inode_cache, key->objectid, key->offset,
1596 index, namebuf, len, 0, key->type, error);
/* Step to the next packed ref: header + name. */
1598 len = sizeof(*ref) + name_len;
1599 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Parse every btrfs_inode_extref packed into the item at @slot of @eb and
 * record a backref for each in the inode cache of @active_node. Unlike
 * plain inode refs, the parent directory is read from the extref itself.
 *
 * NOTE(review): only name_len <= BTRFS_NAME_LEN is checked in the visible
 * code; no check that extref header + name stays inside the item (as
 * process_inode_ref() does) is visible -- confirm.
 */
1605 static int process_inode_extref(struct extent_buffer *eb,
1606 int slot, struct btrfs_key *key,
1607 struct shared_node *active_node)
1616 struct cache_tree *inode_cache;
1617 struct btrfs_inode_extref *extref;
1618 char namebuf[BTRFS_NAME_LEN];
1620 inode_cache = &active_node->inode_cache;
1622 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1623 total = btrfs_item_size_nr(eb, slot);
1624 while (cur < total) {
1625 name_len = btrfs_inode_extref_name_len(eb, extref);
1626 index = btrfs_inode_extref_index(eb, extref);
1627 parent = btrfs_inode_extref_parent(eb, extref);
/* Over-long names are clamped to BTRFS_NAME_LEN and flagged. */
1628 if (name_len <= BTRFS_NAME_LEN) {
1632 len = BTRFS_NAME_LEN;
1633 error = REF_ERR_NAME_TOO_LONG;
1635 read_extent_buffer(eb, namebuf,
1636 (unsigned long)(extref + 1), len);
1637 add_inode_backref(inode_cache, key->objectid, parent,
1638 index, namebuf, len, 0, key->type, error);
/* Step to the next packed extref: header + name. */
1640 len = sizeof(*extref) + name_len;
1641 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the csum tree over the byte range [@start, @start + @len) and work
 * out how much of it is covered by checksum items. @found is the output
 * parameter (presumably the number of covered bytes -- the store is not
 * visible here).
 */
1648 static int count_csum_range(struct btrfs_root *root, u64 start,
1649 u64 len, u64 *found)
1651 struct btrfs_key key;
1652 struct btrfs_path path;
1653 struct extent_buffer *leaf;
1658 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1660 btrfs_init_path(&path);
1662 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1664 key.type = BTRFS_EXTENT_CSUM_KEY;
1666 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: look at the previous slot, whose csum item may start
 * before the searched offset.
 */
1670 if (ret > 0 && path.slots[0] > 0) {
1671 leaf = path.nodes[0];
1672 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1673 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1674 key.type == BTRFS_EXTENT_CSUM_KEY)
1679 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1680 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1681 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1686 leaf = path.nodes[0];
1689 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the csum items. */
1690 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1691 key.type != BTRFS_EXTENT_CSUM_KEY)
1694 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This csum item begins at or beyond the end of the requested range. */
1695 if (key.offset >= start + len)
1698 if (key.offset > start)
/* Every csum_size bytes of item payload covers one sector of data. */
1701 size = btrfs_item_size_nr(leaf, path.slots[0]);
1702 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1703 if (csum_end > start) {
1704 size = min(csum_end - start, len);
1713 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (at @slot of @eb, key @key) against the
 * current inode record of @active_node: track the covered file range, flag
 * overlaps and malformed extents, record holes, and compare the extent
 * against the checksums found for it.
 */
1719 static int process_file_extent(struct btrfs_root *root,
1720 struct extent_buffer *eb,
1721 int slot, struct btrfs_key *key,
1722 struct shared_node *active_node)
1724 struct inode_record *rec;
1725 struct btrfs_file_extent_item *fi;
1727 u64 disk_bytenr = 0;
1728 u64 extent_offset = 0;
/* Mask used for sector alignment checks and rounding. */
1729 u64 mask = root->sectorsize - 1;
1733 rec = active_node->current;
1734 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1735 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet" for this inode. */
1737 if (rec->extent_start == (u64)-1) {
1738 rec->extent_start = key->offset;
1739 rec->extent_end = key->offset;
/*
 * Compare with the end of the previous extent: starting before it is an
 * overlap, starting after it leaves a hole that gets recorded.
 */
1742 if (rec->extent_end > key->offset)
1743 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1744 else if (rec->extent_end < key->offset) {
1745 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1746 key->offset - rec->extent_end);
1751 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1752 extent_type = btrfs_file_extent_type(eb, fi);
1754 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1755 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1757 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Count the exact inline length, then round up to a full sector. */
1758 rec->found_size += num_bytes;
1759 num_bytes = (num_bytes + mask) & ~mask;
1760 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1761 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1762 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1763 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1764 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1765 if (num_bytes == 0 || (num_bytes & mask))
1766 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1767 if (num_bytes + extent_offset >
1768 btrfs_file_extent_ram_bytes(eb, fi))
1769 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1770 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1771 (btrfs_file_extent_compression(eb, fi) ||
1772 btrfs_file_extent_encryption(eb, fi) ||
1773 btrfs_file_extent_other_encoding(eb, fi)))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk_bytenr count towards found_size. */
1775 if (disk_bytenr > 0)
1776 rec->found_size += num_bytes;
1778 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780 rec->extent_end = key->offset + num_bytes;
1783 * The data reloc tree will copy full extents into its inode and then
1784 * copy the corresponding csums. Because the extent it copied could be
1785 * a preallocated extent that hasn't been written to yet there may be no
1786 * csums to copy, ergo we won't have csums for our file extent. This is
1787 * ok so just don't bother checking csums if the inode belongs to the
1790 if (disk_bytenr > 0 &&
1791 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checked over their on-disk length. */
1793 if (btrfs_file_extent_compression(eb, fi))
1794 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1796 disk_bytenr += extent_offset;
1798 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/*
 * Regular extents should be fully covered by csums; csum results for a
 * preallocated extent can raise I_ERR_ODD_CSUM_ITEM.
 */
1801 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1803 rec->found_csum_item = 1;
1804 if (found < num_bytes)
1805 rec->some_csum_missing = 1;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check,
 * dispatching on key type to the process_*() helpers and keeping the inode
 * record of the active shared node in step with the objectid being
 * visited.
 */
1814 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1815 struct walk_control *wc)
1817 struct btrfs_key key;
1821 struct cache_tree *inode_cache;
1822 struct shared_node *active_node;
/* Special case: at the root level of a root with zero references. */
1824 if (wc->root_level == wc->active_node &&
1825 btrfs_root_refs(&root->root_item) == 0)
1828 active_node = wc->nodes[wc->active_node];
1829 inode_cache = &active_node->inode_cache;
1830 nritems = btrfs_header_nritems(eb);
1831 for (i = 0; i < nritems; i++) {
1832 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items are special-cased here. */
1834 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1836 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moving on to a higher inode number: mark the previous record as
 * checked, let it be freed if possible, and fetch the record for the
 * new inode.
 */
1839 if (active_node->current == NULL ||
1840 active_node->current->ino < key.objectid) {
1841 if (active_node->current) {
1842 active_node->current->checked = 1;
1843 maybe_free_inode_rec(inode_cache,
1844 active_node->current);
1846 active_node->current = get_inode_rec(inode_cache,
1848 BUG_ON(IS_ERR(active_node->current));
1851 case BTRFS_DIR_ITEM_KEY:
1852 case BTRFS_DIR_INDEX_KEY:
1853 ret = process_dir_item(eb, i, &key, active_node);
1855 case BTRFS_INODE_REF_KEY:
1856 ret = process_inode_ref(eb, i, &key, active_node);
1858 case BTRFS_INODE_EXTREF_KEY:
1859 ret = process_inode_extref(eb, i, &key, active_node);
1861 case BTRFS_INODE_ITEM_KEY:
1862 ret = process_inode_item(eb, i, &key, active_node);
1864 case BTRFS_EXTENT_DATA_KEY:
1865 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping for a tree walk, indexed by tree level (see
 * update_nodes_refs()): the address of the block last looked up on that
 * level, its extent reference count, and whether the block has to be
 * checked from the current root.
 */
1876 u64 bytenr[BTRFS_MAX_LEVEL];
1877 u64 refs[BTRFS_MAX_LEVEL];
1878 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations needed by process_one_leaf_v2() below. */
1881 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1882 struct node_refs *nrefs, u64 level);
1883 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1884 unsigned int ext_ref);
1887 * Returns >0 Found error, not fatal, should continue
1888 * Returns <0 Fatal error, must exit the whole check
1889 * Returns 0 No errors found
1891 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1892 struct node_refs *nrefs, int *level, int ext_ref)
1894 struct extent_buffer *cur = path->nodes[0];
1895 struct btrfs_key key;
1899 int root_level = btrfs_header_level(root->node);
1901 int ret = 0; /* Final return value */
1902 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on so a leaf switch can be detected. */
1904 cur_bytenr = cur->start;
1906 /* skip to first inode item or the first inode number change */
1907 nritems = btrfs_header_nritems(cur);
1908 for (i = 0; i < nritems; i++) {
1909 btrfs_item_key_to_cpu(cur, &key, i);
1911 first_ino = key.objectid;
1912 if (key.type == BTRFS_INODE_ITEM_KEY ||
1913 (first_ino && first_ino != key.objectid))
1917 path->slots[0] = nritems;
/* Check one inode; errors accumulate in the err bitmap. */
1923 err |= check_inode_item(root, path, ext_ref);
1925 if (err & LAST_ITEM)
1928 /* still have inode items in this leaf */
1929 if (cur->start == cur_bytenr)
1933 * we have switched to another leaf, above nodes may
1934 * have changed, here walk down the path, if a node
1935 * or leaf is shared, check whether we can skip this
1938 for (i = root_level; i >= 0; i--) {
/* Block on this level is the one already recorded in nrefs. */
1939 if (path->nodes[i]->start == nrefs->bytenr[i])
1942 ret = update_nodes_refs(root,
1943 path->nodes[i]->start,
1948 if (!nrefs->need_check[i]) {
/* Release the path's buffers on every level below *level. */
1954 for (i = 0; i < *level; i++) {
1955 free_extent_buffer(path->nodes[i]);
1956 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, from @slot through the last
 * pointer, each with the generation stored in its node pointer.
 */
1965 static void reada_walk_down(struct btrfs_root *root,
1966 struct extent_buffer *node, int slot)
1975 level = btrfs_header_level(node);
1979 nritems = btrfs_header_nritems(node);
1980 blocksize = root->nodesize;
1981 for (i = slot; i < nritems; i++) {
1982 bytenr = btrfs_node_blockptr(node, i);
1983 ptr_gen = btrfs_node_ptr_generation(node, i);
1984 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1989 * Check the child node/leaf by the following condition:
1990 * 1. the first item key of the node/leaf should be the same with the one
1992 * 2. block in parent node should match the child node/leaf.
1993 * 3. generation of parent node and child's header should be consistent.
1995 * Or the child node/leaf pointed by the key in parent is not valid.
1997 * We hope to check leaf owner too, but since subvol may share leaves,
1998 * which makes leaf owner check not so strong, key check should be
1999 * sufficient enough for that case.
2001 static int check_child_node(struct extent_buffer *parent, int slot,
2002 struct extent_buffer *child)
2004 struct btrfs_key parent_key;
2005 struct btrfs_key child_key;
/* The child's first key is an item key for a leaf, a node key otherwise. */
2008 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2009 if (btrfs_header_level(child) == 0)
2010 btrfs_item_key_to_cpu(child, &child_key, 0);
2012 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: the parent's key for this slot equals the child's first key. */
2014 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2017 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2018 parent_key.objectid, parent_key.type, parent_key.offset,
2019 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: the parent's block pointer equals the child's own bytenr. */
2021 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2023 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2024 btrfs_node_blockptr(parent, slot),
2025 btrfs_header_bytenr(child));
/* Condition 3: the parent's pointer generation equals the child's. */
2027 if (btrfs_node_ptr_generation(parent, slot) !=
2028 btrfs_header_generation(child)) {
/*
 * NOTE(review): the two messages above print the parent's value as "wanted"
 * and the child's as "have"; here the arguments are in the opposite order,
 * so the labels look swapped -- confirm and reorder if unintended.
 */
2030 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2031 btrfs_header_generation(child),
2032 btrfs_node_ptr_generation(parent, slot));
2038 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2039 * in every fs or file tree check. Here we find all of its root ids, and only check
2040 * it in the fs or file tree which has the smallest root id.
/*
 * Decide from the ulist @roots whether a block is to be checked while
 * walking @root. A list with a single entry is handled separately; otherwise
 * the first entry in the ulist's rb-tree is compared with root->objectid.
 */
2042 static int need_check(struct btrfs_root *root, struct ulist *roots)
2044 struct rb_node *node;
2045 struct ulist_node *u;
2047 if (roots->nnodes == 1)
2050 node = rb_first(&roots->root);
2051 u = rb_entry(node, struct ulist_node, rb_node);
2053 * current root id is not smallest, we skip it and let it be checked
2054 * in the fs or file tree who hash the smallest root id.
2056 if (root->objectid != u->val)
2063 * for a tree node or leaf, we record its reference count, so later if we still
2064 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached data in @nrefs for @level when it does not already
 * describe @bytenr: look up the block's extent reference count, store it
 * together with the address, and set need_check[level] (either from
 * need_check() over the roots found for the block, or to 1).
 */
2066 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2067 struct node_refs *nrefs, u64 level)
2071 struct ulist *roots;
2073 if (nrefs->bytenr[level] != bytenr) {
2074 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2075 level, 1, &refs, NULL);
2079 nrefs->bytenr[level] = bytenr;
2080 nrefs->refs[level] = refs;
/* Find every root that references the block. */
2082 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2087 check = need_check(root, roots);
2089 nrefs->need_check[level] = check;
2091 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting from path->nodes[*level], walk down to a
 * leaf, validating each child block on the way, and process the leaf with
 * process_one_leaf(). Reference counts are cached per level in @nrefs, and
 * enter_shared_node() is called on the shared-node path.
 */
2098 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2099 struct walk_control *wc, int *level,
2100 struct node_refs *nrefs)
2102 enum btrfs_tree_block_status status;
2105 struct extent_buffer *next;
2106 struct extent_buffer *cur;
2111 WARN_ON(*level < 0);
2112 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count for the starting block if it matches. */
2114 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2115 refs = nrefs->refs[*level];
2118 ret = btrfs_lookup_extent_info(NULL, root,
2119 path->nodes[*level]->start,
2120 *level, 1, &refs, NULL);
2125 nrefs->bytenr[*level] = path->nodes[*level]->start;
2126 nrefs->refs[*level] = refs;
2130 ret = enter_shared_node(root, path->nodes[*level]->start,
2138 while (*level >= 0) {
2139 WARN_ON(*level < 0);
2140 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2141 cur = path->nodes[*level];
/* The header level has to agree with the level we think we are on. */
2143 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2146 if (path->slots[*level] >= btrfs_header_nritems(cur))
2149 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the child pointer for the current slot. */
2154 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2155 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2156 blocksize = root->nodesize;
/* Same caching of the reference count, now for the child's level. */
2158 if (bytenr == nrefs->bytenr[*level - 1]) {
2159 refs = nrefs->refs[*level - 1];
2161 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2162 *level - 1, 1, &refs, NULL);
2166 nrefs->bytenr[*level - 1] = bytenr;
2167 nrefs->refs[*level - 1] = refs;
2172 ret = enter_shared_node(root, bytenr, refs,
2175 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2180 next = btrfs_find_tree_block(root, bytenr, blocksize);
2181 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2182 free_extent_buffer(next);
2183 reada_walk_down(root, cur, path->slots[*level]);
2184 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: record a corrupt extent using the parent's key for this slot. */
2186 if (!extent_buffer_uptodate(next)) {
2187 struct btrfs_key node_key;
2189 btrfs_node_key_to_cpu(path->nodes[*level],
2191 path->slots[*level]);
2192 btrfs_add_corrupt_extent_record(root->fs_info,
2194 path->nodes[*level]->start,
2195 root->nodesize, *level);
/* The child has to match the key/block/generation stored in the parent. */
2201 ret = check_child_node(cur, path->slots[*level], next);
2203 free_extent_buffer(next);
2208 if (btrfs_is_leaf(next))
2209 status = btrfs_check_leaf(root, NULL, next);
2211 status = btrfs_check_node(root, NULL, next);
2212 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2213 free_extent_buffer(next);
/* Descend: install the child in the path one level down, at slot 0. */
2218 *level = *level - 1;
2219 free_extent_buffer(path->nodes[*level]);
2220 path->nodes[*level] = next;
2221 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2224 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Repeated forward declaration; the same prototype appears earlier in the file. */
2228 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2229 unsigned int ext_ref);
2232 * Returns >0 Found error, should continue
2233 * Returns <0 Fatal error, must exit the whole check
2234 * Returns 0 No errors found
2236 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2237 int *level, struct node_refs *nrefs, int ext_ref)
2239 enum btrfs_tree_block_status status;
2242 struct extent_buffer *next;
2243 struct extent_buffer *cur;
2247 WARN_ON(*level < 0);
2248 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Make sure nrefs describes the block we start from. */
2250 ret = update_nodes_refs(root, path->nodes[*level]->start,
2255 while (*level >= 0) {
2256 WARN_ON(*level < 0);
2257 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2258 cur = path->nodes[*level];
/* The header level has to agree with the level we think we are on. */
2260 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
2263 if (path->slots[*level] >= btrfs_header_nritems(cur))
2265 /* Don't forget to validate the leaf/node */
2267 ret = btrfs_check_leaf(root, NULL, cur);
2268 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2272 ret = process_one_leaf_v2(root, path, nrefs,
2276 ret = btrfs_check_node(root, NULL, cur);
2277 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Interior node: fetch the child pointer for the current slot. */
2282 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2283 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2284 blocksize = root->nodesize;
2286 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child is not flagged for checking from this root: move to the next slot. */
2289 if (!nrefs->need_check[*level - 1]) {
2290 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2294 next = btrfs_find_tree_block(root, bytenr, blocksize);
2295 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2296 free_extent_buffer(next);
2297 reada_walk_down(root, cur, path->slots[*level]);
2298 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: record a corrupt extent using the parent's key for this slot. */
2300 if (!extent_buffer_uptodate(next)) {
2301 struct btrfs_key node_key;
2303 btrfs_node_key_to_cpu(path->nodes[*level],
2305 path->slots[*level]);
2306 btrfs_add_corrupt_extent_record(root->fs_info,
2308 path->nodes[*level]->start,
2309 root->nodesize, *level);
/*
 * NOTE(review): walk_down_tree() visibly frees next after a failed
 * check_child_node(); no such free is visible here -- confirm next is not
 * leaked on that path.
 */
2315 ret = check_child_node(cur, path->slots[*level], next);
2319 if (btrfs_is_leaf(next))
2320 status = btrfs_check_leaf(root, NULL, next);
2322 status = btrfs_check_node(root, NULL, next);
2323 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2324 free_extent_buffer(next);
/* Descend: install the child in the path one level down, at slot 0. */
2329 *level = *level - 1;
2330 free_extent_buffer(path->nodes[*level]);
2331 path->nodes[*level] = next;
2332 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level looking for a block that still
 * has an unvisited slot. Blocks that are exhausted are released from the
 * path, and leave_shared_node() is called when the level being left is the
 * active shared node.
 */
2337 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2338 struct walk_control *wc, int *level)
2341 struct extent_buffer *leaf;
2343 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2344 leaf = path->nodes[i];
/* This block still has a slot after the current one. */
2345 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2350 free_extent_buffer(path->nodes[*level]);
2351 path->nodes[*level] = NULL;
2352 BUG_ON(*level > wc->active_node);
2353 if (*level == wc->active_node)
2354 leave_shared_node(root, wc, *level);
/*
 * Ascent without shared-node bookkeeping: climb from *level looking for a
 * block that still has an unvisited slot, releasing exhausted blocks from
 * the path.
 */
2361 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2365 struct extent_buffer *leaf;
2367 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2368 leaf = path->nodes[i];
/* This block still has a slot after the current one. */
2369 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2374 free_extent_buffer(path->nodes[*level]);
2375 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The visible tests
 * examine: presence of the inode item and absence of errors, nlink == 1
 * with found_link == 0, and a first backref that is an inode ref named ".."
 * at index 0 with neither a dir index nor a dir item.
 */
2382 static int check_root_dir(struct inode_record *rec)
2384 struct inode_backref *backref;
2387 if (!rec->found_inode_item || rec->errors)
2389 if (rec->nlink != 1 || rec->found_link != 0)
2391 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2393 backref = to_inode_backref(rec->backrefs.next);
2394 if (!backref->found_inode_ref)
2396 if (backref->index != 0 || backref->namelen != 2 ||
2397 memcmp(backref->name, "..", 2))
2399 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory's isize: rewrite the size field of the inode item of
 * rec->ino with rec->found_size and clear I_ERR_DIR_ISIZE_WRONG.
 */
2406 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2407 struct btrfs_root *root, struct btrfs_path *path,
2408 struct inode_record *rec)
2410 struct btrfs_inode_item *ei;
2411 struct btrfs_key key;
/* Search with the maximum offset; an exact match is not expected. */
2414 key.objectid = rec->ino;
2415 key.type = BTRFS_INODE_ITEM_KEY;
2416 key.offset = (u64)-1;
2418 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no earlier item in this leaf to look at. */
2422 if (!path->slots[0]) {
2429 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found belongs to a different inode. */
2430 if (key.objectid != rec->ino) {
2435 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2436 struct btrfs_inode_item);
2437 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2438 btrfs_mark_buffer_dirty(path->nodes[0]);
2439 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* NOTE(review): %Lu is a non-standard length modifier for integers; the sibling repair_inode_nbytes() uses %llu. */
2440 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2441 root->root_key.objectid);
2443 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino within @trans and clear
 * I_ERR_NO_ORPHAN_ITEM on the record. The path is released afterwards.
 */
2447 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2448 struct btrfs_root *root,
2449 struct btrfs_path *path,
2450 struct inode_record *rec)
2454 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2455 btrfs_release_path(path);
2457 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a file's nbytes: rewrite the nbytes field of the inode item of
 * rec->ino with rec->found_size and clear I_ERR_FILE_NBYTES_WRONG.
 */
2461 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2462 struct btrfs_root *root,
2463 struct btrfs_path *path,
2464 struct inode_record *rec)
2466 struct btrfs_inode_item *ei;
2467 struct btrfs_key key;
2470 key.objectid = rec->ino;
2471 key.type = BTRFS_INODE_ITEM_KEY;
2474 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2481 /* Since ret == 0, no need to check anything */
2482 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2483 struct btrfs_inode_item);
2484 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2485 btrfs_mark_buffer_dirty(path->nodes[0]);
2486 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2487 printf("reset nbytes for ino %llu root %llu\n",
2488 rec->ino, root->root_key.objectid);
2490 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref (directory backref->dir,
 * index backref->index, pointing at inode rec->ino) in its own transaction,
 * then update the in-memory records: the backref is marked as having a dir
 * index and the directory's found_size / isize error flag are recomputed.
 */
2494 static int add_missing_dir_index(struct btrfs_root *root,
2495 struct cache_tree *inode_cache,
2496 struct inode_record *rec,
2497 struct inode_backref *backref)
2499 struct btrfs_path path;
2500 struct btrfs_trans_handle *trans;
2501 struct btrfs_dir_item *dir_item;
2502 struct extent_buffer *leaf;
2503 struct btrfs_key key;
2504 struct btrfs_disk_key disk_key;
2505 struct inode_record *dir_rec;
2506 unsigned long name_ptr;
/* Item payload: dir item header followed by the name (no extra data). */
2507 u32 data_size = sizeof(*dir_item) + backref->namelen;
2510 trans = btrfs_start_transaction(root, 1);
2512 return PTR_ERR(trans);
2514 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2515 (unsigned long long)rec->ino);
2517 btrfs_init_path(&path);
2518 key.objectid = backref->dir;
2519 key.type = BTRFS_DIR_INDEX_KEY;
2520 key.offset = backref->index;
2521 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2524 leaf = path.nodes[0];
2525 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2527 disk_key.objectid = cpu_to_le64(rec->ino);
2528 disk_key.type = BTRFS_INODE_ITEM_KEY;
2529 disk_key.offset = 0;
2531 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2532 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2533 btrfs_set_dir_data_len(leaf, dir_item, 0);
2534 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right behind the dir item header. */
2535 name_ptr = (unsigned long)(dir_item + 1);
2536 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2537 btrfs_mark_buffer_dirty(leaf);
2538 btrfs_release_path(&path);
/* NOTE(review): the commit's return value is not used on this line. */
2539 btrfs_commit_transaction(trans, root);
2541 backref->found_dir_index = 1;
2542 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2543 BUG_ON(IS_ERR(dir_rec));
/* Account the new entry and re-evaluate the directory's isize error. */
2546 dir_rec->found_size += backref->namelen;
2547 if (dir_rec->found_size == dir_rec->isize &&
2548 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2549 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2550 if (dir_rec->found_size != dir_rec->isize)
2551 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) from @root in its own transaction.
 */
2556 static int delete_dir_index(struct btrfs_root *root,
2557 struct inode_backref *backref)
2559 struct btrfs_trans_handle *trans;
2560 struct btrfs_dir_item *di;
2561 struct btrfs_path path;
2564 trans = btrfs_start_transaction(root, 1);
2566 return PTR_ERR(trans);
2568 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2569 (unsigned long long)backref->dir,
2570 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2571 (unsigned long long)root->objectid);
2573 btrfs_init_path(&path);
/* Look the entry up by directory, name and index. */
2574 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2575 backref->name, backref->namelen,
2576 backref->index, -1);
/* Lookup did not yield an entry to delete: release the path and commit. */
2579 btrfs_release_path(&path);
2580 btrfs_commit_transaction(trans, root);
/* Either remove the whole item or just this one name from it. */
2587 ret = btrfs_del_item(trans, root, &path);
2589 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2591 btrfs_release_path(&path);
2592 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost inode item for rec->ino from what the check has learned:
 * link count, accumulated size, and whether dir items or file extents were
 * seen. Mode is fixed at 0755 (directory or regular file), atime/ctime/mtime
 * are set to the current time and otime to 0. Runs in its own transaction.
 */
2596 static int create_inode_item(struct btrfs_root *root,
2597 struct inode_record *rec,
2600 struct btrfs_trans_handle *trans;
2601 struct btrfs_inode_item inode_item;
2602 time_t now = time(NULL);
2605 trans = btrfs_start_transaction(root, 1);
2606 if (IS_ERR(trans)) {
2607 ret = PTR_ERR(trans);
2611 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2612 "be incomplete, please check permissions and content after "
2613 "the fsck completes.\n", (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2616 memset(&inode_item, 0, sizeof(inode_item));
2617 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from the links found during the check. */
2619 btrfs_set_stack_inode_nlink(&inode_item, 1);
2621 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2622 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Any dir item makes this a directory, even if extents were also seen. */
2623 if (rec->found_dir_item) {
2624 if (rec->found_file_extent)
2625 fprintf(stderr, "root %llu inode %llu has both a dir "
2626 "item and extents, unsure if it is a dir or a "
2627 "regular file so setting it as a directory\n",
2628 (unsigned long long)root->objectid,
2629 (unsigned long long)rec->ino);
2630 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2631 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true in the else branch. */
2632 } else if (!rec->found_dir_item) {
2633 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2634 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2636 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2637 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2638 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2639 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2640 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2641 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2642 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2643 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2645 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2647 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair inconsistent ones. The `delete` flag
 * selects the pass: the dir-index deletion step is the one not guarded by
 * !delete, while the steps that add missing items or recreate the inode
 * item are guarded by !delete. Returns ret if non-zero, otherwise
 * `repaired`.
 */
2651 static int repair_inode_backrefs(struct btrfs_root *root,
2652 struct inode_record *rec,
2653 struct cache_tree *inode_cache,
2656 struct inode_backref *tmp, *backref;
2657 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2661 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory itself: recreate its inode item if it is missing. */
2662 if (!delete && rec->ino == root_dirid) {
2663 if (!rec->found_inode_item) {
2664 ret = create_inode_item(root, rec, 1);
2671 /* Index 0 for root dir's are special, don't mess with it */
2672 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without a matching inode ref, or one whose index does not
 * match the inode ref, is deleted and the backref dropped from the list.
 */
2676 ((backref->found_dir_index && !backref->found_inode_ref) ||
2677 (backref->found_dir_index && backref->found_inode_ref &&
2678 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2679 ret = delete_dir_index(root, backref);
2683 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2688 if (!delete && !backref->found_dir_index &&
2689 backref->found_dir_item && backref->found_inode_ref) {
2690 ret = add_missing_dir_index(root, inode_cache, rec,
/* Backref is now complete and error free: drop it from the list. */
2695 if (backref->found_dir_item &&
2696 backref->found_dir_index) {
2697 if (!backref->errors &&
2698 backref->found_inode_ref) {
2699 list_del(&backref->list);
/* Only the inode ref exists: add the dir index/item pair. */
2706 if (!delete && (!backref->found_dir_index &&
2707 !backref->found_dir_item &&
2708 backref->found_inode_ref)) {
2709 struct btrfs_trans_handle *trans;
2710 struct btrfs_key location;
2712 ret = check_dir_conflict(root, backref->name,
2718 * let nlink fixing routine to handle it,
2719 * which can do it better.
2724 location.objectid = rec->ino;
2725 location.type = BTRFS_INODE_ITEM_KEY;
2726 location.offset = 0;
2728 trans = btrfs_start_transaction(root, 1);
2729 if (IS_ERR(trans)) {
2730 ret = PTR_ERR(trans);
2733 fprintf(stderr, "adding missing dir index/item pair "
2735 (unsigned long long)rec->ino);
2736 ret = btrfs_insert_dir_item(trans, root, backref->name,
2738 backref->dir, &location,
2739 imode_to_type(rec->imode),
2742 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is gone: recreate it. */
2746 if (!delete && (backref->found_inode_ref &&
2747 backref->found_dir_index &&
2748 backref->found_dir_item &&
2749 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2750 !rec->found_inode_item)) {
2751 ret = create_inode_item(root, rec, 0);
2758 return ret ? ret : repaired;
2762 * To determine the file type for nlink/inode_item repair
2764 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2765 * Return -ENOENT if file type is not found.
2767 static int find_file_type(struct inode_record *rec, u8 *type)
2769 struct inode_backref *backref;
2771 /* For inode item recovered case */
2772 if (rec->found_inode_item) {
2773 *type = imode_to_type(rec->imode);
/* Otherwise take the filetype from a backref that has a dir index or dir item. */
2777 list_for_each_entry(backref, &rec->backrefs, list) {
2778 if (backref->found_dir_index || backref->found_dir_item) {
2779 *type = backref->filetype;
2787 * To determine the file name for nlink repair
2789 * Return 0 if file name is found, set name and namelen.
2790 * Return -ENOENT if file name is not found.
/*
 * find_file_name - recover a name for an inode from any of its backrefs.
 *
 * @rec:     inode record under repair
 * @name:    output buffer; receives backref->namelen raw bytes via memcpy()
 * @namelen: output; length of the copied name
 *
 * Any backref seen as dir_index, dir_item or inode_ref is an acceptable
 * source.  The visible copy does not append a terminator, and the size of
 * the output buffer is not passed in.
 * NOTE(review): callers presumably supply at least BTRFS_NAME_LEN bytes,
 * as repair_inode_nlinks() does - confirm for any new caller.
 */
2792 static int find_file_name(struct inode_record *rec,
2793 char *name, int *namelen)
2795 struct inode_backref *backref;
2797 list_for_each_entry(backref, &rec->backrefs, list) {
2798 if (backref->found_dir_index || backref->found_dir_item ||
2799 backref->found_inode_ref) {
2800 memcpy(name, backref->name, backref->namelen);
2801 *namelen = backref->namelen;
2808 /* Reset the nlink of the inode to the correct one */
/*
 * reset_nlink - rebuild the link count of an inode from its backrefs.
 *
 * @trans: running transaction used for every tree modification
 * @root:  fs/subvolume tree that holds the inode
 * @path:  caller-supplied path, used for the INODE_ITEM lookup and
 *         released before returning
 * @rec:   inode record; found_link is zeroed and incomplete backrefs are
 *         dropped from rec->backrefs
 *
 * Three passes: btrfs_unlink() every backref, set the on-disk nlink to 0,
 * then btrfs_add_link() each backref still on the list (those with
 * dir_index, dir_item and inode_ref all present).
 */
2809 static int reset_nlink(struct btrfs_trans_handle *trans,
2810 struct btrfs_root *root,
2811 struct btrfs_path *path,
2812 struct inode_record *rec)
2814 struct inode_backref *backref;
2815 struct inode_backref *tmp;
2816 struct btrfs_key key;
2817 struct btrfs_inode_item *inode_item;
2820 /* We don't believe this either, reset it and iterate backref */
2821 rec->found_link = 0;
2823 /* Remove all backref including the valid ones */
2824 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2825 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2826 backref->index, backref->name,
2827 backref->namelen, 0);
2831 /* remove invalid backref, so it won't be added back */
2832 if (!(backref->found_dir_index &&
2833 backref->found_dir_item &&
2834 backref->found_inode_ref)) {
2835 list_del(&backref->list);
2842 /* Set nlink to 0 */
2843 key.objectid = rec->ino;
2844 key.type = BTRFS_INODE_ITEM_KEY;
2846 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2853 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2854 struct btrfs_inode_item);
2855 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2856 btrfs_mark_buffer_dirty(path->nodes[0]);
2857 btrfs_release_path(path);
2860 * Add back valid inode_ref/dir_item/dir_index,
2861 * add_link() will handle the nlink inc, so new nlink must be correct
2863 list_for_each_entry(backref, &rec->backrefs, list) {
2864 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2865 backref->name, backref->namelen,
2866 backref->filetype, &backref->index, 1);
2871 btrfs_release_path(path);
/*
 * get_highest_inode - report the objectid of the last item that sorts
 * before (BTRFS_LAST_FREE_OBJECTID, INODE_ITEM) in @root.
 *
 * @trans: transaction handed to btrfs_search_slot()
 * @root:  tree to search
 * @path:  scratch path; initialised here and released before returning
 *
 * The result is stored through the highest_ino output pointer.  It is
 * read from the slot just before the position the search lands on.
 * NOTE(review): reading slots[0] - 1 assumes the search did not land on
 * slot 0 - confirm that case is guarded.
 * NOTE(review): a result at or above BTRFS_LAST_FREE_OBJECTID is tested
 * separately, presumably to reject it as unusable - confirm.
 */
2875 static int get_highest_inode(struct btrfs_trans_handle *trans,
2876 struct btrfs_root *root,
2877 struct btrfs_path *path,
2880 struct btrfs_key key, found_key;
2883 btrfs_init_path(path);
2884 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2886 key.type = BTRFS_INODE_ITEM_KEY;
2887 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2889 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2890 path->slots[0] - 1);
2891 *highest_ino = found_key.objectid;
2894 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2896 btrfs_release_path(path);
/*
 * repair_inode_nlinks - fix a wrong link count, relinking an inode that
 * ends up with no link under the lost+found directory.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree holding the inode
 * @path:  scratch path, released before returning
 * @rec:   inode record; I_ERR_LINK_COUNT_WRONG is cleared at the end
 *
 * Steps visible here:
 *  1. Recover a name and type from the backrefs before they are touched,
 *     falling back to the decimal inode number and BTRFS_FT_REG_FILE.
 *  2. reset_nlink() rebuilds the links that have complete backrefs.
 *  3. If rec->found_link is still 0 afterwards, the lost+found directory
 *     is created under BTRFS_FIRST_FREE_OBJECTID and the inode is linked
 *     into it, with the name extended again on each -EEXIST.
 *
 * NOTE(review): the bound that stops the -EEXIST loop is presumably
 * BTRFS_NAME_LEN, matching the snprintf() limit - confirm.
 */
2900 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2901 struct btrfs_root *root,
2902 struct btrfs_path *path,
2903 struct inode_record *rec)
2905 char *dir_name = "lost+found";
2906 char namebuf[BTRFS_NAME_LEN] = {0};
2911 int name_recovered = 0;
2912 int type_recovered = 0;
2916 * Get file name and type first before these invalid inode ref
2917 * are deleted by remove_all_invalid_backref()
2919 name_recovered = !find_file_name(rec, namebuf, &namelen);
2920 type_recovered = !find_file_type(rec, &type);
2922 if (!name_recovered) {
2923 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2924 rec->ino, rec->ino);
2925 namelen = count_digits(rec->ino);
2926 sprintf(namebuf, "%llu", rec->ino);
2929 if (!type_recovered) {
2930 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2932 type = BTRFS_FT_REG_FILE;
2936 ret = reset_nlink(trans, root, path, rec);
2939 "Failed to reset nlink for inode %llu: %s\n",
2940 rec->ino, strerror(-ret));
2944 if (rec->found_link == 0) {
2945 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2949 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2950 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2953 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2954 dir_name, strerror(-ret));
2957 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2958 namebuf, namelen, type, NULL, 1);
2960 * Add ".INO" suffix several times to handle case where
2961 * "FILENAME.INO" is already taken by another file.
2963 while (ret == -EEXIST) {
2965 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2967 if (namelen + count_digits(rec->ino) + 1 >
2972 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2974 namelen += count_digits(rec->ino) + 1;
2975 ret = btrfs_add_link(trans, root, rec->ino,
2976 lost_found_ino, namebuf,
2977 namelen, type, NULL, 1);
2981 "Failed to link the inode %llu to %s dir: %s\n",
2982 rec->ino, dir_name, strerror(-ret));
2986 * Just increase the found_link, don't actually add the
2987 * backref. This will make things easier and this inode
2988 * record will be freed after the repair is done.
2989 * So fsck will not report problem about this inode.
2992 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2993 namelen, namebuf, dir_name);
2995 printf("Fixed the nlink of inode %llu\n", rec->ino);
2998 * Clear the flag anyway, or we will loop forever for the same inode
2999 * as it will not be removed from the bad inode list and the dead loop
3002 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3003 btrfs_release_path(path);
3008 * Check if there is any normal(reg or prealloc) file extent for given
3010 * This is used to determine the file type when neither its dir_index/item nor
3011 * inode_item exists.
3013 * This will *NOT* report error, if any error happens, just consider it does
3014 * not have any normal file extent.
/*
 * find_normal_file_extent - look for a non-inline file extent of @ino.
 *
 * @root: fs/subvolume tree to search (read-only: no transaction, no COW)
 * @ino:  inode number whose EXTENT_DATA items are examined
 *
 * Items are accepted only while their key still has objectid @ino and type
 * BTRFS_EXTENT_DATA_KEY; an extent whose type is anything other than
 * BTRFS_FILE_EXTENT_INLINE counts as a hit.  repair_inode_no_item() treats
 * a non-zero result as proof that the inode is a regular file.
 */
3016 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3018 struct btrfs_path path;
3019 struct btrfs_key key;
3020 struct btrfs_key found_key;
3021 struct btrfs_file_extent_item *fi;
3025 btrfs_init_path(&path);
3027 key.type = BTRFS_EXTENT_DATA_KEY;
3030 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3035 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3036 ret = btrfs_next_leaf(root, &path);
3043 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3045 if (found_key.objectid != ino ||
3046 found_key.type != BTRFS_EXTENT_DATA_KEY)
3048 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3049 struct btrfs_file_extent_item);
3050 type = btrfs_file_extent_type(path.nodes[0], fi);
3051 if (type != BTRFS_FILE_EXTENT_INLINE) {
3057 btrfs_release_path(&path);
/*
 * btrfs_type_to_imode - translate a BTRFS_FT_* directory entry type into
 * the matching S_IF* file type bits.
 *
 * The lookup table is indexed directly by @type; indexes inside the table
 * that have no initialiser yield 0.
 * NOTE(review): no range check on @type is visible, so a value beyond the
 * last initialised index would read past the table - confirm callers only
 * pass valid BTRFS_FT_* values.
 */
3061 static u32 btrfs_type_to_imode(u8 type)
3063 static u32 imode_by_btrfs_type[] = {
3064 [BTRFS_FT_REG_FILE] = S_IFREG,
3065 [BTRFS_FT_DIR] = S_IFDIR,
3066 [BTRFS_FT_CHRDEV] = S_IFCHR,
3067 [BTRFS_FT_BLKDEV] = S_IFBLK,
3068 [BTRFS_FT_FIFO] = S_IFIFO,
3069 [BTRFS_FT_SOCK] = S_IFSOCK,
3070 [BTRFS_FT_SYMLINK] = S_IFLNK,
3073 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item - recreate a missing INODE_ITEM for @rec.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree that should hold the inode
 * @path:  not used by the visible statements
 * @rec:   inode record; imode and the error bits are updated
 *
 * The type comes from find_file_type() when possible.  Otherwise it is
 * guessed: a non-inline file extent or a pending orphan extent means
 * regular file, a seen dir item means directory, and anything else falls
 * back to regular file with a message.  The new item is created by
 * btrfs_new_inode() with the chosen S_IF* bits ORed into mode.
 *
 * Afterwards I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs as well.
 * NOTE(review): the record update sets rec->found_dir_item rather than
 * rec->found_inode_item - confirm this is intended.
 */
3076 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3077 struct btrfs_root *root,
3078 struct btrfs_path *path,
3079 struct inode_record *rec)
3083 int type_recovered = 0;
3086 printf("Trying to rebuild inode:%llu\n", rec->ino);
3088 type_recovered = !find_file_type(rec, &filetype);
3091 * Try to determine inode type if type not found.
3093 * For found regular file extent, it must be FILE.
3094 * For found dir_item/index, it must be DIR.
3096 * For undetermined one, use FILE as fallback.
3099 * 1. If found backref(inode_index/item is already handled) to it,
3101 * Need new inode-inode ref structure to allow search for that.
3103 if (!type_recovered) {
3104 if (rec->found_file_extent &&
3105 find_normal_file_extent(root, rec->ino)) {
3107 filetype = BTRFS_FT_REG_FILE;
3108 } else if (rec->found_dir_item) {
3110 filetype = BTRFS_FT_DIR;
3111 } else if (!list_empty(&rec->orphan_extents)) {
3113 filetype = BTRFS_FT_REG_FILE;
3115 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3118 filetype = BTRFS_FT_REG_FILE;
3122 ret = btrfs_new_inode(trans, root, rec->ino,
3123 mode | btrfs_type_to_imode(filetype));
3128 * Here inode rebuild is done, we only rebuild the inode item,
3129 * don't repair the nlink(like move to lost+found).
3130 * That is the job of nlink repair.
3132 * We just fill the record and return
3134 rec->found_dir_item = 1;
3135 rec->imode = mode | btrfs_type_to_imode(filetype);
3137 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3138 /* Ensure the inode_nlinks repair function will be called */
3139 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent - re-attach orphan data extents to their
 * inode.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree holding the inode
 * @path:  scratch path for the conflict lookup
 * @rec:   inode record whose orphan_extents list is walked
 *
 * For each orphan, btrfs_get_extent() probes the file range starting at
 * orphan->offset with length orphan->disk_len.  When a conflict is
 * reported the orphan extent is freed through the extent root; the insert
 * path adds a file extent item with disk_len used for both length
 * arguments.  found_size, the hole tree and the related error bits in
 * @rec are then refreshed, the orphan is unlinked from the list, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 * NOTE(review): whether the conflict path skips the insert is not evident
 * here - confirm.
 */
3144 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root,
3146 struct btrfs_path *path,
3147 struct inode_record *rec)
3149 struct orphan_data_extent *orphan;
3150 struct orphan_data_extent *tmp;
3153 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3155 * Check for conflicting file extents
3157 * Here we don't know whether the extents is compressed or not,
3158 * so we can only assume it not compressed nor data offset,
3159 * and use its disk_len as extent length.
3161 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3162 orphan->offset, orphan->disk_len, 0);
3163 btrfs_release_path(path);
3168 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3169 orphan->disk_bytenr, orphan->disk_len);
3170 ret = btrfs_free_extent(trans,
3171 root->fs_info->extent_root,
3172 orphan->disk_bytenr, orphan->disk_len,
3173 0, root->objectid, orphan->objectid,
3178 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3179 orphan->offset, orphan->disk_bytenr,
3180 orphan->disk_len, orphan->disk_len);
3184 /* Update file size info */
3185 rec->found_size += orphan->disk_len;
3186 if (rec->found_size == rec->nbytes)
3187 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3189 /* Update the file extent hole info too */
3190 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3194 if (RB_EMPTY_ROOT(&rec->holes))
3195 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3197 list_del(&orphan->list);
3200 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent - fill the gaps recorded in rec->holes.
 *
 * @trans: running transaction
 * @root:  fs/subvolume tree holding the inode
 * @path:  not used by the visible statements
 * @rec:   inode record; its hole tree shrinks as gaps are repaired
 *
 * Each recorded hole is covered with btrfs_punch_hole() and then removed
 * from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is
 * empty.  The special case for a file that lost every extent punches one
 * hole from offset 0 up to isize rounded up to the sector size.
 */
3205 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3206 struct btrfs_root *root,
3207 struct btrfs_path *path,
3208 struct inode_record *rec)
3210 struct rb_node *node;
3211 struct file_extent_hole *hole;
3215 node = rb_first(&rec->holes);
3219 hole = rb_entry(node, struct file_extent_hole, node);
3220 ret = btrfs_punch_hole(trans, root, rec->ino,
3221 hole->start, hole->len);
3224 ret = del_file_extent_hole(&rec->holes, hole->start,
3228 if (RB_EMPTY_ROOT(&rec->holes))
3229 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3230 node = rb_first(&rec->holes);
3232 /* special case for a file losing all its file extent */
3234 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3235 round_up(rec->isize, root->sectorsize));
3239 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3240 rec->ino, root->objectid);
/*
 * try_repair_inode - run every applicable repair for one inode record.
 *
 * @root: fs/subvolume tree holding the inode
 * @rec:  inode record whose error bits select the repairs
 *
 * Only the error bits listed in the first test are handled here.  The
 * repairs share one transaction started with a reservation of 7 items and
 * run in a fixed order; each step is skipped once an earlier one has
 * failed.  The missing inode item repair goes first and itself raises
 * I_ERR_LINK_COUNT_WRONG, which the later nlink step then picks up.
 *
 * NOTE(review): the transaction is committed even when a step failed, and
 * the result of btrfs_commit_transaction() is not checked - confirm both
 * are intended.
 */
3245 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3247 struct btrfs_trans_handle *trans;
3248 struct btrfs_path path;
3251 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3252 I_ERR_NO_ORPHAN_ITEM |
3253 I_ERR_LINK_COUNT_WRONG |
3254 I_ERR_NO_INODE_ITEM |
3255 I_ERR_FILE_EXTENT_ORPHAN |
3256 I_ERR_FILE_EXTENT_DISCOUNT|
3257 I_ERR_FILE_NBYTES_WRONG)))
3261 * For nlink repair, it may create a dir and add link, so
3262 * 2 for parent(256)'s dir_index and dir_item
3263 * 2 for lost+found dir's inode_item and inode_ref
3264 * 1 for the new inode_ref of the file
3265 * 2 for lost+found dir's dir_index and dir_item for the file
3267 trans = btrfs_start_transaction(root, 7);
3269 return PTR_ERR(trans);
3271 btrfs_init_path(&path);
3272 if (rec->errors & I_ERR_NO_INODE_ITEM)
3273 ret = repair_inode_no_item(trans, root, &path, rec);
3274 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3275 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3276 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3277 ret = repair_inode_discount_extent(trans, root, &path, rec);
3278 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3279 ret = repair_inode_isize(trans, root, &path, rec);
3280 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3281 ret = repair_inode_orphan_item(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3283 ret = repair_inode_nlinks(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3285 ret = repair_inode_nbytes(trans, root, &path, rec);
3286 btrfs_commit_transaction(trans, root);
3287 btrfs_release_path(&path);
/*
 * check_inode_recs - validate, optionally repair, and report every inode
 * record collected for one fs/subvolume tree.
 *
 * @root:        tree the records belong to
 * @inode_cache: cache of inode records; records are removed and freed as
 *               they are processed
 *
 * Phases visible here:
 *  - a root item with zero refs gets special handling up front (a warning
 *    is printed if records exist anyway);
 *  - in repair mode, backrefs are repaired first, in stages, through
 *    repair_inode_backrefs();
 *  - the root directory record is checked with check_root_dir(); a
 *    missing root dir can be recreated with btrfs_make_root_dir();
 *  - every remaining record has its error bits completed, may go through
 *    try_repair_inode(), and is printed together with its unresolved
 *    backrefs if it cannot be freed.
 *
 * Returns -1 when at least one error was counted, 0 otherwise.
 */
3291 static int check_inode_recs(struct btrfs_root *root,
3292 struct cache_tree *inode_cache)
3294 struct cache_extent *cache;
3295 struct ptr_node *node;
3296 struct inode_record *rec;
3297 struct inode_backref *backref;
3302 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3304 if (btrfs_root_refs(&root->root_item) == 0) {
3305 if (!cache_tree_empty(inode_cache))
3306 fprintf(stderr, "warning line %d\n", __LINE__);
3311 * We need to repair backrefs first because we could change some of the
3312 * errors in the inode recs.
3314 * We also need to go through and delete invalid backrefs first and then
3315 * add the correct ones second. We do this because we may get EEXIST
3316 * when adding back the correct index because we hadn't yet deleted the
3319 * For example, if we were missing a dir index then the directories
3320 * isize would be wrong, so if we fixed the isize to what we thought it
3321 * would be and then fixed the backref we'd still have a invalid fs, so
3322 * we need to add back the dir index and then check to see if the isize
3327 if (stage == 3 && !err)
3330 cache = search_cache_extent(inode_cache, 0);
3331 while (repair && cache) {
3332 node = container_of(cache, struct ptr_node, cache);
3334 cache = next_cache_extent(cache);
3336 /* Need to free everything up and rescan */
3338 remove_cache_extent(inode_cache, &node->cache);
3340 free_inode_rec(rec);
3344 if (list_empty(&rec->backrefs))
3347 ret = repair_inode_backrefs(root, rec, inode_cache,
3361 rec = get_inode_rec(inode_cache, root_dirid, 0);
3362 BUG_ON(IS_ERR(rec));
3364 ret = check_root_dir(rec);
3366 fprintf(stderr, "root %llu root dir %llu error\n",
3367 (unsigned long long)root->root_key.objectid,
3368 (unsigned long long)root_dirid);
3369 print_inode_error(root, rec);
3374 struct btrfs_trans_handle *trans;
3376 trans = btrfs_start_transaction(root, 1);
3377 if (IS_ERR(trans)) {
3378 err = PTR_ERR(trans);
3383 "root %llu missing its root dir, recreating\n",
3384 (unsigned long long)root->objectid);
3386 ret = btrfs_make_root_dir(trans, root, root_dirid);
3389 btrfs_commit_transaction(trans, root);
3393 fprintf(stderr, "root %llu root dir %llu not found\n",
3394 (unsigned long long)root->root_key.objectid,
3395 (unsigned long long)root_dirid);
3399 cache = search_cache_extent(inode_cache, 0);
3402 node = container_of(cache, struct ptr_node, cache);
3404 remove_cache_extent(inode_cache, &node->cache);
3406 if (rec->ino == root_dirid ||
3407 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3408 free_inode_rec(rec);
3412 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3413 ret = check_orphan_item(root, rec->ino);
3415 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3416 if (can_free_inode_rec(rec)) {
3417 free_inode_rec(rec);
3422 if (!rec->found_inode_item)
3423 rec->errors |= I_ERR_NO_INODE_ITEM;
3424 if (rec->found_link != rec->nlink)
3425 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3427 ret = try_repair_inode(root, rec);
3428 if (ret == 0 && can_free_inode_rec(rec)) {
3429 free_inode_rec(rec);
3435 if (!(repair && ret == 0))
3437 print_inode_error(root, rec);
3438 list_for_each_entry(backref, &rec->backrefs, list) {
3439 if (!backref->found_dir_item)
3440 backref->errors |= REF_ERR_NO_DIR_ITEM;
3441 if (!backref->found_dir_index)
3442 backref->errors |= REF_ERR_NO_DIR_INDEX;
3443 if (!backref->found_inode_ref)
3444 backref->errors |= REF_ERR_NO_INODE_REF;
3445 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3446 " namelen %u name %s filetype %d errors %x",
3447 (unsigned long long)backref->dir,
3448 (unsigned long long)backref->index,
3449 backref->namelen, backref->name,
3450 backref->filetype, backref->errors);
3451 print_ref_error(backref->errors);
3453 free_inode_rec(rec);
3455 return (error > 0) ? -1 : 0;
/*
 * get_root_rec - look up the root record for an objectid, creating it on
 * first use.
 *
 * @root_cache: cache tree keyed by root objectid (each entry has size 1)
 *
 * A new record is zero-allocated, gets an empty backref list and is
 * inserted into @root_cache.  Failures are reported as ERR_PTR(-ENOMEM) or
 * ERR_PTR(-EEXIST); callers in this file wrap the call in
 * BUG_ON(IS_ERR()).
 * NOTE(review): no free of the new record is visible before the -EEXIST
 * return - confirm it is not leaked.
 */
3458 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3461 struct cache_extent *cache;
3462 struct root_record *rec = NULL;
3465 cache = lookup_cache_extent(root_cache, objectid, 1);
3467 rec = container_of(cache, struct root_record, cache);
3469 rec = calloc(1, sizeof(*rec));
3471 return ERR_PTR(-ENOMEM);
3472 rec->objectid = objectid;
3473 INIT_LIST_HEAD(&rec->backrefs);
3474 rec->cache.start = objectid;
3475 rec->cache.size = 1;
3477 ret = insert_cache_extent(root_cache, &rec->cache);
3479 return ERR_PTR(-EEXIST);
/*
 * get_root_backref - find or create the backref of @rec that matches
 * (@ref_root, @dir, @name).
 *
 * @rec:      root record owning the backref list
 * @ref_root: objectid of the root holding the reference
 * @dir:      directory id of the entry
 * @index:    stored on a newly created backref; not part of the match
 * @name:     entry name, @namelen bytes, need not be terminated
 * @namelen:  length of @name
 *
 * A new backref is zero-allocated with room for the name plus a
 * terminator, and appended to rec->backrefs.
 */
3484 static struct root_backref *get_root_backref(struct root_record *rec,
3485 u64 ref_root, u64 dir, u64 index,
3486 const char *name, int namelen)
3488 struct root_backref *backref;
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (backref->ref_root != ref_root || backref->dir != dir ||
3492 backref->namelen != namelen)
3494 if (memcmp(name, backref->name, namelen))
3499 backref = calloc(1, sizeof(*backref) + namelen + 1);
3502 backref->ref_root = ref_root;
3504 backref->index = index;
3505 backref->namelen = namelen;
3506 memcpy(backref->name, name, namelen);
3507 backref->name[namelen] = '\0';
3508 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record - destructor callback for one root_record cache entry.
 *
 * Unlinks every backref from rec->backrefs, one at a time.
 * NOTE(review): the backrefs and the record itself are presumably freed
 * as well - confirm.
 */
3512 static void free_root_record(struct cache_extent *cache)
3514 struct root_record *rec;
3515 struct root_backref *backref;
3517 rec = container_of(cache, struct root_record, cache);
3518 while (!list_empty(&rec->backrefs)) {
3519 backref = to_root_backref(rec->backrefs.next);
3520 list_del(&backref->list);
/*
 * NOTE(review): presumably generates free_root_recs_tree(), which
 * check_fs_roots() calls, on top of the callback above - confirm against
 * the macro definition.
 */
3527 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref - record one sighting of a reference to subvolume
 * @root_id.
 *
 * @root_cache: cache of root records
 * @root_id:    objectid of the root being referenced
 * @ref_root:   objectid of the root holding the reference
 * @dir, @index, @name, @namelen: identity of the directory entry
 * @item_type:  key type the sighting came from: BTRFS_DIR_ITEM_KEY,
 *              BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY or
 *              BTRFS_ROOT_BACKREF_KEY
 * @errors:     REF_ERR_* bits ORed into the backref
 *
 * Sightings other than a DIR_ITEM carry an index: when an indexed
 * sighting was already recorded, a differing index sets
 * REF_ERR_INDEX_UNMATCH, and the index is stored on the backref.  The
 * matching found_* flag is then set, duplicate ROOT_REF / ROOT_BACKREF
 * sightings are flagged, and the backref becomes reachable once both a
 * forward ref and a dir item have been seen.
 */
3529 static int add_root_backref(struct cache_tree *root_cache,
3530 u64 root_id, u64 ref_root, u64 dir, u64 index,
3531 const char *name, int namelen,
3532 int item_type, int errors)
3534 struct root_record *rec;
3535 struct root_backref *backref;
3537 rec = get_root_rec(root_cache, root_id);
3538 BUG_ON(IS_ERR(rec));
3539 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3542 backref->errors |= errors;
3544 if (item_type != BTRFS_DIR_ITEM_KEY) {
3545 if (backref->found_dir_index || backref->found_back_ref ||
3546 backref->found_forward_ref) {
3547 if (backref->index != index)
3548 backref->errors |= REF_ERR_INDEX_UNMATCH;
3550 backref->index = index;
3554 if (item_type == BTRFS_DIR_ITEM_KEY) {
3555 if (backref->found_forward_ref)
3557 backref->found_dir_item = 1;
3558 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3559 backref->found_dir_index = 1;
3560 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3561 if (backref->found_forward_ref)
3562 backref->errors |= REF_ERR_DUP_ROOT_REF;
3563 else if (backref->found_dir_item)
3565 backref->found_forward_ref = 1;
3566 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3567 if (backref->found_back_ref)
3568 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3569 backref->found_back_ref = 1;
3574 if (backref->found_forward_ref && backref->found_dir_item)
3575 backref->reachable = 1;
/*
 * merge_root_recs - fold the directory entries that point at child roots
 * into the global root record cache.
 *
 * @root:      tree whose walk produced @src_cache
 * @src_cache: inode records standing for subvolume entries; emptied here
 * @dst_cache: root record cache receiving the backrefs
 *
 * Records collected for the tree reloc root are simply discarded.  For
 * the rest, each record is removed from @src_cache, checked with
 * is_child_root(), and its dir_item / dir_index backrefs are replayed
 * through add_root_backref() with rec->ino as the child root id.  An
 * inode_ref on such a backref is treated as a fatal inconsistency
 * (BUG_ON).
 */
3579 static int merge_root_recs(struct btrfs_root *root,
3580 struct cache_tree *src_cache,
3581 struct cache_tree *dst_cache)
3583 struct cache_extent *cache;
3584 struct ptr_node *node;
3585 struct inode_record *rec;
3586 struct inode_backref *backref;
3589 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3590 free_inode_recs_tree(src_cache);
3595 cache = search_cache_extent(src_cache, 0);
3598 node = container_of(cache, struct ptr_node, cache);
3600 remove_cache_extent(src_cache, &node->cache);
3603 ret = is_child_root(root, root->objectid, rec->ino);
3609 list_for_each_entry(backref, &rec->backrefs, list) {
3610 BUG_ON(backref->found_inode_ref);
3611 if (backref->found_dir_item)
3612 add_root_backref(dst_cache, rec->ino,
3613 root->root_key.objectid, backref->dir,
3614 backref->index, backref->name,
3615 backref->namelen, BTRFS_DIR_ITEM_KEY,
3617 if (backref->found_dir_index)
3618 add_root_backref(dst_cache, rec->ino,
3619 root->root_key.objectid, backref->dir,
3620 backref->index, backref->name,
3621 backref->namelen, BTRFS_DIR_INDEX_KEY,
3625 free_inode_rec(rec);
/*
 * check_root_refs - verify that subvolume roots are referenced and that
 * their references are complete.
 *
 * @root:       any root of the filesystem; used to reach fs_info
 * @root_cache: root records built by add_root_backref()
 *
 * The record of the top-level fs tree (BTRFS_FS_TREE_OBJECTID) is looked
 * up, and created if needed, first.  One pass clears the reachable flag
 * on backrefs whose referencing root has no reference itself; as the
 * fixme below notes, circular references are not detected.  A second pass
 * reports unreferenced trees in the subvolume objectid range (after an
 * orphan item check), fills in the missing-item error bits on every
 * backref, and prints the unresolved ones.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3632 static int check_root_refs(struct btrfs_root *root,
3633 struct cache_tree *root_cache)
3635 struct root_record *rec;
3636 struct root_record *ref_root;
3637 struct root_backref *backref;
3638 struct cache_extent *cache;
3644 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3645 BUG_ON(IS_ERR(rec));
3648 /* fixme: this can not detect circular references */
3651 cache = search_cache_extent(root_cache, 0);
3655 rec = container_of(cache, struct root_record, cache);
3656 cache = next_cache_extent(cache);
3658 if (rec->found_ref == 0)
3661 list_for_each_entry(backref, &rec->backrefs, list) {
3662 if (!backref->reachable)
3665 ref_root = get_root_rec(root_cache,
3667 BUG_ON(IS_ERR(ref_root));
3668 if (ref_root->found_ref > 0)
3671 backref->reachable = 0;
3673 if (rec->found_ref == 0)
3679 cache = search_cache_extent(root_cache, 0);
3683 rec = container_of(cache, struct root_record, cache);
3684 cache = next_cache_extent(cache);
3686 if (rec->found_ref == 0 &&
3687 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3688 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3689 ret = check_orphan_item(root->fs_info->tree_root,
3695 * If we don't have a root item then we likely just have
3696 * a dir item in a snapshot for this root but no actual
3697 * ref key or anything so it's meaningless.
3699 if (!rec->found_root_item)
3702 fprintf(stderr, "fs tree %llu not referenced\n",
3703 (unsigned long long)rec->objectid);
3707 if (rec->found_ref > 0 && !rec->found_root_item)
3709 list_for_each_entry(backref, &rec->backrefs, list) {
3710 if (!backref->found_dir_item)
3711 backref->errors |= REF_ERR_NO_DIR_ITEM;
3712 if (!backref->found_dir_index)
3713 backref->errors |= REF_ERR_NO_DIR_INDEX;
3714 if (!backref->found_back_ref)
3715 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3716 if (!backref->found_forward_ref)
3717 backref->errors |= REF_ERR_NO_ROOT_REF;
3718 if (backref->reachable && backref->errors)
3725 fprintf(stderr, "fs tree %llu refs %u %s\n",
3726 (unsigned long long)rec->objectid, rec->found_ref,
3727 rec->found_root_item ? "" : "not found");
3729 list_for_each_entry(backref, &rec->backrefs, list) {
3730 if (!backref->reachable)
3732 if (!backref->errors && rec->found_root_item)
3734 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3735 " index %llu namelen %u name %s errors %x\n",
3736 (unsigned long long)backref->ref_root,
3737 (unsigned long long)backref->dir,
3738 (unsigned long long)backref->index,
3739 backref->namelen, backref->name,
3741 print_ref_error(backref->errors);
3744 return errors > 0 ? 1 : 0;
/*
 * process_root_ref - feed one ROOT_REF or ROOT_BACKREF item into the root
 * record cache.
 *
 * @eb, @slot:  leaf and slot holding the btrfs_root_ref item
 * @key:        key of the item; its type selects the direction
 * @root_cache: cache updated through add_root_backref()
 *
 * The name stored right after the btrfs_root_ref structure is copied into
 * a local buffer; an over-long name is clamped to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.  For a ROOT_REF key the referenced
 * root is key->offset and the referencing root is key->objectid; on the
 * other path the two are swapped.
 */
3747 static int process_root_ref(struct extent_buffer *eb, int slot,
3748 struct btrfs_key *key,
3749 struct cache_tree *root_cache)
3755 struct btrfs_root_ref *ref;
3756 char namebuf[BTRFS_NAME_LEN];
3759 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3761 dirid = btrfs_root_ref_dirid(eb, ref);
3762 index = btrfs_root_ref_sequence(eb, ref);
3763 name_len = btrfs_root_ref_name_len(eb, ref);
3765 if (name_len <= BTRFS_NAME_LEN) {
3769 len = BTRFS_NAME_LEN;
3770 error = REF_ERR_NAME_TOO_LONG;
3772 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3774 if (key->type == BTRFS_ROOT_REF_KEY) {
3775 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3776 index, namebuf, len, key->type, error);
3778 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3779 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block - destructor callback for one btrfs_corrupt_block
 * cache entry; resolves the containing structure from the cache extent.
 * NOTE(review): presumably releases that structure - confirm.
 */
3784 static void free_corrupt_block(struct cache_extent *cache)
3786 struct btrfs_corrupt_block *corrupt;
3788 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * NOTE(review): presumably generates free_corrupt_blocks_tree(), which
 * check_fs_root() calls, on top of the callback above - confirm against
 * the macro definition.
 */
3792 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3795 * Repair the btree of the given root.
3797 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3798 * and then rebalance the tree.
3799 * After the fix, the btree should be writeable.
/*
 * repair_btree - cut the recorded corrupted tree blocks out of @root.
 *
 * @root:           tree to repair
 * @corrupt_blocks: cache of btrfs_corrupt_block entries, each carrying
 *                  the level and node key of a bad block
 *
 * An empty cache is handled up front.  Otherwise, inside one transaction:
 *  1. for every entry, search down to the recorded level without
 *     balancing (ins_len 0), read the child block pointer, delete the
 *     pointer with btrfs_del_ptr() and free the extent of that block;
 *  2. for every entry again, search with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree around the removed key.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not checked
 * here - confirm.
 */
3801 static int repair_btree(struct btrfs_root *root,
3802 struct cache_tree *corrupt_blocks)
3804 struct btrfs_trans_handle *trans;
3805 struct btrfs_path path;
3806 struct btrfs_corrupt_block *corrupt;
3807 struct cache_extent *cache;
3808 struct btrfs_key key;
3813 if (cache_tree_empty(corrupt_blocks))
3816 trans = btrfs_start_transaction(root, 1);
3817 if (IS_ERR(trans)) {
3818 ret = PTR_ERR(trans);
3819 fprintf(stderr, "Error starting transaction: %s\n",
3823 btrfs_init_path(&path);
3824 cache = first_cache_extent(corrupt_blocks);
3826 corrupt = container_of(cache, struct btrfs_corrupt_block,
3828 level = corrupt->level;
3829 path.lowest_level = level;
3830 key.objectid = corrupt->key.objectid;
3831 key.type = corrupt->key.type;
3832 key.offset = corrupt->key.offset;
3835 * Here we don't want to do any tree balance, since it may
3836 * cause a balance with corrupted brother leaf/node,
3837 * so ins_len set to 0 here.
3838 * Balance will be done after all corrupt node/leaf is deleted.
3840 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3843 offset = btrfs_node_blockptr(path.nodes[level],
3846 /* Remove the ptr */
3847 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3851 * Remove the corresponding extent
3852 * return value is not concerned.
3854 btrfs_release_path(&path);
3855 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3856 0, root->root_key.objectid,
3858 cache = next_cache_extent(cache);
3861 /* Balance the btree using btrfs_search_slot() */
3862 cache = first_cache_extent(corrupt_blocks);
3864 corrupt = container_of(cache, struct btrfs_corrupt_block,
3866 memcpy(&key, &corrupt->key, sizeof(key));
3867 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3870 /* return will always >0 since it won't find the item */
3872 btrfs_release_path(&path);
3873 cache = next_cache_extent(cache);
3876 btrfs_commit_transaction(trans, root);
3877 btrfs_release_path(&path);
/*
 * check_fs_root - walk one fs/subvolume tree and check everything in it.
 *
 * @root:       tree to check
 * @root_cache: global root record cache, updated with what this tree
 *              references
 * @wc:         walk control shared between trees; its node stack is
 *              reset here
 *
 * Outline of the visible steps:
 *  - install a local corrupt_blocks cache in fs_info for the duration of
 *    the walk;
 *  - mark the root record as having a root item when the item has refs
 *    (skipped for the tree reloc root);
 *  - move pending orphan data extents onto their inode records;
 *  - check the root block itself, then walk the tree, starting from
 *    drop_progress / drop_level when the root item has no refs and a
 *    non-zero drop_progress objectid;
 *  - print any corrupted blocks that were recorded and pass them to
 *    repair_btree();
 *  - merge the collected subvolume entries into @root_cache and run
 *    check_inode_recs() on the collected inode records;
 *  - free the local corrupt block cache and the orphan extent list, and
 *    detach corrupt_blocks from fs_info.
 */
3881 static int check_fs_root(struct btrfs_root *root,
3882 struct cache_tree *root_cache,
3883 struct walk_control *wc)
3889 struct btrfs_path path;
3890 struct shared_node root_node;
3891 struct root_record *rec;
3892 struct btrfs_root_item *root_item = &root->root_item;
3893 struct cache_tree corrupt_blocks;
3894 struct orphan_data_extent *orphan;
3895 struct orphan_data_extent *tmp;
3896 enum btrfs_tree_block_status status;
3897 struct node_refs nrefs;
3900 * Reuse the corrupt_block cache tree to record corrupted tree block
3902 * Unlike the usage in extent tree check, here we do it in a per
3903 * fs/subvol tree base.
3905 cache_tree_init(&corrupt_blocks);
3906 root->fs_info->corrupt_blocks = &corrupt_blocks;
3908 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3909 rec = get_root_rec(root_cache, root->root_key.objectid);
3910 BUG_ON(IS_ERR(rec));
3911 if (btrfs_root_refs(root_item) > 0)
3912 rec->found_root_item = 1;
3915 btrfs_init_path(&path);
3916 memset(&root_node, 0, sizeof(root_node));
3917 cache_tree_init(&root_node.root_cache);
3918 cache_tree_init(&root_node.inode_cache);
3919 memset(&nrefs, 0, sizeof(nrefs));
3921 /* Move the orphan extent record to corresponding inode_record */
3922 list_for_each_entry_safe(orphan, tmp,
3923 &root->orphan_data_extents, list) {
3924 struct inode_record *inode;
3926 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3928 BUG_ON(IS_ERR(inode));
3929 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3930 list_move(&orphan->list, &inode->orphan_extents);
3933 level = btrfs_header_level(root->node);
3934 memset(wc->nodes, 0, sizeof(wc->nodes));
3935 wc->nodes[level] = &root_node;
3936 wc->active_node = level;
3937 wc->root_level = level;
3939 /* We may not have checked the root block, lets do that now */
3940 if (btrfs_is_leaf(root->node))
3941 status = btrfs_check_leaf(root, NULL, root->node);
3943 status = btrfs_check_node(root, NULL, root->node);
3944 if (status != BTRFS_TREE_BLOCK_CLEAN)
3947 if (btrfs_root_refs(root_item) > 0 ||
3948 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3949 path.nodes[level] = root->node;
3950 extent_buffer_get(root->node);
3951 path.slots[level] = 0;
3953 struct btrfs_key key;
3954 struct btrfs_disk_key found_key;
3956 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3957 level = root_item->drop_level;
3958 path.lowest_level = level;
3959 if (level > btrfs_header_level(root->node) ||
3960 level >= BTRFS_MAX_LEVEL) {
3961 error("ignoring invalid drop level: %u", level);
3964 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3967 btrfs_node_key(path.nodes[level], &found_key,
3969 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3970 sizeof(found_key)));
3974 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3980 wret = walk_up_tree(root, &path, wc, &level);
3987 btrfs_release_path(&path);
3989 if (!cache_tree_empty(&corrupt_blocks)) {
3990 struct cache_extent *cache;
3991 struct btrfs_corrupt_block *corrupt;
3993 printf("The following tree block(s) is corrupted in tree %llu:\n",
3994 root->root_key.objectid);
3995 cache = first_cache_extent(&corrupt_blocks);
3997 corrupt = container_of(cache,
3998 struct btrfs_corrupt_block,
4000 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4001 cache->start, corrupt->level,
4002 corrupt->key.objectid, corrupt->key.type,
4003 corrupt->key.offset);
4004 cache = next_cache_extent(cache);
4007 printf("Try to repair the btree for root %llu\n",
4008 root->root_key.objectid);
4009 ret = repair_btree(root, &corrupt_blocks);
4011 fprintf(stderr, "Failed to repair btree: %s\n",
4014 printf("Btree for root %llu is fixed\n",
4015 root->root_key.objectid);
4019 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4023 if (root_node.current) {
4024 root_node.current->checked = 1;
4025 maybe_free_inode_rec(&root_node.inode_cache,
4029 err = check_inode_recs(root, &root_node.inode_cache);
4033 free_corrupt_blocks_tree(&corrupt_blocks);
4034 root->fs_info->corrupt_blocks = NULL;
4035 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * fs_root_objectid - tell whether @objectid names a tree that
 * check_fs_roots() should walk.
 *
 * The tree reloc and data reloc objectids are tested explicitly; every
 * other value is decided by is_fstree().
 * NOTE(review): the explicit branch presumably returns true - confirm.
 */
4039 static int fs_root_objectid(u64 objectid)
4041 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4042 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4044 return is_fstree(objectid);
/*
 * check_fs_roots - iterate the root tree and check every fs/subvolume
 * tree found there.
 *
 * @root:       any root of the filesystem; used to reach fs_info
 * @root_cache: root record cache filled in along the way
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are loaded and passed to
 * check_fs_root(); tree reloc roots are read uncached and freed again
 * afterwards.  ROOT_REF and ROOT_BACKREF keys go to process_root_ref().
 * If the root node of the root tree changes during the scan, or
 * check_fs_root() returns -EAGAIN, the collected root records are dropped
 * and the path released.
 * NOTE(review): presumably the scan then restarts from the top - confirm.
 *
 * Progress reporting is started when ctx.progress_enabled is set, and
 * task_stop() is called near the end.
 */
4047 static int check_fs_roots(struct btrfs_root *root,
4048 struct cache_tree *root_cache)
4050 struct btrfs_path path;
4051 struct btrfs_key key;
4052 struct walk_control wc;
4053 struct extent_buffer *leaf, *tree_node;
4054 struct btrfs_root *tmp_root;
4055 struct btrfs_root *tree_root = root->fs_info->tree_root;
4059 if (ctx.progress_enabled) {
4060 ctx.tp = TASK_FS_ROOTS;
4061 task_start(ctx.info);
4065 * Just in case we made any changes to the extent tree that weren't
4066 * reflected into the free space cache yet.
4069 reset_cached_block_groups(root->fs_info);
4070 memset(&wc, 0, sizeof(wc));
4071 cache_tree_init(&wc.shared);
4072 btrfs_init_path(&path);
4077 key.type = BTRFS_ROOT_ITEM_KEY;
4078 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4083 tree_node = tree_root->node;
4085 if (tree_node != tree_root->node) {
4086 free_root_recs_tree(root_cache);
4087 btrfs_release_path(&path);
4090 leaf = path.nodes[0];
4091 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4092 ret = btrfs_next_leaf(tree_root, &path);
4098 leaf = path.nodes[0];
4100 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4101 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4102 fs_root_objectid(key.objectid)) {
4103 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4104 tmp_root = btrfs_read_fs_root_no_cache(
4105 root->fs_info, &key);
4107 key.offset = (u64)-1;
4108 tmp_root = btrfs_read_fs_root(
4109 root->fs_info, &key);
4111 if (IS_ERR(tmp_root)) {
4115 ret = check_fs_root(tmp_root, root_cache, &wc);
4116 if (ret == -EAGAIN) {
4117 free_root_recs_tree(root_cache);
4118 btrfs_release_path(&path);
4123 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4124 btrfs_free_fs_root(tmp_root);
4125 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4126 key.type == BTRFS_ROOT_BACKREF_KEY) {
4127 process_root_ref(leaf, path.slots[0], &key,
4134 btrfs_release_path(&path);
4136 free_extent_cache_tree(&wc.shared);
4137 if (!cache_tree_empty(&wc.shared))
4138 fprintf(stderr, "warning line %d\n", __LINE__);
4140 task_stop(ctx.info);
4146 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4147 * INODE_REF/INODE_EXTREF match.
4149 * @root: the root of the fs/file tree
4150 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4151 * @key: the key of the DIR_ITEM/DIR_INDEX
4152 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4153 * distinguish root_dir from normal dir/file
4154 * @name: the name in the INODE_REF/INODE_EXTREF
4155 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4156 * @mode: the st_mode of INODE_ITEM
4158 * Return 0 if no error occurred.
4159 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4160 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4162 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4163 * do not match for normal dir/file.
/*
 * Implementation notes: @key is looked up exactly as given. The item found
 * may pack several dir entries, so each entry is compared against the inode
 * number in @ref_key, the file type derived from @mode, and @name/@namelen.
 * NOTE(review): the conditions guarding the early branches and the loop exit
 * statements are not visible in this view.
 */
4165 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4166 struct btrfs_key *key, u64 index, char *name,
4167 u32 namelen, u32 mode)
4169 struct btrfs_path path;
4170 struct extent_buffer *node;
4171 struct btrfs_dir_item *di;
4172 struct btrfs_key location;
4173 char namebuf[BTRFS_NAME_LEN] = {0};
4183 btrfs_init_path(&path);
4184 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4186 ret = DIR_ITEM_MISSING;
4190 /* Process root dir and goto out */
4193 ret = ROOT_DIR_ERROR;
4195 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4197 ref_key->type == BTRFS_INODE_REF_KEY ?
4199 ref_key->objectid, ref_key->offset,
4200 key->type == BTRFS_DIR_ITEM_KEY ?
4201 "DIR_ITEM" : "DIR_INDEX");
4209 /* Process normal file/dir */
4211 ret = DIR_ITEM_MISSING;
4213 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4215 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4216 ref_key->objectid, ref_key->offset,
4217 key->type == BTRFS_DIR_ITEM_KEY ?
4218 "DIR_ITEM" : "DIR_INDEX",
4219 key->objectid, key->offset, namelen, name,
4220 imode_to_type(mode));
4224 /* Check whether inode_id/filetype/name match */
4225 node = path.nodes[0];
4226 slot = path.slots[0];
4227 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4228 total = btrfs_item_size_nr(node, slot);
/* ret is reset to DIR_ITEM_MISMATCH for every entry examined. */
4229 while (cur < total) {
4230 ret = DIR_ITEM_MISMATCH;
4231 name_len = btrfs_dir_name_len(node, di);
4232 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the referencing inode. */
4234 btrfs_dir_item_key_to_cpu(node, di, &location);
4235 if (location.objectid != ref_key->objectid ||
4236 location.type != BTRFS_INODE_ITEM_KEY ||
4237 location.offset != 0)
4240 filetype = btrfs_dir_type(node, di);
4241 if (imode_to_type(mode) != filetype)
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length. */
4244 if (cur + sizeof(*di) + name_len > total ||
4245 name_len > BTRFS_NAME_LEN) {
4246 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4248 key->type == BTRFS_DIR_ITEM_KEY ?
4249 "DIR_ITEM" : "DIR_INDEX",
4250 key->objectid, key->offset, name_len);
4252 if (cur + sizeof(*di) > total)
4254 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes are read from directly behind the entry header. */
4260 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4261 if (len != namelen || strncmp(namebuf, name, len))
/* Advance to the next packed entry: header + name + data. */
4267 len = sizeof(*di) + name_len + data_len;
4268 di = (struct btrfs_dir_item *)((char *)di + len);
/* ret still holds MISMATCH from the last examined entry: report it. */
4271 if (ret == DIR_ITEM_MISMATCH)
4273 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4275 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4276 ref_key->objectid, ref_key->offset,
4277 key->type == BTRFS_DIR_ITEM_KEY ?
4278 "DIR_ITEM" : "DIR_INDEX",
4279 key->objectid, key->offset, namelen, name,
4280 imode_to_type(mode));
4282 btrfs_release_path(&path);
4287 * Traverse the given INODE_REF and call find_dir_item() to find related
4288 * DIR_ITEM/DIR_INDEX.
4290 * @root: the root of the fs/file tree
4291 * @ref_key: the key of the INODE_REF
4292 * @refs: the count of INODE_REF
4293 * @mode: the st_mode of INODE_ITEM
4295 * Return 0 if no error occurred.
/*
 * Implementation notes: an INODE_REF item may hold several refs back to
 * back. For each one the name is copied into namebuf (clamped to the item
 * size and BTRFS_NAME_LEN) and find_dir_item() is called twice, once with a
 * DIR_INDEX key and once with a DIR_ITEM key, both using ref_key->offset as
 * the key objectid.
 * NOTE(review): the loop header, the remaining parameters and the handling of
 * the find_dir_item() results are not visible in this view.
 */
4297 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4298 struct extent_buffer *node, int slot, u64 *refs,
4301 struct btrfs_key key;
4302 struct btrfs_inode_ref *ref;
4303 char namebuf[BTRFS_NAME_LEN] = {0};
4311 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4312 total = btrfs_item_size_nr(node, slot);
4315 /* Update inode ref count */
4318 index = btrfs_inode_ref_index(node, ref);
4319 name_len = btrfs_inode_ref_name_len(node, ref);
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length. */
4320 if (cur + sizeof(*ref) + name_len > total ||
4321 name_len > BTRFS_NAME_LEN) {
4322 warning("root %llu INODE_REF[%llu %llu] name too long",
4323 root->objectid, ref_key->objectid, ref_key->offset);
4325 if (total < cur + sizeof(*ref))
4327 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes are read from directly behind the ref header. */
4332 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4334 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len, and namebuf has no spare byte for a terminator when len equals
 * BTRFS_NAME_LEN although the message below uses %s - confirm both are safe.
 */
4335 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4336 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4337 root->objectid, ref_key->objectid, ref_key->offset,
4339 err |= ROOT_DIR_ERROR;
4342 /* Find related DIR_INDEX */
4343 key.objectid = ref_key->offset;
4344 key.type = BTRFS_DIR_INDEX_KEY;
4346 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4349 /* Find related dir_item */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the (clamped) name. */
4352 key.offset = btrfs_name_hash(namebuf, len);
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next packed ref: header + on-disk name length. */
4356 len = sizeof(*ref) + name_len;
4357 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4367 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4368 * DIR_ITEM/DIR_INDEX.
4370 * @root: the root of the fs/file tree
4371 * @ref_key: the key of the INODE_EXTREF
4372 * @refs: the count of INODE_EXTREF
4373 * @mode: the st_mode of INODE_ITEM
4375 * Return 0 if no error occurred.
/*
 * Implementation notes: mirrors check_inode_ref() for INODE_EXTREF items.
 * The parent directory comes from the extref itself rather than from the key
 * offset, and is used as the objectid of both lookup keys.
 * NOTE(review): the visible length clamp only compares name_len with
 * BTRFS_NAME_LEN; unlike check_inode_ref() no bound against the remaining
 * item size is visible here - confirm whether one is needed.
 */
4377 static int check_inode_extref(struct btrfs_root *root,
4378 struct btrfs_key *ref_key,
4379 struct extent_buffer *node, int slot, u64 *refs,
4382 struct btrfs_key key;
4383 struct btrfs_inode_extref *extref;
4384 char namebuf[BTRFS_NAME_LEN] = {0};
4394 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4395 total = btrfs_item_size_nr(node, slot);
4398 /* update inode ref count */
4400 name_len = btrfs_inode_extref_name_len(node, extref);
4401 index = btrfs_inode_extref_index(node, extref);
4402 parent = btrfs_inode_extref_parent(node, extref);
/* Names longer than BTRFS_NAME_LEN are clamped with a warning. */
4403 if (name_len <= BTRFS_NAME_LEN) {
4406 len = BTRFS_NAME_LEN;
4407 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4408 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are read from directly behind the extref header. */
4410 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4412 /* Check root dir ref name */
/* NOTE(review): compares with the on-disk name_len, not the clamped len. */
4413 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4414 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4415 root->objectid, ref_key->objectid, ref_key->offset,
4417 err |= ROOT_DIR_ERROR;
4420 /* find related dir_index */
4421 key.objectid = parent;
4422 key.type = BTRFS_DIR_INDEX_KEY;
4424 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4427 /* find related dir_item */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the (clamped) name. */
4430 key.offset = btrfs_name_hash(namebuf, len);
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next packed extref: header + on-disk name length. */
4434 len = sizeof(*extref) + name_len;
4435 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4445 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4446 * DIR_ITEM/DIR_INDEX match.
4448 * @root: the root of the fs/file tree
4449 * @key: the key of the INODE_REF/INODE_EXTREF
4450 * @name: the name in the INODE_REF/INODE_EXTREF
4451 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4452 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4454 * @ext_ref: the EXTENDED_IREF feature
4456 * Return 0 if no error occurred.
4457 * Return >0 for error bitmap
/*
 * Implementation notes: the search runs in two stages. First the INODE_REF
 * item at @key is scanned entry by entry. Then the path is reset and @key is
 * turned into an INODE_EXTREF key to scan that item the same way.
 * Side effect: key->type and key->offset of the caller's key are overwritten
 * before the second stage.
 * NOTE(review): the match/continue statements inside both loops and the
 * condition guarding the second stage are not visible in this view.
 */
4459 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4460 char *name, int namelen, u64 index,
4461 unsigned int ext_ref)
4463 struct btrfs_path path;
4464 struct btrfs_inode_ref *ref;
4465 struct btrfs_inode_extref *extref;
4466 struct extent_buffer *node;
4467 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4478 btrfs_init_path(&path);
4479 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4481 ret = INODE_REF_MISSING;
4485 node = path.nodes[0];
4486 slot = path.slots[0];
4488 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4489 total = btrfs_item_size_nr(node, slot);
4491 /* Iterate all entries of INODE_REF */
4492 while (cur < total) {
4493 ret = INODE_REF_MISSING;
4495 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4496 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison. */
4497 if (index != (u64)-1 && index != ref_index)
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length. */
4500 if (cur + sizeof(*ref) + ref_namelen > total ||
4501 ref_namelen > BTRFS_NAME_LEN) {
4502 warning("root %llu INODE %s[%llu %llu] name too long",
4504 key->type == BTRFS_INODE_REF_KEY ?
4506 key->objectid, key->offset);
4508 if (cur + sizeof(*ref) > total)
4510 len = min_t(u32, total - cur - sizeof(*ref),
4516 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4519 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next packed ref: header + on-disk name length. */
4525 len = sizeof(*ref) + ref_namelen;
4526 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4531 /* Skip if the EXTENDED_IREF feature is not supported */
4535 btrfs_release_path(&path);
4536 btrfs_init_path(&path);
/*
 * Build the INODE_EXTREF key: the old key offset is kept as the directory
 * id and the new offset is the extref hash of that id and the name.
 */
4538 dir_id = key->offset;
4539 key->type = BTRFS_INODE_EXTREF_KEY;
4540 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4542 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4544 ret = INODE_REF_MISSING;
4548 node = path.nodes[0];
4549 slot = path.slots[0];
4551 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4553 total = btrfs_item_size_nr(node, slot);
4555 /* Iterate all entries of INODE_EXTREF */
4556 while (cur < total) {
4557 ret = INODE_REF_MISSING;
4559 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4560 ref_index = btrfs_inode_extref_index(node, extref);
4561 parent = btrfs_inode_extref_parent(node, extref);
4562 if (index != (u64)-1 && index != ref_index)
/* The extref must belong to the directory the key was built for. */
4565 if (parent != dir_id)
4568 if (ref_namelen <= BTRFS_NAME_LEN) {
4571 len = BTRFS_NAME_LEN;
4572 warning("root %llu INODE %s[%llu %llu] name too long",
4574 key->type == BTRFS_INODE_REF_KEY ?
4576 key->objectid, key->offset);
4578 read_extent_buffer(node, ref_namebuf,
4579 (unsigned long)(extref + 1), len);
4581 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next packed extref: header + on-disk name length. */
4588 len = sizeof(*extref) + ref_namelen;
4589 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4594 btrfs_release_path(&path);
4599 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4600 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4602 * @root: the root of the fs/file tree
4603 * @key: the key of the DIR_ITEM/DIR_INDEX
4604 * @size: the st_size of the INODE_ITEM
4605 * @ext_ref: the EXTENDED_IREF feature
4607 * Return 0 if no error occurred.
/*
 * Implementation notes: every entry packed in the item is checked in three
 * steps: name length sanity, existence and file type of the INODE_ITEM the
 * entry points at, and presence of a matching INODE_REF/INODE_EXTREF via
 * find_inode_ref(). *size accumulates the on-disk name_len of each entry.
 * NOTE(review): the conditions around several error branches and the way
 * cur advances are not visible in this view.
 */
4609 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4610 struct extent_buffer *node, int slot, u64 *size,
4611 unsigned int ext_ref)
4613 struct btrfs_dir_item *di;
4614 struct btrfs_inode_item *ii;
4615 struct btrfs_path path;
4616 struct btrfs_key location;
4617 char namebuf[BTRFS_NAME_LEN] = {0};
4630 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4631 * ignore index check.
4633 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4635 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4636 total = btrfs_item_size_nr(node, slot);
4638 while (cur < total) {
4639 data_len = btrfs_dir_data_len(node, di);
4641 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4642 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643 "DIR_ITEM" : "DIR_INDEX",
4644 key->objectid, key->offset, data_len);
4646 name_len = btrfs_dir_name_len(node, di);
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length. */
4647 if (cur + sizeof(*di) + name_len > total ||
4648 name_len > BTRFS_NAME_LEN) {
4649 warning("root %llu %s[%llu %llu] name too long",
4651 key->type == BTRFS_DIR_ITEM_KEY ?
4652 "DIR_ITEM" : "DIR_INDEX",
4653 key->objectid, key->offset);
4655 if (cur + sizeof(*di) > total)
4657 len = min_t(u32, total - cur - sizeof(*di),
/* The size total uses the on-disk name_len, not the clamped len. */
4662 (*size) += name_len;
4664 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4665 filetype = btrfs_dir_type(node, di);
4667 btrfs_init_path(&path);
4668 btrfs_dir_item_key_to_cpu(node, di, &location);
4670 /* Ignore related ROOT_ITEM check */
4671 if (location.type == BTRFS_ROOT_ITEM_KEY)
4674 /* Check relative INODE_ITEM(existence/filetype) */
4675 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4677 err |= INODE_ITEM_MISSING;
4678 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4679 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4680 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4681 key->offset, location.objectid, name_len,
4686 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4687 struct btrfs_inode_item);
4688 mode = btrfs_inode_mode(path.nodes[0], ii);
4690 if (imode_to_type(mode) != filetype) {
4691 err |= INODE_ITEM_MISMATCH;
4692 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4693 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4694 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4695 key->offset, name_len, namebuf, filetype);
4698 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: offset is this directory's id. */
4699 location.type = BTRFS_INODE_REF_KEY;
4700 location.offset = key->objectid;
4701 ret = find_inode_ref(root, &location, namebuf, len,
4704 if (ret & INODE_REF_MISSING)
4705 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4706 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4707 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4708 key->offset, name_len, namebuf, filetype);
4711 btrfs_release_path(&path);
/* Advance to the next packed entry: header + name + data. */
4712 len = sizeof(*di) + name_len + data_len;
4713 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after one entry is reported. */
4716 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4717 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4718 root->objectid, key->objectid, key->offset);
4727 * Check file extent datasum/hole, update the size of the file extents,
4728 * check and update the last offset of the file extent.
4730 * @root: the root of fs/file tree.
4731 * @fkey: the key of the file extent.
4732 * @nodatasum: INODE_NODATASUM feature.
4733 * @size: the sum of all EXTENT_DATA items size for this inode.
4734 * @end: the offset of the last extent.
4736 * Return 0 if no error occurred.
/*
 * Implementation notes: inline extents are validated on their own (non-zero
 * length, and for uncompressed data a length equal to the item payload).
 * Regular and preallocated extents get a checksum-coverage check through
 * count_csum_range() and a hole/continuity check against *end.
 * *size and *end are advanced by extent_num_bytes in the visible lines.
 * NOTE(review): the early exits after the inline and bad-type branches and
 * the condition selecting the csum search range are not visible here.
 */
4738 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4739 struct extent_buffer *node, int slot,
4740 unsigned int nodatasum, u64 *size, u64 *end)
4742 struct btrfs_file_extent_item *fi;
4745 u64 extent_num_bytes;
4747 u64 csum_found; /* In byte size, sectorsize aligned */
4748 u64 search_start; /* Logical range start we search for csum */
4749 u64 search_len; /* Logical range len we search for csum */
4750 unsigned int extent_type;
4751 unsigned int is_hole;
4756 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4758 /* Check inline extent */
4759 extent_type = btrfs_file_extent_type(node, fi);
4760 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4761 struct btrfs_item *e = btrfs_item_nr(slot);
4762 u32 item_inline_len;
4764 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4765 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4766 compressed = btrfs_file_extent_compression(node, fi);
4767 if (extent_num_bytes == 0) {
4769 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4770 root->objectid, fkey->objectid, fkey->offset);
4771 err |= FILE_EXTENT_ERROR;
/* The length/payload comparison only applies to uncompressed data. */
4773 if (!compressed && extent_num_bytes != item_inline_len) {
4775 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4776 root->objectid, fkey->objectid, fkey->offset,
4777 extent_num_bytes, item_inline_len);
4778 err |= FILE_EXTENT_ERROR;
4780 *size += extent_num_bytes;
4784 /* Check extent type */
4785 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4786 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4787 err |= FILE_EXTENT_ERROR;
4788 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4789 root->objectid, fkey->objectid, fkey->offset);
4793 /* Check REG_EXTENT/PREALLOC_EXTENT */
4794 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4795 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4796 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4797 extent_offset = btrfs_file_extent_offset(node, fi);
4798 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4799 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4802 * Check EXTENT_DATA csum
4804 * For plain (uncompressed) extent, we should only check the range
4805 * we're referring to, as it's possible that part of prealloc extent
4806 * has been written, and has csum:
4808 * |<--- Original large preallocated extent A ---->|
4809 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4812 * For compressed extent, we should check the whole range.
4815 search_start = disk_bytenr + extent_offset;
4816 search_len = extent_num_bytes;
4818 search_start = disk_bytenr;
4819 search_len = disk_num_bytes;
4821 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive findings: csums on a nodatasum inode, a regular
 * non-hole extent with too few csums (or a failed count), and csums on a
 * preallocated extent.
 */
4822 if (csum_found > 0 && nodatasum) {
4823 err |= ODD_CSUM_ITEM;
4824 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4825 root->objectid, fkey->objectid, fkey->offset);
4826 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4827 !is_hole && (ret < 0 || csum_found < search_len)) {
4828 err |= CSUM_ITEM_MISSING;
4829 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4830 root->objectid, fkey->objectid, fkey->offset,
4831 csum_found, search_len);
4832 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4833 err |= ODD_CSUM_ITEM;
4834 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4835 root->objectid, fkey->objectid, fkey->offset, csum_found);
4838 /* Check EXTENT_DATA hole */
/*
 * With no_holes set, explicit hole extents are an error; without it, each
 * extent must start exactly where the previous one ended (*end).
 */
4839 if (no_holes && is_hole) {
4840 err |= FILE_EXTENT_ERROR;
4841 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4842 root->objectid, fkey->objectid, fkey->offset);
4843 } else if (!no_holes && *end != fkey->offset) {
4844 err |= FILE_EXTENT_ERROR;
4845 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4846 root->objectid, fkey->objectid, fkey->offset);
4849 *end += extent_num_bytes;
4851 *size += extent_num_bytes;
4857 * Check INODE_ITEM and related ITEMs (the same inode number)
4858 * 1. check link count
4859 * 2. check inode ref/extref
4860 * 3. check dir item/index
4862 * @ext_ref: the EXTENDED_IREF feature
4864 * Return 0 if no error occurred.
4865 * Return >0 (error bitmap) if an error occurred or the traversal is done
/*
 * Implementation notes: @path must point at an INODE_ITEM. The fields of
 * that item are read first, then the path is advanced item by item while the
 * key objectid still equals the inode number, dispatching on the key type to
 * the per-item checkers. The collected refs, size and extent_size totals are
 * finally compared with nlink, isize and nbytes of the INODE_ITEM.
 * NOTE(review): the loop and switch headers, several branch conditions and
 * the return statements are not visible in this view.
 */
4867 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4868 unsigned int ext_ref)
4870 struct extent_buffer *node;
4871 struct btrfs_inode_item *ii;
4872 struct btrfs_key key;
4881 u64 extent_size = 0;
4883 unsigned int nodatasum;
4888 node = path->nodes[0];
4889 slot = path->slots[0];
4891 btrfs_item_key_to_cpu(node, &key, slot);
4892 inode_id = key.objectid;
/* Items of the orphan objectid are stepped over, not checked as an inode. */
4894 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4895 ret = btrfs_next_item(root, path);
4901 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4902 isize = btrfs_inode_size(node, ii);
4903 nbytes = btrfs_inode_nbytes(node, ii);
4904 mode = btrfs_inode_mode(node, ii);
4905 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4906 nlink = btrfs_inode_nlink(node, ii);
4907 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4910 ret = btrfs_next_item(root, path);
4912 /* out will fill 'err' using current statistics */
4914 } else if (ret > 0) {
4919 node = path->nodes[0];
4920 slot = path->slots[0];
4921 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4922 if (key.objectid != inode_id)
4926 case BTRFS_INODE_REF_KEY:
4927 ret = check_inode_ref(root, &key, node, slot, &refs,
4931 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF item without the feature enabled only draws a warning. */
4932 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4933 warning("root %llu EXTREF[%llu %llu] isn't supported",
4934 root->objectid, key.objectid,
4936 ret = check_inode_extref(root, &key, node, slot, &refs,
4940 case BTRFS_DIR_ITEM_KEY:
4941 case BTRFS_DIR_INDEX_KEY:
4943 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4944 root->objectid, inode_id,
4945 imode_to_type(mode), key.objectid,
4948 ret = check_dir_item(root, &key, node, slot, &size,
4952 case BTRFS_EXTENT_DATA_KEY:
4954 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4955 root->objectid, inode_id, key.objectid,
4958 ret = check_file_extent(root, &key, node, slot,
4959 nodatasum, &extent_size,
4963 case BTRFS_XATTR_ITEM_KEY:
4966 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4967 key.objectid, key.type, key.offset);
4972 /* verify INODE_ITEM nlink/isize/nbytes */
4975 err |= LINK_COUNT_ERROR;
4976 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4977 root->objectid, inode_id, nlink);
4981 * Just a warning, as dir inode nbytes is just an
4982 * instructive value.
4984 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4985 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4986 root->objectid, inode_id, root->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
4989 if (isize != size) {
4991 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4992 root->objectid, inode_id, isize, size);
/* refs was accumulated by the INODE_REF/INODE_EXTREF checkers. */
4995 if (nlink != refs) {
4996 err |= LINK_COUNT_ERROR;
4997 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4998 root->objectid, inode_id, nlink, refs);
4999 } else if (!nlink) {
5003 if (!nbytes && !no_holes && extent_end < isize) {
5004 err |= NBYTES_ERROR;
5005 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5006 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
5009 if (nbytes != extent_size) {
5010 err |= NBYTES_ERROR;
5011 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5012 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID of @root.
 *
 * The item is looked up directly; when found it is passed to
 * check_inode_item(), otherwise INODE_ITEM_MISSING is added to the error
 * bitmap.
 *
 * @root:    the fs/file tree to check
 * @ext_ref: the EXTENDED_IREF feature, forwarded to check_inode_item()
 *
 * NOTE(review): the right-hand side of the drop_progress comparison, the
 * branch conditions after the search and the return statement are not
 * visible in this view.
 */
5019 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5021 struct btrfs_path path;
5022 struct btrfs_key key;
5026 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5027 key.type = BTRFS_INODE_ITEM_KEY;
5030 /* For root being dropped, we don't need to check first inode */
5031 if (btrfs_root_refs(&root->root_item) == 0 &&
5032 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5036 btrfs_init_path(&path);
5038 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5043 err |= INODE_ITEM_MISSING;
5044 error("first inode item of root %llu is missing",
5048 err |= check_inode_item(root, &path, ext_ref);
5053 btrfs_release_path(&path);
5058 * Iterate all items in the tree and call check_inode_item() to check them.
5060 * @root: the root of the tree to be checked.
5061 * @ext_ref: the EXTENDED_IREF feature
5063 * Return 0 if no error found.
5064 * Return <0 for error.
/*
 * Implementation notes: after the explicit first-inode check the tree is
 * walked with walk_down_tree_v2() / walk_up_tree_v2(). The walk starts at
 * the root node unless the root has no refs and a non-zero drop_progress
 * objectid, in which case it starts from the drop_progress key at
 * drop_level.
 * NOTE(review): the loop around the two walk calls and the handling of
 * their results are not visible in this view.
 */
5066 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5068 struct btrfs_path path;
5069 struct node_refs nrefs;
5070 struct btrfs_root_item *root_item = &root->root_item;
5076 * We need to manually check the first inode item(256)
5077 * As the following traversal function will only start from
5078 * the first inode item in the leaf, if inode item(256) is missing
5079 * we will just skip it forever.
5081 ret = check_fs_first_inode(root, ext_ref);
5085 memset(&nrefs, 0, sizeof(nrefs));
5086 level = btrfs_header_level(root->node);
5087 btrfs_init_path(&path);
/* Normal case: seed the path with the root node at its own level. */
5089 if (btrfs_root_refs(root_item) > 0 ||
5090 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5091 path.nodes[level] = root->node;
5092 path.slots[level] = 0;
/* The path now holds root->node as well, so take an extra reference. */
5093 extent_buffer_get(root->node);
5095 struct btrfs_key key;
/* Otherwise position the path at the recorded drop progress. */
5097 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5098 level = root_item->drop_level;
5099 path.lowest_level = level;
5100 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5107 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5110 /* if ret is negative, walk shall stop */
5116 ret = walk_up_tree_v2(root, &path, &level);
5118 /* Normal exit, reset ret to err */
5125 btrfs_release_path(&path);
5130 * Find the relative ref for root_ref and root_backref.
5132 * @root: the root of the root tree.
5133 * @ref_key: the key of the root ref.
5135 * Return 0 if no error occurred.
/*
 * Implementation notes: the counterpart of a ROOT_REF is the ROOT_BACKREF
 * with objectid and offset swapped, and vice versa. The counterpart is
 * looked up in @root and its dirid, sequence, name length and name are
 * compared with those of the item at @node/@slot.
 * NOTE(review): the branch condition after the search and the return
 * statement are not visible in this view.
 */
5137 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5138 struct extent_buffer *node, int slot)
5140 struct btrfs_path path;
5141 struct btrfs_key key;
5142 struct btrfs_root_ref *ref;
5143 struct btrfs_root_ref *backref;
5144 char ref_name[BTRFS_NAME_LEN] = {0};
5145 char backref_name[BTRFS_NAME_LEN] = {0};
5151 u32 backref_namelen;
5156 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5157 ref_dirid = btrfs_root_ref_dirid(node, ref);
5158 ref_seq = btrfs_root_ref_sequence(node, ref);
5159 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are clamped with a warning. */
5161 if (ref_namelen <= BTRFS_NAME_LEN) {
5164 len = BTRFS_NAME_LEN;
5165 warning("%s[%llu %llu] ref_name too long",
5166 ref_key->type == BTRFS_ROOT_REF_KEY ?
5167 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5170 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5172 /* Find relative root_ref */
5173 key.objectid = ref_key->offset;
/* Sum minus the given type yields the other of the two key types. */
5174 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5175 key.offset = ref_key->objectid;
5177 btrfs_init_path(&path);
5178 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5180 err |= ROOT_REF_MISSING;
5181 error("%s[%llu %llu] couldn't find relative ref",
5182 ref_key->type == BTRFS_ROOT_REF_KEY ?
5183 "ROOT_REF" : "ROOT_BACKREF",
5184 ref_key->objectid, ref_key->offset);
5188 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5189 struct btrfs_root_ref);
5190 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5191 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5192 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5194 if (backref_namelen <= BTRFS_NAME_LEN) {
5195 len = backref_namelen;
5197 len = BTRFS_NAME_LEN;
5198 warning("%s[%llu %llu] ref_name too long",
5199 key.type == BTRFS_ROOT_REF_KEY ?
5200 "ROOT_REF" : "ROOT_BACKREF",
5201 key.objectid, key.offset);
5203 read_extent_buffer(path.nodes[0], backref_name,
5204 (unsigned long)(backref + 1), len);
/* Names are compared over len, the clamped length of the counterpart. */
5206 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5207 ref_namelen != backref_namelen ||
5208 strncmp(ref_name, backref_name, len)) {
5209 err |= ROOT_REF_MISMATCH;
5210 error("%s[%llu %llu] mismatch relative ref",
5211 ref_key->type == BTRFS_ROOT_REF_KEY ?
5212 "ROOT_REF" : "ROOT_BACKREF",
5213 ref_key->objectid, ref_key->offset);
5216 btrfs_release_path(&path);
5221 * Check all fs/file trees in low_memory mode.
5223 * 1. for fs tree root item, call check_fs_root_v2()
5224 * 2. for fs tree root ref/backref, call check_root_ref()
5226 * Return 0 if no error occurred.
/*
 * Implementation notes: the tree root is scanned starting at the ROOT_ITEM
 * of BTRFS_FS_TREE_OBJECTID and advanced with btrfs_next_item(). ROOT_ITEMs
 * accepted by fs_root_objectid() are read and checked with
 * check_fs_root_v2(); ROOT_REF/ROOT_BACKREF items go to check_root_ref().
 * NOTE(review): the loop header, the statement taken when the objectid
 * exceeds BTRFS_LAST_FREE_OBJECTID and the result handling are not visible
 * in this view. If that statement ends the scan and
 * BTRFS_TREE_RELOC_OBJECTID compares above BTRFS_LAST_FREE_OBJECTID, the
 * reloc branch below would never run - confirm against the constants.
 */
5228 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5230 struct btrfs_root *tree_root = fs_info->tree_root;
5231 struct btrfs_root *cur_root = NULL;
5232 struct btrfs_path path;
5233 struct btrfs_key key;
5234 struct extent_buffer *node;
5235 unsigned int ext_ref;
5240 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5242 btrfs_init_path(&path);
5243 key.objectid = BTRFS_FS_TREE_OBJECTID;
5245 key.type = BTRFS_ROOT_ITEM_KEY;
5247 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5251 } else if (ret > 0) {
5257 node = path.nodes[0];
5258 slot = path.slots[0];
5259 btrfs_item_key_to_cpu(node, &key, slot);
5260 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5262 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5263 fs_root_objectid(key.objectid)) {
/* The reloc tree is read without caching and freed explicitly below. */
5264 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5265 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5268 key.offset = (u64)-1;
5269 cur_root = btrfs_read_fs_root(fs_info, &key);
5272 if (IS_ERR(cur_root)) {
5273 error("Fail to read fs/subvol tree: %lld",
5279 ret = check_fs_root_v2(cur_root, ext_ref);
5282 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5283 btrfs_free_fs_root(cur_root);
5284 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5285 key.type == BTRFS_ROOT_BACKREF_KEY) {
5286 ret = check_root_ref(tree_root, &key, node, slot);
5290 ret = btrfs_next_item(tree_root, &path);
5300 btrfs_release_path(&path);
/*
 * Walk every backref attached to @rec and look for inconsistencies:
 * backrefs not found in the extent tree, tree backrefs never referenced
 * back, data backrefs whose found count, disk bytenr or byte count disagree
 * with the record, and finally a total found count that differs from
 * rec->refs.
 *
 * @rec:        extent record whose rec->backrefs list is examined
 * @print_errs: NOTE(review): presumably gates the fprintf() diagnostics; the
 *              statements that test it are not visible in this view
 *
 * NOTE(review): the error accumulation, the list advance and the return
 * statement are not visible here. maybe_free_extent_rec() treats a zero
 * return as "all backpointers are consistent".
 */
5304 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5306 struct list_head *cur = rec->backrefs.next;
5307 struct extent_backref *back;
5308 struct tree_backref *tback;
5309 struct data_backref *dback;
5313 while(cur != &rec->backrefs) {
5314 back = to_extent_backref(cur);
/* Backref was never seen in the extent tree. */
5316 if (!back->found_extent_tree) {
5320 if (back->is_data) {
5321 dback = to_data_backref(back);
5322 fprintf(stderr, "Backref %llu %s %llu"
5323 " owner %llu offset %llu num_refs %lu"
5324 " not found in extent tree\n",
5325 (unsigned long long)rec->start,
5326 back->full_backref ?
5328 back->full_backref ?
5329 (unsigned long long)dback->parent:
5330 (unsigned long long)dback->root,
5331 (unsigned long long)dback->owner,
5332 (unsigned long long)dback->offset,
5333 (unsigned long)dback->num_refs);
5335 tback = to_tree_backref(back);
5336 fprintf(stderr, "Backref %llu parent %llu"
5337 " root %llu not found in extent tree\n",
5338 (unsigned long long)rec->start,
5339 (unsigned long long)tback->parent,
5340 (unsigned long long)tback->root);
/* Tree backref exists but nothing was found referencing it. */
5343 if (!back->is_data && !back->found_ref) {
5347 tback = to_tree_backref(back);
5348 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5349 (unsigned long long)rec->start,
5350 back->full_backref ? "parent" : "root",
5351 back->full_backref ?
5352 (unsigned long long)tback->parent :
5353 (unsigned long long)tback->root, back);
/* Data backrefs carry counts and sizes that must agree with the record. */
5355 if (back->is_data) {
5356 dback = to_data_backref(back);
5357 if (dback->found_ref != dback->num_refs) {
5361 fprintf(stderr, "Incorrect local backref count"
5362 " on %llu %s %llu owner %llu"
5363 " offset %llu found %u wanted %u back %p\n",
5364 (unsigned long long)rec->start,
5365 back->full_backref ?
5367 back->full_backref ?
5368 (unsigned long long)dback->parent:
5369 (unsigned long long)dback->root,
5370 (unsigned long long)dback->owner,
5371 (unsigned long long)dback->offset,
5372 dback->found_ref, dback->num_refs, back);
5374 if (dback->disk_bytenr != rec->start) {
5378 fprintf(stderr, "Backref disk bytenr does not"
5379 " match extent record, bytenr=%llu, "
5380 "ref bytenr=%llu\n",
5381 (unsigned long long)rec->start,
5382 (unsigned long long)dback->disk_bytenr);
5385 if (dback->bytes != rec->nr) {
5389 fprintf(stderr, "Backref bytes do not match "
5390 "extent backref, bytenr=%llu, ref "
5391 "bytes=%llu, backref bytes=%llu\n",
5392 (unsigned long long)rec->start,
5393 (unsigned long long)rec->nr,
5394 (unsigned long long)dback->bytes);
/* Accumulate the global found count; data backrefs add found_ref. */
5397 if (!back->is_data) {
5400 dback = to_data_backref(back);
5401 found += dback->found_ref;
/* The summed count must equal the reference count of the record. */
5404 if (found != rec->refs) {
5408 fprintf(stderr, "Incorrect global backref count "
5409 "on %llu found %llu wanted %llu\n",
5410 (unsigned long long)rec->start,
5411 (unsigned long long)found,
5412 (unsigned long long)rec->refs);
/*
 * Drain the rec->backrefs list: while the list is not empty its first node
 * is taken and converted to its extent_backref.
 * NOTE(review): the unlink/free of each node and the return statement are
 * not visible in this view.
 */
5418 static int free_all_extent_backrefs(struct extent_record *rec)
5420 struct extent_backref *back;
5421 struct list_head *cur;
5422 while (!list_empty(&rec->backrefs)) {
5423 cur = rec->backrefs.next;
5424 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: a cache_extent obtained from
 * first_cache_extent() is mapped to its enclosing extent_record, removed
 * from the tree, and its backref list is drained through
 * free_all_extent_backrefs().
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible here -- confirm.
 */
5431 static void free_extent_record_cache(struct cache_tree *extent_cache)
5433 struct cache_extent *cache;
5434 struct extent_record *rec;
5437 cache = first_cache_extent(extent_cache);
5440 rec = container_of(cache, struct extent_record, cache);
5441 remove_cache_extent(extent_cache, cache);
5442 free_all_extent_backrefs(rec);
/*
 * Remove @rec from @extent_cache once nothing about it is left to verify.
 *
 * All of the following must hold: content and owner ref are already checked,
 * the ref count taken from the extent item equals the refs found and is
 * non-zero, there are no duplicates, all_backpointers_checked(rec, 0)
 * returns 0, and none of bad_full_backref / crossing_stripes /
 * wrong_chunk_type is set.
 *
 * When they do, the record is unlinked from the cache, its backrefs are
 * released and rec->list is detached from whatever list it is on.
 * NOTE(review): the release of @rec itself and the return value are not
 * visible here.
 */
5447 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5448 struct extent_record *rec)
5450 if (rec->content_checked && rec->owner_ref_checked &&
5451 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5452 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5453 !rec->bad_full_backref && !rec->crossing_stripes &&
5454 !rec->wrong_chunk_type) {
5455 remove_cache_extent(extent_cache, &rec->cache);
5456 free_all_extent_backrefs(rec);
5457 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is referenced by the tree named in its header
 * owner field.
 *
 * Step 1: scan rec->backrefs for a tree backref that was found, is not a
 * full backref, and whose root equals btrfs_header_owner(buf).
 * Step 2: otherwise read the owner's root, search it for the first key of
 * @buf while stopping one level above @buf, and compare the block pointer in
 * the parent slot with buf->start.
 *
 * Returns 0 when an owner reference was found, 1 otherwise.
 * NOTE(review): the early exits of the backref scan, the data-backref
 * filter, and the error handling after btrfs_read_fs_root() and
 * btrfs_search_slot() are not visible here.
 */
5463 static int check_owner_ref(struct btrfs_root *root,
5464 struct extent_record *rec,
5465 struct extent_buffer *buf)
5467 struct extent_backref *node;
5468 struct tree_backref *back;
5469 struct btrfs_root *ref_root;
5470 struct btrfs_key key;
5471 struct btrfs_path path;
5472 struct extent_buffer *parent;
5477 list_for_each_entry(node, &rec->backrefs, list) {
5480 if (!node->found_ref)
5482 if (node->full_backref)
5484 back = to_tree_backref(node);
5485 if (btrfs_header_owner(buf) == back->root)
5488 BUG_ON(rec->is_root);
5490 /* try to find the block by search corresponding fs tree */
5491 key.objectid = btrfs_header_owner(buf);
5492 key.type = BTRFS_ROOT_ITEM_KEY;
5493 key.offset = (u64)-1;
5495 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5496 if (IS_ERR(ref_root))
5499 level = btrfs_header_level(buf);
/* The search key is the first key stored in @buf (item key or node key). */
5501 btrfs_item_key_to_cpu(buf, &key, 0);
5503 btrfs_node_key_to_cpu(buf, &key, 0);
5505 btrfs_init_path(&path);
/* Stop one level above @buf so the parent's pointer can be inspected. */
5506 path.lowest_level = level + 1;
5507 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5511 parent = path.nodes[level + 1];
5512 if (parent && buf->start == btrfs_node_blockptr(parent,
5513 path.slots[level + 1]))
5516 btrfs_release_path(&path);
5517 return found ? 0 : 1;
/*
 * Scan rec->backrefs for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID. Full backrefs are tested separately before the
 * root comparison.
 * NOTE(review): the return statements and the loop advance are not visible
 * here -- confirm what is returned for data backrefs and for full backrefs.
 */
5520 static int is_extent_tree_record(struct extent_record *rec)
5522 struct list_head *cur = rec->backrefs.next;
5523 struct extent_backref *node;
5524 struct tree_backref *back;
5527 while(cur != &rec->backrefs) {
5528 node = to_extent_backref(cur);
5532 back = to_tree_backref(node);
5533 if (node->full_backref)
5535 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block as a corrupt extent record when it passes the
 * is_extent_tree_record() test.
 *
 * Looks up the range (start, len) in @extent_cache, converts the record's
 * parent_key to CPU byte order and forwards it to
 * btrfs_add_corrupt_extent_record(); that call's result is returned.
 * NOTE(review): the results for "no cache entry" and "not an extent tree
 * record" are not visible here.
 */
5542 static int record_bad_block_io(struct btrfs_fs_info *info,
5543 struct cache_tree *extent_cache,
5546 struct extent_record *rec;
5547 struct cache_extent *cache;
5548 struct btrfs_key key;
5550 cache = lookup_cache_extent(extent_cache, start, len);
5554 rec = container_of(cache, struct extent_record, cache);
5555 if (!is_extent_tree_record(rec))
5558 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5559 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level != 0): the two struct btrfs_key_ptr entries are read
 * out and written back in swapped positions.
 * Leaf: the two item payloads are copied out to temporary heap buffers and
 * written back, the item offsets and sizes are exchanged, and the keys of
 * the two slots are swapped through btrfs_set_item_key_unsafe().
 */
5562 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5563 struct extent_buffer *buf, int slot)
5565 if (btrfs_header_level(buf)) {
5566 struct btrfs_key_ptr ptr1, ptr2;
5568 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5569 sizeof(struct btrfs_key_ptr));
5570 read_extent_buffer(buf, &ptr2,
5571 btrfs_node_key_ptr_offset(slot + 1),
5572 sizeof(struct btrfs_key_ptr));
5573 write_extent_buffer(buf, &ptr1,
5574 btrfs_node_key_ptr_offset(slot + 1),
5575 sizeof(struct btrfs_key_ptr));
5576 write_extent_buffer(buf, &ptr2,
5577 btrfs_node_key_ptr_offset(slot),
5578 sizeof(struct btrfs_key_ptr));
/*
 * Hand the key now at index 0 to btrfs_fixup_low_keys() for the level above
 * @buf. NOTE(review): the condition guarding this is not visible here;
 * presumably it only applies when slot 0 was involved -- confirm.
 */
5580 struct btrfs_disk_key key;
5581 btrfs_node_key(buf, &key, 0);
5582 btrfs_fixup_low_keys(root, path, &key,
5583 btrfs_header_level(buf) + 1);
5586 struct btrfs_item *item1, *item2;
5587 struct btrfs_key k1, k2;
5588 char *item1_data, *item2_data;
5589 u32 item1_offset, item2_offset, item1_size, item2_size;
5591 item1 = btrfs_item_nr(slot);
5592 item2 = btrfs_item_nr(slot + 1);
5593 btrfs_item_key_to_cpu(buf, &k1, slot);
5594 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5595 item1_offset = btrfs_item_offset(buf, item1);
5596 item2_offset = btrfs_item_offset(buf, item2);
5597 item1_size = btrfs_item_size(buf, item1);
5598 item2_size = btrfs_item_size(buf, item2);
5600 item1_data = malloc(item1_size);
5603 item2_data = malloc(item2_size);
5609 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5610 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each write below takes its length from the *other* item:
 * item2_size bytes are copied out of item1_data (allocated with item1_size)
 * and item1_size bytes out of item2_data (allocated with item2_size). When
 * the two sizes differ, one copy reads past the end of its heap buffer and
 * the other truncates the payload. Looks like a defect -- confirm the
 * intended layout before changing it.
 */
5612 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5613 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5617 btrfs_set_item_offset(buf, item1, item2_offset);
5618 btrfs_set_item_offset(buf, item2, item1_offset);
5619 btrfs_set_item_size(buf, item1, item2_size);
5620 btrfs_set_item_size(buf, item2, item1_size);
/* Swap the keys: @slot receives k2 and @slot + 1 receives k1. */
5622 path->slots[0] = slot;
5623 btrfs_set_item_key_unsafe(root, path, &k2);
5624 path->slots[0] = slot + 1;
5625 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys are compared pairwise with btrfs_comp_cpu_keys(); a pair
 * that compares as strictly ascending (< 0) is treated as fine, any other
 * pair is handed to swap_values() and the buffer is marked dirty.
 * NOTE(review): the node/leaf selection between the two key readers, the
 * handling of swap_values() failures and the loop restart are not visible
 * here.
 */
5630 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5632 struct extent_buffer *buf;
5633 struct btrfs_key k1, k2;
5635 int level = path->lowest_level;
5638 buf = path->nodes[level];
/*
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, a block
 * with zero items makes "nritems - 1" wrap to a huge value -- confirm an
 * empty block can never reach this loop.
 */
5639 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5641 btrfs_node_key_to_cpu(buf, &k1, i);
5642 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5644 btrfs_item_key_to_cpu(buf, &k1, i);
5645 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5647 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5649 ret = swap_values(root, path, buf, i);
5652 btrfs_mark_buffer_dirty(buf);
/*
 * Drop the item at @slot from leaf @buf by removing its item header.
 *
 * Only keys of type DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF
 * and EXTENT_DATA_REF are eligible. The headers of all following items are
 * moved down by one position and the header item count is decremented; the
 * visible code does not move any item payload.
 * NOTE(review): the return values and the condition around the
 * btrfs_fixup_low_keys() call are not visible here (presumably it runs only
 * when slot 0 was removed -- confirm).
 */
5658 static int delete_bogus_item(struct btrfs_root *root,
5659 struct btrfs_path *path,
5660 struct extent_buffer *buf, int slot)
5662 struct btrfs_key key;
5663 int nritems = btrfs_header_nritems(buf);
5665 btrfs_item_key_to_cpu(buf, &key, slot);
5667 /* These are all the keys we can deal with missing. */
5668 if (key.type != BTRFS_DIR_INDEX_KEY &&
5669 key.type != BTRFS_EXTENT_ITEM_KEY &&
5670 key.type != BTRFS_METADATA_ITEM_KEY &&
5671 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5672 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5675 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5676 (unsigned long long)key.objectid, key.type,
5677 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by @slot. */
5678 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5679 btrfs_item_nr_offset(slot + 1),
5680 sizeof(struct btrfs_item) *
5681 (nritems - slot - 1));
5682 btrfs_set_header_nritems(buf, nritems - 1);
5684 struct btrfs_disk_key disk_key;
5686 btrfs_item_key(buf, &disk_key, 0);
5687 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5689 btrfs_mark_buffer_dirty(buf);
/*
 * Repair the payload offsets of the leaf at path->nodes[0].
 *
 * Expected layout checked here: item 0 ends at BTRFS_LEAF_DATA_SIZE(root)
 * and every later item ends where the previous item begins.
 *  - An item ending beyond its limit is passed to delete_bogus_item().
 *  - An item ending short of its limit has its payload moved up by the gap
 *    ("shift") and its item offset updated; the buffer is marked dirty.
 * Must only be called for leaves (BUG_ON on path->lowest_level).
 * NOTE(review): error paths, the loop restart after a deletion and the final
 * return are not visible here.
 */
5693 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5695 struct extent_buffer *buf;
5699 /* We should only get this for leaves */
5700 BUG_ON(path->lowest_level);
5701 buf = path->nodes[0];
5703 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5704 unsigned int shift = 0, offset;
5706 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5707 BTRFS_LEAF_DATA_SIZE(root)) {
5708 if (btrfs_item_end_nr(buf, i) >
5709 BTRFS_LEAF_DATA_SIZE(root)) {
5710 ret = delete_bogus_item(root, path, buf, i);
5713 fprintf(stderr, "item is off the end of the "
5714 "leaf, can't fix\n");
5718 shift = BTRFS_LEAF_DATA_SIZE(root) -
5719 btrfs_item_end_nr(buf, i);
5720 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5721 btrfs_item_offset_nr(buf, i - 1)) {
5722 if (btrfs_item_end_nr(buf, i) >
5723 btrfs_item_offset_nr(buf, i - 1)) {
5724 ret = delete_bogus_item(root, path, buf, i);
5727 fprintf(stderr, "items overlap, can't fix\n");
5731 shift = btrfs_item_offset_nr(buf, i - 1) -
5732 btrfs_item_end_nr(buf, i);
5737 printf("Shifting item nr %d by %u bytes in block %llu\n",
5738 i, shift, (unsigned long long)buf->start);
/* Move the payload towards the end of the leaf by "shift" bytes. */
5739 offset = btrfs_item_offset_nr(buf, i);
5740 memmove_extent_buffer(buf,
5741 btrfs_leaf_data(buf) + offset + shift,
5742 btrfs_leaf_data(buf) + offset,
5743 btrfs_item_size_nr(buf, i));
5744 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5746 btrfs_mark_buffer_dirty(buf);
5750 * We may have moved things, in which case we want to exit so we don't
5751 * write those changes out. Once we have proper abort functionality in
5752 * progs this can be changed to something nicer.
5759 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5760 * then just return -EIO.
/*
 * Mechanics: only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS are handled. Every root that references
 * buf->start is collected with btrfs_find_all_roots(); for each one the root
 * is read, a transaction is started, the tree is searched for the first key
 * of @buf down to @buf's level, and then fix_key_order() or
 * fix_item_offset() is run on the resulting path before the transaction is
 * committed.
 * NOTE(review): the error exits and the release of "roots" are not visible
 * here.
 */
5762 static int try_to_fix_bad_block(struct btrfs_root *root,
5763 struct extent_buffer *buf,
5764 enum btrfs_tree_block_status status)
5766 struct btrfs_trans_handle *trans;
5767 struct ulist *roots;
5768 struct ulist_node *node;
5769 struct btrfs_root *search_root;
5770 struct btrfs_path path;
5771 struct ulist_iterator iter;
5772 struct btrfs_key root_key, key;
5775 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5776 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5779 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5783 btrfs_init_path(&path);
5784 ULIST_ITER_INIT(&iter);
5785 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to open. */
5786 root_key.objectid = node->val;
5787 root_key.type = BTRFS_ROOT_ITEM_KEY;
5788 root_key.offset = (u64)-1;
5790 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5797 trans = btrfs_start_transaction(search_root, 0);
5798 if (IS_ERR(trans)) {
5799 ret = PTR_ERR(trans);
/*
 * Search down to the damaged block's own level; skip_check_block is set
 * (presumably so the search does not reject the block that is about to be
 * repaired -- confirm against btrfs_search_slot()).
 */
5803 path.lowest_level = btrfs_header_level(buf);
5804 path.skip_check_block = 1;
5805 if (path.lowest_level)
5806 btrfs_node_key_to_cpu(buf, &key, 0);
5808 btrfs_item_key_to_cpu(buf, &key, 0);
5809 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5812 btrfs_commit_transaction(trans, search_root);
5815 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5816 ret = fix_key_order(search_root, &path);
5817 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5818 ret = fix_item_offset(search_root, &path);
5820 btrfs_commit_transaction(trans, search_root);
5823 btrfs_release_path(&path);
5824 btrfs_commit_transaction(trans, search_root);
5827 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering (buf->start, buf->len) is looked up in @extent_cache
 * and receives the header generation, the objectid of the block's first key
 * (only when the block has items) and the block level. The block is then
 * checked with btrfs_check_leaf() or btrfs_check_node() against the
 * record's parent_key; a block that is not clean is passed to
 * try_to_fix_bad_block().
 *
 * On the success path content_checked is set; owner_ref_checked is set
 * directly when @flags contains BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise it
 * depends on check_owner_ref(). Finally maybe_free_extent_rec() gets a
 * chance to drop the record.
 * NOTE(review): return values and the conditions guarding the repair attempt
 * are not visible here.
 */
5831 static int check_block(struct btrfs_root *root,
5832 struct cache_tree *extent_cache,
5833 struct extent_buffer *buf, u64 flags)
5835 struct extent_record *rec;
5836 struct cache_extent *cache;
5837 struct btrfs_key key;
5838 enum btrfs_tree_block_status status;
5842 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5845 rec = container_of(cache, struct extent_record, cache);
5846 rec->generation = btrfs_header_generation(buf);
5848 level = btrfs_header_level(buf);
5849 if (btrfs_header_nritems(buf) > 0) {
5852 btrfs_item_key_to_cpu(buf, &key, 0);
5854 btrfs_node_key_to_cpu(buf, &key, 0);
5856 rec->info_objectid = key.objectid;
5858 rec->info_level = level;
5860 if (btrfs_is_leaf(buf))
5861 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5863 status = btrfs_check_node(root, &rec->parent_key, buf);
5865 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/*
 * NOTE(review): try_to_fix_bad_block() is declared to return int, yet its
 * result is stored in the enum status and compared with
 * BTRFS_TREE_BLOCK_CLEAN; this relies on that enumerator being 0 -- confirm.
 */
5867 status = try_to_fix_bad_block(root, buf, status);
5868 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5870 fprintf(stderr, "bad block %llu\n",
5871 (unsigned long long)buf->start);
5874 * Signal to callers we need to start the scan over
5875 * again since we'll have cowed blocks.
5880 rec->content_checked = 1;
5881 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5882 rec->owner_ref_checked = 1;
5884 ret = check_owner_ref(root, rec, buf);
5886 rec->owner_ref_checked = 1;
5890 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching the given reference.
 *
 * Two match modes are visible: full backrefs are compared by @parent,
 * non-full backrefs are compared by @root.
 * NOTE(review): the condition choosing between the two modes, the filter for
 * data backrefs and the return statements are not visible here.
 */
5894 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5895 u64 parent, u64 root)
5897 struct list_head *cur = rec->backrefs.next;
5898 struct extent_backref *node;
5899 struct tree_backref *back;
5901 while(cur != &rec->backrefs) {
5902 node = to_extent_backref(cur);
5906 back = to_tree_backref(node);
5908 if (!node->full_backref)
5910 if (parent == back->parent)
5913 if (node->full_backref)
5915 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed. Two initialisations are visible: one stores
 * @parent and marks the node as a full backref, the other clears
 * full_backref.
 * NOTE(review): the allocation-failure check, the condition selecting the
 * two variants and the return statement are not visible here.
 */
5922 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5923 u64 parent, u64 root)
5925 struct tree_backref *ref = malloc(sizeof(*ref));
5929 memset(&ref->node, 0, sizeof(ref->node));
5931 ref->parent = parent;
5932 ref->node.full_backref = 1;
5935 ref->node.full_backref = 0;
5937 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref matching the given reference.
 *
 * Full backrefs are compared by @parent. Non-full backrefs are compared by
 * (@root, @owner, @offset); for those, when the caller's found_ref and the
 * node's found_ref are both set, a difference in bytes or disk_bytenr is
 * tested as well.
 * NOTE(review): the action taken for such a mismatch, the mode selection and
 * the return statements are not visible here.
 */
5942 static struct data_backref *find_data_backref(struct extent_record *rec,
5943 u64 parent, u64 root,
5944 u64 owner, u64 offset,
5946 u64 disk_bytenr, u64 bytes)
5948 struct list_head *cur = rec->backrefs.next;
5949 struct extent_backref *node;
5950 struct data_backref *back;
5952 while(cur != &rec->backrefs) {
5953 node = to_extent_backref(cur);
5957 back = to_data_backref(node);
5959 if (!node->full_backref)
5961 if (parent == back->parent)
5964 if (node->full_backref)
5966 if (back->root == root && back->owner == owner &&
5967 back->offset == offset) {
5968 if (found_ref && node->found_ref &&
5969 (back->bytes != bytes ||
5970 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged as data. Two initialisations are
 * visible: one stores @parent and marks a full backref, the other stores
 * @offset and clears full_backref. ref->bytes is set from max_size, and
 * rec->max_size is raised to max_size when that is larger.
 * NOTE(review): the allocation-failure check, the selecting condition, other
 * field assignments and the return statement are not visible here.
 */
5979 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5980 u64 parent, u64 root,
5981 u64 owner, u64 offset,
5984 struct data_backref *ref = malloc(sizeof(*ref));
5988 memset(&ref->node, 0, sizeof(ref->node));
5989 ref->node.is_data = 1;
5992 ref->parent = parent;
5995 ref->node.full_backref = 1;
5999 ref->offset = offset;
6000 ref->node.full_backref = 0;
6002 ref->bytes = max_size;
6005 list_add_tail(&ref->node.list, &rec->backrefs);
6006 if (max_size > rec->max_size)
6007 rec->max_size = max_size;
6011 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the flags of the block group found for
 * rec->start (through global_info) do not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA; when backrefs exist, the
 *    first one decides which: root == BTRFS_CHUNK_TREE_OBJECTID requires
 *    SYSTEM, any other root requires METADATA, and a data backref on a
 *    metadata extent is itself flagged as wrong.
 * NOTE(review): the handling of a failed block group lookup and of full
 * backrefs is not visible here.
 */
6012 static void check_extent_type(struct extent_record *rec)
6014 struct btrfs_block_group_cache *bg_cache;
6016 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6020 /* data extent, check chunk directly*/
6021 if (!rec->metadata) {
6022 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6023 rec->wrong_chunk_type = 1;
6027 /* metadata extent, check the obvious case first */
6028 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6029 BTRFS_BLOCK_GROUP_METADATA))) {
6030 rec->wrong_chunk_type = 1;
6035 * Check SYSTEM extent, as it's also marked as metadata, we can only
6036 * make sure it's a SYSTEM extent by its backref
6038 if (!list_empty(&rec->backrefs)) {
6039 struct extent_backref *node;
6040 struct tree_backref *tback;
6043 node = to_extent_backref(rec->backrefs.next);
6044 if (node->is_data) {
6045 /* tree block shouldn't have data backref */
6046 rec->wrong_chunk_type = 1;
6049 tback = container_of(node, struct tree_backref, node);
6051 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6052 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6054 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6055 if (!(bg_cache->flags & bg_type))
6056 rec->wrong_chunk_type = 1;
6061 * Allocate a new extent record, fill default values from @tmpl and insert into
6062 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6063 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * Copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. Reset here: num_duplicates, the
 * full-backref flag state, bad_full_backref, crossing_stripes and
 * wrong_chunk_type. The global bytes_used counter grows by rec->nr, and
 * check_extent_type() is run on the new record.
 * @tmpl->max_size must be non-zero (BUG_ON).
 * NOTE(review): the allocation-failure and insert-failure handling and the
 * condition guarding the crossing-stripes check are not visible here.
 */
6065 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6066 struct extent_record *tmpl)
6068 struct extent_record *rec;
6071 BUG_ON(tmpl->max_size == 0);
6072 rec = malloc(sizeof(*rec));
6075 rec->start = tmpl->start;
6076 rec->max_size = tmpl->max_size;
6077 rec->nr = max(tmpl->nr, tmpl->max_size);
6078 rec->found_rec = tmpl->found_rec;
6079 rec->content_checked = tmpl->content_checked;
6080 rec->owner_ref_checked = tmpl->owner_ref_checked;
6081 rec->num_duplicates = 0;
6082 rec->metadata = tmpl->metadata;
6083 rec->flag_block_full_backref = FLAG_UNSET;
6084 rec->bad_full_backref = 0;
6085 rec->crossing_stripes = 0;
6086 rec->wrong_chunk_type = 0;
6087 rec->is_root = tmpl->is_root;
6088 rec->refs = tmpl->refs;
6089 rec->extent_item_refs = tmpl->extent_item_refs;
6090 rec->parent_generation = tmpl->parent_generation;
6091 INIT_LIST_HEAD(&rec->backrefs);
6092 INIT_LIST_HEAD(&rec->dups);
6093 INIT_LIST_HEAD(&rec->list);
6094 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/*
 * NOTE(review): the cache entry is sized with tmpl->nr while rec->nr above
 * is max(tmpl->nr, tmpl->max_size); the two differ whenever max_size > nr --
 * confirm this is intended.
 */
6095 rec->cache.start = tmpl->start;
6096 rec->cache.size = tmpl->nr;
6097 ret = insert_cache_extent(extent_cache, &rec->cache);
6102 bytes_used += rec->nr;
6105 rec->crossing_stripes = check_crossing_stripes(global_info,
6106 rec->start, global_info->tree_root->nodesize);
6107 check_extent_type(rec);
6112 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6114 * - refs - if found, increase refs
6115 * - is_root - if found, set
6116 * - content_checked - if found, set
6117 * - owner_ref_checked - if found, set
6119 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup().
 *
 * For an existing record:
 *  - when @tmpl comes from an extent item (found_rec) and either its start
 *    differs from the record's or the record already has one, the record is
 *    put on the global duplicate_extents list and a shadow record holding
 *    the template values is appended to rec->dups;
 *  - extent_item_refs from @tmpl is stored unless this is a duplicate, and a
 *    message is printed when the record already carried a value;
 *  - content_checked / owner_ref_checked are only ever set, parent_key is
 *    overwritten, parent_generation is taken when non-zero, and max_size
 *    only grows;
 *  - the crossing-stripes and chunk-type checks are re-run and
 *    maybe_free_extent_rec() gets a chance to drop the record.
 * NOTE(review): several guarding conditions, the refs/is_root updates
 * described in the comment preceding this function, and the return values
 * are not visible here.
 */
6121 static int add_extent_rec(struct cache_tree *extent_cache,
6122 struct extent_record *tmpl)
6124 struct extent_record *rec;
6125 struct cache_extent *cache;
6129 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6131 rec = container_of(cache, struct extent_record, cache);
6135 rec->nr = max(tmpl->nr, tmpl->max_size);
6138 * We need to make sure to reset nr to whatever the extent
6139 * record says was the real size, this way we can compare it to
6142 if (tmpl->found_rec) {
6143 if (tmpl->start != rec->start || rec->found_rec) {
6144 struct extent_record *tmp;
6147 if (list_empty(&rec->list))
6148 list_add_tail(&rec->list,
6149 &duplicate_extents);
6152 * We have to do this song and dance in case we
6153 * find an extent record that falls inside of
6154 * our current extent record but does not have
6155 * the same objectid.
6157 tmp = malloc(sizeof(*tmp));
6160 tmp->start = tmpl->start;
6161 tmp->max_size = tmpl->max_size;
6164 tmp->metadata = tmpl->metadata;
6165 tmp->extent_item_refs = tmpl->extent_item_refs;
6166 INIT_LIST_HEAD(&tmp->list);
6167 list_add_tail(&tmp->list, &rec->dups);
6168 rec->num_duplicates++;
6175 if (tmpl->extent_item_refs && !dup) {
6176 if (rec->extent_item_refs) {
6177 fprintf(stderr, "block %llu rec "
6178 "extent_item_refs %llu, passed %llu\n",
6179 (unsigned long long)tmpl->start,
6180 (unsigned long long)
6181 rec->extent_item_refs,
6182 (unsigned long long)tmpl->extent_item_refs);
6184 rec->extent_item_refs = tmpl->extent_item_refs;
6188 if (tmpl->content_checked)
6189 rec->content_checked = 1;
6190 if (tmpl->owner_ref_checked)
6191 rec->owner_ref_checked = 1;
6192 memcpy(&rec->parent_key, &tmpl->parent_key,
6193 sizeof(tmpl->parent_key));
6194 if (tmpl->parent_generation)
6195 rec->parent_generation = tmpl->parent_generation;
6196 if (rec->max_size < tmpl->max_size)
6197 rec->max_size = tmpl->max_size;
6200 * A metadata extent can't cross stripe_len boundary, otherwise
6201 * kernel scrub won't be able to handle it.
6202 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6206 rec->crossing_stripes = check_crossing_stripes(
6207 global_info, rec->start,
6208 global_info->tree_root->nodesize);
6209 check_extent_type(rec);
6210 maybe_free_extent_rec(extent_cache, rec);
6214 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for the extent at @bytenr.
 *
 * A record is created through add_extent_rec_nolookup() when none covers
 * @bytenr. The matching tree backref is then looked up with
 * find_tree_backref() or allocated with alloc_tree_backref().
 * Two updates are visible: one sets node.found_ref, the other sets
 * node.found_extent_tree; each prints "Extent back ref already exists" when
 * its flag was already set. check_extent_type() and
 * maybe_free_extent_rec() run afterwards.
 * NOTE(review): presumably @found_ref selects between the two updates --
 * confirm; the template fields other than start and the error returns are
 * not visible here.
 */
6219 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6220 u64 parent, u64 root, int found_ref)
6222 struct extent_record *rec;
6223 struct tree_backref *back;
6224 struct cache_extent *cache;
6227 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6229 struct extent_record tmpl;
6231 memset(&tmpl, 0, sizeof(tmpl));
6232 tmpl.start = bytenr;
6237 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6241 /* really a bug in the cache_extent implementation now */
6242 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6247 rec = container_of(cache, struct extent_record, cache);
6248 if (rec->start != bytenr) {
6250 * Several cause, from unaligned bytenr to over lapping extents
6255 back = find_tree_backref(rec, parent, root);
6257 back = alloc_tree_backref(rec, parent, root);
6263 if (back->node.found_ref) {
6264 fprintf(stderr, "Extent back ref already exists "
6265 "for %llu parent %llu root %llu \n",
6266 (unsigned long long)bytenr,
6267 (unsigned long long)parent,
6268 (unsigned long long)root);
6270 back->node.found_ref = 1;
6272 if (back->node.found_extent_tree) {
6273 fprintf(stderr, "Extent back ref already exists "
6274 "for %llu parent %llu root %llu \n",
6275 (unsigned long long)bytenr,
6276 (unsigned long long)parent,
6277 (unsigned long long)root);
6279 back->node.found_extent_tree = 1;
6281 check_extent_type(rec);
6282 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A record is created through add_extent_rec_nolookup() when none covers
 * @bytenr, and rec->max_size is raised to @max_size if larger. The backref
 * is looked up with find_data_backref() or allocated with
 * alloc_data_backref().
 * Two updates are visible:
 *  - one requires @num_refs == 1, bumps back->found_ref, stores @max_size
 *    and @bytenr in the backref and marks the record's content and owner
 *    ref as checked;
 *  - the other stores @num_refs and sets node.found_extent_tree, printing a
 *    message when that flag was already set.
 * NOTE(review): presumably @found_ref selects between the two -- confirm;
 * error returns are not visible here.
 */
6286 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6287 u64 parent, u64 root, u64 owner, u64 offset,
6288 u32 num_refs, int found_ref, u64 max_size)
6290 struct extent_record *rec;
6291 struct data_backref *back;
6292 struct cache_extent *cache;
6295 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6297 struct extent_record tmpl;
6299 memset(&tmpl, 0, sizeof(tmpl));
6300 tmpl.start = bytenr;
/*
 * NOTE(review): add_extent_rec_nolookup() has BUG_ON(tmpl->max_size == 0),
 * so a caller passing max_size == 0 for an extent that is not yet cached
 * would abort here -- confirm no such caller exists.
 */
6302 tmpl.max_size = max_size;
6304 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6308 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6313 rec = container_of(cache, struct extent_record, cache);
6314 if (rec->max_size < max_size)
6315 rec->max_size = max_size;
6318 * If found_ref is set then max_size is the real size and must match the
6319 * existing refs. So if we have already found a ref then we need to
6320 * make sure that this ref matches the existing one, otherwise we need
6321 * to add a new backref so we can notice that the backrefs don't match
6322 * and we need to figure out who is telling the truth. This is to
6323 * account for that awful fsync bug I introduced where we'd end up with
6324 * a btrfs_file_extent_item that would have its length include multiple
6325 * prealloc extents or point inside of a prealloc extent.
6327 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6330 back = alloc_data_backref(rec, parent, root, owner, offset,
6336 BUG_ON(num_refs != 1);
6337 if (back->node.found_ref)
6338 BUG_ON(back->bytes != max_size);
6339 back->node.found_ref = 1;
6340 back->found_ref += 1;
6341 back->bytes = max_size;
6342 back->disk_bytenr = bytenr;
6344 rec->content_checked = 1;
6345 rec->owner_ref_checked = 1;
6347 if (back->node.found_extent_tree) {
6348 fprintf(stderr, "Extent back ref already exists "
6349 "for %llu parent %llu root %llu "
6350 "owner %llu offset %llu num_refs %lu\n",
6351 (unsigned long long)bytenr,
6352 (unsigned long long)parent,
6353 (unsigned long long)root,
6354 (unsigned long long)owner,
6355 (unsigned long long)offset,
6356 (unsigned long)num_refs);
6358 back->num_refs = num_refs;
6359 back->node.found_extent_tree = 1;
6361 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range (bytenr, size) to @seen and to @pending.
 * NOTE(review): the check on the first insertion's result is not visible
 * here (presumably a range already in @seen is not queued again -- confirm).
 * The result of the insertion into @pending is ignored.
 */
6365 static int add_pending(struct cache_tree *pending,
6366 struct cache_tree *seen, u64 bytenr, u32 size)
6369 ret = add_cache_extent(seen, bytenr, size);
6372 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits
 * (at most @bits_nr entries).
 *
 * Sources, in the order they appear: the first extent of @reada (stored in
 * bits[0]); extents of @nodes starting from a position slightly before
 * @last (32768 is subtracted when @last is larger than that), falling back
 * to the start of @nodes; extents of @pending from its start. When more
 * than 8 slots of @bits remain, further extents from @pending are appended
 * as long as each begins within 32768 bytes of the end of the previous one.
 * NOTE(review): the conditions linking these stages, the counter updates
 * and the return values are not visible here.
 */
6376 static int pick_next_pending(struct cache_tree *pending,
6377 struct cache_tree *reada,
6378 struct cache_tree *nodes,
6379 u64 last, struct block_info *bits, int bits_nr,
/*
 * NOTE(review): @last is u64 but is stored in an unsigned long; where long
 * is 32 bits this truncates the byte offset before it is passed to
 * search_cache_extent(). Looks like it should be u64 -- confirm.
 */
6382 unsigned long node_start = last;
6383 struct cache_extent *cache;
6386 cache = search_cache_extent(reada, 0);
6388 bits[0].start = cache->start;
6389 bits[0].size = cache->size;
6394 if (node_start > 32768)
6395 node_start -= 32768;
6397 cache = search_cache_extent(nodes, node_start);
6399 cache = search_cache_extent(nodes, 0);
6402 cache = search_cache_extent(pending, 0);
6407 bits[ret].start = cache->start;
6408 bits[ret].size = cache->size;
6409 cache = next_cache_extent(cache);
6411 } while (cache && ret < bits_nr);
6417 bits[ret].start = cache->start;
6418 bits[ret].size = cache->size;
6419 cache = next_cache_extent(cache);
6421 } while (cache && ret < bits_nr);
6423 if (bits_nr - ret > 8) {
6424 u64 lookup = bits[0].start + bits[0].size;
6425 struct cache_extent *next;
6426 next = search_cache_extent(pending, lookup);
6428 if (next->start - lookup > 32768)
6430 bits[ret].start = next->start;
6431 bits[ret].size = next->size;
6432 lookup = next->start + next->size;
6436 next = next_cache_extent(next);
/*
 * Per-entry callback for the chunk cache: maps @cache to its chunk_record
 * and detaches the record's "list" and "dextents" list heads.
 * NOTE(review): the release of the record itself is not visible here.
 */
6444 static void free_chunk_record(struct cache_extent *cache)
6446 struct chunk_record *rec;
6448 rec = container_of(cache, struct chunk_record, cache);
6449 list_del_init(&rec->list);
6450 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by running free_chunk_record() on it
 * through cache_tree_free_extents().
 */
6454 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6456 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device tree: maps the rb-tree @node to its
 * enclosing device_record.
 * NOTE(review): the release of the record is not visible here.
 */
6459 static void free_device_record(struct rb_node *node)
6461 struct device_record *rec;
6463 rec = container_of(node, struct device_record, node);
/*
 * Instantiates the FREE_RB_BASED_TREE macro with free_device_record as the
 * per-node callback.
 * NOTE(review): presumably this expands to the definition of a tree-freeing
 * function named after device_cache -- confirm against the macro
 * definition.
 */
6467 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to
 * tree->block_groups.
 * NOTE(review): the handling of a failed insert_cache_extent() and the
 * return value are not visible here.
 */
6469 int insert_block_group_record(struct block_group_tree *tree,
6470 struct block_group_record *bg_rec)
6474 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6478 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for the block group cache: maps @cache to its
 * block_group_record and detaches the record's list head.
 * NOTE(review): the release of the record itself is not visible here.
 */
6482 static void free_block_group_record(struct cache_extent *cache)
6484 struct block_group_record *rec;
6486 rec = container_of(cache, struct block_group_record, cache);
6487 list_del_init(&rec->list);
/*
 * Release every entry of tree->tree by running free_block_group_record() on
 * it through cache_tree_free_extents().
 */
6491 void free_block_group_tree(struct block_group_tree *tree)
6493 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists of @tree (no_chunk_orphans and no_device_orphans).
 * insert_cache_extent2() is used instead of insert_cache_extent() for the
 * reason given in the comment inside the function.
 * NOTE(review): the handling of a failed insert and the return value are not
 * visible here.
 */
6496 int insert_device_extent_record(struct device_extent_tree *tree,
6497 struct device_extent_record *de_rec)
6502 * Device extent is a bit different from the other extents, because
6503 * the extents which belong to the different devices may have the
6504 * same start and size, so we need use the special extent cache
6505 * search/insert functions.
6507 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6511 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6512 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for the device extent cache: maps @cache to its
 * device_extent_record and detaches chunk_list and device_list when they
 * are linked into a list.
 * NOTE(review): the release of the record itself is not visible here.
 */
6516 static void free_device_extent_record(struct cache_extent *cache)
6518 struct device_extent_record *rec;
6520 rec = container_of(cache, struct device_extent_record, cache);
6521 if (!list_empty(&rec->chunk_list))
6522 list_del_init(&rec->chunk_list);
6523 if (!list_empty(&rec->device_list))
6524 list_del_init(&rec->device_list);
/*
 * Release every entry of tree->tree by running free_device_extent_record()
 * on it through cache_tree_free_extents().
 */
6528 void free_device_extent_tree(struct device_extent_tree *tree)
6530 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6533 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset). A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is
 * recorded as a tree backref, any other as a data backref carrying the v0
 * ref count.
 * NOTE(review): the data-backref call passes 0 as max_size; for an extent
 * that is not yet cached, add_data_backref() forwards that value to
 * add_extent_rec_nolookup(), which has BUG_ON(tmpl->max_size == 0) --
 * confirm this path cannot be reached.
 */
6534 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6535 struct extent_buffer *leaf, int slot)
6537 struct btrfs_extent_ref_v0 *ref0;
6538 struct btrfs_key key;
6541 btrfs_item_key_to_cpu(leaf, &key, slot);
6542 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6543 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6544 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6547 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6548 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Allocate a zeroed chunk_record, sized for the chunk's stripe count, and
 * fill it from the chunk item at @slot of @leaf.
 *
 * The cache range is (key->offset, chunk length); the key fields, the leaf
 * generation, the chunk attributes (owner, stripe_len, type, io_width,
 * io_align, sector_size, num_stripes, sub_stripes) and every stripe's
 * devid, offset and device uuid are copied.
 * NOTE(review): what happens after the "memory allocation failed" message
 * and the return statement are not visible here.
 */
6554 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6555 struct btrfs_key *key,
6558 struct btrfs_chunk *ptr;
6559 struct chunk_record *rec;
6562 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6563 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6565 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6567 fprintf(stderr, "memory allocation failed\n");
6571 INIT_LIST_HEAD(&rec->list);
6572 INIT_LIST_HEAD(&rec->dextents);
6575 rec->cache.start = key->offset;
6576 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6578 rec->generation = btrfs_header_generation(leaf);
6580 rec->objectid = key->objectid;
6581 rec->type = key->type;
6582 rec->offset = key->offset;
6584 rec->length = rec->cache.size;
6585 rec->owner = btrfs_chunk_owner(leaf, ptr);
6586 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6587 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6588 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6589 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6590 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6591 rec->num_stripes = num_stripes;
6592 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6594 for (i = 0; i < rec->num_stripes; ++i) {
6595 rec->stripes[i].devid =
6596 btrfs_stripe_devid_nr(leaf, ptr, i);
6597 rec->stripes[i].offset =
6598 btrfs_stripe_offset_nr(leaf, ptr, i);
6599 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6600 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with error(). A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; a failed insert is
 * reported as "Chunk[...] existed.".
 * NOTE(review): return values and the release of a record that could not be
 * inserted are not visible here.
 */
6607 static int process_chunk_item(struct cache_tree *chunk_cache,
6608 struct btrfs_key *key, struct extent_buffer *eb,
6611 struct chunk_record *rec;
6612 struct btrfs_chunk *chunk;
6615 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6617 * Do extra check for this chunk item,
6619 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6620 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6621 * and owner<->key_type check.
6623 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6626 error("chunk(%llu, %llu) is not valid, ignore it",
6627 key->offset, btrfs_chunk_length(eb, chunk));
6630 rec = btrfs_new_chunk_record(eb, key, slot);
6631 ret = insert_cache_extent(chunk_cache, &rec->cache);
6633 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6634 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * The record receives the key fields, the leaf generation and the device
 * id, total bytes and bytes used read from the item. A failed insert is
 * reported as "Device[...] existed.".
 * NOTE(review): what follows the allocation-failure message, the release of
 * a record that could not be inserted and the return value are not visible
 * here.
 */
6641 static int process_device_item(struct rb_root *dev_cache,
6642 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6644 struct btrfs_dev_item *ptr;
6645 struct device_record *rec;
6648 ptr = btrfs_item_ptr(eb,
6649 slot, struct btrfs_dev_item);
6651 rec = malloc(sizeof(*rec));
6653 fprintf(stderr, "memory allocation failed\n");
/*
 * NOTE(review): devid is assigned from key->offset here and overwritten
 * below with btrfs_device_id(); the first store looks redundant.
 */
6657 rec->devid = key->offset;
6658 rec->generation = btrfs_header_generation(eb);
6660 rec->objectid = key->objectid;
6661 rec->type = key->type;
6662 rec->offset = key->offset;
6664 rec->devid = btrfs_device_id(eb, ptr);
6665 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6666 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6668 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6670 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record and fill it from the block group
 * item at @slot of @leaf.
 *
 * The cache range is (key->objectid, key->offset); the key fields, the leaf
 * generation and the on-disk block group flags are copied and the list head
 * is initialised.
 * NOTE(review): what follows the allocation-failure message and the return
 * statement are not visible here.
 */
6677 struct block_group_record *
6678 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6681 struct btrfs_block_group_item *ptr;
6682 struct block_group_record *rec;
6684 rec = calloc(1, sizeof(*rec));
6686 fprintf(stderr, "memory allocation failed\n");
6690 rec->cache.start = key->objectid;
6691 rec->cache.size = key->offset;
6693 rec->generation = btrfs_header_generation(leaf);
6695 rec->objectid = key->objectid;
6696 rec->type = key->type;
6697 rec->offset = key->offset;
6699 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6700 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6702 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; a failed insert is reported as
 * "Block Group[...] existed.".
 * NOTE(review): the release of a record that could not be inserted and the
 * return value are not visible here.
 */
6707 static int process_block_group_item(struct block_group_tree *block_group_cache,
6708 struct btrfs_key *key,
6709 struct extent_buffer *eb, int slot)
6711 struct block_group_record *rec;
6714 rec = btrfs_new_block_group_record(eb, key, slot);
6715 ret = insert_block_group_record(block_group_cache, rec);
6717 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6718 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record and fill it from the dev extent
 * item at @slot of @leaf.
 *
 * The cache entry is keyed by (key->objectid, key->offset) with the dev
 * extent length as its size; the key fields, the leaf generation and the
 * owning chunk's objectid and offset are copied and both list heads are
 * initialised.
 * NOTE(review): what follows the allocation-failure message and the return
 * statement are not visible here.
 */
6725 struct device_extent_record *
6726 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6727 struct btrfs_key *key, int slot)
6729 struct device_extent_record *rec;
6730 struct btrfs_dev_extent *ptr;
6732 rec = calloc(1, sizeof(*rec));
6734 fprintf(stderr, "memory allocation failed\n");
6738 rec->cache.objectid = key->objectid;
6739 rec->cache.start = key->offset;
6741 rec->generation = btrfs_header_generation(leaf);
6743 rec->objectid = key->objectid;
6744 rec->type = key->type;
6745 rec->offset = key->offset;
6747 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6748 rec->chunk_objecteid =
6749 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6751 btrfs_dev_extent_chunk_offset(leaf, ptr);
6752 rec->length = btrfs_dev_extent_length(leaf, ptr);
6753 rec->cache.size = rec->length;
6755 INIT_LIST_HEAD(&rec->chunk_list);
6756 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; a failed insert is reported as
 * "Device extent[...] existed.".
 * NOTE(review): the release of a record that could not be inserted and the
 * return value are not visible here.
 */
6762 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6763 struct btrfs_key *key, struct extent_buffer *eb,
6766 struct device_extent_record *rec;
6769 rec = btrfs_new_device_extent_record(eb, key, slot);
6770 ret = insert_device_extent_record(dev_extent_cache, rec);
6773 "Device extent[%llu, %llu, %llu] existed.\n",
6774 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM from the extent tree into
 * @extent_cache.
 *
 * @root:         supplies nodesize and sectorsize used for validation.
 * @extent_cache: cache receiving the extent record and its backrefs.
 * @eb, @slot:    leaf and item index of the extent item.
 *
 * Visible steps:
 *  - The extent length is root->nodesize for METADATA_ITEM keys, otherwise
 *    the key offset.
 *  - A start bytenr that is not sectorsize aligned is reported with error().
 *  - Items smaller than struct btrfs_extent_item are treated as v0 items
 *    when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: only the ref count is
 *    read and an extent record is added.
 *  - Otherwise refs and flags are read, metadata/data length sanity checks
 *    are reported with error(), an extent record is added and every inline
 *    ref between the item header and the end of the item is turned into a
 *    tree or data backref according to its type.
 *  - An unknown inline ref type prints "corrupt extent record".
 */
6781 static int process_extent_item(struct btrfs_root *root,
6782 struct cache_tree *extent_cache,
6783 struct extent_buffer *eb, int slot)
6785 struct btrfs_extent_item *ei;
6786 struct btrfs_extent_inline_ref *iref;
6787 struct btrfs_extent_data_ref *dref;
6788 struct btrfs_shared_data_ref *sref;
6789 struct btrfs_key key;
6790 struct extent_record tmpl;
6795 u32 item_size = btrfs_item_size_nr(eb, slot);
6801 btrfs_item_key_to_cpu(eb, &key, slot);
6803 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6805 num_bytes = root->nodesize;
6807 num_bytes = key.offset;
6810 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6811 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6812 key.objectid, root->sectorsize);
/* Item too small for the current format: legacy v0 layout. */
6815 if (item_size < sizeof(*ei)) {
6816 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6817 struct btrfs_extent_item_v0 *ei0;
6818 BUG_ON(item_size != sizeof(*ei0));
6819 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6820 refs = btrfs_extent_refs_v0(eb, ei0);
6824 memset(&tmpl, 0, sizeof(tmpl));
6825 tmpl.start = key.objectid;
6826 tmpl.nr = num_bytes;
6827 tmpl.extent_item_refs = refs;
6828 tmpl.metadata = metadata;
6830 tmpl.max_size = num_bytes;
6832 return add_extent_rec(extent_cache, &tmpl);
6835 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6836 refs = btrfs_extent_refs(eb, ei);
6837 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6841 if (metadata && num_bytes != root->nodesize) {
6842 error("ignore invalid metadata extent, length %llu does not equal to %u",
6843 num_bytes, root->nodesize);
6846 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6847 error("ignore invalid data extent, length %llu is not aligned to %u",
6848 num_bytes, root->sectorsize);
6852 memset(&tmpl, 0, sizeof(tmpl));
6853 tmpl.start = key.objectid;
6854 tmpl.nr = num_bytes;
6855 tmpl.extent_item_refs = refs;
6856 tmpl.metadata = metadata;
6858 tmpl.max_size = num_bytes;
6859 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; for tree blocks keyed as
 * EXTENT_ITEM a btrfs_tree_block_info sits in between and is skipped.
 */
6861 ptr = (unsigned long)(ei + 1);
6862 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6863 key.type == BTRFS_EXTENT_ITEM_KEY)
6864 ptr += sizeof(struct btrfs_tree_block_info);
6866 end = (unsigned long)ei + item_size;
6868 iref = (struct btrfs_extent_inline_ref *)ptr;
6869 type = btrfs_extent_inline_ref_type(eb, iref);
6870 offset = btrfs_extent_inline_ref_offset(eb, iref);
6872 case BTRFS_TREE_BLOCK_REF_KEY:
6873 ret = add_tree_backref(extent_cache, key.objectid,
6877 "add_tree_backref failed (extent items tree block): %s",
6880 case BTRFS_SHARED_BLOCK_REF_KEY:
6881 ret = add_tree_backref(extent_cache, key.objectid,
6885 "add_tree_backref failed (extent items shared block): %s",
6888 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref starting at its offset field. */
6889 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6890 add_data_backref(extent_cache, key.objectid, 0,
6891 btrfs_extent_data_ref_root(eb, dref),
6892 btrfs_extent_data_ref_objectid(eb,
6894 btrfs_extent_data_ref_offset(eb, dref),
6895 btrfs_extent_data_ref_count(eb, dref),
6898 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
6899 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6900 add_data_backref(extent_cache, key.objectid, offset,
6902 btrfs_shared_data_ref_count(eb, sref),
6906 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6907 key.objectid, key.type, num_bytes);
6910 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one entry of the loaded free space data.
 *
 * First, for every superblock mirror (btrfs_sb_offset(i)) the logical
 * addresses mapping to it inside this block group are obtained with
 * btrfs_rmap_block(); any overlap between such a stripe and the range is
 * carved out by adjusting @offset / @bytes. When a stripe sits in the middle
 * of the range, the left part is checked by a recursive call and the loop
 * continues with the right part.
 *
 * Then the remaining range is looked up with btrfs_find_free_space(). A
 * missing entry, or an entry whose offset or length differs from the wanted
 * values, is reported on stderr. A matching entry is removed with
 * unlink_free_space(), so entries left over afterwards were not accounted
 * for by any checked range.
 */
6917 static int check_cache_range(struct btrfs_root *root,
6918 struct btrfs_block_group_cache *cache,
6919 u64 offset, u64 bytes)
6921 struct btrfs_free_space *entry;
6927 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6928 bytenr = btrfs_sb_offset(i);
6929 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6930 cache->key.objectid, bytenr, 0,
6931 &logical, &nr, &stripe_len);
6936 if (logical[nr] + stripe_len <= offset)
6938 if (offset + bytes <= logical[nr])
6940 if (logical[nr] == offset) {
6941 if (stripe_len >= bytes) {
6945 bytes -= stripe_len;
6946 offset += stripe_len;
6947 } else if (logical[nr] < offset) {
6948 if (logical[nr] + stripe_len >=
6953 bytes = (offset + bytes) -
6954 (logical[nr] + stripe_len);
6955 offset = logical[nr] + stripe_len;
6958 * Could be tricky, the super may land in the
6959 * middle of the area we're checking. First
6960 * check the easiest case, it's at the end.
6962 if (logical[nr] + stripe_len >=
6964 bytes = logical[nr] - offset;
6968 /* Check the left side */
6969 ret = check_cache_range(root, cache,
6971 logical[nr] - offset);
6977 /* Now we continue with the right side */
6978 bytes = (offset + bytes) -
6979 (logical[nr] + stripe_len);
6980 offset = logical[nr] + stripe_len;
6987 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6989 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6990 offset, offset+bytes);
6994 if (entry->offset != offset) {
6995 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7000 if (entry->bytes != bytes) {
7001 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7002 bytes, entry->bytes, offset);
7006 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space data loaded for block group @cache against the
 * extent tree.
 *
 * The extent root is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group. `last` tracks
 * the end of the previously seen extent: EXTENT_ITEM keys advance it by the
 * key offset, METADATA_ITEM keys by root->nodesize. Every gap between `last`
 * and the next extent, and the tail between `last` and the end of the block
 * group, is passed to check_cache_range().
 *
 * Finally, a non-empty free_space_offset tree is reported ("There are still
 * entries left in the space ..."); the first half of that condition is not
 * visible in this listing.
 */
7011 static int verify_space_cache(struct btrfs_root *root,
7012 struct btrfs_block_group_cache *cache)
7014 struct btrfs_path path;
7015 struct extent_buffer *leaf;
7016 struct btrfs_key key;
7020 root = root->fs_info->extent_root;
7022 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7024 btrfs_init_path(&path);
7025 key.objectid = last;
7027 key.type = BTRFS_EXTENT_ITEM_KEY;
7028 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7033 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7034 ret = btrfs_next_leaf(root, &path);
7042 leaf = path.nodes[0];
7043 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once the walk leaves this block group. */
7044 if (key.objectid >= cache->key.offset + cache->key.objectid)
7046 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7047 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
7052 if (last == key.objectid) {
7053 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7054 last = key.objectid + key.offset;
7056 last = key.objectid + root->nodesize;
/* Gap [last, key.objectid) must be covered by a free space entry. */
7061 ret = check_cache_range(root, cache, last,
7062 key.objectid - last);
7065 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7066 last = key.objectid + key.offset;
7068 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent. */
7072 if (last < cache->key.objectid + cache->key.offset)
7073 ret = check_cache_range(root, cache, last,
7074 cache->key.objectid +
7075 cache->key.offset - last);
7078 btrfs_release_path(&path);
7081 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7082 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * - If the superblock cache generation is not -1 and differs from the
 *   superblock generation, a notice is printed that the space cache will be
 *   invalidated (what is returned in that case is not visible here).
 * - With progress reporting enabled, the task position is set to
 *   TASK_FREE_SPACE and the progress task is started; task_stop() is called
 *   near the end.
 * - Block groups are visited in order with btrfs_lookup_first_block_group(),
 *   starting just past the primary superblock and continuing from the end of
 *   each group found.
 * - Each group gets a free_space_ctl (created via btrfs_init_free_space_ctl()
 *   when missing) and btrfs_remove_free_space_cache() is called on it.
 * - When the FREE_SPACE_TREE compat_ro feature is set, super stripes are
 *   excluded, the free space tree is loaded and the exclusions are released;
 *   load_free_space_cache() is used on the other visible path.
 * - verify_space_cache() then validates the loaded data; a failure prints
 *   "cache appears valid but isn't".
 *
 * Returns -EINVAL when the local `error` value is non-zero, otherwise 0.
 */
7090 static int check_space_cache(struct btrfs_root *root)
7092 struct btrfs_block_group_cache *cache;
7093 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7097 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7098 btrfs_super_generation(root->fs_info->super_copy) !=
7099 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7100 printf("cache and super generation don't match, space cache "
7101 "will be invalidated\n");
7105 if (ctx.progress_enabled) {
7106 ctx.tp = TASK_FREE_SPACE;
7107 task_start(ctx.info);
7111 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup resumes at the end of this block group. */
7115 start = cache->key.objectid + cache->key.offset;
7116 if (!cache->free_space_ctl) {
7117 if (btrfs_init_free_space_ctl(cache,
7118 root->sectorsize)) {
7123 btrfs_remove_free_space_cache(cache);
7126 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7127 ret = exclude_super_stripes(root, cache);
7129 fprintf(stderr, "could not exclude super stripes: %s\n",
7134 ret = load_free_space_tree(root->fs_info, cache);
7135 free_excluded_extents(root, cache);
7137 fprintf(stderr, "could not load free space tree: %s\n",
7144 ret = load_free_space_cache(root->fs_info, cache);
7149 ret = verify_space_cache(root, cache);
7151 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7152 cache->key.objectid);
7157 task_stop(ctx.info);
7159 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range [@bytenr, @bytenr + @num_bytes).
 *
 * @root:        supplies sectorsize, the superblock checksum size and the
 *               mapping tree.
 * @bytenr:      logical start of the data to verify.
 * @num_bytes:   length of the data; tested against sectorsize alignment.
 * @leaf_offset: offset inside @eb where the stored checksums begin.
 * @eb:          extent buffer (csum tree leaf) holding the expected values.
 *
 * The data is read into a malloc()ed buffer with read_extent_data(), which
 * may return less than requested (read_len is passed by pointer). Each
 * sectorsize chunk is checksummed with btrfs_csum_data()/btrfs_csum_final()
 * and compared with the value read from @eb. A mismatch is printed together
 * with the mirror number, and btrfs_num_copies() is consulted to decide
 * whether another mirror can be tried.
 */
7162 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7163 u64 num_bytes, unsigned long leaf_offset,
7164 struct extent_buffer *eb) {
7167 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7169 unsigned long csum_offset;
7173 u64 data_checked = 0;
7179 if (num_bytes % root->sectorsize)
7182 data = malloc(num_bytes);
7186 while (offset < num_bytes) {
7189 read_len = num_bytes - offset;
7190 /* ask for all remaining data; read_len is updated by the call */
7191 ret = read_extent_data(root, data + offset,
7192 bytenr + offset, &read_len, mirror);
7196 /* verify the checksum of every sectorsize chunk that was read */
7197 while (data_checked < read_len) {
7199 tmp = offset + data_checked;
7201 csum = btrfs_csum_data((char *)data + tmp,
7202 csum, root->sectorsize);
7203 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size entry per sector, indexed by the sector number in the range. */
7205 csum_offset = leaf_offset +
7206 tmp / root->sectorsize * csum_size;
7207 read_extent_buffer(eb, (char *)&csum_expected,
7208 csum_offset, csum_size);
7209 /* try another mirror */
7210 if (csum != csum_expected) {
7211 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7212 mirror, bytenr + tmp,
7213 csum, csum_expected);
7214 num_copies = btrfs_num_copies(
7215 &root->fs_info->mapping_tree,
7217 if (mirror < num_copies - 1) {
7222 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr (length num_bytes) is covered
 * by EXTENT_ITEM records in the extent tree.
 *
 * The extent root is searched for (bytenr, EXTENT_ITEM, -1) and the path is
 * stepped backwards (previous slot or btrfs_prev_leaf()) so the walk starts
 * at or before the range; keys whose type sorts after EXTENT_ITEM are skipped
 * backwards as well. The walk then moves forward over EXTENT_ITEM keys and
 * trims the part of the range each extent covers from bytenr/num_bytes. An
 * extent lying strictly inside the range splits it: the right part is checked
 * by a recursive call and the walk continues with the left part.
 *
 * If bytes remain uncovered and no error is pending, "There are no extents
 * for csum range" is printed. The path is released before returning.
 */
7231 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7234 struct btrfs_path path;
7235 struct extent_buffer *leaf;
7236 struct btrfs_key key;
7239 btrfs_init_path(&path);
7240 key.objectid = bytenr;
7241 key.type = BTRFS_EXTENT_ITEM_KEY;
7242 key.offset = (u64)-1;
7245 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7248 fprintf(stderr, "Error looking up extent record %d\n", ret);
7249 btrfs_release_path(&path);
7252 if (path.slots[0] > 0) {
7255 ret = btrfs_prev_leaf(root, &path);
7258 } else if (ret > 0) {
7265 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7268 * Block group items come before extent items if they have the same
7269 * bytenr, so walk back one more just in case. Dear future traveller,
7270 * first congrats on mastering time travel. Now if it's not too much
7271 * trouble could you go back to 2006 and tell Chris to make the
7272 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7273 * EXTENT_ITEM_KEY please?
7275 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7276 if (path.slots[0] > 0) {
7279 ret = btrfs_prev_leaf(root, &path);
7282 } else if (ret > 0) {
7287 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7291 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7292 ret = btrfs_next_leaf(root, &path);
7294 fprintf(stderr, "Error going to next leaf "
7296 btrfs_release_path(&path);
7302 leaf = path.nodes[0];
7303 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7304 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7308 if (key.objectid + key.offset < bytenr) {
7312 if (key.objectid > bytenr + num_bytes)
7315 if (key.objectid == bytenr) {
7316 if (key.offset >= num_bytes) {
7320 num_bytes -= key.offset;
7321 bytenr += key.offset;
7322 } else if (key.objectid < bytenr) {
7323 if (key.objectid + key.offset >= bytenr + num_bytes) {
7327 num_bytes = (bytenr + num_bytes) -
7328 (key.objectid + key.offset);
7329 bytenr = key.objectid + key.offset;
7331 if (key.objectid + key.offset < bytenr + num_bytes) {
7332 u64 new_start = key.objectid + key.offset;
7333 u64 new_bytes = bytenr + num_bytes - new_start;
7336 * Weird case, the extent is in the middle of
7337 * our range, we'll have to search one side
7338 * and then the other. Not sure if this happens
7339 * in real life, but no harm in coding it up
7340 * anyway just in case.
7342 btrfs_release_path(&path);
7343 ret = check_extent_exists(root, new_start,
7346 fprintf(stderr, "Right section didn't "
7350 num_bytes = key.objectid - bytenr;
7353 num_bytes = key.objectid - bytenr;
7360 if (num_bytes && !ret) {
7361 fprintf(stderr, "There are no extents for csum range "
7362 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7366 btrfs_release_path(&path);
/*
 * Walk the checksum tree and make sure every checksummed range is backed by
 * an extent record.
 *
 * The csum root must have an up-to-date root node, otherwise "No valid csum
 * tree found" is printed. EXTENT_CSUM items are visited in key order; the
 * amount of data one item covers is
 *     (item size / checksum size) * sectorsize.
 * When the global check_data_csum is set, the data itself is verified with
 * check_extent_csums(); otherwise that step is skipped via skip_csum_check.
 *
 * Adjacent items are merged into one running range (offset, num_bytes).
 * When an item does not start at offset + num_bytes, the finished range is
 * passed to check_extent_exists() and a failure is reported as "Csum exists
 * for ... but there is no extent record".
 */
7370 static int check_csums(struct btrfs_root *root)
7372 struct btrfs_path path;
7373 struct extent_buffer *leaf;
7374 struct btrfs_key key;
7375 u64 offset = 0, num_bytes = 0;
7376 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7380 unsigned long leaf_offset;
7382 root = root->fs_info->csum_root;
7383 if (!extent_buffer_uptodate(root->node)) {
7384 fprintf(stderr, "No valid csum tree found\n");
7388 btrfs_init_path(&path);
7389 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7390 key.type = BTRFS_EXTENT_CSUM_KEY;
7392 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7394 fprintf(stderr, "Error searching csum tree %d\n", ret);
7395 btrfs_release_path(&path);
7399 if (ret > 0 && path.slots[0])
7404 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7405 ret = btrfs_next_leaf(root, &path);
7407 fprintf(stderr, "Error going to next leaf "
7414 leaf = path.nodes[0];
7416 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7417 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data described by this csum item. */
7422 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7423 csum_size) * root->sectorsize;
7424 if (!check_data_csum)
7425 goto skip_csum_check;
7426 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7427 ret = check_extent_csums(root, key.offset, data_len,
7433 offset = key.offset;
/* Item is not contiguous with the running range: verify and restart it. */
7434 } else if (key.offset != offset + num_bytes) {
7435 ret = check_extent_exists(root, offset, num_bytes);
7437 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7438 "there is no extent record\n",
7439 offset, offset+num_bytes);
7442 offset = key.offset;
7445 num_bytes += data_len;
7449 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset; each later field is only examined when the earlier ones are equal.
 * The return statements are not visible in this listing; presumably the
 * result is non-zero when @key sorts before @drop_key -- TODO confirm.
 */
7453 static int is_dropped_key(struct btrfs_key *key,
7454 struct btrfs_key *drop_key) {
7455 if (key->objectid < drop_key->objectid)
7457 else if (key->objectid == drop_key->objectid) {
7458 if (key->type < drop_key->type)
7460 else if (key->type == drop_key->type) {
7461 if (key->offset < drop_key->offset)
7469 * Here are the rules for FULL_BACKREF.
7471 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7472 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7474 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7475 * if it happened after the relocation occurred since we'll have dropped the
7476 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7477 * have no real way to know for sure.
7479 * We process the blocks one root at a time, and we start from the lowest root
7480 * objectid and go to the highest. So we can just lookup the owner backref for
7481 * the record and if we don't find it then we know it doesn't exist and we have
7484 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7485 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7486 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, using only the records collected in
 * @extent_cache, and store the outcome in *flags.
 *
 * @extent_cache: cache holding the extent_record for @buf (looked up by
 *                buf->start).
 * @buf:          tree block being examined.
 * @ri:           record of the root currently being walked.
 *
 * Tests made, in order: root objectid below BTRFS_FIRST_FREE_OBJECTID,
 * @buf being the root block of @ri, the RELOC header flag, the header owner
 * matching @ri, and finally whether a tree backref for the header owner
 * exists on the record. The branch targets of these tests are not visible in
 * this listing.
 *
 * On both outcomes the result is compared with the value already stored in
 * rec->flag_block_full_backref (unless that is FLAG_UNSET); a disagreement
 * sets rec->bad_full_backref.
 */
7488 static int calc_extent_flag(struct cache_tree *extent_cache,
7489 struct extent_buffer *buf,
7490 struct root_item_record *ri,
7493 struct extent_record *rec;
7494 struct cache_extent *cache;
7495 struct tree_backref *tback;
7498 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7499 /* we have added this extent before */
7503 rec = container_of(cache, struct extent_record, cache);
7506 * Except file/reloc tree, we can not have
7509 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7514 if (buf->start == ri->bytenr)
7517 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7520 owner = btrfs_header_owner(buf);
7521 if (owner == ri->objectid)
7524 tback = find_tree_backref(rec, 0, owner);
/* Outcome "not full backref": flag a record that previously said otherwise. */
7529 if (rec->flag_block_full_backref != FLAG_UNSET &&
7530 rec->flag_block_full_backref != 0)
7531 rec->bad_full_backref = 1;
/* Outcome "full backref": same consistency check in the other direction. */
7534 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7535 if (rec->flag_block_full_backref != FLAG_UNSET &&
7536 rec->flag_block_full_backref != 1)
7537 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, using
 * print_key_type() and print_objectid() to render @key_type and @rootid.
 */
7541 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7543 fprintf(stderr, "Invalid key type(");
7544 print_key_type(stderr, 0, key_type);
7545 fprintf(stderr, ") found in root(");
7546 print_objectid(stderr, rootid, 0);
7547 fprintf(stderr, ")\n");
7551 * Check if the key is valid with its extent buffer.
7553 * This is an early check in case an invalid key exists in an extent buffer.
7554 * This is not comprehensive yet, but should prevent a wrong key/item from being passed
/*
 * Check that an item of type @key_type may live in the tree identified by
 * @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM: chunk tree only.
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM: extent tree only.
 *  - ROOT_ITEM: root tree only.
 *  - DEV_EXTENT: device tree only.
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the "valid in csum and log tree" case label is
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid rather than a key type,
 * although the switch operates on @key_type. This looks like it was meant to
 * be the checksum item key type. The second half of its condition is not
 * visible in this listing, so the intended rule cannot be confirmed here.
 */
7557 static int check_type_with_root(u64 rootid, u8 key_type)
7560 /* Only valid in chunk tree */
7561 case BTRFS_DEV_ITEM_KEY:
7562 case BTRFS_CHUNK_ITEM_KEY:
7563 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7566 /* valid in csum and log tree */
7567 case BTRFS_CSUM_TREE_OBJECTID:
7568 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7572 case BTRFS_EXTENT_ITEM_KEY:
7573 case BTRFS_METADATA_ITEM_KEY:
7574 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7575 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7578 case BTRFS_ROOT_ITEM_KEY:
7579 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7582 case BTRFS_DEV_EXTENT_KEY:
7583 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7589 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next queued tree block and record everything it references.
 *
 * @bits:              scratch array filled by pick_next_pending(); bits[0]
 *                     is the block processed by this call.
 * @pending, @reada, @nodes, @seen:
 *                     work queues of tree blocks; the chosen block is removed
 *                     from pending/reada/nodes, children are queued with
 *                     add_pending().
 * @extent_cache:      receives extent records and tree/data backrefs.
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     receive chunk, device, block group and device extent
 *                     items found in leaves.
 * @ri:                record of the root being walked (objectid,
 *                     last_snapshot, drop_key, drop_level are read).
 *
 * Visible flow:
 *  1. Pick the next batch, register each picked block in @reada and issue
 *     readahead for it.
 *  2. Read bits[0] with read_tree_block(), using the parent generation from
 *     its extent record when one exists; an unreadable block is recorded
 *     with record_bad_block_io().
 *  3. Determine the block flags: via btrfs_lookup_extent_info() unless the
 *     global init_extent_tree is set, with calc_extent_flag() as the other
 *     visible source; if calc_extent_flag() cannot decide, FULL_BACKREF is
 *     assumed. Inconsistent combinations set rec->bad_full_backref.
 *  4. Run check_block().
 *  5. Leaf: account free space as waste and dispatch each item by key type
 *     (extent/metadata items, csum, chunk, device, block group, device
 *     extent, the various backref keys, orphan items, file extents).
 *     Node: for every child pointer add an extent record and a tree backref
 *     and queue the child; children at ri->drop_level whose key satisfies
 *     is_dropped_key() are tested first.
 *  6. Update the global byte counters and found_old_backref, then release
 *     the buffer.
 */
7593 static int run_next_block(struct btrfs_root *root,
7594 struct block_info *bits,
7597 struct cache_tree *pending,
7598 struct cache_tree *seen,
7599 struct cache_tree *reada,
7600 struct cache_tree *nodes,
7601 struct cache_tree *extent_cache,
7602 struct cache_tree *chunk_cache,
7603 struct rb_root *dev_cache,
7604 struct block_group_tree *block_group_cache,
7605 struct device_extent_tree *dev_extent_cache,
7606 struct root_item_record *ri)
7608 struct extent_buffer *buf;
7609 struct extent_record *rec = NULL;
7620 struct btrfs_key key;
7621 struct cache_extent *cache;
7624 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7625 bits_nr, &reada_bits);
7630 for(i = 0; i < nritems; i++) {
7631 ret = add_cache_extent(reada, bits[i].start,
7636 /* fixme, get the parent transid */
7637 readahead_tree_block(root, bits[i].start,
7641 *last = bits[0].start;
7642 bytenr = bits[0].start;
7643 size = bits[0].size;
7645 cache = lookup_cache_extent(pending, bytenr, size);
7647 remove_cache_extent(pending, cache);
7650 cache = lookup_cache_extent(reada, bytenr, size);
7652 remove_cache_extent(reada, cache);
7655 cache = lookup_cache_extent(nodes, bytenr, size);
7657 remove_cache_extent(nodes, cache);
7660 cache = lookup_cache_extent(extent_cache, bytenr, size);
7662 rec = container_of(cache, struct extent_record, cache);
7663 gen = rec->parent_generation;
7666 /* fixme, get the real parent transid */
7667 buf = read_tree_block(root, bytenr, size, gen);
7668 if (!extent_buffer_uptodate(buf)) {
7669 record_bad_block_io(root->fs_info,
7670 extent_cache, bytenr, size);
7674 nritems = btrfs_header_nritems(buf);
7677 if (!init_extent_tree) {
7678 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7679 btrfs_header_level(buf), 1, NULL,
7682 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7684 fprintf(stderr, "Couldn't calc extent flags\n");
7685 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7690 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7692 fprintf(stderr, "Couldn't calc extent flags\n");
7693 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7699 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7700 ri->objectid == btrfs_header_owner(buf)) {
7702 * Ok we got to this block from it's original owner and
7703 * we have FULL_BACKREF set. Relocation can leave
7704 * converted blocks over so this is altogether possible,
7705 * however it's not possible if the generation > the
7706 * last snapshot, so check for this case.
7708 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7709 btrfs_header_generation(buf) > ri->last_snapshot) {
7710 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7711 rec->bad_full_backref = 1;
7716 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7717 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7718 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7719 rec->bad_full_backref = 1;
7723 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7724 rec->flag_block_full_backref = 1;
7728 rec->flag_block_full_backref = 0;
7730 owner = btrfs_header_owner(buf);
7733 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: walk every item and dispatch on its key type. */
7737 if (btrfs_is_leaf(buf)) {
7738 btree_space_waste += btrfs_leaf_free_space(root, buf);
7739 for (i = 0; i < nritems; i++) {
7740 struct btrfs_file_extent_item *fi;
7741 btrfs_item_key_to_cpu(buf, &key, i);
7743 * Check key type against the leaf owner.
7744 * Could filter quite a lot of early error if
7747 if (check_type_with_root(btrfs_header_owner(buf),
7749 fprintf(stderr, "ignoring invalid key\n");
7752 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7753 process_extent_item(root, extent_cache, buf,
7757 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7758 process_extent_item(root, extent_cache, buf,
7762 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7764 btrfs_item_size_nr(buf, i);
7767 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7768 process_chunk_item(chunk_cache, &key, buf, i);
7771 if (key.type == BTRFS_DEV_ITEM_KEY) {
7772 process_device_item(dev_cache, &key, buf, i);
7775 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7776 process_block_group_item(block_group_cache,
7780 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7781 process_device_extent_item(dev_extent_cache,
7786 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7787 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7788 process_extent_ref_v0(extent_cache, buf, i);
7795 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7796 ret = add_tree_backref(extent_cache,
7797 key.objectid, 0, key.offset, 0);
7800 "add_tree_backref failed (leaf tree block): %s",
7804 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7805 ret = add_tree_backref(extent_cache,
7806 key.objectid, key.offset, 0, 0);
7809 "add_tree_backref failed (leaf shared block): %s",
7813 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7814 struct btrfs_extent_data_ref *ref;
7815 ref = btrfs_item_ptr(buf, i,
7816 struct btrfs_extent_data_ref);
7817 add_data_backref(extent_cache,
7819 btrfs_extent_data_ref_root(buf, ref),
7820 btrfs_extent_data_ref_objectid(buf,
7822 btrfs_extent_data_ref_offset(buf, ref),
7823 btrfs_extent_data_ref_count(buf, ref),
7824 0, root->sectorsize);
7827 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7828 struct btrfs_shared_data_ref *ref;
7829 ref = btrfs_item_ptr(buf, i,
7830 struct btrfs_shared_data_ref);
7831 add_data_backref(extent_cache,
7832 key.objectid, key.offset, 0, 0, 0,
7833 btrfs_shared_data_ref_count(buf, ref),
7834 0, root->sectorsize);
7837 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7838 struct bad_item *bad;
7840 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Orphan item with another objectid: queue it on delete_items. */
7844 bad = malloc(sizeof(struct bad_item));
7847 INIT_LIST_HEAD(&bad->list);
7848 memcpy(&bad->key, &key,
7849 sizeof(struct btrfs_key));
7850 bad->root_id = owner;
7851 list_add_tail(&bad->list, &delete_items);
/* From here on only regular (non-inline, non-hole) file extents matter. */
7854 if (key.type != BTRFS_EXTENT_DATA_KEY)
7856 fi = btrfs_item_ptr(buf, i,
7857 struct btrfs_file_extent_item);
7858 if (btrfs_file_extent_type(buf, fi) ==
7859 BTRFS_FILE_EXTENT_INLINE)
7861 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7864 data_bytes_allocated +=
7865 btrfs_file_extent_disk_num_bytes(buf, fi);
7866 if (data_bytes_allocated < root->sectorsize) {
7869 data_bytes_referenced +=
7870 btrfs_file_extent_num_bytes(buf, fi);
7871 add_data_backref(extent_cache,
7872 btrfs_file_extent_disk_bytenr(buf, fi),
7873 parent, owner, key.objectid, key.offset -
7874 btrfs_file_extent_offset(buf, fi), 1, 1,
7875 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record and queue every child block. */
7879 struct btrfs_key first_key;
7881 first_key.objectid = 0;
7884 btrfs_item_key_to_cpu(buf, &first_key, 0);
7885 level = btrfs_header_level(buf);
7886 for (i = 0; i < nritems; i++) {
7887 struct extent_record tmpl;
7889 ptr = btrfs_node_blockptr(buf, i);
7890 size = root->nodesize;
7891 btrfs_node_key_to_cpu(buf, &key, i);
7893 if ((level == ri->drop_level)
7894 && is_dropped_key(&key, &ri->drop_key)) {
7899 memset(&tmpl, 0, sizeof(tmpl));
7900 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7901 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7906 tmpl.max_size = size;
7907 ret = add_extent_rec(extent_cache, &tmpl);
7911 ret = add_tree_backref(extent_cache, ptr, parent,
7915 "add_tree_backref failed (non-leaf block): %s",
7921 add_pending(nodes, seen, ptr, size);
7923 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted space. */
7926 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7927 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics, split by the tree that owns the block. */
7929 total_btree_bytes += buf->len;
7930 if (fs_root_objectid(btrfs_header_owner(buf)))
7931 total_fs_tree_bytes += buf->len;
7932 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7933 total_extent_tree_bytes += buf->len;
7934 if (!found_old_backref &&
7935 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7936 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7937 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7938 found_old_backref = 1;
7940 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing and seed its extent
 * record.
 *
 * The block is queued on @nodes when its header level is above zero and on
 * @pending otherwise (@seen is passed along to add_pending()). An extent
 * record starting at buf->start with max_size buf->len is added to
 * @extent_cache. The tree backref is added with buf->start as parent when
 * the objectid is BTRFS_TREE_RELOC_OBJECTID or the block uses a backref
 * revision older than BTRFS_MIXED_BACKREF_REV; otherwise it is keyed by the
 * root objectid with parent 0.
 */
7944 static int add_root_to_pending(struct extent_buffer *buf,
7945 struct cache_tree *extent_cache,
7946 struct cache_tree *pending,
7947 struct cache_tree *seen,
7948 struct cache_tree *nodes,
7951 struct extent_record tmpl;
7954 if (btrfs_header_level(buf) > 0)
7955 add_pending(nodes, seen, buf->start, buf->len);
7957 add_pending(pending, seen, buf->start, buf->len);
7959 memset(&tmpl, 0, sizeof(tmpl));
7960 tmpl.start = buf->start;
7965 tmpl.max_size = buf->len;
7966 add_extent_rec(extent_cache, &tmpl);
7968 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7969 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7970 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7973 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7978 /* as we fix the tree, we might be deleting blocks that
7979 * we're tracking for repair. This hook makes sure we
7980 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook that keeps the checker's extent cache
 * (root->fs_info->fsck_extent_cache) in step when an extent reference is
 * dropped.
 *
 * The reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID. The extent record covering @bytenr/@num_bytes
 * is looked up, then the matching data or tree backref is located:
 *  - data backref: found_ref, num_refs and the record's refs /
 *    extent_item_refs are reduced by refs_to_drop, and the found_ref /
 *    found_extent_tree markers are cleared once their counters reach zero;
 *  - tree backref: found_ref and found_extent_tree are cleared and
 *    extent_item_refs is decremented by one when non-zero.
 * In both cases the backref is unlinked from its list under the visible
 * condition (!found_extent_tree && found_ref). Finally
 * maybe_free_extent_rec() is given the chance to drop the record.
 */
7982 static int free_extent_hook(struct btrfs_trans_handle *trans,
7983 struct btrfs_root *root,
7984 u64 bytenr, u64 num_bytes, u64 parent,
7985 u64 root_objectid, u64 owner, u64 offset,
7988 struct extent_record *rec;
7989 struct cache_extent *cache;
7991 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7993 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7994 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7998 rec = container_of(cache, struct extent_record, cache);
8000 struct data_backref *back;
8001 back = find_data_backref(rec, parent, root_objectid, owner,
8002 offset, 1, bytenr, num_bytes);
8005 if (back->node.found_ref) {
8006 back->found_ref -= refs_to_drop;
8008 rec->refs -= refs_to_drop;
8010 if (back->node.found_extent_tree) {
8011 back->num_refs -= refs_to_drop;
8012 if (rec->extent_item_refs)
8013 rec->extent_item_refs -= refs_to_drop;
8015 if (back->found_ref == 0)
8016 back->node.found_ref = 0;
8017 if (back->num_refs == 0)
8018 back->node.found_extent_tree = 0;
8020 if (!back->node.found_extent_tree && back->node.found_ref) {
8021 list_del(&back->node.list);
8025 struct tree_backref *back;
8026 back = find_tree_backref(rec, parent, root_objectid);
8029 if (back->node.found_ref) {
8032 back->node.found_ref = 0;
8034 if (back->node.found_extent_tree) {
8035 if (rec->extent_item_refs)
8036 rec->extent_item_refs--;
8037 back->node.found_extent_tree = 0;
8039 if (!back->node.found_extent_tree && back->node.found_ref) {
8040 list_del(&back->node.list);
8044 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that belongs to the extent at the given
 * bytenr.
 *
 * The extent root is searched from the highest possible key for that
 * objectid (offset (u64)-1) and walked backwards. Items whose objectid
 * differs end the relevant branch; items of a type other than EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0,
 * SHARED_BLOCK_REF or SHARED_DATA_REF are skipped by lowering the search key
 * just below them. Every matching item is reported on stderr and removed with
 * btrfs_del_item(). For EXTENT_ITEM / METADATA_ITEM the block group
 * accounting is adjusted through btrfs_update_block_group(), using the key
 * offset or root->nodesize as the length.
 */
8049 static int delete_extent_records(struct btrfs_trans_handle *trans,
8050 struct btrfs_root *root,
8051 struct btrfs_path *path,
8054 struct btrfs_key key;
8055 struct btrfs_key found_key;
8056 struct extent_buffer *leaf;
8061 key.objectid = bytenr;
8063 key.offset = (u64)-1;
8066 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8073 if (path->slots[0] == 0)
8079 leaf = path->nodes[0];
8080 slot = path->slots[0];
8082 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8083 if (found_key.objectid != bytenr)
8086 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8087 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8088 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8089 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8090 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8091 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8092 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8093 btrfs_release_path(path);
/* Step the search key to just below the unrelated item. */
8094 if (found_key.type == 0) {
8095 if (found_key.offset == 0)
8097 key.offset = found_key.offset - 1;
8098 key.type = found_key.type;
8100 key.type = found_key.type - 1;
8101 key.offset = (u64)-1;
8105 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8106 found_key.objectid, found_key.type, found_key.offset);
8108 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8111 btrfs_release_path(path);
8113 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8114 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8115 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8116 found_key.offset : root->nodesize;
8118 ret = btrfs_update_block_group(trans, root, bytenr,
8125 btrfs_release_path(path);
8130 * for a single backref, this will allocate a new extent
8131 * and add the backref to it.
/*
 * Re-create the extent tree entries for one backref of @rec.
 *
 * @trans, @info: transaction and filesystem; items go into info->extent_root.
 * @path:         scratch path, released before returning.
 * @rec:          extent record being repaired; start and max_size give the
 *                key of the extent item (max_size is raised to at least the
 *                extent root nodesize in the visible code).
 * @back:         backref to record; is_data and full_backref select the
 *                handling.
 * @allocated:    see the hedged note below.
 * @flags:        extra extent flags OR-ed into the TREE_BLOCK flag for
 *                metadata extents.
 *
 * First part: an empty EXTENT_ITEM (plus a btrfs_tree_block_info for tree
 * blocks) is inserted with zero refs and the record generation, flagged as
 * DATA or TREE_BLOCK, and the block group accounting is updated. Presumably
 * this part runs only when the extent item does not exist yet (the guarding
 * test on @allocated is not visible in this listing -- TODO confirm).
 *
 * Second part: references are added with btrfs_inc_extent_ref(), once per
 * found reference for data backrefs and once for tree backrefs, and each
 * addition is logged to stderr.
 *
 * Fixes in this revision: the tree-block key variable is passed as
 * &copy_key again (the address-of expression had been mangled into a
 * copyright sign) and the stray second semicolon after its declaration is
 * removed.
 */
8133 static int record_extent(struct btrfs_trans_handle *trans,
8134 struct btrfs_fs_info *info,
8135 struct btrfs_path *path,
8136 struct extent_record *rec,
8137 struct extent_backref *back,
8138 int allocated, u64 flags)
8141 struct btrfs_root *extent_root = info->extent_root;
8142 struct extent_buffer *leaf;
8143 struct btrfs_key ins_key;
8144 struct btrfs_extent_item *ei;
8145 struct data_backref *dback;
8146 struct btrfs_tree_block_info *bi;
8149 rec->max_size = max_t(u64, rec->max_size,
8150 info->extent_root->nodesize);
8153 u32 item_size = sizeof(*ei);
8156 item_size += sizeof(*bi);
8158 ins_key.objectid = rec->start;
8159 ins_key.offset = rec->max_size;
8160 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8162 ret = btrfs_insert_empty_item(trans, extent_root, path,
8163 &ins_key, item_size);
8167 leaf = path->nodes[0];
8168 ei = btrfs_item_ptr(leaf, path->slots[0],
8169 struct btrfs_extent_item);
/* Start with zero refs; the references are added further down. */
8171 btrfs_set_extent_refs(leaf, ei, 0);
8172 btrfs_set_extent_generation(leaf, ei, rec->generation);
8174 if (back->is_data) {
8175 btrfs_set_extent_flags(leaf, ei,
8176 BTRFS_EXTENT_FLAG_DATA);
8178 struct btrfs_disk_key copy_key;
8180 bi = (struct btrfs_tree_block_info *)(ei + 1);
8181 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Tree block info key: objectid from the record, type and offset zero. */
8184 btrfs_set_disk_key_objectid(&copy_key,
8185 rec->info_objectid);
8186 btrfs_set_disk_key_type(&copy_key, 0);
8187 btrfs_set_disk_key_offset(&copy_key, 0);
8189 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8190 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8192 btrfs_set_extent_flags(leaf, ei,
8193 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8196 btrfs_mark_buffer_dirty(leaf);
8197 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8198 rec->max_size, 1, 0);
8201 btrfs_release_path(path);
8204 if (back->is_data) {
8208 dback = to_data_backref(back);
8209 if (back->full_backref)
8210 parent = dback->parent;
8214 for (i = 0; i < dback->found_ref; i++) {
8215 /* if parent != 0, we're doing a full backref
8216 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8217 * just makes the backref allocator create a data
8220 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8221 rec->start, rec->max_size,
8225 BTRFS_FIRST_FREE_OBJECTID :
8231 fprintf(stderr, "adding new data backref"
8232 " on %llu %s %llu owner %llu"
8233 " offset %llu found %d\n",
8234 (unsigned long long)rec->start,
8235 back->full_backref ?
8237 back->full_backref ?
8238 (unsigned long long)parent :
8239 (unsigned long long)dback->root,
8240 (unsigned long long)dback->owner,
8241 (unsigned long long)dback->offset,
8245 struct tree_backref *tback;
8247 tback = to_tree_backref(back);
8248 if (back->full_backref)
8249 parent = tback->parent;
8253 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8254 rec->start, rec->max_size,
8255 parent, tback->root, 0, 0);
8256 fprintf(stderr, "adding new tree backref on "
8257 "start %llu len %llu parent %llu root %llu\n",
8258 rec->start, rec->max_size, parent, tback->root);
8261 btrfs_release_path(path);
/*
 * Scan the list headed by @entries for an extent_entry whose bytenr and
 * bytes fields both equal the requested @bytenr and @bytes.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when nothing
 * matches -- confirm against the return statements.
 */
8265 static struct extent_entry *find_entry(struct list_head *entries,
8266 u64 bytenr, u64 bytes)
8268 struct extent_entry *entry = NULL;
8270 list_for_each_entry(entry, entries, list) {
8271 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Walk @entries and pick the extent_entry that the most backrefs agree on,
 * judged by the count field.
 *
 * An entry whose broken counter equals its count is not trusted.  When a
 * candidate has the same count as the current best (or as the previous
 * entry) the choice is ambiguous and the search keeps going.
 *
 * NOTE(review): the result for an unresolved tie is presumably NULL --
 * confirm against the return statement.
 */
8278 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8280 struct extent_entry *entry, *best = NULL, *prev = NULL;
8282 list_for_each_entry(entry, entries, list) {
8284 * If there are as many broken entries as entries then we know
8285 * not to trust this particular entry.
8287 if (entry->broken == entry->count)
8291 * Special case, when there are only two entries and 'best' is
8301 * If our current entry == best then we can't be sure our best
8302 * is really the best, so we need to keep searching.
8304 if (best && best->count == entry->count) {
8310 /* Prev == entry, not good enough, have to keep searching */
8311 if (!prev->broken && prev->count == entry->count)
8315 best = (prev->count > entry->count) ? prev : entry;
8316 else if (best->count < entry->count)
/*
 * Rewrite the file extent item referenced by @dback so that it agrees with
 * @entry.
 *
 * The fs root named by dback->root is read, then the EXTENT_DATA items of
 * dback->owner are searched starting at dback->offset until one is found
 * whose disk bytenr and disk num bytes match @dback.  That key is searched
 * again with COW inside a new transaction and the item is updated from
 * @entry: disk_bytenr, the extent offset and disk_num_bytes, plus ram_bytes
 * when the extent is not compressed.  The leaf is marked dirty and the
 * transaction is committed.
 *
 * A compressed extent whose disk bytenr differs from entry->bytenr, a ref
 * that ends past the entry and a ref that starts before the entry are each
 * reported with a request for a btrfs-image.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.  At the
 * end a non-zero ret takes precedence over the commit result err.
 */
8324 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8325 struct data_backref *dback, struct extent_entry *entry)
8327 struct btrfs_trans_handle *trans;
8328 struct btrfs_root *root;
8329 struct btrfs_file_extent_item *fi;
8330 struct extent_buffer *leaf;
8331 struct btrfs_key key;
8335 key.objectid = dback->root;
8336 key.type = BTRFS_ROOT_ITEM_KEY;
8337 key.offset = (u64)-1;
8338 root = btrfs_read_fs_root(info, &key);
8340 fprintf(stderr, "Couldn't find root for our ref\n");
8345 * The backref points to the original offset of the extent if it was
8346 * split, so we need to search down to the offset we have and then walk
8347 * forward until we find the backref we're looking for.
8349 key.objectid = dback->owner;
8350 key.type = BTRFS_EXTENT_DATA_KEY;
8351 key.offset = dback->offset;
8352 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8354 fprintf(stderr, "Error looking up ref %d\n", ret);
8359 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8360 ret = btrfs_next_leaf(root, path);
8362 fprintf(stderr, "Couldn't find our ref, next\n");
8366 leaf = path->nodes[0];
8367 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8368 if (key.objectid != dback->owner ||
8369 key.type != BTRFS_EXTENT_DATA_KEY) {
8370 fprintf(stderr, "Couldn't find our ref, search\n");
8373 fi = btrfs_item_ptr(leaf, path->slots[0],
8374 struct btrfs_file_extent_item);
8375 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8376 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8378 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8383 btrfs_release_path(path);
8385 trans = btrfs_start_transaction(root, 1);
8387 return PTR_ERR(trans);
8390 * Ok we have the key of the file extent we want to fix, now we can cow
8391 * down to the thing and fix it.
8393 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8395 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8396 key.objectid, key.type, key.offset, ret);
8400 fprintf(stderr, "Well that's odd, we just found this key "
8401 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8406 leaf = path->nodes[0];
8407 fi = btrfs_item_ptr(leaf, path->slots[0],
8408 struct btrfs_file_extent_item);
8410 if (btrfs_file_extent_compression(leaf, fi) &&
8411 dback->disk_bytenr != entry->bytenr) {
8412 fprintf(stderr, "Ref doesn't match the record start and is "
8413 "compressed, please take a btrfs-image of this file "
8414 "system and send it to a btrfs developer so they can "
8415 "complete this functionality for bytenr %Lu\n",
8416 dback->disk_bytenr);
8421 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8422 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8423 } else if (dback->disk_bytenr > entry->bytenr) {
8424 u64 off_diff, offset;
8426 off_diff = dback->disk_bytenr - entry->bytenr;
8427 offset = btrfs_file_extent_offset(leaf, fi);
8428 if (dback->disk_bytenr + offset +
8429 btrfs_file_extent_num_bytes(leaf, fi) >
8430 entry->bytenr + entry->bytes) {
8431 fprintf(stderr, "Ref is past the entry end, please "
8432 "take a btrfs-image of this file system and "
8433 "send it to a btrfs developer, ref %Lu\n",
8434 dback->disk_bytenr);
8439 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440 btrfs_set_file_extent_offset(leaf, fi, offset);
8441 } else if (dback->disk_bytenr < entry->bytenr) {
8444 offset = btrfs_file_extent_offset(leaf, fi);
8445 if (dback->disk_bytenr + offset < entry->bytenr) {
8446 fprintf(stderr, "Ref is before the entry start, please"
8447 " take a btrfs-image of this file system and "
8448 "send it to a btrfs developer, ref %Lu\n",
8449 dback->disk_bytenr);
8454 offset += dback->disk_bytenr;
8455 offset -= entry->bytenr;
8456 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8457 btrfs_set_file_extent_offset(leaf, fi, offset);
8460 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8463 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8464 * only do this if we aren't using compression, otherwise it's a
8467 if (!btrfs_file_extent_compression(leaf, fi))
8468 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8470 printf("ram bytes may be wrong?\n");
8471 btrfs_mark_buffer_dirty(leaf);
8473 err = btrfs_commit_transaction(trans, root);
8474 btrfs_release_path(path);
8475 return ret ? ret : err;
/*
 * Reconcile the data backrefs of @rec so that they agree on a single
 * (bytenr, bytes) pair.
 *
 * Full backrefs, non-data backrefs and data backrefs with found_ref == 0
 * are not considered.  Each remaining backref is looked up by
 * (disk_bytenr, bytes) in a temporary list of extent_entry records; a new
 * zeroed entry is allocated and appended for a pair not seen before.  A
 * backref that differs from (rec->start, rec->nr), or that is marked
 * broken, is treated as a mismatch.
 *
 * When the backrefs do not all agree, find_most_right_entry() is asked for
 * a winner.  The extent record's own (rec->start, rec->nr) pair is also
 * looked up and, if the backrefs were broken and the record was found, it
 * is added as an extra entry before asking again.  A winner whose bytenr
 * differs from rec->start is reported and not handled.  Otherwise every
 * considered backref that differs from the winner goes through
 * repair_ref().  The temporary entries are unlinked before returning.
 *
 * NOTE(review): the return codes are not visible here; the closing comment
 * says the caller must drop its cache and rescan once refs were modified --
 * confirm which value signals that.
 */
8478 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8479 struct extent_record *rec)
8481 struct extent_backref *back;
8482 struct data_backref *dback;
8483 struct extent_entry *entry, *best = NULL;
8486 int broken_entries = 0;
8491 * Metadata is easy and the backrefs should always agree on bytenr and
8492 * size, if not we've got bigger issues.
8497 list_for_each_entry(back, &rec->backrefs, list) {
8498 if (back->full_backref || !back->is_data)
8501 dback = to_data_backref(back);
8504 * We only pay attention to backrefs that we found a real
8507 if (dback->found_ref == 0)
8511 * For now we only catch when the bytes don't match, not the
8512 * bytenr. We can easily do this at the same time, but I want
8513 * to have a fs image to test on before we just add repair
8514 * functionality willy-nilly so we know we won't screw up the
8518 entry = find_entry(&entries, dback->disk_bytenr,
8521 entry = malloc(sizeof(struct extent_entry));
8526 memset(entry, 0, sizeof(*entry));
8527 entry->bytenr = dback->disk_bytenr;
8528 entry->bytes = dback->bytes;
8529 list_add_tail(&entry->list, &entries);
8534 * If we only have on entry we may think the entries agree when
8535 * in reality they don't so we have to do some extra checking.
8537 if (dback->disk_bytenr != rec->start ||
8538 dback->bytes != rec->nr || back->broken)
8549 /* Yay all the backrefs agree, carry on good sir */
8550 if (nr_entries <= 1 && !mismatch)
8553 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8554 "%Lu\n", rec->start);
8557 * First we want to see if the backrefs can agree amongst themselves who
8558 * is right, so figure out which one of the entries has the highest
8561 best = find_most_right_entry(&entries);
8564 * Ok so we may have an even split between what the backrefs think, so
8565 * this is where we use the extent ref to see what it thinks.
8568 entry = find_entry(&entries, rec->start, rec->nr);
8569 if (!entry && (!broken_entries || !rec->found_rec)) {
8570 fprintf(stderr, "Backrefs don't agree with each other "
8571 "and extent record doesn't agree with anybody,"
8572 " so we can't fix bytenr %Lu bytes %Lu\n",
8573 rec->start, rec->nr);
8576 } else if (!entry) {
8578 * Ok our backrefs were broken, we'll assume this is the
8579 * correct value and add an entry for this range.
8581 entry = malloc(sizeof(struct extent_entry));
8586 memset(entry, 0, sizeof(*entry));
8587 entry->bytenr = rec->start;
8588 entry->bytes = rec->nr;
8589 list_add_tail(&entry->list, &entries);
8593 best = find_most_right_entry(&entries);
8595 fprintf(stderr, "Backrefs and extent record evenly "
8596 "split on who is right, this is going to "
8597 "require user input to fix bytenr %Lu bytes "
8598 "%Lu\n", rec->start, rec->nr);
8605 * I don't think this can happen currently as we'll abort() if we catch
8606 * this case higher up, but in case somebody removes that we still can't
8607 * deal with it properly here yet, so just bail out of that's the case.
8609 if (best->bytenr != rec->start) {
8610 fprintf(stderr, "Extent start and backref starts don't match, "
8611 "please use btrfs-image on this file system and send "
8612 "it to a btrfs developer so they can make fsck fix "
8613 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8614 rec->start, rec->nr);
8620 * Ok great we all agreed on an extent record, let's go find the real
8621 * references and fix up the ones that don't match.
8623 list_for_each_entry(back, &rec->backrefs, list) {
8624 if (back->full_backref || !back->is_data)
8627 dback = to_data_backref(back);
8630 * Still ignoring backrefs that don't have a real ref attached
8633 if (dback->found_ref == 0)
8636 if (dback->bytes == best->bytes &&
8637 dback->disk_bytenr == best->bytenr)
8640 ret = repair_ref(info, path, dback, best);
8646 * Ok we messed with the actual refs, which means we need to drop our
8647 * entire cache and go back and rescan. I know this is a huge pain and
8648 * adds a lot of extra work, but it's the only way to be safe. Once all
8649 * the backrefs agree we may not need to do anything to the extent
8654 while (!list_empty(&entries)) {
8655 entry = list_entry(entries.next, struct extent_entry, list);
8656 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with the first record on its dups list,
 * which per the comment below was created from a real extent item.
 *
 * The rec->found_rec / rec->num_duplicates > 1 case is handled by an early
 * exit.  Otherwise @rec is removed from the cache and the first duplicate
 * ("good") is reset: its lists are re-initialised, its cache range is set
 * from start/nr, its checked flags and num_duplicates are cleared, and it
 * takes over rec->refs and rec's backrefs.  Cache extents found by the
 * lookup starting at good->start are then folded in:
 *   - a record with found_rec set or with duplicates of its own is moved
 *     onto good->dups together with its dups, good->num_duplicates grows
 *     accordingly, and "good" is queued on the global duplicate_extents
 *     list if it is not on a list already;
 *   - any other record donates its refs and backrefs to "good".
 * In both cases the record is removed from @extent_cache.  "good" is
 * finally inserted into @extent_cache.
 *
 * Returns 0 when "good" still has duplicates, 1 otherwise.
 * NOTE(review): the values returned by the early exit and on insert failure
 * are not visible here -- confirm.
 */
8662 static int process_duplicates(struct cache_tree *extent_cache,
8663 struct extent_record *rec)
8665 struct extent_record *good, *tmp;
8666 struct cache_extent *cache;
8670 * If we found a extent record for this extent then return, or if we
8671 * have more than one duplicate we are likely going to need to delete
8674 if (rec->found_rec || rec->num_duplicates > 1)
8677 /* Shouldn't happen but just in case */
8678 BUG_ON(!rec->num_duplicates);
8681 * So this happens if we end up with a backref that doesn't match the
8682 * actual extent entry. So either the backref is bad or the extent
8683 * entry is bad. Either way we want to have the extent_record actually
8684 * reflect what we found in the extent_tree, so we need to take the
8685 * duplicate out and use that as the extent_record since the only way we
8686 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8688 remove_cache_extent(extent_cache, &rec->cache);
8690 good = to_extent_record(rec->dups.next);
8691 list_del_init(&good->list);
8692 INIT_LIST_HEAD(&good->backrefs);
8693 INIT_LIST_HEAD(&good->dups);
8694 good->cache.start = good->start;
8695 good->cache.size = good->nr;
8696 good->content_checked = 0;
8697 good->owner_ref_checked = 0;
8698 good->num_duplicates = 0;
8699 good->refs = rec->refs;
8700 list_splice_init(&rec->backrefs, &good->backrefs);
8702 cache = lookup_cache_extent(extent_cache, good->start,
8706 tmp = container_of(cache, struct extent_record, cache);
8709 * If we find another overlapping extent and it's found_rec is
8710 * set then it's a duplicate and we need to try and delete
8713 if (tmp->found_rec || tmp->num_duplicates > 0) {
8714 if (list_empty(&good->list))
8715 list_add_tail(&good->list,
8716 &duplicate_extents);
8717 good->num_duplicates += tmp->num_duplicates + 1;
8718 list_splice_init(&tmp->dups, &good->dups);
8719 list_del_init(&tmp->list);
8720 list_add_tail(&tmp->list, &good->dups);
8721 remove_cache_extent(extent_cache, &tmp->cache);
8726 * Ok we have another non extent item backed extent rec, so lets
8727 * just add it to this extent and carry on like we did above.
8729 good->refs += tmp->refs;
8730 list_splice_init(&tmp->backrefs, &good->backrefs);
8731 remove_cache_extent(extent_cache, &tmp->cache);
8734 ret = insert_cache_extent(extent_cache, &good->cache);
8737 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant extent items recorded as duplicates of @rec.
 *
 * The first loop over rec->dups compares start/nr to find the record that
 * covers all of the others, and complains about overlapping extents that
 * are not completely covered by each other.  @rec and records from
 * rec->dups are then gathered on a local delete list.  Inside one
 * transaction on the extent root, each listed record is looked up by the
 * key (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted with btrfs_del_item();
 * the found_rec == 0 case is tested first, and a metadata record is
 * reported as unexpected.  Afterwards both the delete list and rec->dups
 * are emptied.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates
 * is cleared when both are zero.
 * NOTE(review): nr_del presumably counts the deleted items -- confirm.
 */
8740 static int delete_duplicate_records(struct btrfs_root *root,
8741 struct extent_record *rec)
8743 struct btrfs_trans_handle *trans;
8744 LIST_HEAD(delete_list);
8745 struct btrfs_path path;
8746 struct extent_record *tmp, *good, *n;
8749 struct btrfs_key key;
8751 btrfs_init_path(&path);
8754 /* Find the record that covers all of the duplicates. */
8755 list_for_each_entry(tmp, &rec->dups, list) {
8756 if (good->start < tmp->start)
8758 if (good->nr > tmp->nr)
8761 if (tmp->start + tmp->nr < good->start + good->nr) {
8762 fprintf(stderr, "Ok we have overlapping extents that "
8763 "aren't completely covered by each other, this "
8764 "is going to require more careful thought. "
8765 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8766 tmp->start, tmp->nr, good->start, good->nr);
8773 list_add_tail(&rec->list, &delete_list);
8775 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8778 list_move_tail(&tmp->list, &delete_list);
8781 root = root->fs_info->extent_root;
8782 trans = btrfs_start_transaction(root, 1);
8783 if (IS_ERR(trans)) {
8784 ret = PTR_ERR(trans);
8788 list_for_each_entry(tmp, &delete_list, list) {
8789 if (tmp->found_rec == 0)
8791 key.objectid = tmp->start;
8792 key.type = BTRFS_EXTENT_ITEM_KEY;
8793 key.offset = tmp->nr;
8795 /* Shouldn't happen but just in case */
8796 if (tmp->metadata) {
8797 fprintf(stderr, "Well this shouldn't happen, extent "
8798 "record overlaps but is metadata? "
8799 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8803 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8809 ret = btrfs_del_item(trans, root, &path);
8812 btrfs_release_path(&path);
8815 err = btrfs_commit_transaction(trans, root);
8819 while (!list_empty(&delete_list)) {
8820 tmp = to_extent_record(delete_list.next);
8821 list_del_init(&tmp->list);
8827 while (!list_empty(&rec->dups)) {
8828 tmp = to_extent_record(rec->dups.next);
8829 list_del_init(&tmp->list);
8833 btrfs_release_path(&path);
8835 if (!ret && !nr_del)
8836 rec->num_duplicates = 0;
8838 return ret ? ret : nr_del;
/*
 * Try to locate file extents for the data backrefs of @rec that were never
 * matched to a real reference.
 *
 * Full and non-data backrefs are not considered, and a backref with
 * found_ref already set needs no lookup.  For each remaining backref the
 * fs root dback->root is read and the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched.  When
 * the item is found, its disk bytenr and disk num bytes are read and
 * @extent_cache is consulted for that bytenr; per the comment in the loop,
 * a backref whose bytenr already has an extent record backed by an extent
 * item is not adopted.  An adopted backref gets found_ref incremented and
 * its disk_bytenr/bytes overwritten with the values read from the file
 * extent.
 *
 * A root lookup failing with -ENOENT marks a bad ref that is skipped; any
 * other root error is returned via PTR_ERR().
 */
8841 static int find_possible_backrefs(struct btrfs_fs_info *info,
8842 struct btrfs_path *path,
8843 struct cache_tree *extent_cache,
8844 struct extent_record *rec)
8846 struct btrfs_root *root;
8847 struct extent_backref *back;
8848 struct data_backref *dback;
8849 struct cache_extent *cache;
8850 struct btrfs_file_extent_item *fi;
8851 struct btrfs_key key;
8855 list_for_each_entry(back, &rec->backrefs, list) {
8856 /* Don't care about full backrefs (poor unloved backrefs) */
8857 if (back->full_backref || !back->is_data)
8860 dback = to_data_backref(back);
8862 /* We found this one, we don't need to do a lookup */
8863 if (dback->found_ref)
8866 key.objectid = dback->root;
8867 key.type = BTRFS_ROOT_ITEM_KEY;
8868 key.offset = (u64)-1;
8870 root = btrfs_read_fs_root(info, &key);
8872 /* No root, definitely a bad ref, skip */
8873 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8875 /* Other err, exit */
8877 return PTR_ERR(root);
8879 key.objectid = dback->owner;
8880 key.type = BTRFS_EXTENT_DATA_KEY;
8881 key.offset = dback->offset;
8882 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8884 btrfs_release_path(path);
8887 /* Didn't find it, we can carry on */
8892 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8893 struct btrfs_file_extent_item);
8894 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8895 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8896 btrfs_release_path(path);
8897 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8899 struct extent_record *tmp;
8900 tmp = container_of(cache, struct extent_record, cache);
8903 * If we found an extent record for the bytenr for this
8904 * particular backref then we can't add it to our
8905 * current extent record. We only want to add backrefs
8906 * that don't have a corresponding extent item in the
8907 * extent tree since they likely belong to this record
8908 * and we need to fix it if it doesn't match bytenrs.
8914 dback->found_ref += 1;
8915 dback->disk_bytenr = bytenr;
8916 dback->bytes = bytes;
8919 * Set this so the verify backref code knows not to trust the
8920 * values in this backref.
8929 * Record orphan data ref into corresponding root.
8931 * Return 0 if the extent item contains data ref and recorded.
8932 * Return 1 if the extent item contains no useful data ref
8933 * In that case, it may contain only shared_dataref or metadata backref
8934 * or the file extent exists (this should be handled by the extent bytenr
8936 * Return <0 if something goes wrong.
/*
 * Queue an orphan_data_extent on the owning root for the data backrefs of
 * @rec that exist in the extent tree but were not matched to a file extent.
 *
 * Only non-full data backrefs with found_extent_tree set and found_ref
 * clear are considered.  A root that cannot be read is skipped.  The file
 * extent key (owner, BTRFS_EXTENT_DATA_KEY, offset) is searched in that
 * root and the search result decides whether an orphan is recorded (see the
 * comment in the loop).  Each orphan stores dback->root, dback->owner and
 * dback->offset together with rec->cache.start and rec->cache.size.
 *
 * The final visible return is !recorded_data_ref, i.e. 0 when at least one
 * orphan was recorded and 1 when none was.
 *
 * NOTE(review): list_add() is called with &dest_root->orphan_data_extents
 * first and &orphan->list second.  If list_add() follows the usual
 * (new, head) argument order this is reversed, and recording a second
 * orphan on the same root would drop the first one from the root's list --
 * confirm and swap the arguments if so.
 */
8938 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8939 struct extent_record *rec)
8941 struct btrfs_key key;
8942 struct btrfs_root *dest_root;
8943 struct extent_backref *back;
8944 struct data_backref *dback;
8945 struct orphan_data_extent *orphan;
8946 struct btrfs_path path;
8947 int recorded_data_ref = 0;
8952 btrfs_init_path(&path);
8953 list_for_each_entry(back, &rec->backrefs, list) {
8954 if (back->full_backref || !back->is_data ||
8955 !back->found_extent_tree)
8957 dback = to_data_backref(back);
8958 if (dback->found_ref)
8960 key.objectid = dback->root;
8961 key.type = BTRFS_ROOT_ITEM_KEY;
8962 key.offset = (u64)-1;
8964 dest_root = btrfs_read_fs_root(fs_info, &key);
8966 /* For non-exist root we just skip it */
8967 if (IS_ERR(dest_root) || !dest_root)
8970 key.objectid = dback->owner;
8971 key.type = BTRFS_EXTENT_DATA_KEY;
8972 key.offset = dback->offset;
8974 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8975 btrfs_release_path(&path);
8977 * For ret < 0, it's OK since the fs-tree may be corrupted,
8978 * we need to record it for inode/file extent rebuild.
8979 * For ret > 0, we record it only for file extent rebuild.
8980 * For ret == 0, the file extent exists but only bytenr
8981 * mismatch, let the original bytenr fix routine to handle,
8987 orphan = malloc(sizeof(*orphan));
8992 INIT_LIST_HEAD(&orphan->list);
8993 orphan->root = dback->root;
8994 orphan->objectid = dback->owner;
8995 orphan->offset = dback->offset;
8996 orphan->disk_bytenr = rec->cache.start;
8997 orphan->disk_len = rec->cache.size;
8998 list_add(&dest_root->orphan_data_extents, &orphan->list);
8999 recorded_data_ref = 1;
9002 btrfs_release_path(&path);
9004 return !recorded_data_ref;
9010 * when an incorrect extent item is found, this will delete
9011 * all of the existing entries for it and recreate them
9012 * based on what the tree scan found.
/*
 * Rebuild the extent tree items of @rec from the backrefs collected during
 * the tree scan.
 *
 * For a non-metadata record whose refs differ from extent_item_refs,
 * find_possible_backrefs() and then verify_backrefs() run first.  A
 * transaction on the extent root is started, delete_extent_records()
 * removes the existing items, and info->corrupt_blocks is consulted for the
 * range [rec->start, rec->start + rec->max_size).  record_extent() is then
 * called for the backrefs that pass the found_ref test, with
 * rec->bad_full_backref cleared before each call.  The flags passed along
 * include BTRFS_BLOCK_FLAG_FULL_BACKREF when rec->flag_block_full_backref
 * is set.  The transaction is committed and a "Repaired extent references"
 * message is printed.
 *
 * NOTE(review): the conditions guarding the commit and the success message
 * are not visible here -- confirm.
 */
9014 static int fixup_extent_refs(struct btrfs_fs_info *info,
9015 struct cache_tree *extent_cache,
9016 struct extent_record *rec)
9018 struct btrfs_trans_handle *trans = NULL;
9020 struct btrfs_path path;
9021 struct list_head *cur = rec->backrefs.next;
9022 struct cache_extent *cache;
9023 struct extent_backref *back;
9027 if (rec->flag_block_full_backref)
9028 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9030 btrfs_init_path(&path);
9031 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9033 * Sometimes the backrefs themselves are so broken they don't
9034 * get attached to any meaningful rec, so first go back and
9035 * check any of our backrefs that we couldn't find and throw
9036 * them into the list if we find the backref so that
9037 * verify_backrefs can figure out what to do.
9039 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9044 /* step one, make sure all of the backrefs agree */
9045 ret = verify_backrefs(info, &path, rec);
9049 trans = btrfs_start_transaction(info->extent_root, 1);
9050 if (IS_ERR(trans)) {
9051 ret = PTR_ERR(trans);
9055 /* step two, delete all the existing records */
9056 ret = delete_extent_records(trans, info->extent_root, &path,
9062 /* was this block corrupt? If so, don't add references to it */
9063 cache = lookup_cache_extent(info->corrupt_blocks,
9064 rec->start, rec->max_size);
9070 /* step three, recreate all the refs we did find */
9071 while(cur != &rec->backrefs) {
9072 back = to_extent_backref(cur);
9076 * if we didn't find any references, don't create a
9079 if (!back->found_ref)
9082 rec->bad_full_backref = 0;
9083 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9091 int err = btrfs_commit_transaction(trans, info->extent_root);
9097 fprintf(stderr, "Repaired extent references for %llu\n",
9098 (unsigned long long)rec->start);
9100 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item key is (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level)
 * for a metadata record; the other visible form is
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size).  The item is searched
 * with COW in a transaction on the extent root, its flags are rewritten,
 * the leaf is marked dirty and the transaction is committed.
 *
 * When the search fails or the item is not found, the path is released and
 * the transaction is still committed; the result of that commit is not
 * used.  Returns PTR_ERR(trans) if the transaction cannot be started.
 */
9104 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9105 struct extent_record *rec)
9107 struct btrfs_trans_handle *trans;
9108 struct btrfs_root *root = fs_info->extent_root;
9109 struct btrfs_path path;
9110 struct btrfs_extent_item *ei;
9111 struct btrfs_key key;
9115 key.objectid = rec->start;
9116 if (rec->metadata) {
9117 key.type = BTRFS_METADATA_ITEM_KEY;
9118 key.offset = rec->info_level;
9120 key.type = BTRFS_EXTENT_ITEM_KEY;
9121 key.offset = rec->max_size;
9124 trans = btrfs_start_transaction(root, 0);
9126 return PTR_ERR(trans);
9128 btrfs_init_path(&path);
9129 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9131 btrfs_release_path(&path);
9132 btrfs_commit_transaction(trans, root);
9135 fprintf(stderr, "Didn't find extent for %llu\n",
9136 (unsigned long long)rec->start);
9137 btrfs_release_path(&path);
9138 btrfs_commit_transaction(trans, root);
9142 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9143 struct btrfs_extent_item);
9144 flags = btrfs_extent_flags(path.nodes[0], ei);
9145 if (rec->flag_block_full_backref) {
9146 fprintf(stderr, "setting full backref on %llu\n",
9147 (unsigned long long)key.objectid);
9148 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9150 fprintf(stderr, "clearing full backref on %llu\n",
9151 (unsigned long long)key.objectid);
9152 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9154 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9155 btrfs_mark_buffer_dirty(path.nodes[0]);
9156 btrfs_release_path(&path);
9157 ret = btrfs_commit_transaction(trans, root);
9159 fprintf(stderr, "Repaired extent flags for %llu\n",
9160 (unsigned long long)rec->start);
/*
 * Remove the pointer to the corrupt block @corrupt from its parent node in
 * the extent root.
 *
 * The search for corrupt->key is told to stop at corrupt->level + 1, the
 * level of the parent.  The slot the search landed on is checked first; if
 * its block pointer is not corrupt->cache.start, every slot of that node is
 * scanned for it.  The pointer is removed with btrfs_del_ptr().
 *
 * NOTE(review): the handling of a block that is not found in the node
 * (including the extent root node case) is not visible here -- confirm.
 */
9165 /* right now we only prune from the extent allocation tree */
9166 static int prune_one_block(struct btrfs_trans_handle *trans,
9167 struct btrfs_fs_info *info,
9168 struct btrfs_corrupt_block *corrupt)
9171 struct btrfs_path path;
9172 struct extent_buffer *eb;
9176 int level = corrupt->level + 1;
9178 btrfs_init_path(&path);
9180 /* we want to stop at the parent to our busted block */
9181 path.lowest_level = level;
9183 ret = btrfs_search_slot(trans, info->extent_root,
9184 &corrupt->key, &path, -1, 1);
9189 eb = path.nodes[level];
9196 * hopefully the search gave us the block we want to prune,
9197 * lets try that first
9199 slot = path.slots[level];
9200 found = btrfs_node_blockptr(eb, slot);
9201 if (found == corrupt->cache.start)
9204 nritems = btrfs_header_nritems(eb);
9206 /* the search failed, lets scan this node and hope we find it */
9207 for (slot = 0; slot < nritems; slot++) {
9208 found = btrfs_node_blockptr(eb, slot);
9209 if (found == corrupt->cache.start)
9213 * we couldn't find the bad block. TODO, search all the nodes for pointers
9216 if (eb == info->extent_root->node) {
9221 btrfs_release_path(&path);
9226 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9227 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9230 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are fetched with search_cache_extent() from offset 0; each one is
 * handed to prune_one_block() and then removed from the cache.  The work
 * runs in a transaction on the extent root whose commit result is returned;
 * PTR_ERR(trans) is returned if the transaction cannot be started.
 *
 * NOTE(review): the return value of prune_one_block() is not checked, so a
 * failed prune still removes the entry from the cache.
 */
9234 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9236 struct btrfs_trans_handle *trans = NULL;
9237 struct cache_extent *cache;
9238 struct btrfs_corrupt_block *corrupt;
9241 cache = search_cache_extent(info->corrupt_blocks, 0);
9245 trans = btrfs_start_transaction(info->extent_root, 1);
9247 return PTR_ERR(trans);
9249 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9250 prune_one_block(trans, info, corrupt);
9251 remove_cache_extent(info->corrupt_blocks, cache);
9254 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges found in fs_info->free_space_cache and step
 * through block groups, advancing start to the end of each block group
 * returned by btrfs_lookup_first_block_group().
 *
 * NOTE(review): what is reset on each block group is not visible here --
 * confirm.
 */
9258 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9260 struct btrfs_block_group_cache *cache;
9265 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9266 &start, &end, EXTENT_DIRTY);
9269 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9274 cache = btrfs_lookup_first_block_group(fs_info, start);
9279 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and when repairing fix, the inconsistencies of the extent records
 * held in @extent_cache.
 *
 * Preparation for repair: the range of every record in @extent_cache and of
 * every entry in corrupt_blocks is marked dirty in excluded_extents so that
 * nothing is allocated from problem extents, prune_corrupt_blocks() and
 * reset_cached_block_groups() are called, and the records queued on the
 * global duplicate_extents list go through process_duplicates() and
 * delete_duplicate_records().
 *
 * Each record is then examined.  Messages are printed for multiple extent
 * items, a ref count mismatch (followed by record_orphan_data_extents()), a
 * backpointer mismatch, a failed owner ref check, a bad full backref,
 * metadata crossing a stripe boundary and a chunk type mismatch.
 * fixup_extent_refs() and fixup_extent_flags() perform the repairs.  The
 * record is removed from the cache, its backrefs are freed, and its range
 * may be cleared from excluded_extents again.
 *
 * At the end a failure other than -EAGAIN is reported as fatal; the other
 * visible path runs btrfs_fix_block_accounting() in its own transaction on
 * the extent root.
 *
 * NOTE(review): the conditions selecting these paths and the final return
 * value are not visible here -- confirm.
 */
9283 static int check_extent_refs(struct btrfs_root *root,
9284 struct cache_tree *extent_cache)
9286 struct extent_record *rec;
9287 struct cache_extent *cache;
9293 * if we're doing a repair, we have to make sure
9294 * we don't allocate from the problem extents.
9295 * In the worst case, this will be all the
9298 cache = search_cache_extent(extent_cache, 0);
9300 rec = container_of(cache, struct extent_record, cache);
9301 set_extent_dirty(root->fs_info->excluded_extents,
9303 rec->start + rec->max_size - 1);
9304 cache = next_cache_extent(cache);
9307 /* pin down all the corrupted blocks too */
9308 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9310 set_extent_dirty(root->fs_info->excluded_extents,
9312 cache->start + cache->size - 1);
9313 cache = next_cache_extent(cache);
9315 prune_corrupt_blocks(root->fs_info);
9316 reset_cached_block_groups(root->fs_info);
9319 reset_cached_block_groups(root->fs_info);
9322 * We need to delete any duplicate entries we find first otherwise we
9323 * could mess up the extent tree when we have backrefs that actually
9324 * belong to a different extent item and not the weird duplicate one.
9326 while (repair && !list_empty(&duplicate_extents)) {
9327 rec = to_extent_record(duplicate_extents.next);
9328 list_del_init(&rec->list);
9330 /* Sometimes we can find a backref before we find an actual
9331 * extent, so we need to process it a little bit to see if there
9332 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9333 * if this is a backref screwup. If we need to delete stuff
9334 * process_duplicates() will return 0, otherwise it will return
9337 if (process_duplicates(extent_cache, rec))
9339 ret = delete_duplicate_records(root, rec);
9343 * delete_duplicate_records will return the number of entries
9344 * deleted, so if it's greater than 0 then we know we actually
9345 * did something and we need to remove.
9358 cache = search_cache_extent(extent_cache, 0);
9361 rec = container_of(cache, struct extent_record, cache);
9362 if (rec->num_duplicates) {
9363 fprintf(stderr, "extent item %llu has multiple extent "
9364 "items\n", (unsigned long long)rec->start);
9368 if (rec->refs != rec->extent_item_refs) {
9369 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9370 (unsigned long long)rec->start,
9371 (unsigned long long)rec->nr);
9372 fprintf(stderr, "extent item %llu, found %llu\n",
9373 (unsigned long long)rec->extent_item_refs,
9374 (unsigned long long)rec->refs);
9375 ret = record_orphan_data_extents(root->fs_info, rec);
9381 if (all_backpointers_checked(rec, 1)) {
9382 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9383 (unsigned long long)rec->start,
9384 (unsigned long long)rec->nr);
9388 if (!rec->owner_ref_checked) {
9389 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9390 (unsigned long long)rec->start,
9391 (unsigned long long)rec->nr);
9396 if (repair && fix) {
9397 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9403 if (rec->bad_full_backref) {
9404 fprintf(stderr, "bad full backref, on [%llu]\n",
9405 (unsigned long long)rec->start);
9407 ret = fixup_extent_flags(root->fs_info, rec);
9415 * Although it's not a extent ref's problem, we reuse this
9416 * routine for error reporting.
9417 * No repair function yet.
9419 if (rec->crossing_stripes) {
9421 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9422 rec->start, rec->start + rec->max_size);
9426 if (rec->wrong_chunk_type) {
9428 "bad extent [%llu, %llu), type mismatch with chunk\n",
9429 rec->start, rec->start + rec->max_size);
9433 remove_cache_extent(extent_cache, cache);
9434 free_all_extent_backrefs(rec);
9435 if (!init_extent_tree && repair && (!cur_err || fix))
9436 clear_extent_dirty(root->fs_info->excluded_extents,
9438 rec->start + rec->max_size - 1);
9443 if (ret && ret != -EAGAIN) {
9444 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9447 struct btrfs_trans_handle *trans;
9449 root = root->fs_info->extent_root;
9450 trans = btrfs_start_transaction(root, 1);
9451 if (IS_ERR(trans)) {
9452 ret = PTR_ERR(trans);
9456 btrfs_fix_block_accounting(trans, root);
9457 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the stripe length of a chunk of @length bytes that has
 * @num_stripes stripes, based on the RAID profile bit set in @type:
 *   BTRFS_BLOCK_GROUP_RAID0  -> length / num_stripes
 *   BTRFS_BLOCK_GROUP_RAID10 -> length * 2 / num_stripes
 *   BTRFS_BLOCK_GROUP_RAID5  -> length / (num_stripes - 1)
 *   BTRFS_BLOCK_GROUP_RAID6  -> length / (num_stripes - 2)
 *   anything else            -> length
 * The profile bits are tested in that order.
 *
 * NOTE(review): no guard against a zero divisor is visible (num_stripes of
 * 0, of 1 with RAID5, or of 2 with RAID6) -- confirm that callers validate
 * num_stripes.
 */
9466 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9470 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9471 stripe_size = length;
9472 stripe_size /= num_stripes;
9473 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9474 stripe_size = length * 2;
9475 stripe_size /= num_stripes;
9476 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9477 stripe_size = length;
9478 stripe_size /= (num_stripes - 1);
9479 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9480 stripe_size = length;
9481 stripe_size /= (num_stripes - 2);
9483 stripe_size = length;
9489 * Check the chunk with its block group/dev list ref:
9490 * Return 0 if all refs seem valid.
9491 * Return 1 if only part of the refs seem valid and a later check is needed,
9492 * like missing block group and needs to search extent tree to rebuild them.
9493 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check @chunk_rec against its block group item and its device
 * extents.
 *
 * Block group: the record is looked up in @block_group_cache and compared
 * with the chunk on length vs. block group offset, offset vs. block group
 * objectid, and type flags.  A mismatch is printed; the visible success
 * path unlinks the block group record from its list and stores it in
 * chunk_rec->bg_rec.  A missing block group is printed as well.
 *
 * Device extents: the expected extent length comes from
 * calc_stripe_length().  For every stripe the record at (devid, offset) is
 * looked up in @dev_extent_cache and compared on objectid, offset,
 * chunk_offset and length.  A mismatch or a missing record is printed; the
 * visible success path moves the record onto chunk_rec->dextents.
 *
 * NOTE(review): the return values and the role of metadump_v2 in the type
 * flags comparison are not visible here -- confirm.
 */
9495 static int check_chunk_refs(struct chunk_record *chunk_rec,
9496 struct block_group_tree *block_group_cache,
9497 struct device_extent_tree *dev_extent_cache,
9500 struct cache_extent *block_group_item;
9501 struct block_group_record *block_group_rec;
9502 struct cache_extent *dev_extent_item;
9503 struct device_extent_record *dev_extent_rec;
9507 int metadump_v2 = 0;
9511 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9514 if (block_group_item) {
9515 block_group_rec = container_of(block_group_item,
9516 struct block_group_record,
9518 if (chunk_rec->length != block_group_rec->offset ||
9519 chunk_rec->offset != block_group_rec->objectid ||
9521 chunk_rec->type_flags != block_group_rec->flags)) {
9524 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9525 chunk_rec->objectid,
9530 chunk_rec->type_flags,
9531 block_group_rec->objectid,
9532 block_group_rec->type,
9533 block_group_rec->offset,
9534 block_group_rec->offset,
9535 block_group_rec->objectid,
9536 block_group_rec->flags);
9539 list_del_init(&block_group_rec->list);
9540 chunk_rec->bg_rec = block_group_rec;
9545 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9546 chunk_rec->objectid,
9551 chunk_rec->type_flags);
9558 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9559 chunk_rec->num_stripes);
9560 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9561 devid = chunk_rec->stripes[i].devid;
9562 offset = chunk_rec->stripes[i].offset;
9563 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9564 devid, offset, length);
9565 if (dev_extent_item) {
9566 dev_extent_rec = container_of(dev_extent_item,
9567 struct device_extent_record,
9569 if (dev_extent_rec->objectid != devid ||
9570 dev_extent_rec->offset != offset ||
9571 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9572 dev_extent_rec->length != length) {
9575 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9576 chunk_rec->objectid,
9579 chunk_rec->stripes[i].devid,
9580 chunk_rec->stripes[i].offset,
9581 dev_extent_rec->objectid,
9582 dev_extent_rec->offset,
9583 dev_extent_rec->length);
9586 list_move(&dev_extent_rec->chunk_list,
9587 &chunk_rec->dextents);
9592 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9593 chunk_rec->objectid,
9596 chunk_rec->stripes[i].devid,
9597 chunk_rec->stripes[i].offset);
9604 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk record in @chunk_cache and cross-check it with
 * check_chunk_refs() against @block_group_cache and @dev_extent_cache.
 *
 * Each chunk record is queued on one of the caller's lists according to the
 * result of that check: @good when it returned 0 and @rebuild when it
 * returned a positive value; both lists are only used when non-NULL.
 * NOTE(review): the condition guarding the insertion into @bad is not
 * visible in this listing -- presumably "err < 0 && bad"; confirm.
 *
 * Afterwards every entry on block_group_cache->block_groups and on
 * dev_extent_cache->no_chunk_orphans is reported as having no matching chunk.
 * NOTE(review): @silent is forwarded to check_chunk_refs(); whether it also
 * gates the two messages printed here, and how the return value is built,
 * cannot be seen from this listing.
 */
9605 int check_chunks(struct cache_tree *chunk_cache,
9606 struct block_group_tree *block_group_cache,
9607 struct device_extent_tree *dev_extent_cache,
9608 struct list_head *good, struct list_head *bad,
9609 struct list_head *rebuild, int silent)
9611 struct cache_extent *chunk_item;
9612 struct chunk_record *chunk_rec;
9613 struct block_group_record *bg_rec;
9614 struct device_extent_record *dext_rec;
/* Pass 1: classify every chunk record. */
9618 chunk_item = first_cache_extent(chunk_cache);
9619 while (chunk_item) {
9620 chunk_rec = container_of(chunk_item, struct chunk_record,
9622 err = check_chunk_refs(chunk_rec, block_group_cache,
9623 dev_extent_cache, silent);
9626 if (err == 0 && good)
9627 list_add_tail(&chunk_rec->list, good);
9628 if (err > 0 && rebuild)
9629 list_add_tail(&chunk_rec->list, rebuild);
9631 list_add_tail(&chunk_rec->list, bad);
9632 chunk_item = next_cache_extent(chunk_item);
/* Pass 2: block groups on the block_groups list have no chunk. */
9635 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9638 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Pass 3: device extents on the no_chunk_orphans list have no chunk. */
9646 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9650 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the lengths of the device extent records in @dext_cache that belong
 * to @dev_rec and compare the sum with dev_rec->byte_used.
 *
 * The scan starts at whatever search_cache_extent2() returns for
 * (dev_rec->devid, 0) and advances with next_cache_extent().  Every record
 * that is counted is also unlinked from its device_list.  A difference
 * between the sum and byte_used is reported with the message below.
 * NOTE(review): the loop header, the action taken when a record's objectid
 * differs from the devid (presumably the scan stops there), and the return
 * statements are not visible in this listing -- confirm.
 */
9661 static int check_device_used(struct device_record *dev_rec,
9662 struct device_extent_tree *dext_cache)
9664 struct cache_extent *cache;
9665 struct device_extent_record *dev_extent_rec;
9668 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9670 dev_extent_rec = container_of(cache,
9671 struct device_extent_record,
9673 if (dev_extent_rec->objectid != dev_rec->devid)
/* Record belongs to this device: detach it and account its length. */
9676 list_del_init(&dev_extent_rec->device_list);
9677 total_byte += dev_extent_rec->length;
9678 cache = next_cache_extent(cache);
/* The accumulated extent length must equal the device item's byte_used. */
9681 if (total_byte != dev_rec->byte_used) {
9683 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9684 total_byte, dev_rec->byte_used, dev_rec->objectid,
9685 dev_rec->type, dev_rec->offset);
9692 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device_record in the @dev_cache rb-tree
 * (walked with rb_first()/rb_next()), then report each device extent record
 * on dev_extent_cache->no_device_orphans as having no matching device.
 * NOTE(review): how the per-device result (err) is folded into the return
 * value is not visible in this listing.
 */
9693 static int check_devices(struct rb_root *dev_cache,
9694 struct device_extent_tree *dev_extent_cache)
9696 struct rb_node *dev_node;
9697 struct device_record *dev_rec;
9698 struct device_extent_record *dext_rec;
9702 dev_node = rb_first(dev_cache);
9704 dev_rec = container_of(dev_node, struct device_record, node);
9705 err = check_device_used(dev_rec, dev_extent_cache);
9709 dev_node = rb_next(dev_node);
/* Device extents on the no_device_orphans list have no device item. */
9711 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9714 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9715 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level and @level_size are
 * stored in the record fields of the same name; @drop_key is copied into
 * ri_rec->drop_key.
 * NOTE(review): callers in this file pass NULL for @drop_key, so the memcpy()
 * is presumably guarded by a NULL check that is not visible in this listing.
 * The handling of a failed malloc() and the return values are not visible
 * either -- confirm.
 */
9722 static int add_root_item_to_list(struct list_head *head,
9723 u64 objectid, u64 bytenr, u64 last_snapshot,
9724 u8 level, u8 drop_level,
9725 int level_size, struct btrfs_key *drop_key)
9728 struct root_item_record *ri_rec;
9729 ri_rec = malloc(sizeof(*ri_rec));
9732 ri_rec->bytenr = bytenr;
9733 ri_rec->objectid = objectid;
9734 ri_rec->level = level;
9735 ri_rec->level_size = level_size;
9736 ri_rec->drop_level = drop_level;
9737 ri_rec->last_snapshot = last_snapshot;
9739 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9740 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: repeatedly take the first root_item_record and unlink it with
 * list_del_init() until the list is empty.
 * NOTE(review): the release of the record's memory is not visible in this
 * listing; confirm each record is freed after being unlinked.
 */
9745 static void free_root_item_list(struct list_head *list)
9747 struct root_item_record *ri_rec;
9749 while (!list_empty(list)) {
9750 ri_rec = list_first_entry(list, struct root_item_record,
9752 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list, head first.
 *
 * For each record the root node is read with read_tree_block() at
 * rec->bytenr (size rec->level_size), handed to add_root_to_pending() under
 * rec->objectid, and run_next_block() is called with the record.  The buffer
 * is then released and the record unlinked from @list.  After the list loop
 * run_next_block() is called again with a NULL record.
 *
 * All cache/tree arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 * NOTE(review): the loop structure around both run_next_block() calls, the
 * error paths and the return value are not visible in this listing; bits_nr
 * and last are used below but their declarations are not shown -- confirm.
 */
9757 static int deal_root_from_list(struct list_head *list,
9758 struct btrfs_root *root,
9759 struct block_info *bits,
9761 struct cache_tree *pending,
9762 struct cache_tree *seen,
9763 struct cache_tree *reada,
9764 struct cache_tree *nodes,
9765 struct cache_tree *extent_cache,
9766 struct cache_tree *chunk_cache,
9767 struct rb_root *dev_cache,
9768 struct block_group_tree *block_group_cache,
9769 struct device_extent_tree *dev_extent_cache)
9774 while (!list_empty(list)) {
9775 struct root_item_record *rec;
9776 struct extent_buffer *buf;
9777 rec = list_entry(list->next,
9778 struct root_item_record, list);
/* The read goes through fs_info->tree_root, not through @root itself. */
9780 buf = read_tree_block(root->fs_info->tree_root,
9781 rec->bytenr, rec->level_size, 0);
9782 if (!extent_buffer_uptodate(buf)) {
9783 free_extent_buffer(buf);
9787 ret = add_root_to_pending(buf, extent_cache, pending,
9788 seen, nodes, rec->objectid);
9792 * To rebuild extent tree, we need deal with snapshot
9793 * one by one, otherwise we deal with node firstly which
9794 * can maximize readahead.
9797 ret = run_next_block(root, bits, bits_nr, &last,
9798 pending, seen, reada, nodes,
9799 extent_cache, chunk_cache,
9800 dev_cache, block_group_cache,
9801 dev_extent_cache, rec);
9805 free_extent_buffer(buf);
9806 list_del(&rec->list);
9812 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9813 reada, nodes, extent_cache, chunk_cache,
9814 dev_cache, block_group_cache,
9815 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk/extent consistency pass for the filesystem
 * that @root belongs to.
 *
 * Steps visible in this listing:
 *  - initialise the local caches and lists, and publish excluded_extents,
 *    extent_cache, free_extent_hook and corrupt_blocks through root->fs_info;
 *  - allocate the block_info array "bits" and, when progress reporting is
 *    enabled, start the TASK_EXTENTS task;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - walk ROOT_ITEM keys in the tree root; a root whose drop_progress key has
 *    objectid 0 is queued on normal_trees, the other visible branch queues
 *    the root on dropping_trees together with its drop key;
 *  - run deal_root_from_list() on normal_trees, then on dropping_trees;
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - tear down the caches and clear the fs_info pointers set above.
 * NOTE(review): branch structure (braces, else, labels, goto/return lines),
 * the initial search key and several declarations (ret, level, slot, bits_nr,
 * ...) are not visible in this listing.  Two teardown sequences are visible
 * at the end; what selects between them cannot be seen here -- confirm
 * against the full file.
 */
9825 static int check_chunks_and_extents(struct btrfs_root *root)
9827 struct rb_root dev_cache;
9828 struct cache_tree chunk_cache;
9829 struct block_group_tree block_group_cache;
9830 struct device_extent_tree dev_extent_cache;
9831 struct cache_tree extent_cache;
9832 struct cache_tree seen;
9833 struct cache_tree pending;
9834 struct cache_tree reada;
9835 struct cache_tree nodes;
9836 struct extent_io_tree excluded_extents;
9837 struct cache_tree corrupt_blocks;
9838 struct btrfs_path path;
9839 struct btrfs_key key;
9840 struct btrfs_key found_key;
9842 struct block_info *bits;
9844 struct extent_buffer *leaf;
9846 struct btrfs_root_item ri;
9847 struct list_head dropping_trees;
9848 struct list_head normal_trees;
9849 struct btrfs_root *root1;
/* Set up every local cache, tree and list before anything can fail. */
9854 dev_cache = RB_ROOT;
9855 cache_tree_init(&chunk_cache);
9856 block_group_tree_init(&block_group_cache);
9857 device_extent_tree_init(&dev_extent_cache);
9859 cache_tree_init(&extent_cache);
9860 cache_tree_init(&seen);
9861 cache_tree_init(&pending);
9862 cache_tree_init(&nodes);
9863 cache_tree_init(&reada);
9864 cache_tree_init(&corrupt_blocks);
9865 extent_io_tree_init(&excluded_extents);
9866 INIT_LIST_HEAD(&dropping_trees);
9867 INIT_LIST_HEAD(&normal_trees);
/* Expose the local state through fs_info; cleared again during teardown. */
9870 root->fs_info->excluded_extents = &excluded_extents;
9871 root->fs_info->fsck_extent_cache = &extent_cache;
9872 root->fs_info->free_extent_hook = free_extent_hook;
9873 root->fs_info->corrupt_blocks = &corrupt_blocks;
9877 bits = malloc(bits_nr * sizeof(struct block_info));
9883 if (ctx.progress_enabled) {
9884 ctx.tp = TASK_EXTENTS;
9885 task_start(ctx.info);
/* Queue the tree root first ... */
9889 root1 = root->fs_info->tree_root;
9890 level = btrfs_header_level(root1->node);
9891 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9892 root1->node->start, 0, level, 0,
9893 root1->nodesize, NULL);
/* ... then the chunk root. */
9896 root1 = root->fs_info->chunk_root;
9897 level = btrfs_header_level(root1->node);
9898 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899 root1->node->start, 0, level, 0,
9900 root1->nodesize, NULL);
/* Enumerate the ROOT_ITEMs stored in the tree root. */
9903 btrfs_init_path(&path);
9906 key.type = BTRFS_ROOT_ITEM_KEY;
9907 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9912 leaf = path.nodes[0];
9913 slot = path.slots[0];
9914 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9915 ret = btrfs_next_leaf(root, &path);
9918 leaf = path.nodes[0];
9919 slot = path.slots[0];
9921 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9922 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9923 unsigned long offset;
9926 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9927 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9928 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal_trees queue. */
9929 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9930 level = btrfs_root_level(&ri);
9931 level_size = root->nodesize;
9932 ret = add_root_item_to_list(&normal_trees,
9934 btrfs_root_bytenr(&ri),
9935 last_snapshot, level,
9936 0, level_size, NULL);
9940 level = btrfs_root_level(&ri);
9941 level_size = root->nodesize;
9942 objectid = found_key.objectid;
9943 btrfs_disk_key_to_cpu(&found_key,
9945 ret = add_root_item_to_list(&dropping_trees,
9947 btrfs_root_bytenr(&ri),
9948 last_snapshot, level,
9950 level_size, &found_key);
9957 btrfs_release_path(&path);
9960 * check_block can return -EAGAIN if it fixes something, please keep
9961 * this in mind when dealing with return values from these functions, if
9962 * we get -EAGAIN we want to fall through and restart the loop.
9964 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9965 &seen, &reada, &nodes, &extent_cache,
9966 &chunk_cache, &dev_cache, &block_group_cache,
9973 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9974 &pending, &seen, &reada, &nodes,
9975 &extent_cache, &chunk_cache, &dev_cache,
9976 &block_group_cache, &dev_extent_cache);
/* Cross-check the collected records: chunks, extent refs, then devices. */
9983 ret = check_chunks(&chunk_cache, &block_group_cache,
9984 &dev_extent_cache, NULL, NULL, NULL, 0);
9991 ret = check_extent_refs(root, &extent_cache);
9998 ret = check_devices(&dev_cache, &dev_extent_cache);
10003 task_stop(ctx.info);
/* First visible teardown sequence. */
10005 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10006 extent_io_tree_cleanup(&excluded_extents);
10007 root->fs_info->fsck_extent_cache = NULL;
10008 root->fs_info->free_extent_hook = NULL;
10009 root->fs_info->corrupt_blocks = NULL;
10010 root->fs_info->excluded_extents = NULL;
10013 free_chunk_cache_tree(&chunk_cache);
10014 free_device_cache_tree(&dev_cache);
10015 free_block_group_tree(&block_group_cache);
10016 free_device_extent_tree(&dev_extent_cache);
10017 free_extent_cache_tree(&seen);
10018 free_extent_cache_tree(&pending);
10019 free_extent_cache_tree(&reada);
10020 free_extent_cache_tree(&nodes);
10021 free_root_item_list(&normal_trees);
10022 free_root_item_list(&dropping_trees);
/* Second visible teardown sequence; this one also frees extent_cache. */
10025 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10026 free_extent_cache_tree(&seen);
10027 free_extent_cache_tree(&pending);
10028 free_extent_cache_tree(&reada);
10029 free_extent_cache_tree(&nodes);
10030 free_chunk_cache_tree(&chunk_cache);
10031 free_block_group_tree(&block_group_cache);
10032 free_device_cache_tree(&dev_cache);
10033 free_device_extent_tree(&dev_extent_cache);
10034 free_extent_record_cache(&extent_cache);
10035 free_root_item_list(&normal_trees);
10036 free_root_item_list(&dropping_trees);
10037 extent_io_tree_cleanup(&excluded_extents);
10042 * Check backrefs of a tree block given by @bytenr or @eb.
10044 * @root: the root containing the @bytenr or @eb
10045 * @eb: tree block extent buffer, can be NULL
10046 * @bytenr: bytenr of the tree block to search
10047 * @level: tree level of the tree block
10048 * @owner: owner of the tree block
10050 * Return >0 for any error found and output error message
10051 * Return 0 for no error found
/*
 * Implementation outline, as far as visible in this listing:
 *  - remember whether the block is the root node of a tree reloc root
 *    (root objectid is BTRFS_TREE_RELOC_OBJECTID and the root node's bytenr
 *    equals @bytenr);
 *  - locate the extent/metadata item for @bytenr in the extent tree by
 *    searching for (bytenr, type, -1) and stepping back with
 *    btrfs_previous_extent_item(); failures set BACKREF_MISSING;
 *  - take the recorded level from key.offset for a METADATA_ITEM, or from the
 *    btrfs_tree_block_info that follows the extent item;
 *  - compare the TREE_BLOCK flag, the generation of @eb's header, @level and
 *    the reference count (for non-fs-tree owners) with the item; each
 *    mismatch sets BACKREF_MISMATCH;
 *  - scan the inline refs for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or for a SHARED_BLOCK_REF, which is accepted
 *    for a tree reloc root and otherwise validated by a recursive call on the
 *    parent block at @level + 1;
 *  - search for a keyed TREE_BLOCK_REF item
 *    (bytenr, TREE_BLOCK_REF_KEY, root->objectid);
 *  - print "backref lost" when @eb is given and BACKREF_MISSING is set.
 * NOTE(review): the mismatch checks below assign err ("err = ...") while the
 * missing-backref paths OR into it ("err |= ...").  Branch structure (braces,
 * else, goto/return lines) is not visible in this listing, so whether an
 * earlier bit can be overwritten cannot be determined here -- confirm.
 */
10053 static int check_tree_block_ref(struct btrfs_root *root,
10054 struct extent_buffer *eb, u64 bytenr,
10055 int level, u64 owner)
10057 struct btrfs_key key;
10058 struct btrfs_root *extent_root = root->fs_info->extent_root;
10059 struct btrfs_path path;
10060 struct btrfs_extent_item *ei;
10061 struct btrfs_extent_inline_ref *iref;
10062 struct extent_buffer *leaf;
10068 u32 nodesize = root->nodesize;
10071 int tree_reloc_root = 0;
10076 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10077 btrfs_header_bytenr(root->node) == bytenr)
10078 tree_reloc_root = 1;
10080 btrfs_init_path(&path);
10081 key.objectid = bytenr;
10082 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10083 key.type = BTRFS_METADATA_ITEM_KEY;
10085 key.type = BTRFS_EXTENT_ITEM_KEY;
10086 key.offset = (u64)-1;
10088 /* Search for the backref in extent tree */
10089 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10091 err |= BACKREF_MISSING;
10094 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10096 err |= BACKREF_MISSING;
10100 leaf = path.nodes[0];
10101 slot = path.slots[0];
10102 btrfs_item_key_to_cpu(leaf, &key, slot);
10104 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* The inline refs start right after the level information. */
10106 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10107 skinny_level = (int)key.offset;
10108 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10110 struct btrfs_tree_block_info *info;
10112 info = (struct btrfs_tree_block_info *)(ei + 1);
10113 skinny_level = btrfs_tree_block_level(leaf, info);
10114 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10121 if (!(btrfs_extent_flags(leaf, ei) &
10122 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10124 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10125 key.objectid, nodesize,
10126 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10127 err = BACKREF_MISMATCH;
10129 header_gen = btrfs_header_generation(eb);
10130 extent_gen = btrfs_extent_generation(leaf, ei);
10131 if (header_gen != extent_gen) {
10133 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10134 key.objectid, nodesize, header_gen,
10136 err = BACKREF_MISMATCH;
10138 if (level != skinny_level) {
10140 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10141 key.objectid, nodesize, level, skinny_level);
10142 err = BACKREF_MISMATCH;
10144 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10146 "extent[%llu %u] is referred by other roots than %llu",
10147 key.objectid, nodesize, root->objectid);
10148 err = BACKREF_MISMATCH;
10153 * Iterate the extent/metadata item to find the exact backref
10155 item_size = btrfs_item_size_nr(leaf, slot);
10156 ptr = (unsigned long)iref;
10157 end = (unsigned long)ei + item_size;
10158 while (ptr < end) {
10159 iref = (struct btrfs_extent_inline_ref *)ptr;
10160 type = btrfs_extent_inline_ref_type(leaf, iref);
10161 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10163 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10164 (offset == root->objectid || offset == owner)) {
10166 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10168 * Backref of tree reloc root points to itself, no need
10169 * to check backref any more.
10171 if (tree_reloc_root)
10174 /* Check if the backref points to valid referencer */
10175 found_ref = !check_tree_block_ref(root, NULL,
10176 offset, level + 1, owner);
10181 ptr += btrfs_extent_inline_ref_size(type);
10185 * Inlined extent item doesn't have what we need, check
10186 * TREE_BLOCK_REF_KEY
10189 btrfs_release_path(&path);
10190 key.objectid = bytenr;
10191 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10192 key.offset = root->objectid;
10194 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10199 err |= BACKREF_MISSING;
10201 btrfs_release_path(&path);
10202 if (eb && (err & BACKREF_MISSING))
10203 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10204 bytenr, nodesize, owner, level);
10209 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10211 * Return >0 any error found and output error message
10212 * Return 0 for no error found
/*
 * Implementation outline, as far as visible in this listing:
 *  - read the file extent item at @slot of @eb; inline extents and extents
 *    with disk_bytenr 0 (holes) are not checked further;
 *  - disk_num_bytes and num_bytes must be aligned to root->sectorsize,
 *    otherwise BYTES_UNALIGNED is set; the global counters
 *    data_bytes_allocated and data_bytes_referenced are updated here;
 *  - look up the extent item (disk_bytenr, EXTENT_ITEM, disk_num_bytes) in
 *    the extent tree and require BTRFS_EXTENT_FLAG_DATA, else
 *    BACKREF_MISMATCH;
 *  - scan the inline refs: an EXTENT_DATA_REF matches when its root equals
 *    the owner of @eb or root->objectid; a SHARED_DATA_REF is validated
 *    through check_tree_block_ref() on the block it points to;
 *  - if nothing matched, search for a keyed EXTENT_DATA_REF (offset is
 *    hash_extent_data_ref(root->objectid, inode, file offset)) and then for
 *    a keyed SHARED_DATA_REF with offset eb->start;
 *  - no match at all sets BACKREF_MISSING and prints "backref lost".
 * NOTE(review): @slot is reused below as the slot inside the extent tree
 * leaf.  Branch structure (braces, else, goto/return lines) and part of the
 * check_tree_block_ref() argument list are not visible in this listing, e.g.
 * whether the two byte counters are only updated for aligned values --
 * confirm against the full file.
 */
10214 static int check_extent_data_item(struct btrfs_root *root,
10215 struct extent_buffer *eb, int slot)
10217 struct btrfs_file_extent_item *fi;
10218 struct btrfs_path path;
10219 struct btrfs_root *extent_root = root->fs_info->extent_root;
10220 struct btrfs_key fi_key;
10221 struct btrfs_key dbref_key;
10222 struct extent_buffer *leaf;
10223 struct btrfs_extent_item *ei;
10224 struct btrfs_extent_inline_ref *iref;
10225 struct btrfs_extent_data_ref *dref;
10228 u64 disk_num_bytes;
10229 u64 extent_num_bytes;
10236 int found_dbackref = 0;
10240 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10241 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10243 /* Nothing to check for hole and inline data extents */
10244 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10245 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10248 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10249 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10250 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10252 /* Check unaligned disk_num_bytes and num_bytes */
10253 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10255 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10256 fi_key.objectid, fi_key.offset, disk_num_bytes,
10258 err |= BYTES_UNALIGNED;
10260 data_bytes_allocated += disk_num_bytes;
10262 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10264 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10265 fi_key.objectid, fi_key.offset, extent_num_bytes,
10267 err |= BYTES_UNALIGNED;
10269 data_bytes_referenced += extent_num_bytes;
10271 owner = btrfs_header_owner(eb);
10273 /* Check the extent item of the file extent in extent tree */
10274 btrfs_init_path(&path);
10275 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10276 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10277 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10279 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10283 leaf = path.nodes[0];
10284 slot = path.slots[0];
10285 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10287 extent_flags = btrfs_extent_flags(leaf, ei);
10289 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10291 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10292 disk_bytenr, disk_num_bytes,
10293 BTRFS_EXTENT_FLAG_DATA);
10294 err |= BACKREF_MISMATCH;
10297 /* Check data backref inside that extent item */
10298 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10299 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10300 ptr = (unsigned long)iref;
10301 end = (unsigned long)ei + item_size;
10302 while (ptr < end) {
10303 iref = (struct btrfs_extent_inline_ref *)ptr;
10304 type = btrfs_extent_inline_ref_type(leaf, iref);
10305 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10307 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10308 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10309 if (ref_root == owner || ref_root == root->objectid)
10310 found_dbackref = 1;
10311 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10312 found_dbackref = !check_tree_block_ref(root, NULL,
10313 btrfs_extent_inline_ref_offset(leaf, iref),
10317 if (found_dbackref)
10319 ptr += btrfs_extent_inline_ref_size(type);
10322 if (!found_dbackref) {
10323 btrfs_release_path(&path);
10325 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10326 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10327 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10328 dbref_key.offset = hash_extent_data_ref(root->objectid,
10329 fi_key.objectid, fi_key.offset);
10331 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10332 &dbref_key, &path, 0, 0);
10334 found_dbackref = 1;
10338 btrfs_release_path(&path);
10341 * Neither inlined nor EXTENT_DATA_REF found, try
10342 * SHARED_DATA_REF as last chance.
10344 dbref_key.objectid = disk_bytenr;
10345 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10346 dbref_key.offset = eb->start;
10348 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10349 &dbref_key, &path, 0, 0);
10351 found_dbackref = 1;
10357 if (!found_dbackref)
10358 err |= BACKREF_MISSING;
10359 btrfs_release_path(&path);
10360 if (err & BACKREF_MISSING) {
10361 error("data extent[%llu %llu] backref lost",
10362 disk_bytenr, disk_num_bytes);
10368 * Get real tree block level for the case like shared block
10369 * Return >= 0 as tree level
10370 * Return <0 for error
/*
 * Implementation outline, as far as visible in this listing:
 *  - search the extent tree for (bytenr, METADATA_ITEM, -1) and step back
 *    with btrfs_previous_extent_item() to reach the item for @bytenr;
 *  - the item must carry BTRFS_EXTENT_FLAG_TREE_BLOCK;
 *  - the item's generation is kept as the transid for the later read;
 *  - the level recorded in the backref is key.offset for a METADATA_ITEM, or
 *    the level stored in the btrfs_tree_block_info following the item;
 *  - the tree block itself is read (nodesize taken from the super block copy)
 *    and its header level compared with the backref level;
 *  - the header level is returned by the visible return statement.
 * NOTE(review): the error paths (what is returned when the search fails, the
 * flag is missing, the block cannot be read or the two levels differ) are not
 * visible in this listing -- confirm.
 */
10372 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10374 struct extent_buffer *eb;
10375 struct btrfs_path path;
10376 struct btrfs_key key;
10377 struct btrfs_extent_item *ei;
10380 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10385 /* Search extent tree for extent generation and level */
10386 key.objectid = bytenr;
10387 key.type = BTRFS_METADATA_ITEM_KEY;
10388 key.offset = (u64)-1;
10390 btrfs_init_path(&path);
10391 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10394 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10402 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10403 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10404 struct btrfs_extent_item);
10405 flags = btrfs_extent_flags(path.nodes[0], ei);
10406 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10411 /* Get transid for later read_tree_block() check */
10412 transid = btrfs_extent_generation(path.nodes[0], ei);
10414 /* Get backref level as one source */
10415 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10416 backref_level = key.offset;
10418 struct btrfs_tree_block_info *info;
10420 info = (struct btrfs_tree_block_info *)(ei + 1);
10421 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10423 btrfs_release_path(&path);
10425 /* Get level from tree block as an alternative source */
10426 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10427 if (!extent_buffer_uptodate(eb)) {
10428 free_extent_buffer(eb);
10431 header_level = btrfs_header_level(eb);
10432 free_extent_buffer(eb);
10434 if (header_level != backref_level)
10436 return header_level;
10439 btrfs_release_path(&path);
10444 * Check if a tree block backref is valid (points to a valid tree block)
10445 * if level == -1, level will be resolved
10446 * Return >0 for any error found and print error message
/*
 * Implementation outline, as far as visible in this listing:
 *  - when the level has to be resolved, query_tree_block_level() supplies it;
 *    a failure there sets REFERENCER_MISSING;
 *  - the root @root_id is read with btrfs_read_fs_root() using the key
 *    (root_id, ROOT_ITEM, -1); an error pointer sets REFERENCER_MISSING;
 *  - the tree block at @bytenr is read to obtain its first key (node key or
 *    item key); an unreadable block sets REFERENCER_MISSING, and a block with
 *    no items at level 0 needs no key check;
 *  - the root is searched for that key with path.lowest_level = level, and
 *    the block found at path.nodes[level] must have the same bytenr and
 *    level, otherwise REFERENCER_MISMATCH is set;
 *  - REFERENCER_MISSING is reported with one of the two "lost referencer"
 *    messages.
 * NOTE(review): branch structure (braces, else, goto/return lines) is not
 * visible in this listing, including which condition selects between the two
 * "lost referencer" messages -- confirm against the full file.
 */
10448 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10449 u64 bytenr, int level)
10451 struct btrfs_root *root;
10452 struct btrfs_key key;
10453 struct btrfs_path path;
10454 struct extent_buffer *eb;
10455 struct extent_buffer *node;
10456 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10460 /* Query level for level == -1 special case */
10462 level = query_tree_block_level(fs_info, bytenr);
10464 err |= REFERENCER_MISSING;
10468 key.objectid = root_id;
10469 key.type = BTRFS_ROOT_ITEM_KEY;
10470 key.offset = (u64)-1;
10472 root = btrfs_read_fs_root(fs_info, &key);
10473 if (IS_ERR(root)) {
10474 err |= REFERENCER_MISSING;
10478 /* Read out the tree block to get item/node key */
10479 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10480 if (!extent_buffer_uptodate(eb)) {
10481 err |= REFERENCER_MISSING;
10482 free_extent_buffer(eb);
10486 /* Empty tree, no need to check key */
10487 if (!btrfs_header_nritems(eb) && !level) {
10488 free_extent_buffer(eb);
10493 btrfs_node_key_to_cpu(eb, &key, 0);
10495 btrfs_item_key_to_cpu(eb, &key, 0);
10497 free_extent_buffer(eb);
10499 btrfs_init_path(&path);
10500 path.lowest_level = level;
10501 /* Search with the first key, to ensure we can reach it */
10502 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10504 err |= REFERENCER_MISSING;
/* The block reached at the requested level must be the one being checked. */
10508 node = path.nodes[level];
10509 if (btrfs_header_bytenr(node) != bytenr) {
10511 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10512 bytenr, nodesize, bytenr,
10513 btrfs_header_bytenr(node));
10514 err |= REFERENCER_MISMATCH;
10516 if (btrfs_header_level(node) != level) {
10518 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10519 bytenr, nodesize, level,
10520 btrfs_header_level(node));
10521 err |= REFERENCER_MISMATCH;
10525 btrfs_release_path(&path);
10527 if (err & REFERENCER_MISSING) {
10529 error("extent [%llu %d] lost referencer (owner: %llu)",
10530 bytenr, nodesize, root_id);
10533 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10534 bytenr, nodesize, root_id, level);
10541 * Check if tree block @eb is tree reloc root.
10542 * Return 0 if it's not or any problem happens
10543 * Return 1 if it's a tree reloc root
/*
 * Implementation outline, as far as visible in this listing: the root
 * identified by the key (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb)
 * is read with btrfs_read_fs_root_no_cache(), the bytenr of @eb is compared
 * with the bytenr of that root's node, and the root is released again with
 * btrfs_free_fs_root().
 * NOTE(review): the return statements and the variable that records the
 * comparison result are not visible in this listing.
 */
10545 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10546 struct extent_buffer *eb)
10548 struct btrfs_root *tree_reloc_root;
10549 struct btrfs_key key;
10550 u64 bytenr = btrfs_header_bytenr(eb);
10551 u64 owner = btrfs_header_owner(eb);
10554 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10555 key.offset = owner;
10556 key.type = BTRFS_ROOT_ITEM_KEY;
10558 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10559 if (IS_ERR(tree_reloc_root))
10562 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10564 btrfs_free_fs_root(tree_reloc_root);
10569 * Check referencer for shared block backref
10570 * If level == -1, this function will resolve the level.
/*
 * Implementation outline, as far as visible in this listing:
 *  - read the tree block at @parent (nodesize from the super block copy);
 *  - resolve @level with query_tree_block_level() when required;
 *  - when @parent equals @bytenr the block may be a tree reloc root, which is
 *    tested with is_tree_reloc_root();
 *  - otherwise the parent's header level must be @level + 1 and one of its
 *    node pointers must equal @bytenr;
 *  - if no parent reference was found, an error is printed and
 *    REFERENCER_MISSING is returned.
 * NOTE(review): branch structure (braces, goto/return lines) and the
 * statements that set found_parent are not visible in this listing --
 * confirm against the full file.
 */
10572 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10573 u64 parent, u64 bytenr, int level)
10575 struct extent_buffer *eb;
10576 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10578 int found_parent = 0;
10581 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10582 if (!extent_buffer_uptodate(eb))
10586 level = query_tree_block_level(fs_info, bytenr);
10590 /* It's possible it's a tree reloc root */
10591 if (parent == bytenr) {
10592 if (is_tree_reloc_root(fs_info, eb))
10597 if (level + 1 != btrfs_header_level(eb))
/* Look for a child pointer in the parent that refers to @bytenr. */
10600 nr = btrfs_header_nritems(eb);
10601 for (i = 0; i < nr; i++) {
10602 if (bytenr == btrfs_node_blockptr(eb, i)) {
10608 free_extent_buffer(eb);
10609 if (!found_parent) {
10611 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10612 bytenr, nodesize, parent, level);
10613 return REFERENCER_MISSING;
10619 * Check referencer for normal (inlined) data ref
10620 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation outline, as far as visible in this listing:
 *  - search the extent tree for (bytenr, EXTENT_ITEM, -1), step back with
 *    btrfs_previous_extent_item() and verify that the item found is the
 *    EXTENT_ITEM for @bytenr;
 *  - read the fs root @root_id and search it for
 *    (objectid, EXTENT_DATA, start), where start is 0 when bit 63 of @offset
 *    is set and @offset otherwise;
 *  - walk the following items with btrfs_next_item() while they are
 *    EXTENT_DATA items of inode @objectid, testing each against @bytenr,
 *    @len and the backref offset (file offset minus file extent offset);
 *  - compare the number of matches with @count; a difference prints an error
 *    and returns REFERENCER_MISSING.
 * NOTE(review): the statements that resolve @len, increment found_count and
 * handle errors are not visible in this listing -- confirm against the full
 * file.
 */
10622 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10623 u64 root_id, u64 objectid, u64 offset,
10624 u64 bytenr, u64 len, u32 count)
10626 struct btrfs_root *root;
10627 struct btrfs_root *extent_root = fs_info->extent_root;
10628 struct btrfs_key key;
10629 struct btrfs_path path;
10630 struct extent_buffer *leaf;
10631 struct btrfs_file_extent_item *fi;
10632 u32 found_count = 0;
10637 key.objectid = bytenr;
10638 key.type = BTRFS_EXTENT_ITEM_KEY;
10639 key.offset = (u64)-1;
10641 btrfs_init_path(&path);
10642 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10645 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10648 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10649 if (key.objectid != bytenr ||
10650 key.type != BTRFS_EXTENT_ITEM_KEY)
10653 btrfs_release_path(&path);
10655 key.objectid = root_id;
10656 key.type = BTRFS_ROOT_ITEM_KEY;
10657 key.offset = (u64)-1;
10658 btrfs_init_path(&path);
10660 root = btrfs_read_fs_root(fs_info, &key);
10664 key.objectid = objectid;
10665 key.type = BTRFS_EXTENT_DATA_KEY;
10667 * It can be nasty as data backref offset is
10668 * file offset - file extent offset, which is smaller or
10669 * equal to original backref offset. The only special case is
10670 * overflow. So we need to special check and do further search.
10672 key.offset = offset & (1ULL << 63) ? 0 : offset;
10674 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10679 * Search afterwards to get correct one
10680 * NOTE: As we must do a comprehensive check on the data backref to
10681 * make sure the dref count also matches, we must iterate all file
10682 * extents for that inode.
10685 leaf = path.nodes[0];
10686 slot = path.slots[0];
10688 if (slot >= btrfs_header_nritems(leaf))
10690 btrfs_item_key_to_cpu(leaf, &key, slot);
10691 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10693 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10695 * Except normal disk bytenr and disk num bytes, we still
10696 * need to do extra check on dbackref offset as
10697 * dbackref offset = file_offset - file_extent_offset
10699 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10700 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10701 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10706 ret = btrfs_next_item(root, &path);
10711 btrfs_release_path(&path);
10712 if (found_count != count) {
10714 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10715 bytenr, len, root_id, objectid, offset, count, found_count);
10716 return REFERENCER_MISSING;
10722 * Check if the referencer of a shared data backref exists
/*
 * Implementation outline, as far as visible in this listing: the tree block
 * at @parent is read (nodesize from the super block copy) and its items are
 * scanned; EXTENT_DATA items that are not inline extents are tested for a
 * disk bytenr equal to @bytenr.  If no referencing item was found, an error
 * is printed and REFERENCER_MISSING is returned.
 * NOTE(review): the statements executed for non-matching items, the one that
 * sets found_parent, and the remaining return statements are not visible in
 * this listing -- confirm.
 */
10724 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10725 u64 parent, u64 bytenr)
10727 struct extent_buffer *eb;
10728 struct btrfs_key key;
10729 struct btrfs_file_extent_item *fi;
10730 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10732 int found_parent = 0;
10735 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10736 if (!extent_buffer_uptodate(eb))
10739 nr = btrfs_header_nritems(eb);
10740 for (i = 0; i < nr; i++) {
10741 btrfs_item_key_to_cpu(eb, &key, i);
10742 if (key.type != BTRFS_EXTENT_DATA_KEY)
10745 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10746 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10749 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10756 free_extent_buffer(eb);
10757 if (!found_parent) {
10758 error("shared extent %llu referencer lost (parent: %llu)",
10760 return REFERENCER_MISSING;
10766 * This function will check a given extent item, including its backref and
10767 * itself (like crossing stripe boundary and type)
10769 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Implementation outline, as far as visible in this listing:
 *  - account the extent in the global bytes_used counter: key.offset for an
 *    EXTENT_ITEM key, nodesize in the other visible statement;
 *  - an item smaller than struct btrfs_extent_item is reported as an
 *    unsupported COMPAT_EXTENT_TREE_V0 item;
 *  - an extent flagged TREE_BLOCK is treated as metadata and tested with
 *    check_crossing_stripes(); a hit sets CROSSING_STRIPE_BOUNDARY;
 *  - the tree level comes from the btrfs_tree_block_info that follows the
 *    item for old-style EXTENT_ITEM metadata, or from key.offset;
 *  - every inline ref between the end of the header and the end of the item
 *    is dispatched by type to check_tree_block_backref(),
 *    check_shared_block_backref(), check_extent_data_backref() or
 *    check_shared_data_backref(); an unrecognised type sets UNKNOWN_TYPE;
 *  - running past the end of the item sets ITEM_SIZE_MISMATCH.
 * NOTE(review): the loop construct that iterates over the inline refs, the
 * way each checker's result is merged into err, and the return statements
 * are not visible in this listing -- confirm against the full file.
 */
10771 static int check_extent_item(struct btrfs_fs_info *fs_info,
10772 struct extent_buffer *eb, int slot)
10774 struct btrfs_extent_item *ei;
10775 struct btrfs_extent_inline_ref *iref;
10776 struct btrfs_extent_data_ref *dref;
10780 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10781 u32 item_size = btrfs_item_size_nr(eb, slot);
10786 struct btrfs_key key;
10790 btrfs_item_key_to_cpu(eb, &key, slot);
10791 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10792 bytes_used += key.offset;
10794 bytes_used += nodesize;
10796 if (item_size < sizeof(*ei)) {
10798 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10799 * old thing when on disk format is still un-determined.
10800 * No need to care about it anymore
10802 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10806 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10807 flags = btrfs_extent_flags(eb, ei);
10809 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10811 if (metadata && check_crossing_stripes(global_info, key.objectid,
10813 error("bad metadata [%llu, %llu) crossing stripe boundary",
10814 key.objectid, key.objectid + nodesize);
10815 err |= CROSSING_STRIPE_BOUNDARY;
10818 ptr = (unsigned long)(ei + 1);
10820 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10821 /* Old EXTENT_ITEM metadata */
10822 struct btrfs_tree_block_info *info;
10824 info = (struct btrfs_tree_block_info *)ptr;
10825 level = btrfs_tree_block_level(eb, info);
10826 ptr += sizeof(struct btrfs_tree_block_info);
10828 /* New METADATA_ITEM */
10829 level = key.offset;
10831 end = (unsigned long)ei + item_size;
10834 /* Reached extent item end normally */
10838 /* Beyond extent item end, wrong item size */
10840 err |= ITEM_SIZE_MISMATCH;
10841 error("extent item at bytenr %llu slot %d has wrong size",
10846 /* Now check every backref in this extent item */
10847 iref = (struct btrfs_extent_inline_ref *)ptr;
10848 type = btrfs_extent_inline_ref_type(eb, iref);
10849 offset = btrfs_extent_inline_ref_offset(eb, iref);
10851 case BTRFS_TREE_BLOCK_REF_KEY:
10852 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10856 case BTRFS_SHARED_BLOCK_REF_KEY:
10857 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10861 case BTRFS_EXTENT_DATA_REF_KEY:
10862 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10863 ret = check_extent_data_backref(fs_info,
10864 btrfs_extent_data_ref_root(eb, dref),
10865 btrfs_extent_data_ref_objectid(eb, dref),
10866 btrfs_extent_data_ref_offset(eb, dref),
10867 key.objectid, key.offset,
10868 btrfs_extent_data_ref_count(eb, dref));
10871 case BTRFS_SHARED_DATA_REF_KEY:
10872 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10876 error("extent[%llu %d %llu] has unknown ref type: %d",
10877 key.objectid, key.type, key.offset, type);
10878 err |= UNKNOWN_TYPE;
10882 ptr += btrfs_extent_inline_ref_size(type);
10890 * Check if a dev extent item is referred correctly by its chunk
/*
 * Implementation outline, as far as visible in this listing:
 *  - read the dev extent at @slot of @eb: its key (device id and offset on
 *    that device) and its length;
 *  - look up the chunk it names in the chunk root with the key
 *    (chunk_objectid, CHUNK_ITEM, chunk_offset);
 *  - compare the chunk's length with the dev extent's length;
 *  - scan the chunk's stripes for one whose devid and offset equal the dev
 *    extent key's objectid and offset;
 *  - if no such chunk/stripe was found, print an error and return
 *    REFERENCER_MISSING.
 * NOTE(review): the length test compares the whole chunk length with the
 * length of a single dev extent; for chunks with more than one data stripe
 * these would presumably differ -- confirm this comparison is intended.  The
 * assignment of "l", the statement that sets found_chunk and the error paths
 * are not visible in this listing.
 */
10892 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10893 struct extent_buffer *eb, int slot)
10895 struct btrfs_root *chunk_root = fs_info->chunk_root;
10896 struct btrfs_dev_extent *ptr;
10897 struct btrfs_path path;
10898 struct btrfs_key chunk_key;
10899 struct btrfs_key devext_key;
10900 struct btrfs_chunk *chunk;
10901 struct extent_buffer *l;
10905 int found_chunk = 0;
10908 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10909 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10910 length = btrfs_dev_extent_length(eb, ptr);
10912 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10913 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10914 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10916 btrfs_init_path(&path);
10917 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10922 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10923 if (btrfs_chunk_length(l, chunk) != length)
10926 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10927 for (i = 0; i < num_stripes; i++) {
10928 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10929 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10931 if (devid == devext_key.objectid &&
10932 offset == devext_key.offset) {
10938 btrfs_release_path(&path);
10939 if (!found_chunk) {
10941 "device extent[%llu, %llu, %llu] did not find the related chunk",
10942 devext_key.objectid, devext_key.offset, length);
10943 return REFERENCER_MISSING;
10949 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used value stored in the dev item at @slot of @eb with
 * the summed length of the DEV_EXTENT items of the same device id found in
 * the dev tree.
 *
 * Returns REFERENCER_MISSING on the "cannot find any related dev extent"
 * path and ACCOUNTING_MISMATCH when the two totals differ.
 */
10951 static int check_dev_item(struct btrfs_fs_info *fs_info,
10952 struct extent_buffer *eb, int slot)
10954 struct btrfs_root *dev_root = fs_info->dev_root;
10955 struct btrfs_dev_item *dev_item;
10956 struct btrfs_path path;
10957 struct btrfs_key key;
10958 struct btrfs_dev_extent *ptr;
/* Device id and recorded usage from the dev item under check */
10964 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10965 dev_id = btrfs_device_id(eb, dev_item);
10966 used = btrfs_device_bytes_used(eb, dev_item);
/* Search key for this device's dev extents */
10968 key.objectid = dev_id;
10969 key.type = BTRFS_DEV_EXTENT_KEY;
10972 btrfs_init_path(&path);
10973 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message names that item */
10975 btrfs_item_key_to_cpu(eb, &key, slot);
10976 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10977 key.objectid, key.type, key.offset);
10978 btrfs_release_path(&path);
10979 return REFERENCER_MISSING;
10982 /* Iterate dev_extents to calculate the used space of a device */
10984 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10987 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10988 if (key.objectid > dev_id)
10990 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
/* Accumulate the length of every dev extent owned by dev_id */
10993 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10994 struct btrfs_dev_extent);
10995 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10997 ret = btrfs_next_item(dev_root, &path);
11001 btrfs_release_path(&path);
11003 if (used != total) {
/*
 * Report the key of the dev item itself, as the earlier error in this
 * function does, instead of unrelated constants.
 */
11004 btrfs_item_key_to_cpu(eb, &key, slot);
11006 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11007 total, used, key.objectid,
11008 key.type, key.offset);
11009 return ACCOUNTING_MISMATCH;
11015 * Check a block group item with its referencer (chunk) and its used space
11016 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb.
 *
 * Two things are verified:
 *  - a chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    bg_key.objectid) is looked up in the chunk tree and its length is
 *    checked;
 *  - the extent tree is walked from the block group start and the space
 *    accounted from EXTENT_ITEM/METADATA_ITEM keys is compared with the
 *    "used" field of the block group item. Each extent's DATA/TREE_BLOCK
 *    flag is also checked against the block group flags.
 *
 * Problems are accumulated in err as REFERENCER_MISSING,
 * REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH bits.
 */
11018 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11019 struct extent_buffer *eb, int slot)
11021 struct btrfs_root *extent_root = fs_info->extent_root;
11022 struct btrfs_root *chunk_root = fs_info->chunk_root;
11023 struct btrfs_block_group_item *bi;
11024 struct btrfs_block_group_item bg_item;
11025 struct btrfs_path path;
11026 struct btrfs_key bg_key;
11027 struct btrfs_key chunk_key;
11028 struct btrfs_key extent_key;
11029 struct btrfs_chunk *chunk;
11030 struct extent_buffer *leaf;
11031 struct btrfs_extent_item *ei;
11032 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the block group item out of the buffer and read used/flags */
11040 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11041 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11042 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11043 used = btrfs_block_group_used(&bg_item);
11044 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the block group start bytenr */
11046 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11047 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11048 chunk_key.offset = bg_key.objectid;
11050 btrfs_init_path(&path);
11051 /* Search for the referencer chunk */
11052 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11055 "block group[%llu %llu] did not find the related chunk item",
11056 bg_key.objectid, bg_key.offset);
11057 err |= REFERENCER_MISSING;
11059 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11060 struct btrfs_chunk);
11061 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11064 "block group[%llu %llu] related chunk item length does not match",
11065 bg_key.objectid, bg_key.offset);
11066 err |= REFERENCER_MISMATCH;
11069 btrfs_release_path(&path);
11071 /* Search from the block group bytenr */
11072 extent_key.objectid = bg_key.objectid;
11073 extent_key.type = 0;
11074 extent_key.offset = 0;
11076 btrfs_init_path(&path);
11077 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11081 /* Iterate extent tree to account used space */
11083 leaf = path.nodes[0];
11085 /* Search slot can point to the last item beyond leaf nritems */
11086 if (path.slots[0] >= btrfs_header_nritems(leaf))
11089 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Extents at or past the end of the block group are out of range */
11090 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM and METADATA_ITEM keys take part in the accounting */
11093 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11094 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11096 if (extent_key.objectid < bg_key.objectid)
11099 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11102 total += extent_key.offset;
/* The extent's type flag must agree with the block group type */
11104 ei = btrfs_item_ptr(leaf, path.slots[0],
11105 struct btrfs_extent_item);
11106 flags = btrfs_extent_flags(leaf, ei);
11107 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11108 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11110 "bad extent[%llu, %llu) type mismatch with chunk",
11111 extent_key.objectid,
11112 extent_key.objectid + extent_key.offset);
11113 err |= CHUNK_TYPE_MISMATCH;
11115 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11116 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11117 BTRFS_BLOCK_GROUP_METADATA))) {
11119 "bad extent[%llu, %llu) type mismatch with chunk",
11120 extent_key.objectid,
11121 extent_key.objectid + nodesize);
11122 err |= CHUNK_TYPE_MISMATCH;
11126 ret = btrfs_next_item(extent_root, &path);
11132 btrfs_release_path(&path);
/* Accounted extent space must equal the block group's used bytes */
11134 if (total != used) {
11136 "block group[%llu %llu] used %llu but extent items used %llu",
11137 bg_key.objectid, bg_key.offset, used, total);
11138 err |= ACCOUNTING_MISMATCH;
11144 * Check a chunk item.
11145 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb.
 *
 * Verifies that:
 *  - the chunk length is aligned to the sector size;
 *  - the chunk type has a type bit set and at most one profile bit;
 *  - a block group item keyed (chunk start, BLOCK_GROUP_ITEM, length)
 *    exists in the extent tree and carries the same flags;
 *  - every stripe has a dev extent at (devid, DEV_EXTENT, stripe offset)
 *    that points back to this chunk with the same length.
 *
 * Problems are accumulated in err as BYTES_UNALIGNED, UNKNOWN_TYPE,
 * REFERENCER_MISSING and BACKREF_MISSING bits.
 */
11147 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11148 struct extent_buffer *eb, int slot)
11150 struct btrfs_root *extent_root = fs_info->extent_root;
11151 struct btrfs_root *dev_root = fs_info->dev_root;
11152 struct btrfs_path path;
11153 struct btrfs_key chunk_key;
11154 struct btrfs_key bg_key;
11155 struct btrfs_key devext_key;
11156 struct btrfs_chunk *chunk;
11157 struct extent_buffer *leaf;
11158 struct btrfs_block_group_item *bi;
11159 struct btrfs_block_group_item bg_item;
11160 struct btrfs_dev_extent *ptr;
11161 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* The chunk covers [chunk_key.offset, chunk_end) */
11173 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11174 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11175 length = btrfs_chunk_length(eb, chunk);
11176 chunk_end = chunk_key.offset + length;
11177 if (!IS_ALIGNED(length, sectorsize)) {
11178 error("chunk[%llu %llu) not aligned to %u",
11179 chunk_key.offset, chunk_end, sectorsize);
11180 err |= BYTES_UNALIGNED;
11184 type = btrfs_chunk_type(eb, chunk);
11185 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11186 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11187 error("chunk[%llu %llu) has no chunk type",
11188 chunk_key.offset, chunk_end);
11189 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11191 if (profile && (profile & (profile - 1))) {
11192 error("chunk[%llu %llu) multiple profiles detected: %llx",
11193 chunk_key.offset, chunk_end, profile);
11194 err |= UNKNOWN_TYPE;
/* Key of the block group item that should describe this chunk */
11197 bg_key.objectid = chunk_key.offset;
11198 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11199 bg_key.offset = length;
11201 btrfs_init_path(&path);
11202 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11205 "chunk[%llu %llu) did not find the related block group item",
11206 chunk_key.offset, chunk_end);
11207 err |= REFERENCER_MISSING;
11209 leaf = path.nodes[0];
11210 bi = btrfs_item_ptr(leaf, path.slots[0],
11211 struct btrfs_block_group_item);
11212 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11214 if (btrfs_block_group_flags(&bg_item) != type) {
11216 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11217 chunk_key.offset, chunk_end, type,
11218 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING,
 * while check_block_group_item() uses REFERENCER_MISMATCH for a length
 * mismatch - confirm whether this is intended.
 */
11219 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent pointing back to the chunk */
11223 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11224 for (i = 0; i < num_stripes; i++) {
11225 btrfs_release_path(&path);
11226 btrfs_init_path(&path);
11227 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11231 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11234 goto not_match_dev;
11236 leaf = path.nodes[0];
11237 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238 struct btrfs_dev_extent);
11239 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241 if (objectid != chunk_key.objectid ||
11242 offset != chunk_key.offset ||
11243 btrfs_dev_extent_length(leaf, ptr) != length)
11244 goto not_match_dev;
11247 err |= BACKREF_MISSING;
/* Print the chunk range from chunk_key.offset, like the other messages */
11249 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250 chunk_key.offset, chunk_end, i);
11253 btrfs_release_path(&path);
11259 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching check_*() helper.
 *
 * EXTENT_CSUM items are not checked; their item size is only added to
 * total_csum_bytes. Keyed backref items (TREE_BLOCK_REF, EXTENT_DATA_REF,
 * SHARED_BLOCK_REF, SHARED_DATA_REF) are passed to the corresponding
 * backref checker.
 */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11263 struct btrfs_fs_info *fs_info = root->fs_info;
11264 struct btrfs_key key;
11267 struct btrfs_extent_data_ref *dref;
/* Key of the item at the current slot selects the checker below */
11272 btrfs_item_key_to_cpu(eb, &key, slot);
11276 case BTRFS_EXTENT_DATA_KEY:
11277 ret = check_extent_data_item(root, eb, slot);
11280 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281 ret = check_block_group_item(fs_info, eb, slot);
11284 case BTRFS_DEV_ITEM_KEY:
11285 ret = check_dev_item(fs_info, eb, slot);
11288 case BTRFS_CHUNK_ITEM_KEY:
11289 ret = check_chunk_item(fs_info, eb, slot);
11292 case BTRFS_DEV_EXTENT_KEY:
11293 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11296 case BTRFS_EXTENT_ITEM_KEY:
11297 case BTRFS_METADATA_ITEM_KEY:
11298 ret = check_extent_item(fs_info, eb, slot);
/* Accounting only: sum up the size of csum items */
11301 case BTRFS_EXTENT_CSUM_KEY:
11302 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11304 case BTRFS_TREE_BLOCK_REF_KEY:
11305 ret = check_tree_block_backref(fs_info, key.offset,
11309 case BTRFS_EXTENT_DATA_REF_KEY:
11310 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311 ret = check_extent_data_backref(fs_info,
11312 btrfs_extent_data_ref_root(eb, dref),
11313 btrfs_extent_data_ref_objectid(eb, dref),
11314 btrfs_extent_data_ref_offset(eb, dref),
11316 btrfs_extent_data_ref_count(eb, dref));
11319 case BTRFS_SHARED_BLOCK_REF_KEY:
11320 ret = check_shared_block_backref(fs_info, key.offset,
11324 case BTRFS_SHARED_DATA_REF_KEY:
11325 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in the leaf */
11333 if (++slot < btrfs_header_nritems(eb))
11340 * Helper function for later fs/subvol tree check. To determine if a tree
11341 * block should be checked.
11342 * This function ensures that only the direct referencer with the lowest
11343 * rootid checks a fs/subvolume tree block.
11345 * Backref check at extent tree would detect errors like missing subvolume
11346 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * The extent/metadata item of the block is looked up in the extent tree
 * and its inline refs are scanned. An inline TREE_BLOCK_REF whose root id
 * (the ref offset) is lower than root->objectid ends the scan early.
 * NOTE(review): presumably that path tells the caller to skip the block;
 * the return values are not confirmed here.
 */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11350 struct btrfs_root *extent_root = root->fs_info->extent_root;
11351 struct btrfs_key key;
11352 struct btrfs_path path;
11353 struct extent_buffer *leaf;
11355 struct btrfs_extent_item *ei;
11361 struct btrfs_extent_inline_ref *iref;
/* Search with the largest offset so the slot lands after the block's item */
11364 btrfs_init_path(&path);
11365 key.objectid = btrfs_header_bytenr(eb);
11366 key.type = BTRFS_METADATA_ITEM_KEY;
11367 key.offset = (u64)-1;
11370 * Any failure in backref resolving means we can't determine
11371 * whom the tree block belongs to.
11372 * So in that case, we need to check that tree block
11374 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11378 ret = btrfs_previous_extent_item(extent_root, &path,
11379 btrfs_header_bytenr(eb));
11383 leaf = path.nodes[0];
11384 slot = path.slots[0];
11385 btrfs_item_key_to_cpu(leaf, &key, slot);
11386 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: inline refs start right after the extent item; the other
 * layout has a btrfs_tree_block_info in between.
 */
11388 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11391 struct btrfs_tree_block_info *info;
11393 info = (struct btrfs_tree_block_info *)(ei + 1);
11394 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs from the first one up to the end of the item */
11397 item_size = btrfs_item_size_nr(leaf, slot);
11398 ptr = (unsigned long)iref;
11399 end = (unsigned long)ei + item_size;
11400 while (ptr < end) {
11401 iref = (struct btrfs_extent_inline_ref *)ptr;
11402 type = btrfs_extent_inline_ref_type(leaf, iref);
11403 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11406 * We only check the tree block if current root is
11407 * the lowest referencer of it.
11409 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410 offset < root->objectid) {
11411 btrfs_release_path(&path);
11415 ptr += btrfs_extent_inline_ref_size(type);
11418 * Normally we should also check keyed tree block ref, but that may be
11419 * very time consuming. Inlined ref should already make us skip a lot
11420 * of refs now. So skip search keyed tree block ref.
11424 btrfs_release_path(&path);
11429 * Traversal function for tree block. We will do:
11430 * 1) Skip shared fs/subvolume tree blocks
11431 * 2) Update related bytes accounting
11432 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * For fs/subvolume trees the block is first filtered through
 * should_check(). The global byte counters are then updated, the block's
 * own backref is checked with check_tree_block_ref(), leaves go through
 * check_leaf_items(), and each child of a node is read with
 * read_tree_block() and traversed by a recursive call.
 */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435 struct extent_buffer *node)
11437 struct extent_buffer *eb;
11438 struct btrfs_key key;
11439 struct btrfs_key drop_key;
11447 * Skip shared fs/subvolume tree block, in that case they will
11448 * be checked by referencer with lowest rootid
11450 if (is_fstree(root->objectid) && !should_check(root, node))
11453 /* Update bytes accounting */
11454 total_btree_bytes += node->len;
11455 if (fs_root_objectid(btrfs_header_owner(node)))
11456 total_fs_tree_bytes += node->len;
11457 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458 total_extent_tree_bytes += node->len;
/*
 * Latch found_old_backref for a reloc-tree-owned block that has the mixed
 * backref revision but no RELOC header flag.
 */
11459 if (!found_old_backref &&
11460 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463 found_old_backref = 1;
11465 /* Pre-order traversal: check this block itself first */
11466 level = btrfs_header_level(node);
11467 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468 btrfs_header_level(node),
11469 btrfs_header_owner(node));
11473 "check %s failed root %llu bytenr %llu level %d, force continue check",
11474 level ? "node":"leaf", root->objectid,
11475 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the free space as waste and check every item */
11478 btree_space_waste += btrfs_leaf_free_space(root, node);
11479 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as wasted space */
11484 nr = btrfs_header_nritems(node);
11485 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11486 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487 sizeof(struct btrfs_key_ptr);
11489 /* Then check all its children */
11490 for (i = 0; i < nr; i++) {
11491 u64 blocknr = btrfs_node_blockptr(node, i);
11493 btrfs_node_key_to_cpu(node, &key, i);
/* Children at the drop level are tested against the root's drop_progress */
11494 if (level == root->root_item.drop_level &&
11495 is_dropped_key(&key, &drop_key))
11499 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500 * to call the function itself.
11502 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11503 if (extent_buffer_uptodate(eb)) {
11504 ret = traverse_tree_block(root, eb);
11507 free_extent_buffer(eb);
11514 * Low memory usage version check_chunks_and_extents.
11516 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11518 struct btrfs_path path;
11519 struct btrfs_key key;
11520 struct btrfs_root *root1;
11521 struct btrfs_root *cur_root;
11525 root1 = root->fs_info->chunk_root;
11526 ret = traverse_tree_block(root1, root1->node);
11529 root1 = root->fs_info->tree_root;
11530 ret = traverse_tree_block(root1, root1->node);
11533 btrfs_init_path(&path);
11534 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11536 key.type = BTRFS_ROOT_ITEM_KEY;
11538 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11540 error("cannot find extent treet in tree_root");
11545 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11546 if (key.type != BTRFS_ROOT_ITEM_KEY)
11548 key.offset = (u64)-1;
11550 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11551 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11554 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11555 if (IS_ERR(cur_root) || !cur_root) {
11556 error("failed to read tree: %lld", key.objectid);
11560 ret = traverse_tree_block(cur_root, cur_root->node);
11563 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11564 btrfs_free_fs_root(cur_root);
11566 ret = btrfs_next_item(root1, &path);
11572 btrfs_release_path(&path);
/*
 * Re-initialise @root so that it points at an empty root block.
 *
 * The header of block c is zeroed and rewritten (level, bytenr, generation
 * from the transaction, mixed backref revision, owner, fsid and chunk tree
 * uuid) and the buffer is marked dirty. When the new block sits at the same
 * bytenr as the old one, the root item is updated in the tree root right
 * away. Finally the root is put on the dirty list.
 * NOTE(review): @overwrite presumably selects reusing the current root
 * block instead of allocating one with btrfs_alloc_free_block() - confirm.
 */
11576 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11577 struct btrfs_root *root, int overwrite)
11579 struct extent_buffer *c;
11580 struct extent_buffer *old = root->node;
11583 struct btrfs_disk_key disk_key = {0,0,0};
11589 extent_buffer_get(c);
11592 c = btrfs_alloc_free_block(trans, root,
11594 root->root_key.objectid,
11595 &disk_key, level, 0, 0);
11598 extent_buffer_get(c);
/* Wipe the header and rebuild it field by field */
11602 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11603 btrfs_set_header_level(c, level);
11604 btrfs_set_header_bytenr(c, c->start);
11605 btrfs_set_header_generation(c, trans->transid);
11606 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11607 btrfs_set_header_owner(c, root->root_key.objectid);
11609 write_extent_buffer(c, root->fs_info->fsid,
11610 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11612 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11613 btrfs_header_chunk_tree_uuid(c),
11616 btrfs_mark_buffer_dirty(c);
11618 * this case can happen in the following case:
11620 * 1.overwrite previous root.
11622 * 2.reinit reloc data root, this is because we skip pin
11623 * down reloc data tree before which means we can allocate
11624 * same block bytenr here.
11626 if (old->start == c->start) {
11627 btrfs_set_root_generation(&root->root_item,
11629 root->root_item.level = btrfs_header_level(root->node);
11630 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11631 &root->root_key, &root->root_item);
11633 free_extent_buffer(c);
11637 free_extent_buffer(old);
11639 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not pinned again. For ROOT_ITEM entries (the
 * extent root and the reloc roots are skipped) the referenced root block is
 * read and pinned with tree_root == 0. For node pointers the child is either
 * pinned directly (level 1 and not the tree root) or read and recursed into.
 */
11643 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11644 struct extent_buffer *eb, int tree_root)
11646 struct extent_buffer *tmp;
11647 struct btrfs_root_item *ri;
11648 struct btrfs_key key;
11651 int level = btrfs_header_level(eb);
11657 * If we have pinned this block before, don't pin it again.
11658 * This can not only avoid forever loop with broken filesystem
11659 * but also give us some speedups.
11661 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11662 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11665 btrfs_pin_extent(fs_info, eb->start, eb->len);
11667 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11668 nritems = btrfs_header_nritems(eb);
11669 for (i = 0; i < nritems; i++) {
11671 btrfs_item_key_to_cpu(eb, &key, i);
11672 if (key.type != BTRFS_ROOT_ITEM_KEY)
11674 /* Skip the extent root and reloc roots */
11675 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11676 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11677 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11679 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11680 bytenr = btrfs_disk_root_bytenr(eb, ri);
11683 * If at any point we start needing the real root we
11684 * will have to build a stump root for the root we are
11685 * in, but for now this doesn't actually use the root so
11686 * just pass in extent_root.
11688 tmp = read_tree_block(fs_info->extent_root, bytenr,
11690 if (!extent_buffer_uptodate(tmp)) {
11691 fprintf(stderr, "Error reading root block\n");
11694 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11695 free_extent_buffer(tmp);
11699 bytenr = btrfs_node_blockptr(eb, i);
11701 /* If we aren't the tree root don't read the block */
11702 if (level == 1 && !tree_root) {
11703 btrfs_pin_extent(fs_info, bytenr, nodesize);
11707 tmp = read_tree_block(fs_info->extent_root, bytenr,
11709 if (!extent_buffer_uptodate(tmp)) {
11710 fprintf(stderr, "Error reading tree block\n");
11713 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11714 free_extent_buffer(tmp);
/*
 * Pin the blocks of the chunk tree (tree_root flag 0) and then those
 * reachable from the tree root (tree_root flag 1). Returns the result of
 * the tree root pass.
 */
11723 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11727 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11731 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits masks are cleared, then for every CHUNK_ITEM a
 * block group is added with btrfs_add_block_group() and the chunk's range
 * is marked dirty in fs_info->free_space_cache. A final pass walks the
 * created block groups with btrfs_lookup_first_block_group().
 */
11734 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11736 struct btrfs_block_group_cache *cache;
11737 struct btrfs_path path;
11738 struct extent_buffer *leaf;
11739 struct btrfs_chunk *chunk;
11740 struct btrfs_key key;
11744 btrfs_init_path(&path);
11746 key.type = BTRFS_CHUNK_ITEM_KEY;
11748 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11750 btrfs_release_path(&path);
11755 * We do this in case the block groups were screwed up and had alloc
11756 * bits that aren't actually set on the chunks. This happens with
11757 * restored images every time and could happen in real life I guess.
11759 fs_info->avail_data_alloc_bits = 0;
11760 fs_info->avail_metadata_alloc_bits = 0;
11761 fs_info->avail_system_alloc_bits = 0;
11763 /* First we need to create the in-memory block groups */
11765 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11766 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11768 btrfs_release_path(&path);
11776 leaf = path.nodes[0];
11777 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11778 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: type from the chunk, range from key/length */
11783 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11784 btrfs_add_block_group(fs_info, 0,
11785 btrfs_chunk_type(leaf, chunk),
11786 key.objectid, key.offset,
11787 btrfs_chunk_length(leaf, chunk));
11788 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11789 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups in bytenr order */
11794 cache = btrfs_lookup_first_block_group(fs_info, start);
11798 start = cache->key.objectid + cache->key.offset;
11801 btrfs_release_path(&path);
/*
 * Drop the state of a pending balance from the tree root.
 *
 * Deletes the balance item (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM), then
 * removes the root items with objectid BTRFS_TREE_RELOC_OBJECTID in
 * batches via btrfs_del_items(), and finally re-initialises the data reloc
 * tree with btrfs_fsck_reinit_root() and recreates its root directory.
 */
11805 static int reset_balance(struct btrfs_trans_handle *trans,
11806 struct btrfs_fs_info *fs_info)
11808 struct btrfs_root *root = fs_info->tree_root;
11809 struct btrfs_path path;
11810 struct extent_buffer *leaf;
11811 struct btrfs_key key;
11812 int del_slot, del_nr = 0;
/* Step 1: find and delete the balance item */
11816 btrfs_init_path(&path);
11817 key.objectid = BTRFS_BALANCE_OBJECTID;
11818 key.type = BTRFS_BALANCE_ITEM_KEY;
11820 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11825 goto reinit_data_reloc;
11830 ret = btrfs_del_item(trans, root, &path);
11833 btrfs_release_path(&path);
/* Step 2: delete the reloc tree root items */
11835 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11836 key.type = BTRFS_ROOT_ITEM_KEY;
11838 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11842 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11847 ret = btrfs_del_items(trans, root, &path,
11854 btrfs_release_path(&path);
11857 ret = btrfs_search_slot(trans, root, &key, &path,
11864 leaf = path.nodes[0];
11865 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11866 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11868 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11873 del_slot = path.slots[0];
11882 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11886 btrfs_release_path(&path);
/* Step 3: reset the data reloc tree to an empty tree with a root dir */
11889 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11890 key.type = BTRFS_ROOT_ITEM_KEY;
11891 key.offset = (u64)-1;
11892 root = btrfs_read_fs_root(fs_info, &key);
11893 if (IS_ERR(root)) {
11894 fprintf(stderr, "Error reading data reloc tree\n");
11895 ret = PTR_ERR(root);
11898 record_root_in_trans(trans, root);
11899 ret = btrfs_fsck_reinit_root(trans, root, 0);
11902 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11904 btrfs_release_path(&path);
/*
 * Re-create the extent tree from scratch.
 *
 * Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 * message. Otherwise the visible sequence is: pin all metadata blocks,
 * free and rebuild the in-memory block groups, re-initialise the extent
 * root, insert a block group item for every in-memory block group, and
 * reset any pending balance.
 */
11908 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11909 struct btrfs_fs_info *fs_info)
11915 * The only reason we don't do this is because right now we're just
11916 * walking the trees we find and pinning down their bytes, we don't look
11917 * at any of the leaves. In order to do mixed groups we'd have to check
11918 * the leaves of any fs roots and pin down the bytes for any file
11919 * extents we find. Not hard but why do it if we don't have to?
11921 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11922 fprintf(stderr, "We don't support re-initing the extent tree "
11923 "for mixed block groups yet, please notify a btrfs "
11924 "developer you want to do this so they can add this "
11925 "functionality.\n");
11930 * first we need to walk all of the trees except the extent tree and pin
11931 * down the bytes that are in use so we don't overwrite any existing
11934 ret = pin_metadata_blocks(fs_info);
11936 fprintf(stderr, "error pinning down used bytes\n");
11941 * Need to drop all the block groups since we're going to recreate all
11944 btrfs_free_block_groups(fs_info);
11945 ret = reset_block_groups(fs_info);
11947 fprintf(stderr, "error resetting the block groups\n");
11951 /* Ok we can allocate now, reinit the extent root */
11952 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11954 fprintf(stderr, "extent root initialization failed\n");
11956 * When the transaction code is updated we should end the
11957 * transaction, but for now progs only knows about commit so
11958 * just return an error.
11964 * Now we have all the in-memory block groups setup so we can make
11965 * allocations properly, and the metadata we care about is safe since we
11966 * pinned all of it above.
11969 struct btrfs_block_group_cache *cache;
11971 cache = btrfs_lookup_first_block_group(fs_info, start);
11974 start = cache->key.objectid + cache->key.offset;
11975 ret = btrfs_insert_item(trans, fs_info->extent_root,
11976 &cache->key, &cache->item,
11977 sizeof(cache->item));
11979 fprintf(stderr, "Error adding block group\n");
11982 btrfs_extent_post_op(trans, fs_info->extent_root);
11985 ret = reset_balance(trans, fs_info);
11987 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed.
 *
 * The owner root is looked up from the block header, a transaction is
 * started, and btrfs_search_slot() is run with cow == 1 for the first key
 * of @eb, with path.lowest_level set to the block's level. The transaction
 * is then committed.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line - confirm whether that is intended.
 */
11992 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11994 struct btrfs_path path;
11995 struct btrfs_trans_handle *trans;
11996 struct btrfs_key key;
11999 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the root that owns the block */
12000 key.objectid = btrfs_header_owner(eb);
12001 key.type = BTRFS_ROOT_ITEM_KEY;
12002 key.offset = (u64)-1;
12004 root = btrfs_read_fs_root(root->fs_info, &key);
12005 if (IS_ERR(root)) {
12006 fprintf(stderr, "Couldn't find owner root %llu\n",
12008 return PTR_ERR(root);
12011 trans = btrfs_start_transaction(root, 1);
12013 return PTR_ERR(trans);
/* Search down to the level of eb using its first key */
12015 btrfs_init_path(&path);
12016 path.lowest_level = btrfs_header_level(eb);
12017 if (path.lowest_level)
12018 btrfs_node_key_to_cpu(eb, &key, 0);
12020 btrfs_item_key_to_cpu(eb, &key, 0);
12022 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12023 btrfs_commit_transaction(trans, root);
12024 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The root is read via btrfs_read_fs_root(), the item is located with a
 * deleting search (ins_len -1, cow 1) and removed with btrfs_del_item()
 * inside its own transaction, which is then committed.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * captured on the visible line - confirm whether that is intended.
 */
12028 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12030 struct btrfs_path path;
12031 struct btrfs_trans_handle *trans;
12032 struct btrfs_key key;
12035 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12036 bad->key.type, bad->key.offset);
/* Resolve the root that contains the bad item */
12037 key.objectid = bad->root_id;
12038 key.type = BTRFS_ROOT_ITEM_KEY;
12039 key.offset = (u64)-1;
12041 root = btrfs_read_fs_root(root->fs_info, &key);
12042 if (IS_ERR(root)) {
12043 fprintf(stderr, "Couldn't find owner root %llu\n",
12045 return PTR_ERR(root);
12048 trans = btrfs_start_transaction(root, 1);
12050 return PTR_ERR(trans);
12052 btrfs_init_path(&path);
12053 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12059 ret = btrfs_del_item(trans, root, &path);
12061 btrfs_commit_transaction(trans, root);
12062 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block.
 *
 * Within a transaction on @root, log_root and log_root_level of the super
 * copy are set to 0 and the transaction is committed; ret carries the
 * commit result.
 */
12066 static int zero_log_tree(struct btrfs_root *root)
12068 struct btrfs_trans_handle *trans;
12071 trans = btrfs_start_transaction(root, 1);
12072 if (IS_ERR(trans)) {
12073 ret = PTR_ERR(trans);
12076 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12077 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12078 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start.
 *
 * The range is processed one sector (csum_root->sectorsize) at a time:
 * each sector is read into @buf with read_extent_data() and handed to
 * btrfs_csum_file_block().
 */
12082 static int populate_csum(struct btrfs_trans_handle *trans,
12083 struct btrfs_root *csum_root, char *buf, u64 start,
12090 while (offset < len) {
12091 sectorsize = csum_root->sectorsize;
12092 ret = read_extent_data(csum_root, buf, start + offset,
/* start + len is the end of the range, start + offset the current sector */
12096 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12097 start + offset, buf, sectorsize);
12100 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the file extents of one fs tree.
 *
 * Iterates the items of @cur_root; for every EXTENT_DATA item of type
 * BTRFS_FILE_EXTENT_REG, the on-disk range (disk_bytenr, disk_num_bytes) is
 * passed to populate_csum(). A one-sector scratch buffer is allocated for
 * the reads.
 */
12105 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12106 struct btrfs_root *csum_root,
12107 struct btrfs_root *cur_root)
12109 struct btrfs_path path;
12110 struct btrfs_key key;
12111 struct extent_buffer *node;
12112 struct btrfs_file_extent_item *fi;
/* Scratch buffer sized to one sector of the csum root */
12119 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12123 btrfs_init_path(&path);
12127 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12130 /* Iterate all regular file extents and fill its csum */
12132 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12134 if (key.type != BTRFS_EXTENT_DATA_KEY)
12136 node = path.nodes[0];
12137 slot = path.slots[0];
12138 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12139 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksum the whole on-disk extent, not just the referenced part */
12141 start = btrfs_file_extent_disk_bytenr(node, fi);
12142 len = btrfs_file_extent_disk_num_bytes(node, fi);
12144 ret = populate_csum(trans, csum_root, buf, start, len);
/* -EEXIST from populate_csum() gets dedicated handling */
12145 if (ret == -EEXIST)
12151 * TODO: if next leaf is corrupted, jump to nearest next valid
12154 ret = btrfs_next_item(cur_root, &path);
12164 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree.
 *
 * Iterates ROOT_ITEM entries of the tree root starting at
 * BTRFS_FS_TREE_OBJECTID; each root that passes is_fstree() is read and
 * processed with fill_csum_tree_from_one_fs_root().
 */
12169 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12170 struct btrfs_root *csum_root)
12172 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12173 struct btrfs_path path;
12174 struct btrfs_root *tree_root = fs_info->tree_root;
12175 struct btrfs_root *cur_root;
12176 struct extent_buffer *node;
12177 struct btrfs_key key;
12181 btrfs_init_path(&path);
12182 key.objectid = BTRFS_FS_TREE_OBJECTID;
12184 key.type = BTRFS_ROOT_ITEM_KEY;
12185 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12194 node = path.nodes[0];
12195 slot = path.slots[0];
12196 btrfs_item_key_to_cpu(node, &key, slot);
/* Filter: objectid range, ROOT_ITEM keys only, fs trees only */
12197 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12199 if (key.type != BTRFS_ROOT_ITEM_KEY)
12201 if (!is_fstree(key.objectid))
12203 key.offset = (u64)-1;
12205 cur_root = btrfs_read_fs_root(fs_info, &key);
12206 if (IS_ERR(cur_root) || !cur_root) {
12207 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12211 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12216 ret = btrfs_next_item(tree_root, &path);
12226 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the extent tree.
 *
 * Iterates the extent tree leaves; for every EXTENT_ITEM whose flags
 * include BTRFS_EXTENT_FLAG_DATA, populate_csum() is called for the extent
 * starting at key.objectid. A one-sector scratch buffer is allocated for
 * the reads.
 */
12230 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12231 struct btrfs_root *csum_root)
12233 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12234 struct btrfs_path path;
12235 struct btrfs_extent_item *ei;
12236 struct extent_buffer *leaf;
12238 struct btrfs_key key;
12241 btrfs_init_path(&path);
12243 key.type = BTRFS_EXTENT_ITEM_KEY;
12245 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12247 btrfs_release_path(&path);
/* Scratch buffer sized to one sector of the csum root */
12251 buf = malloc(csum_root->sectorsize);
12253 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
12258 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12259 ret = btrfs_next_leaf(extent_root, &path);
12267 leaf = path.nodes[0];
12269 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12270 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extents without the DATA flag get separate handling */
12275 ei = btrfs_item_ptr(leaf, path.slots[0],
12276 struct btrfs_extent_item);
12277 if (!(btrfs_extent_flags(leaf, ei) &
12278 BTRFS_EXTENT_FLAG_DATA)) {
12283 ret = populate_csum(trans, csum_root, buf, key.objectid,
12290 btrfs_release_path(&path);
12296 * Recalculate the csum and put it into the csum tree.
12298 * Extent tree init will wipe out all the extent info, so in that case, we
12299 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12300 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two csum refill strategies.
 *
 * @trans:          transaction handle forwarded unchanged
 * @csum_root:      csum tree root forwarded unchanged
 * @search_fs_tree: non-zero selects the fs/subvolume tree walk, zero the
 *                  extent tree walk
 *
 * Returns whatever the selected helper returns.
 */
12302 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12303 struct btrfs_root *csum_root,
12304 int search_fs_tree)
/* Walk the fs/subvolume trees when the caller asked for it. */
12306 if (search_fs_tree)
12307 return fill_csum_tree_from_fs(trans, csum_root);
/* Otherwise rebuild from the items of the extent tree. */
12309 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: entries are removed from the
 * cache tree one at a time until it is empty, then the tree itself is
 * freed and the global pointer is reset to NULL. The NULL test at the top
 * covers the case where the cache was never allocated.
 */
12312 static void free_roots_info_cache(void)
12314 if (!roots_info_cache)
12317 while (!cache_tree_empty(roots_info_cache)) {
12318 struct cache_extent *entry;
12319 struct root_item_info *rii;
12321 entry = first_cache_extent(roots_info_cache);
/* Unlink the entry from the tree before touching its container. */
12324 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent. */
12325 rii = container_of(entry, struct root_item_info, cache_extent);
/* The tree is empty now: free it and clear the global pointer. */
12329 free(roots_info_cache);
12330 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree. For every
 * extent or metadata item that describes a tree block whose first inline
 * ref is a BTRFS_TREE_BLOCK_REF_KEY, the ref offset is taken as the owning
 * root id and a root_item_info entry keyed by that id records the highest
 * level seen together with the bytenr and generation of that block.
 *
 * @info: filesystem whose extent root is scanned
 *
 * Return value: presumably 0 on success and a negative error otherwise -
 * TODO confirm.
 */
12333 static int build_roots_info_cache(struct btrfs_fs_info *info)
12336 struct btrfs_key key;
12337 struct extent_buffer *leaf;
12338 struct btrfs_path path;
/* Allocate and initialise the global cache on first use. */
12340 if (!roots_info_cache) {
12341 roots_info_cache = malloc(sizeof(*roots_info_cache));
12342 if (!roots_info_cache)
12344 cache_tree_init(roots_info_cache);
12347 btrfs_init_path(&path);
12349 key.type = BTRFS_EXTENT_ITEM_KEY;
/* The extent tree is searched without a transaction handle. */
12351 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12354 leaf = path.nodes[0];
12357 struct btrfs_key found_key;
12358 struct btrfs_extent_item *ei;
12359 struct btrfs_extent_inline_ref *iref;
12360 int slot = path.slots[0];
12365 struct cache_extent *entry;
12366 struct root_item_info *rii;
/* Past the last item of this leaf: advance and refresh leaf and slot. */
12368 if (slot >= btrfs_header_nritems(leaf)) {
12369 ret = btrfs_next_leaf(info->extent_root, &path);
12376 leaf = path.nodes[0];
12377 slot = path.slots[0];
12380 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are examined further. */
12382 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12383 found_key.type != BTRFS_METADATA_ITEM_KEY)
12386 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12387 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag takes the branch below. */
12389 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12390 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref follows the
 * extent item directly. Otherwise a btrfs_tree_block_info sits between the
 * extent item and the inline ref and supplies the level.
 */
12393 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12394 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12395 level = found_key.offset;
12397 struct btrfs_tree_block_info *binfo;
12399 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12400 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12401 level = btrfs_tree_block_level(leaf, binfo);
12405 * For a root extent, it must be of the following type and the
12406 * first (and only one) iref in the item.
12408 type = btrfs_extent_inline_ref_type(leaf, iref);
12409 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is the root id and the cache key (start, size 1). */
12412 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12413 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12415 rii = malloc(sizeof(struct root_item_info));
12420 rii->cache_extent.start = root_id;
12421 rii->cache_extent.size = 1;
/* (u8)-1 marks a new entry for which no level has been recorded yet. */
12422 rii->level = (u8)-1;
12423 entry = &rii->cache_extent;
12424 ret = insert_cache_extent(roots_info_cache, entry);
12427 rii = container_of(entry, struct root_item_info,
12431 ASSERT(rii->cache_extent.start == root_id);
12432 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level, or the first block seen for this root, replaces the
 * recorded candidate and resets node_count to 1. The equal-level branch
 * presumably accounts for a further node at the same level - TODO confirm.
 */
12434 if (level > rii->level || rii->level == (u8)-1) {
12435 rii->level = level;
12436 rii->bytenr = found_key.objectid;
12437 rii->gen = btrfs_extent_generation(leaf, ei);
12438 rii->node_count = 1;
12439 } else if (level == rii->level) {
12447 btrfs_release_path(&path);
/*
 * Compare the root item at the current path position with the root node
 * recorded for the same root id in roots_info_cache. When bytenr, level or
 * generation differ and read_only_mode is zero, the three fields are
 * overwritten in the leaf with the cached values.
 *
 * @path:           positioned at the root item (nodes[0] and slots[0] are used)
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only check, zero to also rewrite the item
 *
 * Return value: presumably negative on error and positive when the item
 * differs from the cached root node - TODO confirm.
 */
12452 static int maybe_repair_root_item(struct btrfs_path *path,
12453 const struct btrfs_key *root_key,
12454 const int read_only_mode)
12456 const u64 root_id = root_key->objectid;
12457 struct cache_extent *entry;
12458 struct root_item_info *rii;
12459 struct btrfs_root_item ri;
12460 unsigned long offset;
/* Look up the cached info for this root id (start root_id, size 1). */
12462 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12465 "Error: could not find extent items for root %llu\n",
12466 root_key->objectid);
12470 rii = container_of(entry, struct root_item_info, cache_extent);
12471 ASSERT(rii->cache_extent.start == root_id);
12472 ASSERT(rii->cache_extent.size == 1);
/* A node_count other than 1 is reported as a missing btree root extent. */
12474 if (rii->node_count != 1) {
12476 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf into a local struct. */
12481 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12482 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any mismatch in bytenr, level or generation marks the item as outdated. */
12484 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12485 btrfs_root_level(&ri) != rii->level ||
12486 btrfs_root_generation(&ri) != rii->gen) {
12489 * If we're in repair mode but our caller told us to not update
12490 * the root item, i.e. just check if it needs to be updated, don't
12491 * print this message, since the caller will call us again shortly
12492 * for the same root item without read only mode (the caller will
12493 * open a transaction first).
12495 if (!(read_only_mode && repair))
12497 "%sroot item for root %llu,"
12498 " current bytenr %llu, current gen %llu, current level %u,"
12499 " new bytenr %llu, new gen %llu, new level %u\n",
12500 (read_only_mode ? "" : "fixing "),
12502 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12503 btrfs_root_level(&ri),
12504 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported. */
12506 if (btrfs_root_generation(&ri) > rii->gen) {
12508 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12509 root_id, btrfs_root_generation(&ri), rii->gen);
/* Write the corrected fields back into the leaf only when allowed. */
12513 if (!read_only_mode) {
12514 btrfs_set_root_bytenr(&ri, rii->bytenr);
12515 btrfs_set_root_level(&ri, rii->level);
12516 btrfs_set_root_generation(&ri, rii->gen);
12517 write_extent_buffer(path->nodes[0], &ri,
12518 offset, sizeof(ri));
12528 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
12529 * caused read-only snapshots to be corrupted if they were created at a moment
12530 * when the source subvolume/snapshot had orphan items. The issue was that the
12531 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12532 * node instead of the post orphan cleanup root node.
12533 * So this function and its callees just detect and fix those cases. Even
12534 * though the regression was for read-only snapshots, this function applies to
12535 * any snapshot/subvolume root.
12536 * This must be run before any other repair code - not doing so makes other
12537 * repair code delete or modify backrefs in the extent tree for example, which
12538 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the ROOT_ITEM keys of the tree root, starting at
 * BTRFS_FIRST_FREE_OBJECTID, and pass each one except
 * BTRFS_TREE_RELOC_OBJECTID to maybe_repair_root_item(). The helper runs in
 * read-only mode while no transaction handle is open and in update mode
 * once one is.
 *
 * @info: filesystem whose tree root is scanned
 *
 * Return value: presumably negative on error, otherwise the number of
 * outdated root items - TODO confirm.
 */
12540 static int repair_root_items(struct btrfs_fs_info *info)
12542 struct btrfs_path path;
12543 struct btrfs_key key;
12544 struct extent_buffer *leaf;
12545 struct btrfs_trans_handle *trans = NULL;
12548 int need_trans = 0;
12550 btrfs_init_path(&path);
/* The cache of candidate root nodes is built before any item is compared. */
12552 ret = build_roots_info_cache(info);
12556 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12557 key.type = BTRFS_ROOT_ITEM_KEY;
12562 * Avoid opening and committing transactions if a leaf doesn't have
12563 * any root items that need to be fixed, so that we avoid rotating
12564 * backup roots unnecessarily.
12567 trans = btrfs_start_transaction(info->tree_root, 1);
12568 if (IS_ERR(trans)) {
12569 ret = PTR_ERR(trans);
12574 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12578 leaf = path.nodes[0];
12581 struct btrfs_key found_key;
/* End of the current leaf: find the next key and drop the path. */
12583 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12584 int no_more_keys = find_next_key(&path, &key);
12586 btrfs_release_path(&path);
12588 ret = btrfs_commit_transaction(trans,
12600 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Key filters: ROOT_ITEM type and the tree reloc objectid. */
12602 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12604 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Third argument is read_only_mode: 1 without a transaction, else 0. */
12607 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* No transaction is open yet although repair mode is enabled. */
12611 if (!trans && repair) {
12614 btrfs_release_path(&path);
12624 free_roots_info_cache();
12625 btrfs_release_path(&path);
/* NOTE(review): the return value of this commit is not checked. */
12627 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups returned by
 * btrfs_lookup_first_block_group(), then set the super block cache
 * generation to (u64)-1 inside a transaction on the tree root.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; other
 * return paths presumably yield 0 or the negative error from
 * btrfs_clear_free_space_cache() - TODO confirm.
 */
12634 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12636 struct btrfs_trans_handle *trans;
12637 struct btrfs_block_group_cache *bg_cache;
12641 /* Clear all free space cache inodes and its extent data */
12643 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12646 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* The next lookup starts at objectid + offset of this block group key. */
12649 current = bg_cache->key.objectid + bg_cache->key.offset;
12652 /* Don't forget to set cache_generation to -1 */
12653 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12654 if (IS_ERR(trans)) {
12655 error("failed to update super block cache generation");
12656 return PTR_ERR(trans);
12658 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the return value of this commit is not checked. */
12659 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage and help text of the check command: a synopsis line, a short and a
 * long description, then one entry per option. cmd_check() hands this
 * array to usage().
 */
12664 const char * const cmd_check_usage[] = {
12665 "btrfs check [options] <device>",
12666 "Check structural integrity of a filesystem (unmounted).",
12667 "Check structural integrity of an unmounted filesystem. Verify internal",
12668 "trees' consistency and item connectivity. In the repair mode try to",
12669 "fix the problems found. ",
12670 "WARNING: the repair mode is considered dangerous",
12672 "-s|--super <superblock> use this superblock copy",
12673 "-b|--backup use the first valid backup root copy",
12674 "--repair try to repair the filesystem",
12675 "--readonly run in read-only mode (default)",
12676 "--init-csum-tree create a new CRC tree",
12677 "--init-extent-tree create a new extent tree",
12678 "--mode <MODE> allows choice of memory/IO trade-offs",
12679 " where MODE is one of:",
12680 " original - read inodes and extents to memory (requires",
12681 " more memory, does less IO)",
12682 " lowmem - try to use less memory but read blocks again",
12684 "--check-data-csum verify checksums of data blocks",
12685 "-Q|--qgroup-report print a report on qgroup consistency",
12686 "-E|--subvol-extents <subvolid>",
12687 " print subvolume extents and sharing state",
12688 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12689 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12690 "-p|--progress indicate progress",
12691 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12695 int cmd_check(int argc, char **argv)
12697 struct cache_tree root_cache;
12698 struct btrfs_root *root;
12699 struct btrfs_fs_info *info;
12702 u64 tree_root_bytenr = 0;
12703 u64 chunk_root_bytenr = 0;
12704 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12708 int init_csum_tree = 0;
12710 int clear_space_cache = 0;
12711 int qgroup_report = 0;
12712 int qgroups_repaired = 0;
12713 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12717 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12718 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12719 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12720 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12721 static const struct option long_options[] = {
12722 { "super", required_argument, NULL, 's' },
12723 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12724 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12725 { "init-csum-tree", no_argument, NULL,
12726 GETOPT_VAL_INIT_CSUM },
12727 { "init-extent-tree", no_argument, NULL,
12728 GETOPT_VAL_INIT_EXTENT },
12729 { "check-data-csum", no_argument, NULL,
12730 GETOPT_VAL_CHECK_CSUM },
12731 { "backup", no_argument, NULL, 'b' },
12732 { "subvol-extents", required_argument, NULL, 'E' },
12733 { "qgroup-report", no_argument, NULL, 'Q' },
12734 { "tree-root", required_argument, NULL, 'r' },
12735 { "chunk-root", required_argument, NULL,
12736 GETOPT_VAL_CHUNK_TREE },
12737 { "progress", no_argument, NULL, 'p' },
12738 { "mode", required_argument, NULL,
12740 { "clear-space-cache", required_argument, NULL,
12741 GETOPT_VAL_CLEAR_SPACE_CACHE},
12742 { NULL, 0, NULL, 0}
12745 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12749 case 'a': /* ignored */ break;
12751 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12754 num = arg_strtou64(optarg);
12755 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12757 "super mirror should be less than %d",
12758 BTRFS_SUPER_MIRROR_MAX);
12761 bytenr = btrfs_sb_offset(((int)num));
12762 printf("using SB copy %llu, bytenr %llu\n", num,
12763 (unsigned long long)bytenr);
12769 subvolid = arg_strtou64(optarg);
12772 tree_root_bytenr = arg_strtou64(optarg);
12774 case GETOPT_VAL_CHUNK_TREE:
12775 chunk_root_bytenr = arg_strtou64(optarg);
12778 ctx.progress_enabled = true;
12782 usage(cmd_check_usage);
12783 case GETOPT_VAL_REPAIR:
12784 printf("enabling repair mode\n");
12786 ctree_flags |= OPEN_CTREE_WRITES;
12788 case GETOPT_VAL_READONLY:
12791 case GETOPT_VAL_INIT_CSUM:
12792 printf("Creating a new CRC tree\n");
12793 init_csum_tree = 1;
12795 ctree_flags |= OPEN_CTREE_WRITES;
12797 case GETOPT_VAL_INIT_EXTENT:
12798 init_extent_tree = 1;
12799 ctree_flags |= (OPEN_CTREE_WRITES |
12800 OPEN_CTREE_NO_BLOCK_GROUPS);
12803 case GETOPT_VAL_CHECK_CSUM:
12804 check_data_csum = 1;
12806 case GETOPT_VAL_MODE:
12807 check_mode = parse_check_mode(optarg);
12808 if (check_mode == CHECK_MODE_UNKNOWN) {
12809 error("unknown mode: %s", optarg);
12813 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12814 if (strcmp(optarg, "v1") == 0) {
12815 clear_space_cache = 1;
12816 } else if (strcmp(optarg, "v2") == 0) {
12817 clear_space_cache = 2;
12818 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12821 "invalid argument to --clear-space-cache, must be v1 or v2");
12824 ctree_flags |= OPEN_CTREE_WRITES;
12829 if (check_argc_exact(argc - optind, 1))
12830 usage(cmd_check_usage);
12832 if (ctx.progress_enabled) {
12833 ctx.tp = TASK_NOTHING;
12834 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12837 /* This check is the only reason for --readonly to exist */
12838 if (readonly && repair) {
12839 error("repair options are not compatible with --readonly");
12844 * Not supported yet
12846 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12847 error("low memory mode doesn't support repair yet");
12852 cache_tree_init(&root_cache);
12854 if((ret = check_mounted(argv[optind])) < 0) {
12855 error("could not check mount status: %s", strerror(-ret));
12859 error("%s is currently mounted, aborting", argv[optind]);
12865 /* only allow partial opening under repair mode */
12867 ctree_flags |= OPEN_CTREE_PARTIAL;
12869 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12870 chunk_root_bytenr, ctree_flags);
12872 error("cannot open file system");
12878 global_info = info;
12879 root = info->fs_root;
12880 if (clear_space_cache == 1) {
12881 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12883 "free space cache v2 detected, use --clear-space-cache v2");
12887 printf("Clearing free space cache\n");
12888 ret = clear_free_space_cache(info);
12890 error("failed to clear free space cache");
12893 printf("Free space cache cleared\n");
12896 } else if (clear_space_cache == 2) {
12897 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12898 printf("no free space cache v2 to clear\n");
12902 printf("Clear free space cache v2\n");
12903 ret = btrfs_clear_free_space_tree(info);
12905 error("failed to clear free space cache v2: %d", ret);
12908 printf("free space cache v2 cleared\n");
12914 * repair mode will force us to commit transaction which
12915 * will make us fail to load log tree when mounting.
12917 if (repair && btrfs_super_log_root(info->super_copy)) {
12918 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12924 ret = zero_log_tree(root);
12927 error("failed to zero log tree: %d", ret);
12932 uuid_unparse(info->super_copy->fsid, uuidbuf);
12933 if (qgroup_report) {
12934 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12936 ret = qgroup_verify_all(info);
12943 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12944 subvolid, argv[optind], uuidbuf);
12945 ret = print_extent_state(info, subvolid);
12949 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12951 if (!extent_buffer_uptodate(info->tree_root->node) ||
12952 !extent_buffer_uptodate(info->dev_root->node) ||
12953 !extent_buffer_uptodate(info->chunk_root->node)) {
12954 error("critical roots corrupted, unable to check the filesystem");
12960 if (init_extent_tree || init_csum_tree) {
12961 struct btrfs_trans_handle *trans;
12963 trans = btrfs_start_transaction(info->extent_root, 0);
12964 if (IS_ERR(trans)) {
12965 error("error starting transaction");
12966 ret = PTR_ERR(trans);
12971 if (init_extent_tree) {
12972 printf("Creating a new extent tree\n");
12973 ret = reinit_extent_tree(trans, info);
12979 if (init_csum_tree) {
12980 printf("Reinitialize checksum tree\n");
12981 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12983 error("checksum tree initialization failed: %d",
12990 ret = fill_csum_tree(trans, info->csum_root,
12994 error("checksum tree refilling failed: %d", ret);
12999 * Ok now we commit and run the normal fsck, which will add
13000 * extent entries for all of the items it finds.
13002 ret = btrfs_commit_transaction(trans, info->extent_root);
13007 if (!extent_buffer_uptodate(info->extent_root->node)) {
13008 error("critical: extent_root, unable to check the filesystem");
13013 if (!extent_buffer_uptodate(info->csum_root->node)) {
13014 error("critical: csum_root, unable to check the filesystem");
13020 if (!ctx.progress_enabled)
13021 fprintf(stderr, "checking extents\n");
13022 if (check_mode == CHECK_MODE_LOWMEM)
13023 ret = check_chunks_and_extents_v2(root);
13025 ret = check_chunks_and_extents(root);
13029 "errors found in extent allocation tree or chunk allocation");
13031 ret = repair_root_items(info);
13034 error("failed to repair root items: %s", strerror(-ret));
13038 fprintf(stderr, "Fixed %d roots.\n", ret);
13040 } else if (ret > 0) {
13042 "Found %d roots with an outdated root item.\n",
13045 "Please run a filesystem check with the option --repair to fix them.\n");
13051 if (!ctx.progress_enabled) {
13052 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13053 fprintf(stderr, "checking free space tree\n");
13055 fprintf(stderr, "checking free space cache\n");
13057 ret = check_space_cache(root);
13060 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13061 error("errors found in free space tree");
13063 error("errors found in free space cache");
13068 * We used to have to have these hole extents in between our real
13069 * extents so if we don't have this flag set we need to make sure there
13070 * are no gaps in the file extents for inodes, otherwise we can just
13071 * ignore it when this happens.
13073 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13074 if (!ctx.progress_enabled)
13075 fprintf(stderr, "checking fs roots\n");
13076 if (check_mode == CHECK_MODE_LOWMEM)
13077 ret = check_fs_roots_v2(root->fs_info);
13079 ret = check_fs_roots(root, &root_cache);
13082 error("errors found in fs roots");
13086 fprintf(stderr, "checking csums\n");
13087 ret = check_csums(root);
13090 error("errors found in csum tree");
13094 fprintf(stderr, "checking root refs\n");
13095 /* For low memory mode, check_fs_roots_v2 handles root refs */
13096 if (check_mode != CHECK_MODE_LOWMEM) {
13097 ret = check_root_refs(root, &root_cache);
13100 error("errors found in root refs");
13105 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13106 struct extent_buffer *eb;
13108 eb = list_first_entry(&root->fs_info->recow_ebs,
13109 struct extent_buffer, recow);
13110 list_del_init(&eb->recow);
13111 ret = recow_extent_buffer(root, eb);
13114 error("fails to fix transid errors");
13119 while (!list_empty(&delete_items)) {
13120 struct bad_item *bad;
13122 bad = list_first_entry(&delete_items, struct bad_item, list);
13123 list_del_init(&bad->list);
13125 ret = delete_bad_item(root, bad);
13131 if (info->quota_enabled) {
13132 fprintf(stderr, "checking quota groups\n");
13133 ret = qgroup_verify_all(info);
13136 error("failed to check quota groups");
13140 ret = repair_qgroups(info, &qgroups_repaired);
13143 error("failed to repair quota groups");
13149 if (!list_empty(&root->fs_info->recow_ebs)) {
13150 error("transid errors in file system");
13155 if (found_old_backref) { /*
13156 * there was a disk format change when mixed
13157 * backref was in testing tree. The old format
13158 * existed about one week.
13160 printf("\n * Found old mixed backref format. "
13161 "The old format is not supported! *"
13162 "\n * Please mount the FS in readonly mode, "
13163 "backup data and re-format the FS. *\n\n");
13166 printf("found %llu bytes used, ",
13167 (unsigned long long)bytes_used);
13169 printf("error(s) found\n");
13171 printf("no error found\n");
13172 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13173 printf("total tree bytes: %llu\n",
13174 (unsigned long long)total_btree_bytes);
13175 printf("total fs tree bytes: %llu\n",
13176 (unsigned long long)total_fs_tree_bytes);
13177 printf("total extent tree bytes: %llu\n",
13178 (unsigned long long)total_extent_tree_bytes);
13179 printf("btree space waste bytes: %llu\n",
13180 (unsigned long long)btree_space_waste);
13181 printf("file data blocks allocated: %llu\n referenced %llu\n",
13182 (unsigned long long)data_bytes_allocated,
13183 (unsigned long long)data_bytes_referenced);
13185 free_qgroup_counts();
13186 free_root_recs_tree(&root_cache);
13190 if (ctx.progress_enabled)
13191 task_deinit(ctx.info);