2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state of the checker. The counters and option flags are
 * explicitly zeroed, the two list heads are declared with LIST_HEAD(), and
 * roots_info_cache starts out as NULL.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
/* Context handed to the progress indicator (see print_status_check()) */
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Checker implementation selector. parse_check_mode() maps an option string
 * to one of these values; CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL.
 */
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode currently selected; initialised to the default mode */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Header common to the backref records in this file: it is embedded as the
 * `node` member of struct data_backref and struct tree_backref and is linked
 * through `list` (see to_extent_backref()). The one-bit members are
 * per-backref status flags.
 */
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
/*
 * Backref record that embeds the common extent_backref header as `node`;
 * to_data_backref() converts from the header back to this type.
 */
102 struct data_backref {
103 struct extent_backref node;
/* Error bit flags; note that bit 0 is not assigned in this set. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * List-linked record of a data extent; entries are chained through `list`.
 * print_orphan_data_extents() reads its objectid, offset and disk_bytenr
 * members when reporting.
 */
146 struct orphan_data_extent {
147 struct list_head list;
/*
 * Backref record that embeds the common extent_backref header as `node`;
 * to_tree_backref() converts from the header back to this type.
 */
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Value used to explicitly initialise extent_record::flag_block_full_backref,
 * which is a two-bit field and can therefore hold it.
 */
enum {
	FLAG_UNSET = 2
};
/*
 * Bookkeeping for one extent.
 *
 * Carries three list heads (backrefs, dups, list) and an embedded
 * cache_extent; to_extent_record() recovers the record from its `list` link.
 * flag_block_full_backref is two bits wide so that it can hold FLAG_UNSET (2)
 * in addition to 0 and 1; the remaining bit-fields are one-bit status flags.
 */
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
/*
 * One named reference to an inode, chained on inode_record::backrefs via
 * `list`. The found_* bits record which kinds of item have been seen for
 * this reference; they are set in add_inode_backref().
 */
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
/* Record describing a root item; linkable through `list`, holds a drop key. */
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Per-backref error bits, accumulated in the backref's `errors` field and
 * decoded by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
/*
 * A hole in a file's extent coverage, described by `start` and `len` and
 * kept in an rb-tree through its `node` member (see compare_hole()).
 */
247 struct file_extent_hole {
/*
 * State accumulated for one inode.
 *
 * `backrefs` chains struct inode_backref entries, `holes` is an rb-tree of
 * struct file_extent_hole and `orphan_extents` chains struct
 * orphan_data_extent entries. The one-bit members record what has been
 * found or decided for the inode so far.
 */
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Per-inode error bits, accumulated in inode_record::errors and decoded by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
/*
 * Reference record between roots, chained through `list` (see
 * to_root_backref()). The one-bit members are status flags; judging by their
 * names they track which directory items and forward/back references were
 * seen and whether the root is reachable - confirm against the users.
 */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
/*
 * State of a tree walk. `shared` is the cache tree searched by
 * find_shared_node(); `nodes` holds one shared_node pointer per tree level
 * and is indexed by level in enter_shared_node()/leave_shared_node().
 */
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
/* List-linkable extent entry; chained through `list`. */
356 struct extent_entry {
361 struct list_head list;
/* Summary of a root item, with an embedded cache_extent for tree lookup. */
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below owns a distinct bit so that a combined error mask can be
 * decoded unambiguously. CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now uses the first free bit instead.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Progress-indicator routine.
 *
 * @p: the struct task_ctx describing the current position (tp) and the
 *     task_info used for pacing.
 *
 * Starts a 1000 ms period on priv->info, then prints the label selected by
 * priv->tp followed by a spinner character and a carriage return (so the
 * next print overwrites the line), and waits for the next period. The
 * TASK_NOTHING position is tested before printing.
 */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
/* count % 4 cycles through the four-entry work_indicator table */
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatible function to allow reuse of old code */
/*
 * Inspect the lowest hole recorded in @holes.
 *
 * An empty tree is tested for first; otherwise the leftmost node is fetched
 * with rb_first(). NOTE(review): callers compare the result with isize and
 * with the result for another tree, so this presumably yields the first
 * hole's start and a large sentinel when there is no hole - confirm.
 */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback used when inserting file_extent_hole nodes with
 * rb_insert().
 *
 * Holes are ordered by start offset first. When both start at the same
 * offset their lengths decide which one becomes the "merge center". As noted
 * in add_file_extent_hole(), the comparison never reports equality, so an
 * insert cannot fail with -EEXIST.
 */
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
/*
 * @holes: rb-tree of struct file_extent_hole to update; callers pass the
 *         start and length of the new hole as the following two arguments.
 *
 * A node is allocated and inserted, then neighbours are coalesced into it:
 * a preceding hole that touches or overlaps the new one is absorbed and
 * erased, and following holes are absorbed and erased for as long as they
 * touch or overlap the (growing) new hole.
 */
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* prev reaches the new hole: grow the new hole leftwards to prev->start */
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
/* only extend when next ends beyond the current hole */
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
/*
 * Lookup callback passed to rb_search() by del_file_extent_hole().
 *
 * @node: tree node under test
 * @data: a struct file_extent_hole describing the range being searched for
 *
 * The searched start offset is compared with the node's hole: first against
 * the hole's start, then for containment in
 * [hole->start, hole->start + hole->len).
 */
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much more restrictive than add.
/*
 * @holes: rb-tree of struct file_extent_hole; callers pass the start and
 *         length of the range to remove as the following two arguments.
 *
 * The hole containing `start` is located with compare_hole_range(). The
 * range is then checked against that hole's end (a range running past it is
 * presumably rejected - confirm). The hole is erased and whatever is left
 * of it before and/or after the removed range is re-added through
 * add_file_extent_hole().
 */
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
/* left remainder: [hole->start, start) */
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
/* right remainder: [start + len, hole->start + hole->len) */
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole found in the source tree to @dst.
 *
 * The source tree is walked from rb_first() with rb_next(); each hole is
 * re-created in @dst via add_file_extent_hole(), so the usual merging with
 * holes already present in @dst applies.
 */
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
/*
 * Empty the @holes tree: the first node is erased repeatedly, re-reading
 * rb_first() after each erase.
 * NOTE(review): confirm that each erased hole is also freed.
 */
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into a BTRFS_FT_* value.
 *
 * The type bits (imode & S_IFMT), shifted down by S_SHIFT, index a static
 * table that lists regular file, directory, character/block device, FIFO,
 * socket and symlink. Slots not listed are zero-initialised.
 */
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparison callback for struct device_record nodes: the two
 * records are ordered by their devid.
 */
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
/*
 * Build a private deep copy of @orig_rec.
 *
 * The record is memcpy()'d and its three containers are then re-initialised
 * and rebuilt so that nothing is shared with the original: every backref
 * (copied together with its trailing name bytes), every orphan data extent,
 * and the holes tree (via copy_file_extent_holes()).
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself cannot
 * be allocated. The tail of the function unwinds a partially built copy:
 * holes first, then backrefs, then orphan extents.
 *
 * NOTE(review): the orphan_extents unwind loop iterates with the
 * inode_backref-typed cursors (orig/tmp) although that list is populated
 * with struct orphan_data_extent entries - worth confirming or fixing.
 */
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy copied the original's list heads and tree root; reset them */
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* header + name + terminating NUL, same layout as get_inode_backref() */
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
/*
 * Print the orphan data extents chained on @orphan_extents to stdout.
 *
 * An empty list is tested for first. Otherwise a heading naming the tree is
 * printed, followed by one line per extent with its inode, offset, disk
 * bytenr and disk length.
 */
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for inode record @rec on stderr.
 *
 * @root: tree the inode belongs to; supplies the id printed in the report
 * @rec:  inode record whose `errors` mask is decoded
 *
 * One line of the form "root <id> inode <ino> errors <hex>" is written,
 * followed by a ", <description>" fragment for every I_ERR_* bit that is
 * set. For a reloc tree the line is prefixed with "reloc" and the id shown
 * is root_key.offset rather than root_key.objectid. Orphan extents and file
 * extent holes are listed afterwards when the corresponding bits are set.
 */
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
/* single line covering offset 0 up to isize rounded up to the sector size */
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
/*
 * Look up the inode record for an inode number in @inode_cache, creating it
 * when it does not exist yet.
 *
 * Cache hit: when the caller's `mod` flag is set and the record is shared
 * (refs > 1), the cached pointer is replaced by a private copy obtained from
 * clone_inode_rec() and a failure of that clone is checked for.
 * Cache miss: a zeroed record is allocated, extent_start is set to (u64)-1,
 * its lists and holes tree are initialised, and a ptr_node keyed by
 * start = ino, size = 1 is inserted into the cache.
 *
 * Failures are reported as ERR_PTR() values: -ENOMEM for allocation
 * failures and -EEXIST when the cache insert fails.
 * NOTE(review): check that the -EEXIST path releases the freshly allocated
 * record and node.
 */
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it */
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
/*
 * Drain @orphan_extents: entries are taken from the head of the list and
 * unlinked until the list is empty.
 * NOTE(review): confirm that each unlinked entry is also freed.
 */
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
/*
 * Tear down @rec: every backref is unlinked from rec->backrefs, then the
 * orphan extent list and the holes tree are released through
 * free_orphan_data_extents() and free_file_extent_holes().
 */
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
/*
 * Predicate used by maybe_free_inode_rec(): tests that @rec carries no
 * errors, has been checked, had its inode item found, has nlink equal to
 * the number of links found, and has no backrefs left.
 */
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Settle whatever can already be settled for @rec and drop it from
 * @inode_cache once nothing is left to report.
 *
 * In order:
 *  - a test on found_inode_item gates everything that follows;
 *  - backrefs with both dir item and dir index found get their filetype
 *    compared with the type derived from the inode mode; a backref that is
 *    error-free and also has its inode ref is unlinked once nlink equals
 *    found_link;
 *  - a test on checked/merging gates the remaining steps;
 *  - type-specific rules set I_ERR_* bits: directory size and stray file
 *    extents for directories; stray dir items, nbytes, uncovered ranges
 *    (skipped when no_holes is set or nlink is 0) and checksum consistency
 *    for regular files and symlinks;
 *  - the record must not be shared at this point (BUG_ON refs != 1); when
 *    can_free_inode_rec() agrees, its cache node is removed and the record
 *    is freed.
 */
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* extents end before isize, or a hole starts before isize */
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
/*
 * Search @root for an orphan item related to @ino.
 *
 * A key with objectid BTRFS_ORPHAN_OBJECTID and type BTRFS_ORPHAN_ITEM_KEY
 * is looked up with btrfs_search_slot() using no transaction handle and
 * zero for the last two arguments; the path is released before returning.
 */
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
/*
 * Fold an INODE_ITEM into the inode record currently active on
 * @active_node.
 *
 * @eb/@slot: leaf and slot holding the item
 * @key:      key of the item; key->objectid must equal the active record's
 *            inode number and the record must not be shared (BUG_ON)
 *
 * A second inode item for the same record sets I_ERR_DUP_INODE_ITEM.
 * Otherwise nlink, size, nbytes and mode are copied from the item, the
 * NODATASUM inode flag is examined, found_inode_item is set, and an inode
 * with nlink == 0 is flagged I_ERR_NO_ORPHAN_ITEM. Finally the record is
 * offered to maybe_free_inode_rec().
 */
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or append a
 * new one.
 *
 * The search compares directory, name length and name bytes; records for
 * BTRFS_MULTIPLE_OBJECTIDS are special-cased inside the loop. A new backref
 * is allocated with room for the name plus a terminating NUL, its header is
 * zeroed, the name is copied and NUL-terminated, and it is added at the
 * tail of rec->backrefs.
 */
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type @itemtype referencing inode @ino was seen.
 *
 * @inode_cache: cache holding the inode records
 * @ino:         referenced inode
 * @dir:         directory the name lives in
 * @index:       directory index carried by the item
 * @name/@namelen: the entry name
 * @filetype:    file type stored in the directory entry
 * @itemtype:    BTRFS_DIR_INDEX_KEY, BTRFS_DIR_ITEM_KEY, BTRFS_INODE_REF_KEY
 *               or BTRFS_INODE_EXTREF_KEY
 * @errors:      REF_ERR_* bits to OR into the backref
 *
 * The inode record is fetched for modification (BUG on failure) and the
 * matching backref is found or created. Depending on @itemtype the backref
 * is checked for duplicates and for index / filetype disagreement with what
 * was recorded earlier, then updated with the new information and the
 * corresponding found_* flag. The record is finally offered to
 * maybe_free_inode_rec().
 */
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
/* remember whether this came from an INODE_REF or an INODE_EXTREF */
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, which lives in @dst_cache.
 *
 *  - Each backref of @src is replayed into @dst through
 *    add_inode_backref(), once per kind of item that was found for it.
 *  - The found_dir_item / found_file_extent / found_csum_item /
 *    some_csum_missing flags are propagated when set in @src.
 *  - The holes of @src are copied when @dst's first gap lies after @src's.
 *  - found_link (less dir_count) and found_size are added to @dst.
 *  - Extent ranges are combined: an overlap sets
 *    I_ERR_FILE_EXTENT_OVERLAP, a gap between dst->extent_end and
 *    src->extent_start becomes a hole, and extent_end is advanced.
 *  - Error bits are OR-ed in; inode item fields are copied if @dst has no
 *    inode item yet, otherwise I_ERR_DUP_INODE_ITEM is set.
 */
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
/* (u64)-1 means the record has not seen any extent (see get_inode_rec()) */
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Transfer the inode records held by @src_node into @dst_node.
 *
 * src_node->refs is decremented first and the result is tested against 0.
 * The root_cache of the source is processed first, then its inode_cache,
 * each into the cache of the same name on the destination. For every entry
 * a new ptr_node with the same start/size is inserted into the destination;
 * if an entry for that inode already exists (-EEXIST) the existing record
 * is fetched for modification, the source record is merged into it with
 * merge_inode_recs(), it is marked checked, detached from dst_node->current
 * if it was the current record, offered to maybe_free_inode_rec(), and the
 * source record is freed.
 *
 * Finally, when the source had a current inode and the destination has none
 * or one with a smaller inode number, the destination's old current record
 * is marked checked and offered to maybe_free_inode_rec(), and the record
 * for the source's current inode becomes dst_node->current.
 */
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
/* advance before the current entry may be removed from the tree */
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
/* second pass: switch from the root caches to the inode caches */
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for caches of inode records: recovers the ptr_node
 * from its embedded cache_extent and frees the inode record it refers to.
 */
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
/*
 * Presumably expands to free_inode_recs_tree(), the tree-wide destructor
 * used in enter_shared_node() - confirm against the macro definition.
 */
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node recorded for a block address in @shared, using a
 * one-byte range lookup, and convert the cache entry back to its node.
 */
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
/*
 * Create a shared_node for the block at @bytenr and insert it into @shared.
 *
 * The node is zero-allocated, keyed by start = @bytenr with size 1, and its
 * root and inode caches are initialised before the insert.
 */
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk reaches the block at @bytenr (with @refs references)
 * on @level.
 *
 * The case where @level already is the active level is tested first; after
 * that the active level must be above @level (BUG_ON).
 *
 * When no shared_node exists for the block yet, one is created, stored in
 * wc->nodes[@level] and made the active node.
 *
 * On the path for an existing node: if the active level is the root level
 * and the root item's refs is 0, only the node's reference count is
 * dropped, and at zero its caches are freed and it is removed from
 * wc->shared. Otherwise its records are spliced into the currently active
 * node, and it is removed from wc->shared once its refs reaches 0.
 */
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), called when the walk leaves @level.
 *
 * The root level is special-cased first. Otherwise the levels above @level
 * are scanned to pick the next active level (one must exist, BUG_ON), the
 * departing node is taken out of wc->nodes[], and the new level becomes
 * active. If the new active level is below the root level, or the root item
 * still has references, the departing node (which must have more than one
 * reference) is spliced into the new active node; the other branch only
 * asserts that the node has at least two references.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1664 leaf = path.nodes[0];
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1680 if (key.offset >= start + len)
1683 if (key.offset > start)
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1698 btrfs_release_path(&path);
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
1714 u64 mask = root->sectorsize - 1;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1833 BUG_ON(IS_ERR(active_node->current));
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1884 int root_level = btrfs_header_level(root->node);
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1902 path->slots[0] = nritems;
1908 err |= check_inode_item(root, path, ext_ref);
1910 if (err & LAST_ITEM)
1913 /* still have inode items in this leaf */
1914 if (cur->start == cur_bytenr)
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1923 for (i = root_level; i >= 0; i--) {
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1933 if (!nrefs->need_check[i]) {
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1960 level = btrfs_header_level(node);
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same as the one
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_header_generation(child),
2017 btrfs_node_ptr_generation(parent, slot));
2023 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2024 * in every fs or file tree check. Here we find all its root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree which has the smallest root id.
2041 if (root->objectid != u->val)
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2056 struct ulist *roots;
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2072 check = need_check(root, roots);
2074 nrefs->need_check[level] = check;
2076 nrefs->need_check[level] = 1;
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 ret = process_one_leaf(root, cur, wc);
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2157 ret = enter_shared_node(root, bytenr, refs,
2160 path->slots[*level]++;
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2186 ret = check_child_node(cur, path->slots[*level], next);
2188 free_extent_buffer(next);
2193 if (btrfs_is_leaf(next))
2194 status = btrfs_check_leaf(root, NULL, next);
2196 status = btrfs_check_node(root, NULL, next);
2197 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198 free_extent_buffer(next);
2203 *level = *level - 1;
2204 free_extent_buffer(path->nodes[*level]);
2205 path->nodes[*level] = next;
2206 path->slots[*level] = 0;
2209 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214 unsigned int ext_ref);
2217 * Returns >0 Found error, should continue
2218 * Returns <0 Fatal error, must exit the whole check
2219 * Returns 0 No errors found
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222 int *level, struct node_refs *nrefs, int ext_ref)
2224 enum btrfs_tree_block_status status;
2227 struct extent_buffer *next;
2228 struct extent_buffer *cur;
2232 WARN_ON(*level < 0);
2233 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 ret = update_nodes_refs(root, path->nodes[*level]->start,
2240 while (*level >= 0) {
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243 cur = path->nodes[*level];
2245 if (btrfs_header_level(cur) != *level)
2248 if (path->slots[*level] >= btrfs_header_nritems(cur))
2250 /* Don't forget to check leaf/node validity */
2252 ret = btrfs_check_leaf(root, NULL, cur);
2253 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = process_one_leaf_v2(root, path, nrefs,
2261 ret = btrfs_check_node(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2267 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269 blocksize = root->nodesize;
2271 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2274 if (!nrefs->need_check[*level - 1]) {
2275 path->slots[*level]++;
2279 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root, bytenr, blocksize,
2285 if (!extent_buffer_uptodate(next)) {
2286 struct btrfs_key node_key;
2288 btrfs_node_key_to_cpu(path->nodes[*level],
2290 path->slots[*level]);
2291 btrfs_add_corrupt_extent_record(root->fs_info,
2293 path->nodes[*level]->start,
2294 root->nodesize, *level);
2300 ret = check_child_node(cur, path->slots[*level], next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323 struct walk_control *wc, int *level)
2326 struct extent_buffer *leaf;
2328 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329 leaf = path->nodes[i];
2330 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = NULL;
2337 BUG_ON(*level > wc->active_node);
2338 if (*level == wc->active_node)
2339 leave_shared_node(root, wc, *level);
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2350 struct extent_buffer *leaf;
2352 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353 leaf = path->nodes[i];
2354 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2359 free_extent_buffer(path->nodes[*level]);
2360 path->nodes[*level] = NULL;
2367 static int check_root_dir(struct inode_record *rec)
2369 struct inode_backref *backref;
2372 if (!rec->found_inode_item || rec->errors)
2374 if (rec->nlink != 1 || rec->found_link != 0)
2376 if (list_empty(&rec->backrefs))
2378 backref = to_inode_backref(rec->backrefs.next);
2379 if (!backref->found_inode_ref)
2381 if (backref->index != 0 || backref->namelen != 2 ||
2382 memcmp(backref->name, "..", 2))
2384 if (backref->found_dir_index || backref->found_dir_item)
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root, struct btrfs_path *path,
2393 struct inode_record *rec)
2395 struct btrfs_inode_item *ei;
2396 struct btrfs_key key;
2399 key.objectid = rec->ino;
2400 key.type = BTRFS_INODE_ITEM_KEY;
2401 key.offset = (u64)-1;
2403 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2407 if (!path->slots[0]) {
2414 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415 if (key.objectid != rec->ino) {
2420 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421 struct btrfs_inode_item);
2422 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423 btrfs_mark_buffer_dirty(path->nodes[0]);
2424 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2425 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2426 root->root_key.objectid);
2428 btrfs_release_path(path);
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433 struct btrfs_root *root,
2434 struct btrfs_path *path,
2435 struct inode_record *rec)
2439 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440 btrfs_release_path(path);
2442 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447 struct btrfs_root *root,
2448 struct btrfs_path *path,
2449 struct inode_record *rec)
2451 struct btrfs_inode_item *ei;
2452 struct btrfs_key key;
2455 key.objectid = rec->ino;
2456 key.type = BTRFS_INODE_ITEM_KEY;
2459 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2466 /* Since ret == 0, no need to check anything */
2467 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468 struct btrfs_inode_item);
2469 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470 btrfs_mark_buffer_dirty(path->nodes[0]);
2471 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472 printf("reset nbytes for ino %llu root %llu\n",
2473 rec->ino, root->root_key.objectid);
2475 btrfs_release_path(path);
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480 struct cache_tree *inode_cache,
2481 struct inode_record *rec,
2482 struct inode_backref *backref)
2484 struct btrfs_path path;
2485 struct btrfs_trans_handle *trans;
2486 struct btrfs_dir_item *dir_item;
2487 struct extent_buffer *leaf;
2488 struct btrfs_key key;
2489 struct btrfs_disk_key disk_key;
2490 struct inode_record *dir_rec;
2491 unsigned long name_ptr;
2492 u32 data_size = sizeof(*dir_item) + backref->namelen;
2495 trans = btrfs_start_transaction(root, 1);
2497 return PTR_ERR(trans);
2499 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500 (unsigned long long)rec->ino);
2502 btrfs_init_path(&path);
2503 key.objectid = backref->dir;
2504 key.type = BTRFS_DIR_INDEX_KEY;
2505 key.offset = backref->index;
2506 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2509 leaf = path.nodes[0];
2510 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2512 disk_key.objectid = cpu_to_le64(rec->ino);
2513 disk_key.type = BTRFS_INODE_ITEM_KEY;
2514 disk_key.offset = 0;
2516 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518 btrfs_set_dir_data_len(leaf, dir_item, 0);
2519 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2520 name_ptr = (unsigned long)(dir_item + 1);
2521 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522 btrfs_mark_buffer_dirty(leaf);
2523 btrfs_release_path(&path);
2524 btrfs_commit_transaction(trans, root);
2526 backref->found_dir_index = 1;
2527 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528 BUG_ON(IS_ERR(dir_rec));
2531 dir_rec->found_size += backref->namelen;
2532 if (dir_rec->found_size == dir_rec->isize &&
2533 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535 if (dir_rec->found_size != dir_rec->isize)
2536 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2541 static int delete_dir_index(struct btrfs_root *root,
2542 struct inode_backref *backref)
2544 struct btrfs_trans_handle *trans;
2545 struct btrfs_dir_item *di;
2546 struct btrfs_path path;
2549 trans = btrfs_start_transaction(root, 1);
2551 return PTR_ERR(trans);
2553 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554 (unsigned long long)backref->dir,
2555 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556 (unsigned long long)root->objectid);
2558 btrfs_init_path(&path);
2559 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560 backref->name, backref->namelen,
2561 backref->index, -1);
2564 btrfs_release_path(&path);
2565 btrfs_commit_transaction(trans, root);
2572 ret = btrfs_del_item(trans, root, &path);
2574 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2576 btrfs_release_path(&path);
2577 btrfs_commit_transaction(trans, root);
2581 static int create_inode_item(struct btrfs_root *root,
2582 struct inode_record *rec,
2585 struct btrfs_trans_handle *trans;
2586 struct btrfs_inode_item inode_item;
2587 time_t now = time(NULL);
2590 trans = btrfs_start_transaction(root, 1);
2591 if (IS_ERR(trans)) {
2592 ret = PTR_ERR(trans);
2596 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597 "be incomplete, please check permissions and content after "
2598 "the fsck completes.\n", (unsigned long long)root->objectid,
2599 (unsigned long long)rec->ino);
2601 memset(&inode_item, 0, sizeof(inode_item));
2602 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2604 btrfs_set_stack_inode_nlink(&inode_item, 1);
2606 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608 if (rec->found_dir_item) {
2609 if (rec->found_file_extent)
2610 fprintf(stderr, "root %llu inode %llu has both a dir "
2611 "item and extents, unsure if it is a dir or a "
2612 "regular file so setting it as a directory\n",
2613 (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2615 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2617 } else if (!rec->found_dir_item) {
2618 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2621 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2630 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2632 btrfs_commit_transaction(trans, root);
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637 struct inode_record *rec,
2638 struct cache_tree *inode_cache,
2641 struct inode_backref *tmp, *backref;
2642 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2646 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647 if (!delete && rec->ino == root_dirid) {
2648 if (!rec->found_inode_item) {
2649 ret = create_inode_item(root, rec, 1);
2656 /* Index 0 for the root dir is special, don't mess with it */
2657 if (rec->ino == root_dirid && backref->index == 0)
2661 ((backref->found_dir_index && !backref->found_inode_ref) ||
2662 (backref->found_dir_index && backref->found_inode_ref &&
2663 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664 ret = delete_dir_index(root, backref);
2668 list_del(&backref->list);
2672 if (!delete && !backref->found_dir_index &&
2673 backref->found_dir_item && backref->found_inode_ref) {
2674 ret = add_missing_dir_index(root, inode_cache, rec,
2679 if (backref->found_dir_item &&
2680 backref->found_dir_index &&
2681 backref->found_dir_index) {
2682 if (!backref->errors &&
2683 backref->found_inode_ref) {
2684 list_del(&backref->list);
2690 if (!delete && (!backref->found_dir_index &&
2691 !backref->found_dir_item &&
2692 backref->found_inode_ref)) {
2693 struct btrfs_trans_handle *trans;
2694 struct btrfs_key location;
2696 ret = check_dir_conflict(root, backref->name,
2702 * let the nlink fixing routine handle it,
2703 * which can do it better.
2708 location.objectid = rec->ino;
2709 location.type = BTRFS_INODE_ITEM_KEY;
2710 location.offset = 0;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2717 fprintf(stderr, "adding missing dir index/item pair "
2719 (unsigned long long)rec->ino);
2720 ret = btrfs_insert_dir_item(trans, root, backref->name,
2722 backref->dir, &location,
2723 imode_to_type(rec->imode),
2726 btrfs_commit_transaction(trans, root);
2730 if (!delete && (backref->found_inode_ref &&
2731 backref->found_dir_index &&
2732 backref->found_dir_item &&
2733 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734 !rec->found_inode_item)) {
2735 ret = create_inode_item(root, rec, 0);
2742 return ret ? ret : repaired;
2746 * To determine the file type for nlink/inode_item repair
2748 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749 * Return -ENOENT if file type is not found.
/*
 * Choose a BTRFS_FT_* value for @rec and store it through @type.
 *
 * When the inode item was found, the type is derived from rec->imode via
 * imode_to_type(). Otherwise the backref list is scanned for an entry that
 * has a dir index or a dir item, and the filetype recorded on it is used.
 *
 * NOTE(review): the return statements are not visible in this view of the
 * file; the comment preceding the function states 0 on success and -ENOENT
 * when no type is found.
 */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2753 struct inode_backref *backref;
2755 /* For inode item recovered case */
2756 if (rec->found_inode_item) {
2757 *type = imode_to_type(rec->imode);
/* No usable inode item: fall back to the filetype stored on a backref. */
2761 list_for_each_entry(backref, &rec->backrefs, list) {
2762 if (backref->found_dir_index || backref->found_dir_item) {
2763 *type = backref->filetype;
2771 * To determine the file name for nlink repair
2773 * Return 0 if file name is found, set name and namelen.
2774 * Return -ENOENT if file name is not found.
/*
 * Copy a file name for @rec out of its backref list.
 *
 * A backref qualifies when it has a dir index, a dir item or an inode ref.
 * backref->namelen bytes are copied into @name and the length is stored
 * through @namelen. The visible code writes no NUL terminator, so @name must
 * be large enough for the name and callers must rely on *namelen.
 *
 * NOTE(review): the return statements are not visible in this view; the
 * comment preceding the function states 0 on success and -ENOENT otherwise.
 */
2776 static int find_file_name(struct inode_record *rec,
2777 char *name, int *namelen)
2779 struct inode_backref *backref;
2781 list_for_each_entry(backref, &rec->backrefs, list) {
2782 if (backref->found_dir_index || backref->found_dir_item ||
2783 backref->found_inode_ref) {
2784 memcpy(name, backref->name, backref->namelen);
2785 *namelen = backref->namelen;
2792 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of inode @rec from its backref list.
 *
 * Visible steps, in order:
 *  1. rec->found_link is cleared.
 *  2. btrfs_unlink() is called for every backref; backrefs that lack any of
 *     dir index, dir item or inode ref are also dropped from the list.
 *  3. The inode item is looked up and its nlink is set to 0.
 *  4. btrfs_add_link() is called for every backref left on the list.
 *
 * @path is released before the function ends.
 *
 * NOTE(review): error checks after the calls, the freeing of removed
 * backrefs and the return statement are not visible in this view.
 */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794 struct btrfs_root *root,
2795 struct btrfs_path *path,
2796 struct inode_record *rec)
2798 struct inode_backref *backref;
2799 struct inode_backref *tmp;
2800 struct btrfs_key key;
2801 struct btrfs_inode_item *inode_item;
2804 /* We don't believe this either, reset it and iterate backref */
2805 rec->found_link = 0;
2807 /* Remove all backref including the valid ones */
2808 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810 backref->index, backref->name,
2811 backref->namelen, 0);
2815 /* remove invalid backref, so it won't be added back */
2816 if (!(backref->found_dir_index &&
2817 backref->found_dir_item &&
2818 backref->found_inode_ref)) {
2819 list_del(&backref->list);
2826 /* Set nlink to 0 */
2827 key.objectid = rec->ino;
2828 key.type = BTRFS_INODE_ITEM_KEY;
2830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2837 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838 struct btrfs_inode_item);
2839 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840 btrfs_mark_buffer_dirty(path->nodes[0]);
2841 btrfs_release_path(path);
2844 * Add back valid inode_ref/dir_item/dir_index,
2845 * add_link() will handle the nlink inc, so new nlink must be correct
2847 list_for_each_entry(backref, &rec->backrefs, list) {
2848 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849 backref->name, backref->namelen,
2850 backref->filetype, &backref->index, 1);
2855 btrfs_release_path(path);
/*
 * Find the highest inode number in use in @root.
 *
 * Searches for an INODE_ITEM key with objectid BTRFS_LAST_FREE_OBJECTID and
 * reads the key in the slot just before the returned position; its objectid
 * is stored through the output pointer. A result at or above
 * BTRFS_LAST_FREE_OBJECTID is treated specially (the statement under that
 * check is not visible here). @path is initialised on entry and released
 * before returning.
 *
 * NOTE(review): path->slots[0] - 1 is used as a slot index; confirm that the
 * missing guard rules out slots[0] == 0. Also confirm that *highest_ino is
 * not compared when the search itself failed and nothing was stored in it.
 */
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860 struct btrfs_root *root,
2861 struct btrfs_path *path,
2864 struct btrfs_key key, found_key;
2867 btrfs_init_path(path);
2868 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2870 key.type = BTRFS_INODE_ITEM_KEY;
2871 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2873 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874 path->slots[0] - 1);
2875 *highest_ino = found_key.objectid;
2878 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2880 btrfs_release_path(path);
/*
 * Repair a wrong link count on inode @rec.
 *
 * A file name and type are recovered from the backrefs first (falling back
 * to the decimal inode number and BTRFS_FT_REG_FILE), because reset_nlink()
 * drops invalid backrefs. reset_nlink() then rebuilds the links. If the
 * inode still has no link (rec->found_link == 0), a "lost+found" directory is
 * created under BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it;
 * while btrfs_add_link() reports -EEXIST a suffix is appended to the name
 * and the link is retried, bounded by a length check on the name.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors and @path is released
 * at the end of the function.
 *
 * NOTE(review): several declarations, error checks, goto targets and the
 * return statement are not visible in this view of the file.
 */
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root,
2886 struct btrfs_path *path,
2887 struct inode_record *rec)
2889 char *dir_name = "lost+found";
2890 char namebuf[BTRFS_NAME_LEN] = {0};
2895 int name_recovered = 0;
2896 int type_recovered = 0;
2900 * Get file name and type first before these invalid inode ref
2901 * are deleted by remove_all_invalid_backref()
2903 name_recovered = !find_file_name(rec, namebuf, &namelen);
2904 type_recovered = !find_file_type(rec, &type);
2906 if (!name_recovered) {
2907 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908 rec->ino, rec->ino);
2909 namelen = count_digits(rec->ino);
2910 sprintf(namebuf, "%llu", rec->ino);
2913 if (!type_recovered) {
2914 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2916 type = BTRFS_FT_REG_FILE;
2920 ret = reset_nlink(trans, root, path, rec);
2923 "Failed to reset nlink for inode %llu: %s\n",
2924 rec->ino, strerror(-ret));
2928 if (rec->found_link == 0) {
2929 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2933 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2937 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938 dir_name, strerror(-ret));
2941 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942 namebuf, namelen, type, NULL, 1);
2944 * Add ".INO" suffix several times to handle case where
2945 * "FILENAME.INO" is already taken by another file.
2947 while (ret == -EEXIST) {
2949 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2951 if (namelen + count_digits(rec->ino) + 1 >
2956 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2958 namelen += count_digits(rec->ino) + 1;
2959 ret = btrfs_add_link(trans, root, rec->ino,
2960 lost_found_ino, namebuf,
2961 namelen, type, NULL, 1);
2965 "Failed to link the inode %llu to %s dir: %s\n",
2966 rec->ino, dir_name, strerror(-ret));
2970 * Just increase the found_link, don't actually add the
2971 * backref. This will make things easier and this inode
2972 * record will be freed after the repair is done.
2973 * So fsck will not report problem about this inode.
2976 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977 namelen, namebuf, dir_name);
2979 printf("Fixed the nlink of inode %llu\n", rec->ino);
2982 * Clear the flag anyway, or we will loop forever for the same inode
2983 * as it will not be removed from the bad inode list and the dead loop
2986 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987 btrfs_release_path(path);
2992 * Check if there is any normal(reg or prealloc) file extent for given
2994 * This is used to determine the file type when neither its dir_index/item or
2995 * inode_item exists.
2997 * This will *NOT* report error, if any error happens, just consider it does
2998 * not have any normal file extent.
/*
 * Look for a non-inline file extent item belonging to inode @ino in @root.
 *
 * The search is read-only (no transaction handle is passed to
 * btrfs_search_slot()). Items are examined while their key still has
 * objectid @ino and type BTRFS_EXTENT_DATA_KEY; an extent whose type is not
 * BTRFS_FILE_EXTENT_INLINE is the one being looked for. The local path is
 * released before returning.
 *
 * NOTE(review): the loop construct, the key setup for objectid/offset, the
 * result assignment and the return statement are not visible in this view.
 */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3002 struct btrfs_path path;
3003 struct btrfs_key key;
3004 struct btrfs_key found_key;
3005 struct btrfs_file_extent_item *fi;
3009 btrfs_init_path(&path);
3011 key.type = BTRFS_EXTENT_DATA_KEY;
3014 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may land past the last item of a leaf; step to the next leaf. */
3019 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020 ret = btrfs_next_leaf(root, &path);
3027 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3029 if (found_key.objectid != ino ||
3030 found_key.type != BTRFS_EXTENT_DATA_KEY)
3032 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033 struct btrfs_file_extent_item);
3034 type = btrfs_file_extent_type(path.nodes[0], fi);
3035 if (type != BTRFS_FILE_EXTENT_INLINE) {
3041 btrfs_release_path(&path);
3045 static u32 btrfs_type_to_imode(u8 type)
3047 static u32 imode_by_btrfs_type[] = {
3048 [BTRFS_FT_REG_FILE] = S_IFREG,
3049 [BTRFS_FT_DIR] = S_IFDIR,
3050 [BTRFS_FT_CHRDEV] = S_IFCHR,
3051 [BTRFS_FT_BLKDEV] = S_IFBLK,
3052 [BTRFS_FT_FIFO] = S_IFIFO,
3053 [BTRFS_FT_SOCK] = S_IFSOCK,
3054 [BTRFS_FT_SYMLINK] = S_IFLNK,
3057 return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed, in this order: a regular (non-inline) file extent means
 * BTRFS_FT_REG_FILE, rec->found_dir_item means BTRFS_FT_DIR, a non-empty
 * orphan extent list means BTRFS_FT_REG_FILE, and anything else falls back
 * to BTRFS_FT_REG_FILE with a message. btrfs_new_inode() is then called with
 * the mode bits combined with the type bits from btrfs_type_to_imode().
 *
 * On the visible success path the record is updated (found_dir_item, imode),
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 *
 * NOTE(review): the declarations of filetype and mode, the statements on the
 * hidden lines inside each branch, the error check after btrfs_new_inode()
 * and the return statement are not visible in this view.
 */
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061 struct btrfs_root *root,
3062 struct btrfs_path *path,
3063 struct inode_record *rec)
3067 int type_recovered = 0;
3070 printf("Trying to rebuild inode:%llu\n", rec->ino);
3072 type_recovered = !find_file_type(rec, &filetype);
3075 * Try to determine inode type if type not found.
3077 * For found regular file extent, it must be FILE.
3078 * For found dir_item/index, it must be DIR.
3080 * For undetermined one, use FILE as fallback.
3083 * 1. If found backref(inode_index/item is already handled) to it,
3085 * Need new inode-inode ref structure to allow search for that.
3087 if (!type_recovered) {
3088 if (rec->found_file_extent &&
3089 find_normal_file_extent(root, rec->ino)) {
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else if (rec->found_dir_item) {
3094 filetype = BTRFS_FT_DIR;
3095 } else if (!list_empty(&rec->orphan_extents)) {
3097 filetype = BTRFS_FT_REG_FILE;
3099 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3102 filetype = BTRFS_FT_REG_FILE;
3106 ret = btrfs_new_inode(trans, root, rec->ino,
3107 mode | btrfs_type_to_imode(filetype));
3112 * Here inode rebuild is done, we only rebuild the inode item,
3113 * don't repair the nlink(like move to lost+found).
3114 * That is the job of nlink repair.
3116 * We just fill the record and return
3118 rec->found_dir_item = 1;
3119 rec->imode = mode | btrfs_type_to_imode(filetype);
3121 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122 /* Ensure the inode_nlinks repair function will be called */
3123 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to the inode.
 *
 * For each entry on rec->orphan_extents the range is first probed with
 * btrfs_get_extent(). The visible code has two outcomes: a conflicting
 * orphan is reported and released with btrfs_free_extent(), otherwise a file
 * extent is inserted with btrfs_insert_file_extent() using disk_len for both
 * length arguments. After an insert, rec->found_size grows by disk_len, the
 * matching hole is removed from rec->holes, and I_ERR_FILE_NBYTES_WRONG and
 * I_ERR_FILE_EXTENT_DISCOUNT are cleared once the size matches and no holes
 * remain. Each processed entry is unlinked from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * NOTE(review): the branch conditions, error checks, the freeing of list
 * entries and the return statement are not visible in this view.
 */
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129 struct btrfs_root *root,
3130 struct btrfs_path *path,
3131 struct inode_record *rec)
3133 struct orphan_data_extent *orphan;
3134 struct orphan_data_extent *tmp;
3137 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3139 * Check for conflicting file extents
3141 * Here we don't know whether the extents is compressed or not,
3142 * so we can only assume it not compressed nor data offset,
3143 * and use its disk_len as extent length.
3145 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146 orphan->offset, orphan->disk_len, 0);
3147 btrfs_release_path(path);
3152 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153 orphan->disk_bytenr, orphan->disk_len);
3154 ret = btrfs_free_extent(trans,
3155 root->fs_info->extent_root,
3156 orphan->disk_bytenr, orphan->disk_len,
3157 0, root->objectid, orphan->objectid,
3162 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163 orphan->offset, orphan->disk_bytenr,
3164 orphan->disk_len, orphan->disk_len);
3168 /* Update file size info */
3169 rec->found_size += orphan->disk_len;
3170 if (rec->found_size == rec->nbytes)
3171 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3173 /* Update the file extent hole info too */
3174 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3178 if (RB_EMPTY_ROOT(&rec->holes))
3179 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3181 list_del(&orphan->list);
3184 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes with hole extents.
 *
 * Starting from rb_first(), each hole is passed to btrfs_punch_hole() and
 * then removed from the tree with del_file_extent_hole(); once the tree is
 * empty I_ERR_FILE_EXTENT_DISCOUNT is cleared. A separate call punches a
 * hole over [0, round_up(isize, sectorsize)) for a file that lost all of its
 * extents. A message is printed on the visible success path.
 *
 * NOTE(review): the loop construct, the condition guarding the special case,
 * the error checks and the return statement are not visible in this view.
 */
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root,
3191 struct btrfs_path *path,
3192 struct inode_record *rec)
3194 struct rb_node *node;
3195 struct file_extent_hole *hole;
3199 node = rb_first(&rec->holes);
3203 hole = rb_entry(node, struct file_extent_hole, node);
3204 ret = btrfs_punch_hole(trans, root, rec->ino,
3205 hole->start, hole->len);
3208 ret = del_file_extent_hole(&rec->holes, hole->start,
3212 if (RB_EMPTY_ROOT(&rec->holes))
3213 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from the first remaining hole. */
3214 node = rb_first(&rec->holes);
3216 /* special case for a file losing all its file extent */
3218 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219 round_up(rec->isize, root->sectorsize));
3223 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224 rec->ino, root->objectid);
/*
 * Run every applicable repair routine for inode @rec inside one transaction.
 *
 * Only the error bits listed in the first check are handled here. The
 * repairs run in a fixed order (inode item, orphan extents, discount
 * extents, dir isize, orphan item, nlinks, nbytes). Each one after the first
 * is attempted only while ret is still 0, and each re-reads rec->errors, so
 * bits set or cleared by an earlier repair affect the later ones. The
 * transaction is committed regardless of ret and the local path is released
 * afterwards.
 *
 * NOTE(review): the declaration and initial value of ret, the statement
 * under the first check, the IS_ERR test on trans and the final return are
 * not visible in this view. The return value of btrfs_commit_transaction()
 * is not used on the visible line.
 */
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3231 struct btrfs_trans_handle *trans;
3232 struct btrfs_path path;
3235 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236 I_ERR_NO_ORPHAN_ITEM |
3237 I_ERR_LINK_COUNT_WRONG |
3238 I_ERR_NO_INODE_ITEM |
3239 I_ERR_FILE_EXTENT_ORPHAN |
3240 I_ERR_FILE_EXTENT_DISCOUNT|
3241 I_ERR_FILE_NBYTES_WRONG)))
3245 * For nlink repair, it may create a dir and add link, so
3246 * 2 for parent(256)'s dir_index and dir_item
3247 * 2 for lost+found dir's inode_item and inode_ref
3248 * 1 for the new inode_ref of the file
3249 * 2 for lost+found dir's dir_index and dir_item for the file
3251 trans = btrfs_start_transaction(root, 7);
3253 return PTR_ERR(trans);
3255 btrfs_init_path(&path);
3256 if (rec->errors & I_ERR_NO_INODE_ITEM)
3257 ret = repair_inode_no_item(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263 ret = repair_inode_isize(trans, root, &path, rec);
3264 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267 ret = repair_inode_nlinks(trans, root, &path, rec);
3268 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269 ret = repair_inode_nbytes(trans, root, &path, rec);
3270 btrfs_commit_transaction(trans, root);
3271 btrfs_release_path(&path);
/*
 * Verify, and in repair mode fix, every inode record collected for @root.
 *
 * Visible phases:
 *  - A root with zero refs is only checked for an unexpectedly non-empty
 *    cache.
 *  - In repair mode the cache is walked and repair_inode_backrefs() is
 *    called for records that have backrefs; the walk is staged (see the
 *    stage == 3 test) so that deletions happen before additions.
 *  - The root directory record is looked up and checked with
 *    check_root_dir(); a missing root dir can be recreated with
 *    btrfs_make_root_dir() inside its own transaction.
 *  - Every remaining record is removed from the cache. The root dir and the
 *    orphan objectid are freed directly; for the others the missing inode
 *    item and link count mismatches are flagged, try_repair_inode() is
 *    attempted, and unresolved records are printed together with their
 *    backref errors before being freed.
 *
 * Return -1 if any error was counted, 0 otherwise.
 *
 * NOTE(review): many conditions, loop headers, counters and goto targets are
 * on lines that are not visible in this view of the file.
 */
3275 static int check_inode_recs(struct btrfs_root *root,
3276 struct cache_tree *inode_cache)
3278 struct cache_extent *cache;
3279 struct ptr_node *node;
3280 struct inode_record *rec;
3281 struct inode_backref *backref;
3286 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3288 if (btrfs_root_refs(&root->root_item) == 0) {
3289 if (!cache_tree_empty(inode_cache))
3290 fprintf(stderr, "warning line %d\n", __LINE__);
3295 * We need to repair backrefs first because we could change some of the
3296 * errors in the inode recs.
3298 * We also need to go through and delete invalid backrefs first and then
3299 * add the correct ones second. We do this because we may get EEXIST
3300 * when adding back the correct index because we hadn't yet deleted the
3303 * For example, if we were missing a dir index then the directories
3304 * isize would be wrong, so if we fixed the isize to what we thought it
3305 * would be and then fixed the backref we'd still have a invalid fs, so
3306 * we need to add back the dir index and then check to see if the isize
3311 if (stage == 3 && !err)
3314 cache = search_cache_extent(inode_cache, 0);
3315 while (repair && cache) {
3316 node = container_of(cache, struct ptr_node, cache);
3318 cache = next_cache_extent(cache);
3320 /* Need to free everything up and rescan */
3322 remove_cache_extent(inode_cache, &node->cache);
3324 free_inode_rec(rec);
3328 if (list_empty(&rec->backrefs))
3331 ret = repair_inode_backrefs(root, rec, inode_cache,
3345 rec = get_inode_rec(inode_cache, root_dirid, 0);
3346 BUG_ON(IS_ERR(rec));
3348 ret = check_root_dir(rec);
3350 fprintf(stderr, "root %llu root dir %llu error\n",
3351 (unsigned long long)root->root_key.objectid,
3352 (unsigned long long)root_dirid);
3353 print_inode_error(root, rec);
3358 struct btrfs_trans_handle *trans;
3360 trans = btrfs_start_transaction(root, 1);
3361 if (IS_ERR(trans)) {
3362 err = PTR_ERR(trans);
3367 "root %llu missing its root dir, recreating\n",
3368 (unsigned long long)root->objectid);
3370 ret = btrfs_make_root_dir(trans, root, root_dirid);
3373 btrfs_commit_transaction(trans, root);
3377 fprintf(stderr, "root %llu root dir %llu not found\n",
3378 (unsigned long long)root->root_key.objectid,
3379 (unsigned long long)root_dirid);
3383 cache = search_cache_extent(inode_cache, 0);
3386 node = container_of(cache, struct ptr_node, cache);
3388 remove_cache_extent(inode_cache, &node->cache);
3390 if (rec->ino == root_dirid ||
3391 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392 free_inode_rec(rec);
3396 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397 ret = check_orphan_item(root, rec->ino);
3399 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400 if (can_free_inode_rec(rec)) {
3401 free_inode_rec(rec);
3406 if (!rec->found_inode_item)
3407 rec->errors |= I_ERR_NO_INODE_ITEM;
3408 if (rec->found_link != rec->nlink)
3409 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3411 ret = try_repair_inode(root, rec);
3412 if (ret == 0 && can_free_inode_rec(rec)) {
3413 free_inode_rec(rec);
3419 if (!(repair && ret == 0))
3421 print_inode_error(root, rec);
3422 list_for_each_entry(backref, &rec->backrefs, list) {
3423 if (!backref->found_dir_item)
3424 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425 if (!backref->found_dir_index)
3426 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427 if (!backref->found_inode_ref)
3428 backref->errors |= REF_ERR_NO_INODE_REF;
3429 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430 " namelen %u name %s filetype %d errors %x",
3431 (unsigned long long)backref->dir,
3432 (unsigned long long)backref->index,
3433 backref->namelen, backref->name,
3434 backref->filetype, backref->errors);
3435 print_ref_error(backref->errors);
3437 free_inode_rec(rec);
3439 return (error > 0) ? -1 : 0;
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3445 struct cache_extent *cache;
3446 struct root_record *rec = NULL;
3449 cache = lookup_cache_extent(root_cache, objectid, 1);
3451 rec = container_of(cache, struct root_record, cache);
3453 rec = calloc(1, sizeof(*rec));
3455 return ERR_PTR(-ENOMEM);
3456 rec->objectid = objectid;
3457 INIT_LIST_HEAD(&rec->backrefs);
3458 rec->cache.start = objectid;
3459 rec->cache.size = 1;
3461 ret = insert_cache_extent(root_cache, &rec->cache);
3463 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * one.
 *
 * An existing entry matches when ref_root, dir and namelen are equal and the
 * first @namelen bytes of the name compare equal; @index does not take part
 * in the comparison. A new entry is allocated with room for the name plus a
 * terminating NUL, filled in from the arguments and appended to
 * rec->backrefs.
 *
 * NOTE(review): the return statements, the allocation failure check and the
 * assignment of the dir field are not visible in this view of the file.
 */
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469 u64 ref_root, u64 dir, u64 index,
3470 const char *name, int namelen)
3472 struct root_backref *backref;
3474 list_for_each_entry(backref, &rec->backrefs, list) {
3475 if (backref->ref_root != ref_root || backref->dir != dir ||
3476 backref->namelen != namelen)
3478 if (memcmp(name, backref->name, namelen))
/* Nothing matched: the + 1 leaves room for the NUL written below. */
3483 backref = calloc(1, sizeof(*backref) + namelen + 1);
3486 backref->ref_root = ref_root;
3488 backref->index = index;
3489 backref->namelen = namelen;
3490 memcpy(backref->name, name, namelen);
3491 backref->name[namelen] = '\0';
3492 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release the root_record that embeds @cache.
 *
 * Every backref is unlinked from rec->backrefs until the list is empty.
 *
 * NOTE(review): the calls that free each backref and the record itself are
 * not visible in this view of the file.
 */
3496 static void free_root_record(struct cache_extent *cache)
3498 struct root_record *rec;
3499 struct root_backref *backref;
3501 rec = container_of(cache, struct root_record, cache);
3502 while (!list_empty(&rec->backrefs)) {
3503 backref = to_root_backref(rec->backrefs.next);
3504 list_del(&backref->list);
/*
 * Instantiates the tree-wide free helper for root records with
 * free_root_record as the per-entry callback; free_root_recs_tree(), which
 * is called elsewhere in this file, presumably comes from this macro.
 */
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of @item_type referencing root @root_id from
 * @ref_root was seen, merging it into the matching root_backref.
 *
 * @errors is OR-ed into the backref. For anything other than a DIR_ITEM the
 * index is compared with the stored one when an indexed item was already
 * seen (REF_ERR_INDEX_UNMATCH on mismatch) and is stored otherwise. The
 * found_* flag matching @item_type is then set, with duplicate ROOT_REF and
 * ROOT_BACKREF items flagged as REF_ERR_DUP_ROOT_REF and
 * REF_ERR_DUP_ROOT_BACKREF. A backref becomes reachable once both a forward
 * ref and a dir item have been seen.
 *
 * NOTE(review): the statements under the DIR_ITEM forward-ref check and the
 * ROOT_REF dir-item check, the handling of a NULL backref and the return
 * statement are not visible in this view of the file.
 */
3513 static int add_root_backref(struct cache_tree *root_cache,
3514 u64 root_id, u64 ref_root, u64 dir, u64 index,
3515 const char *name, int namelen,
3516 int item_type, int errors)
3518 struct root_record *rec;
3519 struct root_backref *backref;
3521 rec = get_root_rec(root_cache, root_id);
3522 BUG_ON(IS_ERR(rec));
3523 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3526 backref->errors |= errors;
3528 if (item_type != BTRFS_DIR_ITEM_KEY) {
3529 if (backref->found_dir_index || backref->found_back_ref ||
3530 backref->found_forward_ref) {
3531 if (backref->index != index)
3532 backref->errors |= REF_ERR_INDEX_UNMATCH;
3534 backref->index = index;
3538 if (item_type == BTRFS_DIR_ITEM_KEY) {
3539 if (backref->found_forward_ref)
3541 backref->found_dir_item = 1;
3542 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543 backref->found_dir_index = 1;
3544 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545 if (backref->found_forward_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_REF;
3547 else if (backref->found_dir_item)
3549 backref->found_forward_ref = 1;
3550 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551 if (backref->found_back_ref)
3552 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553 backref->found_back_ref = 1;
3558 if (backref->found_forward_ref && backref->found_dir_item)
3559 backref->reachable = 1;
/*
 * Move the subvolume references collected while walking @root from
 * @src_cache into the global root record cache @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed. Otherwise each
 * record is removed from @src_cache, is_child_root() is consulted for
 * rec->ino, and every backref that has a dir item or dir index is forwarded
 * to add_root_backref() with this root as the referencing root. Such
 * backrefs must not carry an inode ref (BUG_ON). Each record is freed after
 * processing.
 *
 * NOTE(review): the loop header, how the result of is_child_root() is
 * acted on, the last argument of both add_root_backref() calls and the
 * return statements are not visible in this view of the file.
 */
3563 static int merge_root_recs(struct btrfs_root *root,
3564 struct cache_tree *src_cache,
3565 struct cache_tree *dst_cache)
3567 struct cache_extent *cache;
3568 struct ptr_node *node;
3569 struct inode_record *rec;
3570 struct inode_backref *backref;
3573 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574 free_inode_recs_tree(src_cache);
3579 cache = search_cache_extent(src_cache, 0);
3582 node = container_of(cache, struct ptr_node, cache);
3584 remove_cache_extent(src_cache, &node->cache);
3587 ret = is_child_root(root, root->objectid, rec->ino);
3593 list_for_each_entry(backref, &rec->backrefs, list) {
3594 BUG_ON(backref->found_inode_ref);
3595 if (backref->found_dir_item)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_ITEM_KEY,
3601 if (backref->found_dir_index)
3602 add_root_backref(dst_cache, rec->ino,
3603 root->root_key.objectid, backref->dir,
3604 backref->index, backref->name,
3605 backref->namelen, BTRFS_DIR_INDEX_KEY,
3609 free_inode_rec(rec);
/*
 * Check that every fs tree recorded in @root_cache is properly referenced.
 *
 * The visible code works in two passes over the cache:
 *  - A reachability pass, seeded from the BTRFS_FS_TREE_OBJECTID record,
 *    that revisits backrefs and clears backref->reachable for references
 *    that do not qualify (an existing comment notes that circular
 *    references are not detected).
 *  - A reporting pass. Unreferenced roots in the free objectid range are
 *    checked for an orphan item and reported as "not referenced" when they
 *    have a root item. For the others, missing dir item, dir index, root
 *    backref and root ref are flagged on each backref and the unresolved
 *    ones are printed.
 *
 * Return 1 if any error was counted, 0 otherwise.
 *
 * NOTE(review): loop headers, the found_ref bookkeeping, the error counter
 * updates and several continue statements are on lines that are not visible
 * in this view of the file.
 */
3616 static int check_root_refs(struct btrfs_root *root,
3617 struct cache_tree *root_cache)
3619 struct root_record *rec;
3620 struct root_record *ref_root;
3621 struct root_backref *backref;
3622 struct cache_extent *cache;
3628 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629 BUG_ON(IS_ERR(rec));
3632 /* fixme: this can not detect circular references */
3635 cache = search_cache_extent(root_cache, 0);
3639 rec = container_of(cache, struct root_record, cache);
3640 cache = next_cache_extent(cache);
3642 if (rec->found_ref == 0)
3645 list_for_each_entry(backref, &rec->backrefs, list) {
3646 if (!backref->reachable)
3649 ref_root = get_root_rec(root_cache,
3651 BUG_ON(IS_ERR(ref_root));
3652 if (ref_root->found_ref > 0)
3655 backref->reachable = 0;
3657 if (rec->found_ref == 0)
3663 cache = search_cache_extent(root_cache, 0);
3667 rec = container_of(cache, struct root_record, cache);
3668 cache = next_cache_extent(cache);
3670 if (rec->found_ref == 0 &&
3671 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673 ret = check_orphan_item(root->fs_info->tree_root,
3679 * If we don't have a root item then we likely just have
3680 * a dir item in a snapshot for this root but no actual
3681 * ref key or anything so it's meaningless.
3683 if (!rec->found_root_item)
3686 fprintf(stderr, "fs tree %llu not referenced\n",
3687 (unsigned long long)rec->objectid);
3691 if (rec->found_ref > 0 && !rec->found_root_item)
3693 list_for_each_entry(backref, &rec->backrefs, list) {
3694 if (!backref->found_dir_item)
3695 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696 if (!backref->found_dir_index)
3697 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698 if (!backref->found_back_ref)
3699 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700 if (!backref->found_forward_ref)
3701 backref->errors |= REF_ERR_NO_ROOT_REF;
3702 if (backref->reachable && backref->errors)
3709 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710 (unsigned long long)rec->objectid, rec->found_ref,
3711 rec->found_root_item ? "" : "not found");
3713 list_for_each_entry(backref, &rec->backrefs, list) {
3714 if (!backref->reachable)
3716 if (!backref->errors && rec->found_root_item)
3718 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719 " index %llu namelen %u name %s errors %x\n",
3720 (unsigned long long)backref->ref_root,
3721 (unsigned long long)backref->dir,
3722 (unsigned long long)backref->index,
3723 backref->namelen, backref->name,
3725 print_ref_error(backref->errors);
3728 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF or ROOT_BACKREF item at @slot of @eb and record it in
 * @root_cache.
 *
 * The dirid, sequence (used as the index) and name are read from the item.
 * A name longer than BTRFS_NAME_LEN is truncated to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG. For a ROOT_REF key the referenced root
 * is key->offset and the referencing root is key->objectid; in the other
 * branch the two are swapped.
 *
 * NOTE(review): the local declarations, the assignment in the normal-length
 * branch and the return statement are not visible in this view of the file.
 */
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732 struct btrfs_key *key,
3733 struct cache_tree *root_cache)
3739 struct btrfs_root_ref *ref;
3740 char namebuf[BTRFS_NAME_LEN];
3743 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3745 dirid = btrfs_root_ref_dirid(eb, ref);
3746 index = btrfs_root_ref_sequence(eb, ref);
3747 name_len = btrfs_root_ref_name_len(eb, ref);
3749 if (name_len <= BTRFS_NAME_LEN) {
3753 len = BTRFS_NAME_LEN;
3754 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size btrfs_root_ref. */
3756 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3758 if (key->type == BTRFS_ROOT_REF_KEY) {
3759 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760 index, namebuf, len, key->type, error);
3762 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763 index, namebuf, len, key->type, error);
/*
 * Per-entry release callback for the corrupt block cache: resolves the
 * btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): the statement that frees the entry is not visible in this
 * view of the file.
 */
3768 static void free_corrupt_block(struct cache_extent *cache)
3770 struct btrfs_corrupt_block *corrupt;
3772 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiates the tree-wide free helper for corrupt blocks with
 * free_corrupt_block as the per-entry callback; free_corrupt_blocks_tree(),
 * which is called elsewhere in this file, presumably comes from this macro.
 */
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3779 * Repair the btree of the given root.
3781 * The fix is to remove the node key in corrupt_blocks cache_tree.
3782 * and rebalance the tree.
3783 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in @corrupt_blocks from the btree of @root.
 *
 * Nothing is done for an empty cache. Otherwise one transaction is started
 * and the cache is walked twice:
 *  - First pass: for each entry the recorded key is searched with
 *    path.lowest_level set to the recorded level and ins_len 0 (no
 *    balancing), the pointer found at that level is deleted with
 *    btrfs_del_ptr() and the extent it pointed to is released with
 *    btrfs_free_extent().
 *  - Second pass: each key is searched again with ins_len -1 so that
 *    btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): loop headers, error checks, goto targets and the return
 * statement are on lines that are not visible in this view of the file.
 */
3785 static int repair_btree(struct btrfs_root *root,
3786 struct cache_tree *corrupt_blocks)
3788 struct btrfs_trans_handle *trans;
3789 struct btrfs_path path;
3790 struct btrfs_corrupt_block *corrupt;
3791 struct cache_extent *cache;
3792 struct btrfs_key key;
3797 if (cache_tree_empty(corrupt_blocks))
3800 trans = btrfs_start_transaction(root, 1);
3801 if (IS_ERR(trans)) {
3802 ret = PTR_ERR(trans);
3803 fprintf(stderr, "Error starting transaction: %s\n",
3807 btrfs_init_path(&path);
3808 cache = first_cache_extent(corrupt_blocks);
3810 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812 level = corrupt->level;
3813 path.lowest_level = level;
3814 key.objectid = corrupt->key.objectid;
3815 key.type = corrupt->key.type;
3816 key.offset = corrupt->key.offset;
3819 * Here we don't want to do any tree balance, since it may
3820 * cause a balance with corrupted brother leaf/node,
3821 * so ins_len set to 0 here.
3822 * Balance will be done after all corrupt node/leaf is deleted.
3824 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3827 offset = btrfs_node_blockptr(path.nodes[level],
3830 /* Remove the ptr */
3831 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3835 * Remove the corresponding extent
3836 * return value is not concerned.
3838 btrfs_release_path(&path);
3839 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840 0, root->root_key.objectid,
3842 cache = next_cache_extent(cache);
3845 /* Balance the btree using btrfs_search_slot() */
3846 cache = first_cache_extent(corrupt_blocks);
3848 corrupt = container_of(cache, struct btrfs_corrupt_block,
3850 memcpy(&key, &corrupt->key, sizeof(key));
3851 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3854 /* return will always >0 since it won't find the item */
3856 btrfs_release_path(&path);
3857 cache = next_cache_extent(cache);
3860 btrfs_commit_transaction(trans, root);
3861 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps:
 *  - A per-root corrupt block cache is installed in
 *    fs_info->corrupt_blocks for the duration of the check.
 *  - Unless this is the tree reloc root, its root record is fetched and
 *    marked as having a root item when the root has refs.
 *  - Orphan data extents attached to the root are moved onto the matching
 *    inode records, which are flagged I_ERR_FILE_EXTENT_ORPHAN.
 *  - The root block itself is checked, then the tree is walked with
 *    walk_down_tree() and walk_up_tree(), starting either at the root node
 *    or at the position recorded in drop_progress.
 *  - Corrupted tree blocks found during the walk are listed and
 *    repair_btree() is called for them.
 *  - merge_root_recs() and check_inode_recs() process the collected records.
 *  - The corrupt block cache and the orphan extent list are freed and
 *    fs_info->corrupt_blocks is reset to NULL.
 *
 * NOTE(review): the local integer declarations, most conditions and loop
 * headers, the error accumulation and the return statement are on lines that
 * are not visible in this view of the file.
 */
3865 static int check_fs_root(struct btrfs_root *root,
3866 struct cache_tree *root_cache,
3867 struct walk_control *wc)
3873 struct btrfs_path path;
3874 struct shared_node root_node;
3875 struct root_record *rec;
3876 struct btrfs_root_item *root_item = &root->root_item;
3877 struct cache_tree corrupt_blocks;
3878 struct orphan_data_extent *orphan;
3879 struct orphan_data_extent *tmp;
3880 enum btrfs_tree_block_status status;
3881 struct node_refs nrefs;
3884 * Reuse the corrupt_block cache tree to record corrupted tree block
3886 * Unlike the usage in extent tree check, here we do it in a per
3887 * fs/subvol tree base.
3889 cache_tree_init(&corrupt_blocks);
3890 root->fs_info->corrupt_blocks = &corrupt_blocks;
3892 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893 rec = get_root_rec(root_cache, root->root_key.objectid);
3894 BUG_ON(IS_ERR(rec));
3895 if (btrfs_root_refs(root_item) > 0)
3896 rec->found_root_item = 1;
3899 btrfs_init_path(&path);
3900 memset(&root_node, 0, sizeof(root_node));
3901 cache_tree_init(&root_node.root_cache);
3902 cache_tree_init(&root_node.inode_cache);
3903 memset(&nrefs, 0, sizeof(nrefs));
3905 /* Move the orphan extent record to corresponding inode_record */
3906 list_for_each_entry_safe(orphan, tmp,
3907 &root->orphan_data_extents, list) {
3908 struct inode_record *inode;
3910 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3912 BUG_ON(IS_ERR(inode));
3913 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914 list_move(&orphan->list, &inode->orphan_extents);
3917 level = btrfs_header_level(root->node);
3918 memset(wc->nodes, 0, sizeof(wc->nodes));
3919 wc->nodes[level] = &root_node;
3920 wc->active_node = level;
3921 wc->root_level = level;
3923 /* We may not have checked the root block, lets do that now */
3924 if (btrfs_is_leaf(root->node))
3925 status = btrfs_check_leaf(root, NULL, root->node);
3927 status = btrfs_check_node(root, NULL, root->node);
3928 if (status != BTRFS_TREE_BLOCK_CLEAN)
3931 if (btrfs_root_refs(root_item) > 0 ||
3932 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933 path.nodes[level] = root->node;
3934 extent_buffer_get(root->node);
3935 path.slots[level] = 0;
3937 struct btrfs_key key;
3938 struct btrfs_disk_key found_key;
3940 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941 level = root_item->drop_level;
3942 path.lowest_level = level;
3943 if (level > btrfs_header_level(root->node) ||
3944 level >= BTRFS_MAX_LEVEL) {
3945 error("ignoring invalid drop level: %u", level);
3948 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3951 btrfs_node_key(path.nodes[level], &found_key,
3953 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954 sizeof(found_key)));
3958 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3964 wret = walk_up_tree(root, &path, wc, &level);
3971 btrfs_release_path(&path);
3973 if (!cache_tree_empty(&corrupt_blocks)) {
3974 struct cache_extent *cache;
3975 struct btrfs_corrupt_block *corrupt;
3977 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978 root->root_key.objectid);
3979 cache = first_cache_extent(&corrupt_blocks);
3981 corrupt = container_of(cache,
3982 struct btrfs_corrupt_block,
3984 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985 cache->start, corrupt->level,
3986 corrupt->key.objectid, corrupt->key.type,
3987 corrupt->key.offset);
3988 cache = next_cache_extent(cache);
3991 printf("Try to repair the btree for root %llu\n",
3992 root->root_key.objectid);
3993 ret = repair_btree(root, &corrupt_blocks);
3995 fprintf(stderr, "Failed to repair btree: %s\n",
3998 printf("Btree for root %llu is fixed\n",
3999 root->root_key.objectid);
4003 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4007 if (root_node.current) {
4008 root_node.current->checked = 1;
4009 maybe_free_inode_rec(&root_node.inode_cache,
4013 err = check_inode_recs(root, &root_node.inode_cache);
4017 free_corrupt_blocks_tree(&corrupt_blocks);
4018 root->fs_info->corrupt_blocks = NULL;
4019 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that is checked as an fs root.
 *
 * The tree reloc and data reloc objectids are handled by a dedicated branch;
 * every other objectid is decided by is_fstree().
 *
 * NOTE(review): the statement executed for the two reloc objectids is not
 * visible in this view of the file.
 */
4023 static int fs_root_objectid(u64 objectid)
4025 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4028 return is_fstree(objectid);
/*
 * Walk the tree of tree roots and check every fs/subvolume tree found.
 *
 * Progress reporting is started when enabled, cached block groups are reset
 * and the root tree is iterated item by item:
 *  - ROOT_ITEM keys accepted by fs_root_objectid() are opened (uncached for
 *    the tree reloc root, through btrfs_read_fs_root() otherwise) and passed
 *    to check_fs_root(); an uncached reloc root is freed afterwards.
 *  - ROOT_REF and ROOT_BACKREF keys are passed to process_root_ref().
 * When the root tree node changes during the walk, or check_fs_root()
 * returns -EAGAIN, the root records are freed and the path is released; the
 * restart that presumably follows is not visible here. At the end the shared
 * cache is freed and the progress task is stopped.
 *
 * NOTE(review): loop headers, goto targets, the error accumulation and the
 * return statement are on lines that are not visible in this view of the
 * file.
 */
4031 static int check_fs_roots(struct btrfs_root *root,
4032 struct cache_tree *root_cache)
4034 struct btrfs_path path;
4035 struct btrfs_key key;
4036 struct walk_control wc;
4037 struct extent_buffer *leaf, *tree_node;
4038 struct btrfs_root *tmp_root;
4039 struct btrfs_root *tree_root = root->fs_info->tree_root;
4043 if (ctx.progress_enabled) {
4044 ctx.tp = TASK_FS_ROOTS;
4045 task_start(ctx.info);
4049 * Just in case we made any changes to the extent tree that weren't
4050 * reflected into the free space cache yet.
4053 reset_cached_block_groups(root->fs_info);
4054 memset(&wc, 0, sizeof(wc));
4055 cache_tree_init(&wc.shared);
4056 btrfs_init_path(&path);
4061 key.type = BTRFS_ROOT_ITEM_KEY;
4062 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4067 tree_node = tree_root->node;
4069 if (tree_node != tree_root->node) {
4070 free_root_recs_tree(root_cache);
4071 btrfs_release_path(&path);
4074 leaf = path.nodes[0];
4075 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076 ret = btrfs_next_leaf(tree_root, &path);
4082 leaf = path.nodes[0];
4084 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086 fs_root_objectid(key.objectid)) {
4087 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088 tmp_root = btrfs_read_fs_root_no_cache(
4089 root->fs_info, &key);
4091 key.offset = (u64)-1;
4092 tmp_root = btrfs_read_fs_root(
4093 root->fs_info, &key);
4095 if (IS_ERR(tmp_root)) {
4099 ret = check_fs_root(tmp_root, root_cache, &wc);
4100 if (ret == -EAGAIN) {
4101 free_root_recs_tree(root_cache);
4102 btrfs_release_path(&path);
4107 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108 btrfs_free_fs_root(tmp_root);
4109 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110 key.type == BTRFS_ROOT_BACKREF_KEY) {
4111 process_root_ref(leaf, path.slots[0], &key,
4118 btrfs_release_path(&path);
4120 free_extent_cache_tree(&wc.shared);
4121 if (!cache_tree_empty(&wc.shared))
4122 fprintf(stderr, "warning line %d\n", __LINE__);
4124 task_stop(ctx.info);
4130 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches
4131 * the specified INODE_REF/INODE_EXTREF.
4133 * @root: the root of the fs/file tree
4134 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4135 * @key: the key of the DIR_ITEM/DIR_INDEX
4136 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4137 * distinguish root_dir from a normal dir/file
4138 * @name: the name in the INODE_REF/INODE_EXTREF
4139 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4140 * @mode: the st_mode of INODE_ITEM
4142 * Return 0 if no error occurred.
4143 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4146 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147 * do not match for normal dir/file.
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150 struct btrfs_key *key, u64 index, char *name,
4151 u32 namelen, u32 mode)
4153 struct btrfs_path path;
4154 struct extent_buffer *node;
4155 struct btrfs_dir_item *di;
4156 struct btrfs_key location;
4157 char namebuf[BTRFS_NAME_LEN] = {0};
/* Look up @key in @root with no transaction handle (ins_len and cow are 0). */
4167 btrfs_init_path(&path);
4168 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170 ret = DIR_ITEM_MISSING;
4174 /* Process root dir and goto out */
4177 ret = ROOT_DIR_ERROR;
4179 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4181 ref_key->type == BTRFS_INODE_REF_KEY ?
4183 ref_key->objectid, ref_key->offset,
4184 key->type == BTRFS_DIR_ITEM_KEY ?
4185 "DIR_ITEM" : "DIR_INDEX");
4193 /* Process normal file/dir */
4195 ret = DIR_ITEM_MISSING;
4197 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4199 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200 ref_key->objectid, ref_key->offset,
4201 key->type == BTRFS_DIR_ITEM_KEY ?
4202 "DIR_ITEM" : "DIR_INDEX",
4203 key->objectid, key->offset, namelen, name,
4204 imode_to_type(mode));
4208 /* Check whether inode_id/filetype/name match */
4209 node = path.nodes[0];
4210 slot = path.slots[0];
4211 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212 total = btrfs_item_size_nr(node, slot);
/*
 * The item is walked entry by entry; ret is reset to DIR_ITEM_MISMATCH
 * at the top of every iteration.
 */
4213 while (cur < total) {
4214 ret = DIR_ITEM_MISMATCH;
4215 name_len = btrfs_dir_name_len(node, di);
4216 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode. */
4218 btrfs_dir_item_key_to_cpu(node, di, &location);
4219 if (location.objectid != ref_key->objectid ||
4220 location.type != BTRFS_INODE_ITEM_KEY ||
4221 location.offset != 0)
4224 filetype = btrfs_dir_type(node, di);
4225 if (imode_to_type(mode) != filetype)
/* namebuf holds BTRFS_NAME_LEN bytes, so longer names are trimmed to that. */
4228 if (name_len <= BTRFS_NAME_LEN) {
4231 len = BTRFS_NAME_LEN;
4232 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4234 key->type == BTRFS_DIR_ITEM_KEY ?
4235 "DIR_ITEM" : "DIR_INDEX",
4236 key->objectid, key->offset, name_len);
/* The name bytes are stored directly after the btrfs_dir_item header. */
4238 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: header, then the full name, then the data. */
4245 len = sizeof(*di) + name_len + data_len;
4246 di = (struct btrfs_dir_item *)((char *)di + len);
4249 if (ret == DIR_ITEM_MISMATCH)
4251 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4253 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254 ref_key->objectid, ref_key->offset,
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, namelen, name,
4258 imode_to_type(mode));
4260 btrfs_release_path(&path);
4265 * Traverse the given INODE_REF and call find_dir_item() to find related
4266 * DIR_ITEM/DIR_INDEX.
4268 * @root: the root of the fs/file tree
4269 * @ref_key: the key of the INODE_REF
4270 * @refs: the count of INODE_REF
4271 * @mode: the st_mode of INODE_ITEM
4273 * Return 0 if no error occurred.
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276 struct extent_buffer *node, int slot, u64 *refs,
4279 struct btrfs_key key;
4280 struct btrfs_inode_ref *ref;
4281 char namebuf[BTRFS_NAME_LEN] = {0};
/* Start at the first btrfs_inode_ref stored in this item. */
4289 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290 total = btrfs_item_size_nr(node, slot);
4293 /* Update inode ref count */
4296 index = btrfs_inode_ref_index(node, ref);
4297 name_len = btrfs_inode_ref_name_len(node, ref);
/* namebuf holds BTRFS_NAME_LEN bytes, so longer names are trimmed to that. */
4298 if (name_len <= BTRFS_NAME_LEN) {
4301 len = BTRFS_NAME_LEN;
4302 warning("root %llu INODE_REF[%llu %llu] name too long",
4303 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the btrfs_inode_ref header. */
4306 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4308 /* Check root dir ref name */
/* Note: strncmp() is bounded by the on-disk name_len, not the trimmed len. */
4309 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311 root->objectid, ref_key->objectid, ref_key->offset,
4313 err |= ROOT_DIR_ERROR;
4316 /* Find related DIR_INDEX */
/* The offset of the INODE_REF key is used as objectid of the directory keys. */
4317 key.objectid = ref_key->offset;
4318 key.type = BTRFS_DIR_INDEX_KEY;
4320 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4323 /* Find related dir_item */
4324 key.objectid = ref_key->offset;
4325 key.type = BTRFS_DIR_ITEM_KEY;
/* The DIR_ITEM key offset is the hash of the (possibly trimmed) name. */
4326 key.offset = btrfs_name_hash(namebuf, len);
4327 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance past this ref header and its full on-disk name. */
4330 len = sizeof(*ref) + name_len;
4331 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4340 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341 * DIR_ITEM/DIR_INDEX.
4343 * @root: the root of the fs/file tree
4344 * @ref_key: the key of the INODE_EXTREF
4345 * @refs: the count of INODE_EXTREF
4346 * @mode: the st_mode of INODE_ITEM
4348 * Return 0 if no error occurred.
4350 static int check_inode_extref(struct btrfs_root *root,
4351 struct btrfs_key *ref_key,
4352 struct extent_buffer *node, int slot, u64 *refs,
4355 struct btrfs_key key;
4356 struct btrfs_inode_extref *extref;
4357 char namebuf[BTRFS_NAME_LEN] = {0};
/* Start at the first btrfs_inode_extref stored in this item. */
4367 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368 total = btrfs_item_size_nr(node, slot);
4371 /* Update inode ref count */
4373 name_len = btrfs_inode_extref_name_len(node, extref);
4374 index = btrfs_inode_extref_index(node, extref);
4375 parent = btrfs_inode_extref_parent(node, extref);
/* namebuf holds BTRFS_NAME_LEN bytes, so longer names are trimmed to that. */
4376 if (name_len <= BTRFS_NAME_LEN) {
4379 len = BTRFS_NAME_LEN;
4380 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the btrfs_inode_extref header. */
4383 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4385 /* Check root dir ref name */
/* Note: strncmp() is bounded by the on-disk name_len, not the trimmed len. */
4386 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388 root->objectid, ref_key->objectid, ref_key->offset,
4390 err |= ROOT_DIR_ERROR;
4393 /* Find related DIR_INDEX */
/* Unlike INODE_REF, the directory id comes from the extref's parent field. */
4394 key.objectid = parent;
4395 key.type = BTRFS_DIR_INDEX_KEY;
4397 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4400 /* Find related DIR_ITEM */
4401 key.objectid = parent;
4402 key.type = BTRFS_DIR_ITEM_KEY;
/* The DIR_ITEM key offset is the hash of the (possibly trimmed) name. */
4403 key.offset = btrfs_name_hash(namebuf, len);
4404 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance past this extref header and its full on-disk name. */
4407 len = sizeof(*extref) + name_len;
4408 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4418 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4419 * the specified DIR_ITEM/DIR_INDEX.
4421 * @root: the root of the fs/file tree
4422 * @key: the key of the INODE_REF/INODE_EXTREF
4423 * @name: the name in the INODE_REF/INODE_EXTREF
4424 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4425 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4427 * @ext_ref: the EXTENDED_IREF feature
4429 * Return 0 if no error occurred.
4430 * Return >0 for error bitmap
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433 char *name, int namelen, u64 index,
4434 unsigned int ext_ref)
4436 struct btrfs_path path;
4437 struct btrfs_inode_ref *ref;
4438 struct btrfs_inode_extref *extref;
4439 struct extent_buffer *node;
4440 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: look the key up as given by the caller. */
4451 btrfs_init_path(&path);
4452 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4454 ret = INODE_REF_MISSING;
4458 node = path.nodes[0];
4459 slot = path.slots[0];
4461 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462 total = btrfs_item_size_nr(node, slot);
4464 /* Iterate over every entry of the INODE_REF item */
4465 while (cur < total) {
4466 ret = INODE_REF_MISSING;
4468 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the caller does not want the index compared. */
4470 if (index != (u64)-1 && index != ref_index)
/* ref_namebuf holds BTRFS_NAME_LEN bytes, so longer names are trimmed. */
4473 if (ref_namelen <= BTRFS_NAME_LEN) {
4476 len = BTRFS_NAME_LEN;
4477 warning("root %llu INODE %s[%llu %llu] name too long",
4479 key->type == BTRFS_INODE_REF_KEY ?
4481 key->objectid, key->offset);
4483 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4486 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance past this ref header and its full on-disk name. */
4492 len = sizeof(*ref) + ref_namelen;
4493 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4498 /* Skip the EXTREF lookup if the EXTENDED_IREF feature is not supported */
4502 btrfs_release_path(&path);
4503 btrfs_init_path(&path);
/*
 * Second pass: the caller's key is rewritten in place into the
 * INODE_EXTREF key.  Its old offset is kept as dir_id and the new
 * offset is the extref hash of (dir_id, name).
 */
4505 dir_id = key->offset;
4506 key->type = BTRFS_INODE_EXTREF_KEY;
4507 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4509 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4511 ret = INODE_REF_MISSING;
4515 node = path.nodes[0];
4516 slot = path.slots[0];
4518 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4520 total = btrfs_item_size_nr(node, slot);
4522 /* Iterate over every entry of the INODE_EXTREF item */
4523 while (cur < total) {
4524 ret = INODE_REF_MISSING;
4526 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527 ref_index = btrfs_inode_extref_index(node, extref);
4528 parent = btrfs_inode_extref_parent(node, extref);
4529 if (index != (u64)-1 && index != ref_index)
/* Entries stored under the same hash may belong to another directory. */
4532 if (parent != dir_id)
4535 if (ref_namelen <= BTRFS_NAME_LEN) {
4538 len = BTRFS_NAME_LEN;
4539 warning("root %llu INODE %s[%llu %llu] name too long",
4541 key->type == BTRFS_INODE_REF_KEY ?
4543 key->objectid, key->offset);
4545 read_extent_buffer(node, ref_namebuf,
4546 (unsigned long)(extref + 1), len);
4548 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance past this extref header and its full on-disk name. */
4555 len = sizeof(*extref) + ref_namelen;
4556 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4561 btrfs_release_path(&path);
4566 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4569 * @root: the root of the fs/file tree
4570 * @key: the key of the DIR_ITEM/DIR_INDEX
4571 * @size: the st_size of the INODE_ITEM
4572 * @ext_ref: the EXTENDED_IREF feature
4574 * Return 0 if no error occurred.
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577 struct extent_buffer *node, int slot, u64 *size,
4578 unsigned int ext_ref)
4580 struct btrfs_dir_item *di;
4581 struct btrfs_inode_item *ii;
4582 struct btrfs_path path;
4583 struct btrfs_key location;
4584 char namebuf[BTRFS_NAME_LEN] = {0};
4597 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598 * ignore index check.
4600 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4602 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603 total = btrfs_item_size_nr(node, slot);
/* Walk every btrfs_dir_item entry stored in this item. */
4605 while (cur < total) {
4606 data_len = btrfs_dir_data_len(node, di);
4608 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610 "DIR_ITEM" : "DIR_INDEX",
4611 key->objectid, key->offset, data_len);
4613 name_len = btrfs_dir_name_len(node, di);
/* namebuf holds BTRFS_NAME_LEN bytes, so longer names are trimmed to that. */
4614 if (name_len <= BTRFS_NAME_LEN) {
4617 len = BTRFS_NAME_LEN;
4618 warning("root %llu %s[%llu %llu] name too long",
4620 key->type == BTRFS_DIR_ITEM_KEY ?
4621 "DIR_ITEM" : "DIR_INDEX",
4622 key->objectid, key->offset);
/* The caller's running size grows by the on-disk name_len, not the trimmed len. */
4624 (*size) += name_len;
4626 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627 filetype = btrfs_dir_type(node, di);
4629 btrfs_init_path(&path);
4630 btrfs_dir_item_key_to_cpu(node, di, &location);
4632 /* Ignore related ROOT_ITEM check */
4633 if (location.type == BTRFS_ROOT_ITEM_KEY)
4636 /* Check relative INODE_ITEM(existence/filetype) */
4637 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4639 err |= INODE_ITEM_MISSING;
4640 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643 key->offset, location.objectid, name_len,
4648 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649 struct btrfs_inode_item);
4650 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type recorded in the dir entry must agree with the inode's mode. */
4652 if (imode_to_type(mode) != filetype) {
4653 err |= INODE_ITEM_MISMATCH;
4654 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657 key->offset, name_len, namebuf, filetype);
4660 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * location is reused as the INODE_REF key: its objectid is kept and
 * its offset becomes key->objectid.
 */
4661 location.type = BTRFS_INODE_REF_KEY;
4662 location.offset = key->objectid;
4663 ret = find_inode_ref(root, &location, namebuf, len,
4666 if (ret & INODE_REF_MISSING)
4667 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670 key->offset, name_len, namebuf, filetype);
4673 btrfs_release_path(&path);
/* Step to the next entry: header, then the full name, then the data. */
4674 len = sizeof(*di) + name_len + data_len;
4675 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after one entry is reported as an error. */
4678 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680 root->objectid, key->objectid, key->offset);
4689 * Check file extent datasum/hole, update the size of the file extents,
4690 * check and update the last offset of the file extent.
4692 * @root: the root of fs/file tree.
4693 * @fkey: the key of the file extent.
4694 * @nodatasum: INODE_NODATASUM feature.
4695 * @size: the sum of all EXTENT_DATA items size for this inode.
4696 * @end: the offset of the last extent.
4698 * Return 0 if no error occurred.
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701 struct extent_buffer *node, int slot,
4702 unsigned int nodatasum, u64 *size, u64 *end)
4704 struct btrfs_file_extent_item *fi;
4707 u64 extent_num_bytes;
4709 u64 csum_found; /* In byte size, sectorsize aligned */
4710 u64 search_start; /* Logical range start we search for csum */
4711 u64 search_len; /* Logical range len we search for csum */
4712 unsigned int extent_type;
4713 unsigned int is_hole;
4718 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4720 /* Check inline extent */
4721 extent_type = btrfs_file_extent_type(node, fi);
4722 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723 struct btrfs_item *e = btrfs_item_nr(slot);
4724 u32 item_inline_len;
4726 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728 compressed = btrfs_file_extent_compression(node, fi);
4729 if (extent_num_bytes == 0) {
4731 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732 root->objectid, fkey->objectid, fkey->offset);
4733 err |= FILE_EXTENT_ERROR;
/* The two lengths are only compared for uncompressed inline extents. */
4735 if (!compressed && extent_num_bytes != item_inline_len) {
4737 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738 root->objectid, fkey->objectid, fkey->offset,
4739 extent_num_bytes, item_inline_len);
4740 err |= FILE_EXTENT_ERROR;
4742 *size += extent_num_bytes;
4746 /* Check extent type */
4747 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749 err |= FILE_EXTENT_ERROR;
4750 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751 root->objectid, fkey->objectid, fkey->offset);
4755 /* Check REG_EXTENT/PREALLOC_EXTENT */
4756 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759 extent_offset = btrfs_file_extent_offset(node, fi);
4760 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk_bytenr and disk_num_bytes are both 0. */
4761 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4764 * Check EXTENT_DATA csum
4766 * For plain (uncompressed) extent, we should only check the range
4767 * we're referring to, as it's possible that part of prealloc extent
4768 * has been written, and has csum:
4770 * |<--- Original large preallocated extent A ---->|
4771 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4774 * For compressed extent, we should check the whole range.
4777 search_start = disk_bytenr + extent_offset;
4778 search_len = extent_num_bytes;
4780 search_start = disk_bytenr;
4781 search_len = disk_num_bytes;
4783 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * ret is only consulted in the REG branch; the other two branches look
 * at csum_found alone.
 */
4784 if (csum_found > 0 && nodatasum) {
4785 err |= ODD_CSUM_ITEM;
4786 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787 root->objectid, fkey->objectid, fkey->offset);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789 !is_hole && (ret < 0 || csum_found < search_len)) {
4790 err |= CSUM_ITEM_MISSING;
4791 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 csum_found, search_len);
4794 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795 err |= ODD_CSUM_ITEM;
4796 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797 root->objectid, fkey->objectid, fkey->offset, csum_found);
4800 /* Check EXTENT_DATA hole */
/*
 * With no_holes set a hole extent item is an error; without it the
 * extent must start exactly at *end.
 */
4801 if (no_holes && is_hole) {
4802 err |= FILE_EXTENT_ERROR;
4803 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804 root->objectid, fkey->objectid, fkey->offset);
4805 } else if (!no_holes && *end != fkey->offset) {
4806 err |= FILE_EXTENT_ERROR;
4807 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808 root->objectid, fkey->objectid, fkey->offset);
/* Move the tracked end offset past this extent. */
4811 *end += extent_num_bytes;
4813 *size += extent_num_bytes;
4819 * Check INODE_ITEM and related ITEMs (the same inode number)
4820 * 1. check link count
4821 * 2. check inode ref/extref
4822 * 3. check dir item/index
4824 * @ext_ref: the EXTENDED_IREF feature
4826 * Return 0 if no error occurred.
4827 * Return >0 for error or hit the traversal is done(by error bitmap)
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830 unsigned int ext_ref)
4832 struct extent_buffer *node;
4833 struct btrfs_inode_item *ii;
4834 struct btrfs_key key;
4843 u64 extent_size = 0;
4845 unsigned int nodatasum;
4850 node = path->nodes[0];
4851 slot = path->slots[0];
4853 btrfs_item_key_to_cpu(node, &key, slot);
4854 inode_id = key.objectid;
/* The orphan objectid is not checked as an inode; the path just moves on. */
4856 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857 ret = btrfs_next_item(root, path);
/* Read the INODE_ITEM fields before the path is advanced to the next item. */
4863 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864 isize = btrfs_inode_size(node, ii);
4865 nbytes = btrfs_inode_nbytes(node, ii);
4866 mode = btrfs_inode_mode(node, ii);
4867 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868 nlink = btrfs_inode_nlink(node, ii);
4869 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4872 ret = btrfs_next_item(root, path);
4874 /* out will fill 'err' using current statistics */
4876 } else if (ret > 0) {
4881 node = path->nodes[0];
4882 slot = path->slots[0];
4883 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode. */
4884 if (key.objectid != inode_id)
4888 case BTRFS_INODE_REF_KEY:
4889 ret = check_inode_ref(root, &key, node, slot, &refs,
4893 case BTRFS_INODE_EXTREF_KEY:
4894 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896 root->objectid, key.objectid,
4898 ret = check_inode_extref(root, &key, node, slot, &refs,
4902 case BTRFS_DIR_ITEM_KEY:
4903 case BTRFS_DIR_INDEX_KEY:
4905 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906 root->objectid, inode_id,
4907 imode_to_type(mode), key.objectid,
4910 ret = check_dir_item(root, &key, node, slot, &size,
4914 case BTRFS_EXTENT_DATA_KEY:
4916 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917 root->objectid, inode_id, key.objectid,
4920 ret = check_file_extent(root, &key, node, slot,
4921 nodatasum, &extent_size,
4925 case BTRFS_XATTR_ITEM_KEY:
4928 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929 key.objectid, key.type, key.offset);
4934 /* verify INODE_ITEM nlink/isize/nbytes */
4937 err |= LINK_COUNT_ERROR;
4938 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939 root->objectid, inode_id, nlink);
4943 * Just a warning, as dir inode nbytes is just an
4944 * instructive value.
4946 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948 root->objectid, inode_id, root->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
4951 if (isize != size) {
4953 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref(). */
4957 if (nlink != refs) {
4958 err |= LINK_COUNT_ERROR;
4959 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960 root->objectid, inode_id, nlink, refs);
4961 } else if (!nlink) {
4965 if (!nbytes && !no_holes && extent_end < isize) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
4971 if (nbytes != extent_size) {
4972 err |= NBYTES_ERROR;
4973 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the INODE_ITEM keyed at BTRFS_FIRST_FREE_OBJECTID in @root.
 *
 * INODE_ITEM_MISSING is recorded together with the "is missing" error,
 * otherwise the result of check_inode_item() is accumulated into err.
 *
 * @root:    the fs/file tree to look in
 * @ext_ref: the EXTENDED_IREF feature, passed on to check_inode_item()
 */
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4983 struct btrfs_path path;
4984 struct btrfs_key key;
4988 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989 key.type = BTRFS_INODE_ITEM_KEY;
4992 /* For root being dropped, we don't need to check first inode */
4993 if (btrfs_root_refs(&root->root_item) == 0 &&
4994 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4998 btrfs_init_path(&path);
5000 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5005 err |= INODE_ITEM_MISSING;
5006 error("first inode item of root %llu is missing",
5010 err |= check_inode_item(root, &path, ext_ref);
5015 btrfs_release_path(&path);
5020 * Iterate over all items in the tree and call check_inode_item() to check.
5022 * @root: the root of the tree to be checked.
5023 * @ext_ref: the EXTENDED_IREF feature
5025 * Return 0 if no error found.
5026 * Return <0 for error.
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5030 struct btrfs_path path;
5031 struct node_refs nrefs;
5032 struct btrfs_root_item *root_item = &root->root_item;
5038 * We need to manually check the first inode item(256)
5039 * As the following traversal function will only start from
5040 * the first inode item in the leaf, if inode item(256) is missing
5041 * we will just skip it forever.
5043 ret = check_fs_first_inode(root, ext_ref);
5047 memset(&nrefs, 0, sizeof(nrefs));
5048 level = btrfs_header_level(root->node);
5049 btrfs_init_path(&path);
/*
 * If the root still has references, or drop_progress has objectid 0,
 * the walk starts at slot 0 of the root node, on which an extra
 * reference is taken.
 */
5051 if (btrfs_root_refs(root_item) > 0 ||
5052 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053 path.nodes[level] = root->node;
5054 path.slots[level] = 0;
5055 extent_buffer_get(root->node);
5057 struct btrfs_key key;
/* Otherwise position the path at the drop_progress key and drop_level. */
5059 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060 level = root_item->drop_level;
5061 path.lowest_level = level;
5062 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5069 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5072 /* if ret is negative, walk shall stop */
5078 ret = walk_up_tree_v2(root, &path, &level);
5080 /* Normal exit, reset ret to err */
5087 btrfs_release_path(&path);
5092 * Find the relative ref for root_ref and root_backref.
5094 * @root: the root of the root tree.
5095 * @ref_key: the key of the root ref.
5097 * Return 0 if no error occurred.
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100 struct extent_buffer *node, int slot)
5102 struct btrfs_path path;
5103 struct btrfs_key key;
5104 struct btrfs_root_ref *ref;
5105 struct btrfs_root_ref *backref;
5106 char ref_name[BTRFS_NAME_LEN] = {0};
5107 char backref_name[BTRFS_NAME_LEN] = {0};
5113 u32 backref_namelen;
/* Read dirid, sequence and name length of the ref being checked. */
5118 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119 ref_dirid = btrfs_root_ref_dirid(node, ref);
5120 ref_seq = btrfs_root_ref_sequence(node, ref);
5121 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* ref_name holds BTRFS_NAME_LEN bytes, so longer names are trimmed to that. */
5123 if (ref_namelen <= BTRFS_NAME_LEN) {
5126 len = BTRFS_NAME_LEN;
5127 warning("%s[%llu %llu] ref_name too long",
5128 ref_key->type == BTRFS_ROOT_REF_KEY ?
5129 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5132 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5134 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset.  The type arithmetic
 * turns ROOT_REF into ROOT_BACKREF and ROOT_BACKREF into ROOT_REF.
 */
5135 key.objectid = ref_key->offset;
5136 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137 key.offset = ref_key->objectid;
5139 btrfs_init_path(&path);
5140 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5142 err |= ROOT_REF_MISSING;
5143 error("%s[%llu %llu] couldn't find relative ref",
5144 ref_key->type == BTRFS_ROOT_REF_KEY ?
5145 "ROOT_REF" : "ROOT_BACKREF",
5146 ref_key->objectid, ref_key->offset);
/* Read the same three fields from the counterpart item. */
5150 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151 struct btrfs_root_ref);
5152 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5156 if (backref_namelen <= BTRFS_NAME_LEN) {
5157 len = backref_namelen;
5159 len = BTRFS_NAME_LEN;
5160 warning("%s[%llu %llu] ref_name too long",
5161 key.type == BTRFS_ROOT_REF_KEY ?
5162 "ROOT_REF" : "ROOT_BACKREF",
5163 key.objectid, key.offset);
5165 read_extent_buffer(path.nodes[0], backref_name,
5166 (unsigned long)(backref + 1), len);
/* Both sides must agree on dirid, sequence, name length and name bytes. */
5168 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169 ref_namelen != backref_namelen ||
5170 strncmp(ref_name, backref_name, len)) {
5171 err |= ROOT_REF_MISMATCH;
5172 error("%s[%llu %llu] mismatch relative ref",
5173 ref_key->type == BTRFS_ROOT_REF_KEY ?
5174 "ROOT_REF" : "ROOT_BACKREF",
5175 ref_key->objectid, ref_key->offset);
5178 btrfs_release_path(&path);
5183 * Check all fs/file tree in low_memory mode.
5185 * 1. for fs tree root item, call check_fs_root_v2()
5186 * 2. for fs tree root ref/backref, call check_root_ref()
5188 * Return 0 if no error occurred.
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5192 struct btrfs_root *tree_root = fs_info->tree_root;
5193 struct btrfs_root *cur_root = NULL;
5194 struct btrfs_path path;
5195 struct btrfs_key key;
5196 struct extent_buffer *node;
5197 unsigned int ext_ref;
/* Look up once whether the filesystem has the EXTENDED_IREF incompat flag. */
5202 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
/* The scan of the tree root begins at the FS tree's ROOT_ITEM. */
5204 btrfs_init_path(&path);
5205 key.objectid = BTRFS_FS_TREE_OBJECTID;
5207 key.type = BTRFS_ROOT_ITEM_KEY;
5209 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5213 } else if (ret > 0) {
5219 node = path.nodes[0];
5220 slot = path.slots[0];
5221 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range. */
5222 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5224 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read without the root cache and are freed
 * again below once they have been checked.
 */
5226 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5230 key.offset = (u64)-1;
5231 cur_root = btrfs_read_fs_root(fs_info, &key);
5234 if (IS_ERR(cur_root)) {
5235 error("Fail to read fs/subvol tree: %lld",
5241 ret = check_fs_root_v2(cur_root, ext_ref);
5244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245 btrfs_free_fs_root(cur_root);
5246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248 ret = check_root_ref(tree_root, &key, node, slot);
/* Advance to the next item of the tree root. */
5252 ret = btrfs_next_item(tree_root, &path);
5262 btrfs_release_path(&path);
/*
 * Cross-check every backref attached to @rec against the extent record.
 *
 * For each backref: it must have been found in the extent tree, a tree
 * backref must have been referenced, and a data backref must agree with
 * the record on its reference count, disk bytenr and byte count.  Finally
 * the accumulated reference total is compared with rec->refs.
 * Diagnostics are written to stderr.
 *
 * NOTE(review): @print_errs presumably gates the messages and a zero
 * return means "no problem found" (maybe_free_extent_rec() frees a record
 * only when this returns 0) -- confirm.
 */
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5268 struct list_head *cur = rec->backrefs.next;
5269 struct extent_backref *back;
5270 struct tree_backref *tback;
5271 struct data_backref *dback;
5275 while(cur != &rec->backrefs) {
5276 back = to_extent_backref(cur);
/* Backref that was never seen in the extent tree. */
5278 if (!back->found_extent_tree) {
5282 if (back->is_data) {
5283 dback = to_data_backref(back);
5284 fprintf(stderr, "Backref %llu %s %llu"
5285 " owner %llu offset %llu num_refs %lu"
5286 " not found in extent tree\n",
5287 (unsigned long long)rec->start,
5288 back->full_backref ?
5290 back->full_backref ?
5291 (unsigned long long)dback->parent:
5292 (unsigned long long)dback->root,
5293 (unsigned long long)dback->owner,
5294 (unsigned long long)dback->offset,
5295 (unsigned long)dback->num_refs);
5297 tback = to_tree_backref(back);
5298 fprintf(stderr, "Backref %llu parent %llu"
5299 " root %llu not found in extent tree\n",
5300 (unsigned long long)rec->start,
5301 (unsigned long long)tback->parent,
5302 (unsigned long long)tback->root);
/* Tree backref that nothing referenced back. */
5305 if (!back->is_data && !back->found_ref) {
5309 tback = to_tree_backref(back);
5310 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311 (unsigned long long)rec->start,
5312 back->full_backref ? "parent" : "root",
5313 back->full_backref ?
5314 (unsigned long long)tback->parent :
5315 (unsigned long long)tback->root, back);
/* Data backref: compare count, disk bytenr and byte count with the record. */
5317 if (back->is_data) {
5318 dback = to_data_backref(back);
5319 if (dback->found_ref != dback->num_refs) {
5323 fprintf(stderr, "Incorrect local backref count"
5324 " on %llu %s %llu owner %llu"
5325 " offset %llu found %u wanted %u back %p\n",
5326 (unsigned long long)rec->start,
5327 back->full_backref ?
5329 back->full_backref ?
5330 (unsigned long long)dback->parent:
5331 (unsigned long long)dback->root,
5332 (unsigned long long)dback->owner,
5333 (unsigned long long)dback->offset,
5334 dback->found_ref, dback->num_refs, back);
5336 if (dback->disk_bytenr != rec->start) {
5340 fprintf(stderr, "Backref disk bytenr does not"
5341 " match extent record, bytenr=%llu, "
5342 "ref bytenr=%llu\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)dback->disk_bytenr);
5347 if (dback->bytes != rec->nr) {
5351 fprintf(stderr, "Backref bytes do not match "
5352 "extent backref, bytenr=%llu, ref "
5353 "bytes=%llu, backref bytes=%llu\n",
5354 (unsigned long long)rec->start,
5355 (unsigned long long)rec->nr,
5356 (unsigned long long)dback->bytes);
/* Accumulate the global reference total; data backrefs add found_ref. */
5359 if (!back->is_data) {
5362 dback = to_data_backref(back);
5363 found += dback->found_ref;
/* The accumulated total must equal the reference count held by the record. */
5366 if (found != rec->refs) {
5370 fprintf(stderr, "Incorrect global backref count "
5371 "on %llu found %llu wanted %llu\n",
5372 (unsigned long long)rec->start,
5373 (unsigned long long)found,
5374 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: while the list is not empty, take its first entry
 * and map it to the containing extent_backref.
 * NOTE(review): the entry is presumably unlinked and freed inside the
 * loop -- confirm.
 */
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5382 struct extent_backref *back;
5383 struct list_head *cur;
5384 while (!list_empty(&rec->backrefs)) {
5385 cur = rec->backrefs.next;
5386 back = to_extent_backref(cur);
/*
 * Empty @extent_cache: take the first cached extent, map it to its
 * extent_record, remove it from the tree and free the record's backrefs.
 * NOTE(review): presumably repeated until the tree is empty, with the
 * record itself released afterwards -- confirm.
 */
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5395 struct cache_extent *cache;
5396 struct extent_record *rec;
5399 cache = first_cache_extent(extent_cache);
5402 rec = container_of(cache, struct extent_record, cache);
5403 remove_cache_extent(extent_cache, cache);
5404 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when its content and owner ref are checked,
 * the extent item ref count equals the non-zero found ref count, there are
 * no duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags are set.
 */
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410 struct extent_record *rec)
5412 if (rec->content_checked && rec->owner_ref_checked &&
5413 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415 !rec->bad_full_backref && !rec->crossing_stripes &&
5416 !rec->wrong_chunk_type) {
/* Remove from the cache tree, free the backrefs, unlink from rec->list. */
5417 remove_cache_extent(extent_cache, &rec->cache);
5418 free_all_extent_backrefs(rec);
5419 list_del_init(&rec->list);
/*
 * Check that the tree named by the header owner of @buf references the
 * block.
 *
 * Two strategies are visible:
 *  1. scan the backrefs of @rec for a found, non-full tree backref whose
 *     root equals the header owner of @buf;
 *  2. read the root identified by the header owner and search it for the
 *     first key of @buf, stopping one level above the block, then compare
 *     the block pointer stored in the parent slot with buf->start.
 *
 * The final return yields 0 when the parent pointer matched and 1 when it
 * did not.
 * NOTE(review): the backref scan presumably returns early on a match, and
 * a failure to read the root or to search presumably returns early too --
 * confirm the exact values.
 */
5425 static int check_owner_ref(struct btrfs_root *root,
5426 struct extent_record *rec,
5427 struct extent_buffer *buf)
5429 struct extent_backref *node;
5430 struct tree_backref *back;
5431 struct btrfs_root *ref_root;
5432 struct btrfs_key key;
5433 struct btrfs_path path;
5434 struct extent_buffer *parent;
/* Strategy 1: look for a matching tree backref on the record. */
5439 list_for_each_entry(node, &rec->backrefs, list) {
5442 if (!node->found_ref)
5444 if (node->full_backref)
5446 back = to_tree_backref(node);
5447 if (btrfs_header_owner(buf) == back->root)
/* Reaching this point with a record flagged is_root is treated as a bug. */
5450 BUG_ON(rec->is_root);
5452 /* try to find the block by search corresponding fs tree */
5453 key.objectid = btrfs_header_owner(buf);
5454 key.type = BTRFS_ROOT_ITEM_KEY;
5455 key.offset = (u64)-1;
5457 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458 if (IS_ERR(ref_root))
/* The search key is the first key stored in @buf (item key or node key). */
5461 level = btrfs_header_level(buf);
5463 btrfs_item_key_to_cpu(buf, &key, 0);
5465 btrfs_node_key_to_cpu(buf, &key, 0);
5467 btrfs_init_path(&path);
/* Stop the descent one level above @buf so the parent slot can be read. */
5468 path.lowest_level = level + 1;
5469 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5473 parent = path.nodes[level + 1];
5474 if (parent && buf->start == btrfs_node_blockptr(parent,
5475 path.slots[level + 1]))
5478 btrfs_release_path(&path);
5479 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec and compare the root of each tree backref
 * against BTRFS_EXTENT_TREE_OBJECTID.
 *
 * NOTE(review): judging by the name and by the caller, the result tells
 * whether the record belongs to the extent tree; full backrefs are tested
 * separately before the root comparison and presumably disqualify the
 * record -- confirm the exact return values.
 */
5482 static int is_extent_tree_record(struct extent_record *rec)
5484 struct list_head *cur = rec->backrefs.next;
5485 struct extent_backref *node;
5486 struct tree_backref *back;
/* Open-coded walk over the circular backref list. */
5489 while(cur != &rec->backrefs) {
5490 node = to_extent_backref(cur);
5494 back = to_tree_backref(node);
5495 if (node->full_backref)
5497 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block that could not be read as a corrupt extent.
 *
 * Looks up the cache extent for (start, len) in @extent_cache, maps it to
 * its extent record and tests it with is_extent_tree_record(). The parent
 * key of the record is converted to CPU byte order and passed, together
 * with start and len, to btrfs_add_corrupt_extent_record(), whose result
 * is returned.
 * NOTE(review): a failed lookup and a record that is not an extent tree
 * record presumably return early -- confirm.
 */
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505 struct cache_tree *extent_cache,
5508 struct extent_record *rec;
5509 struct cache_extent *cache;
5510 struct btrfs_key key;
5512 cache = lookup_cache_extent(extent_cache, start, len);
5516 rec = container_of(cache, struct extent_record, cache);
5517 if (!is_extent_tree_record(rec))
5520 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (non-zero header level): the two struct btrfs_key_ptr entries are
 * read out and written back in each other's position.
 *
 * Leaf: both item payloads are copied into temporary heap buffers and
 * written back crosswise, then the offsets and sizes in the two item
 * headers are exchanged and the keys are rewritten through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot in
 * turn.
 */
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525 struct extent_buffer *buf, int slot)
5527 if (btrfs_header_level(buf)) {
5528 struct btrfs_key_ptr ptr1, ptr2;
5530 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531 sizeof(struct btrfs_key_ptr));
5532 read_extent_buffer(buf, &ptr2,
5533 btrfs_node_key_ptr_offset(slot + 1),
5534 sizeof(struct btrfs_key_ptr));
5535 write_extent_buffer(buf, &ptr1,
5536 btrfs_node_key_ptr_offset(slot + 1),
5537 sizeof(struct btrfs_key_ptr));
5538 write_extent_buffer(buf, &ptr2,
5539 btrfs_node_key_ptr_offset(slot),
5540 sizeof(struct btrfs_key_ptr));
/*
 * The key now stored in slot 0 is handed to btrfs_fixup_low_keys()
 * starting one level above @buf.
 * NOTE(review): presumably guarded by a slot == 0 test -- confirm.
 */
5542 struct btrfs_disk_key key;
5543 btrfs_node_key(buf, &key, 0);
5544 btrfs_fixup_low_keys(root, path, &key,
5545 btrfs_header_level(buf) + 1);
5548 struct btrfs_item *item1, *item2;
5549 struct btrfs_key k1, k2;
5550 char *item1_data, *item2_data;
5551 u32 item1_offset, item2_offset, item1_size, item2_size;
5553 item1 = btrfs_item_nr(slot);
5554 item2 = btrfs_item_nr(slot + 1);
5555 btrfs_item_key_to_cpu(buf, &k1, slot);
5556 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557 item1_offset = btrfs_item_offset(buf, item1);
5558 item2_offset = btrfs_item_offset(buf, item2);
5559 item1_size = btrfs_item_size(buf, item1);
5560 item2_size = btrfs_item_size(buf, item2);
5562 item1_data = malloc(item1_size);
5565 item2_data = malloc(item2_size);
5571 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data was allocated with item1_size bytes but is
 * written out with item2_size (and item2_data with item1_size). If the
 * two items differ in size this looks like a read past the end of the
 * smaller buffer -- confirm and fix if so.
 */
5574 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange offset and size in the two item headers. */
5579 btrfs_set_item_offset(buf, item1, item2_offset);
5580 btrfs_set_item_offset(buf, item2, item1_offset);
5581 btrfs_set_item_size(buf, item1, item2_size);
5582 btrfs_set_item_size(buf, item2, item1_size);
/* Rewrite the keys: @slot receives k2 and @slot + 1 receives k1. */
5584 path->slots[0] = slot;
5585 btrfs_set_item_key_unsafe(root, path, &k2);
5586 path->slots[0] = slot + 1;
5587 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Walks adjacent pairs of slots (i, i + 1), loading the two keys either as
 * node keys or as item keys, and compares them with btrfs_comp_cpu_keys().
 * swap_values() is called for a pair and the buffer is marked dirty.
 * NOTE(review): a result below zero from the comparison presumably skips
 * the pair as already ordered -- confirm.
 * NOTE(review): the loop bound is nritems - 1; if btrfs_header_nritems()
 * yields an unsigned value this wraps for an empty block -- confirm that
 * callers never pass one.
 */
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5594 struct extent_buffer *buf;
5595 struct btrfs_key k1, k2;
5597 int level = path->lowest_level;
5600 buf = path->nodes[level];
5601 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5603 btrfs_node_key_to_cpu(buf, &k1, i);
5604 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, i);
5607 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5609 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5611 ret = swap_values(root, path, buf, i);
5614 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are eligible. The deletion is announced on stdout,
 * the item headers after @slot are moved down by one position, the item
 * count in the header is decremented and the buffer is marked dirty.
 * Only the item header array is moved here; the item payload area is not
 * touched by the visible code.
 * NOTE(review): any other key type presumably makes the function return an
 * error without modifying the leaf -- confirm.
 */
5620 static int delete_bogus_item(struct btrfs_root *root,
5621 struct btrfs_path *path,
5622 struct extent_buffer *buf, int slot)
5624 struct btrfs_key key;
5625 int nritems = btrfs_header_nritems(buf);
5627 btrfs_item_key_to_cpu(buf, &key, slot);
5629 /* These are all the keys we can deal with missing. */
5630 if (key.type != BTRFS_DIR_INDEX_KEY &&
5631 key.type != BTRFS_EXTENT_ITEM_KEY &&
5632 key.type != BTRFS_METADATA_ITEM_KEY &&
5633 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5637 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638 (unsigned long long)key.objectid, key.type,
5639 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array: nritems - slot - 1 headers move. */
5640 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641 btrfs_item_nr_offset(slot + 1),
5642 sizeof(struct btrfs_item) *
5643 (nritems - slot - 1));
5644 btrfs_set_header_nritems(buf, nritems - 1);
/*
 * The key now in slot 0 is passed to btrfs_fixup_low_keys() from level 1.
 * NOTE(review): presumably done only when slot 0 itself was deleted.
 */
5646 struct btrfs_disk_key disk_key;
5648 btrfs_item_key(buf, &disk_key, 0);
5649 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5651 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout, as tested by the loop: item 0 ends at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item ends where the previous
 * item starts. For each item that violates this:
 *  - if it ends beyond the expected position, delete_bogus_item() is tried
 *    and a diagnostic ("off the end of the leaf" / "items overlap") exists
 *    for the case that cannot be fixed;
 *  - otherwise the distance to the expected end is stored in shift, the
 *    item payload is moved up by that many bytes and the item offset is
 *    updated.
 * The buffer is marked dirty after a shift.
 * NOTE(review): presumably items with shift == 0 are skipped and the scan
 * restarts after a successful deletion -- confirm.
 */
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5657 struct extent_buffer *buf;
5661 /* We should only get this for leaves */
5662 BUG_ON(path->lowest_level);
5663 buf = path->nodes[0];
5665 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area. */
5668 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669 BTRFS_LEAF_DATA_SIZE(root)) {
5670 if (btrfs_item_end_nr(buf, i) >
5671 BTRFS_LEAF_DATA_SIZE(root)) {
5672 ret = delete_bogus_item(root, path, buf, i);
5675 fprintf(stderr, "item is off the end of the "
5676 "leaf, can't fix\n");
5680 shift = BTRFS_LEAF_DATA_SIZE(root) -
5681 btrfs_item_end_nr(buf, i);
5682 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683 btrfs_item_offset_nr(buf, i - 1)) {
5684 if (btrfs_item_end_nr(buf, i) >
5685 btrfs_item_offset_nr(buf, i - 1)) {
5686 ret = delete_bogus_item(root, path, buf, i);
5689 fprintf(stderr, "items overlap, can't fix\n");
5693 shift = btrfs_item_offset_nr(buf, i - 1) -
5694 btrfs_item_end_nr(buf, i);
5699 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700 i, shift, (unsigned long long)buf->start);
5701 offset = btrfs_item_offset_nr(buf, i);
5702 memmove_extent_buffer(buf,
5703 btrfs_leaf_data(buf) + offset + shift,
5704 btrfs_leaf_data(buf) + offset,
5705 btrfs_item_size_nr(buf, i));
5706 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5708 btrfs_mark_buffer_dirty(buf);
5712 * We may have moved things, in which case we want to exit so we don't
5713 * write those changes out. Once we have proper abort functionality in
5714 * progs this can be changed to something nicer.
5721 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5722 * then just return -EIO.
/*
 * Try to repair @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 * BTRFS_TREE_BLOCK_INVALID_OFFSETS are tested for; other statuses are
 * presumably rejected up front (confirm).
 *
 * The roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each of them the root is read, a transaction
 * is started, and the tree is searched with the first key of @buf down to
 * the level of @buf with path.skip_check_block set, so the search does not
 * trip over the very block being repaired. The repair is then dispatched
 * to fix_key_order() or fix_item_offset() according to @status, and the
 * transaction is committed.
 * NOTE(review): the last argument of btrfs_search_slot() is 1 here,
 * presumably requesting COW of the path -- confirm.
 */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725 struct extent_buffer *buf,
5726 enum btrfs_tree_block_status status)
5728 struct btrfs_trans_handle *trans;
5729 struct ulist *roots;
5730 struct ulist_node *node;
5731 struct btrfs_root *search_root;
5732 struct btrfs_path path;
5733 struct ulist_iterator iter;
5734 struct btrfs_key root_key, key;
5737 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5741 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5745 btrfs_init_path(&path);
5746 ULIST_ITER_INIT(&iter);
/* node->val carries the objectid of a root referencing the block. */
5747 while ((node = ulist_next(roots, &iter))) {
5748 root_key.objectid = node->val;
5749 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750 root_key.offset = (u64)-1;
5752 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5759 trans = btrfs_start_transaction(search_root, 0);
5760 if (IS_ERR(trans)) {
5761 ret = PTR_ERR(trans);
5765 path.lowest_level = btrfs_header_level(buf);
5766 path.skip_check_block = 1;
5767 if (path.lowest_level)
5768 btrfs_node_key_to_cpu(buf, &key, 0);
5770 btrfs_item_key_to_cpu(buf, &key, 0);
5771 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5774 btrfs_commit_transaction(trans, search_root);
5777 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778 ret = fix_key_order(search_root, &path);
5779 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780 ret = fix_item_offset(search_root, &path);
5782 btrfs_commit_transaction(trans, search_root);
5785 btrfs_release_path(&path);
5786 btrfs_commit_transaction(trans, search_root);
5789 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * Steps visible in the body:
 *  - look up the extent record for [buf->start, buf->len] in @extent_cache
 *    and store the header generation in it;
 *  - when the block has items, remember the objectid of its first key in
 *    rec->info_objectid; the header level goes to rec->info_level;
 *  - run btrfs_check_leaf() or btrfs_check_node() against the parent key
 *    stored in the record;
 *  - on a status other than BTRFS_TREE_BLOCK_CLEAN, call
 *    try_to_fix_bad_block() and report "bad block" if the status is still
 *    not clean;
 *  - otherwise mark the content as checked; the owner ref is marked as
 *    checked directly when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF,
 *    and check_owner_ref() is consulted otherwise;
 *  - finally give maybe_free_extent_rec() a chance to drop the record.
 * NOTE(review): try_to_fix_bad_block() is presumably attempted only in
 * repair mode -- confirm.
 */
5793 static int check_block(struct btrfs_root *root,
5794 struct cache_tree *extent_cache,
5795 struct extent_buffer *buf, u64 flags)
5797 struct extent_record *rec;
5798 struct cache_extent *cache;
5799 struct btrfs_key key;
5800 enum btrfs_tree_block_status status;
5804 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5807 rec = container_of(cache, struct extent_record, cache);
5808 rec->generation = btrfs_header_generation(buf);
5810 level = btrfs_header_level(buf);
5811 if (btrfs_header_nritems(buf) > 0) {
5814 btrfs_item_key_to_cpu(buf, &key, 0);
5816 btrfs_node_key_to_cpu(buf, &key, 0);
5818 rec->info_objectid = key.objectid;
5820 rec->info_level = level;
5822 if (btrfs_is_leaf(buf))
5823 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5825 status = btrfs_check_node(root, &rec->parent_key, buf);
5827 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5829 status = try_to_fix_bad_block(root, buf, status);
5830 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5832 fprintf(stderr, "bad block %llu\n",
5833 (unsigned long long)buf->start);
5836 * Signal to callers we need to start the scan over
5837 * again since we'll have cowed blocks.
5842 rec->content_checked = 1;
5843 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844 rec->owner_ref_checked = 1;
5846 ret = check_owner_ref(root, rec, buf);
5848 rec->owner_ref_checked = 1;
5852 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref.
 *
 * Two matching modes are visible: entries flagged full_backref are
 * compared by parent, all others by root.
 * NOTE(review): presumably the parent mode is used when @parent is
 * non-zero and the root mode otherwise, and data backrefs are skipped --
 * confirm. The not-found result is presumably NULL.
 */
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857 u64 parent, u64 root)
5859 struct list_head *cur = rec->backrefs.next;
5860 struct extent_backref *node;
5861 struct tree_backref *back;
5863 while(cur != &rec->backrefs) {
5864 node = to_extent_backref(cur);
5868 back = to_tree_backref(node);
/* Parent mode: only full backrefs are candidates. */
5870 if (!node->full_backref)
5872 if (parent == back->parent)
/* Root mode: only non-full backrefs are candidates. */
5875 if (node->full_backref)
5877 if (back->root == root)
/*
 * Allocate a tree backref and append it to the backref list of @rec.
 *
 * Only the embedded node is zeroed with memset(); the remaining fields
 * are set explicitly. One branch stores @parent and flags the node as a
 * full backref, the other clears the full_backref flag.
 * NOTE(review): the branch is presumably selected by parent > 0, with the
 * other branch storing @root -- confirm. An allocation failure presumably
 * returns NULL.
 */
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885 u64 parent, u64 root)
5887 struct tree_backref *ref = malloc(sizeof(*ref));
5891 memset(&ref->node, 0, sizeof(ref->node));
5893 ref->parent = parent;
5894 ref->node.full_backref = 1;
5897 ref->node.full_backref = 0;
5899 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref.
 *
 * Entries flagged full_backref are compared by parent. All others are
 * compared by the (root, owner, offset) triple; for those, when both the
 * caller and the entry have found_ref set, the entry is additionally
 * tested for a differing bytes or disk_bytenr value.
 * NOTE(review): an entry that differs in bytes or disk_bytenr is
 * presumably skipped so that the caller allocates a separate backref --
 * confirm. Tree backrefs are presumably skipped as well.
 */
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905 u64 parent, u64 root,
5906 u64 owner, u64 offset,
5908 u64 disk_bytenr, u64 bytes)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct data_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_data_backref(node);
/* Parent mode: only full backrefs are candidates. */
5921 if (!node->full_backref)
5923 if (parent == back->parent)
/* Keyed mode: only non-full backrefs are candidates. */
5926 if (node->full_backref)
5928 if (back->root == root && back->owner == owner &&
5929 back->offset == offset) {
5930 if (found_ref && node->found_ref &&
5931 (back->bytes != bytes ||
5932 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to the backref list of @rec.
 *
 * The embedded node is zeroed and flagged is_data. One branch stores
 * @parent and flags the node as a full backref; the other stores @offset
 * and clears the flag. The bytes field starts out as max_size, and
 * rec->max_size is raised to max_size when that is larger.
 * NOTE(review): the branch is presumably selected by parent > 0, and the
 * non-parent branch presumably also stores @root and @owner -- confirm.
 */
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942 u64 parent, u64 root,
5943 u64 owner, u64 offset,
5946 struct data_backref *ref = malloc(sizeof(*ref));
5950 memset(&ref->node, 0, sizeof(ref->node));
5951 ref->node.is_data = 1;
5954 ref->parent = parent;
5957 ref->node.full_backref = 1;
5961 ref->offset = offset;
5962 ref->node.full_backref = 0;
5964 ref->bytes = max_size;
5967 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the record-wide maximum in step with the largest backref seen. */
5968 if (max_size > rec->max_size)
5969 rec->max_size = max_size;
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start
 * (via btrfs_lookup_first_block_group on global_info) has flags that do
 * not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA; when the record has
 *    backrefs, the first one decides which of the two: a tree backref
 *    rooted in the chunk tree requires SYSTEM, any other root requires
 *    METADATA, and a data backref on a tree block is itself an error.
 * NOTE(review): a failed block group lookup presumably returns without
 * flagging anything -- confirm.
 */
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5976 struct btrfs_block_group_cache *bg_cache;
5978 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5982 /* data extent, check chunk directly*/
5983 if (!rec->metadata) {
5984 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985 rec->wrong_chunk_type = 1;
5989 /* metadata extent, check the obvious case first */
5990 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991 BTRFS_BLOCK_GROUP_METADATA))) {
5992 rec->wrong_chunk_type = 1;
5997 * Check SYSTEM extent, as it's also marked as metadata, we can only
5998 * make sure it's a SYSTEM extent by its backref
6000 if (!list_empty(&rec->backrefs)) {
6001 struct extent_backref *node;
6002 struct tree_backref *tback;
6005 node = to_extent_backref(rec->backrefs.next);
6006 if (node->is_data) {
6007 /* tree block shouldn't have data backref */
6008 rec->wrong_chunk_type = 1;
6011 tback = container_of(node, struct tree_backref, node);
6013 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6016 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017 if (!(bg_cache->flags & bg_type))
6018 rec->wrong_chunk_type = 1;
6023 * Allocate a new extent record, fill default values from @tmpl and insert into
6024 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025 * the cache, otherwise it fails.
/*
 * Field handling visible in the body:
 *  - copied from @tmpl: start, max_size, found_rec, content_checked,
 *    owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 *    parent_generation and parent_key;
 *  - rec->nr is the larger of tmpl->nr and tmpl->max_size, while the
 *    cache extent size is tmpl->nr;
 *  - reset to defaults: num_duplicates, flag_block_full_backref
 *    (FLAG_UNSET), bad_full_backref, crossing_stripes, wrong_chunk_type.
 * After insertion the global bytes_used counter grows by rec->nr, the
 * stripe-crossing check is evaluated with the tree root nodesize, and
 * check_extent_type() is run on the new record.
 * NOTE(review): the stripe-crossing check is presumably limited to
 * metadata records, and a failed insert presumably frees the record and
 * returns the error -- confirm.
 */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028 struct extent_record *tmpl)
6030 struct extent_record *rec;
6033 rec = malloc(sizeof(*rec));
6036 rec->start = tmpl->start;
6037 rec->max_size = tmpl->max_size;
6038 rec->nr = max(tmpl->nr, tmpl->max_size);
6039 rec->found_rec = tmpl->found_rec;
6040 rec->content_checked = tmpl->content_checked;
6041 rec->owner_ref_checked = tmpl->owner_ref_checked;
6042 rec->num_duplicates = 0;
6043 rec->metadata = tmpl->metadata;
6044 rec->flag_block_full_backref = FLAG_UNSET;
6045 rec->bad_full_backref = 0;
6046 rec->crossing_stripes = 0;
6047 rec->wrong_chunk_type = 0;
6048 rec->is_root = tmpl->is_root;
6049 rec->refs = tmpl->refs;
6050 rec->extent_item_refs = tmpl->extent_item_refs;
6051 rec->parent_generation = tmpl->parent_generation;
6052 INIT_LIST_HEAD(&rec->backrefs);
6053 INIT_LIST_HEAD(&rec->dups);
6054 INIT_LIST_HEAD(&rec->list);
6055 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6056 rec->cache.start = tmpl->start;
6057 rec->cache.size = tmpl->nr;
6058 ret = insert_cache_extent(extent_cache, &rec->cache);
6063 bytes_used += rec->nr;
6066 rec->crossing_stripes = check_crossing_stripes(global_info,
6067 rec->start, global_info->tree_root->nodesize);
6068 check_extent_type(rec);
6073 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6075 * - refs - if found, increase refs
6076 * - is_root - if found, set
6077 * - content_checked - if found, set
6078 * - owner_ref_checked - if found, set
6080 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup().
 *
 * For an existing record the visible updates are:
 *  - rec->nr becomes the larger of tmpl->nr and tmpl->max_size;
 *  - when tmpl->found_rec is set and either the start differs or the
 *    record already had found_rec, the template is treated as a duplicate:
 *    the record is queued on the global duplicate_extents list (once) and
 *    a reduced copy of the template is appended to rec->dups;
 *  - a non-duplicate template carrying extent_item_refs overwrites
 *    rec->extent_item_refs, after a diagnostic if a value was already set;
 *  - content_checked / owner_ref_checked are set if the template has
 *    them, parent_key is copied, parent_generation is taken over when
 *    non-zero and max_size only ever grows;
 *  - the stripe-crossing and chunk-type checks are re-evaluated and the
 *    record is offered to maybe_free_extent_rec().
 * NOTE(review): the stripe-crossing check is presumably limited to
 * metadata records -- confirm.
 */
6082 static int add_extent_rec(struct cache_tree *extent_cache,
6083 struct extent_record *tmpl)
6085 struct extent_record *rec;
6086 struct cache_extent *cache;
6090 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6092 rec = container_of(cache, struct extent_record, cache);
6096 rec->nr = max(tmpl->nr, tmpl->max_size);
6099 * We need to make sure to reset nr to whatever the extent
6100 * record says was the real size, this way we can compare it to
6103 if (tmpl->found_rec) {
6104 if (tmpl->start != rec->start || rec->found_rec) {
6105 struct extent_record *tmp;
6108 if (list_empty(&rec->list))
6109 list_add_tail(&rec->list,
6110 &duplicate_extents);
6113 * We have to do this song and dance in case we
6114 * find an extent record that falls inside of
6115 * our current extent record but does not have
6116 * the same objectid.
6118 tmp = malloc(sizeof(*tmp));
6121 tmp->start = tmpl->start;
6122 tmp->max_size = tmpl->max_size;
6125 tmp->metadata = tmpl->metadata;
6126 tmp->extent_item_refs = tmpl->extent_item_refs;
6127 INIT_LIST_HEAD(&tmp->list);
6128 list_add_tail(&tmp->list, &rec->dups);
6129 rec->num_duplicates++;
6136 if (tmpl->extent_item_refs && !dup) {
6137 if (rec->extent_item_refs) {
6138 fprintf(stderr, "block %llu rec "
6139 "extent_item_refs %llu, passed %llu\n",
6140 (unsigned long long)tmpl->start,
6141 (unsigned long long)
6142 rec->extent_item_refs,
6143 (unsigned long long)tmpl->extent_item_refs);
6145 rec->extent_item_refs = tmpl->extent_item_refs;
6149 if (tmpl->content_checked)
6150 rec->content_checked = 1;
6151 if (tmpl->owner_ref_checked)
6152 rec->owner_ref_checked = 1;
6153 memcpy(&rec->parent_key, &tmpl->parent_key,
6154 sizeof(tmpl->parent_key));
6155 if (tmpl->parent_generation)
6156 rec->parent_generation = tmpl->parent_generation;
6157 if (rec->max_size < tmpl->max_size)
6158 rec->max_size = tmpl->max_size;
6161 * A metadata extent can't cross stripe_len boundary, otherwise
6162 * kernel scrub won't be able to handle it.
6163 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6167 rec->crossing_stripes = check_crossing_stripes(
6168 global_info, rec->start,
6169 global_info->tree_root->nodesize);
6170 check_extent_type(rec);
6171 maybe_free_extent_rec(extent_cache, rec);
6175 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a zeroed template with only the
 * start set (plus fields not visible here) is inserted first and the
 * lookup is repeated. The matching backref is searched with
 * find_tree_backref() and allocated with alloc_tree_backref() when
 * missing. Depending on @found_ref either node.found_ref or
 * node.found_extent_tree is set on it; if the flag was already set an
 * "already exists" diagnostic is printed first. The record is then
 * re-checked with check_extent_type() and offered to
 * maybe_free_extent_rec().
 * NOTE(review): a record whose start differs from @bytenr is treated as an
 * error case, presumably returning a negative errno -- confirm.
 */
6180 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6181 u64 parent, u64 root, int found_ref)
6183 struct extent_record *rec;
6184 struct tree_backref *back;
6185 struct cache_extent *cache;
6188 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6190 struct extent_record tmpl;
6192 memset(&tmpl, 0, sizeof(tmpl));
6193 tmpl.start = bytenr;
6197 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6201 /* really a bug in cache_extent implement now */
6202 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6207 rec = container_of(cache, struct extent_record, cache);
6208 if (rec->start != bytenr) {
6210 * Several cause, from unaligned bytenr to over lapping extents
6215 back = find_tree_backref(rec, parent, root);
6217 back = alloc_tree_backref(rec, parent, root);
6223 if (back->node.found_ref) {
6224 fprintf(stderr, "Extent back ref already exists "
6225 "for %llu parent %llu root %llu \n",
6226 (unsigned long long)bytenr,
6227 (unsigned long long)parent,
6228 (unsigned long long)root);
6230 back->node.found_ref = 1;
6232 if (back->node.found_extent_tree) {
6233 fprintf(stderr, "Extent back ref already exists "
6234 "for %llu parent %llu root %llu \n",
6235 (unsigned long long)bytenr,
6236 (unsigned long long)parent,
6237 (unsigned long long)root);
6239 back->node.found_extent_tree = 1;
6241 check_extent_type(rec);
6242 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template with start and max_size
 * set is inserted first and the lookup is repeated; rec->max_size only
 * ever grows. The backref is looked up with find_data_backref() and
 * allocated with alloc_data_backref() when missing.
 *
 * Two update paths are visible:
 *  - one asserts num_refs == 1, asserts that a previously found ref has
 *    the same byte count, then sets node.found_ref, increments the
 *    found_ref counter, stores bytes and disk_bytenr on the backref and
 *    marks the record content and owner ref as checked;
 *  - the other prints an "already exists" diagnostic if
 *    node.found_extent_tree was set, then stores num_refs and sets
 *    node.found_extent_tree.
 * NOTE(review): the first path is presumably taken when @found_ref is
 * non-zero -- confirm.
 * Finally the record is offered to maybe_free_extent_rec().
 */
6246 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6247 u64 parent, u64 root, u64 owner, u64 offset,
6248 u32 num_refs, int found_ref, u64 max_size)
6250 struct extent_record *rec;
6251 struct data_backref *back;
6252 struct cache_extent *cache;
6255 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6257 struct extent_record tmpl;
6259 memset(&tmpl, 0, sizeof(tmpl));
6260 tmpl.start = bytenr;
6262 tmpl.max_size = max_size;
6264 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6268 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6273 rec = container_of(cache, struct extent_record, cache);
6274 if (rec->max_size < max_size)
6275 rec->max_size = max_size;
6278 * If found_ref is set then max_size is the real size and must match the
6279 * existing refs. So if we have already found a ref then we need to
6280 * make sure that this ref matches the existing one, otherwise we need
6281 * to add a new backref so we can notice that the backrefs don't match
6282 * and we need to figure out who is telling the truth. This is to
6283 * account for that awful fsync bug I introduced where we'd end up with
6284 * a btrfs_file_extent_item that would have its length include multiple
6285 * prealloc extents or point inside of a prealloc extent.
6287 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6290 back = alloc_data_backref(rec, parent, root, owner, offset,
6296 BUG_ON(num_refs != 1);
6297 if (back->node.found_ref)
6298 BUG_ON(back->bytes != max_size);
6299 back->node.found_ref = 1;
6300 back->found_ref += 1;
6301 back->bytes = max_size;
6302 back->disk_bytenr = bytenr;
6304 rec->content_checked = 1;
6305 rec->owner_ref_checked = 1;
6307 if (back->node.found_extent_tree) {
6308 fprintf(stderr, "Extent back ref already exists "
6309 "for %llu parent %llu root %llu "
6310 "owner %llu offset %llu num_refs %lu\n",
6311 (unsigned long long)bytenr,
6312 (unsigned long long)parent,
6313 (unsigned long long)root,
6314 (unsigned long long)owner,
6315 (unsigned long long)offset,
6316 (unsigned long)num_refs);
6318 back->num_refs = num_refs;
6319 back->node.found_extent_tree = 1;
6321 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (bytenr, size) for later processing.
 *
 * The range is first added to @seen and then to @pending. The result of
 * the second add_cache_extent() call is not used.
 * NOTE(review): a failure of the first insertion (for instance because the
 * range was already seen) presumably returns early so the range is not
 * queued twice -- confirm.
 */
6325 static int add_pending(struct cache_tree *pending,
6326 struct cache_tree *seen, u64 bytenr, u32 size)
6329 ret = add_cache_extent(seen, bytenr, size);
6332 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in @bits.
 *
 * Sources are consulted in this order:
 *  1. the first entry of @reada, returned alone in bits[0];
 *  2. @nodes, searched from up to 32768 bytes before @last, falling back
 *     to the start of @nodes;
 *  3. @pending from its start.
 * Consecutive cache extents are copied into @bits until the source runs
 * out or @bits_nr entries are filled. When the batch came from @nodes and
 * more than 8 slots remain, entries from @pending that follow bits[0] are
 * appended for as long as each one starts within 32768 bytes of the end of
 * the previous one.
 * NOTE(review): the return value is presumably the number of entries
 * filled -- confirm.
 * NOTE(review): @last is a u64 but node_start is an unsigned long, so the
 * value is truncated where unsigned long is 32 bits wide -- consider u64.
 */
6336 static int pick_next_pending(struct cache_tree *pending,
6337 struct cache_tree *reada,
6338 struct cache_tree *nodes,
6339 u64 last, struct block_info *bits, int bits_nr,
6342 unsigned long node_start = last;
6343 struct cache_extent *cache;
6346 cache = search_cache_extent(reada, 0);
6348 bits[0].start = cache->start;
6349 bits[0].size = cache->size;
/* Back up a little so nodes just before @last are picked up as well. */
6354 if (node_start > 32768)
6355 node_start -= 32768;
6357 cache = search_cache_extent(nodes, node_start);
6359 cache = search_cache_extent(nodes, 0);
6362 cache = search_cache_extent(pending, 0);
6367 bits[ret].start = cache->start;
6368 bits[ret].size = cache->size;
6369 cache = next_cache_extent(cache);
6371 } while (cache && ret < bits_nr);
6377 bits[ret].start = cache->start;
6378 bits[ret].size = cache->size;
6379 cache = next_cache_extent(cache);
6381 } while (cache && ret < bits_nr);
/* Spare room in @bits: top up with nearby entries from @pending. */
6383 if (bits_nr - ret > 8) {
6384 u64 lookup = bits[0].start + bits[0].size;
6385 struct cache_extent *next;
6386 next = search_cache_extent(pending, lookup);
6388 if (next->start - lookup > 32768)
6390 bits[ret].start = next->start;
6391 bits[ret].size = next->size;
6392 lookup = next->start + next->size;
6396 next = next_cache_extent(next);
/*
 * Per-extent callback used when a chunk cache tree is torn down.
 *
 * Maps @cache back to the struct chunk_record embedding it and unlinks the
 * record from both of its list heads (list and dextents).
 * NOTE(review): the record is presumably freed afterwards -- confirm.
 */
6404 static void free_chunk_record(struct cache_extent *cache)
6406 struct chunk_record *rec;
6408 rec = container_of(cache, struct chunk_record, cache);
6409 list_del_init(&rec->list);
6410 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the callback.
 */
6414 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6416 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for tearing down the device record rb-tree: maps
 * @node back to the struct device_record embedding it.
 * NOTE(review): the record is presumably freed afterwards -- confirm.
 */
6419 static void free_device_record(struct rb_node *node)
6421 struct device_record *rec;
6423 rec = container_of(node, struct device_record, node);
/*
 * Invokes the FREE_RB_BASED_TREE macro with the name device_cache and
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this expands to the function that frees a whole
 * device record tree -- confirm against the macro definition.
 */
6427 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: insert its cache extent into the cache tree and
 * append the record to the block_groups list of the tree.
 * NOTE(review): a failed insert presumably returns the error before the
 * record is linked into the list -- confirm.
 */
6429 int insert_block_group_record(struct block_group_tree *tree,
6430 struct block_group_record *bg_rec)
6434 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6438 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback used when a block group tree is torn down: maps
 * @cache back to its struct block_group_record and unlinks the record from
 * its list.
 * NOTE(review): the record is presumably freed afterwards -- confirm.
 */
6442 static void free_block_group_record(struct cache_extent *cache)
6444 struct block_group_record *rec;
6446 rec = container_of(cache, struct block_group_record, cache);
6447 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_block_group_record() as the callback.
 */
6451 void free_block_group_tree(struct block_group_tree *tree)
6453 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree.
 *
 * The cache extent is inserted with insert_cache_extent2() rather than
 * insert_cache_extent(); the existing note in the body gives the reason.
 * The record is then appended to both orphan lists of the tree
 * (no_chunk_orphans and no_device_orphans).
 * NOTE(review): a failed insert presumably returns the error before the
 * record is linked into the lists -- confirm.
 */
6456 int insert_device_extent_record(struct device_extent_tree *tree,
6457 struct device_extent_record *de_rec)
6462 * Device extent is a bit different from the other extents, because
6463 * the extents which belong to the different devices may have the
6464 * same start and size, so we need use the special extent cache
6465 * search/insert functions.
6467 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6471 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6472 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback used when a device extent tree is torn down: maps
 * @cache back to its struct device_extent_record and unlinks the record
 * from chunk_list and device_list if it is still on them.
 * NOTE(review): the record is presumably freed afterwards -- confirm.
 */
6476 static void free_device_extent_record(struct cache_extent *cache)
6478 struct device_extent_record *rec;
6480 rec = container_of(cache, struct device_extent_record, cache);
6481 if (!list_empty(&rec->chunk_list))
6482 list_del_init(&rec->chunk_list);
6483 if (!list_empty(&rec->device_list))
6484 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * callback.
 */
6488 void free_device_extent_tree(struct device_extent_tree *tree)
6490 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6493 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The ref objectid decides the kind: values below
 * BTRFS_FIRST_FREE_OBJECTID become a tree backref, everything else a data
 * backref carrying the v0 ref count. In both cases the item key supplies
 * the extent bytenr (key.objectid) and the parent (key.offset).
 */
6494 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6495 struct extent_buffer *leaf, int slot)
6497 struct btrfs_extent_ref_v0 *ref0;
6498 struct btrfs_key key;
6501 btrfs_item_key_to_cpu(leaf, &key, slot);
6502 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6503 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6504 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
/* Data ref: root, owner and offset are passed as 0, as are found_ref and max_size. */
6507 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6508 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the stripe array
 * (btrfs_chunk_record_size(num_stripes)). The cache extent covers
 * [key->offset, chunk length); the key fields, the header generation and
 * the chunk item fields (owner, stripe_len, type, io_width, io_align,
 * sector_size, num_stripes, sub_stripes) are copied in, followed by devid,
 * offset and device UUID of every stripe.
 * NOTE(review): on allocation failure a message is printed; what happens
 * next (presumably the process exits) needs confirming.
 */
6514 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6515 struct btrfs_key *key,
6518 struct btrfs_chunk *ptr;
6519 struct chunk_record *rec;
6522 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6523 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6525 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6527 fprintf(stderr, "memory allocation failed\n");
6531 INIT_LIST_HEAD(&rec->list);
6532 INIT_LIST_HEAD(&rec->dextents);
6535 rec->cache.start = key->offset;
6536 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6538 rec->generation = btrfs_header_generation(leaf);
6540 rec->objectid = key->objectid;
6541 rec->type = key->type;
6542 rec->offset = key->offset;
6544 rec->length = rec->cache.size;
6545 rec->owner = btrfs_chunk_owner(leaf, ptr);
6546 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6547 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6548 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6549 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6550 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6551 rec->num_stripes = num_stripes;
6552 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe description out of the on-disk item. */
6554 for (i = 0; i < rec->num_stripes; ++i) {
6555 rec->stripes[i].devid =
6556 btrfs_stripe_devid_nr(leaf, ptr, i);
6557 rec->stripes[i].offset =
6558 btrfs_stripe_offset_nr(leaf, ptr, i);
6559 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6560 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with error() and, per the message, ignored. A valid
 * one is converted with btrfs_new_chunk_record() and inserted into the
 * cache; a failed insert is reported as "Chunk[...] existed.".
 * NOTE(review): the record is presumably freed when the insert fails --
 * confirm.
 */
6567 static int process_chunk_item(struct cache_tree *chunk_cache,
6568 struct btrfs_key *key, struct extent_buffer *eb,
6571 struct chunk_record *rec;
6572 struct btrfs_chunk *chunk;
6575 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6577 * Do extra check for this chunk item,
6579 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6580 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6581 * and owner<->key_type check.
6583 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6586 error("chunk(%llu, %llu) is not valid, ignore it",
6587 key->offset, btrfs_chunk_length(eb, chunk));
6590 rec = btrfs_new_chunk_record(eb, key, slot);
6591 ret = insert_cache_extent(chunk_cache, &rec->cache);
6593 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6594 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare().
 *
 * The record takes the key fields, the header generation and the device
 * id, total bytes and bytes used from the item. A failed insert is
 * reported as "Device[...] existed.".
 * NOTE(review): the record comes from malloc(), so any field not assigned
 * here starts out uninitialised.
 * NOTE(review): rec->devid is assigned twice (first from key->offset, then
 * from the item); the first store is overwritten before any visible read.
 */
6601 static int process_device_item(struct rb_root *dev_cache,
6602 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6604 struct btrfs_dev_item *ptr;
6605 struct device_record *rec;
6608 ptr = btrfs_item_ptr(eb,
6609 slot, struct btrfs_dev_item);
6611 rec = malloc(sizeof(*rec));
6613 fprintf(stderr, "memory allocation failed\n");
6617 rec->devid = key->offset;
6618 rec->generation = btrfs_header_generation(eb);
6620 rec->objectid = key->objectid;
6621 rec->type = key->type;
6622 rec->offset = key->offset;
6624 rec->devid = btrfs_device_id(eb, ptr);
6625 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6626 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6628 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6630 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated. Its cache extent starts at key->objectid
 * with size key->offset; the key fields, the header generation and the
 * flags of the on-disk item are copied in and the list head is
 * initialised.
 * NOTE(review): on allocation failure a message is printed; what happens
 * next (presumably the process exits) needs confirming.
 */
6637 struct block_group_record *
6638 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6641 struct btrfs_block_group_item *ptr;
6642 struct block_group_record *rec;
6644 rec = calloc(1, sizeof(*rec));
6646 fprintf(stderr, "memory allocation failed\n");
6650 rec->cache.start = key->objectid;
6651 rec->cache.size = key->offset;
6653 rec->generation = btrfs_header_generation(leaf);
6655 rec->objectid = key->objectid;
6656 rec->type = key->type;
6657 rec->offset = key->offset;
6659 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6660 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6662 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at @slot of @eb into a record and insert
 * it into @block_group_cache. A failed insert is reported as
 * "Block Group[...] existed.".
 * NOTE(review): the record is presumably freed when the insert fails --
 * confirm.
 */
6667 static int process_block_group_item(struct block_group_tree *block_group_cache,
6668 struct btrfs_key *key,
6669 struct extent_buffer *eb, int slot)
6671 struct block_group_record *rec;
6674 rec = btrfs_new_block_group_record(eb, key, slot);
6675 ret = insert_block_group_record(block_group_cache, rec);
6677 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6678 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of
 * @leaf.
 *
 * The record is zero-allocated. Its cache extent is identified by
 * key->objectid and starts at key->offset, with the extent length as its
 * size. The key fields, the header generation and the chunk objectid,
 * chunk offset and length of the on-disk item are copied in, and both
 * list heads are initialised.
 * NOTE(review): on allocation failure a message is printed; what happens
 * next (presumably the process exits) needs confirming.
 */
6685 struct device_extent_record *
6686 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6687 struct btrfs_key *key, int slot)
6689 struct device_extent_record *rec;
6690 struct btrfs_dev_extent *ptr;
6692 rec = calloc(1, sizeof(*rec));
6694 fprintf(stderr, "memory allocation failed\n");
6698 rec->cache.objectid = key->objectid;
6699 rec->cache.start = key->offset;
6701 rec->generation = btrfs_header_generation(leaf);
6703 rec->objectid = key->objectid;
6704 rec->type = key->type;
6705 rec->offset = key->offset;
6707 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6708 rec->chunk_objecteid =
6709 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6711 btrfs_dev_extent_chunk_offset(leaf, ptr);
6712 rec->length = btrfs_dev_extent_length(leaf, ptr);
6713 rec->cache.size = rec->length;
6715 INIT_LIST_HEAD(&rec->chunk_list);
6716 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert the dev extent item at @slot of @eb into a record and insert it
 * into @dev_extent_cache. A failed insert is reported as
 * "Device extent[...] existed.".
 * NOTE(review): the record is presumably freed when the insert fails --
 * confirm.
 */
6722 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6723 struct btrfs_key *key, struct extent_buffer *eb,
6726 struct device_extent_record *rec;
6729 rec = btrfs_new_device_extent_record(eb, key, slot);
6730 ret = insert_device_extent_record(dev_extent_cache, rec);
6733 "Device extent[%llu, %llu, %llu] existed.\n",
6734 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item at @slot of @eb in @extent_cache, then walk the
 * inline references stored behind it and register each one as a tree or
 * data backref.
 *
 * @root:         supplies nodesize and sectorsize for the sanity checks
 * @extent_cache: cache the extent record and its backrefs are added to
 * @eb:           leaf holding the item
 * @slot:         slot of the item inside @eb
 */
6741 static int process_extent_item(struct btrfs_root *root,
6742 struct cache_tree *extent_cache,
6743 struct extent_buffer *eb, int slot)
6745 struct btrfs_extent_item *ei;
6746 struct btrfs_extent_inline_ref *iref;
6747 struct btrfs_extent_data_ref *dref;
6748 struct btrfs_shared_data_ref *sref;
6749 struct btrfs_key key;
6750 struct extent_record tmpl;
6755 u32 item_size = btrfs_item_size_nr(eb, slot);
6761 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * For METADATA_ITEM keys the length comes from root->nodesize, for all
 * other keys it is the key offset.
 */
6763 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6765 num_bytes = root->nodesize;
6767 num_bytes = key.offset;
/* an extent must start on a sector boundary */
6770 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6771 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6772 key.objectid, root->sectorsize);
/*
 * Item smaller than the current extent item: with v0 compat support built
 * in it is read as a btrfs_extent_item_v0 and recorded without inline refs.
 */
6775 if (item_size < sizeof(*ei)) {
6776 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6777 struct btrfs_extent_item_v0 *ei0;
6778 BUG_ON(item_size != sizeof(*ei0));
6779 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6780 refs = btrfs_extent_refs_v0(eb, ei0);
6784 memset(&tmpl, 0, sizeof(tmpl));
6785 tmpl.start = key.objectid;
6786 tmpl.nr = num_bytes;
6787 tmpl.extent_item_refs = refs;
6788 tmpl.metadata = metadata;
6790 tmpl.max_size = num_bytes;
6792 return add_extent_rec(extent_cache, &tmpl);
6795 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6796 refs = btrfs_extent_refs(eb, ei);
/* NOTE(review): presumably this flag test is what sets `metadata` - confirm */
6797 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* a metadata extent is exactly one node in size */
6801 if (metadata && num_bytes != root->nodesize) {
6802 error("ignore invalid metadata extent, length %llu does not equal to %u",
6803 num_bytes, root->nodesize);
/* a data extent is a whole number of sectors */
6806 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6807 error("ignore invalid data extent, length %llu is not aligned to %u",
6808 num_bytes, root->sectorsize);
/* template record describing the extent item itself */
6812 memset(&tmpl, 0, sizeof(tmpl));
6813 tmpl.start = key.objectid;
6814 tmpl.nr = num_bytes;
6815 tmpl.extent_item_refs = refs;
6816 tmpl.metadata = metadata;
6818 tmpl.max_size = num_bytes;
6819 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right behind the extent item; a tree block stored under
 * an EXTENT_ITEM key has a btrfs_tree_block_info in between.
 */
6821 ptr = (unsigned long)(ei + 1);
6822 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6823 key.type == BTRFS_EXTENT_ITEM_KEY)
6824 ptr += sizeof(struct btrfs_tree_block_info);
/* the inline refs end where the item ends */
6826 end = (unsigned long)ei + item_size;
6828 iref = (struct btrfs_extent_inline_ref *)ptr;
6829 type = btrfs_extent_inline_ref_type(eb, iref);
6830 offset = btrfs_extent_inline_ref_offset(eb, iref);
6832 case BTRFS_TREE_BLOCK_REF_KEY:
6833 ret = add_tree_backref(extent_cache, key.objectid,
6836 error("add_tree_backref failed: %s",
6839 case BTRFS_SHARED_BLOCK_REF_KEY:
6840 ret = add_tree_backref(extent_cache, key.objectid,
6843 error("add_tree_backref failed: %s",
6846 case BTRFS_EXTENT_DATA_REF_KEY:
/* the data ref is read starting at the inline ref's offset field */
6847 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6848 add_data_backref(extent_cache, key.objectid, 0,
6849 btrfs_extent_data_ref_root(eb, dref),
6850 btrfs_extent_data_ref_objectid(eb,
6852 btrfs_extent_data_ref_offset(eb, dref),
6853 btrfs_extent_data_ref_count(eb, dref),
6856 case BTRFS_SHARED_DATA_REF_KEY:
/* the shared data ref sits directly behind the inline ref */
6857 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6858 add_data_backref(extent_cache, key.objectid, offset,
6860 btrfs_shared_data_ref_count(eb, sref),
6864 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6865 key.objectid, key.type, num_bytes);
/* step to the next inline ref; its size depends on the ref type */
6868 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by one free space entry with exactly that offset and size.
 *
 * The range is first trimmed or split around the stripes that
 * btrfs_rmap_block() reports for each super block mirror. A matching entry
 * is unlinked from the free space ctl.
 */
6875 static int check_cache_range(struct btrfs_root *root,
6876 struct btrfs_block_group_cache *cache,
6877 u64 offset, u64 bytes)
6879 struct btrfs_free_space *entry;
/* map each super block copy back to logical addresses in this block group */
6885 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6886 bytenr = btrfs_sb_offset(i);
6887 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6888 cache->key.objectid, bytenr, 0,
6889 &logical, &nr, &stripe_len);
/* stripe ends at or before the range starts: no overlap */
6894 if (logical[nr] + stripe_len <= offset)
/* stripe starts at or after the range ends: no overlap */
6896 if (offset + bytes <= logical[nr])
/* stripe begins exactly where the range begins */
6898 if (logical[nr] == offset) {
6899 if (stripe_len >= bytes) {
/* skip over the stripe and keep checking what is left behind it */
6903 bytes -= stripe_len;
6904 offset += stripe_len;
/* stripe begins before the range and reaches into it */
6905 } else if (logical[nr] < offset) {
6906 if (logical[nr] + stripe_len >=
6911 bytes = (offset + bytes) -
6912 (logical[nr] + stripe_len);
6913 offset = logical[nr] + stripe_len;
6916 * Could be tricky, the super may land in the
6917 * middle of the area we're checking. First
6918 * check the easiest case, it's at the end.
6920 if (logical[nr] + stripe_len >=
/* stripe covers the tail of the range: keep only the part in front of it */
6922 bytes = logical[nr] - offset;
6926 /* Check the left side */
6927 ret = check_cache_range(root, cache,
6929 logical[nr] - offset);
6935 /* Now we continue with the right side */
6936 bytes = (offset + bytes) -
6937 (logical[nr] + stripe_len);
6938 offset = logical[nr] + stripe_len;
/* what remains must match one free space entry exactly */
6945 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6947 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6948 offset, offset+bytes);
6952 if (entry->offset != offset) {
6953 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6958 if (entry->bytes != bytes) {
6959 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6960 bytes, entry->bytes, offset);
/* matched: take the entry out of the ctl */
6964 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the loaded free space of block group @cache against the extent
 * tree: every gap between consecutive extent/metadata items inside the
 * block group, and the tail behind the last one, is passed to
 * check_cache_range(). Free space entries still present afterwards are
 * reported.
 */
6969 static int verify_space_cache(struct btrfs_root *root,
6970 struct btrfs_block_group_cache *cache)
6972 struct btrfs_path path;
6973 struct extent_buffer *leaf;
6974 struct btrfs_key key;
/* from here on `root` is the extent tree root */
6978 root = root->fs_info->extent_root;
/* start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET */
6980 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6982 btrfs_init_path(&path);
6983 key.objectid = last;
6985 key.type = BTRFS_EXTENT_ITEM_KEY;
6986 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* ran off the end of the current leaf: move on to the next one */
6991 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6992 ret = btrfs_next_leaf(root, &path);
7000 leaf = path.nodes[0];
7001 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* item lies at or past the end of this block group */
7002 if (key.objectid >= cache->key.offset + cache->key.objectid)
/* only extent and metadata items describe allocated space */
7004 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7005 key.type != BTRFS_METADATA_ITEM_KEY) {
/* extent begins exactly at `last`: no gap, just move `last` past it */
7010 if (last == key.objectid) {
7011 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7012 last = key.objectid + key.offset;
7014 last = key.objectid + root->nodesize;
/* the gap [last, key.objectid) has to be present as free space */
7019 ret = check_cache_range(root, cache, last,
7020 key.objectid - last);
7023 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7024 last = key.objectid + key.offset;
7026 last = key.objectid + root->nodesize;
/* tail of the block group behind the last extent */
7030 if (last < cache->key.objectid + cache->key.offset)
7031 ret = check_cache_range(root, cache, last,
7032 cache->key.objectid +
7033 cache->key.offset - last);
7036 btrfs_release_path(&path);
/* check_cache_range() unlinks each entry it matches, so none should remain */
7039 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7040 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups, starting
 * the lookup just behind the primary super block.
 *
 * With the FREE_SPACE_TREE compat_ro bit set the free space tree is loaded,
 * otherwise the free space cache. Returns -EINVAL if `error` is non-zero at
 * the end, 0 otherwise.
 */
7048 static int check_space_cache(struct btrfs_root *root)
7050 struct btrfs_block_group_cache *cache;
7051 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super block
 * generation gets the invalidation message below.
 */
7055 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7056 btrfs_super_generation(root->fs_info->super_copy) !=
7057 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7058 printf("cache and super generation don't match, space cache "
7059 "will be invalidated\n");
7063 if (ctx.progress_enabled) {
7064 ctx.tp = TASK_FREE_SPACE;
7065 task_start(ctx.info);
7069 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* the next lookup starts right behind this block group */
7073 start = cache->key.objectid + cache->key.offset;
/*
 * Set up a free space ctl if the group has none.
 * NOTE(review): btrfs_remove_free_space_cache() presumably runs on the
 * else path to drop entries loaded earlier - confirm.
 */
7074 if (!cache->free_space_ctl) {
7075 if (btrfs_init_free_space_ctl(cache,
7076 root->sectorsize)) {
7081 btrfs_remove_free_space_cache(cache);
/* free space tree: exclude the super stripes around the load */
7084 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7085 ret = exclude_super_stripes(root, cache);
7087 fprintf(stderr, "could not exclude super stripes: %s\n",
7092 ret = load_free_space_tree(root->fs_info, cache);
7093 free_excluded_extents(root, cache);
7095 fprintf(stderr, "could not load free space tree: %s\n",
7102 ret = load_free_space_cache(root->fs_info, cache);
/* compare what was loaded with the extent tree */
7107 ret = verify_space_cache(root, cache);
7109 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7110 cache->key.objectid);
7115 task_stop(ctx.info);
7117 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at @bytenr and compare the checksum of
 * each sectorsize-sized piece with the expected value stored in @eb.
 *
 * @leaf_offset: offset inside @eb of the first expected checksum; the one
 *               for a given sector is found csum_size bytes per sector
 *               further on
 *
 * On a mismatch the mirror number and both checksums are printed and the
 * number of copies is looked up so that another mirror can be tried.
 */
7120 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7121 u64 num_bytes, unsigned long leaf_offset,
7122 struct extent_buffer *eb) {
7125 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7127 unsigned long csum_offset;
7131 u64 data_checked = 0;
/* the length is tested for being a whole number of sectors */
7137 if (num_bytes % root->sectorsize)
/* one buffer for the whole range */
7140 data = malloc(num_bytes);
7144 while (offset < num_bytes) {
7147 read_len = num_bytes - offset;
7148 /* read as much space once a time */
7149 ret = read_extent_data(root, data + offset,
7150 bytenr + offset, &read_len, mirror);
7154 /* verify every 4k data's checksum */
7155 while (data_checked < read_len) {
/* tmp: byte offset of this sector from the start of the range */
7157 tmp = offset + data_checked;
7159 csum = btrfs_csum_data((char *)data + tmp,
7160 csum, root->sectorsize);
7161 btrfs_csum_final(csum, (u8 *)&csum);
/* locate the expected checksum for this sector inside the csum item */
7163 csum_offset = leaf_offset +
7164 tmp / root->sectorsize * csum_size;
7165 read_extent_buffer(eb, (char *)&csum_expected,
7166 csum_offset, csum_size);
7167 /* try another mirror */
7168 if (csum != csum_expected) {
7169 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7170 mirror, bytenr + tmp,
7171 csum, csum_expected);
7172 num_copies = btrfs_num_copies(
7173 &root->fs_info->mapping_tree,
/* more copies are left to try */
7175 if (mirror < num_copies - 1) {
7180 data_checked += root->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The range is shrunk as covering extents are found; an extent in the
 * middle of the range splits the check, with the right part handled by a
 * recursive call. Bytes still uncovered at the end are reported as a csum
 * range without extents.
 */
7189 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7192 struct btrfs_path path;
7193 struct extent_buffer *leaf;
7194 struct btrfs_key key;
7197 btrfs_init_path(&path);
/* search with the largest possible offset for this bytenr */
7198 key.objectid = bytenr;
7199 key.type = BTRFS_EXTENT_ITEM_KEY;
7200 key.offset = (u64)-1;
7203 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7206 fprintf(stderr, "Error looking up extent record %d\n", ret);
7207 btrfs_release_path(&path);
/* slot 0 has nothing before it in this leaf: fall back to the previous leaf */
7210 if (path.slots[0] > 0) {
7213 ret = btrfs_prev_leaf(root, &path);
7216 } else if (ret > 0) {
7223 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7226 * Block group items come before extent items if they have the same
7227 * bytenr, so walk back one more just in case. Dear future traveller,
7228 * first congrats on mastering time travel. Now if it's not too much
7229 * trouble could you go back to 2006 and tell Chris to make the
7230 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7231 * EXTENT_ITEM_KEY please?
7233 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7234 if (path.slots[0] > 0) {
7237 ret = btrfs_prev_leaf(root, &path);
7240 } else if (ret > 0) {
7245 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* forward scan over the items from here on */
7249 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7250 ret = btrfs_next_leaf(root, &path);
7252 fprintf(stderr, "Error going to next leaf "
7254 btrfs_release_path(&path);
7260 leaf = path.nodes[0];
7261 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7262 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before our range starts */
7266 if (key.objectid + key.offset < bytenr) {
/* extent starts behind the end of our range */
7270 if (key.objectid > bytenr + num_bytes)
/* extent starts exactly at the range start */
7273 if (key.objectid == bytenr) {
7274 if (key.offset >= num_bytes) {
/* partially covered: continue with the part behind this extent */
7278 num_bytes -= key.offset;
7279 bytenr += key.offset;
/* extent starts before the range and reaches into it */
7280 } else if (key.objectid < bytenr) {
7281 if (key.objectid + key.offset >= bytenr + num_bytes) {
7285 num_bytes = (bytenr + num_bytes) -
7286 (key.objectid + key.offset);
7287 bytenr = key.objectid + key.offset;
/* extent starts inside the range and ends before the range does */
7289 if (key.objectid + key.offset < bytenr + num_bytes) {
7290 u64 new_start = key.objectid + key.offset;
7291 u64 new_bytes = bytenr + num_bytes - new_start;
7294 * Weird case, the extent is in the middle of
7295 * our range, we'll have to search one side
7296 * and then the other. Not sure if this happens
7297 * in real life, but no harm in coding it up
7298 * anyway just in case.
7300 btrfs_release_path(&path);
7301 ret = check_extent_exists(root, new_start,
7304 fprintf(stderr, "Right section didn't "
/* what is left to check is the part in front of the extent */
7308 num_bytes = key.objectid - bytenr;
7311 num_bytes = key.objectid - bytenr;
/* bytes left over with no error so far: no extent covers them */
7318 if (num_bytes && !ret) {
7319 fprintf(stderr, "There are no extents for csum range "
7320 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7324 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.
 *
 * Consecutive csum items are accumulated into one range (offset, num_bytes);
 * when an item does not continue the current range, the accumulated range
 * is checked with check_extent_exists(). If check_data_csum is set, the
 * data behind each item is also verified with check_extent_csums().
 */
7328 static int check_csums(struct btrfs_root *root)
7330 struct btrfs_path path;
7331 struct extent_buffer *leaf;
7332 struct btrfs_key key;
7333 u64 offset = 0, num_bytes = 0;
7334 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7338 unsigned long leaf_offset;
/* from here on `root` is the csum tree root */
7340 root = root->fs_info->csum_root;
7341 if (!extent_buffer_uptodate(root->node)) {
7342 fprintf(stderr, "No valid csum tree found\n");
7346 btrfs_init_path(&path);
7347 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7348 key.type = BTRFS_EXTENT_CSUM_KEY;
7350 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7352 fprintf(stderr, "Error searching csum tree %d\n", ret);
7353 btrfs_release_path(&path);
/* no exact match and the slot is not the first one in the leaf */
7357 if (ret > 0 && path.slots[0])
7362 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7363 ret = btrfs_next_leaf(root, &path);
7365 fprintf(stderr, "Error going to next leaf "
7372 leaf = path.nodes[0];
7374 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7375 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* one checksum of csum_size bytes stands for one sector of data */
7380 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7381 csum_size) * root->sectorsize;
/* reading back the data is optional */
7382 if (!check_data_csum)
7383 goto skip_csum_check;
7384 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7385 ret = check_extent_csums(root, key.offset, data_len,
7391 offset = key.offset;
/* this item does not continue the range: check what was accumulated */
7392 } else if (key.offset != offset + num_bytes) {
7393 ret = check_extent_exists(root, offset, num_bytes);
7395 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7396 "there is no extent record\n",
7397 offset, offset+num_bytes);
7400 offset = key.offset;
7403 num_bytes += data_len;
7407 btrfs_release_path(&path);
/*
 * Test whether @key sorts before @drop_key, comparing objectid first, then
 * type, then offset.
 */
7411 static int is_dropped_key(struct btrfs_key *key,
7412 struct btrfs_key *drop_key) {
7413 if (key->objectid < drop_key->objectid)
/* same objectid: the type decides */
7415 else if (key->objectid == drop_key->objectid) {
7416 if (key->type < drop_key->type)
/* same type as well: the offset decides */
7418 else if (key->type == drop_key->type) {
7419 if (key->offset < drop_key->offset)
7427 * Here are the rules for FULL_BACKREF.
7429 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7430 * 2) If btrfs_header_owner(buf) no longer points to buf then we have FULL_BACKREF set.
7432 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7433 * if it happened after the relocation occurred since we'll have dropped the
7434 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7435 * have no real way to know for sure.
7437 * We process the blocks one root at a time, and we start from the lowest root
7438 * objectid and go to the highest. So we can just lookup the owner backref for
7439 * the record and if we don't find it then we know it doesn't exist and we have a FULL BACKREF.
7442 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7443 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7444 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached from the root described by @ri,
 * should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and mark the extent record as
 * bad_full_backref when the result contradicts a state recorded earlier.
 *
 * @extent_cache: cache holding the extent record for @buf
 * @ri:           root item record of the tree being walked
 */
7446 static int calc_extent_flag(struct cache_tree *extent_cache,
7447 struct extent_buffer *buf,
7448 struct root_item_record *ri,
7451 struct extent_record *rec;
7452 struct cache_extent *cache;
7453 struct tree_backref *tback;
7456 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7457 /* we have added this extent before */
7461 rec = container_of(cache, struct extent_record, cache);
7464 * Except file/reloc tree, we can not have
7467 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* buf is the root node of the tree being walked */
7472 if (buf->start == ri->bytenr)
7475 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* owner recorded in the header is the root we came from */
7478 owner = btrfs_header_owner(buf);
7479 if (owner == ri->objectid)
/* look for a backref keyed by the header owner, with parent 0 */
7482 tback = find_tree_backref(rec, 0, owner);
/* result is "not full backref": a recorded state other than 0 is a conflict */
7487 if (rec->flag_block_full_backref != FLAG_UNSET &&
7488 rec->flag_block_full_backref != 0)
7489 rec->bad_full_backref = 1;
/* result is "full backref": a recorded state other than 1 is a conflict */
7492 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7493 if (rec->flag_block_full_backref != FLAG_UNSET &&
7494 rec->flag_block_full_backref != 1)
7495 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" to stderr, with
 * the type and root id rendered by print_key_type() and print_objectid().
 */
7499 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7501 fprintf(stderr, "Invalid key type(");
7502 print_key_type(stderr, 0, key_type);
7503 fprintf(stderr, ") found in root(");
7504 print_objectid(stderr, rootid, 0);
7505 fprintf(stderr, ")\n");
7509 * Check if the key is valid with its extent buffer.
7511 * This is an early check in case an invalid key exists in an extent buffer.
7512 * It is not comprehensive yet, but should prevent a wrong key/item from being passed on.
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 * Each group of key types below is tied to the tree it is allowed in; a
 * mismatch is reported through report_mismatch_key_root().
 */
7515 static int check_type_with_root(u64 rootid, u8 key_type)
7518 /* Only valid in chunk tree */
7519 case BTRFS_DEV_ITEM_KEY:
7520 case BTRFS_CHUNK_ITEM_KEY:
7521 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7524 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, while every
 * other label here is a key type. This looks like it was meant to be
 * BTRFS_EXTENT_CSUM_KEY; as written the csum check probably never matches.
 * Confirm and fix.
 */
7525 case BTRFS_CSUM_TREE_OBJECTID:
7526 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* extent, metadata and block group items: extent tree only */
7530 case BTRFS_EXTENT_ITEM_KEY:
7531 case BTRFS_METADATA_ITEM_KEY:
7532 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7533 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* root items: root tree only */
7536 case BTRFS_ROOT_ITEM_KEY:
7537 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* dev extents: dev tree only */
7540 case BTRFS_DEV_EXTENT_KEY:
7541 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7547 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block: pick it from the pending sets, read it, settle
 * its FULL_BACKREF state and feed its contents into the check caches.
 *
 * For a leaf every item is dispatched by key type into the matching cache
 * (extents, csums, chunks, devices, block groups, dev extents, backrefs,
 * orphan items, file extents). For a node every child pointer is recorded
 * as an extent with a tree backref and queued for a later call.
 *
 * @bits:    scratch array that pick_next_pending() fills with candidates
 * @last:    updated to the start of the block chosen by this call
 * @pending, @seen, @reada, @nodes: bookkeeping trees for blocks still to be
 *           read, already seen, read ahead, and interior nodes
 * @ri:      root item record of the tree currently being walked
 */
7551 static int run_next_block(struct btrfs_root *root,
7552 struct block_info *bits,
7555 struct cache_tree *pending,
7556 struct cache_tree *seen,
7557 struct cache_tree *reada,
7558 struct cache_tree *nodes,
7559 struct cache_tree *extent_cache,
7560 struct cache_tree *chunk_cache,
7561 struct rb_root *dev_cache,
7562 struct block_group_tree *block_group_cache,
7563 struct device_extent_tree *dev_extent_cache,
7564 struct root_item_record *ri)
7566 struct extent_buffer *buf;
7567 struct extent_record *rec = NULL;
7578 struct btrfs_key key;
7579 struct cache_extent *cache;
7582 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7583 bits_nr, &reada_bits);
/* remember the candidates in `reada` and start readahead on them */
7588 for(i = 0; i < nritems; i++) {
7589 ret = add_cache_extent(reada, bits[i].start,
7594 /* fixme, get the parent transid */
7595 readahead_tree_block(root, bits[i].start,
/* the first candidate is the block handled by this call */
7599 *last = bits[0].start;
7600 bytenr = bits[0].start;
7601 size = bits[0].size;
/* drop the block from every bookkeeping tree it may still be in */
7603 cache = lookup_cache_extent(pending, bytenr, size);
7605 remove_cache_extent(pending, cache);
7608 cache = lookup_cache_extent(reada, bytenr, size);
7610 remove_cache_extent(reada, cache);
7613 cache = lookup_cache_extent(nodes, bytenr, size);
7615 remove_cache_extent(nodes, cache);
/* the extent record, if present, supplies the expected parent generation */
7618 cache = lookup_cache_extent(extent_cache, bytenr, size);
7620 rec = container_of(cache, struct extent_record, cache);
7621 gen = rec->parent_generation;
7624 /* fixme, get the real parent transid */
7625 buf = read_tree_block(root, bytenr, size, gen);
7626 if (!extent_buffer_uptodate(buf)) {
7627 record_bad_block_io(root->fs_info,
7628 extent_cache, bytenr, size);
7632 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being re-initialized the flags are looked up in
 * it; calc_extent_flag() is the other source. When calc_extent_flag() fails
 * FULL_BACKREF is assumed.
 */
7635 if (!init_extent_tree) {
7636 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7637 btrfs_header_level(buf), 1, NULL,
7640 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7642 fprintf(stderr, "Couldn't calc extent flags\n");
7643 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7648 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7650 fprintf(stderr, "Couldn't calc extent flags\n");
7651 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7655 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7657 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7658 ri->objectid == btrfs_header_owner(buf)) {
7660 * Ok we got to this block from it's original owner and
7661 * we have FULL_BACKREF set. Relocation can leave
7662 * converted blocks over so this is altogether possible,
7663 * however it's not possible if the generation > the
7664 * last snapshot, so check for this case.
7666 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7667 btrfs_header_generation(buf) > ri->last_snapshot) {
7668 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7669 rec->bad_full_backref = 1;
/* reloc tree, or RELOC header flag: force FULL_BACKREF and flag the record */
7674 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7675 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7676 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7677 rec->bad_full_backref = 1;
/* store the final decision in the extent record */
7681 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7682 rec->flag_block_full_backref = 1;
7686 rec->flag_block_full_backref = 0;
7688 owner = btrfs_header_owner(buf);
7691 ret = check_block(root, extent_cache, buf, flags);
7695 if (btrfs_is_leaf(buf)) {
/* unused bytes in the leaf are counted as wasted btree space */
7696 btree_space_waste += btrfs_leaf_free_space(root, buf);
7697 for (i = 0; i < nritems; i++) {
7698 struct btrfs_file_extent_item *fi;
7699 btrfs_item_key_to_cpu(buf, &key, i);
7701 * Check key type against the leaf owner.
7702 * Could filter quite a lot of early error if
7705 if (check_type_with_root(btrfs_header_owner(buf),
7707 fprintf(stderr, "ignoring invalid key\n");
/* extent and metadata items go through the same handler */
7710 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7711 process_extent_item(root, extent_cache, buf,
7715 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7716 process_extent_item(root, extent_cache, buf,
7720 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7722 btrfs_item_size_nr(buf, i);
7725 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7726 process_chunk_item(chunk_cache, &key, buf, i);
7729 if (key.type == BTRFS_DEV_ITEM_KEY) {
7730 process_device_item(dev_cache, &key, buf, i);
7733 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7734 process_block_group_item(block_group_cache,
7738 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7739 process_device_extent_item(dev_extent_cache,
7744 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7745 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7746 process_extent_ref_v0(extent_cache, buf, i);
/* keyed tree block ref: key.offset is passed as the root, parent is 0 */
7753 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7754 ret = add_tree_backref(extent_cache,
7755 key.objectid, 0, key.offset, 0);
7757 error("add_tree_backref failed: %s",
/* keyed shared block ref: key.offset is passed as the parent, root is 0 */
7761 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7762 ret = add_tree_backref(extent_cache,
7763 key.objectid, key.offset, 0, 0);
7765 error("add_tree_backref failed: %s",
7769 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7770 struct btrfs_extent_data_ref *ref;
7771 ref = btrfs_item_ptr(buf, i,
7772 struct btrfs_extent_data_ref);
7773 add_data_backref(extent_cache,
7775 btrfs_extent_data_ref_root(buf, ref),
7776 btrfs_extent_data_ref_objectid(buf,
7778 btrfs_extent_data_ref_offset(buf, ref),
7779 btrfs_extent_data_ref_count(buf, ref),
7780 0, root->sectorsize);
7783 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7784 struct btrfs_shared_data_ref *ref;
7785 ref = btrfs_item_ptr(buf, i,
7786 struct btrfs_shared_data_ref);
7787 add_data_backref(extent_cache,
7788 key.objectid, key.offset, 0, 0, 0,
7789 btrfs_shared_data_ref_count(buf, ref),
7790 0, root->sectorsize);
/* an orphan item is copied into a bad_item and queued on delete_items */
7793 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7794 struct bad_item *bad;
7796 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7800 bad = malloc(sizeof(struct bad_item));
7803 INIT_LIST_HEAD(&bad->list);
7804 memcpy(&bad->key, &key,
7805 sizeof(struct btrfs_key));
7806 bad->root_id = owner;
7807 list_add_tail(&bad->list, &delete_items);
/* the remaining code in the loop body deals with file extent items */
7810 if (key.type != BTRFS_EXTENT_DATA_KEY)
7812 fi = btrfs_item_ptr(buf, i,
7813 struct btrfs_file_extent_item);
/* inline extents are tested for separately */
7814 if (btrfs_file_extent_type(buf, fi) ==
7815 BTRFS_FILE_EXTENT_INLINE)
/* as are extents with a disk bytenr of 0 */
7817 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7820 data_bytes_allocated +=
7821 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this compares the running total, not this item's
 * disk_num_bytes, with the sector size - confirm that is intended.
 */
7822 if (data_bytes_allocated < root->sectorsize) {
7825 data_bytes_referenced +=
7826 btrfs_file_extent_num_bytes(buf, fi);
/* the backref offset is the file offset minus the offset into the extent */
7827 add_data_backref(extent_cache,
7828 btrfs_file_extent_disk_bytenr(buf, fi),
7829 parent, owner, key.objectid, key.offset -
7830 btrfs_file_extent_offset(buf, fi), 1, 1,
7831 btrfs_file_extent_disk_num_bytes(buf, fi));
7835 struct btrfs_key first_key;
7837 first_key.objectid = 0;
7840 btrfs_item_key_to_cpu(buf, &first_key, 0);
7841 level = btrfs_header_level(buf);
/* interior node: walk the child pointers */
7842 for (i = 0; i < nritems; i++) {
7843 struct extent_record tmpl;
7845 ptr = btrfs_node_blockptr(buf, i);
7846 size = root->nodesize;
7847 btrfs_node_key_to_cpu(buf, &key, i);
/* at the drop level, children are tested against the drop key */
7849 if ((level == ri->drop_level)
7850 && is_dropped_key(&key, &ri->drop_key)) {
/* record the child extent together with its parent key and generation */
7855 memset(&tmpl, 0, sizeof(tmpl));
7856 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7857 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7862 tmpl.max_size = size;
7863 ret = add_extent_rec(extent_cache, &tmpl);
7867 ret = add_tree_backref(extent_cache, ptr, parent,
7870 error("add_tree_backref failed: %s",
/* queue the child in `nodes` or in `pending` */
7876 add_pending(nodes, seen, ptr, size);
7878 add_pending(pending, seen, ptr, size);
/* unused key pointer slots count as wasted btree space */
7881 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7882 nritems) * sizeof(struct btrfs_key_ptr);
/* global statistics */
7884 total_btree_bytes += buf->len;
7885 if (fs_root_objectid(btrfs_header_owner(buf)))
7886 total_fs_tree_bytes += buf->len;
7887 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7888 total_extent_tree_bytes += buf->len;
/* reloc-tree block with mixed backref rev but no RELOC header flag */
7889 if (!found_old_backref &&
7890 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7891 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7892 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7893 found_old_backref = 1;
7895 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and record it in @extent_cache.
 *
 * A block with level > 0 goes into @nodes, a leaf into @pending. The tree
 * backref is added with the block itself as parent for the reloc tree or a
 * pre-mixed backref revision, and with parent 0 and the root objectid
 * otherwise.
 */
7899 static int add_root_to_pending(struct extent_buffer *buf,
7900 struct cache_tree *extent_cache,
7901 struct cache_tree *pending,
7902 struct cache_tree *seen,
7903 struct cache_tree *nodes,
7906 struct extent_record tmpl;
7909 if (btrfs_header_level(buf) > 0)
7910 add_pending(nodes, seen, buf->start, buf->len);
7912 add_pending(pending, seen, buf->start, buf->len);
/* extent record covering the root block itself */
7914 memset(&tmpl, 0, sizeof(tmpl));
7915 tmpl.start = buf->start;
7920 tmpl.max_size = buf->len;
7921 add_extent_rec(extent_cache, &tmpl);
7923 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7924 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7925 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7928 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7933 /* as we fix the tree, we might be deleting blocks that
7934 * we're tracking for repair. This hook makes sure we
7935 * remove any backrefs for blocks as we are fixing them.
/*
 * Update the fsck extent cache when an extent reference is dropped: locate
 * the extent record for [bytenr, bytenr + num_bytes), find the matching
 * data or tree backref and lower its counters, then let
 * maybe_free_extent_rec() decide whether the record can go.
 */
7937 static int free_extent_hook(struct btrfs_trans_handle *trans,
7938 struct btrfs_root *root,
7939 u64 bytenr, u64 num_bytes, u64 parent,
7940 u64 root_objectid, u64 owner, u64 offset,
7943 struct extent_record *rec;
7944 struct cache_extent *cache;
7946 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data */
7948 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7949 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7953 rec = container_of(cache, struct extent_record, cache);
7955 struct data_backref *back;
7956 back = find_data_backref(rec, parent, root_objectid, owner,
7957 offset, 1, bytenr, num_bytes);
/* reference that was found: lower the found count and the record's refs */
7960 if (back->node.found_ref) {
7961 back->found_ref -= refs_to_drop;
7963 rec->refs -= refs_to_drop;
/* reference from the extent tree: lower num_refs and extent_item_refs */
7965 if (back->node.found_extent_tree) {
7966 back->num_refs -= refs_to_drop;
7967 if (rec->extent_item_refs)
7968 rec->extent_item_refs -= refs_to_drop;
/* counters that reached zero clear their found flags */
7970 if (back->found_ref == 0)
7971 back->node.found_ref = 0;
7972 if (back->num_refs == 0)
7973 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked only while found_ref is still set;
 * confirm this is not meant to be !found_ref.
 */
7975 if (!back->node.found_extent_tree && back->node.found_ref) {
7976 list_del(&back->node.list);
7980 struct tree_backref *back;
7981 back = find_tree_backref(rec, parent, root_objectid);
7984 if (back->node.found_ref) {
7987 back->node.found_ref = 0;
7989 if (back->node.found_extent_tree) {
7990 if (rec->extent_item_refs)
7991 rec->extent_item_refs--;
7992 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data case above - confirm */
7994 if (!back->node.found_extent_tree && back->node.found_ref) {
7995 list_del(&back->node.list);
7999 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items whose objectid is `bytenr`: extent and
 * metadata items plus the keyed tree/data/shared/v0 reference items.
 *
 * The search starts from the largest offset for `bytenr`. Items of other
 * types are stepped over by lowering the search key below them. When an
 * EXTENT_ITEM or METADATA_ITEM is deleted, btrfs_update_block_group() is
 * called for its size.
 */
8004 static int delete_extent_records(struct btrfs_trans_handle *trans,
8005 struct btrfs_root *root,
8006 struct btrfs_path *path,
8009 struct btrfs_key key;
8010 struct btrfs_key found_key;
8011 struct extent_buffer *leaf;
8016 key.objectid = bytenr;
8018 key.offset = (u64)-1;
8021 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
/* slot 0: there is no earlier item in this leaf */
8028 if (path->slots[0] == 0)
8034 leaf = path->nodes[0];
8035 slot = path->slots[0];
8037 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* item belongs to a different bytenr */
8038 if (found_key.objectid != bytenr)
/* not one of the item types handled here: step the search key below it */
8041 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8042 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8043 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8044 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8045 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8046 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8047 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8048 btrfs_release_path(path);
/* type 0 cannot go lower, so the offset is decremented instead */
8049 if (found_key.type == 0) {
8050 if (found_key.offset == 0)
8052 key.offset = found_key.offset - 1;
8053 key.type = found_key.type;
/* continue with the next lower type, at its largest offset */
8055 key.type = found_key.type - 1;
8056 key.offset = (u64)-1;
8060 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8061 found_key.objectid, found_key.type, found_key.offset);
8063 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8066 btrfs_release_path(path);
/* size is the key offset for an EXTENT_ITEM, nodesize for a METADATA_ITEM */
8068 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8069 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8070 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8071 found_key.offset : root->nodesize;
8073 ret = btrfs_update_block_group(trans, root, bytenr,
8080 btrfs_release_path(path);
8085 * for a single backref, this will allocate a new extent
8086 * and add the backref to it.
/*
 * Write @rec into the extent tree for one backref @back.
 *
 * An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with zero
 * refs and either the DATA flag or the TREE_BLOCK flag plus @flags; a tree
 * block also gets a btrfs_tree_block_info. The references are then added
 * through btrfs_inc_extent_ref(): one call per found ref for a data
 * backref, a single call for a tree backref.
 */
8088 static int record_extent(struct btrfs_trans_handle *trans,
8089 struct btrfs_fs_info *info,
8090 struct btrfs_path *path,
8091 struct extent_record *rec,
8092 struct extent_backref *back,
8093 int allocated, u64 flags)
8096 struct btrfs_root *extent_root = info->extent_root;
8097 struct extent_buffer *leaf;
8098 struct btrfs_key ins_key;
8099 struct btrfs_extent_item *ei;
8100 struct data_backref *dback;
8101 struct btrfs_tree_block_info *bi;
/* max_size is raised to at least one node */
8104 rec->max_size = max_t(u64, rec->max_size,
8105 info->extent_root->nodesize);
8108 u32 item_size = sizeof(*ei);
/* extra room for the tree block info */
8111 item_size += sizeof(*bi);
8113 ins_key.objectid = rec->start;
8114 ins_key.offset = rec->max_size;
8115 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8117 ret = btrfs_insert_empty_item(trans, extent_root, path,
8118 &ins_key, item_size);
8122 leaf = path->nodes[0];
8123 ei = btrfs_item_ptr(leaf, path->slots[0],
8124 struct btrfs_extent_item);
/* start with zero refs; they are added one by one further down */
8126 btrfs_set_extent_refs(leaf, ei, 0);
8127 btrfs_set_extent_generation(leaf, ei, rec->generation);
8129 if (back->is_data) {
8130 btrfs_set_extent_flags(leaf, ei,
8131 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' below - harmless empty declaration */
8133 struct btrfs_disk_key copy_key;;
/* the tree block info sits directly behind the extent item */
8135 bi = (struct btrfs_tree_block_info *)(ei + 1);
8136 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" on the lines below looks like an HTML-entity
 * mangled "&copy_key" - confirm against the upstream file.
 */
8139 btrfs_set_disk_key_objectid(©_key,
8140 rec->info_objectid);
8141 btrfs_set_disk_key_type(©_key, 0);
8142 btrfs_set_disk_key_offset(©_key, 0);
8144 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8145 btrfs_set_tree_block_key(leaf, bi, ©_key);
8147 btrfs_set_extent_flags(leaf, ei,
8148 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8151 btrfs_mark_buffer_dirty(leaf);
/* block group accounting for the new extent */
8152 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8153 rec->max_size, 1, 0);
8156 btrfs_release_path(path);
8159 if (back->is_data) {
8163 dback = to_data_backref(back);
/* a full backref is keyed by its parent block */
8164 if (back->full_backref)
8165 parent = dback->parent;
/* one btrfs_inc_extent_ref() call per reference that was found */
8169 for (i = 0; i < dback->found_ref; i++) {
8170 /* if parent != 0, we're doing a full backref
8171 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8172 * just makes the backref allocator create a data
8175 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8176 rec->start, rec->max_size,
8180 BTRFS_FIRST_FREE_OBJECTID :
8186 fprintf(stderr, "adding new data backref"
8187 " on %llu %s %llu owner %llu"
8188 " offset %llu found %d\n",
8189 (unsigned long long)rec->start,
8190 back->full_backref ?
8192 back->full_backref ?
8193 (unsigned long long)parent :
8194 (unsigned long long)dback->root,
8195 (unsigned long long)dback->owner,
8196 (unsigned long long)dback->offset,
8200 struct tree_backref *tback;
8202 tback = to_tree_backref(back);
8203 if (back->full_backref)
8204 parent = tback->parent;
/* a tree backref is added with a single reference */
8208 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8209 rec->start, rec->max_size,
8210 parent, tback->root, 0, 0);
8211 fprintf(stderr, "adding new tree backref on "
8212 "start %llu len %llu parent %llu root %llu\n",
8213 rec->start, rec->max_size, parent, tback->root);
8216 btrfs_release_path(path);
/*
 * Look through @entries for the extent_entry whose bytenr and bytes both
 * match the given values.
 */
8220 static struct extent_entry *find_entry(struct list_head *entries,
8221 u64 bytenr, u64 bytes)
8223 struct extent_entry *entry = NULL;
8225 list_for_each_entry(entry, entries, list) {
8226 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Scan @entries for the most trustworthy extent_entry, tracked in `best`.
 *
 * Entries whose broken count equals their count are not trusted. Among the
 * rest, counts are compared; an entry whose count equals that of the
 * current best (or of the previous entry) leaves the choice ambiguous, so
 * the scan continues.
 */
8233 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8235 struct extent_entry *entry, *best = NULL, *prev = NULL;
8237 list_for_each_entry(entry, entries, list) {
8239 * If there are as many broken entries as entries then we know
8240 * not to trust this particular entry.
8242 if (entry->broken == entry->count)
8246 * Special case, when there are only two entries and 'best' is
8256 * If our current entry == best then we can't be sure our best
8257 * is really the best, so we need to keep searching.
8259 if (best && best->count == entry->count) {
8265 /* Prev == entry, not good enough, have to keep searching */
8266 if (!prev->broken && prev->count == entry->count)
/* the entry with the larger count is taken as best */
8270 best = (prev->count > entry->count) ? prev : entry;
8271 else if (best->count < entry->count)
/*
 * Rewrite the file extent item referenced by @dback so that it agrees with
 * the extent described by @entry.
 *
 * Visible steps: read the fs root named by dback->root, search it for the
 * EXTENT_DATA key (dback->owner, dback->offset) and walk forward until a file
 * extent with dback->disk_bytenr and dback->bytes is found. Then start a
 * transaction, search for that key again with COW enabled and update
 * disk_bytenr, offset and disk_num_bytes (plus ram_bytes for uncompressed
 * extents) from @entry before marking the leaf dirty and committing.
 *
 * A compressed extent whose disk_bytenr differs from entry->bytenr, and a ref
 * that lies outside the entry range, are reported on stderr instead of being
 * repaired. NOTE(review): the error codes used on those paths are not visible
 * in this listing.
 *
 * Returns ret when it is non-zero, otherwise the commit result (err).
 */
8279 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8280 struct data_backref *dback, struct extent_entry *entry)
8282 struct btrfs_trans_handle *trans;
8283 struct btrfs_root *root;
8284 struct btrfs_file_extent_item *fi;
8285 struct extent_buffer *leaf;
8286 struct btrfs_key key;
8290 key.objectid = dback->root;
8291 key.type = BTRFS_ROOT_ITEM_KEY;
8292 key.offset = (u64)-1;
8293 root = btrfs_read_fs_root(info, &key);
8295 fprintf(stderr, "Couldn't find root for our ref\n");
8300 * The backref points to the original offset of the extent if it was
8301 * split, so we need to search down to the offset we have and then walk
8302 * forward until we find the backref we're looking for.
8304 key.objectid = dback->owner;
8305 key.type = BTRFS_EXTENT_DATA_KEY;
8306 key.offset = dback->offset;
8307 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8309 fprintf(stderr, "Error looking up ref %d\n", ret);
8314 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8315 ret = btrfs_next_leaf(root, path);
8317 fprintf(stderr, "Couldn't find our ref, next\n");
8321 leaf = path->nodes[0];
8322 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8323 if (key.objectid != dback->owner ||
8324 key.type != BTRFS_EXTENT_DATA_KEY) {
8325 fprintf(stderr, "Couldn't find our ref, search\n");
8328 fi = btrfs_item_ptr(leaf, path->slots[0],
8329 struct btrfs_file_extent_item);
8330 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8331 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8333 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8338 btrfs_release_path(path);
8340 trans = btrfs_start_transaction(root, 1);
8342 return PTR_ERR(trans);
8345 * Ok we have the key of the file extent we want to fix, now we can cow
8346 * down to the thing and fix it.
8348 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8350 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8351 key.objectid, key.type, key.offset, ret);
8355 fprintf(stderr, "Well that's odd, we just found this key "
8356 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8361 leaf = path->nodes[0];
8362 fi = btrfs_item_ptr(leaf, path->slots[0],
8363 struct btrfs_file_extent_item);
8365 if (btrfs_file_extent_compression(leaf, fi) &&
8366 dback->disk_bytenr != entry->bytenr) {
8367 fprintf(stderr, "Ref doesn't match the record start and is "
8368 "compressed, please take a btrfs-image of this file "
8369 "system and send it to a btrfs developer so they can "
8370 "complete this functionality for bytenr %Lu\n",
8371 dback->disk_bytenr);
8376 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8377 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8378 } else if (dback->disk_bytenr > entry->bytenr) {
8379 u64 off_diff, offset;
8381 off_diff = dback->disk_bytenr - entry->bytenr;
8382 offset = btrfs_file_extent_offset(leaf, fi);
8383 if (dback->disk_bytenr + offset +
8384 btrfs_file_extent_num_bytes(leaf, fi) >
8385 entry->bytenr + entry->bytes) {
8386 fprintf(stderr, "Ref is past the entry end, please "
8387 "take a btrfs-image of this file system and "
8388 "send it to a btrfs developer, ref %Lu\n",
8389 dback->disk_bytenr);
8394 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8395 btrfs_set_file_extent_offset(leaf, fi, offset);
8396 } else if (dback->disk_bytenr < entry->bytenr) {
8399 offset = btrfs_file_extent_offset(leaf, fi);
8400 if (dback->disk_bytenr + offset < entry->bytenr) {
8401 fprintf(stderr, "Ref is before the entry start, please"
8402 " take a btrfs-image of this file system and "
8403 "send it to a btrfs developer, ref %Lu\n",
8404 dback->disk_bytenr);
8409 offset += dback->disk_bytenr;
8410 offset -= entry->bytenr;
8411 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8412 btrfs_set_file_extent_offset(leaf, fi, offset);
8415 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8418 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8419 * only do this if we aren't using compression, otherwise it's a
8422 if (!btrfs_file_extent_compression(leaf, fi))
8423 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8425 printf("ram bytes may be wrong?\n");
8426 btrfs_mark_buffer_dirty(leaf);
8428 err = btrfs_commit_transaction(trans, root);
8429 btrfs_release_path(path);
8430 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on a single (bytenr, bytes) pair.
 *
 * The first pass groups every non-full data backref that has found_ref set
 * into extent_entry buckets keyed by (disk_bytenr, bytes) and notes whether
 * any of them disagrees with rec->start / rec->nr or is marked broken. With
 * at most one bucket and no mismatch nothing needs repairing. Otherwise
 * find_most_right_entry() picks the winning bucket; the extent record itself
 * (rec->start, rec->nr) can be added as an extra candidate when broken
 * backrefs were seen and rec->found_rec is set. Every backref that differs
 * from the winner is then handed to repair_ref().
 *
 * A winner whose bytenr differs from rec->start is reported and not handled.
 * The temporary entries list is emptied before returning.
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * comment near the end says callers must drop their cache and rescan after a
 * repair - confirm the exact return code used for that.
 */
8433 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8434 struct extent_record *rec)
8436 struct extent_backref *back;
8437 struct data_backref *dback;
8438 struct extent_entry *entry, *best = NULL;
8441 int broken_entries = 0;
8446 * Metadata is easy and the backrefs should always agree on bytenr and
8447 * size, if not we've got bigger issues.
8452 list_for_each_entry(back, &rec->backrefs, list) {
8453 if (back->full_backref || !back->is_data)
8456 dback = to_data_backref(back);
8459 * We only pay attention to backrefs that we found a real
8462 if (dback->found_ref == 0)
8466 * For now we only catch when the bytes don't match, not the
8467 * bytenr. We can easily do this at the same time, but I want
8468 * to have a fs image to test on before we just add repair
8469 * functionality willy-nilly so we know we won't screw up the
8473 entry = find_entry(&entries, dback->disk_bytenr,
8476 entry = malloc(sizeof(struct extent_entry));
8481 memset(entry, 0, sizeof(*entry));
8482 entry->bytenr = dback->disk_bytenr;
8483 entry->bytes = dback->bytes;
8484 list_add_tail(&entry->list, &entries);
8489 * If we only have on entry we may think the entries agree when
8490 * in reality they don't so we have to do some extra checking.
8492 if (dback->disk_bytenr != rec->start ||
8493 dback->bytes != rec->nr || back->broken)
8504 /* Yay all the backrefs agree, carry on good sir */
8505 if (nr_entries <= 1 && !mismatch)
8508 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8509 "%Lu\n", rec->start);
8512 * First we want to see if the backrefs can agree amongst themselves who
8513 * is right, so figure out which one of the entries has the highest
8516 best = find_most_right_entry(&entries);
8519 * Ok so we may have an even split between what the backrefs think, so
8520 * this is where we use the extent ref to see what it thinks.
8523 entry = find_entry(&entries, rec->start, rec->nr);
8524 if (!entry && (!broken_entries || !rec->found_rec)) {
8525 fprintf(stderr, "Backrefs don't agree with each other "
8526 "and extent record doesn't agree with anybody,"
8527 " so we can't fix bytenr %Lu bytes %Lu\n",
8528 rec->start, rec->nr);
8531 } else if (!entry) {
8533 * Ok our backrefs were broken, we'll assume this is the
8534 * correct value and add an entry for this range.
8536 entry = malloc(sizeof(struct extent_entry));
8541 memset(entry, 0, sizeof(*entry));
8542 entry->bytenr = rec->start;
8543 entry->bytes = rec->nr;
8544 list_add_tail(&entry->list, &entries);
8548 best = find_most_right_entry(&entries);
8550 fprintf(stderr, "Backrefs and extent record evenly "
8551 "split on who is right, this is going to "
8552 "require user input to fix bytenr %Lu bytes "
8553 "%Lu\n", rec->start, rec->nr);
8560 * I don't think this can happen currently as we'll abort() if we catch
8561 * this case higher up, but in case somebody removes that we still can't
8562 * deal with it properly here yet, so just bail out of that's the case.
8564 if (best->bytenr != rec->start) {
8565 fprintf(stderr, "Extent start and backref starts don't match, "
8566 "please use btrfs-image on this file system and send "
8567 "it to a btrfs developer so they can make fsck fix "
8568 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8569 rec->start, rec->nr);
8575 * Ok great we all agreed on an extent record, let's go find the real
8576 * references and fix up the ones that don't match.
8578 list_for_each_entry(back, &rec->backrefs, list) {
8579 if (back->full_backref || !back->is_data)
8582 dback = to_data_backref(back);
8585 * Still ignoring backrefs that don't have a real ref attached
8588 if (dback->found_ref == 0)
8591 if (dback->bytes == best->bytes &&
8592 dback->disk_bytenr == best->bytenr)
8595 ret = repair_ref(info, path, dback, best);
8601 * Ok we messed with the actual refs, which means we need to drop our
8602 * entire cache and go back and rescan. I know this is a huge pain and
8603 * adds a lot of extra work, but it's the only way to be safe. Once all
8604 * the backrefs agree we may not need to do anything to the extent
8609 while (!list_empty(&entries)) {
8610 entry = list_entry(entries.next, struct extent_entry, list);
8611 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with the first record queued on rec->dups,
 * which the in-line comment describes as the one created from a real
 * BTRFS_EXTENT_ITEM_KEY.
 *
 * Nothing is swapped when rec->found_rec is set or rec->num_duplicates > 1.
 * Otherwise the duplicate (good) takes over the refs and backrefs of @rec and
 * absorbs any overlapping records still in the cache: an overlapping record
 * with found_rec set or with duplicates of its own is moved onto good->dups
 * and good is queued on the global duplicate_extents list, while any other
 * overlapping record just has its refs and backrefs merged into good.
 * Finally good is inserted into the cache.
 *
 * Returns 0 when good still has duplicates that need deleting, 1 otherwise.
 * NOTE(review): the value returned by the early exit is not visible here.
 */
8617 static int process_duplicates(struct cache_tree *extent_cache,
8618 struct extent_record *rec)
8620 struct extent_record *good, *tmp;
8621 struct cache_extent *cache;
8625 * If we found a extent record for this extent then return, or if we
8626 * have more than one duplicate we are likely going to need to delete
8629 if (rec->found_rec || rec->num_duplicates > 1)
8632 /* Shouldn't happen but just in case */
8633 BUG_ON(!rec->num_duplicates);
8636 * So this happens if we end up with a backref that doesn't match the
8637 * actual extent entry. So either the backref is bad or the extent
8638 * entry is bad. Either way we want to have the extent_record actually
8639 * reflect what we found in the extent_tree, so we need to take the
8640 * duplicate out and use that as the extent_record since the only way we
8641 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8643 remove_cache_extent(extent_cache, &rec->cache);
8645 good = to_extent_record(rec->dups.next);
8646 list_del_init(&good->list);
8647 INIT_LIST_HEAD(&good->backrefs);
8648 INIT_LIST_HEAD(&good->dups);
8649 good->cache.start = good->start;
8650 good->cache.size = good->nr;
8651 good->content_checked = 0;
8652 good->owner_ref_checked = 0;
8653 good->num_duplicates = 0;
8654 good->refs = rec->refs;
8655 list_splice_init(&rec->backrefs, &good->backrefs);
8657 cache = lookup_cache_extent(extent_cache, good->start,
8661 tmp = container_of(cache, struct extent_record, cache);
8664 * If we find another overlapping extent and it's found_rec is
8665 * set then it's a duplicate and we need to try and delete
8668 if (tmp->found_rec || tmp->num_duplicates > 0) {
8669 if (list_empty(&good->list))
8670 list_add_tail(&good->list,
8671 &duplicate_extents);
8672 good->num_duplicates += tmp->num_duplicates + 1;
8673 list_splice_init(&tmp->dups, &good->dups);
8674 list_del_init(&tmp->list);
8675 list_add_tail(&tmp->list, &good->dups);
8676 remove_cache_extent(extent_cache, &tmp->cache);
8681 * Ok we have another non extent item backed extent rec, so lets
8682 * just add it to this extent and carry on like we did above.
8684 good->refs += tmp->refs;
8685 list_splice_init(&tmp->backrefs, &good->backrefs);
8686 remove_cache_extent(extent_cache, &tmp->cache);
8689 ret = insert_cache_extent(extent_cache, &good->cache);
8692 return good->num_duplicates ? 0 : 1;
/*
 * Delete the duplicate extent items recorded for @rec from the extent tree.
 *
 * The records on rec->dups are compared by start and nr to choose the record
 * that covers all of the duplicates; an overlap that is not fully covered is
 * reported on stderr and not handled. @rec and the duplicates are collected
 * on a local delete list and, inside one transaction on the extent root,
 * every listed record with found_rec set is looked up by
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with btrfs_del_item().
 * A metadata record is not expected here and is reported.
 *
 * Both the delete list and whatever is left on rec->dups are drained before
 * returning, and rec->num_duplicates is reset to 0 when ret and nr_del are
 * both zero.
 *
 * Returns ret when it is non-zero, otherwise nr_del, which the caller treats
 * as the number of entries deleted.
 */
8695 static int delete_duplicate_records(struct btrfs_root *root,
8696 struct extent_record *rec)
8698 struct btrfs_trans_handle *trans;
8699 LIST_HEAD(delete_list);
8700 struct btrfs_path path;
8701 struct extent_record *tmp, *good, *n;
8704 struct btrfs_key key;
8706 btrfs_init_path(&path);
8709 /* Find the record that covers all of the duplicates. */
8710 list_for_each_entry(tmp, &rec->dups, list) {
8711 if (good->start < tmp->start)
8713 if (good->nr > tmp->nr)
8716 if (tmp->start + tmp->nr < good->start + good->nr) {
8717 fprintf(stderr, "Ok we have overlapping extents that "
8718 "aren't completely covered by each other, this "
8719 "is going to require more careful thought. "
8720 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8721 tmp->start, tmp->nr, good->start, good->nr);
8728 list_add_tail(&rec->list, &delete_list);
8730 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8733 list_move_tail(&tmp->list, &delete_list);
8736 root = root->fs_info->extent_root;
8737 trans = btrfs_start_transaction(root, 1);
8738 if (IS_ERR(trans)) {
8739 ret = PTR_ERR(trans);
8743 list_for_each_entry(tmp, &delete_list, list) {
8744 if (tmp->found_rec == 0)
8746 key.objectid = tmp->start;
8747 key.type = BTRFS_EXTENT_ITEM_KEY;
8748 key.offset = tmp->nr;
8750 /* Shouldn't happen but just in case */
8751 if (tmp->metadata) {
8752 fprintf(stderr, "Well this shouldn't happen, extent "
8753 "record overlaps but is metadata? "
8754 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8758 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8764 ret = btrfs_del_item(trans, root, &path);
8767 btrfs_release_path(&path);
8770 err = btrfs_commit_transaction(trans, root);
8774 while (!list_empty(&delete_list)) {
8775 tmp = to_extent_record(delete_list.next);
8776 list_del_init(&tmp->list);
8782 while (!list_empty(&rec->dups)) {
8783 tmp = to_extent_record(rec->dups.next);
8784 list_del_init(&tmp->list);
8788 btrfs_release_path(&path);
8790 if (!ret && !nr_del)
8791 rec->num_duplicates = 0;
8793 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec that the tree scan did not find.
 *
 * For each non-full data backref without found_ref, the file extent
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is looked up in the
 * root dback->root. When it exists its on-disk bytenr and num_bytes are read
 * and @extent_cache is consulted for that bytenr; per the in-line comment a
 * backref whose bytenr already has its own extent record is not added. For
 * the others found_ref is incremented and disk_bytenr / bytes are overwritten
 * with the values from the file extent, so that verify_backrefs() can judge
 * them.
 *
 * A root that cannot be read with -ENOENT is skipped as a bad ref; any other
 * root read error is returned to the caller.
 */
8796 static int find_possible_backrefs(struct btrfs_fs_info *info,
8797 struct btrfs_path *path,
8798 struct cache_tree *extent_cache,
8799 struct extent_record *rec)
8801 struct btrfs_root *root;
8802 struct extent_backref *back;
8803 struct data_backref *dback;
8804 struct cache_extent *cache;
8805 struct btrfs_file_extent_item *fi;
8806 struct btrfs_key key;
8810 list_for_each_entry(back, &rec->backrefs, list) {
8811 /* Don't care about full backrefs (poor unloved backrefs) */
8812 if (back->full_backref || !back->is_data)
8815 dback = to_data_backref(back);
8817 /* We found this one, we don't need to do a lookup */
8818 if (dback->found_ref)
8821 key.objectid = dback->root;
8822 key.type = BTRFS_ROOT_ITEM_KEY;
8823 key.offset = (u64)-1;
8825 root = btrfs_read_fs_root(info, &key);
8827 /* No root, definitely a bad ref, skip */
8828 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8830 /* Other err, exit */
8832 return PTR_ERR(root);
8834 key.objectid = dback->owner;
8835 key.type = BTRFS_EXTENT_DATA_KEY;
8836 key.offset = dback->offset;
8837 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8839 btrfs_release_path(path);
8842 /* Didn't find it, we can carry on */
8847 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8848 struct btrfs_file_extent_item);
8849 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8850 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8851 btrfs_release_path(path);
8852 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8854 struct extent_record *tmp;
8855 tmp = container_of(cache, struct extent_record, cache);
8858 * If we found an extent record for the bytenr for this
8859 * particular backref then we can't add it to our
8860 * current extent record. We only want to add backrefs
8861 * that don't have a corresponding extent item in the
8862 * extent tree since they likely belong to this record
8863 * and we need to fix it if it doesn't match bytenrs.
8869 dback->found_ref += 1;
8870 dback->disk_bytenr = bytenr;
8871 dback->bytes = bytes;
8874 * Set this so the verify backref code knows not to trust the
8875 * values in this backref.
8884 * Record orphan data ref into corresponding root.
8886 * Return 0 if the extent item contains data ref and recorded.
8887 * Return 1 if the extent item contains no useful data ref
8888 * On that case, it may contains only shared_dataref or metadata backref
8889 * or the file extent exists(this should be handled by the extent bytenr
8891 * Return <0 if something goes wrong.
8893 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8894 struct extent_record *rec)
8896 struct btrfs_key key;
8897 struct btrfs_root *dest_root;
8898 struct extent_backref *back;
8899 struct data_backref *dback;
8900 struct orphan_data_extent *orphan;
8901 struct btrfs_path path;
8902 int recorded_data_ref = 0;
8907 btrfs_init_path(&path);
8908 list_for_each_entry(back, &rec->backrefs, list) {
8909 if (back->full_backref || !back->is_data ||
8910 !back->found_extent_tree)
8912 dback = to_data_backref(back);
8913 if (dback->found_ref)
8915 key.objectid = dback->root;
8916 key.type = BTRFS_ROOT_ITEM_KEY;
8917 key.offset = (u64)-1;
8919 dest_root = btrfs_read_fs_root(fs_info, &key);
8921 /* For non-exist root we just skip it */
8922 if (IS_ERR(dest_root) || !dest_root)
8925 key.objectid = dback->owner;
8926 key.type = BTRFS_EXTENT_DATA_KEY;
8927 key.offset = dback->offset;
8929 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8930 btrfs_release_path(&path);
8932 * For ret < 0, it's OK since the fs-tree may be corrupted,
8933 * we need to record it for inode/file extent rebuild.
8934 * For ret > 0, we record it only for file extent rebuild.
8935 * For ret == 0, the file extent exists but only bytenr
8936 * mismatch, let the original bytenr fix routine to handle,
8942 orphan = malloc(sizeof(*orphan));
8947 INIT_LIST_HEAD(&orphan->list);
8948 orphan->root = dback->root;
8949 orphan->objectid = dback->owner;
8950 orphan->offset = dback->offset;
8951 orphan->disk_bytenr = rec->cache.start;
8952 orphan->disk_len = rec->cache.size;
8953 list_add(&dest_root->orphan_data_extents, &orphan->list);
8954 recorded_data_ref = 1;
8957 btrfs_release_path(&path);
8959 return !recorded_data_ref;
8965 * when an incorrect extent item is found, this will delete
8966 * all of the existing entries for it and recreate them
8967 * based on what the tree scan found.
/*
 * Rebuild the extent tree items for @rec from the backrefs the scan found.
 *
 * For a data extent whose ref count disagrees with its extent item,
 * find_possible_backrefs() and verify_backrefs() are run first so the
 * backrefs agree with each other. Then, in a transaction on the extent root,
 * the existing extent records are deleted, info->corrupt_blocks is consulted
 * (per the in-line comment, no references are added to a corrupt block) and
 * record_extent() is called for every backref that has found_ref set. The
 * transaction is committed and a success message is printed on stderr.
 *
 * NOTE(review): the error handling between these steps and the final return
 * value are not visible in this listing.
 */
8969 static int fixup_extent_refs(struct btrfs_fs_info *info,
8970 struct cache_tree *extent_cache,
8971 struct extent_record *rec)
8973 struct btrfs_trans_handle *trans = NULL;
8975 struct btrfs_path path;
8976 struct list_head *cur = rec->backrefs.next;
8977 struct cache_extent *cache;
8978 struct extent_backref *back;
8982 if (rec->flag_block_full_backref)
8983 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8985 btrfs_init_path(&path);
8986 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8988 * Sometimes the backrefs themselves are so broken they don't
8989 * get attached to any meaningful rec, so first go back and
8990 * check any of our backrefs that we couldn't find and throw
8991 * them into the list if we find the backref so that
8992 * verify_backrefs can figure out what to do.
8994 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8999 /* step one, make sure all of the backrefs agree */
9000 ret = verify_backrefs(info, &path, rec);
9004 trans = btrfs_start_transaction(info->extent_root, 1);
9005 if (IS_ERR(trans)) {
9006 ret = PTR_ERR(trans);
9010 /* step two, delete all the existing records */
9011 ret = delete_extent_records(trans, info->extent_root, &path,
9017 /* was this block corrupt? If so, don't add references to it */
9018 cache = lookup_cache_extent(info->corrupt_blocks,
9019 rec->start, rec->max_size);
9025 /* step three, recreate all the refs we did find */
9026 while(cur != &rec->backrefs) {
9027 back = to_extent_backref(cur);
9031 * if we didn't find any references, don't create a
9034 if (!back->found_ref)
9037 rec->bad_full_backref = 0;
9038 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9046 int err = btrfs_commit_transaction(trans, info->extent_root);
9052 fprintf(stderr, "Repaired extent references for %llu\n",
9053 (unsigned long long)rec->start);
9055 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root as a BTRFS_METADATA_ITEM_KEY with
 * offset rec->info_level for metadata records, and as a
 * BTRFS_EXTENT_ITEM_KEY with offset rec->max_size otherwise. The search is
 * done with COW enabled inside a transaction; the flags are rewritten, the
 * leaf is marked dirty and the transaction is committed.
 *
 * On the two lookup-failure paths the transaction is committed as well, but
 * the commit result is not checked there. On the normal path the commit
 * result is stored in ret. NOTE(review): the return statements themselves are
 * not visible in this listing.
 */
9059 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9060 struct extent_record *rec)
9062 struct btrfs_trans_handle *trans;
9063 struct btrfs_root *root = fs_info->extent_root;
9064 struct btrfs_path path;
9065 struct btrfs_extent_item *ei;
9066 struct btrfs_key key;
9070 key.objectid = rec->start;
9071 if (rec->metadata) {
9072 key.type = BTRFS_METADATA_ITEM_KEY;
9073 key.offset = rec->info_level;
9075 key.type = BTRFS_EXTENT_ITEM_KEY;
9076 key.offset = rec->max_size;
9079 trans = btrfs_start_transaction(root, 0);
9081 return PTR_ERR(trans);
9083 btrfs_init_path(&path);
9084 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9086 btrfs_release_path(&path);
9087 btrfs_commit_transaction(trans, root);
9090 fprintf(stderr, "Didn't find extent for %llu\n",
9091 (unsigned long long)rec->start);
9092 btrfs_release_path(&path);
9093 btrfs_commit_transaction(trans, root);
9097 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9098 struct btrfs_extent_item);
9099 flags = btrfs_extent_flags(path.nodes[0], ei);
9100 if (rec->flag_block_full_backref) {
9101 fprintf(stderr, "setting full backref on %llu\n",
9102 (unsigned long long)key.objectid);
9103 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9105 fprintf(stderr, "clearing full backref on %llu\n",
9106 (unsigned long long)key.objectid);
9107 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9109 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9110 btrfs_mark_buffer_dirty(path.nodes[0]);
9111 btrfs_release_path(&path);
9112 ret = btrfs_commit_transaction(trans, root);
9114 fprintf(stderr, "Repaired extent flags for %llu\n",
9115 (unsigned long long)rec->start);
9120 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the extent
 * root.
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1). The slot returned by the search
 * is tried first; if its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned for it. Once the pointer is located a message
 * is printed and the slot is removed with btrfs_del_ptr().
 *
 * NOTE(review): what happens when the pointer is not found in this node (the
 * branch that tests for the extent root node) is only partly visible here.
 */
9121 static int prune_one_block(struct btrfs_trans_handle *trans,
9122 struct btrfs_fs_info *info,
9123 struct btrfs_corrupt_block *corrupt)
9126 struct btrfs_path path;
9127 struct extent_buffer *eb;
9131 int level = corrupt->level + 1;
9133 btrfs_init_path(&path);
9135 /* we want to stop at the parent to our busted block */
9136 path.lowest_level = level;
9138 ret = btrfs_search_slot(trans, info->extent_root,
9139 &corrupt->key, &path, -1, 1);
9144 eb = path.nodes[level];
9151 * hopefully the search gave us the block we want to prune,
9152 * lets try that first
9154 slot = path.slots[level];
9155 found = btrfs_node_blockptr(eb, slot);
9156 if (found == corrupt->cache.start)
9159 nritems = btrfs_header_nritems(eb);
9161 /* the search failed, lets scan this node and hope we find it */
9162 for (slot = 0; slot < nritems; slot++) {
9163 found = btrfs_node_blockptr(eb, slot);
9164 if (found == corrupt->cache.start)
9168 * we couldn't find the bad block. TODO, search all the nodes for pointers
9171 if (eb == info->extent_root->node) {
9176 btrfs_release_path(&path);
9181 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9182 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9185 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are taken from the cache starting at offset 0; each one is passed
 * to prune_one_block() and then removed from the cache. The result of
 * prune_one_block() is not checked. The transaction on the extent root is
 * created inside the walk (presumably only once, on first use - confirm) and
 * its commit result is returned.
 */
9189 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9191 struct btrfs_trans_handle *trans = NULL;
9192 struct cache_extent *cache;
9193 struct btrfs_corrupt_block *corrupt;
9196 cache = search_cache_extent(info->corrupt_blocks, 0);
9200 trans = btrfs_start_transaction(info->extent_root, 1);
9202 return PTR_ERR(trans);
9204 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9205 prune_one_block(trans, info, corrupt);
9206 remove_cache_extent(info->corrupt_blocks, cache);
9209 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear every EXTENT_DIRTY range recorded in fs_info->free_space_cache, then
 * walk the block groups with btrfs_lookup_first_block_group(), advancing
 * start past each group (key.objectid + key.offset).
 *
 * NOTE(review): the statement applied to each block group between the lookup
 * and the advance is not visible in this listing; the function name suggests
 * it resets a cached state - confirm.
 */
9213 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9215 struct btrfs_block_group_cache *cache;
9220 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9221 &start, &end, EXTENT_DIRTY);
9224 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9229 cache = btrfs_lookup_first_block_group(fs_info, start);
9234 start = cache->key.objectid + cache->key.offset;
/*
 * Verify every extent record in @extent_cache and, when the global repair
 * flag is set, try to repair what is found.
 *
 * Visible flow:
 *  - every recorded extent and every corrupt block is marked dirty in
 *    fs_info->excluded_extents (per the in-line comment, so that repair does
 *    not allocate from problem extents); corrupt blocks are then pruned and
 *    the cached block groups are reset;
 *  - records queued on duplicate_extents are run through
 *    process_duplicates() and delete_duplicate_records();
 *  - each record is checked for multiple extent items, a ref count mismatch,
 *    a backpointer mismatch, a failed owner ref check, a bad full backref,
 *    metadata crossing a stripe boundary and a chunk type mismatch, with one
 *    message on stderr per problem; fixup_extent_refs() and
 *    fixup_extent_flags() are the repair hooks;
 *  - the record is removed from the cache, its backrefs are freed and its
 *    range may be cleared from excluded_extents again;
 *  - block accounting is fixed up in a transaction on the extent root.
 *
 * The result of prune_corrupt_blocks() is not checked.
 * NOTE(review): the conditions guarding several of these steps and the final
 * return value are not visible in this listing.
 */
9238 static int check_extent_refs(struct btrfs_root *root,
9239 struct cache_tree *extent_cache)
9241 struct extent_record *rec;
9242 struct cache_extent *cache;
9248 * if we're doing a repair, we have to make sure
9249 * we don't allocate from the problem extents.
9250 * In the worst case, this will be all the
9253 cache = search_cache_extent(extent_cache, 0);
9255 rec = container_of(cache, struct extent_record, cache);
9256 set_extent_dirty(root->fs_info->excluded_extents,
9258 rec->start + rec->max_size - 1);
9259 cache = next_cache_extent(cache);
9262 /* pin down all the corrupted blocks too */
9263 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9265 set_extent_dirty(root->fs_info->excluded_extents,
9267 cache->start + cache->size - 1);
9268 cache = next_cache_extent(cache);
9270 prune_corrupt_blocks(root->fs_info);
9271 reset_cached_block_groups(root->fs_info);
9274 reset_cached_block_groups(root->fs_info);
9277 * We need to delete any duplicate entries we find first otherwise we
9278 * could mess up the extent tree when we have backrefs that actually
9279 * belong to a different extent item and not the weird duplicate one.
9281 while (repair && !list_empty(&duplicate_extents)) {
9282 rec = to_extent_record(duplicate_extents.next);
9283 list_del_init(&rec->list);
9285 /* Sometimes we can find a backref before we find an actual
9286 * extent, so we need to process it a little bit to see if there
9287 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9288 * if this is a backref screwup. If we need to delete stuff
9289 * process_duplicates() will return 0, otherwise it will return
9292 if (process_duplicates(extent_cache, rec))
9294 ret = delete_duplicate_records(root, rec);
9298 * delete_duplicate_records will return the number of entries
9299 * deleted, so if it's greater than 0 then we know we actually
9300 * did something and we need to remove.
9313 cache = search_cache_extent(extent_cache, 0);
9316 rec = container_of(cache, struct extent_record, cache);
9317 if (rec->num_duplicates) {
9318 fprintf(stderr, "extent item %llu has multiple extent "
9319 "items\n", (unsigned long long)rec->start);
9323 if (rec->refs != rec->extent_item_refs) {
9324 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9325 (unsigned long long)rec->start,
9326 (unsigned long long)rec->nr);
9327 fprintf(stderr, "extent item %llu, found %llu\n",
9328 (unsigned long long)rec->extent_item_refs,
9329 (unsigned long long)rec->refs);
9330 ret = record_orphan_data_extents(root->fs_info, rec);
9336 if (all_backpointers_checked(rec, 1)) {
9337 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9338 (unsigned long long)rec->start,
9339 (unsigned long long)rec->nr);
9343 if (!rec->owner_ref_checked) {
9344 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9345 (unsigned long long)rec->start,
9346 (unsigned long long)rec->nr);
9351 if (repair && fix) {
9352 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9358 if (rec->bad_full_backref) {
9359 fprintf(stderr, "bad full backref, on [%llu]\n",
9360 (unsigned long long)rec->start);
9362 ret = fixup_extent_flags(root->fs_info, rec);
9370 * Although it's not a extent ref's problem, we reuse this
9371 * routine for error reporting.
9372 * No repair function yet.
9374 if (rec->crossing_stripes) {
9376 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9377 rec->start, rec->start + rec->max_size);
9381 if (rec->wrong_chunk_type) {
9383 "bad extent [%llu, %llu), type mismatch with chunk\n",
9384 rec->start, rec->start + rec->max_size);
9388 remove_cache_extent(extent_cache, cache);
9389 free_all_extent_backrefs(rec);
9390 if (!init_extent_tree && repair && (!cur_err || fix))
9391 clear_extent_dirty(root->fs_info->excluded_extents,
9393 rec->start + rec->max_size - 1);
9398 if (ret && ret != -EAGAIN) {
9399 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9402 struct btrfs_trans_handle *trans;
9404 root = root->fs_info->extent_root;
9405 trans = btrfs_start_transaction(root, 1);
9406 if (IS_ERR(trans)) {
9407 ret = PTR_ERR(trans);
9411 btrfs_fix_block_accounting(trans, root);
9412 ret = btrfs_commit_transaction(trans, root);
9421 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9425 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9426 stripe_size = length;
9427 stripe_size /= num_stripes;
9428 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9429 stripe_size = length * 2;
9430 stripe_size /= num_stripes;
9431 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9432 stripe_size = length;
9433 stripe_size /= (num_stripes - 1);
9434 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9435 stripe_size = length;
9436 stripe_size /= (num_stripes - 2);
9438 stripe_size = length;
9444 * Check the chunk against its block group and device extent references:
9445 * Return 0 if all refs seem valid.
9446 * Return 1 if only part of the refs seem valid and a later check is needed to
9447 * rebuild them, e.g. a missing block group that requires searching the extent tree.
9448 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * Block group: the record covering the chunk is looked up in
 * @block_group_cache and its offset, objectid and flags are compared with the
 * chunk length, offset and type_flags. On a match the block group record is
 * unlinked from its list and stored in chunk_rec->bg_rec; a mismatch or a
 * missing block group produces a diagnostic.
 *
 * Device extents: the expected per-stripe length comes from
 * calc_stripe_length(). For each stripe the device extent at (devid, offset)
 * is looked up in @dev_extent_cache and compared on objectid, offset,
 * chunk_offset and length. A matching record is moved onto
 * chunk_rec->dextents; a mismatch or a missing extent produces a diagnostic.
 *
 * NOTE(review): the last parameter, the role of metadump_v2 and the return
 * statements are not visible in this listing.
 */
9450 static int check_chunk_refs(struct chunk_record *chunk_rec,
9451 struct block_group_tree *block_group_cache,
9452 struct device_extent_tree *dev_extent_cache,
9455 struct cache_extent *block_group_item;
9456 struct block_group_record *block_group_rec;
9457 struct cache_extent *dev_extent_item;
9458 struct device_extent_record *dev_extent_rec;
9462 int metadump_v2 = 0;
9466 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9469 if (block_group_item) {
9470 block_group_rec = container_of(block_group_item,
9471 struct block_group_record,
9473 if (chunk_rec->length != block_group_rec->offset ||
9474 chunk_rec->offset != block_group_rec->objectid ||
9476 chunk_rec->type_flags != block_group_rec->flags)) {
9479 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9480 chunk_rec->objectid,
9485 chunk_rec->type_flags,
9486 block_group_rec->objectid,
9487 block_group_rec->type,
9488 block_group_rec->offset,
9489 block_group_rec->offset,
9490 block_group_rec->objectid,
9491 block_group_rec->flags);
9494 list_del_init(&block_group_rec->list);
9495 chunk_rec->bg_rec = block_group_rec;
9500 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9501 chunk_rec->objectid,
9506 chunk_rec->type_flags);
9513 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9514 chunk_rec->num_stripes);
9515 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9516 devid = chunk_rec->stripes[i].devid;
9517 offset = chunk_rec->stripes[i].offset;
9518 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9519 devid, offset, length);
9520 if (dev_extent_item) {
9521 dev_extent_rec = container_of(dev_extent_item,
9522 struct device_extent_record,
9524 if (dev_extent_rec->objectid != devid ||
9525 dev_extent_rec->offset != offset ||
9526 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9527 dev_extent_rec->length != length) {
9530 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9531 chunk_rec->objectid,
9534 chunk_rec->stripes[i].devid,
9535 chunk_rec->stripes[i].offset,
9536 dev_extent_rec->objectid,
9537 dev_extent_rec->offset,
9538 dev_extent_rec->length);
9541 list_move(&dev_extent_rec->chunk_list,
9542 &chunk_rec->dextents);
9547 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9548 chunk_rec->objectid,
9551 chunk_rec->stripes[i].devid,
9552 chunk_rec->stripes[i].offset);
9559 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk record in @chunk_cache and sort the
 * records onto the caller-supplied lists.
 *
 * A record is appended to @good when the check returns 0 and @good is given,
 * and to @rebuild when the check returns a positive value and @rebuild is
 * given. A record can also be appended to @bad; NOTE(review): the condition
 * guarding that insertion is not visible in this listing.
 *
 * Afterwards the records still linked on block_group_cache->block_groups and
 * on dev_extent_cache->no_chunk_orphans are walked, and a message is printed
 * for each block group or device extent that did not find its chunk.
 * @silent is forwarded to check_chunk_refs().
 */
9560 int check_chunks(struct cache_tree *chunk_cache,
9561 struct block_group_tree *block_group_cache,
9562 struct device_extent_tree *dev_extent_cache,
9563 struct list_head *good, struct list_head *bad,
9564 struct list_head *rebuild, int silent)
9566 struct cache_extent *chunk_item;
9567 struct chunk_record *chunk_rec;
9568 struct block_group_record *bg_rec;
9569 struct device_extent_record *dext_rec;
9573 chunk_item = first_cache_extent(chunk_cache);
9574 while (chunk_item) {
9575 chunk_rec = container_of(chunk_item, struct chunk_record,
9577 err = check_chunk_refs(chunk_rec, block_group_cache,
9578 dev_extent_cache, silent);
9581 if (err == 0 && good)
9582 list_add_tail(&chunk_rec->list, good);
9583 if (err > 0 && rebuild)
9584 list_add_tail(&chunk_rec->list, rebuild);
9586 list_add_tail(&chunk_rec->list, bad);
9587 chunk_item = next_cache_extent(chunk_item);
9590 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9593 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9601 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9605 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the space covered by a device's extents with the device's own
 * byte_used value.
 *
 * The walk starts at the cache entry returned by
 * search_cache_extent2(tree, devid, 0) and advances with next_cache_extent().
 * Every visited extent is unlinked from its device_list and its length is
 * added to total_byte. The objectid != devid test presumably ends the walk;
 * the statement it guards is not visible in this listing.
 * A mismatch between total_byte and dev_rec->byte_used produces the
 * "is not equal to byte-used" message.
 */
9616 static int check_device_used(struct device_record *dev_rec,
9617 struct device_extent_tree *dext_cache)
9619 struct cache_extent *cache;
9620 struct device_extent_record *dev_extent_rec;
9623 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9625 dev_extent_rec = container_of(cache,
9626 struct device_extent_record,
9628 if (dev_extent_rec->objectid != dev_rec->devid)
/* The extent belongs to this device: take it off device_list and count it. */
9631 list_del_init(&dev_extent_rec->device_list);
9632 total_byte += dev_extent_rec->length;
9633 cache = next_cache_extent(cache);
9636 if (total_byte != dev_rec->byte_used) {
9638 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9639 total_byte, dev_rec->byte_used, dev_rec->objectid,
9640 dev_rec->type, dev_rec->offset);
9647 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rb-tree
 * (visited with rb_first()/rb_next()), then iterate the device extents on
 * dev_extent_cache->no_device_orphans, which are reported as not having found
 * their device.
 * NOTE(review): how the per-device results are combined into the return value
 * is not visible in this listing.
 */
9648 static int check_devices(struct rb_root *dev_cache,
9649 struct device_extent_tree *dev_extent_cache)
9651 struct rb_node *dev_node;
9652 struct device_record *dev_rec;
9653 struct device_extent_record *dext_rec;
9657 dev_node = rb_first(dev_cache);
9659 dev_rec = container_of(dev_node, struct device_record, node);
9660 err = check_device_used(dev_rec, dev_extent_cache);
9664 dev_node = rb_next(dev_node);
/* Device extents that were never matched to a device. */
9666 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9669 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9670 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level and @level_size are
 * stored in the record as-is; @drop_key is copied by value into
 * ri_rec->drop_key.
 * NOTE(review): callers in this file pass drop_key == NULL; the guard around
 * the memcpy and the check of the malloc() result are not visible in this
 * listing -- confirm both exist.
 */
9677 static int add_root_item_to_list(struct list_head *head,
9678 u64 objectid, u64 bytenr, u64 last_snapshot,
9679 u8 level, u8 drop_level,
9680 int level_size, struct btrfs_key *drop_key)
9683 struct root_item_record *ri_rec;
9684 ri_rec = malloc(sizeof(*ri_rec));
9687 ri_rec->bytenr = bytenr;
9688 ri_rec->objectid = objectid;
9689 ri_rec->level = level;
9690 ri_rec->level_size = level_size;
9691 ri_rec->drop_level = drop_level;
9692 ri_rec->last_snapshot = last_snapshot;
9694 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9695 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list by repeatedly taking its first root_item_record and unlinking
 * it with list_del_init().
 * NOTE(review): the release of the record itself is not visible in this
 * listing -- presumably it is freed right after being unlinked.
 */
9700 static void free_root_item_list(struct list_head *list)
9702 struct root_item_record *ri_rec;
9704 while (!list_empty(list)) {
9705 ri_rec = list_first_entry(list, struct root_item_record,
9707 list_del_init(&ri_rec->list);
/*
 * Process the root_item_records queued on @list.
 *
 * For each record the tree block at rec->bytenr is read through the tree
 * root, registered with add_root_to_pending() and handed to run_next_block()
 * together with the record; the buffer is then released and the record is
 * unlinked from @list. A final run_next_block() call passes a NULL record.
 * NOTE(review): the conditions around the run_next_block() calls and any loop
 * that repeats the final call are not visible in this listing; neither is
 * the freeing of the unlinked record.
 *
 * @bits and the cache arguments (@pending, @seen, @reada, @nodes,
 * @extent_cache, @chunk_cache, @dev_cache, @block_group_cache,
 * @dev_extent_cache) are passed through to run_next_block().
 */
9712 static int deal_root_from_list(struct list_head *list,
9713 struct btrfs_root *root,
9714 struct block_info *bits,
9716 struct cache_tree *pending,
9717 struct cache_tree *seen,
9718 struct cache_tree *reada,
9719 struct cache_tree *nodes,
9720 struct cache_tree *extent_cache,
9721 struct cache_tree *chunk_cache,
9722 struct rb_root *dev_cache,
9723 struct block_group_tree *block_group_cache,
9724 struct device_extent_tree *dev_extent_cache)
9729 while (!list_empty(list)) {
9730 struct root_item_record *rec;
9731 struct extent_buffer *buf;
9732 rec = list_entry(list->next,
9733 struct root_item_record, list);
/* Read the root node of the queued tree; an unreadable block is released. */
9735 buf = read_tree_block(root->fs_info->tree_root,
9736 rec->bytenr, rec->level_size, 0);
9737 if (!extent_buffer_uptodate(buf)) {
9738 free_extent_buffer(buf);
9742 ret = add_root_to_pending(buf, extent_cache, pending,
9743 seen, nodes, rec->objectid);
9747 * To rebuild extent tree, we need deal with snapshot
9748 * one by one, otherwise we deal with node firstly which
9749 * can maximize readahead.
9752 ret = run_next_block(root, bits, bits_nr, &last,
9753 pending, seen, reada, nodes,
9754 extent_cache, chunk_cache,
9755 dev_cache, block_group_cache,
9756 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and dequeue the record. */
9760 free_extent_buffer(buf);
9761 list_del(&rec->list);
/* Same call as above, but without a specific root record. */
9767 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9768 reada, nodes, extent_cache, chunk_cache,
9769 dev_cache, block_group_cache,
9770 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk / extent / device cross-checks.
 *
 * Outline of the visible steps:
 *  1. initialise the local caches and lists and publish some of them through
 *     root->fs_info (excluded_extents, fsck_extent_cache, free_extent_hook,
 *     corrupt_blocks);
 *  2. queue the tree root and the chunk root on normal_trees;
 *  3. walk the ROOT_ITEM entries of the tree root and queue each one on
 *     normal_trees or dropping_trees;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. clear the fs_info hooks and free every cache.
 * NOTE(review): labels, loop boundaries and error branches are not visible in
 * this listing, so the exact control flow between these steps (including the
 * -EAGAIN restart mentioned in the inline comment) must be confirmed against
 * the full source.
 */
9780 static int check_chunks_and_extents(struct btrfs_root *root)
9782 struct rb_root dev_cache;
9783 struct cache_tree chunk_cache;
9784 struct block_group_tree block_group_cache;
9785 struct device_extent_tree dev_extent_cache;
9786 struct cache_tree extent_cache;
9787 struct cache_tree seen;
9788 struct cache_tree pending;
9789 struct cache_tree reada;
9790 struct cache_tree nodes;
9791 struct extent_io_tree excluded_extents;
9792 struct cache_tree corrupt_blocks;
9793 struct btrfs_path path;
9794 struct btrfs_key key;
9795 struct btrfs_key found_key;
9797 struct block_info *bits;
9799 struct extent_buffer *leaf;
9801 struct btrfs_root_item ri;
9802 struct list_head dropping_trees;
9803 struct list_head normal_trees;
9804 struct btrfs_root *root1;
/* Step 1: start from empty caches and lists. */
9809 dev_cache = RB_ROOT;
9810 cache_tree_init(&chunk_cache);
9811 block_group_tree_init(&block_group_cache);
9812 device_extent_tree_init(&dev_extent_cache);
9814 cache_tree_init(&extent_cache);
9815 cache_tree_init(&seen);
9816 cache_tree_init(&pending);
9817 cache_tree_init(&nodes);
9818 cache_tree_init(&reada);
9819 cache_tree_init(&corrupt_blocks);
9820 extent_io_tree_init(&excluded_extents);
9821 INIT_LIST_HEAD(&dropping_trees);
9822 INIT_LIST_HEAD(&normal_trees);
/* Expose the stack-local trees through fs_info; they are reset to NULL during cleanup. */
9825 root->fs_info->excluded_extents = &excluded_extents;
9826 root->fs_info->fsck_extent_cache = &extent_cache;
9827 root->fs_info->free_extent_hook = free_extent_hook;
9828 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* NOTE(review): the check of this allocation's result is not visible here -- confirm. */
9832 bits = malloc(bits_nr * sizeof(struct block_info));
/* Optional progress reporting for the extent phase. */
9838 if (ctx.progress_enabled) {
9839 ctx.tp = TASK_EXTENTS;
9840 task_start(ctx.info);
/* Step 2: the tree root and the chunk root are queued explicitly. */
9844 root1 = root->fs_info->tree_root;
9845 level = btrfs_header_level(root1->node);
9846 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9847 root1->node->start, 0, level, 0,
9848 root1->nodesize, NULL);
9851 root1 = root->fs_info->chunk_root;
9852 level = btrfs_header_level(root1->node);
9853 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9854 root1->node->start, 0, level, 0,
9855 root1->nodesize, NULL);
/* Step 3: scan the tree root for ROOT_ITEM entries. */
9858 btrfs_init_path(&path);
9861 key.type = BTRFS_ROOT_ITEM_KEY;
9862 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9867 leaf = path.nodes[0];
9868 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9869 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9870 ret = btrfs_next_leaf(root, &path);
9873 leaf = path.nodes[0];
9874 slot = path.slots[0];
9876 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9877 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9878 unsigned long offset;
/* Copy the on-disk root item into the local ri before inspecting it. */
9881 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9882 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9883 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects normal_trees; other roots go to dropping_trees. */
9884 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9885 level = btrfs_root_level(&ri);
9886 level_size = root->nodesize;
9887 ret = add_root_item_to_list(&normal_trees,
9889 btrfs_root_bytenr(&ri),
9890 last_snapshot, level,
9891 0, level_size, NULL);
9895 level = btrfs_root_level(&ri);
9896 level_size = root->nodesize;
9897 objectid = found_key.objectid;
/* found_key is reused as the converted key that is passed on as drop_key. */
9898 btrfs_disk_key_to_cpu(&found_key,
9900 ret = add_root_item_to_list(&dropping_trees,
9902 btrfs_root_bytenr(&ri),
9903 last_snapshot, level,
9905 level_size, &found_key);
9912 btrfs_release_path(&path);
9915 * check_block can return -EAGAIN if it fixes something, please keep
9916 * this in mind when dealing with return values from these functions, if
9917 * we get -EAGAIN we want to fall through and restart the loop.
9919 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9920 &seen, &reada, &nodes, &extent_cache,
9921 &chunk_cache, &dev_cache, &block_group_cache,
9928 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9929 &pending, &seen, &reada, &nodes,
9930 &extent_cache, &chunk_cache, &dev_cache,
9931 &block_group_cache, &dev_extent_cache);
/* Step 5: cross-checks; check_chunks() gets no result lists and silent == 0. */
9938 ret = check_chunks(&chunk_cache, &block_group_cache,
9939 &dev_extent_cache, NULL, NULL, NULL, 0);
9946 ret = check_extent_refs(root, &extent_cache);
9953 ret = check_devices(&dev_cache, &dev_extent_cache);
9958 task_stop(ctx.info);
/*
 * Step 6: first of two visible cleanup sequences. This one resets the fs_info
 * hooks to NULL. NOTE(review): no release of extent_cache is visible in this
 * sequence, unlike the one further down -- confirm against the full source.
 */
9960 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9961 extent_io_tree_cleanup(&excluded_extents);
9962 root->fs_info->fsck_extent_cache = NULL;
9963 root->fs_info->free_extent_hook = NULL;
9964 root->fs_info->corrupt_blocks = NULL;
9965 root->fs_info->excluded_extents = NULL;
9968 free_chunk_cache_tree(&chunk_cache);
9969 free_device_cache_tree(&dev_cache);
9970 free_block_group_tree(&block_group_cache);
9971 free_device_extent_tree(&dev_extent_cache);
9972 free_extent_cache_tree(&seen);
9973 free_extent_cache_tree(&pending);
9974 free_extent_cache_tree(&reada);
9975 free_extent_cache_tree(&nodes);
9976 free_root_item_list(&normal_trees);
9977 free_root_item_list(&dropping_trees);
/*
 * Second cleanup sequence: frees the same caches and additionally releases
 * extent_cache with free_extent_record_cache(). It does not reset the fs_info
 * hooks. NOTE(review): the label and the jump that lead here are not visible
 * in this listing.
 */
9980 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9981 free_extent_cache_tree(&seen);
9982 free_extent_cache_tree(&pending);
9983 free_extent_cache_tree(&reada);
9984 free_extent_cache_tree(&nodes);
9985 free_chunk_cache_tree(&chunk_cache);
9986 free_block_group_tree(&block_group_cache);
9987 free_device_cache_tree(&dev_cache);
9988 free_device_extent_tree(&dev_extent_cache);
9989 free_extent_record_cache(&extent_cache);
9990 free_root_item_list(&normal_trees);
9991 free_root_item_list(&dropping_trees);
9992 extent_io_tree_cleanup(&excluded_extents);
9997 * Check backrefs of a tree block given by @bytenr or @eb.
9999 * @root: the root containing the @bytenr or @eb
10000 * @eb: tree block extent buffer, can be NULL
10001 * @bytenr: bytenr of the tree block to search
10002 * @level: tree level of the tree block
10003 * @owner: owner of the tree block
10005 * Return >0 for any error found and output error message
10006 * Return 0 for no error found
/*
 * Implementation notes:
 *  - the extent item is looked up with key (bytenr, METADATA_ITEM or
 *    EXTENT_ITEM, -1) followed by btrfs_previous_extent_item();
 *  - the inline refs are scanned first; a separate TREE_BLOCK_REF item keyed
 *    by root->objectid is searched afterwards;
 *  - a SHARED_BLOCK_REF is validated by calling this function again for the
 *    parent block at level + 1 with a NULL @eb.
 * NOTE(review): the mismatch branches assign "err = BACKREF_MISMATCH" while
 * the missing-backref branches use "err |= BACKREF_MISSING", so a bit set
 * earlier can be overwritten -- confirm whether that is intended.
 */
10008 static int check_tree_block_ref(struct btrfs_root *root,
10009 struct extent_buffer *eb, u64 bytenr,
10010 int level, u64 owner)
10012 struct btrfs_key key;
10013 struct btrfs_root *extent_root = root->fs_info->extent_root;
10014 struct btrfs_path path;
10015 struct btrfs_extent_item *ei;
10016 struct btrfs_extent_inline_ref *iref;
10017 struct extent_buffer *leaf;
10023 u32 nodesize = root->nodesize;
10026 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10031 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10032 btrfs_header_bytenr(root->node) == bytenr)
10033 tree_reloc_root = 1;
/* The key type depends on the SKINNY_METADATA incompat flag. */
10035 btrfs_init_path(&path);
10036 key.objectid = bytenr;
10037 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10038 key.type = BTRFS_METADATA_ITEM_KEY;
10040 key.type = BTRFS_EXTENT_ITEM_KEY;
10041 key.offset = (u64)-1;
10043 /* Search for the backref in extent tree */
10044 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10046 err |= BACKREF_MISSING;
10049 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10051 err |= BACKREF_MISSING;
10055 leaf = path.nodes[0];
10056 slot = path.slots[0];
10057 btrfs_item_key_to_cpu(leaf, &key, slot);
10059 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and the inline refs follow the
 * extent item directly. Otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
10061 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10062 skinny_level = (int)key.offset;
10063 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10065 struct btrfs_tree_block_info *info;
10067 info = (struct btrfs_tree_block_info *)(ei + 1);
10068 skinny_level = btrfs_tree_block_level(leaf, info);
10069 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * Consistency checks between the extent item and the tree block header.
 * NOTE(review): these read @eb, which may be NULL per the function contract;
 * the guard is not visible in this listing.
 */
10076 if (!(btrfs_extent_flags(leaf, ei) &
10077 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10079 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10080 key.objectid, nodesize,
10081 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10082 err = BACKREF_MISMATCH;
10084 header_gen = btrfs_header_generation(eb);
10085 extent_gen = btrfs_extent_generation(leaf, ei);
10086 if (header_gen != extent_gen) {
10088 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10089 key.objectid, nodesize, header_gen,
10091 err = BACKREF_MISMATCH;
10093 if (level != skinny_level) {
10095 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10096 key.objectid, nodesize, level, skinny_level);
10097 err = BACKREF_MISMATCH;
10099 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10101 "extent[%llu %u] is referred by other roots than %llu",
10102 key.objectid, nodesize, root->objectid);
10103 err = BACKREF_MISMATCH;
10108 * Iterate the extent/metadata item to find the exact backref
10110 item_size = btrfs_item_size_nr(leaf, slot);
10111 ptr = (unsigned long)iref;
10112 end = (unsigned long)ei + item_size;
10113 while (ptr < end) {
10114 iref = (struct btrfs_extent_inline_ref *)ptr;
10115 type = btrfs_extent_inline_ref_type(leaf, iref);
10116 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF is compared against both root->objectid and @owner. */
10118 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10119 (offset == root->objectid || offset == owner)) {
10121 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10123 * Backref of tree reloc root points to itself, no need
10124 * to check backref any more.
10126 if (tree_reloc_root)
10129 /* Check if the backref points to valid referencer */
10130 found_ref = !check_tree_block_ref(root, NULL,
10131 offset, level + 1, owner);
/* Advance by the size of this inline ref type. */
10136 ptr += btrfs_extent_inline_ref_size(type);
10140 * Inlined extent item doesn't have what we need, check
10141 * TREE_BLOCK_REF_KEY
10144 btrfs_release_path(&path);
10145 key.objectid = bytenr;
10146 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10147 key.offset = root->objectid;
10149 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10154 err |= BACKREF_MISSING;
10156 btrfs_release_path(&path);
/* The "backref lost" message is only printed when a tree block was supplied. */
10157 if (eb && (err & BACKREF_MISSING))
10158 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10159 bytenr, nodesize, owner, level);
10164 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10166 * Return >0 for any error found and output error message
10167 * Return 0 for no error found
/*
 * Implementation notes:
 *  - inline extents and extents with disk_bytenr == 0 are not checked;
 *  - disk_num_bytes and num_bytes must be aligned to root->sectorsize, else
 *    BYTES_UNALIGNED is recorded; the values are added to the file-scope
 *    counters data_bytes_allocated and data_bytes_referenced
 *    (NOTE(review): whether only in the aligned case is not visible here);
 *  - the data backref is searched in three places, in this order: the inline
 *    refs of the EXTENT_ITEM, a keyed EXTENT_DATA_REF item, and a keyed
 *    SHARED_DATA_REF item whose offset is eb->start;
 *  - the @slot parameter is overwritten with path.slots[0] once the extent
 *    item has been found.
 */
10169 static int check_extent_data_item(struct btrfs_root *root,
10170 struct extent_buffer *eb, int slot)
10172 struct btrfs_file_extent_item *fi;
10173 struct btrfs_path path;
10174 struct btrfs_root *extent_root = root->fs_info->extent_root;
10175 struct btrfs_key fi_key;
10176 struct btrfs_key dbref_key;
10177 struct extent_buffer *leaf;
10178 struct btrfs_extent_item *ei;
10179 struct btrfs_extent_inline_ref *iref;
10180 struct btrfs_extent_data_ref *dref;
10183 u64 disk_num_bytes;
10184 u64 extent_num_bytes;
10191 int found_dbackref = 0;
10195 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10196 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10198 /* Nothing to check for hole and inline data extents */
10199 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10200 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10203 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10204 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10205 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10207 /* Check unaligned disk_num_bytes and num_bytes */
10208 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10210 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10211 fi_key.objectid, fi_key.offset, disk_num_bytes,
10213 err |= BYTES_UNALIGNED;
10215 data_bytes_allocated += disk_num_bytes;
10217 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10219 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10220 fi_key.objectid, fi_key.offset, extent_num_bytes,
10222 err |= BYTES_UNALIGNED;
10224 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is one of the two accepted ref roots below. */
10226 owner = btrfs_header_owner(eb);
10228 /* Check the extent item of the file extent in extent tree */
10229 btrfs_init_path(&path);
10230 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10231 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10232 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10234 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10238 leaf = path.nodes[0];
10239 slot = path.slots[0];
10240 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10242 extent_flags = btrfs_extent_flags(leaf, ei);
10244 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10246 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10247 disk_bytenr, disk_num_bytes,
10248 BTRFS_EXTENT_FLAG_DATA);
10249 err |= BACKREF_MISMATCH;
10252 /* Check data backref inside that extent item */
10253 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10254 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10255 ptr = (unsigned long)iref;
10256 end = (unsigned long)ei + item_size;
10257 while (ptr < end) {
10258 iref = (struct btrfs_extent_inline_ref *)ptr;
10259 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For EXTENT_DATA_REF the data ref structure starts at the inline ref's offset field. */
10260 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10262 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10263 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10264 if (ref_root == owner || ref_root == root->objectid)
10265 found_dbackref = 1;
10266 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10267 found_dbackref = !check_tree_block_ref(root, NULL,
10268 btrfs_extent_inline_ref_offset(leaf, iref),
10272 if (found_dbackref)
10274 ptr += btrfs_extent_inline_ref_size(type);
10277 if (!found_dbackref) {
10278 btrfs_release_path(&path);
10280 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10281 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10282 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10283 dbref_key.offset = hash_extent_data_ref(root->objectid,
10284 fi_key.objectid, fi_key.offset);
10286 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10287 &dbref_key, &path, 0, 0);
10289 found_dbackref = 1;
10293 btrfs_release_path(&path);
10296 * Neither inlined nor EXTENT_DATA_REF found, try
10297 * SHARED_DATA_REF as last chance.
10299 dbref_key.objectid = disk_bytenr;
10300 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10301 dbref_key.offset = eb->start;
10303 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10304 &dbref_key, &path, 0, 0);
10306 found_dbackref = 1;
/* None of the three lookups matched: record the missing backref. */
10312 if (!found_dbackref)
10313 err |= BACKREF_MISSING;
10314 btrfs_release_path(&path);
10315 if (err & BACKREF_MISSING) {
10316 error("data extent[%llu %llu] backref lost",
10317 disk_bytenr, disk_num_bytes);
10323 * Get the real tree block level, for cases such as a shared block
10324 * Return >= 0 as tree level
10325 * Return <0 for error
/*
 * Implementation notes: the level is obtained from two sources and compared.
 * The first is the extent tree: the key offset of a METADATA_ITEM, otherwise
 * the btrfs_tree_block_info that follows the extent item. The second is the
 * header of the tree block itself, read with the generation stored in the
 * extent item. header_level is returned when the two agree.
 * NOTE(review): the error values returned on the failure branches are not
 * visible in this listing.
 */
10327 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10329 struct extent_buffer *eb;
10330 struct btrfs_path path;
10331 struct btrfs_key key;
10332 struct btrfs_extent_item *ei;
10335 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10340 /* Search extent tree for extent generation and level */
10341 key.objectid = bytenr;
10342 key.type = BTRFS_METADATA_ITEM_KEY;
10343 key.offset = (u64)-1;
10345 btrfs_init_path(&path);
10346 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10349 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10357 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10358 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10359 struct btrfs_extent_item);
/* The extent item's flags are tested for the TREE_BLOCK bit. */
10360 flags = btrfs_extent_flags(path.nodes[0], ei);
10361 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10366 /* Get transid for later read_tree_block() check */
10367 transid = btrfs_extent_generation(path.nodes[0], ei);
10369 /* Get backref level as one source */
10370 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10371 backref_level = key.offset;
10373 struct btrfs_tree_block_info *info;
10375 info = (struct btrfs_tree_block_info *)(ei + 1);
10376 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10378 btrfs_release_path(&path);
10380 /* Get level from tree block as an alternative source */
10381 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10382 if (!extent_buffer_uptodate(eb)) {
10383 free_extent_buffer(eb);
10386 header_level = btrfs_header_level(eb);
10387 free_extent_buffer(eb);
/* Both sources are compared before header_level is returned. */
10389 if (header_level != backref_level)
10391 return header_level;
10394 btrfs_release_path(&path);
10399 * Check if a tree block backref is valid (points to a valid tree block)
10400 * if level == -1, level will be resolved
10401 * Return >0 for any error found and print error message
/*
 * Implementation notes:
 *  - the owning root is loaded with key (root_id, ROOT_ITEM, -1);
 *  - the first key of the block at @bytenr is read, then the root is searched
 *    for that key with path.lowest_level = level, so that path.nodes[level]
 *    is the block the tree actually reaches for that key;
 *  - that block's bytenr and level must match the backref, otherwise
 *    REFERENCER_MISMATCH is recorded; lookup or read failures record
 *    REFERENCER_MISSING.
 */
10403 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10404 u64 bytenr, int level)
10406 struct btrfs_root *root;
10407 struct btrfs_key key;
10408 struct btrfs_path path;
10409 struct extent_buffer *eb;
10410 struct extent_buffer *node;
10411 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10415 /* Query level for level == -1 special case */
10417 level = query_tree_block_level(fs_info, bytenr);
10419 err |= REFERENCER_MISSING;
10423 key.objectid = root_id;
10424 key.type = BTRFS_ROOT_ITEM_KEY;
10425 key.offset = (u64)-1;
10427 root = btrfs_read_fs_root(fs_info, &key);
10428 if (IS_ERR(root)) {
10429 err |= REFERENCER_MISSING;
10433 /* Read out the tree block to get item/node key */
10434 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10435 if (!extent_buffer_uptodate(eb)) {
10436 err |= REFERENCER_MISSING;
10437 free_extent_buffer(eb);
10441 /* Empty tree, no need to check key */
10442 if (!btrfs_header_nritems(eb) && !level) {
10443 free_extent_buffer(eb);
/* Take the first key: a node key or an item key (the selecting condition is not visible here). */
10448 btrfs_node_key_to_cpu(eb, &key, 0);
10450 btrfs_item_key_to_cpu(eb, &key, 0);
10452 free_extent_buffer(eb);
10454 btrfs_init_path(&path);
10455 path.lowest_level = level;
10456 /* Search with the first key, to ensure we can reach it */
10457 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10459 err |= REFERENCER_MISSING;
/* The block found at the requested level must be the one named by the backref. */
10463 node = path.nodes[level];
10464 if (btrfs_header_bytenr(node) != bytenr) {
10466 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10467 bytenr, nodesize, bytenr,
10468 btrfs_header_bytenr(node));
10469 err |= REFERENCER_MISMATCH;
10471 if (btrfs_header_level(node) != level) {
10473 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10474 bytenr, nodesize, level,
10475 btrfs_header_level(node));
10476 err |= REFERENCER_MISMATCH;
10480 btrfs_release_path(&path);
/* Two message variants exist, with and without the level; the selecting condition is not visible here. */
10482 if (err & REFERENCER_MISSING) {
10484 error("extent [%llu %d] lost referencer (owner: %llu)",
10485 bytenr, nodesize, root_id);
10488 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10489 bytenr, nodesize, root_id, level);
10496 * Check if tree block @eb is tree reloc root.
10497 * Return 0 if it's not or any problem happens
10498 * Return 1 if it's a tree reloc root
/*
 * Implementation notes: the candidate reloc root is looked up with key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) through
 * btrfs_read_fs_root_no_cache(), its root node bytenr is compared with the
 * bytenr of @eb, and the root is released with btrfs_free_fs_root().
 */
10500 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10501 struct extent_buffer *eb)
10503 struct btrfs_root *tree_reloc_root;
10504 struct btrfs_key key;
10505 u64 bytenr = btrfs_header_bytenr(eb);
10506 u64 owner = btrfs_header_owner(eb);
10509 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10510 key.offset = owner;
10511 key.type = BTRFS_ROOT_ITEM_KEY;
10513 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10514 if (IS_ERR(tree_reloc_root))
/* @eb qualifies when it is the root node of that reloc root. */
10517 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10519 btrfs_free_fs_root(tree_reloc_root);
10524 * Check referencer for shared block backref
10525 * If level == -1, this function will resolve the level.
/*
 * Implementation notes: the block at @parent is read and must satisfy one of
 * two visible conditions. Either @parent equals @bytenr and the block is a
 * tree reloc root, or the parent's header level is level + 1 and one of its
 * node pointers equals @bytenr. If no parent is found, REFERENCER_MISSING is
 * returned after the "lost its parent" message.
 */
10527 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10528 u64 parent, u64 bytenr, int level)
10530 struct extent_buffer *eb;
10531 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10533 int found_parent = 0;
10536 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10537 if (!extent_buffer_uptodate(eb))
/* Resolve the level from the extent tree (the guarding condition is not visible here). */
10541 level = query_tree_block_level(fs_info, bytenr);
10545 /* It's possible it's a tree reloc root */
10546 if (parent == bytenr) {
10547 if (is_tree_reloc_root(fs_info, eb))
/* A real parent sits exactly one level above the child. */
10552 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for @bytenr. */
10555 nr = btrfs_header_nritems(eb);
10556 for (i = 0; i < nr; i++) {
10557 if (bytenr == btrfs_node_blockptr(eb, i)) {
10563 free_extent_buffer(eb);
10564 if (!found_parent) {
10566 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10567 bytenr, nodesize, parent, level);
10568 return REFERENCER_MISSING;
10574 * Check referencer for normal (inlined) data ref
10575 * If len == 0, it will be resolved by searching in extent tree
/*
 * Implementation notes:
 *  - the extent tree is searched with key (bytenr, EXTENT_ITEM, -1) plus
 *    btrfs_previous_extent_item(), and the found key is validated
 *    (NOTE(review): presumably this is where @len is resolved when it is 0;
 *    the assignment is not visible in this listing);
 *  - the referencing root is loaded with key (root_id, ROOT_ITEM, -1);
 *  - EXTENT_DATA items of inode @objectid are walked with btrfs_next_item()
 *    and tested against @bytenr, @len and the backref offset; found_count is
 *    finally compared with @count and a difference returns
 *    REFERENCER_MISSING.
 */
10577 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10578 u64 root_id, u64 objectid, u64 offset,
10579 u64 bytenr, u64 len, u32 count)
10581 struct btrfs_root *root;
10582 struct btrfs_root *extent_root = fs_info->extent_root;
10583 struct btrfs_key key;
10584 struct btrfs_path path;
10585 struct extent_buffer *leaf;
10586 struct btrfs_file_extent_item *fi;
10587 u32 found_count = 0;
10592 key.objectid = bytenr;
10593 key.type = BTRFS_EXTENT_ITEM_KEY;
10594 key.offset = (u64)-1;
10596 btrfs_init_path(&path);
10597 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10600 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10603 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10604 if (key.objectid != bytenr ||
10605 key.type != BTRFS_EXTENT_ITEM_KEY)
10608 btrfs_release_path(&path);
/* Switch from the extent tree to the tree that owns the file extents. */
10610 key.objectid = root_id;
10611 key.type = BTRFS_ROOT_ITEM_KEY;
10612 key.offset = (u64)-1;
10613 btrfs_init_path(&path);
10615 root = btrfs_read_fs_root(fs_info, &key);
10619 key.objectid = objectid;
10620 key.type = BTRFS_EXTENT_DATA_KEY;
10622 * It can be nasty as data backref offset is
10623 * file offset - file extent offset, which is smaller or
10624 * equal to original backref offset. The only special case is
10625 * overflow. So we need to special check and do further search.
/* Parsed as (offset & (1ULL << 63)) ? 0 : offset -- start at 0 when the top bit is set. */
10627 key.offset = offset & (1ULL << 63) ? 0 : offset;
10629 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10634 * Search afterwards to get correct one
10635 * NOTE: As we must do a comprehensive check on the data backref to
10636 * make sure the dref count also matches, we must iterate all file
10637 * extents for that inode.
10640 leaf = path.nodes[0];
10641 slot = path.slots[0];
10643 if (slot >= btrfs_header_nritems(leaf))
10645 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Test for items that are not EXTENT_DATA of this inode. */
10646 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10648 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10650 * Except normal disk bytenr and disk num bytes, we still
10651 * need to do extra check on dbackref offset as
10652 * dbackref offset = file_offset - file_extent_offset
10654 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10655 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10656 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10661 ret = btrfs_next_item(root, &path);
10666 btrfs_release_path(&path);
10667 if (found_count != count) {
10669 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10670 bytenr, len, root_id, objectid, offset, count, found_count);
10671 return REFERENCER_MISSING;
10677 * Check if the referencer of a shared data backref exists
/*
 * Implementation notes: the block at @parent is read and its items are
 * scanned for a non-inline EXTENT_DATA item whose disk bytenr equals @bytenr.
 * If none is found, REFERENCER_MISSING is returned after the "referencer
 * lost" message.
 * NOTE(review): the items are read with the item accessors, so @parent is
 * presumably expected to be a leaf -- no level check is visible here.
 */
10679 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10680 u64 parent, u64 bytenr)
10682 struct extent_buffer *eb;
10683 struct btrfs_key key;
10684 struct btrfs_file_extent_item *fi;
10685 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10687 int found_parent = 0;
10690 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10691 if (!extent_buffer_uptodate(eb))
10694 nr = btrfs_header_nritems(eb);
10695 for (i = 0; i < nr; i++) {
10696 btrfs_item_key_to_cpu(eb, &key, i);
/* Test for items that are not file extents. */
10697 if (key.type != BTRFS_EXTENT_DATA_KEY)
10700 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Test for inline extents. */
10701 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10704 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10711 free_extent_buffer(eb);
10712 if (!found_parent) {
10713 error("shared extent %llu referencer lost (parent: %llu)",
10715 return REFERENCER_MISSING;
10721 * This function will check a given extent item, including its backref and
10722 * itself (like crossing stripe boundary and type)
10724 * Since we don't use extent_record anymore, introduce new error bits
/*
 * Implementation notes:
 *  - the file-scope counter bytes_used is increased by key.offset for an
 *    EXTENT_ITEM key and by nodesize in the other visible branch;
 *  - items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format;
 *  - a metadata extent is tested with check_crossing_stripes();
 *  - every inline ref is dispatched on its type to the matching
 *    check_*_backref() helper; an unrecognised type records UNKNOWN_TYPE and
 *    a ref running past the item end records ITEM_SIZE_MISMATCH.
 * NOTE(review): how the helpers' results are merged into err and how the ref
 * loop is formed are not visible in this listing.
 */
10726 static int check_extent_item(struct btrfs_fs_info *fs_info,
10727 struct extent_buffer *eb, int slot)
10729 struct btrfs_extent_item *ei;
10730 struct btrfs_extent_inline_ref *iref;
10731 struct btrfs_extent_data_ref *dref;
10735 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10736 u32 item_size = btrfs_item_size_nr(eb, slot);
10741 struct btrfs_key key;
10745 btrfs_item_key_to_cpu(eb, &key, slot);
10746 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10747 bytes_used += key.offset;
10749 bytes_used += nodesize;
10751 if (item_size < sizeof(*ei)) {
10753 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10754 * old thing when on disk format is still un-determined.
10755 * No need to care about it anymore
10757 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10761 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10762 flags = btrfs_extent_flags(eb, ei);
10764 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10766 if (metadata && check_crossing_stripes(global_info, key.objectid,
10768 error("bad metadata [%llu, %llu) crossing stripe boundary",
10769 key.objectid, key.objectid + nodesize);
10770 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the fixed-size extent item. */
10773 ptr = (unsigned long)(ei + 1);
10775 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10776 /* Old EXTENT_ITEM metadata */
10777 struct btrfs_tree_block_info *info;
10779 info = (struct btrfs_tree_block_info *)ptr;
10780 level = btrfs_tree_block_level(eb, info);
10781 ptr += sizeof(struct btrfs_tree_block_info);
10783 /* New METADATA_ITEM */
10784 level = key.offset;
10786 end = (unsigned long)ei + item_size;
10789 /* Reached extent item end normally */
10793 /* Beyond extent item end, wrong item size */
10795 err |= ITEM_SIZE_MISMATCH;
10796 error("extent item at bytenr %llu slot %d has wrong size",
10801 /* Now check every backref in this extent item */
10802 iref = (struct btrfs_extent_inline_ref *)ptr;
10803 type = btrfs_extent_inline_ref_type(eb, iref);
10804 offset = btrfs_extent_inline_ref_offset(eb, iref);
10806 case BTRFS_TREE_BLOCK_REF_KEY:
10807 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10811 case BTRFS_SHARED_BLOCK_REF_KEY:
10812 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10816 case BTRFS_EXTENT_DATA_REF_KEY:
10817 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10818 ret = check_extent_data_backref(fs_info,
10819 btrfs_extent_data_ref_root(eb, dref),
10820 btrfs_extent_data_ref_objectid(eb, dref),
10821 btrfs_extent_data_ref_offset(eb, dref),
10822 key.objectid, key.offset,
10823 btrfs_extent_data_ref_count(eb, dref));
10826 case BTRFS_SHARED_DATA_REF_KEY:
10827 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10831 error("extent[%llu %d %llu] has unknown ref type: %d",
10832 key.objectid, key.type, key.offset, type);
10833 err |= UNKNOWN_TYPE;
/* Step to the next inline ref; its size depends on the ref type. */
10837 ptr += btrfs_extent_inline_ref_size(type);
10845 * Check if a dev extent item is referred correctly by its chunk
/*
 * Implementation notes: the chunk named by the dev extent is looked up in the
 * chunk tree with key (chunk_objectid, CHUNK_ITEM, chunk_offset). The chunk
 * length is compared with the dev extent length, and the chunk's stripes are
 * scanned for one whose (devid, offset) equals the dev extent key's
 * (objectid, offset). If no chunk is found, REFERENCER_MISSING is returned
 * after the "did not find the related chunk" message.
 * NOTE(review): the assignment of the leaf pointer l and the handling of the
 * search result are not visible in this listing.
 */
10847 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10848 struct extent_buffer *eb, int slot)
10850 struct btrfs_root *chunk_root = fs_info->chunk_root;
10851 struct btrfs_dev_extent *ptr;
10852 struct btrfs_path path;
10853 struct btrfs_key chunk_key;
10854 struct btrfs_key devext_key;
10855 struct btrfs_chunk *chunk;
10856 struct extent_buffer *l;
10860 int found_chunk = 0;
10863 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10864 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10865 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the fields stored in the dev extent. */
10867 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10868 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10869 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10871 btrfs_init_path(&path);
10872 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10877 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10878 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that points back at this dev extent. */
10881 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10882 for (i = 0; i < num_stripes; i++) {
10883 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10884 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10886 if (devid == devext_key.objectid &&
10887 offset == devext_key.offset) {
10893 btrfs_release_path(&path);
10894 if (!found_chunk) {
10896 "device extent[%llu, %llu, %llu] did not find the related chunk",
10897 devext_key.objectid, devext_key.offset, length);
10898 return REFERENCER_MISSING;
10904 * Check if the used space is correct with the dev item
10906 static int check_dev_item(struct btrfs_fs_info *fs_info,
10907 struct extent_buffer *eb, int slot)
10909 struct btrfs_root *dev_root = fs_info->dev_root;
10910 struct btrfs_dev_item *dev_item;
10911 struct btrfs_path path;
10912 struct btrfs_key key;
10913 struct btrfs_dev_extent *ptr;
10919 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10920 dev_id = btrfs_device_id(eb, dev_item);
10921 used = btrfs_device_bytes_used(eb, dev_item);
10923 key.objectid = dev_id;
10924 key.type = BTRFS_DEV_EXTENT_KEY;
10927 btrfs_init_path(&path);
10928 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10930 btrfs_item_key_to_cpu(eb, &key, slot);
10931 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10932 key.objectid, key.type, key.offset);
10933 btrfs_release_path(&path);
10934 return REFERENCER_MISSING;
10937 /* Iterate dev_extents to calculate the used space of a device */
10939 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10942 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10943 if (key.objectid > dev_id)
10945 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10948 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10949 struct btrfs_dev_extent);
10950 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10952 ret = btrfs_next_item(dev_root, &path);
10956 btrfs_release_path(&path);
10958 if (used != total) {
10959 btrfs_item_key_to_cpu(eb, &key, slot);
10961 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10962 total, used, BTRFS_ROOT_TREE_OBJECTID,
10963 BTRFS_DEV_EXTENT_KEY, dev_id);
10964 return ACCOUNTING_MISMATCH;
10970 * Check a block group item against its referencer (chunk) and check its
10971 * used space against the extent/metadata items
/*
 * Cross-check the block group item at @slot of @eb against the chunk tree
 * and the extent tree.
 *
 * Step 1: look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 * CHUNK_ITEM, block group start) in the chunk tree and verify its length.
 * Step 2: starting from the block group's bytenr, walk EXTENT_ITEM and
 * METADATA_ITEM keys that fall inside the block group, accumulate the bytes
 * they cover into total, and compare each extent's DATA / TREE_BLOCK flag
 * with the block group flags.
 *
 * Findings are reported with error() and collected in err as
 * REFERENCER_MISSING, REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH and
 * ACCOUNTING_MISMATCH.
 */
10973 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10974 struct extent_buffer *eb, int slot)
10976 struct btrfs_root *extent_root = fs_info->extent_root;
10977 struct btrfs_root *chunk_root = fs_info->chunk_root;
10978 struct btrfs_block_group_item *bi;
10979 struct btrfs_block_group_item bg_item;
10980 struct btrfs_path path;
10981 struct btrfs_key bg_key;
10982 struct btrfs_key chunk_key;
10983 struct btrfs_key extent_key;
10984 struct btrfs_chunk *chunk;
10985 struct extent_buffer *leaf;
10986 struct btrfs_extent_item *ei;
10987 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item out of the leaf so it can be read as a plain struct */
10995 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10996 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10997 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10998 used = btrfs_block_group_used(&bg_item);
10999 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item's key offset is the block group's start bytenr */
11001 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11002 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11003 chunk_key.offset = bg_key.objectid;
11005 btrfs_init_path(&path);
11006 /* Search for the referencer chunk */
11007 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11010 "block group[%llu %llu] did not find the related chunk item",
11011 bg_key.objectid, bg_key.offset);
11012 err |= REFERENCER_MISSING;
11014 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11015 struct btrfs_chunk);
11016 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11019 "block group[%llu %llu] related chunk item length does not match",
11020 bg_key.objectid, bg_key.offset);
11021 err |= REFERENCER_MISMATCH;
11024 btrfs_release_path(&path);
11026 /* Search from the block group bytenr */
11027 extent_key.objectid = bg_key.objectid;
11028 extent_key.type = 0;
11029 extent_key.offset = 0;
11031 btrfs_init_path(&path);
11032 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11036 /* Iterate extent tree to account used space */
11038 leaf = path.nodes[0];
11040 /* Search slot can point to the last item beyond leaf nritems */
11041 if (path.slots[0] >= btrfs_header_nritems(leaf))
11044 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Past the end of this block group: nothing more to account */
11045 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11048 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11049 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11051 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): a METADATA_ITEM key offset is not a byte length, so metadata
 * items are presumably accounted as nodesize bytes - confirm.
 */
11054 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11057 total += extent_key.offset;
11059 ei = btrfs_item_ptr(leaf, path.slots[0],
11060 struct btrfs_extent_item);
11061 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks a SYSTEM/METADATA one */
11062 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11063 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11065 "bad extent[%llu, %llu) type mismatch with chunk",
11066 extent_key.objectid,
11067 extent_key.objectid + extent_key.offset);
11068 err |= CHUNK_TYPE_MISMATCH;
11070 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11071 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11072 BTRFS_BLOCK_GROUP_METADATA))) {
11074 "bad extent[%llu, %llu) type mismatch with chunk",
11075 extent_key.objectid,
11076 extent_key.objectid + nodesize);
11077 err |= CHUNK_TYPE_MISMATCH;
11081 ret = btrfs_next_item(extent_root, &path);
11087 btrfs_release_path(&path);
/* The summed extent sizes must equal the block group's own "used" counter */
11089 if (total != used) {
11091 "block group[%llu %llu] used %llu but extent items used %llu",
11092 bg_key.objectid, bg_key.offset, used, total);
11093 err |= ACCOUNTING_MISMATCH;
11099 * Check a chunk item.
11100 * Including checking all referred dev_extents and block group
11102 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11103 struct extent_buffer *eb, int slot)
11105 struct btrfs_root *extent_root = fs_info->extent_root;
11106 struct btrfs_root *dev_root = fs_info->dev_root;
11107 struct btrfs_path path;
11108 struct btrfs_key chunk_key;
11109 struct btrfs_key bg_key;
11110 struct btrfs_key devext_key;
11111 struct btrfs_chunk *chunk;
11112 struct extent_buffer *leaf;
11113 struct btrfs_block_group_item *bi;
11114 struct btrfs_block_group_item bg_item;
11115 struct btrfs_dev_extent *ptr;
11116 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11128 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11129 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11130 length = btrfs_chunk_length(eb, chunk);
11131 chunk_end = chunk_key.offset + length;
11132 if (!IS_ALIGNED(length, sectorsize)) {
11133 error("chunk[%llu %llu) not aligned to %u",
11134 chunk_key.offset, chunk_end, sectorsize);
11135 err |= BYTES_UNALIGNED;
11139 type = btrfs_chunk_type(eb, chunk);
11140 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11141 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11142 error("chunk[%llu %llu) has no chunk type",
11143 chunk_key.offset, chunk_end);
11144 err |= UNKNOWN_TYPE;
11146 if (profile && (profile & (profile - 1))) {
11147 error("chunk[%llu %llu) multiple profiles detected: %llx",
11148 chunk_key.offset, chunk_end, profile);
11149 err |= UNKNOWN_TYPE;
11152 bg_key.objectid = chunk_key.offset;
11153 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11154 bg_key.offset = length;
11156 btrfs_init_path(&path);
11157 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11160 "chunk[%llu %llu) did not find the related block group item",
11161 chunk_key.offset, chunk_end);
11162 err |= REFERENCER_MISSING;
11164 leaf = path.nodes[0];
11165 bi = btrfs_item_ptr(leaf, path.slots[0],
11166 struct btrfs_block_group_item);
11167 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11169 if (btrfs_block_group_flags(&bg_item) != type) {
11171 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11172 chunk_key.offset, chunk_end, type,
11173 btrfs_block_group_flags(&bg_item));
11174 err |= REFERENCER_MISSING;
11178 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11179 for (i = 0; i < num_stripes; i++) {
11180 btrfs_release_path(&path);
11181 btrfs_init_path(&path);
11182 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11183 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11184 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11186 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11189 goto not_match_dev;
11191 leaf = path.nodes[0];
11192 ptr = btrfs_item_ptr(leaf, path.slots[0],
11193 struct btrfs_dev_extent);
11194 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11195 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11196 if (objectid != chunk_key.objectid ||
11197 offset != chunk_key.offset ||
11198 btrfs_dev_extent_length(leaf, ptr) != length)
11199 goto not_match_dev;
11202 err |= BACKREF_MISSING;
11204 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11205 chunk_key.objectid, chunk_end, i);
11208 btrfs_release_path(&path);
11214 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of leaf @eb to the checker that matches its key type.
 *
 * Items are visited slot by slot; the "++slot < nritems" test at the bottom
 * drives the walk.  EXTENT_CSUM items are only accounted into
 * total_csum_bytes, every other known type is handed to its check_*() helper.
 */
11216 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11218 struct btrfs_fs_info *fs_info = root->fs_info;
11219 struct btrfs_key key;
11222 struct btrfs_extent_data_ref *dref;
11227 btrfs_item_key_to_cpu(eb, &key, slot);
/* File extent items are checked in the context of their owning root */
11231 case BTRFS_EXTENT_DATA_KEY:
11232 ret = check_extent_data_item(root, eb, slot);
11235 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11236 ret = check_block_group_item(fs_info, eb, slot);
11239 case BTRFS_DEV_ITEM_KEY:
11240 ret = check_dev_item(fs_info, eb, slot);
11243 case BTRFS_CHUNK_ITEM_KEY:
11244 ret = check_chunk_item(fs_info, eb, slot);
11247 case BTRFS_DEV_EXTENT_KEY:
11248 ret = check_dev_extent_item(fs_info, eb, slot);
11251 case BTRFS_EXTENT_ITEM_KEY:
11252 case BTRFS_METADATA_ITEM_KEY:
11253 ret = check_extent_item(fs_info, eb, slot);
/* Csum items carry no references to verify; just sum up their size */
11256 case BTRFS_EXTENT_CSUM_KEY:
11257 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs: key.offset holds the root or parent bytenr */
11259 case BTRFS_TREE_BLOCK_REF_KEY:
11260 ret = check_tree_block_backref(fs_info, key.offset,
11264 case BTRFS_EXTENT_DATA_REF_KEY:
11265 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11266 ret = check_extent_data_backref(fs_info,
11267 btrfs_extent_data_ref_root(eb, dref),
11268 btrfs_extent_data_ref_objectid(eb, dref),
11269 btrfs_extent_data_ref_offset(eb, dref),
11271 btrfs_extent_data_ref_count(eb, dref));
11274 case BTRFS_SHARED_BLOCK_REF_KEY:
11275 ret = check_shared_block_backref(fs_info, key.offset,
11279 case BTRFS_SHARED_DATA_REF_KEY:
11280 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain in this leaf */
11288 if (++slot < btrfs_header_nritems(eb))
11295 * Helper function for later fs/subvol tree check. To determine if a tree
11296 * block should be checked.
11297 * This function ensures that only the direct referencer with the lowest
11298 * rootid checks a fs/subvolume tree block.
11300 * Backref check at extent tree would detect errors like missing subvolume
11301 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the root that has to check tree block @eb.
 *
 * The block's extent item is located in the extent tree (searching with a
 * METADATA_ITEM key at offset -1 and stepping back to the previous extent
 * item) and its inline refs are scanned.  An inline TREE_BLOCK_REF whose
 * offset (the referencing root id) is lower than root->objectid means a
 * lower-numbered root references the block as well.
 *
 * NOTE(review): the return statements are not visible here; presumably 0 is
 * returned in that case and 1 otherwise, including when the backref lookup
 * fails - confirm.
 */
11303 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11305 struct btrfs_root *extent_root = root->fs_info->extent_root;
11306 struct btrfs_key key;
11307 struct btrfs_path path;
11308 struct extent_buffer *leaf;
11310 struct btrfs_extent_item *ei;
11316 struct btrfs_extent_inline_ref *iref;
11319 btrfs_init_path(&path);
11320 key.objectid = btrfs_header_bytenr(eb);
11321 key.type = BTRFS_METADATA_ITEM_KEY;
11322 key.offset = (u64)-1;
11325 * Any failure in backref resolving means we can't determine
11326 * whom the tree block belongs to.
11327 * So in that case, we need to check that tree block
11329 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11333 ret = btrfs_previous_extent_item(extent_root, &path,
11334 btrfs_header_bytenr(eb));
11338 leaf = path.nodes[0];
11339 slot = path.slots[0];
11340 btrfs_item_key_to_cpu(leaf, &key, slot);
11341 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11343 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11344 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11346 struct btrfs_tree_block_info *info;
11348 info = (struct btrfs_tree_block_info *)(ei + 1);
11349 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11352 item_size = btrfs_item_size_nr(leaf, slot);
11353 ptr = (unsigned long)iref;
11354 end = (unsigned long)ei + item_size;
11355 while (ptr < end) {
11356 iref = (struct btrfs_extent_inline_ref *)ptr;
11357 type = btrfs_extent_inline_ref_type(leaf, iref);
11358 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11361 * We only check the tree block if current root is
11362 * the lowest referencer of it.
11364 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11365 offset < root->objectid) {
11366 btrfs_release_path(&path);
11370 ptr += btrfs_extent_inline_ref_size(type);
11373 * Normally we should also check keyed tree block ref, but that may be
11374 * very time consuming. Inlined ref should already make us skip a lot
11375 * of refs now. So skip search keyed tree block ref.
11379 btrfs_release_path(&path);
11384 * Traversal function for tree block. We will do:
11385 * 1) Skip shared fs/subvolume tree blocks
11386 * 2) Update related bytes accounting
11387 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * For fs/subvolume trees the block is skipped unless should_check() says
 * this root is responsible for it.  Otherwise the global byte counters are
 * updated, the block's own backref is verified with check_tree_block_ref(),
 * leaves are handed to check_leaf_items(), and for nodes every child block
 * is read with read_tree_block() and traversed in turn.
 */
11389 static int traverse_tree_block(struct btrfs_root *root,
11390 struct extent_buffer *node)
11392 struct extent_buffer *eb;
11393 struct btrfs_key key;
11394 struct btrfs_key drop_key;
11402 * Skip shared fs/subvolume tree block, in that case they will
11403 * be checked by referencer with lowest rootid
11405 if (is_fstree(root->objectid) && !should_check(root, node))
11408 /* Update bytes accounting */
11409 total_btree_bytes += node->len;
11410 if (fs_root_objectid(btrfs_header_owner(node)))
11411 total_fs_tree_bytes += node->len;
11412 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11413 total_extent_tree_bytes += node->len;
/* Remember if a reloc-tree block without the RELOC header flag was seen */
11414 if (!found_old_backref &&
11415 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11416 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11417 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11418 found_old_backref = 1;
11420 /* pre-order traversal, check itself first */
11421 level = btrfs_header_level(node);
11422 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11423 btrfs_header_level(node),
11424 btrfs_header_owner(node));
11428 "check %s failed root %llu bytenr %llu level %d, force continue check",
11429 level ? "node":"leaf", root->objectid,
11430 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account its free space as waste and check every item */
11433 btree_space_waste += btrfs_leaf_free_space(root, node);
11434 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as wasted space */
11439 nr = btrfs_header_nritems(node);
11440 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11441 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11442 sizeof(struct btrfs_key_ptr);
11444 /* Then check all its children */
11445 for (i = 0; i < nr; i++) {
11446 u64 blocknr = btrfs_node_blockptr(node, i);
11448 btrfs_node_key_to_cpu(node, &key, i);
/* Keys already covered by the root's drop progress are tested here */
11449 if (level == root->root_item.drop_level &&
11450 is_dropped_key(&key, &drop_key))
11454 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11455 * to call the function itself.
11457 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11458 if (extent_buffer_uptodate(eb)) {
11459 ret = traverse_tree_block(root, eb);
11462 free_extent_buffer(eb);
11469 * Low memory usage version check_chunks_and_extents.
11471 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11473 struct btrfs_path path;
11474 struct btrfs_key key;
11475 struct btrfs_root *root1;
11476 struct btrfs_root *cur_root;
11480 root1 = root->fs_info->chunk_root;
11481 ret = traverse_tree_block(root1, root1->node);
11484 root1 = root->fs_info->tree_root;
11485 ret = traverse_tree_block(root1, root1->node);
11488 btrfs_init_path(&path);
11489 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11491 key.type = BTRFS_ROOT_ITEM_KEY;
11493 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11495 error("cannot find extent treet in tree_root");
11500 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11501 if (key.type != BTRFS_ROOT_ITEM_KEY)
11503 key.offset = (u64)-1;
11505 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11506 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11509 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11510 if (IS_ERR(cur_root) || !cur_root) {
11511 error("failed to read tree: %lld", key.objectid);
11515 ret = traverse_tree_block(cur_root, cur_root->node);
11518 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11519 btrfs_free_fs_root(cur_root);
11521 ret = btrfs_next_item(root1, &path);
11527 btrfs_release_path(&path);
/*
 * Replace the root node of @root with a freshly initialized, empty block.
 *
 * Block c is either an existing buffer that gets an extra reference or a
 * new one from btrfs_alloc_free_block().  Its header is zeroed and rewritten
 * (level, bytenr, generation = trans->transid, mixed backref rev, owner,
 * fsid, chunk tree uuid) and the buffer is marked dirty.  If the new block
 * has the same start as the old root node, the root item is written to the
 * tree root directly with btrfs_update_root(); finally the root is added to
 * the dirty list.
 *
 * NOTE(review): @overwrite presumably selects re-using the current root node
 * instead of allocating a new block - confirm.
 */
11531 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11532 struct btrfs_root *root, int overwrite)
11534 struct extent_buffer *c;
11535 struct extent_buffer *old = root->node;
11538 struct btrfs_disk_key disk_key = {0,0,0};
11544 extent_buffer_get(c);
11547 c = btrfs_alloc_free_block(trans, root,
11549 root->root_key.objectid,
11550 &disk_key, level, 0, 0);
11553 extent_buffer_get(c);
/* Wipe the old header before filling in the new one */
11557 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11558 btrfs_set_header_level(c, level);
11559 btrfs_set_header_bytenr(c, c->start);
11560 btrfs_set_header_generation(c, trans->transid);
11561 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11562 btrfs_set_header_owner(c, root->root_key.objectid);
11564 write_extent_buffer(c, root->fs_info->fsid,
11565 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11567 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11568 btrfs_header_chunk_tree_uuid(c),
11571 btrfs_mark_buffer_dirty(c);
11573 * this case can happen in the following case:
11575 * 1.overwrite previous root.
11577 * 2.reinit reloc data root, this is because we skip pin
11578 * down reloc data tree before which means we can allocate
11579 * same block bytenr here.
11581 if (old->start == c->start) {
11582 btrfs_set_root_generation(&root->root_item,
11584 root->root_item.level = btrfs_header_level(root->node);
11585 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11586 &root->root_key, &root->root_item);
11588 free_extent_buffer(c);
11592 free_extent_buffer(old);
11594 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is detected with test_range_bit() before
 * btrfs_pin_extent() is called.  For ROOT_ITEM keys (other than the extent
 * tree and the two reloc trees) the root block the item points to is read
 * and pinned recursively with tree_root = 0.  For node pointers the child
 * block is pinned directly when level == 1 and @tree_root is not set, and
 * read and recursed into otherwise.
 *
 * @tree_root: non-zero while walking the tree of tree roots itself.
 */
11598 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11599 struct extent_buffer *eb, int tree_root)
11601 struct extent_buffer *tmp;
11602 struct btrfs_root_item *ri;
11603 struct btrfs_key key;
11606 int level = btrfs_header_level(eb);
11612 * If we have pinned this block before, don't pin it again.
11613 * This can not only avoid forever loop with broken filesystem
11614 * but also give us some speedups.
11616 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11617 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11620 btrfs_pin_extent(fs_info, eb->start, eb->len);
11622 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11623 nritems = btrfs_header_nritems(eb);
11624 for (i = 0; i < nritems; i++) {
11626 btrfs_item_key_to_cpu(eb, &key, i);
11627 if (key.type != BTRFS_ROOT_ITEM_KEY)
11629 /* Skip the extent root and reloc roots */
11630 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11631 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11632 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11634 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11635 bytenr = btrfs_disk_root_bytenr(eb, ri);
11638 * If at any point we start needing the real root we
11639 * will have to build a stump root for the root we are
11640 * in, but for now this doesn't actually use the root so
11641 * just pass in extent_root.
11643 tmp = read_tree_block(fs_info->extent_root, bytenr,
11645 if (!extent_buffer_uptodate(tmp)) {
11646 fprintf(stderr, "Error reading root block\n");
11649 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11650 free_extent_buffer(tmp);
11654 bytenr = btrfs_node_blockptr(eb, i);
11656 /* If we aren't the tree root don't read the block */
11657 if (level == 1 && !tree_root) {
11658 btrfs_pin_extent(fs_info, bytenr, nodesize);
11662 tmp = read_tree_block(fs_info->extent_root, bytenr,
11664 if (!extent_buffer_uptodate(tmp)) {
11665 fprintf(stderr, "Error reading tree block\n");
11668 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11669 free_extent_buffer(tmp);
11678 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11682 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11686 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits masks are cleared, then every CHUNK_ITEM in the
 * chunk tree is turned into a block group with btrfs_add_block_group() and
 * its whole range is marked dirty in fs_info->free_space_cache.  A second
 * pass walks the created block groups with
 * btrfs_lookup_first_block_group().
 */
11689 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11691 struct btrfs_block_group_cache *cache;
11692 struct btrfs_path path;
11693 struct extent_buffer *leaf;
11694 struct btrfs_chunk *chunk;
11695 struct btrfs_key key;
11699 btrfs_init_path(&path);
11701 key.type = BTRFS_CHUNK_ITEM_KEY;
11703 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11705 btrfs_release_path(&path);
11710 * We do this in case the block groups were screwed up and had alloc
11711 * bits that aren't actually set on the chunks. This happens with
11712 * restored images every time and could happen in real life I guess.
11714 fs_info->avail_data_alloc_bits = 0;
11715 fs_info->avail_metadata_alloc_bits = 0;
11716 fs_info->avail_system_alloc_bits = 0;
11718 /* First we need to create the in-memory block groups */
11720 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11721 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11723 btrfs_release_path(&path);
11731 leaf = path.nodes[0];
11732 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11733 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11738 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11739 btrfs_add_block_group(fs_info, 0,
11740 btrfs_chunk_type(leaf, chunk),
11741 key.objectid, key.offset,
11742 btrfs_chunk_length(leaf, chunk));
11743 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11744 key.offset + btrfs_chunk_length(leaf, chunk));
11749 cache = btrfs_lookup_first_block_group(fs_info, start);
11753 start = cache->key.objectid + cache->key.offset;
11756 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance and re-create the data reloc tree.
 *
 * Inside transaction @trans the balance item (BTRFS_BALANCE_OBJECTID,
 * BALANCE_ITEM) is deleted from the tree root, the root items whose objectid
 * is BTRFS_TREE_RELOC_OBJECTID are deleted in batches with
 * btrfs_del_items(), and finally the data reloc tree is read, re-initialized
 * with btrfs_fsck_reinit_root() and given a new root directory.
 */
11760 static int reset_balance(struct btrfs_trans_handle *trans,
11761 struct btrfs_fs_info *fs_info)
11763 struct btrfs_root *root = fs_info->tree_root;
11764 struct btrfs_path path;
11765 struct extent_buffer *leaf;
11766 struct btrfs_key key;
11767 int del_slot, del_nr = 0;
11771 btrfs_init_path(&path);
11772 key.objectid = BTRFS_BALANCE_OBJECTID;
11773 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len = -1, cow = 1: search with the intent to delete */
11775 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11780 goto reinit_data_reloc;
11785 ret = btrfs_del_item(trans, root, &path);
11788 btrfs_release_path(&path);
11790 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11791 key.type = BTRFS_ROOT_ITEM_KEY;
11793 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11797 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11802 ret = btrfs_del_items(trans, root, &path,
11809 btrfs_release_path(&path);
11812 ret = btrfs_search_slot(trans, root, &key, &path,
11819 leaf = path.nodes[0];
11820 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11821 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11823 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11828 del_slot = path.slots[0];
11837 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11841 btrfs_release_path(&path);
/* From here on "root" refers to the data reloc tree, not the tree root */
11844 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11845 key.type = BTRFS_ROOT_ITEM_KEY;
11846 key.offset = (u64)-1;
11847 root = btrfs_read_fs_root(fs_info, &key);
11848 if (IS_ERR(root)) {
11849 fprintf(stderr, "Error reading data reloc tree\n");
11850 ret = PTR_ERR(root);
11853 record_root_in_trans(trans, root);
11854 ret = btrfs_fsck_reinit_root(trans, root, 0);
11857 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11859 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one inside @trans.
 *
 * Visible sequence: the MIXED_GROUPS incompat flag is tested and reported as
 * unsupported, all metadata is pinned with pin_metadata_blocks(), the
 * in-memory block groups are freed and recreated with reset_block_groups(),
 * the extent root is re-initialized with btrfs_fsck_reinit_root(), one block
 * group item per in-memory block group is inserted into the new extent tree,
 * and any pending balance is cleared with reset_balance().
 */
11863 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11864 struct btrfs_fs_info *fs_info)
11870 * The only reason we don't do this is because right now we're just
11871 * walking the trees we find and pinning down their bytes, we don't look
11872 * at any of the leaves. In order to do mixed groups we'd have to check
11873 * the leaves of any fs roots and pin down the bytes for any file
11874 * extents we find. Not hard but why do it if we don't have to?
11876 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11877 fprintf(stderr, "We don't support re-initing the extent tree "
11878 "for mixed block groups yet, please notify a btrfs "
11879 "developer you want to do this so they can add this "
11880 "functionality.\n");
11885 * first we need to walk all of the trees except the extent tree and pin
11886 * down the bytes that are in use so we don't overwrite any existing
11889 ret = pin_metadata_blocks(fs_info);
11891 fprintf(stderr, "error pinning down used bytes\n");
11896 * Need to drop all the block groups since we're going to recreate all
11899 btrfs_free_block_groups(fs_info);
11900 ret = reset_block_groups(fs_info);
11902 fprintf(stderr, "error resetting the block groups\n");
11906 /* Ok we can allocate now, reinit the extent root */
11907 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11909 fprintf(stderr, "extent root initialization failed\n");
11911 * When the transaction code is updated we should end the
11912 * transaction, but for now progs only knows about commit so
11913 * just return an error.
11919 * Now we have all the in-memory block groups setup so we can make
11920 * allocations properly, and the metadata we care about is safe since we
11921 * pinned all of it above.
11924 struct btrfs_block_group_cache *cache;
11926 cache = btrfs_lookup_first_block_group(fs_info, start);
11929 start = cache->key.objectid + cache->key.offset;
11930 ret = btrfs_insert_item(trans, fs_info->extent_root,
11931 &cache->key, &cache->item,
11932 sizeof(cache->item));
11934 fprintf(stderr, "Error adding block group\n");
11937 btrfs_extent_post_op(trans, fs_info->extent_root);
11940 ret = reset_balance(trans, fs_info);
11942 fprintf(stderr, "error resetting the pending balance\n");
11947 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11949 struct btrfs_path path;
11950 struct btrfs_trans_handle *trans;
11951 struct btrfs_key key;
11954 printf("Recowing metadata block %llu\n", eb->start);
11955 key.objectid = btrfs_header_owner(eb);
11956 key.type = BTRFS_ROOT_ITEM_KEY;
11957 key.offset = (u64)-1;
11959 root = btrfs_read_fs_root(root->fs_info, &key);
11960 if (IS_ERR(root)) {
11961 fprintf(stderr, "Couldn't find owner root %llu\n",
11963 return PTR_ERR(root);
11966 trans = btrfs_start_transaction(root, 1);
11968 return PTR_ERR(trans);
11970 btrfs_init_path(&path);
11971 path.lowest_level = btrfs_header_level(eb);
11972 if (path.lowest_level)
11973 btrfs_node_key_to_cpu(eb, &key, 0);
11975 btrfs_item_key_to_cpu(eb, &key, 0);
11977 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11978 btrfs_commit_transaction(trans, root);
11979 btrfs_release_path(&path);
11983 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11985 struct btrfs_path path;
11986 struct btrfs_trans_handle *trans;
11987 struct btrfs_key key;
11990 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11991 bad->key.type, bad->key.offset);
11992 key.objectid = bad->root_id;
11993 key.type = BTRFS_ROOT_ITEM_KEY;
11994 key.offset = (u64)-1;
11996 root = btrfs_read_fs_root(root->fs_info, &key);
11997 if (IS_ERR(root)) {
11998 fprintf(stderr, "Couldn't find owner root %llu\n",
12000 return PTR_ERR(root);
12003 trans = btrfs_start_transaction(root, 1);
12005 return PTR_ERR(trans);
12007 btrfs_init_path(&path);
12008 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12014 ret = btrfs_del_item(trans, root, &path);
12016 btrfs_commit_transaction(trans, root);
12017 btrfs_release_path(&path);
12021 static int zero_log_tree(struct btrfs_root *root)
12023 struct btrfs_trans_handle *trans;
12026 trans = btrfs_start_transaction(root, 1);
12027 if (IS_ERR(trans)) {
12028 ret = PTR_ERR(trans);
12031 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12032 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12033 ret = btrfs_commit_transaction(trans, root);
12037 static int populate_csum(struct btrfs_trans_handle *trans,
12038 struct btrfs_root *csum_root, char *buf, u64 start,
12045 while (offset < len) {
12046 sectorsize = csum_root->sectorsize;
12047 ret = read_extent_data(csum_root, buf, start + offset,
12051 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12052 start + offset, buf, sectorsize);
12055 offset += sectorsize;
/*
 * Generate checksums for the file extents of one fs/subvolume tree.
 *
 * A sector-sized bounce buffer is allocated, @cur_root is walked item by
 * item, and for every EXTENT_DATA item whose type is BTRFS_FILE_EXTENT_REG
 * the on-disk range (disk_bytenr, disk_num_bytes) is passed to
 * populate_csum().
 *
 * NOTE(review): -EEXIST from populate_csum() is tested separately, presumably
 * so that an already existing checksum is not treated as fatal - confirm.
 */
12060 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12061 struct btrfs_root *csum_root,
12062 struct btrfs_root *cur_root)
12064 struct btrfs_path path;
12065 struct btrfs_key key;
12066 struct extent_buffer *node;
12067 struct btrfs_file_extent_item *fi;
12074 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12078 btrfs_init_path(&path);
12082 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12085 /* Iterate all regular file extents and fill its csum */
12087 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12089 if (key.type != BTRFS_EXTENT_DATA_KEY)
12091 node = path.nodes[0];
12092 slot = path.slots[0];
12093 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12094 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the extent as stored on disk, not the file offsets */
12096 start = btrfs_file_extent_disk_bytenr(node, fi);
12097 len = btrfs_file_extent_disk_num_bytes(node, fi);
12099 ret = populate_csum(trans, csum_root, buf, start, len);
12100 if (ret == -EEXIST)
12106 * TODO: if next leaf is corrupted, jump to nearest next valid
12109 ret = btrfs_next_item(cur_root, &path);
12119 btrfs_release_path(&path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree.
 *
 * ROOT_ITEMs in the tree root are iterated starting at
 * BTRFS_FS_TREE_OBJECTID; each one that is_fstree() accepts is opened with
 * btrfs_read_fs_root() and passed to fill_csum_tree_from_one_fs_root().
 * Objectids above BTRFS_LAST_FREE_OBJECTID are tested for explicitly.
 */
12124 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12125 struct btrfs_root *csum_root)
12127 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12128 struct btrfs_path path;
12129 struct btrfs_root *tree_root = fs_info->tree_root;
12130 struct btrfs_root *cur_root;
12131 struct extent_buffer *node;
12132 struct btrfs_key key;
12136 btrfs_init_path(&path);
12137 key.objectid = BTRFS_FS_TREE_OBJECTID;
12139 key.type = BTRFS_ROOT_ITEM_KEY;
12140 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12149 node = path.nodes[0];
12150 slot = path.slots[0];
12151 btrfs_item_key_to_cpu(node, &key, slot);
12152 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12154 if (key.type != BTRFS_ROOT_ITEM_KEY)
12156 if (!is_fstree(key.objectid))
/* Offset -1 asks btrfs_read_fs_root() for the latest version of the root */
12158 key.offset = (u64)-1;
12160 cur_root = btrfs_read_fs_root(fs_info, &key);
12161 if (IS_ERR(cur_root) || !cur_root) {
12162 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12166 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12171 ret = btrfs_next_item(tree_root, &path);
12181 btrfs_release_path(&path);
/*
 * Rebuild the csum tree from the data extents recorded in the extent tree.
 *
 * The extent tree is walked leaf by leaf; every EXTENT_ITEM carrying
 * BTRFS_EXTENT_FLAG_DATA is handed to populate_csum() starting at its
 * bytenr (key.objectid), using a sector-sized bounce buffer.
 */
12185 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12186 struct btrfs_root *csum_root)
12188 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12189 struct btrfs_path path;
12190 struct btrfs_extent_item *ei;
12191 struct extent_buffer *leaf;
12193 struct btrfs_key key;
12196 btrfs_init_path(&path);
12198 key.type = BTRFS_EXTENT_ITEM_KEY;
12200 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12202 btrfs_release_path(&path);
12206 buf = malloc(csum_root->sectorsize);
12208 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
12213 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12214 ret = btrfs_next_leaf(extent_root, &path);
12222 leaf = path.nodes[0];
12224 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12225 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12230 ei = btrfs_item_ptr(leaf, path.slots[0],
12231 struct btrfs_extent_item);
/* Only data extents are checksummed here */
12232 if (!(btrfs_extent_flags(leaf, ei) &
12233 BTRFS_EXTENT_FLAG_DATA)) {
12238 ret = populate_csum(trans, csum_root, buf, key.objectid,
12245 btrfs_release_path(&path);
12251 * Recalculate the csum and put it into the csum tree.
12253 * Extent tree init will wipe out all the extent info, so in that case, we
12254 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12255 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for refilling the csum tree.
 *
 * @trans:          transaction handle, passed to the selected helper
 * @csum_root:      csum tree root, passed to the selected helper
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(); otherwise
 *                  fill_csum_tree_from_extent() is used
 *
 * The return value of the selected helper is returned unchanged.
 */
12257 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12258 struct btrfs_root *csum_root,
12259 int search_fs_tree)
12261 if (search_fs_tree)
12262 return fill_csum_tree_from_fs(trans, csum_root);
12264 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Entries are taken from the front of the tree and removed one at a time
 * until the tree is empty; afterwards the tree itself is freed and the global
 * pointer is reset to NULL, so build_roots_info_cache() allocates a fresh tree
 * on its next call.
 *
 * NOTE(review): each entry is mapped back to its root_item_info via
 * container_of(), presumably so the containing structure can be freed -
 * confirm.
 */
12267 static void free_roots_info_cache(void)
12269 if (!roots_info_cache)
12272 while (!cache_tree_empty(roots_info_cache)) {
12273 struct cache_extent *entry;
12274 struct root_item_info *rii;
12276 entry = first_cache_extent(roots_info_cache);
12279 remove_cache_extent(roots_info_cache, entry);
12280 rii = container_of(entry, struct root_item_info, cache_extent);
12284 free(roots_info_cache);
12285 roots_info_cache = NULL;
/*
 * Scan the extent tree and remember, per root id, the highest level tree
 * block that references that root.
 *
 * Only EXTENT_ITEM keys flagged BTRFS_EXTENT_FLAG_TREE_BLOCK and METADATA_ITEM
 * keys are of interest, and only when the first inline ref has type
 * BTRFS_TREE_BLOCK_REF_KEY; the offset of that ref is used as the root id.
 * Results are stored in the global roots_info_cache as root_item_info
 * records keyed by root id (cache_extent.start = root id, size = 1), holding
 * the bytenr, generation and level of the best candidate seen so far.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * NOTE(review): node_count is reset to 1 whenever a higher level is found;
 * the branch for an equal level presumably increments it so that
 * maybe_repair_root_item() can detect an ambiguous top level - confirm.
 */
12288 static int build_roots_info_cache(struct btrfs_fs_info *info)
12291 struct btrfs_key key;
12292 struct extent_buffer *leaf;
12293 struct btrfs_path path;
/* Allocate the global cache lazily on first use. */
12295 if (!roots_info_cache) {
12296 roots_info_cache = malloc(sizeof(*roots_info_cache));
12297 if (!roots_info_cache)
12299 cache_tree_init(roots_info_cache);
12302 btrfs_init_path(&path);
12304 key.type = BTRFS_EXTENT_ITEM_KEY;
12306 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12309 leaf = path.nodes[0];
12312 struct btrfs_key found_key;
12313 struct btrfs_extent_item *ei;
12314 struct btrfs_extent_inline_ref *iref;
12315 int slot = path.slots[0];
12320 struct cache_extent *entry;
12321 struct root_item_info *rii;
/* Past the last item of this leaf: move on and refresh leaf and slot. */
12323 if (slot >= btrfs_header_nritems(leaf)) {
12324 ret = btrfs_next_leaf(info->extent_root, &path);
12331 leaf = path.nodes[0];
12332 slot = path.slots[0];
12335 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12337 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12338 found_key.type != BTRFS_METADATA_ITEM_KEY)
12341 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12342 flags = btrfs_extent_flags(leaf, ei);
12344 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12345 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the level is the key offset and the inline ref follows the
 * extent item directly.  EXTENT_ITEM: a btrfs_tree_block_info sits between
 * the extent item and the inline ref and carries the level.
 */
12348 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12349 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12350 level = found_key.offset;
12352 struct btrfs_tree_block_info *binfo;
12354 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12355 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12356 level = btrfs_tree_block_level(leaf, binfo);
12360 * For a root extent, it must be of the following type and the
12361 * first (and only one) iref in the item.
12363 type = btrfs_extent_inline_ref_type(leaf, iref);
12364 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12367 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12368 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New record: (u8)-1 marks that no level has been recorded yet. */
12370 rii = malloc(sizeof(struct root_item_info));
12375 rii->cache_extent.start = root_id;
12376 rii->cache_extent.size = 1;
12377 rii->level = (u8)-1;
12378 entry = &rii->cache_extent;
12379 ret = insert_cache_extent(roots_info_cache, entry);
12382 rii = container_of(entry, struct root_item_info,
12386 ASSERT(rii->cache_extent.start == root_id);
12387 ASSERT(rii->cache_extent.size == 1);
/* A higher level (or the first block seen) replaces the current candidate. */
12389 if (level > rii->level || rii->level == (u8)-1) {
12390 rii->level = level;
12391 rii->bytenr = found_key.objectid;
12392 rii->gen = btrfs_extent_generation(leaf, ei);
12393 rii->node_count = 1;
12394 } else if (level == rii->level) {
12402 btrfs_release_path(&path);
/*
 * Check one root item against the root node recorded in roots_info_cache and
 * optionally rewrite it.
 *
 * The root item at the current slot of @path is copied out of the leaf and
 * its bytenr, level and generation are compared with the cached values for
 * root_key->objectid.  On a mismatch a message is printed (suppressed for the
 * read-only probe made in repair mode) and, when @read_only_mode is 0, the
 * three fields are overwritten with the cached values and the item is written
 * back into the leaf.
 *
 * @path:           points at the root item to check (nodes[0] / slots[0])
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero means check only, do not modify the leaf
 *
 * NOTE(review): the return values are not visible in this excerpt; the caller
 * repair_root_items() stores the result in ret - confirm the contract there.
 */
12407 static int maybe_repair_root_item(struct btrfs_path *path,
12408 const struct btrfs_key *root_key,
12409 const int read_only_mode)
12411 const u64 root_id = root_key->objectid;
12412 struct cache_extent *entry;
12413 struct root_item_info *rii;
12414 struct btrfs_root_item ri;
12415 unsigned long offset;
12417 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12420 "Error: could not find extent items for root %llu\n",
12421 root_key->objectid);
12425 rii = container_of(entry, struct root_item_info, cache_extent);
12426 ASSERT(rii->cache_extent.start == root_id);
12427 ASSERT(rii->cache_extent.size == 1);
/* A node_count other than 1 is reported as: no btree root extent found. */
12429 if (rii->node_count != 1) {
12431 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into a local struct for comparison and editing. */
12436 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12437 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12439 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12440 btrfs_root_level(&ri) != rii->level ||
12441 btrfs_root_generation(&ri) != rii->gen) {
12444 * If we're in repair mode but our caller told us to not update
12445 * the root item, i.e. just check if it needs to be updated, don't
12446 * print this message, since the caller will call us again shortly
12447 * for the same root item without read only mode (the caller will
12448 * open a transaction first).
12450 if (!(read_only_mode && repair))
12452 "%sroot item for root %llu,"
12453 " current bytenr %llu, current gen %llu, current level %u,"
12454 " new bytenr %llu, new gen %llu, new level %u\n",
12455 (read_only_mode ? "" : "fixing "),
12457 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12458 btrfs_root_level(&ri),
12459 rii->bytenr, rii->gen, rii->level);
12461 if (btrfs_root_generation(&ri) > rii->gen) {
12463 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12464 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy with the cached values and write it back in place. */
12468 if (!read_only_mode) {
12469 btrfs_set_root_bytenr(&ri, rii->bytenr);
12470 btrfs_set_root_level(&ri, rii->level);
12471 btrfs_set_root_generation(&ri, rii->gen);
12472 write_extent_buffer(path->nodes[0], &ri,
12473 offset, sizeof(ri));
12483 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12484 * caused read-only snapshots to be corrupted if they were created at a moment
12485 * when the source subvolume/snapshot had orphan items. The issue was that the
12486 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12487 * node instead of the post orphan cleanup root node.
12488 * So this function, and its callees, just detects and fixes those cases. Even
12489 * though the regression was for read-only snapshots, this function applies to
12490 * any snapshot/subvolume root.
12491 * This must be run before any other repair code - not doing so makes other
12492 * repair code delete or modify backrefs in the extent tree, for example, which
12493 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 *
 * The roots info cache is built first, then ROOT_ITEM keys are visited
 * starting at objectid BTRFS_FIRST_FREE_OBJECTID and each one is passed to
 * maybe_repair_root_item().  The cache is released again before returning.
 *
 * @info: filesystem to operate on (uses info->tree_root)
 *
 * Return: as used by cmd_check(), a negative value is a negated errno and a
 * positive value is the number of roots that were fixed (repair mode) or
 * found outdated (read-only mode).
 */
12495 static int repair_root_items(struct btrfs_fs_info *info)
12497 struct btrfs_path path;
12498 struct btrfs_key key;
12499 struct extent_buffer *leaf;
12500 struct btrfs_trans_handle *trans = NULL;
12503 int need_trans = 0;
12505 btrfs_init_path(&path);
12507 ret = build_roots_info_cache(info);
12511 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12512 key.type = BTRFS_ROOT_ITEM_KEY;
12517 * Avoid opening and committing transactions if a leaf doesn't have
12518 * any root items that need to be fixed, so that we avoid rotating
12519 * backup roots unnecessarily.
12522 trans = btrfs_start_transaction(info->tree_root, 1);
12523 if (IS_ERR(trans)) {
12524 ret = PTR_ERR(trans);
12529 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12533 leaf = path.nodes[0];
12536 struct btrfs_key found_key;
/* End of the current leaf: look up the next key before dropping the path. */
12538 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12539 int no_more_keys = find_next_key(&path, &key);
12541 btrfs_release_path(&path);
12543 ret = btrfs_commit_transaction(trans,
12555 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12557 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12559 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/*
 * Without an open transaction (trans == NULL) the item is only checked
 * (read_only_mode = 1); with a transaction it may be rewritten.
 */
12562 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * NOTE(review): presumably this is the point where a read-only probe found
 * work to do and the leaf is revisited with a transaction (see need_trans) -
 * confirm.
 */
12566 if (!trans && repair) {
12569 btrfs_release_path(&path);
12579 free_roots_info_cache();
12580 btrfs_release_path(&path);
12582 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of the filesystem.
 *
 * Block groups are looked up starting from the position held in current;
 * btrfs_clear_free_space_cache() is called for each one and current is then
 * advanced to the end of that block group (key.objectid + key.offset).
 * Afterwards a transaction on the tree root is started, the cache generation
 * in the super block copy is set to (u64)-1 and the transaction is committed.
 *
 * @fs_info: filesystem whose free space cache is cleared
 *
 * Return: PTR_ERR() of the transaction handle when the transaction cannot be
 * started; other return paths are not visible in this excerpt.
 *
 * NOTE(review): the result of the final btrfs_commit_transaction() call is
 * discarded, so a failed commit is not reported to the caller - confirm
 * whether that is intended.
 */
12589 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12591 struct btrfs_trans_handle *trans;
12592 struct btrfs_block_group_cache *bg_cache;
12596 /* Clear all free space cache inodes and their extent data */
12598 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12601 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12604 current = bg_cache->key.objectid + bg_cache->key.offset;
12607 /* Don't forget to set cache_generation to -1 */
12608 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12609 if (IS_ERR(trans)) {
12610 error("failed to update super block cache generation");
12611 return PTR_ERR(trans);
12613 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12614 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for the check command; cmd_check() passes this array to usage()
 * when option parsing fails or the device argument is missing.
 *
 * The first entry is the synopsis, followed by a one-line summary, a longer
 * description and one or more lines per option.  Every option listed here has
 * a matching entry in the getopt string or long_options table of cmd_check();
 * keep the two in sync when adding or removing options.
 */
12619 const char * const cmd_check_usage[] = {
12620 "btrfs check [options] <device>",
12621 "Check structural integrity of a filesystem (unmounted).",
12622 "Check structural integrity of an unmounted filesystem. Verify internal",
12623 "trees' consistency and item connectivity. In the repair mode try to",
12624 "fix the problems found. ",
12625 "WARNING: the repair mode is considered dangerous",
12627 "-s|--super <superblock> use this superblock copy",
12628 "-b|--backup use the first valid backup root copy",
12629 "--repair try to repair the filesystem",
12630 "--readonly run in read-only mode (default)",
12631 "--init-csum-tree create a new CRC tree",
12632 "--init-extent-tree create a new extent tree",
12633 "--mode <MODE> allows choice of memory/IO trade-offs",
12634 " where MODE is one of:",
12635 " original - read inodes and extents to memory (requires",
12636 " more memory, does less IO)",
12637 " lowmem - try to use less memory but read blocks again",
12639 "--check-data-csum verify checksums of data blocks",
12640 "-Q|--qgroup-report print a report on qgroup consistency",
12641 "-E|--subvol-extents <subvolid>",
12642 " print subvolume extents and sharing state",
12643 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12644 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12645 "-p|--progress indicate progress",
12646 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12650 int cmd_check(int argc, char **argv)
12652 struct cache_tree root_cache;
12653 struct btrfs_root *root;
12654 struct btrfs_fs_info *info;
12657 u64 tree_root_bytenr = 0;
12658 u64 chunk_root_bytenr = 0;
12659 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12663 int init_csum_tree = 0;
12665 int clear_space_cache = 0;
12666 int qgroup_report = 0;
12667 int qgroups_repaired = 0;
12668 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12672 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12673 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12674 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12675 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12676 static const struct option long_options[] = {
12677 { "super", required_argument, NULL, 's' },
12678 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12679 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12680 { "init-csum-tree", no_argument, NULL,
12681 GETOPT_VAL_INIT_CSUM },
12682 { "init-extent-tree", no_argument, NULL,
12683 GETOPT_VAL_INIT_EXTENT },
12684 { "check-data-csum", no_argument, NULL,
12685 GETOPT_VAL_CHECK_CSUM },
12686 { "backup", no_argument, NULL, 'b' },
12687 { "subvol-extents", required_argument, NULL, 'E' },
12688 { "qgroup-report", no_argument, NULL, 'Q' },
12689 { "tree-root", required_argument, NULL, 'r' },
12690 { "chunk-root", required_argument, NULL,
12691 GETOPT_VAL_CHUNK_TREE },
12692 { "progress", no_argument, NULL, 'p' },
12693 { "mode", required_argument, NULL,
12695 { "clear-space-cache", required_argument, NULL,
12696 GETOPT_VAL_CLEAR_SPACE_CACHE},
12697 { NULL, 0, NULL, 0}
12700 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12704 case 'a': /* ignored */ break;
12706 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12709 num = arg_strtou64(optarg);
12710 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12712 "super mirror should be less than %d",
12713 BTRFS_SUPER_MIRROR_MAX);
12716 bytenr = btrfs_sb_offset(((int)num));
12717 printf("using SB copy %llu, bytenr %llu\n", num,
12718 (unsigned long long)bytenr);
12724 subvolid = arg_strtou64(optarg);
12727 tree_root_bytenr = arg_strtou64(optarg);
12729 case GETOPT_VAL_CHUNK_TREE:
12730 chunk_root_bytenr = arg_strtou64(optarg);
12733 ctx.progress_enabled = true;
12737 usage(cmd_check_usage);
12738 case GETOPT_VAL_REPAIR:
12739 printf("enabling repair mode\n");
12741 ctree_flags |= OPEN_CTREE_WRITES;
12743 case GETOPT_VAL_READONLY:
12746 case GETOPT_VAL_INIT_CSUM:
12747 printf("Creating a new CRC tree\n");
12748 init_csum_tree = 1;
12750 ctree_flags |= OPEN_CTREE_WRITES;
12752 case GETOPT_VAL_INIT_EXTENT:
12753 init_extent_tree = 1;
12754 ctree_flags |= (OPEN_CTREE_WRITES |
12755 OPEN_CTREE_NO_BLOCK_GROUPS);
12758 case GETOPT_VAL_CHECK_CSUM:
12759 check_data_csum = 1;
12761 case GETOPT_VAL_MODE:
12762 check_mode = parse_check_mode(optarg);
12763 if (check_mode == CHECK_MODE_UNKNOWN) {
12764 error("unknown mode: %s", optarg);
12768 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12769 if (strcmp(optarg, "v1") == 0) {
12770 clear_space_cache = 1;
12771 } else if (strcmp(optarg, "v2") == 0) {
12772 clear_space_cache = 2;
12773 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12776 "invalid argument to --clear-space-cache, must be v1 or v2");
12779 ctree_flags |= OPEN_CTREE_WRITES;
12784 if (check_argc_exact(argc - optind, 1))
12785 usage(cmd_check_usage);
12787 if (ctx.progress_enabled) {
12788 ctx.tp = TASK_NOTHING;
12789 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12792 /* This check is the only reason for --readonly to exist */
12793 if (readonly && repair) {
12794 error("repair options are not compatible with --readonly");
12799 * Not supported yet
12801 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12802 error("low memory mode doesn't support repair yet");
12807 cache_tree_init(&root_cache);
12809 if((ret = check_mounted(argv[optind])) < 0) {
12810 error("could not check mount status: %s", strerror(-ret));
12814 error("%s is currently mounted, aborting", argv[optind]);
12820 /* only allow partial opening under repair mode */
12822 ctree_flags |= OPEN_CTREE_PARTIAL;
12824 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12825 chunk_root_bytenr, ctree_flags);
12827 error("cannot open file system");
12833 global_info = info;
12834 root = info->fs_root;
12835 if (clear_space_cache == 1) {
12836 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12838 "free space cache v2 detected, use --clear-space-cache v2");
12842 printf("Clearing free space cache\n");
12843 ret = clear_free_space_cache(info);
12845 error("failed to clear free space cache");
12848 printf("Free space cache cleared\n");
12851 } else if (clear_space_cache == 2) {
12852 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12853 printf("no free space cache v2 to clear\n");
12857 printf("Clear free space cache v2\n");
12858 ret = btrfs_clear_free_space_tree(info);
12860 error("failed to clear free space cache v2: %d", ret);
12863 printf("free space cache v2 cleared\n");
12869 * repair mode will force us to commit transaction which
12870 * will make us fail to load log tree when mounting.
12872 if (repair && btrfs_super_log_root(info->super_copy)) {
12873 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12879 ret = zero_log_tree(root);
12882 error("failed to zero log tree: %d", ret);
12887 uuid_unparse(info->super_copy->fsid, uuidbuf);
12888 if (qgroup_report) {
12889 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12891 ret = qgroup_verify_all(info);
12898 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12899 subvolid, argv[optind], uuidbuf);
12900 ret = print_extent_state(info, subvolid);
12904 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12906 if (!extent_buffer_uptodate(info->tree_root->node) ||
12907 !extent_buffer_uptodate(info->dev_root->node) ||
12908 !extent_buffer_uptodate(info->chunk_root->node)) {
12909 error("critical roots corrupted, unable to check the filesystem");
12915 if (init_extent_tree || init_csum_tree) {
12916 struct btrfs_trans_handle *trans;
12918 trans = btrfs_start_transaction(info->extent_root, 0);
12919 if (IS_ERR(trans)) {
12920 error("error starting transaction");
12921 ret = PTR_ERR(trans);
12926 if (init_extent_tree) {
12927 printf("Creating a new extent tree\n");
12928 ret = reinit_extent_tree(trans, info);
12934 if (init_csum_tree) {
12935 printf("Reinitialize checksum tree\n");
12936 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12938 error("checksum tree initialization failed: %d",
12945 ret = fill_csum_tree(trans, info->csum_root,
12949 error("checksum tree refilling failed: %d", ret);
12954 * Ok now we commit and run the normal fsck, which will add
12955 * extent entries for all of the items it finds.
12957 ret = btrfs_commit_transaction(trans, info->extent_root);
12962 if (!extent_buffer_uptodate(info->extent_root->node)) {
12963 error("critical: extent_root, unable to check the filesystem");
12968 if (!extent_buffer_uptodate(info->csum_root->node)) {
12969 error("critical: csum_root, unable to check the filesystem");
12975 if (!ctx.progress_enabled)
12976 fprintf(stderr, "checking extents\n");
12977 if (check_mode == CHECK_MODE_LOWMEM)
12978 ret = check_chunks_and_extents_v2(root);
12980 ret = check_chunks_and_extents(root);
12984 "errors found in extent allocation tree or chunk allocation");
12986 ret = repair_root_items(info);
12989 error("failed to repair root items: %s", strerror(-ret));
12993 fprintf(stderr, "Fixed %d roots.\n", ret);
12995 } else if (ret > 0) {
12997 "Found %d roots with an outdated root item.\n",
13000 "Please run a filesystem check with the option --repair to fix them.\n");
13006 if (!ctx.progress_enabled) {
13007 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13008 fprintf(stderr, "checking free space tree\n");
13010 fprintf(stderr, "checking free space cache\n");
13012 ret = check_space_cache(root);
13015 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13016 error("errors found in free space tree");
13018 error("errors found in free space cache");
13023 * We used to have to have these hole extents in between our real
13024 * extents so if we don't have this flag set we need to make sure there
13025 * are no gaps in the file extents for inodes, otherwise we can just
13026 * ignore it when this happens.
13028 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13029 if (!ctx.progress_enabled)
13030 fprintf(stderr, "checking fs roots\n");
13031 if (check_mode == CHECK_MODE_LOWMEM)
13032 ret = check_fs_roots_v2(root->fs_info);
13034 ret = check_fs_roots(root, &root_cache);
13037 error("errors found in fs roots");
13041 fprintf(stderr, "checking csums\n");
13042 ret = check_csums(root);
13045 error("errors found in csum tree");
13049 fprintf(stderr, "checking root refs\n");
13050 /* For low memory mode, check_fs_roots_v2 handles root refs */
13051 if (check_mode != CHECK_MODE_LOWMEM) {
13052 ret = check_root_refs(root, &root_cache);
13055 error("errors found in root refs");
13060 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13061 struct extent_buffer *eb;
13063 eb = list_first_entry(&root->fs_info->recow_ebs,
13064 struct extent_buffer, recow);
13065 list_del_init(&eb->recow);
13066 ret = recow_extent_buffer(root, eb);
13069 error("fails to fix transid errors");
13074 while (!list_empty(&delete_items)) {
13075 struct bad_item *bad;
13077 bad = list_first_entry(&delete_items, struct bad_item, list);
13078 list_del_init(&bad->list);
13080 ret = delete_bad_item(root, bad);
13086 if (info->quota_enabled) {
13087 fprintf(stderr, "checking quota groups\n");
13088 ret = qgroup_verify_all(info);
13091 error("failed to check quota groups");
13095 ret = repair_qgroups(info, &qgroups_repaired);
13098 error("failed to repair quota groups");
13104 if (!list_empty(&root->fs_info->recow_ebs)) {
13105 error("transid errors in file system");
13110 if (found_old_backref) { /*
13111 * there was a disk format change when mixed
13112 * backref was in testing tree. The old format
13113 * existed about one week.
13115 printf("\n * Found old mixed backref format. "
13116 "The old format is not supported! *"
13117 "\n * Please mount the FS in readonly mode, "
13118 "backup data and re-format the FS. *\n\n");
13121 printf("found %llu bytes used, ",
13122 (unsigned long long)bytes_used);
13124 printf("error(s) found\n");
13126 printf("no error found\n");
13127 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13128 printf("total tree bytes: %llu\n",
13129 (unsigned long long)total_btree_bytes);
13130 printf("total fs tree bytes: %llu\n",
13131 (unsigned long long)total_fs_tree_bytes);
13132 printf("total extent tree bytes: %llu\n",
13133 (unsigned long long)total_extent_tree_bytes);
13134 printf("btree space waste bytes: %llu\n",
13135 (unsigned long long)btree_space_waste);
13136 printf("file data blocks allocated: %llu\n referenced %llu\n",
13137 (unsigned long long)data_bytes_allocated,
13138 (unsigned long long)data_bytes_referenced);
13140 free_qgroup_counts();
13141 free_root_recs_tree(&root_cache);
13145 if (ctx.progress_enabled)
13146 task_deinit(ctx.info);