2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bits.  Each trailing comment names the condition the bit reports;
 * the values are single bits so several can be OR-ed into one error word.
 */
#define ROOT_DIR_ERROR		(1<<1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1<<2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1<<3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1<<4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1<<5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1<<6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1<<7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1<<8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1<<9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1<<10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1<<11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1<<12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1<<13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1<<14)	/* no inode_item */
#define LAST_ITEM		(1<<15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1<<16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1<<17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so the value 2 fits in it.
 */
enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  They are OR-ed into a backref's 'errors' field
 * and turned into text by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Per-inode error bits.  They are OR-ed into inode_record's 'errors' field
 * and turned into text by print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every condition gets its own bit so that OR-ed error words stay
 * decodable.  CROSSING_STRIPE_BOUNDARY used to share (1 << 4) with
 * REFERENCER_MISMATCH; it now uses the first free bit, and all other
 * values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility function to allow reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be adjacent or overlapping holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will split the hole as needed and is much more restrictive than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exist.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* For reloc root errors, print the corresponding fs root objectid */
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called by walk_up_tree() when the walk leaves the active shared node at
 * `level`: a higher level becomes the active one and, under the condition
 * below, the records of the node being left are spliced into it.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
/* special case: leaving the root level itself */
1388 if (level == wc->root_level)
/* scan the levels above `level`; `i` becomes the new active level below */
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* the scan must have stopped on a valid level */
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* detach the node being left and switch the active level to `i` */
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
/* below the root level, or root item still referenced: merge into dest */
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation of two roots by searching the tree
 * root: first for the exact ROOT_REF key (parent, child), then by walking
 * the ROOT_BACKREF items of the child.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* step 1: exact lookup of (parent_root_id, ROOT_REF, child_root_id) */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* step 2: iterate the ROOT_BACKREF items whose objectid is the child */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* past the last item of this leaf: continue in the next leaf */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* no longer a backref item of child_root_id */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
/* for a ROOT_BACKREF key the offset is compared against the parent root id */
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
/* not a child of parent_root_id: 0 if some other parent was seen, else 2 */
1478 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM/DIR_INDEX item of the active node's current inode:
 * walk the btrfs_dir_item entries stored in the item, add each name length
 * to the record's found_size and record a backref for the inode or root the
 * entry points to.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
/* holds at most BTRFS_NAME_LEN name bytes; not NUL-terminated by this code */
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
/* one iteration per dir entry stored in this item */
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
/* found_size accumulates the on-disk name length, even when it is too long */
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: copy only what fits in namebuf and flag the backref */
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow directly after the btrfs_dir_item header */
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* entry points to an inode: backref goes into the inode cache */
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
/* entry points to a root item: backref goes into the root cache */
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
/* any other location type is reported and filed under BTRFS_MULTIPLE_OBJECTIDS */
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
/* advance to the next entry: header + name + data */
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * NOTE(review): nritems is presumably the number of entries seen in the loop
 * above (its update is not visible here); a DIR_INDEX item holding more than
 * one is flagged as a duplicate index.
 */
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item: walk the btrfs_inode_ref entries stored in the
 * item and record a backref for each (inode = key->objectid, directory =
 * key->offset) in the active node's inode cache.
 */
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
/* holds at most BTRFS_NAME_LEN name bytes; not NUL-terminated by this code */
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
/* one iteration per ref entry stored in this item */
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: copy only what fits in namebuf and flag the backref */
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow directly after the btrfs_inode_ref header */
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
/* advance to the next entry: header + name */
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item: same walk as process_inode_ref(), except
 * that the parent directory is read from each extref entry instead of being
 * taken from the key offset.
 */
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
/* holds at most BTRFS_NAME_LEN name bytes; not NUL-terminated by this code */
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
/* one iteration per extref entry stored in this item */
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
/* over-long name: copy only what fits in namebuf and flag the backref */
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes follow directly after the btrfs_inode_extref header */
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
/* advance to the next entry: header + name */
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that can overlap the byte
 * range [start, start + len) and report the covered amount through *found
 * (the update of *found itself is not visible in this excerpt).
 */
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
/* size in bytes of one checksum, taken from the superblock copy */
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the item just before the returned slot, since a
 * csum item that starts before `start` may still cover it.
 */
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1664 leaf = path.nodes[0];
/* past the last item of this leaf: continue in the next leaf */
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* not a csum item */
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): same key and slot as converted a few lines above; looks redundant -- confirm */
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* this csum item starts at or beyond the end of the requested range */
1680 if (key.offset >= start + len)
1683 if (key.offset > start)
/* each csum_size bytes of item data cover one sector starting at key.offset */
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
/* the item reaches into the range: at most `len` bytes of it can count */
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1698 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the active node's current inode record:
 * track the file range covered so far (overlaps and holes), sanity-check the
 * extent fields, account found_size and compare the extent against the
 * checksum coverage reported by count_csum_range().
 */
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
/* sectorsize - 1: used below to test and round up sector alignment */
1714 u64 mask = root->sectorsize - 1;
1718 rec = active_node->current;
/* the current record must describe this inode and must not have refs > 1 */
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this key offset */
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
/* starts before the previous extent ended: overlap */
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* starts after the previous extent ended: record the gap as a hole */
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* found_size gets the exact inline length; num_bytes is then sector-rounded */
1743 rec->found_size += num_bytes;
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
/* length must be non-zero and sector aligned */
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* the referenced window must fit inside ram_bytes */
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* a preallocated extent must not carry compression/encryption/encoding */
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* only extents with a non-zero disk_bytenr count towards found_size */
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
/* NOTE(review): presumably the fall-through branch for any other extent type -- confirm */
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed extent: the csum range to count is the on-disk length */
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* regular extent: note csum presence, and whether coverage is incomplete */
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
/* preallocated extent: the csum-related error flagged here is ODD_CSUM_ITEM */
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf `eb`: keep the active node's `current` inode
 * record in step with the key objectid and dispatch each item to the
 * handler for its key type.
 */
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
/* special case: active level is the root level and the root item has zero refs */
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space objects and orphan items are special-cased */
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * A larger inode number begins: mark the previous record as checked, let
 * maybe_free_inode_rec() decide whether it can go, and fetch the record
 * for the new inode.
 */
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1833 BUG_ON(IS_ERR(active_node->current));
/* dispatch on the item type */
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
/* per level: start bytenr of the tree block these cached values belong to */
1861 u64 bytenr[BTRFS_MAX_LEVEL];
/* per level: reference count looked up for that block */
1862 u64 refs[BTRFS_MAX_LEVEL];
/* per level: non-zero when the block has to be checked while walking the current root */
1863 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both functions are used before their definitions. */
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0] with
 * check_inode_item().  When the path has moved on to a different leaf, the
 * cached per-level reference info in `nrefs` is refreshed with
 * update_nodes_refs() for every level whose block changed.
 */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1884 int root_level = btrfs_header_level(root->node);
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
/* remember which leaf we started on */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1902 path->slots[0] = nritems;
1908 err |= check_inode_item(root, path, ext_ref);
/* check_inode_item() reports via LAST_ITEM that there is nothing left to process */
1910 if (err & LAST_ITEM)
1913 /* still have inode items in this leaf */
/*
 * NOTE(review): in the visible lines `cur` is assigned only at its
 * declaration, so this test can only fail if `cur` is refreshed from
 * path->nodes[0] somewhere not shown -- confirm.
 */
1914 if (cur->start == cur_bytenr)
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1923 for (i = root_level; i >= 0; i--) {
/* block at this level unchanged: cached info is still valid */
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1933 if (!nrefs->need_check[i]) {
/* drop the path's buffers below *level */
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of `node`, from pointer `slot` up to the
 * last pointer, so that the following walk-down finds them cached.
 */
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1960 level = btrfs_header_level(node);
1964 nritems = btrfs_header_nritems(node);
/* every child tree block has the size root->nodesize */
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
/*
 * Validate `child` against the pointer stored at `slot` of `parent`:
 *   - the child's first key must equal the parent's key for that slot,
 *   - the child's header bytenr must equal the parent's block pointer,
 *   - the child's header generation must equal the parent's pointer
 *     generation.
 * Every mismatch is reported on stderr as "wanted" (what the parent records
 * for the slot) versus "have" (what the child itself contains).
 */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* a level-0 child is read with the item-key accessor, any other with the node-key one */
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
/* same order as the block message: wanted = parent's pointer, have = child's header */
2016 btrfs_node_ptr_generation(parent, slot),
2017 btrfs_header_generation(child));
2023 * For a tree node or leaf that is shared, we don't need to iterate it in
2024 * every fs or file tree check. Here we find all of its root ids, and only
2025 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block has to be checked while walking `root`.
 * `roots` is the set of root ids computed for the block by the caller; the
 * current root is compared against the first entry of that set.
 */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
/* special case: the set holds exactly one root id */
2032 if (roots->nnodes == 1)
/* first entry of the ulist's rb-tree */
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the cached information for the tree block at `bytenr` in
 * nrefs[level].  Nothing is looked up when the cache already describes this
 * bytenr; otherwise the reference count is fetched and need_check[level] is
 * set (the conditions that choose between the two need_check assignments
 * below are not visible in this excerpt).
 */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2056 struct ulist *roots;
/* cache miss: the entry for this level describes a different block */
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
/* collect the roots for the block and let need_check() decide */
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2072 check = need_check(root, roots);
2074 nrefs->need_check[level] = check;
2076 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing each leaf
 * with process_one_leaf().  Reference counts are cached per level in
 * `nrefs`, blocks are handed to enter_shared_node(), and every child is
 * validated (check_child_node() plus btrfs_check_leaf()/btrfs_check_node())
 * before the walk steps into it.
 */
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reference count of the starting block: reuse the cached value if it matches */
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
/* the block's header level must match the path level */
2128 if (btrfs_header_level(cur) != *level)
/* all slots of this block have been visited */
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 ret = process_one_leaf(root, cur, wc);
/* interior node: fetch the child pointer at the current slot */
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
/* reference count of the child: reuse the cached value if it matches */
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2157 ret = enter_shared_node(root, bytenr, refs,
/* move on to the next pointer instead of descending */
2160 path->slots[*level]++;
/* use the cached buffer if it is up to date, otherwise read the block */
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
/* the read failed: record the parent range as a corrupt extent */
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
/* the child must agree with the parent's pointer */
2186 ret = check_child_node(cur, path->slots[*level], next);
2188 free_extent_buffer(next);
/* structural check of the child block itself */
2193 if (btrfs_is_leaf(next))
2194 status = btrfs_check_leaf(root, NULL, next);
2196 status = btrfs_check_node(root, NULL, next);
2197 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198 free_extent_buffer(next);
/* step down: the child replaces the path entry one level below */
2203 *level = *level - 1;
2204 free_extent_buffer(path->nodes[*level]);
2205 path->nodes[*level] = next;
2206 path->slots[*level] = 0;
/* mark the block at *level as fully consumed */
2209 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; the same prototype is also declared earlier in this file. */
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214 unsigned int ext_ref);
2217 * Returns >0 Found error, should continue
2218 * Returns <0 Fatal error, must exit the whole check
2219 * Returns 0 No errors found
/*
 * Variant of walk_down_tree() that keeps no walk_control: per-level
 * reference info lives in `nrefs` (update_nodes_refs()), children whose
 * need_check flag is clear are skipped, and leaves are handled by
 * process_one_leaf_v2().
 */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222 int *level, struct node_refs *nrefs, int ext_ref)
2224 enum btrfs_tree_block_status status;
2227 struct extent_buffer *next;
2228 struct extent_buffer *cur;
2232 WARN_ON(*level < 0);
2233 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 ret = update_nodes_refs(root, path->nodes[*level]->start,
2240 while (*level >= 0) {
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243 cur = path->nodes[*level];
/* the block's header level must match the path level */
2245 if (btrfs_header_level(cur) != *level)
/* all slots of this block have been visited */
2248 if (path->slots[*level] >= btrfs_header_nritems(cur))
2250 /* Don't forget to check leaf/node validation */
2252 ret = btrfs_check_leaf(root, NULL, cur);
2253 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = process_one_leaf_v2(root, path, nrefs,
2261 ret = btrfs_check_node(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* interior node: fetch the child pointer at the current slot */
2267 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269 blocksize = root->nodesize;
2271 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* child does not need checking from this root: move to the next pointer */
2274 if (!nrefs->need_check[*level - 1]) {
2275 path->slots[*level]++;
/* use the cached buffer if it is up to date, otherwise read the block */
2279 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root, bytenr, blocksize,
/* the read failed: record the parent range as a corrupt extent */
2285 if (!extent_buffer_uptodate(next)) {
2286 struct btrfs_key node_key;
2288 btrfs_node_key_to_cpu(path->nodes[*level],
2290 path->slots[*level]);
2291 btrfs_add_corrupt_extent_record(root->fs_info,
2293 path->nodes[*level]->start,
2294 root->nodesize, *level);
/*
 * NOTE(review): unlike walk_down_tree(), no free_extent_buffer(next) is
 * visible after a check_child_node() failure here -- confirm `next` is not
 * leaked on that path.
 */
2300 ret = check_child_node(cur, path->slots[*level], next);
/* structural check of the child block itself */
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* step down: the child replaces the path entry one level below */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/*
 * Climb the path from *level looking for a block that still has an
 * unvisited slot.  Exhausted blocks are released from the path, and
 * leave_shared_node() is called when the level being left is the active
 * shared node.
 */
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323 struct walk_control *wc, int *level)
/* despite its name, `leaf` holds the block at whatever level is being examined */
2326 struct extent_buffer *leaf;
2328 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329 leaf = path->nodes[i];
/* another slot remains in this block */
2330 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* block exhausted: drop it from the path */
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = NULL;
2337 BUG_ON(*level > wc->active_node);
2338 if (*level == wc->active_node)
2339 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climb the path
 * from *level looking for a block that still has an unvisited slot and
 * release exhausted blocks from the path.
 */
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
/* despite its name, `leaf` holds the block at whatever level is being examined */
2350 struct extent_buffer *leaf;
2352 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353 leaf = path->nodes[i];
/* another slot remains in this block */
2354 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* block exhausted: drop it from the path */
2359 free_extent_buffer(path->nodes[*level]);
2360 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  Each test below names
 * one property the record is examined for; the statements taken when a test
 * fires are not visible in this excerpt.
 */
2367 static int check_root_dir(struct inode_record *rec)
2369 struct inode_backref *backref;
/* inode item missing, or errors already recorded */
2372 if (!rec->found_inode_item || rec->errors)
/* nlink other than 1, or directory entries found that link to it */
2374 if (rec->nlink != 1 || rec->found_link != 0)
/* no backref at all */
2376 if (list_empty(&rec->backrefs))
/* only the first backref on the list is examined */
2378 backref = to_inode_backref(rec->backrefs.next);
2379 if (!backref->found_inode_ref)
/* index other than 0, or name other than ".." */
2381 if (backref->index != 0 || backref->namelen != 2 ||
2382 memcmp(backref->name, "..", 2))
/* a dir index or dir item was found for this backref */
2384 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so the search lands after the wanted
 * item; the key at the resulting slot is re-read and compared against
 * rec->ino before the item is modified.
 */
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root, struct btrfs_path *path,
2393 struct inode_record *rec)
2395 struct btrfs_inode_item *ei;
2396 struct btrfs_key key;
2399 key.objectid = rec->ino;
2400 key.type = BTRFS_INODE_ITEM_KEY;
2401 key.offset = (u64)-1;
2403 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* slot 0 is special-cased: there is no earlier item in this leaf */
2407 if (!path->slots[0]) {
2414 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* the item found does not belong to this inode */
2415 if (key.objectid != rec->ino) {
2420 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421 struct btrfs_inode_item);
2422 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423 btrfs_mark_buffer_dirty(path->nodes[0]);
2424 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu, as in repair_inode_nbytes(): the L modifier is not defined for integer conversions */
2425 printf("reset isize for dir %llu root %llu\n", rec->ino,
2426 root->root_key.objectid);
2428 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino with btrfs_add_orphan_item(), release the
 * path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this excerpt.
 */
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433 struct btrfs_root *root,
2434 struct btrfs_path *path,
2435 struct inode_record *rec)
2439 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440 btrfs_release_path(path);
2442 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 */
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447 struct btrfs_root *root,
2448 struct btrfs_path *path,
2449 struct inode_record *rec)
2451 struct btrfs_inode_item *ei;
2452 struct btrfs_key key;
2455 key.objectid = rec->ino;
2456 key.type = BTRFS_INODE_ITEM_KEY;
2459 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2466 /* Since ret == 0, no need to check anything */
2467 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468 struct btrfs_inode_item);
2469 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470 btrfs_mark_buffer_dirty(path->nodes[0]);
2471 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472 printf("reset nbytes for ino %llu root %llu\n",
2473 rec->ino, root->root_key.objectid);
2475 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by `backref` (directory, index and
 * name) so that it points at inode rec->ino, commit the change in its own
 * transaction and update the in-memory bookkeeping of both the backref and
 * the directory's inode record.
 */
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480 struct cache_tree *inode_cache,
2481 struct inode_record *rec,
2482 struct inode_backref *backref)
2484 struct btrfs_path path;
2485 struct btrfs_trans_handle *trans;
2486 struct btrfs_dir_item *dir_item;
2487 struct extent_buffer *leaf;
2488 struct btrfs_key key;
2489 struct btrfs_disk_key disk_key;
2490 struct inode_record *dir_rec;
2491 unsigned long name_ptr;
/* item payload: dir item header followed by the name (no extra data) */
2492 u32 data_size = sizeof(*dir_item) + backref->namelen;
2495 trans = btrfs_start_transaction(root, 1);
2497 return PTR_ERR(trans);
2499 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500 (unsigned long long)rec->ino);
2502 btrfs_init_path(&path);
/* key of the new item: (directory, DIR_INDEX, index) */
2503 key.objectid = backref->dir;
2504 key.type = BTRFS_DIR_INDEX_KEY;
2505 key.offset = backref->index;
2506 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2509 leaf = path.nodes[0];
2510 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* location stored in the entry: the inode item of rec->ino */
2512 disk_key.objectid = cpu_to_le64(rec->ino);
2513 disk_key.type = BTRFS_INODE_ITEM_KEY;
2514 disk_key.offset = 0;
2516 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518 btrfs_set_dir_data_len(leaf, dir_item, 0);
2519 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* the name is written directly after the dir item header */
2520 name_ptr = (unsigned long)(dir_item + 1);
2521 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522 btrfs_mark_buffer_dirty(leaf);
2523 btrfs_release_path(&path);
2524 btrfs_commit_transaction(trans, root);
/* the index now exists; account its name in the directory's record */
2526 backref->found_dir_index = 1;
2527 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528 BUG_ON(IS_ERR(dir_rec));
2531 dir_rec->found_size += backref->namelen;
/* re-evaluate the isize error flag against the updated found_size */
2532 if (dir_rec->found_size == dir_rec->isize &&
2533 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535 if (dir_rec->found_size != dir_rec->isize)
2536 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by `backref` (directory, name and
 * index) in its own transaction.  The transaction is committed on both the
 * early exit after the lookup and the normal path.
 */
2541 static int delete_dir_index(struct btrfs_root *root,
2542 struct inode_backref *backref)
2544 struct btrfs_trans_handle *trans;
2545 struct btrfs_dir_item *di;
2546 struct btrfs_path path;
2549 trans = btrfs_start_transaction(root, 1);
2551 return PTR_ERR(trans);
2553 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554 (unsigned long long)backref->dir,
2555 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556 (unsigned long long)root->objectid);
2558 btrfs_init_path(&path);
2559 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560 backref->name, backref->namelen,
2561 backref->index, -1);
/* early exit after the lookup: release the path and close the transaction */
2564 btrfs_release_path(&path);
2565 btrfs_commit_transaction(trans, root);
/*
 * Two deletion forms: remove the whole item, or remove only this name
 * from it (the condition selecting between them is not visible here).
 */
2572 ret = btrfs_del_item(trans, root, &path);
2574 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2576 btrfs_release_path(&path);
2577 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing inode item for rec->ino in its own transaction.
 * Only what the record knows can be restored: nlink, nbytes and size come
 * from the record, the mode is guessed (directory if a dir item was seen,
 * regular file otherwise), atime/ctime/mtime are set to the current time
 * and otime to 0.  A warning asks the user to verify the result.
 */
2581 static int create_inode_item(struct btrfs_root *root,
2582 struct inode_record *rec,
2585 struct btrfs_trans_handle *trans;
2586 struct btrfs_inode_item inode_item;
2587 time_t now = time(NULL);
2590 trans = btrfs_start_transaction(root, 1);
2591 if (IS_ERR(trans)) {
2592 ret = PTR_ERR(trans);
2596 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597 "be incomplete, please check permissions and content after "
2598 "the fsck completes.\n", (unsigned long long)root->objectid,
2599 (unsigned long long)rec->ino);
/* start from an all-zero item and fill in only the fields set below */
2601 memset(&inode_item, 0, sizeof(inode_item));
2602 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from the links found for the record */
2604 btrfs_set_stack_inode_nlink(&inode_item, 1);
2606 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608 if (rec->found_dir_item) {
2609 if (rec->found_file_extent)
2610 fprintf(stderr, "root %llu inode %llu has both a dir "
2611 "item and extents, unsure if it is a dir or a "
2612 "regular file so setting it as a directory\n",
2613 (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2615 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* no dir item seen: treat it as a regular file that ends at the last extent */
2617 } else {
2618 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2621 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2630 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2632 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of `rec` and repair inconsistent combinations of inode
 * ref, dir item and dir index.  The tests on `delete` show two kinds of
 * work: dir-index deletion, and the "!delete" repairs that add missing
 * items or re-create the inode item.
 *
 * Returns the first error code from a helper if there is one, otherwise the
 * number of repairs counted in `repaired`.
 */
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637 struct inode_record *rec,
2638 struct cache_tree *inode_cache,
2641 struct inode_backref *tmp, *backref;
2642 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* the _safe iterator is needed because entries are unlinked inside the loop */
2646 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* root directory without an inode item: re-create the inode item first */
2647 if (!delete && rec->ino == root_dirid) {
2648 if (!rec->found_inode_item) {
2649 ret = create_inode_item(root, rec, 1);
2656 /* Index 0 for root dir's are special, don't mess with it */
2657 if (rec->ino == root_dirid && backref->index == 0)
/*
 * Dir index without a matching inode ref, or with a ref whose index
 * does not match: delete the dir index and forget the backref.
 */
2661 ((backref->found_dir_index && !backref->found_inode_ref) ||
2662 (backref->found_dir_index && backref->found_inode_ref &&
2663 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664 ret = delete_dir_index(root, backref);
2668 list_del(&backref->list);
/* dir item and inode ref exist but the dir index is missing: add it */
2672 if (!delete && !backref->found_dir_index &&
2673 backref->found_dir_item && backref->found_inode_ref) {
2674 ret = add_missing_dir_index(root, inode_cache, rec,
/* complete, error-free backref: it can be dropped from the list */
2679 if (backref->found_dir_item &&
2680 backref->found_dir_index) {
2682 if (!backref->errors &&
2683 backref->found_inode_ref) {
2684 list_del(&backref->list);
/* only the inode ref exists: try to add the dir index/item pair */
2690 if (!delete && (!backref->found_dir_index &&
2691 !backref->found_dir_item &&
2692 backref->found_inode_ref)) {
2693 struct btrfs_trans_handle *trans;
2694 struct btrfs_key location;
2696 ret = check_dir_conflict(root, backref->name,
2702 * let nlink fixing routine to handle it,
2703 * which can do it better.
2708 location.objectid = rec->ino;
2709 location.type = BTRFS_INODE_ITEM_KEY;
2710 location.offset = 0;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2717 fprintf(stderr, "adding missing dir index/item pair "
2719 (unsigned long long)rec->ino);
2720 ret = btrfs_insert_dir_item(trans, root, backref->name,
2722 backref->dir, &location,
2723 imode_to_type(rec->imode),
2726 btrfs_commit_transaction(trans, root);
/* everything matches except that the inode item itself is missing */
2730 if (!delete && (backref->found_inode_ref &&
2731 backref->found_dir_index &&
2732 backref->found_dir_item &&
2733 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734 !rec->found_inode_item)) {
2735 ret = create_inode_item(root, rec, 0);
2742 return ret ? ret : repaired;
2746 * To determine the file type for nlink/inode_item repair
2748 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749 * Return -ENOENT if file type is not found.
/*
 * Derive the BTRFS_FT_* type for `rec`: from the inode mode when the inode
 * item was found, otherwise from a backref that has a dir index or dir item.
 */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2753 struct inode_backref *backref;
2755 /* For inode item recovered case */
2756 if (rec->found_inode_item) {
2757 *type = imode_to_type(rec->imode);
/* otherwise use the filetype stored with a dir index / dir item backref */
2761 list_for_each_entry(backref, &rec->backrefs, list) {
2762 if (backref->found_dir_index || backref->found_dir_item) {
2763 *type = backref->filetype;
2771 * To determine the file name for nlink repair
2773 * Return 0 if file name is found, set name and namelen.
2774 * Return -ENOENT if file name is not found.
2776 static int find_file_name(struct inode_record *rec,
2777 char *name, int *namelen)
2779 struct inode_backref *backref;
2781 list_for_each_entry(backref, &rec->backrefs, list) {
2782 if (backref->found_dir_index || backref->found_dir_item ||
2783 backref->found_inode_ref) {
2784 memcpy(name, backref->name, backref->namelen);
2785 *namelen = backref->namelen;
2792 /* Reset the nlink of the inode to the correct one */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794 struct btrfs_root *root,
2795 struct btrfs_path *path,
2796 struct inode_record *rec)
2798 struct inode_backref *backref;
2799 struct inode_backref *tmp;
2800 struct btrfs_key key;
2801 struct btrfs_inode_item *inode_item;
2804 /* We don't believe this either, reset it and iterate backref */
2805 rec->found_link = 0;
2807 /* Remove all backref including the valid ones */
2808 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810 backref->index, backref->name,
2811 backref->namelen, 0);
2815 /* remove invalid backref, so it won't be added back */
2816 if (!(backref->found_dir_index &&
2817 backref->found_dir_item &&
2818 backref->found_inode_ref)) {
2819 list_del(&backref->list);
2826 /* Set nlink to 0 */
2827 key.objectid = rec->ino;
2828 key.type = BTRFS_INODE_ITEM_KEY;
2830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2837 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838 struct btrfs_inode_item);
2839 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840 btrfs_mark_buffer_dirty(path->nodes[0]);
2841 btrfs_release_path(path);
2844 * Add back valid inode_ref/dir_item/dir_index,
2845 * add_link() will handle the nlink inc, so new nlink must be correct
2847 list_for_each_entry(backref, &rec->backrefs, list) {
2848 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849 backref->name, backref->namelen,
2850 backref->filetype, &backref->index, 1);
2855 btrfs_release_path(path);
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860 struct btrfs_root *root,
2861 struct btrfs_path *path,
2864 struct btrfs_key key, found_key;
2867 btrfs_init_path(path);
2868 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2870 key.type = BTRFS_INODE_ITEM_KEY;
2871 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2873 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874 path->slots[0] - 1);
2875 *highest_ino = found_key.objectid;
2878 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2880 btrfs_release_path(path);
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root,
2886 struct btrfs_path *path,
2887 struct inode_record *rec)
2889 char *dir_name = "lost+found";
2890 char namebuf[BTRFS_NAME_LEN] = {0};
2895 int name_recovered = 0;
2896 int type_recovered = 0;
2900 * Get file name and type first before these invalid inode ref
2901 * are deleted by remove_all_invalid_backref()
2903 name_recovered = !find_file_name(rec, namebuf, &namelen);
2904 type_recovered = !find_file_type(rec, &type);
2906 if (!name_recovered) {
2907 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908 rec->ino, rec->ino);
2909 namelen = count_digits(rec->ino);
2910 sprintf(namebuf, "%llu", rec->ino);
2913 if (!type_recovered) {
2914 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2916 type = BTRFS_FT_REG_FILE;
2920 ret = reset_nlink(trans, root, path, rec);
2923 "Failed to reset nlink for inode %llu: %s\n",
2924 rec->ino, strerror(-ret));
2928 if (rec->found_link == 0) {
2929 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2933 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2937 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938 dir_name, strerror(-ret));
2941 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942 namebuf, namelen, type, NULL, 1);
2944 * Add ".INO" suffix several times to handle case where
2945 * "FILENAME.INO" is already taken by another file.
2947 while (ret == -EEXIST) {
2949 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2951 if (namelen + count_digits(rec->ino) + 1 >
2956 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2958 namelen += count_digits(rec->ino) + 1;
2959 ret = btrfs_add_link(trans, root, rec->ino,
2960 lost_found_ino, namebuf,
2961 namelen, type, NULL, 1);
2965 "Failed to link the inode %llu to %s dir: %s\n",
2966 rec->ino, dir_name, strerror(-ret));
2970 * Just increase the found_link, don't actually add the
2971 * backref. This will make things easier and this inode
2972 * record will be freed after the repair is done.
2973 * So fsck will not report problem about this inode.
2976 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977 namelen, namebuf, dir_name);
2979 printf("Fixed the nlink of inode %llu\n", rec->ino);
2982 * Clear the flag anyway, or we will loop forever for the same inode
2983 * as it will not be removed from the bad inode list and the dead loop
2986 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987 btrfs_release_path(path);
2992 * Check if there is any normal(reg or prealloc) file extent for given
2994 * This is used to determine the file type when neither its dir_index/item or
2995 * inode_item exists.
2997 * This will *NOT* report error, if any error happens, just consider it does
2998 * not have any normal file extent.
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3002 struct btrfs_path path;
3003 struct btrfs_key key;
3004 struct btrfs_key found_key;
3005 struct btrfs_file_extent_item *fi;
3009 btrfs_init_path(&path);
3011 key.type = BTRFS_EXTENT_DATA_KEY;
3014 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3019 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020 ret = btrfs_next_leaf(root, &path);
3027 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3029 if (found_key.objectid != ino ||
3030 found_key.type != BTRFS_EXTENT_DATA_KEY)
3032 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033 struct btrfs_file_extent_item);
3034 type = btrfs_file_extent_type(path.nodes[0], fi);
3035 if (type != BTRFS_FILE_EXTENT_INLINE) {
3041 btrfs_release_path(&path);
3045 static u32 btrfs_type_to_imode(u8 type)
3047 static u32 imode_by_btrfs_type[] = {
3048 [BTRFS_FT_REG_FILE] = S_IFREG,
3049 [BTRFS_FT_DIR] = S_IFDIR,
3050 [BTRFS_FT_CHRDEV] = S_IFCHR,
3051 [BTRFS_FT_BLKDEV] = S_IFBLK,
3052 [BTRFS_FT_FIFO] = S_IFIFO,
3053 [BTRFS_FT_SOCK] = S_IFSOCK,
3054 [BTRFS_FT_SYMLINK] = S_IFLNK,
3057 return imode_by_btrfs_type[(type)];
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061 struct btrfs_root *root,
3062 struct btrfs_path *path,
3063 struct inode_record *rec)
3067 int type_recovered = 0;
3070 printf("Trying to rebuild inode:%llu\n", rec->ino);
3072 type_recovered = !find_file_type(rec, &filetype);
3075 * Try to determine inode type if type not found.
3077 * For found regular file extent, it must be FILE.
3078 * For found dir_item/index, it must be DIR.
3080 * For undetermined one, use FILE as fallback.
3083 * 1. If found backref(inode_index/item is already handled) to it,
3085 * Need new inode-inode ref structure to allow search for that.
3087 if (!type_recovered) {
3088 if (rec->found_file_extent &&
3089 find_normal_file_extent(root, rec->ino)) {
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else if (rec->found_dir_item) {
3094 filetype = BTRFS_FT_DIR;
3095 } else if (!list_empty(&rec->orphan_extents)) {
3097 filetype = BTRFS_FT_REG_FILE;
3099 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3102 filetype = BTRFS_FT_REG_FILE;
3106 ret = btrfs_new_inode(trans, root, rec->ino,
3107 mode | btrfs_type_to_imode(filetype));
3112 * Here inode rebuild is done, we only rebuild the inode item,
3113 * don't repair the nlink(like move to lost+found).
3114 * That is the job of nlink repair.
3116 * We just fill the record and return
3118 rec->found_dir_item = 1;
3119 rec->imode = mode | btrfs_type_to_imode(filetype);
3121 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122 /* Ensure the inode_nlinks repair function will be called */
3123 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129 struct btrfs_root *root,
3130 struct btrfs_path *path,
3131 struct inode_record *rec)
3133 struct orphan_data_extent *orphan;
3134 struct orphan_data_extent *tmp;
3137 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3139 * Check for conflicting file extents
3141 * Here we don't know whether the extents is compressed or not,
3142 * so we can only assume it not compressed nor data offset,
3143 * and use its disk_len as extent length.
3145 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146 orphan->offset, orphan->disk_len, 0);
3147 btrfs_release_path(path);
3152 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153 orphan->disk_bytenr, orphan->disk_len);
3154 ret = btrfs_free_extent(trans,
3155 root->fs_info->extent_root,
3156 orphan->disk_bytenr, orphan->disk_len,
3157 0, root->objectid, orphan->objectid,
3162 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163 orphan->offset, orphan->disk_bytenr,
3164 orphan->disk_len, orphan->disk_len);
3168 /* Update file size info */
3169 rec->found_size += orphan->disk_len;
3170 if (rec->found_size == rec->nbytes)
3171 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3173 /* Update the file extent hole info too */
3174 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3178 if (RB_EMPTY_ROOT(&rec->holes))
3179 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3181 list_del(&orphan->list);
3184 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root,
3191 struct btrfs_path *path,
3192 struct inode_record *rec)
3194 struct rb_node *node;
3195 struct file_extent_hole *hole;
3199 node = rb_first(&rec->holes);
3203 hole = rb_entry(node, struct file_extent_hole, node);
3204 ret = btrfs_punch_hole(trans, root, rec->ino,
3205 hole->start, hole->len);
3208 ret = del_file_extent_hole(&rec->holes, hole->start,
3212 if (RB_EMPTY_ROOT(&rec->holes))
3213 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3214 node = rb_first(&rec->holes);
3216 /* special case for a file losing all its file extent */
3218 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219 round_up(rec->isize, root->sectorsize));
3223 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224 rec->ino, root->objectid);
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3231 struct btrfs_trans_handle *trans;
3232 struct btrfs_path path;
3235 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236 I_ERR_NO_ORPHAN_ITEM |
3237 I_ERR_LINK_COUNT_WRONG |
3238 I_ERR_NO_INODE_ITEM |
3239 I_ERR_FILE_EXTENT_ORPHAN |
3240 I_ERR_FILE_EXTENT_DISCOUNT|
3241 I_ERR_FILE_NBYTES_WRONG)))
3245 * For nlink repair, it may create a dir and add link, so
3246 * 2 for parent(256)'s dir_index and dir_item
3247 * 2 for lost+found dir's inode_item and inode_ref
3248 * 1 for the new inode_ref of the file
3249 * 2 for lost+found dir's dir_index and dir_item for the file
3251 trans = btrfs_start_transaction(root, 7);
3253 return PTR_ERR(trans);
3255 btrfs_init_path(&path);
3256 if (rec->errors & I_ERR_NO_INODE_ITEM)
3257 ret = repair_inode_no_item(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263 ret = repair_inode_isize(trans, root, &path, rec);
3264 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267 ret = repair_inode_nlinks(trans, root, &path, rec);
3268 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269 ret = repair_inode_nbytes(trans, root, &path, rec);
3270 btrfs_commit_transaction(trans, root);
3271 btrfs_release_path(&path);
3275 static int check_inode_recs(struct btrfs_root *root,
3276 struct cache_tree *inode_cache)
3278 struct cache_extent *cache;
3279 struct ptr_node *node;
3280 struct inode_record *rec;
3281 struct inode_backref *backref;
3286 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3288 if (btrfs_root_refs(&root->root_item) == 0) {
3289 if (!cache_tree_empty(inode_cache))
3290 fprintf(stderr, "warning line %d\n", __LINE__);
3295 * We need to repair backrefs first because we could change some of the
3296 * errors in the inode recs.
3298 * We also need to go through and delete invalid backrefs first and then
3299 * add the correct ones second. We do this because we may get EEXIST
3300 * when adding back the correct index because we hadn't yet deleted the
3303 * For example, if we were missing a dir index then the directories
3304 * isize would be wrong, so if we fixed the isize to what we thought it
3305 * would be and then fixed the backref we'd still have a invalid fs, so
3306 * we need to add back the dir index and then check to see if the isize
3311 if (stage == 3 && !err)
3314 cache = search_cache_extent(inode_cache, 0);
3315 while (repair && cache) {
3316 node = container_of(cache, struct ptr_node, cache);
3318 cache = next_cache_extent(cache);
3320 /* Need to free everything up and rescan */
3322 remove_cache_extent(inode_cache, &node->cache);
3324 free_inode_rec(rec);
3328 if (list_empty(&rec->backrefs))
3331 ret = repair_inode_backrefs(root, rec, inode_cache,
3345 rec = get_inode_rec(inode_cache, root_dirid, 0);
3346 BUG_ON(IS_ERR(rec));
3348 ret = check_root_dir(rec);
3350 fprintf(stderr, "root %llu root dir %llu error\n",
3351 (unsigned long long)root->root_key.objectid,
3352 (unsigned long long)root_dirid);
3353 print_inode_error(root, rec);
3358 struct btrfs_trans_handle *trans;
3360 trans = btrfs_start_transaction(root, 1);
3361 if (IS_ERR(trans)) {
3362 err = PTR_ERR(trans);
3367 "root %llu missing its root dir, recreating\n",
3368 (unsigned long long)root->objectid);
3370 ret = btrfs_make_root_dir(trans, root, root_dirid);
3373 btrfs_commit_transaction(trans, root);
3377 fprintf(stderr, "root %llu root dir %llu not found\n",
3378 (unsigned long long)root->root_key.objectid,
3379 (unsigned long long)root_dirid);
3383 cache = search_cache_extent(inode_cache, 0);
3386 node = container_of(cache, struct ptr_node, cache);
3388 remove_cache_extent(inode_cache, &node->cache);
3390 if (rec->ino == root_dirid ||
3391 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392 free_inode_rec(rec);
3396 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397 ret = check_orphan_item(root, rec->ino);
3399 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400 if (can_free_inode_rec(rec)) {
3401 free_inode_rec(rec);
3406 if (!rec->found_inode_item)
3407 rec->errors |= I_ERR_NO_INODE_ITEM;
3408 if (rec->found_link != rec->nlink)
3409 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3411 ret = try_repair_inode(root, rec);
3412 if (ret == 0 && can_free_inode_rec(rec)) {
3413 free_inode_rec(rec);
3419 if (!(repair && ret == 0))
3421 print_inode_error(root, rec);
3422 list_for_each_entry(backref, &rec->backrefs, list) {
3423 if (!backref->found_dir_item)
3424 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425 if (!backref->found_dir_index)
3426 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427 if (!backref->found_inode_ref)
3428 backref->errors |= REF_ERR_NO_INODE_REF;
3429 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430 " namelen %u name %s filetype %d errors %x",
3431 (unsigned long long)backref->dir,
3432 (unsigned long long)backref->index,
3433 backref->namelen, backref->name,
3434 backref->filetype, backref->errors);
3435 print_ref_error(backref->errors);
3437 free_inode_rec(rec);
3439 return (error > 0) ? -1 : 0;
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3445 struct cache_extent *cache;
3446 struct root_record *rec = NULL;
3449 cache = lookup_cache_extent(root_cache, objectid, 1);
3451 rec = container_of(cache, struct root_record, cache);
3453 rec = calloc(1, sizeof(*rec));
3455 return ERR_PTR(-ENOMEM);
3456 rec->objectid = objectid;
3457 INIT_LIST_HEAD(&rec->backrefs);
3458 rec->cache.start = objectid;
3459 rec->cache.size = 1;
3461 ret = insert_cache_extent(root_cache, &rec->cache);
3463 return ERR_PTR(-EEXIST);
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469 u64 ref_root, u64 dir, u64 index,
3470 const char *name, int namelen)
3472 struct root_backref *backref;
3474 list_for_each_entry(backref, &rec->backrefs, list) {
3475 if (backref->ref_root != ref_root || backref->dir != dir ||
3476 backref->namelen != namelen)
3478 if (memcmp(name, backref->name, namelen))
3483 backref = calloc(1, sizeof(*backref) + namelen + 1);
3486 backref->ref_root = ref_root;
3488 backref->index = index;
3489 backref->namelen = namelen;
3490 memcpy(backref->name, name, namelen);
3491 backref->name[namelen] = '\0';
3492 list_add_tail(&backref->list, &rec->backrefs);
3496 static void free_root_record(struct cache_extent *cache)
3498 struct root_record *rec;
3499 struct root_backref *backref;
3501 rec = container_of(cache, struct root_record, cache);
3502 while (!list_empty(&rec->backrefs)) {
3503 backref = to_root_backref(rec->backrefs.next);
3504 list_del(&backref->list);
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3513 static int add_root_backref(struct cache_tree *root_cache,
3514 u64 root_id, u64 ref_root, u64 dir, u64 index,
3515 const char *name, int namelen,
3516 int item_type, int errors)
3518 struct root_record *rec;
3519 struct root_backref *backref;
3521 rec = get_root_rec(root_cache, root_id);
3522 BUG_ON(IS_ERR(rec));
3523 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3526 backref->errors |= errors;
3528 if (item_type != BTRFS_DIR_ITEM_KEY) {
3529 if (backref->found_dir_index || backref->found_back_ref ||
3530 backref->found_forward_ref) {
3531 if (backref->index != index)
3532 backref->errors |= REF_ERR_INDEX_UNMATCH;
3534 backref->index = index;
3538 if (item_type == BTRFS_DIR_ITEM_KEY) {
3539 if (backref->found_forward_ref)
3541 backref->found_dir_item = 1;
3542 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543 backref->found_dir_index = 1;
3544 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545 if (backref->found_forward_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_REF;
3547 else if (backref->found_dir_item)
3549 backref->found_forward_ref = 1;
3550 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551 if (backref->found_back_ref)
3552 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553 backref->found_back_ref = 1;
3558 if (backref->found_forward_ref && backref->found_dir_item)
3559 backref->reachable = 1;
3563 static int merge_root_recs(struct btrfs_root *root,
3564 struct cache_tree *src_cache,
3565 struct cache_tree *dst_cache)
3567 struct cache_extent *cache;
3568 struct ptr_node *node;
3569 struct inode_record *rec;
3570 struct inode_backref *backref;
3573 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574 free_inode_recs_tree(src_cache);
3579 cache = search_cache_extent(src_cache, 0);
3582 node = container_of(cache, struct ptr_node, cache);
3584 remove_cache_extent(src_cache, &node->cache);
3587 ret = is_child_root(root, root->objectid, rec->ino);
3593 list_for_each_entry(backref, &rec->backrefs, list) {
3594 BUG_ON(backref->found_inode_ref);
3595 if (backref->found_dir_item)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_ITEM_KEY,
3601 if (backref->found_dir_index)
3602 add_root_backref(dst_cache, rec->ino,
3603 root->root_key.objectid, backref->dir,
3604 backref->index, backref->name,
3605 backref->namelen, BTRFS_DIR_INDEX_KEY,
3609 free_inode_rec(rec);
3616 static int check_root_refs(struct btrfs_root *root,
3617 struct cache_tree *root_cache)
3619 struct root_record *rec;
3620 struct root_record *ref_root;
3621 struct root_backref *backref;
3622 struct cache_extent *cache;
3628 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629 BUG_ON(IS_ERR(rec));
3632 /* fixme: this can not detect circular references */
3635 cache = search_cache_extent(root_cache, 0);
3639 rec = container_of(cache, struct root_record, cache);
3640 cache = next_cache_extent(cache);
3642 if (rec->found_ref == 0)
3645 list_for_each_entry(backref, &rec->backrefs, list) {
3646 if (!backref->reachable)
3649 ref_root = get_root_rec(root_cache,
3651 BUG_ON(IS_ERR(ref_root));
3652 if (ref_root->found_ref > 0)
3655 backref->reachable = 0;
3657 if (rec->found_ref == 0)
3663 cache = search_cache_extent(root_cache, 0);
3667 rec = container_of(cache, struct root_record, cache);
3668 cache = next_cache_extent(cache);
3670 if (rec->found_ref == 0 &&
3671 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673 ret = check_orphan_item(root->fs_info->tree_root,
3679 * If we don't have a root item then we likely just have
3680 * a dir item in a snapshot for this root but no actual
3681 * ref key or anything so it's meaningless.
3683 if (!rec->found_root_item)
3686 fprintf(stderr, "fs tree %llu not referenced\n",
3687 (unsigned long long)rec->objectid);
3691 if (rec->found_ref > 0 && !rec->found_root_item)
3693 list_for_each_entry(backref, &rec->backrefs, list) {
3694 if (!backref->found_dir_item)
3695 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696 if (!backref->found_dir_index)
3697 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698 if (!backref->found_back_ref)
3699 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700 if (!backref->found_forward_ref)
3701 backref->errors |= REF_ERR_NO_ROOT_REF;
3702 if (backref->reachable && backref->errors)
3709 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710 (unsigned long long)rec->objectid, rec->found_ref,
3711 rec->found_root_item ? "" : "not found");
3713 list_for_each_entry(backref, &rec->backrefs, list) {
3714 if (!backref->reachable)
3716 if (!backref->errors && rec->found_root_item)
3718 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719 " index %llu namelen %u name %s errors %x\n",
3720 (unsigned long long)backref->ref_root,
3721 (unsigned long long)backref->dir,
3722 (unsigned long long)backref->index,
3723 backref->namelen, backref->name,
3725 print_ref_error(backref->errors);
3728 return errors > 0 ? 1 : 0;
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732 struct btrfs_key *key,
3733 struct cache_tree *root_cache)
3739 struct btrfs_root_ref *ref;
3740 char namebuf[BTRFS_NAME_LEN];
3743 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3745 dirid = btrfs_root_ref_dirid(eb, ref);
3746 index = btrfs_root_ref_sequence(eb, ref);
3747 name_len = btrfs_root_ref_name_len(eb, ref);
3749 if (name_len <= BTRFS_NAME_LEN) {
3753 len = BTRFS_NAME_LEN;
3754 error = REF_ERR_NAME_TOO_LONG;
3756 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3758 if (key->type == BTRFS_ROOT_REF_KEY) {
3759 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760 index, namebuf, len, key->type, error);
3762 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763 index, namebuf, len, key->type, error);
3768 static void free_corrupt_block(struct cache_extent *cache)
3770 struct btrfs_corrupt_block *corrupt;
3772 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3779 * Repair the btree of the given root.
3781 * The fix is to remove the node key in corrupt_blocks cache_tree.
3782 * and rebalance the tree.
3783 * After the fix, the btree should be writeable.
3785 static int repair_btree(struct btrfs_root *root,
3786 struct cache_tree *corrupt_blocks)
3788 struct btrfs_trans_handle *trans;
3789 struct btrfs_path path;
3790 struct btrfs_corrupt_block *corrupt;
3791 struct cache_extent *cache;
3792 struct btrfs_key key;
3797 if (cache_tree_empty(corrupt_blocks))
3800 trans = btrfs_start_transaction(root, 1);
3801 if (IS_ERR(trans)) {
3802 ret = PTR_ERR(trans);
3803 fprintf(stderr, "Error starting transaction: %s\n",
3807 btrfs_init_path(&path);
3808 cache = first_cache_extent(corrupt_blocks);
3810 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812 level = corrupt->level;
3813 path.lowest_level = level;
3814 key.objectid = corrupt->key.objectid;
3815 key.type = corrupt->key.type;
3816 key.offset = corrupt->key.offset;
3819 * Here we don't want to do any tree balance, since it may
3820 * cause a balance with corrupted brother leaf/node,
3821 * so ins_len set to 0 here.
3822 * Balance will be done after all corrupt node/leaf is deleted.
3824 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3827 offset = btrfs_node_blockptr(path.nodes[level],
3830 /* Remove the ptr */
3831 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3835 * Remove the corresponding extent
3836 * return value is not concerned.
3838 btrfs_release_path(&path);
3839 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840 0, root->root_key.objectid,
3842 cache = next_cache_extent(cache);
3845 /* Balance the btree using btrfs_search_slot() */
3846 cache = first_cache_extent(corrupt_blocks);
3848 corrupt = container_of(cache, struct btrfs_corrupt_block,
3850 memcpy(&key, &corrupt->key, sizeof(key));
3851 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3854 /* return will always >0 since it won't find the item */
3856 btrfs_release_path(&path);
3857 cache = next_cache_extent(cache);
3860 btrfs_commit_transaction(trans, root);
3861 btrfs_release_path(&path);
3865 static int check_fs_root(struct btrfs_root *root,
3866 struct cache_tree *root_cache,
3867 struct walk_control *wc)
3873 struct btrfs_path path;
3874 struct shared_node root_node;
3875 struct root_record *rec;
3876 struct btrfs_root_item *root_item = &root->root_item;
3877 struct cache_tree corrupt_blocks;
3878 struct orphan_data_extent *orphan;
3879 struct orphan_data_extent *tmp;
3880 enum btrfs_tree_block_status status;
3881 struct node_refs nrefs;
3884 * Reuse the corrupt_block cache tree to record corrupted tree block
3886 * Unlike the usage in extent tree check, here we do it in a per
3887 * fs/subvol tree base.
3889 cache_tree_init(&corrupt_blocks);
3890 root->fs_info->corrupt_blocks = &corrupt_blocks;
3892 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893 rec = get_root_rec(root_cache, root->root_key.objectid);
3894 BUG_ON(IS_ERR(rec));
3895 if (btrfs_root_refs(root_item) > 0)
3896 rec->found_root_item = 1;
3899 btrfs_init_path(&path);
3900 memset(&root_node, 0, sizeof(root_node));
3901 cache_tree_init(&root_node.root_cache);
3902 cache_tree_init(&root_node.inode_cache);
3903 memset(&nrefs, 0, sizeof(nrefs));
3905 /* Move the orphan extent record to corresponding inode_record */
3906 list_for_each_entry_safe(orphan, tmp,
3907 &root->orphan_data_extents, list) {
3908 struct inode_record *inode;
3910 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3912 BUG_ON(IS_ERR(inode));
3913 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914 list_move(&orphan->list, &inode->orphan_extents);
3917 level = btrfs_header_level(root->node);
3918 memset(wc->nodes, 0, sizeof(wc->nodes));
3919 wc->nodes[level] = &root_node;
3920 wc->active_node = level;
3921 wc->root_level = level;
3923 /* We may not have checked the root block, lets do that now */
3924 if (btrfs_is_leaf(root->node))
3925 status = btrfs_check_leaf(root, NULL, root->node);
3927 status = btrfs_check_node(root, NULL, root->node);
3928 if (status != BTRFS_TREE_BLOCK_CLEAN)
3931 if (btrfs_root_refs(root_item) > 0 ||
3932 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933 path.nodes[level] = root->node;
3934 extent_buffer_get(root->node);
3935 path.slots[level] = 0;
3937 struct btrfs_key key;
3938 struct btrfs_disk_key found_key;
3940 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941 level = root_item->drop_level;
3942 path.lowest_level = level;
3943 if (level > btrfs_header_level(root->node) ||
3944 level >= BTRFS_MAX_LEVEL) {
3945 error("ignoring invalid drop level: %u", level);
3948 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3951 btrfs_node_key(path.nodes[level], &found_key,
3953 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954 sizeof(found_key)));
3958 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3964 wret = walk_up_tree(root, &path, wc, &level);
3971 btrfs_release_path(&path);
3973 if (!cache_tree_empty(&corrupt_blocks)) {
3974 struct cache_extent *cache;
3975 struct btrfs_corrupt_block *corrupt;
3977 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978 root->root_key.objectid);
3979 cache = first_cache_extent(&corrupt_blocks);
3981 corrupt = container_of(cache,
3982 struct btrfs_corrupt_block,
3984 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985 cache->start, corrupt->level,
3986 corrupt->key.objectid, corrupt->key.type,
3987 corrupt->key.offset);
3988 cache = next_cache_extent(cache);
3991 printf("Try to repair the btree for root %llu\n",
3992 root->root_key.objectid);
3993 ret = repair_btree(root, &corrupt_blocks);
3995 fprintf(stderr, "Failed to repair btree: %s\n",
3998 printf("Btree for root %llu is fixed\n",
3999 root->root_key.objectid);
4003 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4007 if (root_node.current) {
4008 root_node.current->checked = 1;
4009 maybe_free_inode_rec(&root_node.inode_cache,
4013 err = check_inode_recs(root, &root_node.inode_cache);
4017 free_corrupt_blocks_tree(&corrupt_blocks);
4018 root->fs_info->corrupt_blocks = NULL;
4019 free_orphan_data_extents(&root->orphan_data_extents);
4023 static int fs_root_objectid(u64 objectid)
4025 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4028 return is_fstree(objectid);
/*
 * Scan the tree root and check every fs root found in it.
 *
 * @root:       any root of the filesystem; only root->fs_info is used
 * @root_cache: cache of root records, passed on to check_fs_root() and
 *              emptied with free_root_recs_tree() on the restart paths
 *
 * For each ROOT_ITEM whose objectid passes fs_root_objectid() the root is
 * read and handed to check_fs_root(); ROOT_REF and ROOT_BACKREF items are
 * handed to process_root_ref().
 *
 * NOTE(review): the loop framing, the error exits and the return statement
 * are not visible in this listing; confirm the return contract against the
 * full source.
 */
4031 static int check_fs_roots(struct btrfs_root *root,
4032 struct cache_tree *root_cache)
4034 struct btrfs_path path;
4035 struct btrfs_key key;
4036 struct walk_control wc;
4037 struct extent_buffer *leaf, *tree_node;
4038 struct btrfs_root *tmp_root;
4039 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Switch the progress reporter to the fs-roots phase when it is enabled */
4043 if (ctx.progress_enabled) {
4044 ctx.tp = TASK_FS_ROOTS;
4045 task_start(ctx.info);
4049 * Just in case we made any changes to the extent tree that weren't
4050 * reflected into the free space cache yet.
4053 reset_cached_block_groups(root->fs_info);
4054 memset(&wc, 0, sizeof(wc));
4055 cache_tree_init(&wc.shared);
4056 btrfs_init_path(&path);
4061 key.type = BTRFS_ROOT_ITEM_KEY;
4062 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a later change of it can be detected */
4067 tree_node = tree_root->node;
4069 if (tree_node != tree_root->node) {
4070 free_root_recs_tree(root_cache);
4071 btrfs_release_path(&path);
4074 leaf = path.nodes[0];
4075 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076 ret = btrfs_next_leaf(tree_root, &path);
4082 leaf = path.nodes[0];
4084 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086 fs_root_objectid(key.objectid)) {
4087 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
/* The reloc tree root is read without the root cache and freed below */
4088 tmp_root = btrfs_read_fs_root_no_cache(
4089 root->fs_info, &key);
4091 key.offset = (u64)-1;
4092 tmp_root = btrfs_read_fs_root(
4093 root->fs_info, &key);
4095 if (IS_ERR(tmp_root)) {
4099 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN: drop the collected root records and the path.
 * NOTE(review): presumably the scan is then restarted; the jump is not
 * visible in this listing.
 */
4100 if (ret == -EAGAIN) {
4101 free_root_recs_tree(root_cache);
4102 btrfs_release_path(&path);
4107 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108 btrfs_free_fs_root(tmp_root);
4109 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110 key.type == BTRFS_ROOT_BACKREF_KEY) {
4111 process_root_ref(leaf, path.slots[0], &key,
4118 btrfs_release_path(&path);
4120 free_extent_cache_tree(&wc.shared);
4121 if (!cache_tree_empty(&wc.shared))
4122 fprintf(stderr, "warning line %d\n", __LINE__);
4124 task_stop(ctx.info);
4130 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4131 * INODE_REF/INODE_EXTREF match.
4133 * @root: the root of the fs/file tree
4134 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4135 * @key: the key of the DIR_ITEM/DIR_INDEX
4136 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4137 * distinguish the root dir from a normal dir/file
4138 * @name: the name in the INODE_REF/INODE_EXTREF
4139 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4140 * @mode: the st_mode of INODE_ITEM
4142 * Return 0 if no error occurred.
4143 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4146 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147 * do not match for normal dir/file.
/*
 * Implementation notes:
 *
 * The item is looked up with an exact-key btrfs_search_slot() on @key, then
 * every entry packed into that item is compared against the ref:
 *   - the entry location must be (ref_key->objectid, INODE_ITEM, 0),
 *   - the entry file type must equal imode_to_type(@mode),
 *   - the entry name must equal @name over @namelen bytes.
 * An entry name longer than BTRFS_NAME_LEN is compared on its first
 * BTRFS_NAME_LEN bytes only, after a warning.
 *
 * NOTE(review): the conditions that select the root-dir path and the
 * statements that leave the loop are not visible in this listing.
 */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150 struct btrfs_key *key, u64 index, char *name,
4151 u32 namelen, u32 mode)
4153 struct btrfs_path path;
4154 struct extent_buffer *node;
4155 struct btrfs_dir_item *di;
4156 struct btrfs_key location;
4157 char namebuf[BTRFS_NAME_LEN] = {0};
4167 btrfs_init_path(&path);
4168 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170 ret = DIR_ITEM_MISSING;
4174 /* Process root dir and goto out*/
4177 ret = ROOT_DIR_ERROR;
4179 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4181 ref_key->type == BTRFS_INODE_REF_KEY ?
4183 ref_key->objectid, ref_key->offset,
4184 key->type == BTRFS_DIR_ITEM_KEY ?
4185 "DIR_ITEM" : "DIR_INDEX");
4193 /* Process normal file/dir */
4195 ret = DIR_ITEM_MISSING;
4197 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4199 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200 ref_key->objectid, ref_key->offset,
4201 key->type == BTRFS_DIR_ITEM_KEY ?
4202 "DIR_ITEM" : "DIR_INDEX",
4203 key->objectid, key->offset, namelen, name,
4204 imode_to_type(mode));
4208 /* Check whether inode_id/filetype/name match */
4209 node = path.nodes[0];
4210 slot = path.slots[0];
4211 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212 total = btrfs_item_size_nr(node, slot);
4213 while (cur < total) {
/* Assume a mismatch until this entry passes every comparison below */
4214 ret = DIR_ITEM_MISMATCH;
4215 name_len = btrfs_dir_name_len(node, di);
4216 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the inode owning the ref */
4218 btrfs_dir_item_key_to_cpu(node, di, &location);
4219 if (location.objectid != ref_key->objectid ||
4220 location.type != BTRFS_INODE_ITEM_KEY ||
4221 location.offset != 0)
4224 filetype = btrfs_dir_type(node, di);
4225 if (imode_to_type(mode) != filetype)
/* Clamp the number of name bytes read so namebuf cannot overflow */
4228 if (name_len <= BTRFS_NAME_LEN) {
4231 len = BTRFS_NAME_LEN;
4232 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4234 key->type == BTRFS_DIR_ITEM_KEY ?
4235 "DIR_ITEM" : "DIR_INDEX",
4236 key->objectid, key->offset, name_len);
4238 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry's header, name and data to reach the next entry */
4245 len = sizeof(*di) + name_len + data_len;
4246 di = (struct btrfs_dir_item *)((char *)di + len);
4249 if (ret == DIR_ITEM_MISMATCH)
4251 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4253 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254 ref_key->objectid, ref_key->offset,
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, namelen, name,
4258 imode_to_type(mode));
4260 btrfs_release_path(&path);
4265 * Traverse the given INODE_REF and call find_dir_item() to find related
4266 * DIR_ITEM/DIR_INDEX.
4268 * @root: the root of the fs/file tree
4269 * @ref_key: the key of the INODE_REF
4270 * @refs: the count of INODE_REF
4271 * @mode: the st_mode of INODE_ITEM
4273 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @node/@slot: the leaf and slot holding the INODE_REF item; the item may
 *              pack several refs, which are visited one after another.
 *
 * For every ref the index and name are read (at most BTRFS_NAME_LEN name
 * bytes are copied), a ref with index 0 is required to be named "..", and
 * find_dir_item() is called twice with the parent dir taken from
 * ref_key->offset: once for the DIR_INDEX and once for the DIR_ITEM keyed by
 * btrfs_name_hash() of the name.
 *
 * NOTE(review): the ".." comparison is bounded by the untrimmed name_len,
 * not by the clamped len - confirm this is intended.
 * NOTE(review): the DIR_INDEX key offset assignment, the error accumulation
 * and the loop framing are not visible in this listing.
 */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276 struct extent_buffer *node, int slot, u64 *refs,
4279 struct btrfs_key key;
4280 struct btrfs_inode_ref *ref;
4281 char namebuf[BTRFS_NAME_LEN] = {0};
4289 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290 total = btrfs_item_size_nr(node, slot);
4293 /* Update inode ref count */
4296 index = btrfs_inode_ref_index(node, ref);
4297 name_len = btrfs_inode_ref_name_len(node, ref);
/* Clamp the number of name bytes read so namebuf cannot overflow */
4298 if (name_len <= BTRFS_NAME_LEN) {
4301 len = BTRFS_NAME_LEN;
4302 warning("root %llu INODE_REF[%llu %llu] name too long",
4303 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the ref header */
4306 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4308 /* Check root dir ref name */
4309 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311 root->objectid, ref_key->objectid, ref_key->offset,
4313 err |= ROOT_DIR_ERROR;
4316 /* Find related DIR_INDEX */
4317 key.objectid = ref_key->offset;
4318 key.type = BTRFS_DIR_INDEX_KEY;
4320 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4323 /* Find related dir_item */
4324 key.objectid = ref_key->offset;
4325 key.type = BTRFS_DIR_ITEM_KEY;
4326 key.offset = btrfs_name_hash(namebuf, len);
4327 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this ref's header and name to reach the next packed ref */
4330 len = sizeof(*ref) + name_len;
4331 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4340 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341 * DIR_ITEM/DIR_INDEX.
4343 * @root: the root of the fs/file tree
4344 * @ref_key: the key of the INODE_EXTREF
4345 * @refs: the count of INODE_EXTREF
4346 * @mode: the st_mode of INODE_ITEM
4348 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @node/@slot: the leaf and slot holding the INODE_EXTREF item; the item may
 *              pack several extrefs, which are visited one after another.
 *
 * Unlike check_inode_ref(), the parent directory is read from each extref
 * (btrfs_inode_extref_parent()) instead of from ref_key->offset. Each entry
 * is otherwise handled the same way: the name is read (at most
 * BTRFS_NAME_LEN bytes), an entry with index 0 must be named "..", and
 * find_dir_item() is called for the DIR_INDEX and for the DIR_ITEM keyed by
 * btrfs_name_hash() of the name.
 *
 * NOTE(review): the ".." comparison is bounded by the untrimmed name_len,
 * not by the clamped len - confirm this is intended.
 * NOTE(review): the DIR_INDEX key offset assignment, the error accumulation
 * and the loop framing are not visible in this listing.
 */
4350 static int check_inode_extref(struct btrfs_root *root,
4351 struct btrfs_key *ref_key,
4352 struct extent_buffer *node, int slot, u64 *refs,
4355 struct btrfs_key key;
4356 struct btrfs_inode_extref *extref;
4357 char namebuf[BTRFS_NAME_LEN] = {0};
4367 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368 total = btrfs_item_size_nr(node, slot);
4371 /* update inode ref count */
4373 name_len = btrfs_inode_extref_name_len(node, extref);
4374 index = btrfs_inode_extref_index(node, extref);
4375 parent = btrfs_inode_extref_parent(node, extref);
/* Clamp the number of name bytes read so namebuf cannot overflow */
4376 if (name_len <= BTRFS_NAME_LEN) {
4379 len = BTRFS_NAME_LEN;
4380 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly after the extref header */
4383 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4385 /* Check root dir ref name */
4386 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388 root->objectid, ref_key->objectid, ref_key->offset,
4390 err |= ROOT_DIR_ERROR;
4393 /* find related dir_index */
4394 key.objectid = parent;
4395 key.type = BTRFS_DIR_INDEX_KEY;
4397 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4400 /* find related dir_item */
4401 key.objectid = parent;
4402 key.type = BTRFS_DIR_ITEM_KEY;
4403 key.offset = btrfs_name_hash(namebuf, len);
4404 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over this extref's header and name to reach the next packed one */
4407 len = sizeof(*extref) + name_len;
4408 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4418 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4419 * DIR_ITEM/DIR_INDEX match.
4421 * @root: the root of the fs/file tree
4422 * @key: the key of the INODE_REF/INODE_EXTREF
4423 * @name: the name in the INODE_REF/INODE_EXTREF
4424 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4425 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4427 * @ext_ref: the EXTENDED_IREF feature
4429 * Return 0 if no error occurred.
4430 * Return >0 for error bitmap
/*
 * Implementation notes:
 *
 * Two lookups are visible. First the INODE_REF item at @key is searched and
 * its packed entries are scanned for one whose index matches @index (any
 * index is accepted when @index is (u64)-1) and whose name equals @name over
 * @namelen bytes. The second lookup targets the INODE_EXTREF item and
 * additionally requires the entry's parent to equal the directory id taken
 * from the original key->offset.
 *
 * Side effect: for the second lookup *@key is rewritten in place (type set
 * to BTRFS_INODE_EXTREF_KEY, offset set to the extref hash), so the caller's
 * key no longer describes the INODE_REF afterwards.
 *
 * Names longer than BTRFS_NAME_LEN are compared on their first
 * BTRFS_NAME_LEN bytes only, after a warning.
 *
 * NOTE(review): the conditions guarding the fall-through to the EXTREF
 * lookup (including the @ext_ref test) and the loop exits are not visible in
 * this listing.
 */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433 char *name, int namelen, u64 index,
4434 unsigned int ext_ref)
4436 struct btrfs_path path;
4437 struct btrfs_inode_ref *ref;
4438 struct btrfs_inode_extref *extref;
4439 struct extent_buffer *node;
4440 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4451 btrfs_init_path(&path);
4452 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4454 ret = INODE_REF_MISSING;
4458 node = path.nodes[0];
4459 slot = path.slots[0];
4461 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462 total = btrfs_item_size_nr(node, slot);
4464 /* Iterate all entry of INODE_REF */
4465 while (cur < total) {
/* Assume missing until this entry passes every comparison below */
4466 ret = INODE_REF_MISSING;
4468 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison */
4470 if (index != (u64)-1 && index != ref_index)
4473 if (ref_namelen <= BTRFS_NAME_LEN) {
4476 len = BTRFS_NAME_LEN;
4477 warning("root %llu INODE %s[%llu %llu] name too long",
4479 key->type == BTRFS_INODE_REF_KEY ?
4481 key->objectid, key->offset);
4483 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4486 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over this ref's header and name to reach the next packed ref */
4492 len = sizeof(*ref) + ref_namelen;
4493 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4498 /* Skip if not support EXTENDED_IREF feature */
4502 btrfs_release_path(&path);
4503 btrfs_init_path(&path);
/* Retarget *key at the INODE_EXTREF item for this parent dir and name */
4505 dir_id = key->offset;
4506 key->type = BTRFS_INODE_EXTREF_KEY;
4507 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4509 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4511 ret = INODE_REF_MISSING;
4515 node = path.nodes[0];
4516 slot = path.slots[0];
4518 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4520 total = btrfs_item_size_nr(node, slot);
4522 /* Iterate all entry of INODE_EXTREF */
4523 while (cur < total) {
4524 ret = INODE_REF_MISSING;
4526 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527 ref_index = btrfs_inode_extref_index(node, extref);
4528 parent = btrfs_inode_extref_parent(node, extref);
4529 if (index != (u64)-1 && index != ref_index)
/* The extref must belong to the directory the caller asked about */
4532 if (parent != dir_id)
4535 if (ref_namelen <= BTRFS_NAME_LEN) {
4538 len = BTRFS_NAME_LEN;
4539 warning("root %llu INODE %s[%llu %llu] name too long",
4541 key->type == BTRFS_INODE_REF_KEY ?
4543 key->objectid, key->offset);
4545 read_extent_buffer(node, ref_namebuf,
4546 (unsigned long)(extref + 1), len);
4548 if (len != namelen || strncmp(ref_namebuf, name, len))
4555 len = sizeof(*extref) + ref_namelen;
4556 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4561 btrfs_release_path(&path);
4566 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4569 * @root: the root of the fs/file tree
4570 * @key: the key of the DIR_ITEM/DIR_INDEX
4571 * @size: the st_size of the INODE_ITEM
4572 * @ext_ref: the EXTENDED_IREF feature
4574 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @key:        key of the DIR_ITEM/DIR_INDEX item being checked (its type is
 *              what selects "DIR_ITEM" or "DIR_INDEX" in the messages)
 * @node/@slot: the leaf and slot holding that item
 * @size:       in/out accumulator; the untrimmed name length of every entry
 *              is added to it
 *
 * For each entry packed into the item the name is read (at most
 * BTRFS_NAME_LEN bytes), the INODE_ITEM named by the entry location is
 * looked up and its mode compared with the entry file type, and
 * find_inode_ref() is asked for a matching INODE_REF/INODE_EXTREF. Entries
 * whose location is a ROOT_ITEM skip the inode checks. A DIR_INDEX item with
 * more than one entry is reported as an error.
 *
 * NOTE(review): the data_len condition, the error accumulation and the loop
 * framing are not visible in this listing.
 */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577 struct extent_buffer *node, int slot, u64 *size,
4578 unsigned int ext_ref)
4580 struct btrfs_dir_item *di;
4581 struct btrfs_inode_item *ii;
4582 struct btrfs_path path;
4583 struct btrfs_key location;
4584 char namebuf[BTRFS_NAME_LEN] = {0};
4597 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598 * ignore index check.
4600 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4602 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603 total = btrfs_item_size_nr(node, slot);
4605 while (cur < total) {
4606 data_len = btrfs_dir_data_len(node, di);
4608 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610 "DIR_ITEM" : "DIR_INDEX",
4611 key->objectid, key->offset, data_len);
4613 name_len = btrfs_dir_name_len(node, di);
/* Clamp the number of name bytes read so namebuf cannot overflow */
4614 if (name_len <= BTRFS_NAME_LEN) {
4617 len = BTRFS_NAME_LEN;
4618 warning("root %llu %s[%llu %llu] name too long",
4620 key->type == BTRFS_DIR_ITEM_KEY ?
4621 "DIR_ITEM" : "DIR_INDEX",
4622 key->objectid, key->offset);
/* The untrimmed name length is what gets added to the caller's total */
4624 (*size) += name_len;
4626 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627 filetype = btrfs_dir_type(node, di);
4629 btrfs_init_path(&path);
4630 btrfs_dir_item_key_to_cpu(node, di, &location);
4632 /* Ignore related ROOT_ITEM check */
4633 if (location.type == BTRFS_ROOT_ITEM_KEY)
4636 /* Check relative INODE_ITEM(existence/filetype) */
4637 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4639 err |= INODE_ITEM_MISSING;
4640 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643 key->offset, location.objectid, name_len,
4648 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649 struct btrfs_inode_item);
4650 mode = btrfs_inode_mode(path.nodes[0], ii);
4652 if (imode_to_type(mode) != filetype) {
4653 err |= INODE_ITEM_MISMATCH;
4654 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657 key->offset, name_len, namebuf, filetype);
4660 /* Check relative INODE_REF/INODE_EXTREF */
/* location is reused as the ref key: (inode, INODE_REF, this directory) */
4661 location.type = BTRFS_INODE_REF_KEY;
4662 location.offset = key->objectid;
4663 ret = find_inode_ref(root, &location, namebuf, len,
4666 if (ret & INODE_REF_MISSING)
4667 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670 key->offset, name_len, namebuf, filetype);
4673 btrfs_release_path(&path);
/* Step over this entry's header, name and data to reach the next entry */
4674 len = sizeof(*di) + name_len + data_len;
4675 di = (struct btrfs_dir_item *)((char *)di + len);
4678 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680 root->objectid, key->objectid, key->offset);
4689 * Check file extent datasum/hole, update the size of the file extents,
4690 * check and update the last offset of the file extent.
4692 * @root: the root of fs/file tree.
4693 * @fkey: the key of the file extent.
4694 * @nodatasum: INODE_NODATASUM feature.
4695 * @size: the sum of all EXTENT_DATA items size for this inode.
4696 * @end: the offset of the last extent.
4698 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @node/@slot: the leaf and slot holding the EXTENT_DATA item
 * @size:       in/out accumulator of extent bytes
 * @end:        in/out running end offset; compared with fkey->offset to
 *              detect a gap between consecutive extents
 *
 * Inline extents are checked for a zero length and, when uncompressed, for a
 * recorded length that differs from the item's inline payload length.
 * Regular and preallocated extents have their checksum coverage counted with
 * count_csum_range() and are then checked against the hole rules selected by
 * the global no_holes flag. Any other extent type is an error.
 *
 * NOTE(review): the return statements, the error() call openers and the
 * conditions around the final *end / *size updates are not visible in this
 * listing.
 */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701 struct extent_buffer *node, int slot,
4702 unsigned int nodatasum, u64 *size, u64 *end)
4704 struct btrfs_file_extent_item *fi;
4707 u64 extent_num_bytes;
4709 u64 csum_found; /* In byte size, sectorsize aligned */
4710 u64 search_start; /* Logical range start we search for csum */
4711 u64 search_len; /* Logical range len we search for csum */
4712 unsigned int extent_type;
4713 unsigned int is_hole;
4718 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4720 /* Check inline extent */
4721 extent_type = btrfs_file_extent_type(node, fi);
4722 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723 struct btrfs_item *e = btrfs_item_nr(slot);
4724 u32 item_inline_len;
4726 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728 compressed = btrfs_file_extent_compression(node, fi);
4729 if (extent_num_bytes == 0) {
4731 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732 root->objectid, fkey->objectid, fkey->offset);
4733 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline data can be compared with the payload length */
4735 if (!compressed && extent_num_bytes != item_inline_len) {
4737 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738 root->objectid, fkey->objectid, fkey->offset,
4739 extent_num_bytes, item_inline_len);
4740 err |= FILE_EXTENT_ERROR;
4742 *size += extent_num_bytes;
4746 /* Check extent type */
4747 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749 err |= FILE_EXTENT_ERROR;
4750 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751 root->objectid, fkey->objectid, fkey->offset);
4755 /* Check REG_EXTENT/PREALLOC_EXTENT */
4756 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759 extent_offset = btrfs_file_extent_offset(node, fi);
4760 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with both disk_bytenr and disk_num_bytes zero */
4761 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4764 * Check EXTENT_DATA csum
4766 * For plain (uncompressed) extent, we should only check the range
4767 * we're referring to, as it's possible that part of prealloc extent
4768 * has been written, and has csum:
4770 * |<--- Original large preallocated extent A ---->|
4771 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4774 * For compressed extent, we should check the whole range.
4777 search_start = disk_bytenr + extent_offset;
4778 search_len = extent_num_bytes;
4780 search_start = disk_bytenr;
4781 search_len = disk_num_bytes;
4783 ret = count_csum_range(root, search_start, search_len, &csum_found);
4784 if (csum_found > 0 && nodatasum) {
4785 err |= ODD_CSUM_ITEM;
4786 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787 root->objectid, fkey->objectid, fkey->offset);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789 !is_hole && (ret < 0 || csum_found < search_len)) {
4790 err |= CSUM_ITEM_MISSING;
4791 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 csum_found, search_len);
4794 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795 err |= ODD_CSUM_ITEM;
4796 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797 root->objectid, fkey->objectid, fkey->offset, csum_found);
4800 /* Check EXTENT_DATA hole */
4801 if (no_holes && is_hole) {
4802 err |= FILE_EXTENT_ERROR;
4803 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804 root->objectid, fkey->objectid, fkey->offset);
4805 } else if (!no_holes && *end != fkey->offset) {
4806 err |= FILE_EXTENT_ERROR;
4807 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808 root->objectid, fkey->objectid, fkey->offset);
4811 *end += extent_num_bytes;
4813 *size += extent_num_bytes;
4819 * Check INODE_ITEM and related ITEMs (the same inode number)
4820 * 1. check link count
4821 * 2. check inode ref/extref
4822 * 3. check dir item/index
4824 * @ext_ref: the EXTENDED_IREF feature
4826 * Return 0 if no error occurred.
4827 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Implementation notes:
 *
 * @path: positioned on the INODE_ITEM to check; it is advanced with
 *        btrfs_next_item() while the function runs.
 *
 * The inode's size, nbytes, mode, nlink and NODATASUM flag are read first.
 * The following items are then visited while their key objectid still equals
 * the inode number, and each is dispatched by key type to check_inode_ref(),
 * check_inode_extref(), check_dir_item() or check_file_extent(). Afterwards
 * nlink, isize and nbytes are compared with what those helpers accumulated.
 * An item keyed BTRFS_ORPHAN_OBJECTID is stepped over with btrfs_next_item()
 * instead of being checked.
 *
 * NOTE(review): the loop framing, several conditions (e.g. the directory
 * tests guarding the warnings) and the return statements are not visible in
 * this listing.
 */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830 unsigned int ext_ref)
4832 struct extent_buffer *node;
4833 struct btrfs_inode_item *ii;
4834 struct btrfs_key key;
4843 u64 extent_size = 0;
4845 unsigned int nodatasum;
4850 node = path->nodes[0];
4851 slot = path->slots[0];
4853 btrfs_item_key_to_cpu(node, &key, slot);
4854 inode_id = key.objectid;
4856 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857 ret = btrfs_next_item(root, path);
4863 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864 isize = btrfs_inode_size(node, ii);
4865 nbytes = btrfs_inode_nbytes(node, ii);
4866 mode = btrfs_inode_mode(node, ii);
4867 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868 nlink = btrfs_inode_nlink(node, ii);
4869 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4872 ret = btrfs_next_item(root, path);
4874 /* out will fill 'err' using current statistics */
4876 } else if (ret > 0) {
4881 node = path->nodes[0];
4882 slot = path->slots[0];
4883 btrfs_item_key_to_cpu(node, &key, slot);
/* Items of a different objectid belong to another inode */
4884 if (key.objectid != inode_id)
4888 case BTRFS_INODE_REF_KEY:
4889 ret = check_inode_ref(root, &key, node, slot, &refs,
4893 case BTRFS_INODE_EXTREF_KEY:
4894 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896 root->objectid, key.objectid,
4898 ret = check_inode_extref(root, &key, node, slot, &refs,
4902 case BTRFS_DIR_ITEM_KEY:
4903 case BTRFS_DIR_INDEX_KEY:
4905 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906 root->objectid, inode_id,
4907 imode_to_type(mode), key.objectid,
4910 ret = check_dir_item(root, &key, node, slot, &size,
4914 case BTRFS_EXTENT_DATA_KEY:
4916 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917 root->objectid, inode_id, key.objectid,
4920 ret = check_file_extent(root, &key, node, slot,
4921 nodatasum, &extent_size,
4925 case BTRFS_XATTR_ITEM_KEY:
4928 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929 key.objectid, key.type, key.offset);
4934 /* verify INODE_ITEM nlink/isize/nbytes */
4937 err |= LINK_COUNT_ERROR;
4938 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939 root->objectid, inode_id, nlink);
4943 * Just a warning, as dir inode nbytes is just an
4944 * instructive value.
4946 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948 root->objectid, inode_id, root->nodesize);
/* size is the total accumulated by check_dir_item() for this inode */
4951 if (isize != size) {
4953 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954 root->objectid, inode_id, isize, size);
/* refs is the count accumulated by check_inode_ref()/check_inode_extref() */
4957 if (nlink != refs) {
4958 err |= LINK_COUNT_ERROR;
4959 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960 root->objectid, inode_id, nlink, refs);
4961 } else if (!nlink) {
4965 if (!nbytes && !no_holes && extent_end < isize) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968 root->objectid, inode_id, isize);
/* extent_size is the total accumulated by check_file_extent() */
4971 if (nbytes != extent_size) {
4972 err |= NBYTES_ERROR;
4973 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of a fs tree, i.e. the INODE_ITEM whose objectid is
 * BTRFS_FIRST_FREE_OBJECTID.
 *
 * @root:    the fs/file tree to check
 * @ext_ref: the EXTENDED_IREF feature, passed on to check_inode_item()
 *
 * The item is looked up with btrfs_search_slot(); INODE_ITEM_MISSING is
 * recorded when it cannot be used, otherwise the result of
 * check_inode_item() is merged into the error bitmap. Roots that are being
 * dropped are exempt from the check.
 *
 * NOTE(review): the key offset, the right-hand side of the drop_progress
 * comparison and the return statements are not visible in this listing.
 */
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4983 struct btrfs_path path;
4984 struct btrfs_key key;
4988 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989 key.type = BTRFS_INODE_ITEM_KEY;
4992 /* For root being dropped, we don't need to check first inode */
4993 if (btrfs_root_refs(&root->root_item) == 0 &&
4994 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4998 btrfs_init_path(&path);
5000 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5005 err |= INODE_ITEM_MISSING;
5006 error("first inode item of root %llu is missing",
5010 err |= check_inode_item(root, &path, ext_ref);
5015 btrfs_release_path(&path);
5020 * Iterate over all items in the tree and call check_inode_item() to check them.
5022 * @root: the root of the tree to be checked.
5023 * @ext_ref: the EXTENDED_IREF feature
5025 * Return 0 if no error found.
5026 * Return <0 for error.
/*
 * Implementation notes:
 *
 * The first inode is checked separately through check_fs_first_inode()
 * before the walk. The walk itself alternates walk_down_tree_v2() and
 * walk_up_tree_v2() over a path that starts either at the root node or at
 * the position recorded in the root item's drop_progress.
 *
 * NOTE(review): the loop framing, the handling of the helpers' return values
 * and the return statement are not visible in this listing.
 */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5030 struct btrfs_path path;
5031 struct node_refs nrefs;
5032 struct btrfs_root_item *root_item = &root->root_item;
5038 * We need to manually check the first inode item(256)
5039 * As the following traversal function will only start from
5040 * the first inode item in the leaf, if inode item(256) is missing
5041 * we will just skip it forever.
5043 ret = check_fs_first_inode(root, ext_ref);
5047 memset(&nrefs, 0, sizeof(nrefs));
5048 level = btrfs_header_level(root->node);
5049 btrfs_init_path(&path);
/*
 * A root that still has references, or has no recorded drop progress, is
 * walked from its root node; the path takes its own reference on that node.
 * Otherwise the walk starts at the drop_progress key at drop_level.
 */
5051 if (btrfs_root_refs(root_item) > 0 ||
5052 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5053 path.nodes[level] = root->node;
5054 path.slots[level] = 0;
5055 extent_buffer_get(root->node);
5057 struct btrfs_key key;
5059 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060 level = root_item->drop_level;
5061 path.lowest_level = level;
5062 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5069 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5072 /* if ret is negative, walk shall stop */
5078 ret = walk_up_tree_v2(root, &path, &level);
5080 /* Normal exit, reset ret to err */
5087 btrfs_release_path(&path);
5092 * Find the relative ref for root_ref and root_backref.
5094 * @root: the root of the root tree.
5095 * @ref_key: the key of the root ref.
5097 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @node/@slot: the leaf and slot holding the ROOT_REF/ROOT_BACKREF item
 *
 * The counterpart item is looked up under the swapped key: objectid and
 * offset are exchanged and the type is flipped between ROOT_REF and
 * ROOT_BACKREF. ROOT_REF_MISSING is recorded when the lookup does not yield
 * it; ROOT_REF_MISMATCH is recorded when dirid, sequence, name length or
 * name differ between the two items. Names longer than BTRFS_NAME_LEN are
 * read and compared on their first BTRFS_NAME_LEN bytes only.
 *
 * NOTE(review): the condition on the search result and the return statement
 * are not visible in this listing.
 */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100 struct extent_buffer *node, int slot)
5102 struct btrfs_path path;
5103 struct btrfs_key key;
5104 struct btrfs_root_ref *ref;
5105 struct btrfs_root_ref *backref;
5106 char ref_name[BTRFS_NAME_LEN] = {0};
5107 char backref_name[BTRFS_NAME_LEN] = {0};
5113 u32 backref_namelen;
5118 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119 ref_dirid = btrfs_root_ref_dirid(node, ref);
5120 ref_seq = btrfs_root_ref_sequence(node, ref);
5121 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Clamp the number of name bytes read so ref_name cannot overflow */
5123 if (ref_namelen <= BTRFS_NAME_LEN) {
5126 len = BTRFS_NAME_LEN;
5127 warning("%s[%llu %llu] ref_name too long",
5128 ref_key->type == BTRFS_ROOT_REF_KEY ?
5129 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5132 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5134 /* Find relative root_ref */
5135 key.objectid = ref_key->offset;
/* BACKREF + REF - type yields BACKREF for a REF key and REF for a BACKREF key */
5136 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137 key.offset = ref_key->objectid;
5139 btrfs_init_path(&path);
5140 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5142 err |= ROOT_REF_MISSING;
5143 error("%s[%llu %llu] couldn't find relative ref",
5144 ref_key->type == BTRFS_ROOT_REF_KEY ?
5145 "ROOT_REF" : "ROOT_BACKREF",
5146 ref_key->objectid, ref_key->offset);
5150 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151 struct btrfs_root_ref);
5152 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5156 if (backref_namelen <= BTRFS_NAME_LEN) {
5157 len = backref_namelen;
5159 len = BTRFS_NAME_LEN;
5160 warning("%s[%llu %llu] ref_name too long",
5161 key.type == BTRFS_ROOT_REF_KEY ?
5162 "ROOT_REF" : "ROOT_BACKREF",
5163 key.objectid, key.offset);
5165 read_extent_buffer(path.nodes[0], backref_name,
5166 (unsigned long)(backref + 1), len);
/* The name lengths are compared untrimmed; the names over len bytes */
5168 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169 ref_namelen != backref_namelen ||
5170 strncmp(ref_name, backref_name, len)) {
5171 err |= ROOT_REF_MISMATCH;
5172 error("%s[%llu %llu] mismatch relative ref",
5173 ref_key->type == BTRFS_ROOT_REF_KEY ?
5174 "ROOT_REF" : "ROOT_BACKREF",
5175 ref_key->objectid, ref_key->offset);
5178 btrfs_release_path(&path);
5183 * Check all fs/file tree in low_memory mode.
5185 * 1. for fs tree root item, call check_fs_root_v2()
5186 * 2. for fs tree root ref/backref, call check_root_ref()
5188 * Return 0 if no error occurred.
/*
 * Implementation notes:
 *
 * @fs_info: the filesystem; its tree root is scanned and its EXTENDED_IREF
 *           incompat flag is passed to check_fs_root_v2()
 *
 * The scan starts at the key (BTRFS_FS_TREE_OBJECTID, ROOT_ITEM, ...) and
 * advances with btrfs_next_item(). Keys with an objectid above
 * BTRFS_LAST_FREE_OBJECTID are tested for specially. ROOT_ITEMs accepted by
 * fs_root_objectid() are read and passed to check_fs_root_v2(); ROOT_REF and
 * ROOT_BACKREF items go to check_root_ref().
 *
 * NOTE(review): the key offset, the action taken for objectids above
 * BTRFS_LAST_FREE_OBJECTID, the error accumulation and the return statement
 * are not visible in this listing.
 */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5192 struct btrfs_root *tree_root = fs_info->tree_root;
5193 struct btrfs_root *cur_root = NULL;
5194 struct btrfs_path path;
5195 struct btrfs_key key;
5196 struct extent_buffer *node;
5197 unsigned int ext_ref;
5202 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5204 btrfs_init_path(&path);
5205 key.objectid = BTRFS_FS_TREE_OBJECTID;
5207 key.type = BTRFS_ROOT_ITEM_KEY;
5209 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5213 } else if (ret > 0) {
5219 node = path.nodes[0];
5220 slot = path.slots[0];
5221 btrfs_item_key_to_cpu(node, &key, slot);
5222 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5224 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225 fs_root_objectid(key.objectid)) {
/* The reloc tree root is read without the root cache and freed below */
5226 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5230 key.offset = (u64)-1;
5231 cur_root = btrfs_read_fs_root(fs_info, &key);
5234 if (IS_ERR(cur_root)) {
5235 error("Fail to read fs/subvol tree: %lld",
5241 ret = check_fs_root_v2(cur_root, ext_ref);
5244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245 btrfs_free_fs_root(cur_root);
5246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248 ret = check_root_ref(tree_root, &key, node, slot);
5252 ret = btrfs_next_item(tree_root, &path);
5262 btrfs_release_path(&path);
/*
 * Verify every backref recorded for an extent record.
 *
 * @rec:        the extent record whose rec->backrefs list is walked
 * @print_errs: NOTE(review): presumably gates the fprintf() diagnostics; the
 *              statements that use it are not visible in this listing
 *
 * The visible checks, per backref:
 *   - it was found in the extent tree (found_extent_tree),
 *   - a tree backref was referenced back (found_ref),
 *   - a data backref has found_ref equal to num_refs, a disk_bytenr equal to
 *     rec->start and a byte count equal to rec->nr.
 * The number of references found is summed and compared with rec->refs.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing wrong".
 * NOTE(review): the return statements themselves are not visible here.
 */
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5268 struct list_head *cur = rec->backrefs.next;
5269 struct extent_backref *back;
5270 struct tree_backref *tback;
5271 struct data_backref *dback;
5275 while(cur != &rec->backrefs) {
5276 back = to_extent_backref(cur);
/* Backref seen elsewhere but absent from the extent tree */
5278 if (!back->found_extent_tree) {
5282 if (back->is_data) {
5283 dback = to_data_backref(back);
5284 fprintf(stderr, "Backref %llu %s %llu"
5285 " owner %llu offset %llu num_refs %lu"
5286 " not found in extent tree\n",
5287 (unsigned long long)rec->start,
5288 back->full_backref ?
5290 back->full_backref ?
5291 (unsigned long long)dback->parent:
5292 (unsigned long long)dback->root,
5293 (unsigned long long)dback->owner,
5294 (unsigned long long)dback->offset,
5295 (unsigned long)dback->num_refs);
5297 tback = to_tree_backref(back);
5298 fprintf(stderr, "Backref %llu parent %llu"
5299 " root %llu not found in extent tree\n",
5300 (unsigned long long)rec->start,
5301 (unsigned long long)tback->parent,
5302 (unsigned long long)tback->root);
/* Tree backref for which no referencing block was found */
5305 if (!back->is_data && !back->found_ref) {
5309 tback = to_tree_backref(back);
5310 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311 (unsigned long long)rec->start,
5312 back->full_backref ? "parent" : "root",
5313 back->full_backref ?
5314 (unsigned long long)tback->parent :
5315 (unsigned long long)tback->root, back);
5317 if (back->is_data) {
5318 dback = to_data_backref(back);
5319 if (dback->found_ref != dback->num_refs) {
5323 fprintf(stderr, "Incorrect local backref count"
5324 " on %llu %s %llu owner %llu"
5325 " offset %llu found %u wanted %u back %p\n",
5326 (unsigned long long)rec->start,
5327 back->full_backref ?
5329 back->full_backref ?
5330 (unsigned long long)dback->parent:
5331 (unsigned long long)dback->root,
5332 (unsigned long long)dback->owner,
5333 (unsigned long long)dback->offset,
5334 dback->found_ref, dback->num_refs, back);
5336 if (dback->disk_bytenr != rec->start) {
5340 fprintf(stderr, "Backref disk bytenr does not"
5341 " match extent record, bytenr=%llu, "
5342 "ref bytenr=%llu\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)dback->disk_bytenr);
5347 if (dback->bytes != rec->nr) {
5351 fprintf(stderr, "Backref bytes do not match "
5352 "extent backref, bytenr=%llu, ref "
5353 "bytes=%llu, backref bytes=%llu\n",
5354 (unsigned long long)rec->start,
5355 (unsigned long long)rec->nr,
5356 (unsigned long long)dback->bytes);
/* Add this backref's contribution to the global reference count */
5359 if (!back->is_data) {
5362 dback = to_data_backref(back);
5363 found += dback->found_ref;
5366 if (found != rec->refs) {
5370 fprintf(stderr, "Incorrect global backref count "
5371 "on %llu found %llu wanted %llu\n",
5372 (unsigned long long)rec->start,
5373 (unsigned long long)found,
5374 (unsigned long long)rec->refs);
/*
 * Drain the backref list of an extent record.
 *
 * @rec: the extent record; the loop runs until rec->backrefs is empty,
 *       taking the first list entry each time.
 *
 * NOTE(review): the unlink/free statements and the return value are not
 * visible in this listing.
 */
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5382 struct extent_backref *back;
5383 struct list_head *cur;
5384 while (!list_empty(&rec->backrefs)) {
5385 cur = rec->backrefs.next;
5386 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in a cache tree.
 *
 * @extent_cache: the tree holding struct extent_record entries
 *
 * The first cache extent is taken, converted to its extent record, removed
 * from the tree, and its backrefs are released.
 *
 * NOTE(review): the loop framing and the freeing of the record itself are
 * not visible in this listing.
 */
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5395 struct cache_extent *cache;
5396 struct extent_record *rec;
5399 cache = first_cache_extent(extent_cache);
5402 rec = container_of(cache, struct extent_record, cache);
5403 remove_cache_extent(extent_cache, cache);
5404 free_all_extent_backrefs(rec);
/*
 * Drop an extent record from the cache once nothing is left to verify.
 *
 * @extent_cache: the tree the record lives in
 * @rec:          the record to test
 *
 * The record is removed only when all of these hold: its content and owner
 * ref were checked, the extent item ref count equals the counted refs and is
 * non-zero, it has no duplicates, all_backpointers_checked() returns 0, and
 * none of the bad_full_backref / crossing_stripes / wrong_chunk_type flags
 * is set. Removal takes it out of the cache tree, releases its backrefs and
 * unlinks it from its list.
 *
 * NOTE(review): the freeing of the record itself and the return value are
 * not visible in this listing.
 */
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410 struct extent_record *rec)
5412 if (rec->content_checked && rec->owner_ref_checked &&
5413 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415 !rec->bad_full_backref && !rec->crossing_stripes &&
5416 !rec->wrong_chunk_type) {
5417 remove_cache_extent(extent_cache, &rec->cache);
5418 free_all_extent_backrefs(rec);
5419 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is referenced from the tree that its header
 * names as owner.
 *
 * Step 1 walks rec->backrefs, testing found_ref and full_backref on each
 * node and comparing the tree backref's root with btrfs_header_owner(buf).
 * Step 2 reads the root keyed by the header owner, searches it for the first
 * key of @buf with path.lowest_level set to level + 1, and compares the block
 * pointer in the parent node at that slot with buf->start.
 *
 * Returns 0 if step 2 found the block, 1 otherwise.
 * NOTE(review): the early exits of step 1 (continue/return lines) and the
 * error paths after btrfs_read_fs_root()/btrfs_search_slot() are not visible
 * in this excerpt - confirm against the full file.
 */
5425 static int check_owner_ref(struct btrfs_root *root,
5426 struct extent_record *rec,
5427 struct extent_buffer *buf)
5429 struct extent_backref *node;
5430 struct tree_backref *back;
5431 struct btrfs_root *ref_root;
5432 struct btrfs_key key;
5433 struct btrfs_path path;
5434 struct extent_buffer *parent;
5439 list_for_each_entry(node, &rec->backrefs, list) {
5442 if (!node->found_ref)
5444 if (node->full_backref)
5446 back = to_tree_backref(node);
5447 if (btrfs_header_owner(buf) == back->root)
5450 BUG_ON(rec->is_root);
5452 /* try to find the block by search corresponding fs tree */
5453 key.objectid = btrfs_header_owner(buf);
5454 key.type = BTRFS_ROOT_ITEM_KEY;
5455 key.offset = (u64)-1;
5457 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458 if (IS_ERR(ref_root))
5461 level = btrfs_header_level(buf);
/* first key of the block: item key for a leaf, node key otherwise (presumably selected on level; the test is not visible) */
5463 btrfs_item_key_to_cpu(buf, &key, 0);
5465 btrfs_node_key_to_cpu(buf, &key, 0);
5467 btrfs_init_path(&path);
/* stop the search one level above @buf so the parent pointer can be inspected */
5468 path.lowest_level = level + 1;
5469 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5473 parent = path.nodes[level + 1];
5474 if (parent && buf->start == btrfs_node_blockptr(parent,
5475 path.slots[level + 1]))
5478 btrfs_release_path(&path);
5479 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; full backrefs are tested separately before the
 * root comparison.
 * NOTE(review): the list advance, the is_data handling and all return
 * statements are not visible in this excerpt - confirm the exact result
 * values against the full file.
 */
5482 static int is_extent_tree_record(struct extent_record *rec)
5484 struct list_head *cur = rec->backrefs.next;
5485 struct extent_backref *node;
5486 struct tree_backref *back;
5489 while(cur != &rec->backrefs) {
5490 node = to_extent_backref(cur);
5494 back = to_tree_backref(node);
5495 if (node->full_backref)
5497 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * Looks up the extent record covering (start, len) in @extent_cache, tests it
 * with is_extent_tree_record(), converts the record's parent_key to CPU
 * byte order and returns the result of btrfs_add_corrupt_extent_record().
 * NOTE(review): the remaining parameter line and the early returns (lookup
 * miss, is_extent_tree_record() failing) are not visible in this excerpt.
 */
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505 struct cache_tree *extent_cache,
5508 struct extent_record *rec;
5509 struct cache_extent *cache;
5510 struct btrfs_key key;
5512 cache = lookup_cache_extent(extent_cache, start, len);
5516 rec = container_of(cache, struct extent_record, cache);
5517 if (!is_extent_tree_record(rec))
5520 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (header level non-zero): the two struct btrfs_key_ptr entries are read
 * out and written back in swapped positions; the first node key is then
 * pushed upwards with btrfs_fixup_low_keys() (the condition guarding that
 * step is not visible in this excerpt).
 *
 * Leaf: the item data of both slots is copied into temporary malloc()ed
 * buffers, written back to the other item's location, the item offsets and
 * sizes are exchanged and both keys are rewritten through
 * btrfs_set_item_key_unsafe().
 *
 * NOTE(review): allocation-failure handling, the frees and the return value
 * are not visible in this excerpt - confirm against the full file.
 */
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525 struct extent_buffer *buf, int slot)
5527 if (btrfs_header_level(buf)) {
5528 struct btrfs_key_ptr ptr1, ptr2;
5530 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531 sizeof(struct btrfs_key_ptr));
5532 read_extent_buffer(buf, &ptr2,
5533 btrfs_node_key_ptr_offset(slot + 1),
5534 sizeof(struct btrfs_key_ptr));
5535 write_extent_buffer(buf, &ptr1,
5536 btrfs_node_key_ptr_offset(slot + 1),
5537 sizeof(struct btrfs_key_ptr));
5538 write_extent_buffer(buf, &ptr2,
5539 btrfs_node_key_ptr_offset(slot),
5540 sizeof(struct btrfs_key_ptr));
5542 struct btrfs_disk_key key;
5543 btrfs_node_key(buf, &key, 0);
5544 btrfs_fixup_low_keys(root, path, &key,
5545 btrfs_header_level(buf) + 1);
5548 struct btrfs_item *item1, *item2;
5549 struct btrfs_key k1, k2;
5550 char *item1_data, *item2_data;
5551 u32 item1_offset, item2_offset, item1_size, item2_size;
5553 item1 = btrfs_item_nr(slot);
5554 item2 = btrfs_item_nr(slot + 1);
5555 btrfs_item_key_to_cpu(buf, &k1, slot);
5556 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557 item1_offset = btrfs_item_offset(buf, item1);
5558 item2_offset = btrfs_item_offset(buf, item2);
5559 item1_size = btrfs_item_size(buf, item1);
5560 item2_size = btrfs_item_size(buf, item2);
5562 item1_data = malloc(item1_size);
5565 item2_data = malloc(item2_size);
5571 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and vice versa). If the two sizes differ this reads past the
 * smaller buffer - confirm whether the sizes are guaranteed equal here.
 */
5574 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5579 btrfs_set_item_offset(buf, item1, item2_offset);
5580 btrfs_set_item_offset(buf, item2, item1_offset);
5581 btrfs_set_item_size(buf, item1, item2_size);
5582 btrfs_set_item_size(buf, item2, item1_size);
/* re-key both slots: @slot receives k2, @slot + 1 receives k1 */
5584 path->slots[0] = slot;
5585 btrfs_set_item_key_unsafe(root, path, &k2);
5586 path->slots[0] = slot + 1;
5587 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->nodes[path->lowest_level].
 *
 * Walks adjacent slot pairs (i, i + 1), loads both keys (node keys or item
 * keys) and compares them with btrfs_comp_cpu_keys(); pairs that do not
 * compare as "less than" are handed to swap_values(), after which the buffer
 * is marked dirty.
 * NOTE(review): the level test choosing node vs item keys, the handling of
 * swap_values() failures and the return value are not visible in this
 * excerpt - confirm.
 */
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5594 struct extent_buffer *buf;
5595 struct btrfs_key k1, k2;
5597 int level = path->lowest_level;
5600 buf = path->nodes[level];
5601 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5603 btrfs_node_key_to_cpu(buf, &k1, i);
5604 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, i);
5607 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5609 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5611 ret = swap_values(root, path, buf, i);
5614 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are accepted; other key types take the (not visible)
 * early exit. For an accepted key the deletion is reported on stdout, the
 * item headers after @slot are moved down by one entry, nritems is
 * decremented and the buffer is marked dirty. The first item key is also
 * propagated to the parents with btrfs_fixup_low_keys().
 * NOTE(review): the condition guarding the low-key fixup and the return
 * values are not visible in this excerpt - confirm.
 */
5620 static int delete_bogus_item(struct btrfs_root *root,
5621 struct btrfs_path *path,
5622 struct extent_buffer *buf, int slot)
5624 struct btrfs_key key;
5625 int nritems = btrfs_header_nritems(buf);
5627 btrfs_item_key_to_cpu(buf, &key, slot);
5629 /* These are all the keys we can deal with missing. */
5630 if (key.type != BTRFS_DIR_INDEX_KEY &&
5631 key.type != BTRFS_EXTENT_ITEM_KEY &&
5632 key.type != BTRFS_METADATA_ITEM_KEY &&
5633 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5637 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638 (unsigned long long)key.objectid, key.type,
5639 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array: entries slot + 1 .. nritems - 1 move down one */
5640 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641 btrfs_item_nr_offset(slot + 1),
5642 sizeof(struct btrfs_item) *
5643 (nritems - slot - 1));
5644 btrfs_set_header_nritems(buf, nritems - 1);
5646 struct btrfs_disk_key disk_key;
5648 btrfs_item_key(buf, &disk_key, 0);
5649 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5651 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * For item 0 the end of its data is compared with BTRFS_LEAF_DATA_SIZE(root);
 * for every later item the end is compared with the offset of the previous
 * item. When the end lies beyond that boundary, delete_bogus_item() is tried
 * and otherwise an error is printed. When the end lies before the boundary,
 * the difference becomes @shift, the item data is moved by that many bytes,
 * the item offset is updated and the buffer is marked dirty.
 * NOTE(review): loop control after a deletion, the argument of the offset
 * update and the return value are not visible in this excerpt - confirm.
 */
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5657 struct extent_buffer *buf;
5661 /* We should only get this for leaves */
5662 BUG_ON(path->lowest_level);
5663 buf = path->nodes[0];
5665 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666 unsigned int shift = 0, offset;
5668 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669 BTRFS_LEAF_DATA_SIZE(root)) {
5670 if (btrfs_item_end_nr(buf, i) >
5671 BTRFS_LEAF_DATA_SIZE(root)) {
5672 ret = delete_bogus_item(root, path, buf, i);
5675 fprintf(stderr, "item is off the end of the "
5676 "leaf, can't fix\n");
5680 shift = BTRFS_LEAF_DATA_SIZE(root) -
5681 btrfs_item_end_nr(buf, i);
5682 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683 btrfs_item_offset_nr(buf, i - 1)) {
5684 if (btrfs_item_end_nr(buf, i) >
5685 btrfs_item_offset_nr(buf, i - 1)) {
5686 ret = delete_bogus_item(root, path, buf, i);
5689 fprintf(stderr, "items overlap, can't fix\n");
5693 shift = btrfs_item_offset_nr(buf, i - 1) -
5694 btrfs_item_end_nr(buf, i);
5699 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700 i, shift, (unsigned long long)buf->start);
5701 offset = btrfs_item_offset_nr(buf, i);
/* slide the item's data towards the end of the leaf by @shift bytes */
5702 memmove_extent_buffer(buf,
5703 btrfs_leaf_data(buf) + offset + shift,
5704 btrfs_leaf_data(buf) + offset,
5705 btrfs_item_size_nr(buf, i));
5706 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5708 btrfs_mark_buffer_dirty(buf);
5712 * We may have moved things, in which case we want to exit so we don't
5713 * write those changes out. Once we have proper abort functionality in
5714 * progs this can be changed to something nicer.
5721 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5722 * then just return -EIO.
/*
 * Try to repair @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. The roots referencing buf->start are collected with
 * btrfs_find_all_roots(); for each of them the root is read, a transaction is
 * started, the block is located with a COW search (block checking disabled
 * through path.skip_check_block) using the first key of @buf, and either
 * fix_key_order() or fix_item_offset() is applied before the transaction is
 * committed.
 * NOTE(review): the error branches, the ulist cleanup and the return value
 * are not visible in this excerpt - confirm against the full file.
 */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725 struct extent_buffer *buf,
5726 enum btrfs_tree_block_status status)
5728 struct btrfs_trans_handle *trans;
5729 struct ulist *roots;
5730 struct ulist_node *node;
5731 struct btrfs_root *search_root;
5732 struct btrfs_path path;
5733 struct ulist_iterator iter;
5734 struct btrfs_key root_key, key;
5737 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5741 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5745 btrfs_init_path(&path);
5746 ULIST_ITER_INIT(&iter);
5747 while ((node = ulist_next(roots, &iter))) {
5748 root_key.objectid = node->val;
5749 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750 root_key.offset = (u64)-1;
5752 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5759 trans = btrfs_start_transaction(search_root, 0);
5760 if (IS_ERR(trans)) {
5761 ret = PTR_ERR(trans);
/* search down to the level of @buf and skip block validation, since the block is known to be bad */
5765 path.lowest_level = btrfs_header_level(buf);
5766 path.skip_check_block = 1;
5767 if (path.lowest_level)
5768 btrfs_node_key_to_cpu(buf, &key, 0);
5770 btrfs_item_key_to_cpu(buf, &key, 0);
5771 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5774 btrfs_commit_transaction(trans, search_root);
5777 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778 ret = fix_key_order(search_root, &path);
5779 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780 ret = fix_item_offset(search_root, &path);
5782 btrfs_commit_transaction(trans, search_root);
5785 btrfs_release_path(&path);
5786 btrfs_commit_transaction(trans, search_root);
5789 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * The record covering the block is looked up in @extent_cache and stamped
 * with the header generation, the objectid of the first key and the level.
 * The block is verified with btrfs_check_leaf() or btrfs_check_node(); if
 * that does not return BTRFS_TREE_BLOCK_CLEAN a repair through
 * try_to_fix_bad_block() is attempted and, failing that, "bad block" is
 * reported. A clean block gets content_checked set; owner_ref_checked is set
 * directly when @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise
 * check_owner_ref() is consulted. Finally maybe_free_extent_rec() is given
 * the chance to drop the record.
 * NOTE(review): the repair-mode condition, the lookup-miss path and the
 * return values are not visible in this excerpt - confirm.
 */
5793 static int check_block(struct btrfs_root *root,
5794 struct cache_tree *extent_cache,
5795 struct extent_buffer *buf, u64 flags)
5797 struct extent_record *rec;
5798 struct cache_extent *cache;
5799 struct btrfs_key key;
5800 enum btrfs_tree_block_status status;
5804 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5807 rec = container_of(cache, struct extent_record, cache);
5808 rec->generation = btrfs_header_generation(buf);
5810 level = btrfs_header_level(buf);
5811 if (btrfs_header_nritems(buf) > 0) {
5814 btrfs_item_key_to_cpu(buf, &key, 0);
5816 btrfs_node_key_to_cpu(buf, &key, 0);
5818 rec->info_objectid = key.objectid;
5820 rec->info_level = level;
5822 if (btrfs_is_leaf(buf))
5823 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5825 status = btrfs_check_node(root, &rec->parent_key, buf);
5827 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5829 status = try_to_fix_bad_block(root, buf, status);
5830 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5832 fprintf(stderr, "bad block %llu\n",
5833 (unsigned long long)buf->start);
5836 * Signal to callers we need to start the scan over
5837 * again since we'll have cowed blocks.
5842 rec->content_checked = 1;
5843 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844 rec->owner_ref_checked = 1;
5846 ret = check_owner_ref(root, rec, buf);
5848 rec->owner_ref_checked = 1;
5852 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: full backrefs are matched on back->parent
 * against @parent, non-full backrefs on back->root against @root.
 * NOTE(review): the test that chooses between the two branches (presumably
 * on @parent), the is_data filter, the list advance and the return
 * statements are not visible in this excerpt - confirm.
 */
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857 u64 parent, u64 root)
5859 struct list_head *cur = rec->backrefs.next;
5860 struct extent_backref *node;
5861 struct tree_backref *back;
5863 while(cur != &rec->backrefs) {
5864 node = to_extent_backref(cur);
5868 back = to_tree_backref(node);
5870 if (!node->full_backref)
5872 if (parent == back->parent)
5875 if (node->full_backref)
5877 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed. One branch stores @parent and marks the node
 * as a full backref, the other clears full_backref.
 * NOTE(review): the allocation-failure check, the branch condition, the
 * assignment of the root field and the return statement are not visible in
 * this excerpt - confirm.
 */
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885 u64 parent, u64 root)
5887 struct tree_backref *ref = malloc(sizeof(*ref));
5891 memset(&ref->node, 0, sizeof(ref->node));
5893 ref->parent = parent;
5894 ref->node.full_backref = 1;
5897 ref->node.full_backref = 0;
5899 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a matching data backref.
 *
 * Full backrefs are compared on back->parent against @parent. Non-full
 * backrefs are compared on root, owner and offset; for those, when both the
 * caller's found_ref and the node's found_ref are set, a differing bytes or
 * disk_bytenr value is tested as well.
 * NOTE(review): one parameter line, the branch selection, the is_data
 * filter, the list advance and the return statements are not visible in this
 * excerpt - confirm how a bytes/disk_bytenr mismatch is treated.
 */
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905 u64 parent, u64 root,
5906 u64 owner, u64 offset,
5908 u64 disk_bytenr, u64 bytes)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct data_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_data_backref(node);
5921 if (!node->full_backref)
5923 if (parent == back->parent)
5926 if (node->full_backref)
5928 if (back->root == root && back->owner == owner &&
5929 back->offset == offset) {
5930 if (found_ref && node->found_ref &&
5931 (back->bytes != bytes ||
5932 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged is_data. One branch stores @parent
 * and marks a full backref, the other stores @offset and clears
 * full_backref. bytes is set from max_size, and rec->max_size is raised to
 * max_size if it was smaller.
 * NOTE(review): the remaining parameter line(s), the allocation-failure
 * check, the branch condition, other field assignments and the return
 * statement are not visible in this excerpt - confirm.
 */
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942 u64 parent, u64 root,
5943 u64 owner, u64 offset,
5946 struct data_backref *ref = malloc(sizeof(*ref));
5950 memset(&ref->node, 0, sizeof(ref->node));
5951 ref->node.is_data = 1;
5954 ref->parent = parent;
5957 ref->node.full_backref = 1;
5961 ref->offset = offset;
5962 ref->node.full_backref = 0;
5964 ref->bytes = max_size;
5967 list_add_tail(&ref->node.list, &rec->backrefs);
5968 if (max_size > rec->max_size)
5969 rec->max_size = max_size;
5973 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start does
 * not fit the extent:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA,
 *  - a metadata extent needs SYSTEM or METADATA; when a backref is present,
 *    a first backref rooted in the chunk tree requires SYSTEM and any other
 *    root requires METADATA, while a data backref on a tree block is itself
 *    flagged as wrong.
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are not visible in this excerpt - confirm.
 */
5974 static void check_extent_type(struct extent_record *rec)
5976 struct btrfs_block_group_cache *bg_cache;
5978 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5982 /* data extent, check chunk directly*/
5983 if (!rec->metadata) {
5984 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985 rec->wrong_chunk_type = 1;
5989 /* metadata extent, check the obvious case first */
5990 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991 BTRFS_BLOCK_GROUP_METADATA))) {
5992 rec->wrong_chunk_type = 1;
5997 * Check SYSTEM extent, as it's also marked as metadata, we can only
5998 * make sure it's a SYSTEM extent by its backref
6000 if (!list_empty(&rec->backrefs)) {
6001 struct extent_backref *node;
6002 struct tree_backref *tback;
/* only the first backref on the list is consulted */
6005 node = to_extent_backref(rec->backrefs.next);
6006 if (node->is_data) {
6007 /* tree block shouldn't have data backref */
6008 rec->wrong_chunk_type = 1;
6011 tback = container_of(node, struct tree_backref, node);
6013 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6016 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017 if (!(bg_cache->flags & bg_type))
6018 rec->wrong_chunk_type = 1;
6023 * Allocate a new extent record, fill default values from @tmpl and insert into
6024 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025 * the cache, otherwise it fails.
/*
 * Implementation notes (from the visible code):
 *  - tmpl->max_size must be non-zero (BUG_ON).
 *  - rec->nr is the larger of tmpl->nr and tmpl->max_size, whereas the cache
 *    extent inserted into @extent_cache uses tmpl->nr as its size.
 *  - num_duplicates, bad_full_backref, crossing_stripes and wrong_chunk_type
 *    start at 0 and flag_block_full_backref at FLAG_UNSET; the other fields
 *    are copied from @tmpl.
 *  - the global bytes_used counter grows by rec->nr.
 *  - crossing_stripes is recomputed with check_crossing_stripes() and the
 *    chunk type is verified with check_extent_type().
 * NOTE(review): the malloc/insert failure paths, the condition guarding the
 * crossing-stripes check and the return value are not visible in this
 * excerpt - confirm.
 */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028 struct extent_record *tmpl)
6030 struct extent_record *rec;
6033 BUG_ON(tmpl->max_size == 0);
6034 rec = malloc(sizeof(*rec));
6037 rec->start = tmpl->start;
6038 rec->max_size = tmpl->max_size;
6039 rec->nr = max(tmpl->nr, tmpl->max_size);
6040 rec->found_rec = tmpl->found_rec;
6041 rec->content_checked = tmpl->content_checked;
6042 rec->owner_ref_checked = tmpl->owner_ref_checked;
6043 rec->num_duplicates = 0;
6044 rec->metadata = tmpl->metadata;
6045 rec->flag_block_full_backref = FLAG_UNSET;
6046 rec->bad_full_backref = 0;
6047 rec->crossing_stripes = 0;
6048 rec->wrong_chunk_type = 0;
6049 rec->is_root = tmpl->is_root;
6050 rec->refs = tmpl->refs;
6051 rec->extent_item_refs = tmpl->extent_item_refs;
6052 rec->parent_generation = tmpl->parent_generation;
6053 INIT_LIST_HEAD(&rec->backrefs);
6054 INIT_LIST_HEAD(&rec->dups);
6055 INIT_LIST_HEAD(&rec->list);
6056 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6057 rec->cache.start = tmpl->start;
6058 rec->cache.size = tmpl->nr;
6059 ret = insert_cache_extent(extent_cache, &rec->cache);
6064 bytes_used += rec->nr;
6067 rec->crossing_stripes = check_crossing_stripes(global_info,
6068 rec->start, global_info->tree_root->nodesize);
6069 check_extent_type(rec);
6074 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6076 * - refs - if found, increase refs
6077 * - is_root - if found, set
6078 * - content_checked - if found, set
6079 * - owner_ref_checked - if found, set
6081 * If not found, create a new one, initialize and insert.
/*
 * Implementation notes (from the visible code):
 *  - The cache is searched for (tmpl->start, tmpl->nr). On a miss the work
 *    is delegated to add_extent_rec_nolookup().
 *  - On a hit with tmpl->found_rec set, a differing start or an already
 *    found record is treated as a duplicate: the existing record is put on
 *    the global duplicate_extents list (once), and a new record carrying
 *    start, max_size, metadata and extent_item_refs from @tmpl is appended
 *    to rec->dups.
 *  - A second non-duplicate extent_item_refs for the same record is reported
 *    on stderr before the value is overwritten.
 *  - content_checked / owner_ref_checked are only ever set, parent_key is
 *    always copied, parent_generation is copied when non-zero and max_size
 *    only grows.
 *  - Finally crossing_stripes and the chunk type are re-evaluated and
 *    maybe_free_extent_rec() is called.
 * NOTE(review): several conditions (refs increment, nr reset, the guard of
 * the crossing-stripes check), the malloc failure path and the return values
 * are not visible in this excerpt - confirm.
 */
6083 static int add_extent_rec(struct cache_tree *extent_cache,
6084 struct extent_record *tmpl)
6086 struct extent_record *rec;
6087 struct cache_extent *cache;
6091 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6093 rec = container_of(cache, struct extent_record, cache);
6097 rec->nr = max(tmpl->nr, tmpl->max_size);
6100 * We need to make sure to reset nr to whatever the extent
6101 * record says was the real size, this way we can compare it to
6104 if (tmpl->found_rec) {
6105 if (tmpl->start != rec->start || rec->found_rec) {
6106 struct extent_record *tmp;
6109 if (list_empty(&rec->list))
6110 list_add_tail(&rec->list,
6111 &duplicate_extents);
6114 * We have to do this song and dance in case we
6115 * find an extent record that falls inside of
6116 * our current extent record but does not have
6117 * the same objectid.
6119 tmp = malloc(sizeof(*tmp));
6122 tmp->start = tmpl->start;
6123 tmp->max_size = tmpl->max_size;
6126 tmp->metadata = tmpl->metadata;
6127 tmp->extent_item_refs = tmpl->extent_item_refs;
6128 INIT_LIST_HEAD(&tmp->list);
6129 list_add_tail(&tmp->list, &rec->dups);
6130 rec->num_duplicates++;
6137 if (tmpl->extent_item_refs && !dup) {
6138 if (rec->extent_item_refs) {
6139 fprintf(stderr, "block %llu rec "
6140 "extent_item_refs %llu, passed %llu\n",
6141 (unsigned long long)tmpl->start,
6142 (unsigned long long)
6143 rec->extent_item_refs,
6144 (unsigned long long)tmpl->extent_item_refs);
6146 rec->extent_item_refs = tmpl->extent_item_refs;
6150 if (tmpl->content_checked)
6151 rec->content_checked = 1;
6152 if (tmpl->owner_ref_checked)
6153 rec->owner_ref_checked = 1;
6154 memcpy(&rec->parent_key, &tmpl->parent_key,
6155 sizeof(tmpl->parent_key));
6156 if (tmpl->parent_generation)
6157 rec->parent_generation = tmpl->parent_generation;
6158 if (rec->max_size < tmpl->max_size)
6159 rec->max_size = tmpl->max_size;
6162 * A metadata extent can't cross stripe_len boundary, otherwise
6163 * kernel scrub won't be able to handle it.
6164 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6168 rec->crossing_stripes = check_crossing_stripes(
6169 global_info, rec->start,
6170 global_info->tree_root->nodesize);
6171 check_extent_type(rec);
6172 maybe_free_extent_rec(extent_cache, rec);
6176 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record starting there is
 * inserted first and the lookup is repeated. The record must start exactly
 * at @bytenr. A matching backref is searched with find_tree_backref() and
 * allocated with alloc_tree_backref() if needed. Two flag updates are
 * visible: node.found_ref and node.found_extent_tree; each one prints
 * "Extent back ref already exists" when the flag was already set. Afterwards
 * the chunk type is re-checked and maybe_free_extent_rec() is called.
 * NOTE(review): the test that selects which flag is updated (presumably
 * @found_ref), the remaining template fields, all error paths and the return
 * value are not visible in this excerpt - confirm.
 */
6181 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6182 u64 parent, u64 root, int found_ref)
6184 struct extent_record *rec;
6185 struct tree_backref *back;
6186 struct cache_extent *cache;
6189 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6191 struct extent_record tmpl;
6193 memset(&tmpl, 0, sizeof(tmpl));
6194 tmpl.start = bytenr;
6198 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6202 /* really a bug in the cache_extent implementation now */
6203 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6208 rec = container_of(cache, struct extent_record, cache);
6209 if (rec->start != bytenr) {
6211 * Several cause, from unaligned bytenr to over lapping extents
6216 back = find_tree_backref(rec, parent, root);
6218 back = alloc_tree_backref(rec, parent, root);
6224 if (back->node.found_ref) {
6225 fprintf(stderr, "Extent back ref already exists "
6226 "for %llu parent %llu root %llu \n",
6227 (unsigned long long)bytenr,
6228 (unsigned long long)parent,
6229 (unsigned long long)root);
6231 back->node.found_ref = 1;
6233 if (back->node.found_extent_tree) {
6234 fprintf(stderr, "Extent back ref already exists "
6235 "for %llu parent %llu root %llu \n",
6236 (unsigned long long)bytenr,
6237 (unsigned long long)parent,
6238 (unsigned long long)root);
6240 back->node.found_extent_tree = 1;
6242 check_extent_type(rec);
6243 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record (start = bytenr,
 * max_size = @max_size) is inserted first and the lookup is repeated;
 * rec->max_size is raised to @max_size if smaller. A matching backref is
 * searched with find_data_backref() and allocated with alloc_data_backref()
 * if needed.
 *
 * Two update paths are visible:
 *  - num_refs must be 1 (BUG_ON); a backref that already has found_ref must
 *    carry the same size (BUG_ON); found_ref is set and counted, bytes and
 *    disk_bytenr are stored and the record is marked content- and
 *    owner-checked.
 *  - a repeated extent-tree sighting is reported on stderr; num_refs is
 *    stored and found_extent_tree is set.
 * maybe_free_extent_rec() is called at the end.
 * NOTE(review): the test selecting between the two paths (presumably
 * @found_ref), the trailing arguments of the find/alloc calls, the error
 * paths and the return value are not visible in this excerpt - confirm.
 */
6247 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6248 u64 parent, u64 root, u64 owner, u64 offset,
6249 u32 num_refs, int found_ref, u64 max_size)
6251 struct extent_record *rec;
6252 struct data_backref *back;
6253 struct cache_extent *cache;
6256 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6258 struct extent_record tmpl;
6260 memset(&tmpl, 0, sizeof(tmpl));
6261 tmpl.start = bytenr;
6263 tmpl.max_size = max_size;
6265 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6269 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6274 rec = container_of(cache, struct extent_record, cache);
6275 if (rec->max_size < max_size)
6276 rec->max_size = max_size;
6279 * If found_ref is set then max_size is the real size and must match the
6280 * existing refs. So if we have already found a ref then we need to
6281 * make sure that this ref matches the existing one, otherwise we need
6282 * to add a new backref so we can notice that the backrefs don't match
6283 * and we need to figure out who is telling the truth. This is to
6284 * account for that awful fsync bug I introduced where we'd end up with
6285 * a btrfs_file_extent_item that would have its length include multiple
6286 * prealloc extents or point inside of a prealloc extent.
6288 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6291 back = alloc_data_backref(rec, parent, root, owner, offset,
6297 BUG_ON(num_refs != 1);
6298 if (back->node.found_ref)
6299 BUG_ON(back->bytes != max_size);
6300 back->node.found_ref = 1;
6301 back->found_ref += 1;
6302 back->bytes = max_size;
6303 back->disk_bytenr = bytenr;
6305 rec->content_checked = 1;
6306 rec->owner_ref_checked = 1;
6308 if (back->node.found_extent_tree) {
6309 fprintf(stderr, "Extent back ref already exists "
6310 "for %llu parent %llu root %llu "
6311 "owner %llu offset %llu num_refs %lu\n",
6312 (unsigned long long)bytenr,
6313 (unsigned long long)parent,
6314 (unsigned long long)root,
6315 (unsigned long long)owner,
6316 (unsigned long long)offset,
6317 (unsigned long)num_refs);
6319 back->num_refs = num_refs;
6320 back->node.found_extent_tree = 1;
6322 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (@bytenr, @size) for processing.
 *
 * The range is first added to @seen and then to @pending.
 * NOTE(review): the check of the first add's result (presumably an early
 * return when the range was already seen) and the return value are not
 * visible in this excerpt - confirm.
 */
6326 static int add_pending(struct cache_tree *pending,
6327 struct cache_tree *seen, u64 bytenr, u32 size)
6330 ret = add_cache_extent(seen, bytenr, size);
6333 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (at most @bits_nr entries) with the next block ranges to read.
 *
 * Visible order of preference: the first extent in @reada; otherwise extents
 * from @nodes starting at a position derived from @last (moved back by 32768
 * when larger than that, falling back to the start of the tree); otherwise
 * extents from @pending. When more than 8 slots remain free, further
 * @pending extents following bits[0] are appended as long as the gap to the
 * previous range is not larger than 32768.
 * NOTE(review): one parameter line, the conditions between the lookups, the
 * loop heads, the counter updates and the return statements are not visible
 * in this excerpt - confirm against the full file.
 */
6337 static int pick_next_pending(struct cache_tree *pending,
6338 struct cache_tree *reada,
6339 struct cache_tree *nodes,
6340 u64 last, struct block_info *bits, int bits_nr,
/* NOTE(review): @last is u64 but node_start is unsigned long - the value may be truncated where unsigned long is 32 bits wide */
6343 unsigned long node_start = last;
6344 struct cache_extent *cache;
6347 cache = search_cache_extent(reada, 0);
6349 bits[0].start = cache->start;
6350 bits[0].size = cache->size;
6355 if (node_start > 32768)
6356 node_start -= 32768;
6358 cache = search_cache_extent(nodes, node_start);
6360 cache = search_cache_extent(nodes, 0);
6363 cache = search_cache_extent(pending, 0);
6368 bits[ret].start = cache->start;
6369 bits[ret].size = cache->size;
6370 cache = next_cache_extent(cache);
6372 } while (cache && ret < bits_nr);
6378 bits[ret].start = cache->start;
6379 bits[ret].size = cache->size;
6380 cache = next_cache_extent(cache);
6382 } while (cache && ret < bits_nr);
6384 if (bits_nr - ret > 8) {
6385 u64 lookup = bits[0].start + bits[0].size;
6386 struct cache_extent *next;
6387 next = search_cache_extent(pending, lookup);
6389 if (next->start - lookup > 32768)
6391 bits[ret].start = next->start;
6392 bits[ret].size = next->size;
6393 lookup = next->start + next->size;
6397 next = next_cache_extent(next);
/*
 * Destructor callback for a chunk record stored in a cache tree: maps @cache
 * to its struct chunk_record and unlinks the record's list and dextents
 * list heads.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt - confirm.
 */
6405 static void free_chunk_record(struct cache_extent *cache)
6407 struct chunk_record *rec;
6409 rec = container_of(cache, struct chunk_record, cache);
6410 list_del_init(&rec->list);
6411 list_del_init(&rec->dextents);
/* Release every extent of @chunk_cache, using free_chunk_record() as the per-entry callback. */
6415 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6417 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Destructor callback for a device record stored in an rb-tree: maps @node
 * to its enclosing struct device_record.
 * NOTE(review): the release of the record is not visible in this excerpt -
 * confirm.
 */
6420 static void free_device_record(struct rb_node *node)
6422 struct device_record *rec;
6424 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): macro invocation; presumably it expands to the definition of
 * the tear-down function for the device cache rb-tree, using
 * free_device_record() per node - confirm against the macro's definition.
 */
6428 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: its cache extent goes into tree->tree and the
 * record is appended to tree->block_groups.
 * NOTE(review): the handling of an insert_cache_extent() failure and the
 * return value are not visible in this excerpt - confirm.
 */
6430 int insert_block_group_record(struct block_group_tree *tree,
6431 struct block_group_record *bg_rec)
6435 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6439 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Destructor callback for a block group record stored in a cache tree: maps
 * @cache to its struct block_group_record and unlinks it from its list.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt - confirm.
 */
6443 static void free_block_group_record(struct cache_extent *cache)
6445 struct block_group_record *rec;
6447 rec = container_of(cache, struct block_group_record, cache);
6448 list_del_init(&rec->list);
/* Release every extent of tree->tree, using free_block_group_record() as the per-entry callback. */
6452 void free_block_group_tree(struct block_group_tree *tree)
6454 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree with insert_cache_extent2() and append it to both
 * orphan lists (no_chunk_orphans and no_device_orphans).
 * NOTE(review): the handling of an insert failure and the return value are
 * not visible in this excerpt - confirm.
 */
6457 int insert_device_extent_record(struct device_extent_tree *tree,
6458 struct device_extent_record *de_rec)
6463 * Device extent is a bit different from the other extents, because
6464 * the extents which belong to the different devices may have the
6465 * same start and size, so we need use the special extent cache
6466 * search/insert functions.
6468 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6472 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6473 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Destructor callback for a device extent record stored in a cache tree: maps
 * @cache to its struct device_extent_record and unlinks chunk_list and
 * device_list when they are linked.
 * NOTE(review): the release of the record itself is not visible in this
 * excerpt - confirm.
 */
6477 static void free_device_extent_record(struct cache_extent *cache)
6479 struct device_extent_record *rec;
6481 rec = container_of(cache, struct device_extent_record, cache);
6482 if (!list_empty(&rec->chunk_list))
6483 list_del_init(&rec->chunk_list);
6484 if (!list_empty(&rec->device_list))
6485 list_del_init(&rec->device_list);
/* Release every extent of tree->tree, using free_device_extent_record() as the per-entry callback. */
6489 void free_device_extent_tree(struct device_extent_tree *tree)
6491 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6494 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The ref's objectid decides the kind: below BTRFS_FIRST_FREE_OBJECTID a tree
 * backref is added, otherwise a data backref carrying the v0 ref count. In
 * both cases key.objectid is the extent bytenr and key.offset is passed as
 * the parent.
 * NOTE(review): the remaining add_tree_backref() arguments, the error
 * reporting and the return value are not visible in this excerpt - confirm.
 */
6495 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6496 struct extent_buffer *leaf, int slot)
6498 struct btrfs_extent_ref_v0 *ref0;
6499 struct btrfs_key key;
6502 btrfs_item_key_to_cpu(leaf, &key, slot);
6503 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6504 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6505 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6508 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6509 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk record from the chunk item at @slot of @leaf.
 *
 * The record is calloc()ed with room for the item's stripes
 * (btrfs_chunk_record_size(num_stripes)). The cache extent covers
 * (key->offset, chunk length); the key fields, the leaf generation and the
 * chunk attributes are copied, followed by devid, offset and dev_uuid of
 * every stripe.
 * NOTE(review): one parameter line, what happens after the "memory
 * allocation failed" message, the length argument of the uuid copy and the
 * return statement are not visible in this excerpt - confirm.
 */
6515 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6516 struct btrfs_key *key,
6519 struct btrfs_chunk *ptr;
6520 struct chunk_record *rec;
6523 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6524 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6526 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6528 fprintf(stderr, "memory allocation failed\n");
6532 INIT_LIST_HEAD(&rec->list);
6533 INIT_LIST_HEAD(&rec->dextents);
6536 rec->cache.start = key->offset;
6537 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6539 rec->generation = btrfs_header_generation(leaf);
6541 rec->objectid = key->objectid;
6542 rec->type = key->type;
6543 rec->offset = key->offset;
6545 rec->length = rec->cache.size;
6546 rec->owner = btrfs_chunk_owner(leaf, ptr);
6547 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6548 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6549 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6550 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6551 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6552 rec->num_stripes = num_stripes;
6553 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6555 for (i = 0; i < rec->num_stripes; ++i) {
6556 rec->stripes[i].devid =
6557 btrfs_stripe_devid_nr(leaf, ptr, i);
6558 rec->stripes[i].offset =
6559 btrfs_stripe_offset_nr(leaf, ptr, i);
6560 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6561 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6568 static int process_chunk_item(struct cache_tree *chunk_cache,
6569 struct btrfs_key *key, struct extent_buffer *eb,
6572 struct chunk_record *rec;
6573 struct btrfs_chunk *chunk;
6576 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6578 * Do extra check for this chunk item,
6580 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6581 * wrong owner(3) out of chunk tree, to pass both chunk tree check
6582 * and owner<->key_type check.
6584 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6587 error("chunk(%llu, %llu) is not valid, ignore it",
6588 key->offset, btrfs_chunk_length(eb, chunk));
6591 rec = btrfs_new_chunk_record(eb, key, slot);
6592 ret = insert_cache_extent(chunk_cache, &rec->cache);
6594 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6595 rec->offset, rec->length);
/*
 * Build a device record from the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree (ordered by device_record_compare).
 *
 * The record takes the key fields, the leaf generation and the item's devid,
 * total bytes and bytes used. Note that devid is assigned twice: first from
 * key->offset, then overwritten with btrfs_device_id(). A failed insert is
 * reported as "Device[...] existed.".
 * NOTE(review): what happens after the "memory allocation failed" message,
 * the cleanup after a failed insert and the return value are not visible in
 * this excerpt - confirm.
 */
6602 static int process_device_item(struct rb_root *dev_cache,
6603 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6605 struct btrfs_dev_item *ptr;
6606 struct device_record *rec;
6609 ptr = btrfs_item_ptr(eb,
6610 slot, struct btrfs_dev_item);
6612 rec = malloc(sizeof(*rec));
6614 fprintf(stderr, "memory allocation failed\n");
6618 rec->devid = key->offset;
6619 rec->generation = btrfs_header_generation(eb);
6621 rec->objectid = key->objectid;
6622 rec->type = key->type;
6623 rec->offset = key->offset;
6625 rec->devid = btrfs_device_id(eb, ptr);
6626 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6627 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6629 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6631 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block group record from the item at @slot of @leaf.
 *
 * The record is calloc()ed; its cache extent covers (key->objectid,
 * key->offset). The key fields, the leaf generation and the on-disk block
 * group flags are copied and the list head is initialised.
 * NOTE(review): one parameter line, what happens after the "memory
 * allocation failed" message and the return statement are not visible in
 * this excerpt - confirm.
 */
6638 struct block_group_record *
6639 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6642 struct btrfs_block_group_item *ptr;
6643 struct block_group_record *rec;
6645 rec = calloc(1, sizeof(*rec));
6647 fprintf(stderr, "memory allocation failed\n");
6651 rec->cache.start = key->objectid;
6652 rec->cache.size = key->offset;
6654 rec->generation = btrfs_header_generation(leaf);
6656 rec->objectid = key->objectid;
6657 rec->type = key->type;
6658 rec->offset = key->offset;
6660 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6661 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6663 INIT_LIST_HEAD(&rec->list);
/*
 * Create a block group record for the item at @slot of @eb and insert it into
 * @block_group_cache; a failed insert is reported as
 * "Block Group[...] existed.".
 * NOTE(review): the cleanup after a failed insert and the return value are
 * not visible in this excerpt - confirm.
 */
6668 static int process_block_group_item(struct block_group_tree *block_group_cache,
6669 struct btrfs_key *key,
6670 struct extent_buffer *eb, int slot)
6672 struct block_group_record *rec;
6675 rec = btrfs_new_block_group_record(eb, key, slot);
6676 ret = insert_block_group_record(block_group_cache, rec);
6678 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6679 rec->objectid, rec->offset);
/*
 * Build an in-memory device extent record from the item at @slot of @leaf.
 *
 * The record is calloc()ed; its cache extent is keyed by key->objectid with
 * start key->offset and size equal to the dev extent length. The key fields,
 * the leaf generation and the chunk objectid of the dev extent are copied
 * and both list heads are initialised.
 * NOTE(review): what happens after the "memory allocation failed" message,
 * the target of the chunk-offset assignment and the return statement are not
 * visible in this excerpt - confirm.
 */
6686 struct device_extent_record *
6687 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6688 struct btrfs_key *key, int slot)
6690 struct device_extent_record *rec;
6691 struct btrfs_dev_extent *ptr;
6693 rec = calloc(1, sizeof(*rec));
6695 fprintf(stderr, "memory allocation failed\n");
6699 rec->cache.objectid = key->objectid;
6700 rec->cache.start = key->offset;
6702 rec->generation = btrfs_header_generation(leaf);
6704 rec->objectid = key->objectid;
6705 rec->type = key->type;
6706 rec->offset = key->offset;
6708 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6709 rec->chunk_objecteid =
6710 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6712 btrfs_dev_extent_chunk_offset(leaf, ptr);
6713 rec->length = btrfs_dev_extent_length(leaf, ptr);
6714 rec->cache.size = rec->length;
6716 INIT_LIST_HEAD(&rec->chunk_list);
6717 INIT_LIST_HEAD(&rec->device_list);
6723 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6724 struct btrfs_key *key, struct extent_buffer *eb,
/*
 * Creates a device extent record for the item in @eb and inserts it into
 * @dev_extent_cache; a failed insert is reported as
 * "Device extent[...] existed.".
 * NOTE(review): the return-type line, the last parameter line, the cleanup
 * after a failed insert and the return value are not visible in this
 * excerpt - confirm.
 */
6727 struct device_extent_record *rec;
6730 rec = btrfs_new_device_extent_record(eb, key, slot);
6731 ret = insert_device_extent_record(dev_extent_cache, rec);
6734 "Device extent[%llu, %llu, %llu] existed.\n",
6735 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item found at @slot of @eb, together with its inline
 * backrefs, in @extent_cache.
 *
 * @root:         used for nodesize / sectorsize sanity checks
 * @extent_cache: cache tree receiving the extent record and its backrefs
 * @eb:           leaf holding the item
 * @slot:         slot of the EXTENT_ITEM / METADATA_ITEM inside @eb
 *
 * Steps visible below:
 *  - derive the extent length from the key
 *  - reject a start bytenr that is not sectorsize aligned
 *  - handle items smaller than struct btrfs_extent_item through the v0
 *    compat path (only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined)
 *  - validate the length, add the extent record, then walk the inline refs
 *    and add a tree or data backref for each one
 */
6742 static int process_extent_item(struct btrfs_root *root,
6743 struct cache_tree *extent_cache,
6744 struct extent_buffer *eb, int slot)
6746 struct btrfs_extent_item *ei;
6747 struct btrfs_extent_inline_ref *iref;
6748 struct btrfs_extent_data_ref *dref;
6749 struct btrfs_shared_data_ref *sref;
6750 struct btrfs_key key;
6751 struct extent_record tmpl;
6756 u32 item_size = btrfs_item_size_nr(eb, slot);
6762 btrfs_item_key_to_cpu(eb, &key, slot);
/* Length is nodesize for METADATA_ITEM keys, key.offset otherwise. */
6764 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6766 num_bytes = root->nodesize;
6768 num_bytes = key.offset;
6771 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6772 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6773 key.objectid, root->sectorsize);
/* Item too small for the current extent item layout: v0 compat handling. */
6776 if (item_size < sizeof(*ei)) {
6777 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6778 struct btrfs_extent_item_v0 *ei0;
6779 BUG_ON(item_size != sizeof(*ei0));
6780 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6781 refs = btrfs_extent_refs_v0(eb, ei0);
6785 memset(&tmpl, 0, sizeof(tmpl));
6786 tmpl.start = key.objectid;
6787 tmpl.nr = num_bytes;
6788 tmpl.extent_item_refs = refs;
6789 tmpl.metadata = metadata;
6791 tmpl.max_size = num_bytes;
6793 return add_extent_rec(extent_cache, &tmpl);
6796 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6797 refs = btrfs_extent_refs(eb, ei);
6798 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata extents must be exactly one node; data must be sector aligned. */
6802 if (metadata && num_bytes != root->nodesize) {
6803 error("ignore invalid metadata extent, length %llu does not equal to %u",
6804 num_bytes, root->nodesize);
6807 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6808 error("ignore invalid data extent, length %llu is not aligned to %u",
6809 num_bytes, root->sectorsize);
6813 memset(&tmpl, 0, sizeof(tmpl));
6814 tmpl.start = key.objectid;
6815 tmpl.nr = num_bytes;
6816 tmpl.extent_item_refs = refs;
6817 tmpl.metadata = metadata;
6819 tmpl.max_size = num_bytes;
6820 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs begin right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally has a btrfs_tree_block_info to skip.
 */
6822 ptr = (unsigned long)(ei + 1);
6823 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6824 key.type == BTRFS_EXTENT_ITEM_KEY)
6825 ptr += sizeof(struct btrfs_tree_block_info);
6827 end = (unsigned long)ei + item_size;
6829 iref = (struct btrfs_extent_inline_ref *)ptr;
6830 type = btrfs_extent_inline_ref_type(eb, iref);
6831 offset = btrfs_extent_inline_ref_offset(eb, iref);
6833 case BTRFS_TREE_BLOCK_REF_KEY:
6834 ret = add_tree_backref(extent_cache, key.objectid,
6838 "add_tree_backref failed (extent items tree block): %s",
6841 case BTRFS_SHARED_BLOCK_REF_KEY:
6842 ret = add_tree_backref(extent_cache, key.objectid,
6846 "add_tree_backref failed (extent items shared block): %s",
6849 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
6850 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6851 add_data_backref(extent_cache, key.objectid, 0,
6852 btrfs_extent_data_ref_root(eb, dref),
6853 btrfs_extent_data_ref_objectid(eb,
6855 btrfs_extent_data_ref_offset(eb, dref),
6856 btrfs_extent_data_ref_count(eb, dref),
6859 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref structure. */
6860 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6861 add_data_backref(extent_cache, key.objectid, offset,
6863 btrfs_shared_data_ref_count(eb, sref),
6867 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6868 key.objectid, key.type, num_bytes);
/* Step over the ref just handled; its size depends on the ref type. */
6871 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by a free space entry with exactly that offset and length, and
 * unlink the entry once it has been matched.
 *
 * Before the lookup, the range is trimmed around every super block mirror
 * that btrfs_rmap_block() maps into this block group: a stripe at the start
 * or end shrinks the range, and a stripe in the middle makes the function
 * recurse on the left part and continue with the right part.
 *
 * NOTE(review): return statements are not visible in this excerpt; the
 * mismatch branches only show the diagnostics they print.
 */
6878 static int check_cache_range(struct btrfs_root *root,
6879 struct btrfs_block_group_cache *cache,
6880 u64 offset, u64 bytes)
6882 struct btrfs_free_space *entry;
6888 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6889 bytenr = btrfs_sb_offset(i);
6890 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6891 cache->key.objectid, bytenr, 0,
6892 &logical, &nr, &stripe_len);
/* Stripe entirely before or entirely after the range: nothing to trim. */
6897 if (logical[nr] + stripe_len <= offset)
6899 if (offset + bytes <= logical[nr])
6901 if (logical[nr] == offset) {
6902 if (stripe_len >= bytes) {
6906 bytes -= stripe_len;
6907 offset += stripe_len;
6908 } else if (logical[nr] < offset) {
6909 if (logical[nr] + stripe_len >=
6914 bytes = (offset + bytes) -
6915 (logical[nr] + stripe_len);
6916 offset = logical[nr] + stripe_len;
6919 * Could be tricky, the super may land in the
6920 * middle of the area we're checking. First
6921 * check the easiest case, it's at the end.
6923 if (logical[nr] + stripe_len >=
6925 bytes = logical[nr] - offset;
6929 /* Check the left side */
6930 ret = check_cache_range(root, cache,
6932 logical[nr] - offset);
6938 /* Now we continue with the right side */
6939 bytes = (offset + bytes) -
6940 (logical[nr] + stripe_len);
6941 offset = logical[nr] + stripe_len;
/* What is left of the range must match a single free space entry. */
6948 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6950 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6951 offset, offset+bytes);
6955 if (entry->offset != offset) {
6956 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6961 if (entry->bytes != bytes) {
6962 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6963 bytes, entry->bytes, offset);
/* Remove the matched entry from the block group's free space ctl. */
6967 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * Walks EXTENT_ITEM / METADATA_ITEM keys inside the block group in key
 * order, keeping 'last' at the end of the previously seen extent. Every gap
 * between 'last' and the next extent, plus the tail of the block group, is
 * handed to check_cache_range(). Afterwards the free space ctl is tested
 * for leftover entries.
 */
6972 static int verify_space_cache(struct btrfs_root *root,
6973 struct btrfs_block_group_cache *cache)
6975 struct btrfs_path path;
6976 struct extent_buffer *leaf;
6977 struct btrfs_key key;
/* All lookups below go through the extent root. */
6981 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
6983 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6985 btrfs_init_path(&path);
6986 key.objectid = last;
6988 key.type = BTRFS_EXTENT_ITEM_KEY;
6989 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6994 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6995 ret = btrfs_next_leaf(root, &path);
7003 leaf = path.nodes[0];
7004 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or past the end of this block group are out of scope. */
7005 if (key.objectid >= cache->key.offset + cache->key.objectid)
7007 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7008 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at 'last': no gap, just advance 'last'. */
7013 if (last == key.objectid) {
7014 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7015 last = key.objectid + key.offset;
7017 last = key.objectid + root->nodesize;
/* [last, key.objectid) is a gap between extents: check it in the cache. */
7022 ret = check_cache_range(root, cache, last,
7023 key.objectid - last);
7026 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7027 last = key.objectid + key.offset;
7029 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent seen. */
7033 if (last < cache->key.objectid + cache->key.offset)
7034 ret = check_cache_range(root, cache, last,
7035 cache->key.objectid +
7036 cache->key.offset - last);
7039 btrfs_release_path(&path);
/* check_cache_range() unlinks what it matched; leftovers are reported. */
7042 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7043 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * For each block group found from 'start' onwards, the free space ctl is
 * initialised (or its existing cache dropped), free space is loaded either
 * from the free space tree (when the FREE_SPACE_TREE compat_ro bit is set)
 * or through load_free_space_cache(), and the result is passed to
 * verify_space_cache().
 *
 * Returns -EINVAL when the local 'error' value is non-zero, 0 otherwise.
 */
7051 static int check_space_cache(struct btrfs_root *root)
7053 struct btrfs_block_group_cache *cache;
7054 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/*
 * A cache generation other than -1 that differs from the super block
 * generation is reported as a cache that will be invalidated.
 */
7058 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7059 btrfs_super_generation(root->fs_info->super_copy) !=
7060 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7061 printf("cache and super generation don't match, space cache "
7062 "will be invalidated\n");
7066 if (ctx.progress_enabled) {
7067 ctx.tp = TASK_FREE_SPACE;
7068 task_start(ctx.info);
7072 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup resumes right after this block group. */
7076 start = cache->key.objectid + cache->key.offset;
7077 if (!cache->free_space_ctl) {
7078 if (btrfs_init_free_space_ctl(cache,
7079 root->sectorsize)) {
7084 btrfs_remove_free_space_cache(cache);
7087 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* Super stripes are excluded only for the duration of the tree load. */
7088 ret = exclude_super_stripes(root, cache);
7090 fprintf(stderr, "could not exclude super stripes: %s\n",
7095 ret = load_free_space_tree(root->fs_info, cache);
7096 free_excluded_extents(root, cache);
7098 fprintf(stderr, "could not load free space tree: %s\n",
7105 ret = load_free_space_cache(root->fs_info, cache);
7110 ret = verify_space_cache(root, cache);
7112 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7113 cache->key.objectid);
7118 task_stop(ctx.info);
7120 return error ? -EINVAL : 0;
/*
 * Read the data at [@bytenr, @bytenr + @num_bytes) and compare the checksum
 * of every sectorsize piece with the checksum stored in the csum item.
 *
 * @root:        supplies sectorsize, csum size and the mapping tree
 * @bytenr:      logical start of the data
 * @num_bytes:   length of the data; tested against sectorsize alignment
 * @leaf_offset: offset inside @eb where the stored checksums start
 * @eb:          leaf containing the csum item
 *
 * On a mismatch a diagnostic is printed and the number of copies is looked
 * up so that another mirror can be tried.
 */
7123 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7124 u64 num_bytes, unsigned long leaf_offset,
7125 struct extent_buffer *eb) {
7128 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7130 unsigned long csum_offset;
7134 u64 data_checked = 0;
7140 if (num_bytes % root->sectorsize)
/* One buffer large enough for the whole range. */
7143 data = malloc(num_bytes);
7147 while (offset < num_bytes) {
7150 read_len = num_bytes - offset;
7151 /* read as much space once a time */
7152 ret = read_extent_data(root, data + offset,
7153 bytenr + offset, &read_len, mirror);
7157 /* verify every 4k data's checksum */
7158 while (data_checked < read_len) {
7160 tmp = offset + data_checked;
7162 csum = btrfs_csum_data((char *)data + tmp,
7163 csum, root->sectorsize);
7164 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksums are packed: one csum_size entry per sector. */
7166 csum_offset = leaf_offset +
7167 tmp / root->sectorsize * csum_size;
7168 read_extent_buffer(eb, (char *)&csum_expected,
7169 csum_offset, csum_size);
7170 /* try another mirror */
7171 if (csum != csum_expected) {
7172 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7173 mirror, bytenr + tmp,
7174 csum, csum_expected);
7175 num_copies = btrfs_num_copies(
7176 &root->fs_info->mapping_tree,
7178 if (mirror < num_copies - 1) {
7183 data_checked += root->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The walk shrinks the range as extent items covering its head or tail are
 * found. When an extent sits in the middle of the range, the right-hand
 * part is checked by a recursive call and the walk carries on with the
 * left-hand part. If bytes remain uncovered and no error was recorded, a
 * "no extents for csum range" diagnostic is printed.
 */
7192 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7195 struct btrfs_path path;
7196 struct extent_buffer *leaf;
7197 struct btrfs_key key;
7200 btrfs_init_path(&path);
/*
 * Search with the largest possible offset for an EXTENT_ITEM at @bytenr;
 * the code below then moves backwards from where the search landed.
 */
7201 key.objectid = bytenr;
7202 key.type = BTRFS_EXTENT_ITEM_KEY;
7203 key.offset = (u64)-1;
7206 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7209 fprintf(stderr, "Error looking up extent record %d\n", ret);
7210 btrfs_release_path(&path);
7213 if (path.slots[0] > 0) {
7216 ret = btrfs_prev_leaf(root, &path);
7219 } else if (ret > 0) {
7226 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7229 * Block group items come before extent items if they have the same
7230 * bytenr, so walk back one more just in case. Dear future traveller,
7231 * first congrats on mastering time travel. Now if it's not too much
7232 * trouble could you go back to 2006 and tell Chris to make the
7233 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7234 * EXTENT_ITEM_KEY please?
7236 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7237 if (path.slots[0] > 0) {
7240 ret = btrfs_prev_leaf(root, &path);
7243 } else if (ret > 0) {
7248 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7252 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7253 ret = btrfs_next_leaf(root, &path);
7255 fprintf(stderr, "Error going to next leaf "
7257 btrfs_release_path(&path);
7263 leaf = path.nodes[0];
7264 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7265 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts / starts after the range ends. */
7269 if (key.objectid + key.offset < bytenr) {
7273 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the head of the range. */
7276 if (key.objectid == bytenr) {
7277 if (key.offset >= num_bytes) {
7281 num_bytes -= key.offset;
7282 bytenr += key.offset;
/* Extent starts before the range and overlaps its head. */
7283 } else if (key.objectid < bytenr) {
7284 if (key.objectid + key.offset >= bytenr + num_bytes) {
7288 num_bytes = (bytenr + num_bytes) -
7289 (key.objectid + key.offset);
7290 bytenr = key.objectid + key.offset;
7292 if (key.objectid + key.offset < bytenr + num_bytes) {
7293 u64 new_start = key.objectid + key.offset;
7294 u64 new_bytes = bytenr + num_bytes - new_start;
7297 * Weird case, the extent is in the middle of
7298 * our range, we'll have to search one side
7299 * and then the other. Not sure if this happens
7300 * in real life, but no harm in coding it up
7301 * anyway just in case.
7303 btrfs_release_path(&path);
7304 ret = check_extent_exists(root, new_start,
7307 fprintf(stderr, "Right section didn't "
7311 num_bytes = key.objectid - bytenr;
7314 num_bytes = key.objectid - bytenr;
/* Bytes still uncovered here have no extent record. */
7321 if (num_bytes && !ret) {
7322 fprintf(stderr, "There are no extents for csum range "
7323 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7327 btrfs_release_path(&path);
/*
 * Walk the csum tree and make sure every checksummed range is backed by an
 * extent record.
 *
 * Consecutive csum items are merged into a run described by 'offset' and
 * 'num_bytes'. When an item does not start where the run ends, the run is
 * passed to check_extent_exists() and a new run begins. When the global
 * check_data_csum is set, the data of each csum item is additionally
 * verified with check_extent_csums().
 */
7331 static int check_csums(struct btrfs_root *root)
7333 struct btrfs_path path;
7334 struct extent_buffer *leaf;
7335 struct btrfs_key key;
7336 u64 offset = 0, num_bytes = 0;
7337 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7341 unsigned long leaf_offset;
/* From here on 'root' refers to the csum tree. */
7343 root = root->fs_info->csum_root;
7344 if (!extent_buffer_uptodate(root->node)) {
7345 fprintf(stderr, "No valid csum tree found\n");
7349 btrfs_init_path(&path);
7350 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7351 key.type = BTRFS_EXTENT_CSUM_KEY;
7353 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7355 fprintf(stderr, "Error searching csum tree %d\n", ret);
7356 btrfs_release_path(&path);
7360 if (ret > 0 && path.slots[0])
7365 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7366 ret = btrfs_next_leaf(root, &path);
7368 fprintf(stderr, "Error going to next leaf "
7375 leaf = path.nodes[0];
7377 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7378 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Data bytes covered by this item: one csum_size entry per sector. */
7383 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7384 csum_size) * root->sectorsize;
/* Verifying the data itself is optional. */
7385 if (!check_data_csum)
7386 goto skip_csum_check;
7387 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7388 ret = check_extent_csums(root, key.offset, data_len,
7394 offset = key.offset;
/* Item is not contiguous with the current run: verify the run, restart. */
7395 } else if (key.offset != offset + num_bytes) {
7396 ret = check_extent_exists(root, offset, num_bytes);
7398 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7399 "there is no extent record\n",
7400 offset, offset+num_bytes);
7403 offset = key.offset;
7406 num_bytes += data_len;
7410 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type,
 * then offset; each later field is only consulted when the earlier ones are
 * equal.
 *
 * NOTE(review): the return statements are not visible here; presumably a
 * non-zero value is returned when @key sorts before @drop_key - confirm.
 */
7414 static int is_dropped_key(struct btrfs_key *key,
7415 struct btrfs_key *drop_key) {
7416 if (key->objectid < drop_key->objectid)
7418 else if (key->objectid == drop_key->objectid) {
7419 if (key->type < drop_key->type)
7421 else if (key->type == drop_key->type) {
7422 if (key->offset < drop_key->offset)
7430 * Here are the rules for FULL_BACKREF.
7432 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7433 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7435 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7436 * if it happened after the relocation occurred since we'll have dropped the
7437 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7438 * have no real way to know for sure.
7440 * We process the blocks one root at a time, and we start from the lowest root
7441 * objectid and go to the highest. So we can just lookup the owner backref for
7442 * the record and if we don't find it then we know it doesn't exist and we have
7445 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7446 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7447 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf should have BTRFS_BLOCK_FLAG_FULL_BACKREF
 * and report the decision through *flags.
 *
 * @extent_cache: cache tree in which the extent record of @buf is looked up
 * @buf:          tree block being examined
 * @ri:           record of the root currently being walked
 *
 * The checks below look at the root objectid, whether @buf is the block at
 * ri->bytenr, the RELOC header flag, the header owner, and finally whether
 * a tree backref from the owner exists in the extent record. Whichever
 * outcome is reached, rec->bad_full_backref is set when the record already
 * holds the opposite value in flag_block_full_backref.
 */
7449 static int calc_extent_flag(struct cache_tree *extent_cache,
7450 struct extent_buffer *buf,
7451 struct root_item_record *ri,
7454 struct extent_record *rec;
7455 struct cache_extent *cache;
7456 struct tree_backref *tback;
7459 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7460 /* we have added this extent before */
7464 rec = container_of(cache, struct extent_record, cache);
7467 * Except file/reloc tree, we can not have
7470 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7475 if (buf->start == ri->bytenr)
7478 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7481 owner = btrfs_header_owner(buf);
7482 if (owner == ri->objectid)
/* Look for a backref with parent 0 that names the header owner as root. */
7485 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: a recorded value other than 0 conflicts. */
7490 if (rec->flag_block_full_backref != FLAG_UNSET &&
7491 rec->flag_block_full_backref != 0)
7492 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: a recorded value other than 1 conflicts. */
7495 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7496 if (rec->flag_block_full_backref != FLAG_UNSET &&
7497 rec->flag_block_full_backref != 1)
7498 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", with the key type and
 * the root id rendered by print_key_type() and print_objectid().
 */
7502 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7504 fprintf(stderr, "Invalid key type(");
7505 print_key_type(stderr, 0, key_type);
7506 fprintf(stderr, ") found in root(");
7507 print_objectid(stderr, rootid, 0);
7508 fprintf(stderr, ")\n");
7512 * Check if the key is valid with its extent buffer.
7514 * This is an early check in case an invalid key exists in an extent buffer
7515 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may legitimately appear in the tree
 * identified by @rootid.
 *
 * Each group of case labels below is tied to the tree (or trees) the key
 * type is allowed in. A mismatch is reported through
 * report_mismatch_key_root().
 */
7518 static int check_type_with_root(u64 rootid, u8 key_type)
7521 /* Only valid in chunk tree */
7522 case BTRFS_DEV_ITEM_KEY:
7523 case BTRFS_CHUNK_ITEM_KEY:
7524 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7527 /* valid in csum and log tree */
/*
 * NOTE(review): the label below is a tree objectid constant, while every
 * other label in this function is a key type. It looks like a key type such
 * as BTRFS_EXTENT_CSUM_KEY was intended - confirm before relying on it.
 */
7528 case BTRFS_CSUM_TREE_OBJECTID:
7529 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items: extent tree only. */
7533 case BTRFS_EXTENT_ITEM_KEY:
7534 case BTRFS_METADATA_ITEM_KEY:
7535 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7536 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items: root tree only. */
7539 case BTRFS_ROOT_ITEM_KEY:
7540 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents: device tree only. */
7543 case BTRFS_DEV_EXTENT_KEY:
7544 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7550 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and feed everything it contains
 * into the various fsck caches.
 *
 * @root:              root used for reading blocks and for node/sector sizes
 * @bits:              scratch array filled by pick_next_pending()
 * @last:              in/out, start of the most recently processed block
 * @pending, @seen, @reada, @nodes: cache trees tracking blocks that are
 *                     queued, already visited, read ahead, and interior nodes
 * @extent_cache:      extent records and their backrefs
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                     receive the chunk, device, block group and device
 *                     extent items found in leaves
 * @ri:                record of the root currently being walked
 *
 * For a leaf, each item is dispatched on its key type. For a node, an extent
 * record and a tree backref are added for every child pointer and the child
 * is queued. Global byte counters are updated at the end and the buffer is
 * released.
 */
7554 static int run_next_block(struct btrfs_root *root,
7555 struct block_info *bits,
7558 struct cache_tree *pending,
7559 struct cache_tree *seen,
7560 struct cache_tree *reada,
7561 struct cache_tree *nodes,
7562 struct cache_tree *extent_cache,
7563 struct cache_tree *chunk_cache,
7564 struct rb_root *dev_cache,
7565 struct block_group_tree *block_group_cache,
7566 struct device_extent_tree *dev_extent_cache,
7567 struct root_item_record *ri)
7569 struct extent_buffer *buf;
7570 struct extent_record *rec = NULL;
7581 struct btrfs_key key;
7582 struct cache_extent *cache;
7585 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7586 bits_nr, &reada_bits);
/* Track every picked block in the readahead tree and start reading it. */
7591 for(i = 0; i < nritems; i++) {
7592 ret = add_cache_extent(reada, bits[i].start,
7597 /* fixme, get the parent transid */
7598 readahead_tree_block(root, bits[i].start,
/* The first picked block is the one processed by this call. */
7602 *last = bits[0].start;
7603 bytenr = bits[0].start;
7604 size = bits[0].size;
/* Look the block up in the pending, readahead and node trees. */
7606 cache = lookup_cache_extent(pending, bytenr, size);
7608 remove_cache_extent(pending, cache);
7611 cache = lookup_cache_extent(reada, bytenr, size);
7613 remove_cache_extent(reada, cache);
7616 cache = lookup_cache_extent(nodes, bytenr, size);
7618 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the generation used for the read. */
7621 cache = lookup_cache_extent(extent_cache, bytenr, size);
7623 rec = container_of(cache, struct extent_record, cache);
7624 gen = rec->parent_generation;
7627 /* fixme, get the real parent transid */
7628 buf = read_tree_block(root, bytenr, size, gen);
7629 if (!extent_buffer_uptodate(buf)) {
7630 record_bad_block_io(root->fs_info,
7631 extent_cache, bytenr, size);
7635 nritems = btrfs_header_nritems(buf);
/*
 * Backref flags: looked up in the extent tree unless init_extent_tree is
 * set; calc_extent_flag() appears in both branches below.
 */
7638 if (!init_extent_tree) {
7639 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7640 btrfs_header_level(buf), 1, NULL,
7643 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7645 fprintf(stderr, "Couldn't calc extent flags\n");
7646 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7651 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7653 fprintf(stderr, "Couldn't calc extent flags\n");
7654 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7658 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7660 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7661 ri->objectid == btrfs_header_owner(buf)) {
7663 * Ok we got to this block from it's original owner and
7664 * we have FULL_BACKREF set. Relocation can leave
7665 * converted blocks over so this is altogether possible,
7666 * however it's not possible if the generation > the
7667 * last snapshot, so check for this case.
7669 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7670 btrfs_header_generation(buf) > ri->last_snapshot) {
7671 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7672 rec->bad_full_backref = 1;
7677 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7678 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7679 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7680 rec->bad_full_backref = 1;
/* Remember the final decision in the extent record. */
7684 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7685 rec->flag_block_full_backref = 1;
7689 rec->flag_block_full_backref = 0;
7691 owner = btrfs_header_owner(buf);
7694 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account unused space, then dispatch every item on its key type. */
7698 if (btrfs_is_leaf(buf)) {
7699 btree_space_waste += btrfs_leaf_free_space(root, buf);
7700 for (i = 0; i < nritems; i++) {
7701 struct btrfs_file_extent_item *fi;
7702 btrfs_item_key_to_cpu(buf, &key, i);
7704 * Check key type against the leaf owner.
7705 * Could filter quite a lot of early error if
7708 if (check_type_with_root(btrfs_header_owner(buf),
7710 fprintf(stderr, "ignoring invalid key\n");
7713 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7714 process_extent_item(root, extent_cache, buf,
7718 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7719 process_extent_item(root, extent_cache, buf,
7723 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7725 btrfs_item_size_nr(buf, i);
7728 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7729 process_chunk_item(chunk_cache, &key, buf, i);
7732 if (key.type == BTRFS_DEV_ITEM_KEY) {
7733 process_device_item(dev_cache, &key, buf, i);
7736 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7737 process_block_group_item(block_group_cache,
7741 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7742 process_device_extent_item(dev_extent_cache,
7747 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7748 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7749 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: key.offset is passed as the root. */
7756 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7757 ret = add_tree_backref(extent_cache,
7758 key.objectid, 0, key.offset, 0);
7761 "add_tree_backref failed (leaf tree block): %s",
/* Keyed shared block ref: key.offset is passed as the parent. */
7765 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7766 ret = add_tree_backref(extent_cache,
7767 key.objectid, key.offset, 0, 0);
7770 "add_tree_backref failed (leaf shared block): %s",
7774 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7775 struct btrfs_extent_data_ref *ref;
7776 ref = btrfs_item_ptr(buf, i,
7777 struct btrfs_extent_data_ref);
7778 add_data_backref(extent_cache,
7780 btrfs_extent_data_ref_root(buf, ref),
7781 btrfs_extent_data_ref_objectid(buf,
7783 btrfs_extent_data_ref_offset(buf, ref),
7784 btrfs_extent_data_ref_count(buf, ref),
7785 0, root->sectorsize);
7788 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7789 struct btrfs_shared_data_ref *ref;
7790 ref = btrfs_item_ptr(buf, i,
7791 struct btrfs_shared_data_ref);
7792 add_data_backref(extent_cache,
7793 key.objectid, key.offset, 0, 0, 0,
7794 btrfs_shared_data_ref_count(buf, ref),
7795 0, root->sectorsize);
/* Orphan items may be queued on the global delete_items list. */
7798 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7799 struct bad_item *bad;
7801 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7805 bad = malloc(sizeof(struct bad_item));
7808 INIT_LIST_HEAD(&bad->list);
7809 memcpy(&bad->key, &key,
7810 sizeof(struct btrfs_key));
7811 bad->root_id = owner;
7812 list_add_tail(&bad->list, &delete_items);
/* Past this point only file extent items are of interest. */
7815 if (key.type != BTRFS_EXTENT_DATA_KEY)
7817 fi = btrfs_item_ptr(buf, i,
7818 struct btrfs_file_extent_item);
7819 if (btrfs_file_extent_type(buf, fi) ==
7820 BTRFS_FILE_EXTENT_INLINE)
7822 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7825 data_bytes_allocated +=
7826 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this tests the running total, not the disk_num_bytes of the
 * current extent - confirm that is the intended sanity check.
 */
7827 if (data_bytes_allocated < root->sectorsize) {
7830 data_bytes_referenced +=
7831 btrfs_file_extent_num_bytes(buf, fi);
7832 add_data_backref(extent_cache,
7833 btrfs_file_extent_disk_bytenr(buf, fi),
7834 parent, owner, key.objectid, key.offset -
7835 btrfs_file_extent_offset(buf, fi), 1, 1,
7836 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue each child pointer. */
7840 struct btrfs_key first_key;
7842 first_key.objectid = 0;
7845 btrfs_item_key_to_cpu(buf, &first_key, 0);
7846 level = btrfs_header_level(buf);
7847 for (i = 0; i < nritems; i++) {
7848 struct extent_record tmpl;
7850 ptr = btrfs_node_blockptr(buf, i);
7851 size = root->nodesize;
7852 btrfs_node_key_to_cpu(buf, &key, i);
/* Drop-progress test; it only applies at ri->drop_level. */
7854 if ((level == ri->drop_level)
7855 && is_dropped_key(&key, &ri->drop_key)) {
7860 memset(&tmpl, 0, sizeof(tmpl));
7861 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7862 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7867 tmpl.max_size = size;
7868 ret = add_extent_rec(extent_cache, &tmpl);
7872 ret = add_tree_backref(extent_cache, ptr, parent,
7876 "add_tree_backref failed (non-leaf block): %s",
/* The child goes to either the 'nodes' or the 'pending' tree. */
7882 add_pending(nodes, seen, ptr, size);
7884 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots count as wasted btree space. */
7887 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7888 nritems) * sizeof(struct btrfs_key_ptr);
/* Global byte accounting, split by the tree that owns the block. */
7890 total_btree_bytes += buf->len;
7891 if (fs_root_objectid(btrfs_header_owner(buf)))
7892 total_fs_tree_bytes += buf->len;
7893 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7894 total_extent_tree_bytes += buf->len;
/* Reloc-owned mixed-backref block without the RELOC flag sets the marker. */
7895 if (!found_old_backref &&
7896 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7897 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7898 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7899 found_old_backref = 1;
7901 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for processing and seed its extent
 * record and backref.
 *
 * The block is added to @nodes when its level is above 0 and to @pending
 * otherwise. An extent record starting at buf->start with max_size
 * buf->len is added to @extent_cache. The tree backref uses buf->start as
 * parent for reloc trees and for blocks older than the mixed backref
 * revision; otherwise it uses parent 0 and the given objectid as root.
 */
7905 static int add_root_to_pending(struct extent_buffer *buf,
7906 struct cache_tree *extent_cache,
7907 struct cache_tree *pending,
7908 struct cache_tree *seen,
7909 struct cache_tree *nodes,
7912 struct extent_record tmpl;
7915 if (btrfs_header_level(buf) > 0)
7916 add_pending(nodes, seen, buf->start, buf->len);
7918 add_pending(pending, seen, buf->start, buf->len);
7920 memset(&tmpl, 0, sizeof(tmpl));
7921 tmpl.start = buf->start;
7926 tmpl.max_size = buf->len;
7927 add_extent_rec(extent_cache, &tmpl);
/* Reloc tree or pre-mixed-backref block: the backref points at itself. */
7929 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7930 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7931 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7934 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7939 /* as we fix the tree, we might be deleting blocks that
7940 * we're tracking for repair. This hook makes sure we
7941 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent reference is dropped
 * during repair.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up
 * in root->fs_info->fsck_extent_cache. Depending on whether @owner denotes
 * data (owner >= BTRFS_FIRST_FREE_OBJECTID) the matching data or tree
 * backref is located, its counters and found_* bits are reduced, and
 * maybe_free_extent_rec() is called on the record.
 */
7943 static int free_extent_hook(struct btrfs_trans_handle *trans,
7944 struct btrfs_root *root,
7945 u64 bytenr, u64 num_bytes, u64 parent,
7946 u64 root_objectid, u64 owner, u64 offset,
7949 struct extent_record *rec;
7950 struct cache_extent *cache;
7952 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7954 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7955 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7959 rec = container_of(cache, struct extent_record, cache);
7961 struct data_backref *back;
7962 back = find_data_backref(rec, parent, root_objectid, owner,
7963 offset, 1, bytenr, num_bytes);
/* References that were found while walking the trees. */
7966 if (back->node.found_ref) {
7967 back->found_ref -= refs_to_drop;
7969 rec->refs -= refs_to_drop;
/* References that were recorded in the extent tree. */
7971 if (back->node.found_extent_tree) {
7972 back->num_refs -= refs_to_drop;
7973 if (rec->extent_item_refs)
7974 rec->extent_item_refs -= refs_to_drop;
/* Clear the found_* bits once the matching counters reach zero. */
7976 if (back->found_ref == 0)
7977 back->node.found_ref = 0;
7978 if (back->num_refs == 0)
7979 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set; one
 * might expect the test to require both bits to be clear - confirm intent.
 */
7981 if (!back->node.found_extent_tree && back->node.found_ref) {
7982 list_del(&back->node.list);
7986 struct tree_backref *back;
7987 back = find_tree_backref(rec, parent, root_objectid);
7990 if (back->node.found_ref) {
7993 back->node.found_ref = 0;
7995 if (back->node.found_extent_tree) {
7996 if (rec->extent_item_refs)
7997 rec->extent_item_refs--;
7998 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data branch above - confirm. */
8000 if (!back->node.found_extent_tree && back->node.found_ref) {
8001 list_del(&back->node.list);
8005 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent-tree items that describe the extent at a given bytenr.
 *
 * The search starts from the highest possible offset for that bytenr and
 * works on the item found there. Items whose objectid matches and whose
 * type is an extent item, metadata item or one of the backref key types are
 * deleted, with a "repair deleting extent record" message. Other types only
 * move the search key down (type - 1, or offset - 1 when the type is 0).
 * After deleting an EXTENT_ITEM or METADATA_ITEM the block group accounting
 * is updated with the extent length (key offset, or nodesize for metadata
 * items).
 */
8010 static int delete_extent_records(struct btrfs_trans_handle *trans,
8011 struct btrfs_root *root,
8012 struct btrfs_path *path,
8015 struct btrfs_key key;
8016 struct btrfs_key found_key;
8017 struct extent_buffer *leaf;
8022 key.objectid = bytenr;
8024 key.offset = (u64)-1;
8027 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8034 if (path->slots[0] == 0)
8040 leaf = path->nodes[0];
8041 slot = path->slots[0];
8043 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* An item for a different bytenr ends the interest in this slot. */
8044 if (found_key.objectid != bytenr)
/* Item types not listed here are left in place. */
8047 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8048 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8049 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8050 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8051 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8052 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8053 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8054 btrfs_release_path(path);
/* Step the search key to just below the item that was skipped. */
8055 if (found_key.type == 0) {
8056 if (found_key.offset == 0)
8058 key.offset = found_key.offset - 1;
8059 key.type = found_key.type;
8061 key.type = found_key.type - 1;
8062 key.offset = (u64)-1;
8066 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8067 found_key.objectid, found_key.type, found_key.offset);
8069 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8072 btrfs_release_path(path);
/* Extent / metadata items also change the block group accounting. */
8074 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8075 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8076 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8077 found_key.offset : root->nodesize;
8079 ret = btrfs_update_block_group(trans, root, bytenr,
8086 btrfs_release_path(path);
8091 * for a single backref, this will allocate a new extent
8092 * and add the backref to it.
/*
 * Write one backref of extent record @rec into the extent tree, inserting
 * the extent item itself where the code below does so.
 *
 * @trans:     transaction handle used for all tree modifications
 * @info:      fs_info; info->extent_root is the tree being modified
 * @path:      caller-provided path, released before returning
 * @rec:       extent record being repaired
 * @back:      backref to add (data or tree, see back->is_data)
 * @allocated: NOTE(review): its use is not visible in this excerpt;
 *             presumably it says whether the extent item already exists
 * @flags:     extra extent flags OR-ed into a tree block's extent item
 *
 * The extent item is inserted with a reference count of 0 and the block
 * group accounting is updated for rec->max_size bytes. References are then
 * added with btrfs_inc_extent_ref(): once per found_ref for a data backref,
 * once for a tree backref.
 */
8094 static int record_extent(struct btrfs_trans_handle *trans,
8095 struct btrfs_fs_info *info,
8096 struct btrfs_path *path,
8097 struct extent_record *rec,
8098 struct extent_backref *back,
8099 int allocated, u64 flags)
8102 struct btrfs_root *extent_root = info->extent_root;
8103 struct extent_buffer *leaf;
8104 struct btrfs_key ins_key;
8105 struct btrfs_extent_item *ei;
8106 struct data_backref *dback;
8107 struct btrfs_tree_block_info *bi;
/* Raise max_size to at least one node. */
8110 rec->max_size = max_t(u64, rec->max_size,
8111 info->extent_root->nodesize);
8114 u32 item_size = sizeof(*ei);
/* Extra room for the tree block info that follows the extent item. */
8117 item_size += sizeof(*bi);
8119 ins_key.objectid = rec->start;
8120 ins_key.offset = rec->max_size;
8121 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8123 ret = btrfs_insert_empty_item(trans, extent_root, path,
8124 &ins_key, item_size);
8128 leaf = path->nodes[0];
8129 ei = btrfs_item_ptr(leaf, path->slots[0],
8130 struct btrfs_extent_item);
/* The count starts at 0; btrfs_inc_extent_ref() is called further down. */
8132 btrfs_set_extent_refs(leaf, ei, 0);
8133 btrfs_set_extent_generation(leaf, ei, rec->generation);
8135 if (back->is_data) {
8136 btrfs_set_extent_flags(leaf, ei,
8137 BTRFS_EXTENT_FLAG_DATA);
/*
 * NOTE(review): the declaration below ends in a stray ';;', and the
 * '©_key' tokens further down look like a mis-decoded '&copy_key'.
 */
8139 struct btrfs_disk_key copy_key;;
8141 bi = (struct btrfs_tree_block_info *)(ei + 1);
8142 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8145 btrfs_set_disk_key_objectid(©_key,
8146 rec->info_objectid);
8147 btrfs_set_disk_key_type(©_key, 0);
8148 btrfs_set_disk_key_offset(©_key, 0);
8150 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8151 btrfs_set_tree_block_key(leaf, bi, ©_key);
8153 btrfs_set_extent_flags(leaf, ei,
8154 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8157 btrfs_mark_buffer_dirty(leaf);
8158 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8159 rec->max_size, 1, 0);
8162 btrfs_release_path(path);
8165 if (back->is_data) {
8169 dback = to_data_backref(back);
/* A full backref is keyed by its parent block. */
8170 if (back->full_backref)
8171 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference that was found. */
8175 for (i = 0; i < dback->found_ref; i++) {
8176 /* if parent != 0, we're doing a full backref
8177 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8178 * just makes the backref allocator create a data
8181 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8182 rec->start, rec->max_size,
8186 BTRFS_FIRST_FREE_OBJECTID :
8192 fprintf(stderr, "adding new data backref"
8193 " on %llu %s %llu owner %llu"
8194 " offset %llu found %d\n",
8195 (unsigned long long)rec->start,
8196 back->full_backref ?
8198 back->full_backref ?
8199 (unsigned long long)parent :
8200 (unsigned long long)dback->root,
8201 (unsigned long long)dback->owner,
8202 (unsigned long long)dback->offset,
8206 struct tree_backref *tback;
8208 tback = to_tree_backref(back);
8209 if (back->full_backref)
8210 parent = tback->parent;
8214 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8215 rec->start, rec->max_size,
8216 parent, tback->root, 0, 0);
8217 fprintf(stderr, "adding new tree backref on "
8218 "start %llu len %llu parent %llu root %llu\n",
8219 rec->start, rec->max_size, parent, tback->root);
8222 btrfs_release_path(path);
/*
 * Walk the list @entries looking for an extent_entry whose bytenr and bytes
 * both equal the requested values.
 *
 * NOTE(review): the return statements are not visible here; presumably the
 * matching entry is returned, or NULL when nothing matches - confirm.
 */
8226 static struct extent_entry *find_entry(struct list_head *entries,
8227 u64 bytenr, u64 bytes)
8229 struct extent_entry *entry = NULL;
8231 list_for_each_entry(entry, entries, list) {
8232 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the extent_entry in @entries that should be trusted, based on each
 * entry's 'count' and 'broken' fields.
 *
 * An entry whose broken count equals its count gets special handling and is
 * not trusted. The others are compared by count, tracking the current
 * 'best' candidate and the previously visited entry 'prev'. A tie with the
 * current best means the search has to continue.
 */
8239 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8241 struct extent_entry *entry, *best = NULL, *prev = NULL;
8243 list_for_each_entry(entry, entries, list) {
8245 * If there are as many broken entries as entries then we know
8246 * not to trust this particular entry.
8248 if (entry->broken == entry->count)
8252 * Special case, when there are only two entries and 'best' is
8262 * If our current entry == best then we can't be sure our best
8263 * is really the best, so we need to keep searching.
8265 if (best && best->count == entry->count) {
8271 /* Prev == entry, not good enough, have to keep searching */
8272 if (!prev->broken && prev->count == entry->count)
8276 best = (prev->count > entry->count) ? prev : entry;
8277 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with @entry.
 *
 * @info:  filesystem being repaired
 * @path:  caller-provided path, released before returning
 * @dback: data backref whose file extent item disagrees with @entry
 * @entry: the (bytenr, bytes) pair the file extent should point at
 *
 * The function reads the fs root named by dback->root, searches it
 * read-only for (dback->owner, EXTENT_DATA, dback->offset) and walks
 * forward until it reaches the file extent whose disk bytenr and disk
 * num bytes match @dback.  It then starts a transaction, searches again
 * with COW enabled for the key it found, and updates disk_bytenr, the
 * in-extent offset and disk_num_bytes to match @entry.  ram_bytes is only
 * rewritten for uncompressed extents.
 *
 * Compressed extents whose start differs from @entry, and refs that fall
 * outside the range described by @entry, are reported on stderr rather
 * than modified.
 *
 * Returns ret when it is non-zero, otherwise the result of committing the
 * transaction.
 */
8285 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8286 struct data_backref *dback, struct extent_entry *entry)
8288 struct btrfs_trans_handle *trans;
8289 struct btrfs_root *root;
8290 struct btrfs_file_extent_item *fi;
8291 struct extent_buffer *leaf;
8292 struct btrfs_key key;
8296 key.objectid = dback->root;
8297 key.type = BTRFS_ROOT_ITEM_KEY;
8298 key.offset = (u64)-1;
8299 root = btrfs_read_fs_root(info, &key);
8301 fprintf(stderr, "Couldn't find root for our ref\n");
8306 * The backref points to the original offset of the extent if it was
8307 * split, so we need to search down to the offset we have and then walk
8308 * forward until we find the backref we're looking for.
8310 key.objectid = dback->owner;
8311 key.type = BTRFS_EXTENT_DATA_KEY;
8312 key.offset = dback->offset;
8313 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8315 fprintf(stderr, "Error looking up ref %d\n", ret);
8320 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8321 ret = btrfs_next_leaf(root, path);
8323 fprintf(stderr, "Couldn't find our ref, next\n");
8327 leaf = path->nodes[0];
8328 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8329 if (key.objectid != dback->owner ||
8330 key.type != BTRFS_EXTENT_DATA_KEY) {
8331 fprintf(stderr, "Couldn't find our ref, search\n");
8334 fi = btrfs_item_ptr(leaf, path->slots[0],
8335 struct btrfs_file_extent_item);
8336 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8337 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8339 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8344 btrfs_release_path(path);
8346 trans = btrfs_start_transaction(root, 1);
8348 return PTR_ERR(trans);
8351 * Ok we have the key of the file extent we want to fix, now we can cow
8352 * down to the thing and fix it.
8354 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8356 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8357 key.objectid, key.type, key.offset, ret);
8361 fprintf(stderr, "Well that's odd, we just found this key "
8362 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8367 leaf = path->nodes[0];
8368 fi = btrfs_item_ptr(leaf, path->slots[0],
8369 struct btrfs_file_extent_item);
8371 if (btrfs_file_extent_compression(leaf, fi) &&
8372 dback->disk_bytenr != entry->bytenr) {
8373 fprintf(stderr, "Ref doesn't match the record start and is "
8374 "compressed, please take a btrfs-image of this file "
8375 "system and send it to a btrfs developer so they can "
8376 "complete this functionality for bytenr %Lu\n",
8377 dback->disk_bytenr);
8382 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8383 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8384 } else if (dback->disk_bytenr > entry->bytenr) {
8385 u64 off_diff, offset;
8387 off_diff = dback->disk_bytenr - entry->bytenr;
8388 offset = btrfs_file_extent_offset(leaf, fi);
8389 if (dback->disk_bytenr + offset +
8390 btrfs_file_extent_num_bytes(leaf, fi) >
8391 entry->bytenr + entry->bytes) {
8392 fprintf(stderr, "Ref is past the entry end, please "
8393 "take a btrfs-image of this file system and "
8394 "send it to a btrfs developer, ref %Lu\n",
8395 dback->disk_bytenr);
8400 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8401 btrfs_set_file_extent_offset(leaf, fi, offset);
8402 } else if (dback->disk_bytenr < entry->bytenr) {
8405 offset = btrfs_file_extent_offset(leaf, fi);
8406 if (dback->disk_bytenr + offset < entry->bytenr) {
8407 fprintf(stderr, "Ref is before the entry start, please"
8408 " take a btrfs-image of this file system and "
8409 "send it to a btrfs developer, ref %Lu\n",
8410 dback->disk_bytenr);
8415 offset += dback->disk_bytenr;
8416 offset -= entry->bytenr;
8417 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8418 btrfs_set_file_extent_offset(leaf, fi, offset);
8421 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8424 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8425 * only do this if we aren't using compression, otherwise it's a
8428 if (!btrfs_file_extent_compression(leaf, fi))
8429 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8431 printf("ram bytes may be wrong?\n");
8432 btrfs_mark_buffer_dirty(leaf);
8434 err = btrfs_commit_transaction(trans, root);
8435 btrfs_release_path(path);
8436 return ret ? ret : err;
/*
 * Make the keyed data backrefs of @rec agree on one (bytenr, bytes) pair.
 *
 * @info: filesystem being repaired, passed through to repair_ref()
 * @path: scratch path, passed through to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * Pass 1 walks rec->backrefs, skipping full backrefs, non-data backrefs
 * and backrefs for which no real ref was found, and collects every
 * distinct (disk_bytenr, bytes) pair into a local list of extent_entry
 * items.  A backref that disagrees with rec->start / rec->nr, or that is
 * flagged broken, counts as a mismatch.
 *
 * If at most one entry exists and there was no mismatch, nothing needs
 * fixing.  Otherwise find_most_right_entry() is asked for a winner.  If the
 * backrefs cannot decide, the extent record itself (rec->start, rec->nr)
 * is looked up in, or added to, the entry list and the vote is repeated.
 * Cases where nobody agrees, where the vote stays evenly split, or where
 * the winner does not start at rec->start are reported on stderr.
 *
 * Pass 2 calls repair_ref() for every considered backref that does not
 * match the winning entry.  The local entry list is emptied at the end.
 *
 * NOTE(review): the return statements are not visible in this block; the
 * caller in this file treats a non-zero result as "stop" - confirm.
 */
8439 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8440 struct extent_record *rec)
8442 struct extent_backref *back;
8443 struct data_backref *dback;
8444 struct extent_entry *entry, *best = NULL;
8447 int broken_entries = 0;
8452 * Metadata is easy and the backrefs should always agree on bytenr and
8453 * size, if not we've got bigger issues.
8458 list_for_each_entry(back, &rec->backrefs, list) {
8459 if (back->full_backref || !back->is_data)
8462 dback = to_data_backref(back);
8465 * We only pay attention to backrefs that we found a real
8468 if (dback->found_ref == 0)
8472 * For now we only catch when the bytes don't match, not the
8473 * bytenr. We can easily do this at the same time, but I want
8474 * to have a fs image to test on before we just add repair
8475 * functionality willy-nilly so we know we won't screw up the
8479 entry = find_entry(&entries, dback->disk_bytenr,
8482 entry = malloc(sizeof(struct extent_entry));
8487 memset(entry, 0, sizeof(*entry));
8488 entry->bytenr = dback->disk_bytenr;
8489 entry->bytes = dback->bytes;
8490 list_add_tail(&entry->list, &entries);
8495 * If we only have on entry we may think the entries agree when
8496 * in reality they don't so we have to do some extra checking.
8498 if (dback->disk_bytenr != rec->start ||
8499 dback->bytes != rec->nr || back->broken)
8510 /* Yay all the backrefs agree, carry on good sir */
8511 if (nr_entries <= 1 && !mismatch)
8514 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8515 "%Lu\n", rec->start);
8518 * First we want to see if the backrefs can agree amongst themselves who
8519 * is right, so figure out which one of the entries has the highest
8522 best = find_most_right_entry(&entries);
8525 * Ok so we may have an even split between what the backrefs think, so
8526 * this is where we use the extent ref to see what it thinks.
8529 entry = find_entry(&entries, rec->start, rec->nr);
8530 if (!entry && (!broken_entries || !rec->found_rec)) {
8531 fprintf(stderr, "Backrefs don't agree with each other "
8532 "and extent record doesn't agree with anybody,"
8533 " so we can't fix bytenr %Lu bytes %Lu\n",
8534 rec->start, rec->nr);
8537 } else if (!entry) {
8539 * Ok our backrefs were broken, we'll assume this is the
8540 * correct value and add an entry for this range.
8542 entry = malloc(sizeof(struct extent_entry));
8547 memset(entry, 0, sizeof(*entry));
8548 entry->bytenr = rec->start;
8549 entry->bytes = rec->nr;
8550 list_add_tail(&entry->list, &entries);
8554 best = find_most_right_entry(&entries);
8556 fprintf(stderr, "Backrefs and extent record evenly "
8557 "split on who is right, this is going to "
8558 "require user input to fix bytenr %Lu bytes "
8559 "%Lu\n", rec->start, rec->nr);
8566 * I don't think this can happen currently as we'll abort() if we catch
8567 * this case higher up, but in case somebody removes that we still can't
8568 * deal with it properly here yet, so just bail out of that's the case.
8570 if (best->bytenr != rec->start) {
8571 fprintf(stderr, "Extent start and backref starts don't match, "
8572 "please use btrfs-image on this file system and send "
8573 "it to a btrfs developer so they can make fsck fix "
8574 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8575 rec->start, rec->nr);
8581 * Ok great we all agreed on an extent record, let's go find the real
8582 * references and fix up the ones that don't match.
8584 list_for_each_entry(back, &rec->backrefs, list) {
8585 if (back->full_backref || !back->is_data)
8588 dback = to_data_backref(back);
8591 * Still ignoring backrefs that don't have a real ref attached
8594 if (dback->found_ref == 0)
8597 if (dback->bytes == best->bytes &&
8598 dback->disk_bytenr == best->bytenr)
8601 ret = repair_ref(info, path, dback, best);
8607 * Ok we messed with the actual refs, which means we need to drop our
8608 * entire cache and go back and rescan. I know this is a huge pain and
8609 * adds a lot of extra work, but it's the only way to be safe. Once all
8610 * the backrefs agree we may not need to do anything to the extent
8615 while (!list_empty(&entries)) {
8616 entry = list_entry(entries.next, struct extent_entry, list);
8617 list_del_init(&entry->list);
/*
 * Decide whether @rec really has duplicate extent items that must be
 * deleted, or whether it is only a backref-created placeholder.
 *
 * @extent_cache: cache tree holding all extent records
 * @rec:          record taken off the duplicate_extents list
 *
 * Nothing is rearranged when @rec was backed by a real extent item
 * (found_rec) or has more than one duplicate.  Otherwise @rec is removed
 * from the cache and its single duplicate ("good") takes its place: good
 * inherits rec's ref count and backrefs and gets fresh list heads and
 * cleared check flags.  Every other cached record overlapping good's range
 * is then folded in: records that have an extent item or duplicates of
 * their own become duplicates of good (and good is queued on
 * duplicate_extents), while records without one just donate their refs and
 * backrefs.  Finally good is inserted into @extent_cache.
 *
 * The visible final return is 0 when good ended up with duplicates to
 * delete and 1 when it did not.
 */
8623 static int process_duplicates(struct cache_tree *extent_cache,
8624 struct extent_record *rec)
8626 struct extent_record *good, *tmp;
8627 struct cache_extent *cache;
8631 * If we found a extent record for this extent then return, or if we
8632 * have more than one duplicate we are likely going to need to delete
8635 if (rec->found_rec || rec->num_duplicates > 1)
8638 /* Shouldn't happen but just in case */
8639 BUG_ON(!rec->num_duplicates);
8642 * So this happens if we end up with a backref that doesn't match the
8643 * actual extent entry. So either the backref is bad or the extent
8644 * entry is bad. Either way we want to have the extent_record actually
8645 * reflect what we found in the extent_tree, so we need to take the
8646 * duplicate out and use that as the extent_record since the only way we
8647 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8649 remove_cache_extent(extent_cache, &rec->cache);
8651 good = to_extent_record(rec->dups.next);
8652 list_del_init(&good->list);
8653 INIT_LIST_HEAD(&good->backrefs);
8654 INIT_LIST_HEAD(&good->dups);
8655 good->cache.start = good->start;
8656 good->cache.size = good->nr;
8657 good->content_checked = 0;
8658 good->owner_ref_checked = 0;
8659 good->num_duplicates = 0;
8660 good->refs = rec->refs;
8661 list_splice_init(&rec->backrefs, &good->backrefs);
8663 cache = lookup_cache_extent(extent_cache, good->start,
8667 tmp = container_of(cache, struct extent_record, cache);
8670 * If we find another overlapping extent and it's found_rec is
8671 * set then it's a duplicate and we need to try and delete
8674 if (tmp->found_rec || tmp->num_duplicates > 0) {
8675 if (list_empty(&good->list))
8676 list_add_tail(&good->list,
8677 &duplicate_extents);
8678 good->num_duplicates += tmp->num_duplicates + 1;
8679 list_splice_init(&tmp->dups, &good->dups);
8680 list_del_init(&tmp->list);
8681 list_add_tail(&tmp->list, &good->dups);
8682 remove_cache_extent(extent_cache, &tmp->cache);
8687 * Ok we have another non extent item backed extent rec, so lets
8688 * just add it to this extent and carry on like we did above.
8690 good->refs += tmp->refs;
8691 list_splice_init(&tmp->backrefs, &good->backrefs);
8692 remove_cache_extent(extent_cache, &tmp->cache);
8695 ret = insert_cache_extent(extent_cache, &good->cache);
8698 return good->num_duplicates ? 0 : 1;
/*
 * Delete the redundant EXTENT_ITEMs that overlap @rec from the extent tree.
 *
 * @root: any root of the filesystem; the extent root is taken from its
 *        fs_info before the transaction is started
 * @rec:  record whose ->dups list holds the overlapping records
 *
 * The duplicates are compared by start and length to find the one record
 * that covers the others; partially overlapping extents are reported on
 * stderr and not handled.  The records chosen for removal are moved to a
 * local delete list.  Inside one transaction, each listed record that was
 * backed by a real extent item is looked up as
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and deleted; a metadata record on the
 * list is reported as unexpected.  Both the delete list and rec->dups are
 * emptied before returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates
 * is reset to 0 only when ret and nr_del are both zero.
 */
8701 static int delete_duplicate_records(struct btrfs_root *root,
8702 struct extent_record *rec)
8704 struct btrfs_trans_handle *trans;
8705 LIST_HEAD(delete_list);
8706 struct btrfs_path path;
8707 struct extent_record *tmp, *good, *n;
8710 struct btrfs_key key;
8712 btrfs_init_path(&path);
8715 /* Find the record that covers all of the duplicates. */
8716 list_for_each_entry(tmp, &rec->dups, list) {
8717 if (good->start < tmp->start)
8719 if (good->nr > tmp->nr)
8722 if (tmp->start + tmp->nr < good->start + good->nr) {
8723 fprintf(stderr, "Ok we have overlapping extents that "
8724 "aren't completely covered by each other, this "
8725 "is going to require more careful thought. "
8726 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8727 tmp->start, tmp->nr, good->start, good->nr);
8734 list_add_tail(&rec->list, &delete_list);
8736 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8739 list_move_tail(&tmp->list, &delete_list);
8742 root = root->fs_info->extent_root;
8743 trans = btrfs_start_transaction(root, 1);
8744 if (IS_ERR(trans)) {
8745 ret = PTR_ERR(trans);
8749 list_for_each_entry(tmp, &delete_list, list) {
8750 if (tmp->found_rec == 0)
8752 key.objectid = tmp->start;
8753 key.type = BTRFS_EXTENT_ITEM_KEY;
8754 key.offset = tmp->nr;
8756 /* Shouldn't happen but just in case */
8757 if (tmp->metadata) {
8758 fprintf(stderr, "Well this shouldn't happen, extent "
8759 "record overlaps but is metadata? "
8760 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8764 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8770 ret = btrfs_del_item(trans, root, &path);
8773 btrfs_release_path(&path);
8776 err = btrfs_commit_transaction(trans, root);
8780 while (!list_empty(&delete_list)) {
8781 tmp = to_extent_record(delete_list.next);
8782 list_del_init(&tmp->list);
8788 while (!list_empty(&rec->dups)) {
8789 tmp = to_extent_record(rec->dups.next);
8790 list_del_init(&tmp->list);
8794 btrfs_release_path(&path);
8796 if (!ret && !nr_del)
8797 rec->num_duplicates = 0;
8799 return ret ? ret : nr_del;
/*
 * Try to locate the file extent items behind backrefs of @rec that the
 * tree scan did not match to any ref.
 *
 * @info:         filesystem being checked
 * @path:         scratch path, released after every lookup
 * @extent_cache: cache tree of extent records, used to see whether the
 *                bytenr a file extent points at has a record of its own
 * @rec:          extent record whose unmatched data backrefs are examined
 *
 * For each keyed data backref with found_ref == 0 the owning fs root is
 * read (a missing root, -ENOENT, just skips the backref; any other error
 * is returned) and (owner, EXTENT_DATA, offset) is searched read-only.
 * When the item exists, its disk bytenr and disk num bytes are read and
 * the extent cache is consulted for that bytenr.  If the backref is
 * accepted, found_ref is incremented and the bytenr/bytes read from the
 * file extent are stored in the backref so that the backref verification
 * code can vote on them.
 */
8802 static int find_possible_backrefs(struct btrfs_fs_info *info,
8803 struct btrfs_path *path,
8804 struct cache_tree *extent_cache,
8805 struct extent_record *rec)
8807 struct btrfs_root *root;
8808 struct extent_backref *back;
8809 struct data_backref *dback;
8810 struct cache_extent *cache;
8811 struct btrfs_file_extent_item *fi;
8812 struct btrfs_key key;
8816 list_for_each_entry(back, &rec->backrefs, list) {
8817 /* Don't care about full backrefs (poor unloved backrefs) */
8818 if (back->full_backref || !back->is_data)
8821 dback = to_data_backref(back);
8823 /* We found this one, we don't need to do a lookup */
8824 if (dback->found_ref)
8827 key.objectid = dback->root;
8828 key.type = BTRFS_ROOT_ITEM_KEY;
8829 key.offset = (u64)-1;
8831 root = btrfs_read_fs_root(info, &key);
8833 /* No root, definitely a bad ref, skip */
8834 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8836 /* Other err, exit */
8838 return PTR_ERR(root);
8840 key.objectid = dback->owner;
8841 key.type = BTRFS_EXTENT_DATA_KEY;
8842 key.offset = dback->offset;
8843 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8845 btrfs_release_path(path);
8848 /* Didn't find it, we can carry on */
8853 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8854 struct btrfs_file_extent_item);
8855 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8856 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8857 btrfs_release_path(path);
8858 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8860 struct extent_record *tmp;
8861 tmp = container_of(cache, struct extent_record, cache);
8864 * If we found an extent record for the bytenr for this
8865 * particular backref then we can't add it to our
8866 * current extent record. We only want to add backrefs
8867 * that don't have a corresponding extent item in the
8868 * extent tree since they likely belong to this record
8869 * and we need to fix it if it doesn't match bytenrs.
8875 dback->found_ref += 1;
8876 dback->disk_bytenr = bytenr;
8877 dback->bytes = bytes;
8880 * Set this so the verify backref code knows not to trust the
8881 * values in this backref.
8890 * Record orphan data ref into corresponding root.
8892 * Return 0 if the extent item contains data ref and recorded.
8893 * Return 1 if the extent item contains no useful data ref
8894 * On that case, it may contains only shared_dataref or metadata backref
8895 * or the file extent exists(this should be handled by the extent bytenr
8897 * Return <0 if something goes wrong.
8899 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8900 struct extent_record *rec)
8902 struct btrfs_key key;
8903 struct btrfs_root *dest_root;
8904 struct extent_backref *back;
8905 struct data_backref *dback;
8906 struct orphan_data_extent *orphan;
8907 struct btrfs_path path;
8908 int recorded_data_ref = 0;
8913 btrfs_init_path(&path);
8914 list_for_each_entry(back, &rec->backrefs, list) {
8915 if (back->full_backref || !back->is_data ||
8916 !back->found_extent_tree)
8918 dback = to_data_backref(back);
8919 if (dback->found_ref)
8921 key.objectid = dback->root;
8922 key.type = BTRFS_ROOT_ITEM_KEY;
8923 key.offset = (u64)-1;
8925 dest_root = btrfs_read_fs_root(fs_info, &key);
8927 /* For non-exist root we just skip it */
8928 if (IS_ERR(dest_root) || !dest_root)
8931 key.objectid = dback->owner;
8932 key.type = BTRFS_EXTENT_DATA_KEY;
8933 key.offset = dback->offset;
8935 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8936 btrfs_release_path(&path);
8938 * For ret < 0, it's OK since the fs-tree may be corrupted,
8939 * we need to record it for inode/file extent rebuild.
8940 * For ret > 0, we record it only for file extent rebuild.
8941 * For ret == 0, the file extent exists but only bytenr
8942 * mismatch, let the original bytenr fix routine to handle,
8948 orphan = malloc(sizeof(*orphan));
8953 INIT_LIST_HEAD(&orphan->list);
8954 orphan->root = dback->root;
8955 orphan->objectid = dback->owner;
8956 orphan->offset = dback->offset;
8957 orphan->disk_bytenr = rec->cache.start;
8958 orphan->disk_len = rec->cache.size;
8959 list_add(&dest_root->orphan_data_extents, &orphan->list);
8960 recorded_data_ref = 1;
8963 btrfs_release_path(&path);
8965 return !recorded_data_ref;
8971 * when an incorrect extent item is found, this will delete
8972 * all of the existing entries for it and recreate them
8973 * based on what the tree scan found.
/*
 * Rebuild the extent tree items of @rec from what the tree scan found.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, handed to
 *                find_possible_backrefs()
 * @rec:          extent record whose references are wrong
 *
 * For data extents whose found ref count differs from the extent item's,
 * find_possible_backrefs() and verify_backrefs() run first so that the
 * backrefs agree before anything is rewritten.  Then, inside a transaction
 * on the extent root, the existing records for the extent are deleted via
 * delete_extent_records(), the corrupt-block cache is consulted for the
 * extent's range, and record_extent() is called for each backref on
 * rec->backrefs that had a reference found.  The transaction is committed
 * and a "Repaired extent references" message is printed.
 */
8975 static int fixup_extent_refs(struct btrfs_fs_info *info,
8976 struct cache_tree *extent_cache,
8977 struct extent_record *rec)
8979 struct btrfs_trans_handle *trans = NULL;
8981 struct btrfs_path path;
8982 struct list_head *cur = rec->backrefs.next;
8983 struct cache_extent *cache;
8984 struct extent_backref *back;
8988 if (rec->flag_block_full_backref)
8989 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8991 btrfs_init_path(&path);
8992 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8994 * Sometimes the backrefs themselves are so broken they don't
8995 * get attached to any meaningful rec, so first go back and
8996 * check any of our backrefs that we couldn't find and throw
8997 * them into the list if we find the backref so that
8998 * verify_backrefs can figure out what to do.
9000 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9005 /* step one, make sure all of the backrefs agree */
9006 ret = verify_backrefs(info, &path, rec);
9010 trans = btrfs_start_transaction(info->extent_root, 1);
9011 if (IS_ERR(trans)) {
9012 ret = PTR_ERR(trans);
9016 /* step two, delete all the existing records */
9017 ret = delete_extent_records(trans, info->extent_root, &path,
9023 /* was this block corrupt? If so, don't add references to it */
9024 cache = lookup_cache_extent(info->corrupt_blocks,
9025 rec->start, rec->max_size);
9031 /* step three, recreate all the refs we did find */
9032 while(cur != &rec->backrefs) {
9033 back = to_extent_backref(cur);
9037 * if we didn't find any references, don't create a
9040 if (!back->found_ref)
9043 rec->bad_full_backref = 0;
9044 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9052 int err = btrfs_commit_transaction(trans, info->extent_root);
9058 fprintf(stderr, "Repaired extent references for %llu\n",
9059 (unsigned long long)rec->start);
9061 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record; rec->flag_block_full_backref selects whether the
 *           flag is set or cleared
 *
 * The item is looked up in the extent root as
 * (start, METADATA_ITEM, info_level) for metadata records and
 * (start, EXTENT_ITEM, max_size) otherwise.  The lookup runs inside a
 * transaction with COW enabled; on a failed or empty lookup the path is
 * released and the transaction is still committed.  On success the flags
 * are read, adjusted, written back, the leaf is marked dirty and the
 * transaction is committed.
 */
9065 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9066 struct extent_record *rec)
9068 struct btrfs_trans_handle *trans;
9069 struct btrfs_root *root = fs_info->extent_root;
9070 struct btrfs_path path;
9071 struct btrfs_extent_item *ei;
9072 struct btrfs_key key;
9076 key.objectid = rec->start;
9077 if (rec->metadata) {
9078 key.type = BTRFS_METADATA_ITEM_KEY;
9079 key.offset = rec->info_level;
9081 key.type = BTRFS_EXTENT_ITEM_KEY;
9082 key.offset = rec->max_size;
9085 trans = btrfs_start_transaction(root, 0);
9087 return PTR_ERR(trans);
9089 btrfs_init_path(&path);
9090 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9092 btrfs_release_path(&path);
9093 btrfs_commit_transaction(trans, root);
9096 fprintf(stderr, "Didn't find extent for %llu\n",
9097 (unsigned long long)rec->start);
9098 btrfs_release_path(&path);
9099 btrfs_commit_transaction(trans, root);
9103 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9104 struct btrfs_extent_item);
9105 flags = btrfs_extent_flags(path.nodes[0], ei);
9106 if (rec->flag_block_full_backref) {
9107 fprintf(stderr, "setting full backref on %llu\n",
9108 (unsigned long long)key.objectid);
9109 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9111 fprintf(stderr, "clearing full backref on %llu\n",
9112 (unsigned long long)key.objectid);
9113 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9115 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9116 btrfs_mark_buffer_dirty(path.nodes[0]);
9117 btrfs_release_path(&path);
9118 ret = btrfs_commit_transaction(trans, root);
9120 fprintf(stderr, "Repaired extent flags for %llu\n",
9121 (unsigned long long)rec->start);
9126 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the corrupt block (key, level, cache.start)
 *
 * The extent root is searched for corrupt->key with lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the corrupt
 * block.  The slot the search landed on is checked first; if its block
 * pointer is not corrupt->cache.start, every slot of that node is scanned.
 * When the pointer is found it is removed with btrfs_del_ptr().  Searching
 * other nodes for the pointer is not implemented (see the TODO below).
 */
9127 static int prune_one_block(struct btrfs_trans_handle *trans,
9128 struct btrfs_fs_info *info,
9129 struct btrfs_corrupt_block *corrupt)
9132 struct btrfs_path path;
9133 struct extent_buffer *eb;
9137 int level = corrupt->level + 1;
9139 btrfs_init_path(&path);
9141 /* we want to stop at the parent to our busted block */
9142 path.lowest_level = level;
9144 ret = btrfs_search_slot(trans, info->extent_root,
9145 &corrupt->key, &path, -1, 1);
9150 eb = path.nodes[level];
9157 * hopefully the search gave us the block we want to prune,
9158 * lets try that first
9160 slot = path.slots[level];
9161 found = btrfs_node_blockptr(eb, slot);
9162 if (found == corrupt->cache.start)
9165 nritems = btrfs_header_nritems(eb);
9167 /* the search failed, lets scan this node and hope we find it */
9168 for (slot = 0; slot < nritems; slot++) {
9169 found = btrfs_node_blockptr(eb, slot);
9170 if (found == corrupt->cache.start)
9174 * we couldn't find the bad block. TODO, search all the nodes for pointers
9177 if (eb == info->extent_root->node) {
9182 btrfs_release_path(&path);
9187 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9188 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9191 btrfs_release_path(&path);
/*
 * Prune every block recorded in info->corrupt_blocks from the extent tree.
 *
 * Entries are taken from the corrupt-block cache one at a time, handed to
 * prune_one_block() and then removed from the cache.  The result of
 * prune_one_block() is not checked here.  All pruning shares one
 * transaction on the extent root; a failure to start it is returned as
 * PTR_ERR(trans), and the visible final return is the result of
 * committing it.
 */
9195 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9197 struct btrfs_trans_handle *trans = NULL;
9198 struct cache_extent *cache;
9199 struct btrfs_corrupt_block *corrupt;
9202 cache = search_cache_extent(info->corrupt_blocks, 0);
9206 trans = btrfs_start_transaction(info->extent_root, 1);
9208 return PTR_ERR(trans);
9210 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9211 prune_one_block(trans, info, corrupt);
9212 remove_cache_extent(info->corrupt_blocks, cache);
9215 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop cached free-space state for the whole filesystem.
 *
 * First clears every EXTENT_DIRTY range recorded in
 * fs_info->free_space_cache, then walks the block groups in address order
 * (each lookup starts at the end of the previous block group, i.e.
 * key.objectid + key.offset).
 *
 * NOTE(review): what is reset on each block group during the walk is not
 * visible in this block - presumably its "cached" state; confirm.
 */
9219 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9221 struct btrfs_block_group_cache *cache;
9226 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9227 &start, &end, EXTENT_DIRTY);
9230 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9235 cache = btrfs_lookup_first_block_group(fs_info, start);
9240 start = cache->key.objectid + cache->key.offset;
/*
 * Verify (and in repair mode fix) every extent record in @extent_cache.
 *
 * @root:         any root of the filesystem; fs_info is taken from it
 * @extent_cache: cache tree of extent records built by the tree scan
 *
 * Visible steps, in order:
 *  - every extent record and every corrupt block is marked dirty in
 *    fs_info->excluded_extents, corrupt blocks are pruned with
 *    prune_corrupt_blocks() and cached block group state is reset;
 *  - in repair mode, records queued on duplicate_extents are run through
 *    process_duplicates() and, where needed, delete_duplicate_records();
 *  - each remaining record is checked for: leftover duplicates, ref count
 *    mismatch against the extent item (orphan data refs are recorded via
 *    record_orphan_data_extents()), backpointer mismatch, failed owner ref
 *    check, bad full backref flag, metadata crossing a stripe boundary and
 *    chunk type mismatch.  Each problem is reported on stderr;
 *  - in repair mode, ref problems go to fixup_extent_refs() and bad full
 *    backref flags to fixup_extent_flags();
 *  - the record is removed from the cache, its backrefs are freed and,
 *    under the condition shown near the end of the loop, its range is
 *    cleared from excluded_extents again;
 *  - after the loop a failed repair (other than -EAGAIN) is reported, and
 *    btrfs_fix_block_accounting() is run in its own transaction on the
 *    extent root.
 */
9244 static int check_extent_refs(struct btrfs_root *root,
9245 struct cache_tree *extent_cache)
9247 struct extent_record *rec;
9248 struct cache_extent *cache;
9254 * if we're doing a repair, we have to make sure
9255 * we don't allocate from the problem extents.
9256 * In the worst case, this will be all the
9259 cache = search_cache_extent(extent_cache, 0);
9261 rec = container_of(cache, struct extent_record, cache);
9262 set_extent_dirty(root->fs_info->excluded_extents,
9264 rec->start + rec->max_size - 1);
9265 cache = next_cache_extent(cache);
9268 /* pin down all the corrupted blocks too */
9269 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9271 set_extent_dirty(root->fs_info->excluded_extents,
9273 cache->start + cache->size - 1);
9274 cache = next_cache_extent(cache);
9276 prune_corrupt_blocks(root->fs_info);
9277 reset_cached_block_groups(root->fs_info);
9280 reset_cached_block_groups(root->fs_info);
9283 * We need to delete any duplicate entries we find first otherwise we
9284 * could mess up the extent tree when we have backrefs that actually
9285 * belong to a different extent item and not the weird duplicate one.
9287 while (repair && !list_empty(&duplicate_extents)) {
9288 rec = to_extent_record(duplicate_extents.next);
9289 list_del_init(&rec->list);
9291 /* Sometimes we can find a backref before we find an actual
9292 * extent, so we need to process it a little bit to see if there
9293 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9294 * if this is a backref screwup. If we need to delete stuff
9295 * process_duplicates() will return 0, otherwise it will return
9298 if (process_duplicates(extent_cache, rec))
9300 ret = delete_duplicate_records(root, rec);
9304 * delete_duplicate_records will return the number of entries
9305 * deleted, so if it's greater than 0 then we know we actually
9306 * did something and we need to remove.
9319 cache = search_cache_extent(extent_cache, 0);
9322 rec = container_of(cache, struct extent_record, cache);
9323 if (rec->num_duplicates) {
9324 fprintf(stderr, "extent item %llu has multiple extent "
9325 "items\n", (unsigned long long)rec->start);
9329 if (rec->refs != rec->extent_item_refs) {
9330 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9331 (unsigned long long)rec->start,
9332 (unsigned long long)rec->nr);
9333 fprintf(stderr, "extent item %llu, found %llu\n",
9334 (unsigned long long)rec->extent_item_refs,
9335 (unsigned long long)rec->refs);
9336 ret = record_orphan_data_extents(root->fs_info, rec);
9342 if (all_backpointers_checked(rec, 1)) {
9343 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9344 (unsigned long long)rec->start,
9345 (unsigned long long)rec->nr);
9349 if (!rec->owner_ref_checked) {
9350 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9351 (unsigned long long)rec->start,
9352 (unsigned long long)rec->nr);
9357 if (repair && fix) {
9358 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9364 if (rec->bad_full_backref) {
9365 fprintf(stderr, "bad full backref, on [%llu]\n",
9366 (unsigned long long)rec->start);
9368 ret = fixup_extent_flags(root->fs_info, rec);
9376 * Although it's not a extent ref's problem, we reuse this
9377 * routine for error reporting.
9378 * No repair function yet.
9380 if (rec->crossing_stripes) {
9382 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9383 rec->start, rec->start + rec->max_size);
9387 if (rec->wrong_chunk_type) {
9389 "bad extent [%llu, %llu), type mismatch with chunk\n",
9390 rec->start, rec->start + rec->max_size);
9394 remove_cache_extent(extent_cache, cache);
9395 free_all_extent_backrefs(rec);
9396 if (!init_extent_tree && repair && (!cur_err || fix))
9397 clear_extent_dirty(root->fs_info->excluded_extents,
9399 rec->start + rec->max_size - 1);
9404 if (ret && ret != -EAGAIN) {
9405 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9408 struct btrfs_trans_handle *trans;
9410 root = root->fs_info->extent_root;
9411 trans = btrfs_start_transaction(root, 1);
9412 if (IS_ERR(trans)) {
9413 ret = PTR_ERR(trans);
9417 btrfs_fix_block_accounting(trans, root);
9418 ret = btrfs_commit_transaction(trans, root);
9427 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9431 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9432 stripe_size = length;
9433 stripe_size /= num_stripes;
9434 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9435 stripe_size = length * 2;
9436 stripe_size /= num_stripes;
9437 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9438 stripe_size = length;
9439 stripe_size /= (num_stripes - 1);
9440 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9441 stripe_size = length;
9442 stripe_size /= (num_stripes - 2);
9444 stripe_size = length;
9450 * Check the chunk with its block group/dev list ref:
9451 * Return 0 if all refs seems valid.
9452 * Return 1 if part of refs seems valid, need later check for rebuild ref
9453 * like missing block group and needs to search extent tree to rebuild them.
9454 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against its block group item and its
 * device extents.
 *
 * @chunk_rec:         chunk being checked
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 *
 * The block group covering the chunk is looked up; it must agree with the
 * chunk on length, offset and type flags.  A matching block group is
 * taken off its list and attached as chunk_rec->bg_rec; a mismatching or
 * missing one is reported.
 *
 * Then, for every stripe of the chunk, the device extent at
 * (devid, offset) with the length computed by calc_stripe_length() is
 * looked up.  It must agree on devid, offset, chunk offset and length; a
 * matching device extent is moved onto chunk_rec->dextents, a mismatching
 * or missing one is reported.
 */
9456 static int check_chunk_refs(struct chunk_record *chunk_rec,
9457 struct block_group_tree *block_group_cache,
9458 struct device_extent_tree *dev_extent_cache,
9461 struct cache_extent *block_group_item;
9462 struct block_group_record *block_group_rec;
9463 struct cache_extent *dev_extent_item;
9464 struct device_extent_record *dev_extent_rec;
9468 int metadump_v2 = 0;
9472 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9475 if (block_group_item) {
9476 block_group_rec = container_of(block_group_item,
9477 struct block_group_record,
9479 if (chunk_rec->length != block_group_rec->offset ||
9480 chunk_rec->offset != block_group_rec->objectid ||
9482 chunk_rec->type_flags != block_group_rec->flags)) {
9485 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9486 chunk_rec->objectid,
9491 chunk_rec->type_flags,
9492 block_group_rec->objectid,
9493 block_group_rec->type,
9494 block_group_rec->offset,
9495 block_group_rec->offset,
9496 block_group_rec->objectid,
9497 block_group_rec->flags);
9500 list_del_init(&block_group_rec->list);
9501 chunk_rec->bg_rec = block_group_rec;
9506 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9507 chunk_rec->objectid,
9512 chunk_rec->type_flags);
9519 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9520 chunk_rec->num_stripes);
9521 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9522 devid = chunk_rec->stripes[i].devid;
9523 offset = chunk_rec->stripes[i].offset;
9524 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9525 devid, offset, length);
9526 if (dev_extent_item) {
9527 dev_extent_rec = container_of(dev_extent_item,
9528 struct device_extent_record,
9530 if (dev_extent_rec->objectid != devid ||
9531 dev_extent_rec->offset != offset ||
9532 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9533 dev_extent_rec->length != length) {
9536 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9537 chunk_rec->objectid,
9540 chunk_rec->stripes[i].devid,
9541 chunk_rec->stripes[i].offset,
9542 dev_extent_rec->objectid,
9543 dev_extent_rec->offset,
9544 dev_extent_rec->length);
9547 list_move(&dev_extent_rec->chunk_list,
9548 &chunk_rec->dextents);
9553 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9554 chunk_rec->objectid,
9557 chunk_rec->stripes[i].devid,
9558 chunk_rec->stripes[i].offset);
9565 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() over every chunk and sort the chunks by result.
 *
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 * @good:              optional list receiving chunks whose check returned 0
 * @bad:               list receiving chunks that fail the check
 * @rebuild:           optional list receiving chunks whose check returned
 *                     a positive value
 * @silent:            passed through to check_chunk_refs()
 *
 * After all chunks are processed, the block groups still on
 * block_group_cache->block_groups and the device extents still on
 * dev_extent_cache->no_chunk_orphans are walked; the messages in those
 * loops report them as having no corresponding chunk.
 */
9566 int check_chunks(struct cache_tree *chunk_cache,
9567 struct block_group_tree *block_group_cache,
9568 struct device_extent_tree *dev_extent_cache,
9569 struct list_head *good, struct list_head *bad,
9570 struct list_head *rebuild, int silent)
9572 struct cache_extent *chunk_item;
9573 struct chunk_record *chunk_rec;
9574 struct block_group_record *bg_rec;
9575 struct device_extent_record *dext_rec;
9579 chunk_item = first_cache_extent(chunk_cache);
9580 while (chunk_item) {
9581 chunk_rec = container_of(chunk_item, struct chunk_record,
9583 err = check_chunk_refs(chunk_rec, block_group_cache,
9584 dev_extent_cache, silent);
9587 if (err == 0 && good)
9588 list_add_tail(&chunk_rec->list, good);
9589 if (err > 0 && rebuild)
9590 list_add_tail(&chunk_rec->list, rebuild);
9592 list_add_tail(&chunk_rec->list, bad);
9593 chunk_item = next_cache_extent(chunk_item);
9596 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9599 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9607 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9611 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the device extent records of one device and compare
 * the sum with the byte_used value stored in @dev_rec.
 *
 * The walk starts at the cache entry returned by search_cache_extent2()
 * for (dev_rec->devid, 0) and advances with next_cache_extent(). Each
 * visited record is unlinked from its device_list with list_del_init()
 * and its length is added to total_byte. A difference between total_byte
 * and dev_rec->byte_used is described by the message below.
 *
 * NOTE(review): the loop header, the action taken when a record belongs
 * to another devid, and the return statements are not visible in this
 * listing - confirm them against the full source.
 */
9622 static int check_device_used(struct device_record *dev_rec,
9623 struct device_extent_tree *dext_cache)
9625 struct cache_extent *cache;
9626 struct device_extent_record *dev_extent_rec;
9629 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9631 dev_extent_rec = container_of(cache,
9632 struct device_extent_record,
/* Records whose objectid differs from this devid are tested for here. */
9634 if (dev_extent_rec->objectid != dev_rec->devid)
9637 list_del_init(&dev_extent_rec->device_list);
9638 total_byte += dev_extent_rec->length;
9639 cache = next_cache_extent(cache);
9642 if (total_byte != dev_rec->byte_used) {
9644 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9645 total_byte, dev_rec->byte_used, dev_rec->objectid,
9646 dev_rec->type, dev_rec->offset);
/*
 * Run check_device_used() on every device record stored in the rb-tree
 * @dev_cache (walked with rb_first() / rb_next()), then iterate the
 * dev_extent_cache->no_device_orphans list and describe each entry with
 * the message about a missing device below.
 *
 * NOTE(review): how the per-device result err is folded into the return
 * value is not visible in this listing - confirm against the full source.
 */
9653 /* check btrfs_dev_item -> btrfs_dev_extent */
9654 static int check_devices(struct rb_root *dev_cache,
9655 struct device_extent_tree *dev_extent_cache)
9657 struct rb_node *dev_node;
9658 struct device_record *dev_rec;
9659 struct device_extent_record *dext_rec;
9663 dev_node = rb_first(dev_cache);
9665 dev_rec = container_of(dev_node, struct device_record, node);
9666 err = check_device_used(dev_rec, dev_extent_cache);
9670 dev_node = rb_next(dev_node);
/* Device extents linked on the no_device_orphans list. */
9672 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9675 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9676 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it
 * to the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level, @level_size
 *            are stored in the record fields of the same name.
 * @drop_key  is copied into the drop_key field of the record.
 *            NOTE(review): callers in this file pass NULL here, so the
 *            memcpy() is presumably guarded by a NULL check that is not
 *            visible in this listing - confirm.
 *
 * NOTE(review): the handling of a failed malloc() and the return values
 * are not visible in this listing either.
 */
9683 static int add_root_item_to_list(struct list_head *head,
9684 u64 objectid, u64 bytenr, u64 last_snapshot,
9685 u8 level, u8 drop_level,
9686 int level_size, struct btrfs_key *drop_key)
9689 struct root_item_record *ri_rec;
9690 ri_rec = malloc(sizeof(*ri_rec));
9693 ri_rec->bytenr = bytenr;
9694 ri_rec->objectid = objectid;
9695 ri_rec->level = level;
9696 ri_rec->level_size = level_size;
9697 ri_rec->drop_level = drop_level;
9698 ri_rec->last_snapshot = last_snapshot;
9700 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9701 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list: repeatedly take its first root_item_record and unlink it
 * with list_del_init() until the list is empty.
 *
 * NOTE(review): the record is presumably released with free() on a line
 * that is not visible in this listing - confirm.
 */
9706 static void free_root_item_list(struct list_head *list)
9708 struct root_item_record *ri_rec;
9710 while (!list_empty(list)) {
9711 ri_rec = list_first_entry(list, struct root_item_record,
9713 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each entry the tree block at rec->bytenr is read with
 * read_tree_block(), registered with add_root_to_pending() under
 * rec->objectid, and run_next_block() is called with the record; the
 * buffer is then released and the record is unlinked from @list.
 * Once the list is empty run_next_block() is called again with a NULL
 * record.
 *
 * All cache and tree arguments are forwarded unchanged to
 * add_root_to_pending() and run_next_block().
 *
 * NOTE(review): the bits_nr parameter line, the conditions and loops
 * around the run_next_block() calls, and all return-value handling are
 * not visible in this listing - confirm against the full source.
 */
9718 static int deal_root_from_list(struct list_head *list,
9719 struct btrfs_root *root,
9720 struct block_info *bits,
9722 struct cache_tree *pending,
9723 struct cache_tree *seen,
9724 struct cache_tree *reada,
9725 struct cache_tree *nodes,
9726 struct cache_tree *extent_cache,
9727 struct cache_tree *chunk_cache,
9728 struct rb_root *dev_cache,
9729 struct block_group_tree *block_group_cache,
9730 struct device_extent_tree *dev_extent_cache)
9735 while (!list_empty(list)) {
9736 struct root_item_record *rec;
9737 struct extent_buffer *buf;
/* Always work on the current head of the list. */
9738 rec = list_entry(list->next,
9739 struct root_item_record, list);
9741 buf = read_tree_block(root->fs_info->tree_root,
9742 rec->bytenr, rec->level_size, 0);
/* An unreadable root block is released before the error path. */
9743 if (!extent_buffer_uptodate(buf)) {
9744 free_extent_buffer(buf);
9748 ret = add_root_to_pending(buf, extent_cache, pending,
9749 seen, nodes, rec->objectid);
9753 * To rebuild extent tree, we need deal with snapshot
9754 * one by one, otherwise we deal with node firstly which
9755 * can maximize readahead.
9758 ret = run_next_block(root, bits, bits_nr, &last,
9759 pending, seen, reada, nodes,
9760 extent_cache, chunk_cache,
9761 dev_cache, block_group_cache,
9762 dev_extent_cache, rec);
9766 free_extent_buffer(buf);
9767 list_del(&rec->list);
/* Processing after the list has been emptied: no record is passed. */
9773 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9774 reada, nodes, extent_cache, chunk_cache,
9775 dev_cache, block_group_cache,
9776 dev_extent_cache, NULL);
/*
 * Drive the chunk / extent / device consistency pass for the filesystem
 * that @root belongs to.
 *
 * Visible steps, in order:
 *  - initialise the local caches, the two root item lists and the
 *    excluded_extents tree, and publish some of them through root->fs_info;
 *  - allocate the block_info array bits;
 *  - start the progress task when ctx.progress_enabled is set;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - scan fs_info->tree_root for BTRFS_ROOT_ITEM_KEY items, queueing each
 *    root on normal_trees when the objectid of its drop_progress key is 0
 *    and on dropping_trees (with a drop key) otherwise;
 *  - process normal_trees and then dropping_trees with
 *    deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - stop the progress task and release the caches and lists.
 *
 * NOTE(review): return-value checks, labels and several declarations are
 * not visible in this listing. The two release sequences at the end
 * presumably belong to different exit paths (normal exit and the restart
 * mentioned in the EAGAIN note below) - confirm against the full source.
 */
9786 static int check_chunks_and_extents(struct btrfs_root *root)
9788 struct rb_root dev_cache;
9789 struct cache_tree chunk_cache;
9790 struct block_group_tree block_group_cache;
9791 struct device_extent_tree dev_extent_cache;
9792 struct cache_tree extent_cache;
9793 struct cache_tree seen;
9794 struct cache_tree pending;
9795 struct cache_tree reada;
9796 struct cache_tree nodes;
9797 struct extent_io_tree excluded_extents;
9798 struct cache_tree corrupt_blocks;
9799 struct btrfs_path path;
9800 struct btrfs_key key;
9801 struct btrfs_key found_key;
9803 struct block_info *bits;
9805 struct extent_buffer *leaf;
9807 struct btrfs_root_item ri;
9808 struct list_head dropping_trees;
9809 struct list_head normal_trees;
9810 struct btrfs_root *root1;
/* Start from empty caches and lists. */
9815 dev_cache = RB_ROOT;
9816 cache_tree_init(&chunk_cache);
9817 block_group_tree_init(&block_group_cache);
9818 device_extent_tree_init(&dev_extent_cache);
9820 cache_tree_init(&extent_cache);
9821 cache_tree_init(&seen);
9822 cache_tree_init(&pending);
9823 cache_tree_init(&nodes);
9824 cache_tree_init(&reada);
9825 cache_tree_init(&corrupt_blocks);
9826 extent_io_tree_init(&excluded_extents);
9827 INIT_LIST_HEAD(&dropping_trees);
9828 INIT_LIST_HEAD(&normal_trees);
/*
 * Expose the local trees and the free_extent_hook through fs_info.
 * NOTE(review): any guard around these stores is not visible here.
 */
9831 root->fs_info->excluded_extents = &excluded_extents;
9832 root->fs_info->fsck_extent_cache = &extent_cache;
9833 root->fs_info->free_extent_hook = free_extent_hook;
9834 root->fs_info->corrupt_blocks = &corrupt_blocks;
9838 bits = malloc(bits_nr * sizeof(struct block_info));
9844 if (ctx.progress_enabled) {
9845 ctx.tp = TASK_EXTENTS;
9846 task_start(ctx.info);
/* Queue the tree root, then the chunk root, as normal trees. */
9850 root1 = root->fs_info->tree_root;
9851 level = btrfs_header_level(root1->node);
9852 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9853 root1->node->start, 0, level, 0,
9854 root1->nodesize, NULL);
9857 root1 = root->fs_info->chunk_root;
9858 level = btrfs_header_level(root1->node);
9859 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9860 root1->node->start, 0, level, 0,
9861 root1->nodesize, NULL);
/* Walk the root items stored in the tree root. */
9864 btrfs_init_path(&path);
9867 key.type = BTRFS_ROOT_ITEM_KEY;
9868 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9873 leaf = path.nodes[0];
9874 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9875 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9876 ret = btrfs_next_leaf(root, &path);
9879 leaf = path.nodes[0];
9880 slot = path.slots[0];
9882 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9883 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9884 unsigned long offset;
/* Copy the on-disk root item into the local ri. */
9887 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9888 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9889 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A drop_progress objectid of 0 selects the normal_trees list. */
9890 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9891 level = btrfs_root_level(&ri);
9892 level_size = root->nodesize;
9893 ret = add_root_item_to_list(&normal_trees,
9895 btrfs_root_bytenr(&ri),
9896 last_snapshot, level,
9897 0, level_size, NULL);
/* Otherwise the root goes to dropping_trees with a drop key. */
9901 level = btrfs_root_level(&ri);
9902 level_size = root->nodesize;
9903 objectid = found_key.objectid;
9904 btrfs_disk_key_to_cpu(&found_key,
9906 ret = add_root_item_to_list(&dropping_trees,
9908 btrfs_root_bytenr(&ri),
9909 last_snapshot, level,
9911 level_size, &found_key);
9918 btrfs_release_path(&path);
9921 * check_block can return -EAGAIN if it fixes something, please keep
9922 * this in mind when dealing with return values from these functions, if
9923 * we get -EAGAIN we want to fall through and restart the loop.
9925 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9926 &seen, &reada, &nodes, &extent_cache,
9927 &chunk_cache, &dev_cache, &block_group_cache,
9934 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9935 &pending, &seen, &reada, &nodes,
9936 &extent_cache, &chunk_cache, &dev_cache,
9937 &block_group_cache, &dev_extent_cache);
/* Cross-checks that follow the processing of both root lists. */
9944 ret = check_chunks(&chunk_cache, &block_group_cache,
9945 &dev_extent_cache, NULL, NULL, NULL, 0);
9952 ret = check_extent_refs(root, &extent_cache);
9959 ret = check_devices(&dev_cache, &dev_extent_cache);
/* Teardown: stop the task, detach the fs_info pointers, free the caches. */
9964 task_stop(ctx.info);
9966 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9967 extent_io_tree_cleanup(&excluded_extents);
9968 root->fs_info->fsck_extent_cache = NULL;
9969 root->fs_info->free_extent_hook = NULL;
9970 root->fs_info->corrupt_blocks = NULL;
9971 root->fs_info->excluded_extents = NULL;
9974 free_chunk_cache_tree(&chunk_cache);
9975 free_device_cache_tree(&dev_cache);
9976 free_block_group_tree(&block_group_cache);
9977 free_device_extent_tree(&dev_extent_cache);
9978 free_extent_cache_tree(&seen);
9979 free_extent_cache_tree(&pending);
9980 free_extent_cache_tree(&reada);
9981 free_extent_cache_tree(&nodes);
9982 free_root_item_list(&normal_trees);
9983 free_root_item_list(&dropping_trees);
/*
 * Second release sequence; unlike the first one it also releases
 * extent_cache through free_extent_record_cache().
 */
9986 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9987 free_extent_cache_tree(&seen);
9988 free_extent_cache_tree(&pending);
9989 free_extent_cache_tree(&reada);
9990 free_extent_cache_tree(&nodes);
9991 free_chunk_cache_tree(&chunk_cache);
9992 free_block_group_tree(&block_group_cache);
9993 free_device_cache_tree(&dev_cache);
9994 free_device_extent_tree(&dev_extent_cache);
9995 free_extent_record_cache(&extent_cache);
9996 free_root_item_list(&normal_trees);
9997 free_root_item_list(&dropping_trees);
9998 extent_io_tree_cleanup(&excluded_extents);
/*
 * Implementation notes for check_tree_block_ref():
 *  - The extent tree is searched with the key (bytenr, METADATA_ITEM or
 *    EXTENT_ITEM depending on the SKINNY_METADATA incompat flag, (u64)-1)
 *    and btrfs_previous_extent_item() is then used on the result.
 *  - The level recorded in the item is key.offset for a METADATA_ITEM and
 *    the btrfs_tree_block_info level otherwise.
 *  - Flag, generation, level and ref count disagreements set
 *    BACKREF_MISMATCH; failed lookups set BACKREF_MISSING.
 *  - Inline refs are scanned for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or for a SHARED_BLOCK_REF whose parent
 *    block passes a recursive check at level + 1 with a NULL @eb.
 *  - A keyed TREE_BLOCK_REF item with offset root->objectid is searched
 *    as well. NOTE(review): presumably only when no inline ref matched;
 *    the guard is not visible in this listing - confirm.
 *  - BACKREF_MISSING is reported with error() only when @eb is non-NULL.
 */
10003 * Check backrefs of a tree block given by @bytenr or @eb.
10005 * @root: the root containing the @bytenr or @eb
10006 * @eb: tree block extent buffer, can be NULL
10007 * @bytenr: bytenr of the tree block to search
10008 * @level: tree level of the tree block
10009 * @owner: owner of the tree block
10011 * Return >0 for any error found and output error message
10012 * Return 0 for no error found
10014 static int check_tree_block_ref(struct btrfs_root *root,
10015 struct extent_buffer *eb, u64 bytenr,
10016 int level, u64 owner)
10018 struct btrfs_key key;
10019 struct btrfs_root *extent_root = root->fs_info->extent_root;
10020 struct btrfs_path path;
10021 struct btrfs_extent_item *ei;
10022 struct btrfs_extent_inline_ref *iref;
10023 struct extent_buffer *leaf;
10029 u32 nodesize = root->nodesize;
10032 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10037 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10038 btrfs_header_bytenr(root->node) == bytenr)
10039 tree_reloc_root = 1;
10041 btrfs_init_path(&path);
10042 key.objectid = bytenr;
10043 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10044 key.type = BTRFS_METADATA_ITEM_KEY;
10046 key.type = BTRFS_EXTENT_ITEM_KEY;
10047 key.offset = (u64)-1;
10049 /* Search for the backref in extent tree */
10050 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10052 err |= BACKREF_MISSING;
10055 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10057 err |= BACKREF_MISSING;
10061 leaf = path.nodes[0];
10062 slot = path.slots[0];
10063 btrfs_item_key_to_cpu(leaf, &key, slot);
10065 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* The level and the first inline ref sit at different places per key type. */
10067 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10068 skinny_level = (int)key.offset;
10069 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10071 struct btrfs_tree_block_info *info;
10073 info = (struct btrfs_tree_block_info *)(ei + 1);
10074 skinny_level = btrfs_tree_block_level(leaf, info);
10075 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * Consistency checks between the extent item and the tree block.
 * NOTE(review): these read @eb, which may be NULL; the guard is not
 * visible in this listing - confirm.
 */
10082 if (!(btrfs_extent_flags(leaf, ei) &
10083 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10085 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10086 key.objectid, nodesize,
10087 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10088 err = BACKREF_MISMATCH;
10090 header_gen = btrfs_header_generation(eb);
10091 extent_gen = btrfs_extent_generation(leaf, ei);
10092 if (header_gen != extent_gen) {
10094 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10095 key.objectid, nodesize, header_gen,
10097 err = BACKREF_MISMATCH;
10099 if (level != skinny_level) {
10101 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10102 key.objectid, nodesize, level, skinny_level);
10103 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is expected to have exactly one ref. */
10105 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10107 "extent[%llu %u] is referred by other roots than %llu",
10108 key.objectid, nodesize, root->objectid);
10109 err = BACKREF_MISMATCH;
10114 * Iterate the extent/metadata item to find the exact backref
10116 item_size = btrfs_item_size_nr(leaf, slot);
10117 ptr = (unsigned long)iref;
10118 end = (unsigned long)ei + item_size;
10119 while (ptr < end) {
10120 iref = (struct btrfs_extent_inline_ref *)ptr;
10121 type = btrfs_extent_inline_ref_type(leaf, iref);
10122 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10124 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10125 (offset == root->objectid || offset == owner)) {
10127 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10129 * Backref of tree reloc root points to itself, no need
10130 * to check backref any more.
10132 if (tree_reloc_root)
10135 /* Check if the backref points to valid referencer */
10136 found_ref = !check_tree_block_ref(root, NULL,
10137 offset, level + 1, owner);
10142 ptr += btrfs_extent_inline_ref_size(type);
10146 * Inlined extent item doesn't have what we need, check
10147 * TREE_BLOCK_REF_KEY
10150 btrfs_release_path(&path);
10151 key.objectid = bytenr;
10152 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10153 key.offset = root->objectid;
10155 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10160 err |= BACKREF_MISSING;
10162 btrfs_release_path(&path);
10163 if (eb && (err & BACKREF_MISSING))
10164 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10165 bytenr, nodesize, owner, level);
/*
 * Implementation notes for check_extent_data_item():
 *  - @eb / @slot identify the EXTENT_DATA item; inline file extents and
 *    extents with a disk bytenr of 0 are tested for up front.
 *  - disk_num_bytes and num_bytes are checked for alignment to
 *    root->sectorsize (BYTES_UNALIGNED) and are added to the
 *    data_bytes_allocated / data_bytes_referenced counters.
 *  - The extent item is looked up in the extent tree with the key
 *    (disk_bytenr, EXTENT_ITEM, disk_num_bytes); a missing
 *    BTRFS_EXTENT_FLAG_DATA flag sets BACKREF_MISMATCH.
 *  - Inline refs are scanned for an EXTENT_DATA_REF whose root equals the
 *    header owner of @eb or root->objectid, or for a SHARED_DATA_REF that
 *    is validated through check_tree_block_ref().
 *  - When no inline ref matched, a keyed EXTENT_DATA_REF item and then a
 *    keyed SHARED_DATA_REF item (offset eb->start) are searched.
 *  - BACKREF_MISSING is set when found_dbackref is still 0 at the end.
 *
 * NOTE(review): branch structure, return statements and several
 * declarations are not visible in this listing - confirm against the
 * full source.
 */
10170 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10172 * Return >0 any error found and output error message
10173 * Return 0 for no error found
10175 static int check_extent_data_item(struct btrfs_root *root,
10176 struct extent_buffer *eb, int slot)
10178 struct btrfs_file_extent_item *fi;
10179 struct btrfs_path path;
10180 struct btrfs_root *extent_root = root->fs_info->extent_root;
10181 struct btrfs_key fi_key;
10182 struct btrfs_key dbref_key;
10183 struct extent_buffer *leaf;
10184 struct btrfs_extent_item *ei;
10185 struct btrfs_extent_inline_ref *iref;
10186 struct btrfs_extent_data_ref *dref;
10189 u64 disk_num_bytes;
10190 u64 extent_num_bytes;
10197 int found_dbackref = 0;
10201 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10202 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10204 /* Nothing to check for hole and inline data extents */
10205 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10206 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10209 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10210 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10211 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10213 /* Check unaligned disk_num_bytes and num_bytes */
10214 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10216 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10217 fi_key.objectid, fi_key.offset, disk_num_bytes,
10219 err |= BYTES_UNALIGNED;
10221 data_bytes_allocated += disk_num_bytes;
10223 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10225 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10226 fi_key.objectid, fi_key.offset, extent_num_bytes,
10228 err |= BYTES_UNALIGNED;
10230 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is one accepted backref root. */
10232 owner = btrfs_header_owner(eb);
10234 /* Check the extent item of the file extent in extent tree */
10235 btrfs_init_path(&path);
10236 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10237 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10238 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10240 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to @eb. */
10244 leaf = path.nodes[0];
10245 slot = path.slots[0];
10246 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10248 extent_flags = btrfs_extent_flags(leaf, ei);
10250 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10252 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10253 disk_bytenr, disk_num_bytes,
10254 BTRFS_EXTENT_FLAG_DATA);
10255 err |= BACKREF_MISMATCH;
10258 /* Check data backref inside that extent item */
10259 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10260 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10261 ptr = (unsigned long)iref;
10262 end = (unsigned long)ei + item_size;
10263 while (ptr < end) {
10264 iref = (struct btrfs_extent_inline_ref *)ptr;
10265 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload overlays the offset field of the inline ref. */
10266 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10268 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10269 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10270 if (ref_root == owner || ref_root == root->objectid)
10271 found_dbackref = 1;
10272 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10273 found_dbackref = !check_tree_block_ref(root, NULL,
10274 btrfs_extent_inline_ref_offset(leaf, iref),
10278 if (found_dbackref)
10280 ptr += btrfs_extent_inline_ref_size(type);
10283 if (!found_dbackref) {
10284 btrfs_release_path(&path);
10286 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10287 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10288 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10289 dbref_key.offset = hash_extent_data_ref(root->objectid,
10290 fi_key.objectid, fi_key.offset);
10292 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10293 &dbref_key, &path, 0, 0);
10295 found_dbackref = 1;
10299 btrfs_release_path(&path);
10302 * Neither inlined nor EXTENT_DATA_REF found, try
10303 * SHARED_DATA_REF as last chance.
10305 dbref_key.objectid = disk_bytenr;
10306 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10307 dbref_key.offset = eb->start;
10309 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10310 &dbref_key, &path, 0, 0);
10312 found_dbackref = 1;
10318 if (!found_dbackref)
10319 err |= BACKREF_MISSING;
10320 btrfs_release_path(&path);
10321 if (err & BACKREF_MISSING) {
10322 error("data extent[%llu %llu] backref lost",
10323 disk_bytenr, disk_num_bytes);
/*
 * Implementation notes for query_tree_block_level():
 *  - The extent tree is searched with (bytenr, METADATA_ITEM, (u64)-1)
 *    followed by btrfs_previous_extent_item().
 *  - The item found must carry BTRFS_EXTENT_FLAG_TREE_BLOCK.
 *  - The backref level is key.offset for a METADATA_ITEM and the
 *    btrfs_tree_block_info level otherwise.
 *  - The block itself is then read with the generation taken from the
 *    extent item, and its header level is compared with the backref
 *    level; the header level is what gets returned when they agree.
 *
 * NOTE(review): the error return values and the branch taken on a level
 * disagreement are not visible in this listing - confirm.
 */
10329 * Get real tree block level for the case like shared block
10330 * Return >= 0 as tree level
10331 * Return <0 for error
10333 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10335 struct extent_buffer *eb;
10336 struct btrfs_path path;
10337 struct btrfs_key key;
10338 struct btrfs_extent_item *ei;
10341 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10346 /* Search extent tree for extent generation and level */
10347 key.objectid = bytenr;
10348 key.type = BTRFS_METADATA_ITEM_KEY;
10349 key.offset = (u64)-1;
10351 btrfs_init_path(&path);
10352 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10355 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10363 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10364 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10365 struct btrfs_extent_item);
10366 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extent items flagged as tree blocks are of interest. */
10367 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10372 /* Get transid for later read_tree_block() check */
10373 transid = btrfs_extent_generation(path.nodes[0], ei);
10375 /* Get backref level as one source */
10376 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10377 backref_level = key.offset;
10379 struct btrfs_tree_block_info *info;
10381 info = (struct btrfs_tree_block_info *)(ei + 1);
10382 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10384 btrfs_release_path(&path);
10386 /* Get level from tree block as an alternative source */
10387 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10388 if (!extent_buffer_uptodate(eb)) {
10389 free_extent_buffer(eb);
10392 header_level = btrfs_header_level(eb);
10393 free_extent_buffer(eb);
/* Both sources have to agree before the level is returned. */
10395 if (header_level != backref_level)
10397 return header_level;
/* Shared exit that releases the path. */
10400 btrfs_release_path(&path);
/*
 * Implementation notes for check_tree_block_backref():
 *  - @root_id names the referencing root, which is loaded with
 *    btrfs_read_fs_root() using the key (root_id, ROOT_ITEM, (u64)-1).
 *  - The block at @bytenr is read to obtain its first key (node key or
 *    item key); a block with no items at level 0 is treated as an empty
 *    tree and its key is not checked.
 *  - The root is then searched for that key with path.lowest_level set to
 *    @level, and the node found at that level must have the same bytenr
 *    and header level, otherwise REFERENCER_MISMATCH is set.
 *  - Failed lookups and reads set REFERENCER_MISSING, which is reported
 *    with one of the two lost referencer messages at the end.
 *
 * NOTE(review): the conditions selecting between the node key and item
 * key reads, and between the two final messages, are not visible in this
 * listing - confirm against the full source.
 */
10405 * Check if a tree block backref is valid (points to a valid tree block)
10406 * if level == -1, level will be resolved
10407 * Return >0 for any error found and print error message
10409 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10410 u64 bytenr, int level)
10412 struct btrfs_root *root;
10413 struct btrfs_key key;
10414 struct btrfs_path path;
10415 struct extent_buffer *eb;
10416 struct extent_buffer *node;
10417 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10421 /* Query level for level == -1 special case */
10423 level = query_tree_block_level(fs_info, bytenr);
10425 err |= REFERENCER_MISSING;
10429 key.objectid = root_id;
10430 key.type = BTRFS_ROOT_ITEM_KEY;
10431 key.offset = (u64)-1;
10433 root = btrfs_read_fs_root(fs_info, &key);
10434 if (IS_ERR(root)) {
10435 err |= REFERENCER_MISSING;
10439 /* Read out the tree block to get item/node key */
10440 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10441 if (!extent_buffer_uptodate(eb)) {
10442 err |= REFERENCER_MISSING;
10443 free_extent_buffer(eb);
10447 /* Empty tree, no need to check key */
10448 if (!btrfs_header_nritems(eb) && !level) {
10449 free_extent_buffer(eb);
/* Take the first key of the block; key is reused as the search key. */
10454 btrfs_node_key_to_cpu(eb, &key, 0);
10456 btrfs_item_key_to_cpu(eb, &key, 0);
10458 free_extent_buffer(eb);
10460 btrfs_init_path(&path);
/* Stop the descent at the level of the block being verified. */
10461 path.lowest_level = level;
10462 /* Search with the first key, to ensure we can reach it */
10463 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10465 err |= REFERENCER_MISSING;
10469 node = path.nodes[level];
10470 if (btrfs_header_bytenr(node) != bytenr) {
10472 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10473 bytenr, nodesize, bytenr,
10474 btrfs_header_bytenr(node));
10475 err |= REFERENCER_MISMATCH;
10477 if (btrfs_header_level(node) != level) {
10479 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10480 bytenr, nodesize, level,
10481 btrfs_header_level(node));
10482 err |= REFERENCER_MISMATCH;
10486 btrfs_release_path(&path);
10488 if (err & REFERENCER_MISSING) {
10490 error("extent [%llu %d] lost referencer (owner: %llu)",
10491 bytenr, nodesize, root_id);
10494 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10495 bytenr, nodesize, root_id, level);
/*
 * Implementation notes for is_tree_reloc_root():
 *  - The candidate reloc root is loaded, bypassing the root cache, with
 *    the key (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, header owner of @eb).
 *  - @eb is a tree reloc root when its header bytenr equals the header
 *    bytenr of the node of that root.
 *  - The root obtained from btrfs_read_fs_root_no_cache() is released
 *    with btrfs_free_fs_root() before returning.
 */
10502 * Check if tree block @eb is tree reloc root.
10503 * Return 0 if it's not or any problem happens
10504 * Return 1 if it's a tree reloc root
10506 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10507 struct extent_buffer *eb)
10509 struct btrfs_root *tree_reloc_root;
10510 struct btrfs_key key;
10511 u64 bytenr = btrfs_header_bytenr(eb);
10512 u64 owner = btrfs_header_owner(eb);
10515 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10516 key.offset = owner;
10517 key.type = BTRFS_ROOT_ITEM_KEY;
10519 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
/* A failed lookup is treated the same as not being a reloc root. */
10520 if (IS_ERR(tree_reloc_root))
10523 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10525 btrfs_free_fs_root(tree_reloc_root);
/*
 * Implementation notes for check_shared_block_backref():
 *  - The block at @parent is read and must be up to date.
 *  - When @parent equals @bytenr the block is tested with
 *    is_tree_reloc_root(), since a tree reloc root refers to itself.
 *  - Otherwise the parent must sit exactly one level above @level and one
 *    of its node block pointers must equal @bytenr.
 *  - REFERENCER_MISSING is returned, with an error message, when
 *    found_parent is still 0 after these checks.
 *
 * NOTE(review): the statements that set found_parent and the jumps out
 * of the checks are not visible in this listing - confirm.
 */
10530 * Check referencer for shared block backref
10531 * If level == -1, this function will resolve the level.
10533 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10534 u64 parent, u64 bytenr, int level)
10536 struct extent_buffer *eb;
10537 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10539 int found_parent = 0;
10542 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10543 if (!extent_buffer_uptodate(eb))
10547 level = query_tree_block_level(fs_info, bytenr);
10551 /* It's possible it's a tree reloc root */
10552 if (parent == bytenr) {
10553 if (is_tree_reloc_root(fs_info, eb))
/* A real parent is exactly one level above its child. */
10558 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that targets @bytenr. */
10561 nr = btrfs_header_nritems(eb);
10562 for (i = 0; i < nr; i++) {
10563 if (bytenr == btrfs_node_blockptr(eb, i)) {
10569 free_extent_buffer(eb);
10570 if (!found_parent) {
10572 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10573 bytenr, nodesize, parent, level);
10574 return REFERENCER_MISSING;
/*
 * Implementation notes for check_extent_data_backref():
 *  - @root_id, @objectid and @offset are the fields of the data backref;
 *    @bytenr and @len describe the data extent; @count is the number of
 *    references the backref claims.
 *  - The extent tree lookup at the top locates the EXTENT_ITEM for
 *    @bytenr. NOTE(review): per the header comment this presumably runs
 *    only when len == 0; the guard is not visible in this listing.
 *  - The referencing root is loaded with btrfs_read_fs_root() and its
 *    EXTENT_DATA items for @objectid are walked with btrfs_next_item().
 *  - An item matches when its disk bytenr and disk num bytes equal
 *    @bytenr and @len and key.offset minus the file extent offset equals
 *    the value on the continuation line, which is not visible here.
 *  - REFERENCER_MISSING is returned, with an error message, when
 *    found_count differs from @count.
 */
10580 * Check referencer for normal (inlined) data ref
10581 * If len == 0, it will be resolved by searching in extent tree
10583 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10584 u64 root_id, u64 objectid, u64 offset,
10585 u64 bytenr, u64 len, u32 count)
10587 struct btrfs_root *root;
10588 struct btrfs_root *extent_root = fs_info->extent_root;
10589 struct btrfs_key key;
10590 struct btrfs_path path;
10591 struct extent_buffer *leaf;
10592 struct btrfs_file_extent_item *fi;
10593 u32 found_count = 0;
/* Locate the extent item that describes @bytenr. */
10598 key.objectid = bytenr;
10599 key.type = BTRFS_EXTENT_ITEM_KEY;
10600 key.offset = (u64)-1;
10602 btrfs_init_path(&path);
10603 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10606 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10609 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10610 if (key.objectid != bytenr ||
10611 key.type != BTRFS_EXTENT_ITEM_KEY)
10614 btrfs_release_path(&path);
/* Load the root that is supposed to reference the extent. */
10616 key.objectid = root_id;
10617 key.type = BTRFS_ROOT_ITEM_KEY;
10618 key.offset = (u64)-1;
10619 btrfs_init_path(&path);
10621 root = btrfs_read_fs_root(fs_info, &key);
10625 key.objectid = objectid;
10626 key.type = BTRFS_EXTENT_DATA_KEY;
10628 * It can be nasty as data backref offset is
10629 * file offset - file extent offset, which is smaller or
10630 * equal to original backref offset. The only special case is
10631 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set makes the search start at file offset 0. */
10633 key.offset = offset & (1ULL << 63) ? 0 : offset;
10635 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10640 * Search afterwards to get correct one
10641 * NOTE: As we must do a comprehensive check on the data backref to
10642 * make sure the dref count also matches, we must iterate all file
10643 * extents for that inode.
10646 leaf = path.nodes[0];
10647 slot = path.slots[0];
10649 if (slot >= btrfs_header_nritems(leaf))
10651 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another type are tested for here. */
10652 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10654 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10656 * Except normal disk bytenr and disk num bytes, we still
10657 * need to do extra check on dbackref offset as
10658 * dbackref offset = file_offset - file_extent_offset
10660 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10661 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10662 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10667 ret = btrfs_next_item(root, &path);
10672 btrfs_release_path(&path);
10673 if (found_count != count) {
10675 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10676 bytenr, len, root_id, objectid, offset, count, found_count);
10677 return REFERENCER_MISSING;
/*
 * Implementation notes for check_shared_data_backref():
 *  - The block at @parent is read and must be up to date.
 *  - Its items are scanned for EXTENT_DATA items that are not inline and
 *    whose disk bytenr equals @bytenr.
 *  - REFERENCER_MISSING is returned, with an error message, when
 *    found_parent is still 0 after the scan.
 *
 * NOTE(review): the statements that set found_parent and skip
 * non-matching items are not visible in this listing - confirm.
 */
10683 * Check if the referencer of a shared data backref exists
10685 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10686 u64 parent, u64 bytenr)
10688 struct extent_buffer *eb;
10689 struct btrfs_key key;
10690 struct btrfs_file_extent_item *fi;
10691 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10693 int found_parent = 0;
10696 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10697 if (!extent_buffer_uptodate(eb))
10700 nr = btrfs_header_nritems(eb);
10701 for (i = 0; i < nr; i++) {
10702 btrfs_item_key_to_cpu(eb, &key, i);
/* Only file extent items can reference a data extent. */
10703 if (key.type != BTRFS_EXTENT_DATA_KEY)
10706 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested for separately. */
10707 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10710 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10717 free_extent_buffer(eb);
10718 if (!found_parent) {
10719 error("shared extent %llu referencer lost (parent: %llu)",
10721 return REFERENCER_MISSING;
/*
 * Implementation notes for check_extent_item():
 *  - @eb / @slot identify an EXTENT_ITEM or METADATA_ITEM in the extent
 *    tree. The global bytes_used counter grows by key.offset for an
 *    EXTENT_ITEM key and by nodesize on the other visible path.
 *  - Items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - Metadata extents are passed to check_crossing_stripes()
 *    (CROSSING_STRIPE_BOUNDARY).
 *  - The tree level comes from btrfs_tree_block_info for metadata stored
 *    under an EXTENT_ITEM key and from key.offset otherwise.
 *  - Every inline ref is dispatched on its type to
 *    check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); any
 *    other type sets UNKNOWN_TYPE. Walking past the item end sets
 *    ITEM_SIZE_MISMATCH.
 *
 * NOTE(review): the loop construct around the inline ref walk, the
 * accumulation of ret into err and the return statements are not visible
 * in this listing - confirm against the full source.
 */
10727 * This function will check a given extent item, including its backref and
10728 * itself (like crossing stripe boundary and type)
10730 * Since we don't use extent_record anymore, introduce new error bit
10732 static int check_extent_item(struct btrfs_fs_info *fs_info,
10733 struct extent_buffer *eb, int slot)
10735 struct btrfs_extent_item *ei;
10736 struct btrfs_extent_inline_ref *iref;
10737 struct btrfs_extent_data_ref *dref;
10741 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10742 u32 item_size = btrfs_item_size_nr(eb, slot);
10747 struct btrfs_key key;
10751 btrfs_item_key_to_cpu(eb, &key, slot);
10752 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10753 bytes_used += key.offset;
10755 bytes_used += nodesize;
10757 if (item_size < sizeof(*ei)) {
10759 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10760 * old thing when on disk format is still un-determined.
10761 * No need to care about it anymore
10763 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10767 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10768 flags = btrfs_extent_flags(eb, ei);
10770 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10772 if (metadata && check_crossing_stripes(global_info, key.objectid,
10774 error("bad metadata [%llu, %llu) crossing stripe boundary",
10775 key.objectid, key.objectid + nodesize);
10776 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr walks the bytes that follow the fixed extent item header. */
10779 ptr = (unsigned long)(ei + 1);
10781 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10782 /* Old EXTENT_ITEM metadata */
10783 struct btrfs_tree_block_info *info;
10785 info = (struct btrfs_tree_block_info *)ptr;
10786 level = btrfs_tree_block_level(eb, info);
10787 ptr += sizeof(struct btrfs_tree_block_info);
10789 /* New METADATA_ITEM */
10790 level = key.offset;
10792 end = (unsigned long)ei + item_size;
10795 /* Reached extent item end normally */
10799 /* Beyond extent item end, wrong item size */
10801 err |= ITEM_SIZE_MISMATCH;
10802 error("extent item at bytenr %llu slot %d has wrong size",
10807 /* Now check every backref in this extent item */
10808 iref = (struct btrfs_extent_inline_ref *)ptr;
10809 type = btrfs_extent_inline_ref_type(eb, iref);
10810 offset = btrfs_extent_inline_ref_offset(eb, iref);
10812 case BTRFS_TREE_BLOCK_REF_KEY:
10813 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10817 case BTRFS_SHARED_BLOCK_REF_KEY:
10818 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10822 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the offset field of the inline ref. */
10823 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10824 ret = check_extent_data_backref(fs_info,
10825 btrfs_extent_data_ref_root(eb, dref),
10826 btrfs_extent_data_ref_objectid(eb, dref),
10827 btrfs_extent_data_ref_offset(eb, dref),
10828 key.objectid, key.offset,
10829 btrfs_extent_data_ref_count(eb, dref));
10832 case BTRFS_SHARED_DATA_REF_KEY:
10833 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10837 error("extent[%llu %d %llu] has unknown ref type: %d",
10838 key.objectid, key.type, key.offset, type);
10839 err |= UNKNOWN_TYPE;
/* Advance by the size of the inline ref that was just examined. */
10843 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Implementation notes for check_dev_extent_item():
 *  - @eb / @slot identify a DEV_EXTENT item; its key supplies the devid
 *    (objectid) and the device offset (offset) that are compared below.
 *  - The chunk named by the dev extent is looked up in the chunk tree
 *    with the key (chunk_objectid, CHUNK_ITEM, chunk_offset).
 *  - The chunk length is compared with the dev extent length, and the
 *    chunk stripes are scanned for one whose devid and offset match the
 *    dev extent key.
 *  - REFERENCER_MISSING is returned, with an error message, when
 *    found_chunk is still 0 afterwards.
 *
 * NOTE(review): the assignment of l, the statements that set found_chunk
 * and the jumps taken on lookup failure are not visible in this listing -
 * confirm against the full source.
 */
10851 * Check if a dev extent item is referred correctly by its chunk
10853 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10854 struct extent_buffer *eb, int slot)
10856 struct btrfs_root *chunk_root = fs_info->chunk_root;
10857 struct btrfs_dev_extent *ptr;
10858 struct btrfs_path path;
10859 struct btrfs_key chunk_key;
10860 struct btrfs_key devext_key;
10861 struct btrfs_chunk *chunk;
10862 struct extent_buffer *l;
10866 int found_chunk = 0;
10869 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10870 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10871 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to. */
10873 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10874 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10875 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10877 btrfs_init_path(&path);
10878 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10883 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10884 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that points back at this dev extent. */
10887 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10888 for (i = 0; i < num_stripes; i++) {
10889 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10890 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10892 if (devid == devext_key.objectid &&
10893 offset == devext_key.offset) {
10899 btrfs_release_path(&path);
10900 if (!found_chunk) {
10902 "device extent[%llu, %llu, %llu] did not find the related chunk",
10903 devext_key.objectid, devext_key.offset, length);
10904 return REFERENCER_MISSING;
10910 * Check if the used space is correct with the dev item
/*
 * check_dev_item - verify the bytes_used field of one dev item.
 *
 * @fs_info: filesystem being checked; its dev_root is searched
 * @eb:      leaf that holds the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Walks the DEV_EXTENT items of the device in the dev tree, sums their
 * lengths into total and compares the sum with the bytes_used value read
 * from the dev item.  Returns ACCOUNTING_MISMATCH when the two differ and
 * REFERENCER_MISSING on the error path that follows the initial search.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * condition guarding the first error path, the loop header and the final
 * return); confirm those against the complete file.
 * NOTE(review): the mismatch message re-reads the item key into key but then
 * prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id instead of
 * the key fields -- this looks unintended.
 */
10912 static int check_dev_item(struct btrfs_fs_info *fs_info,
10913 struct extent_buffer *eb, int slot)
10915 struct btrfs_root *dev_root = fs_info->dev_root;
10916 struct btrfs_dev_item *dev_item;
10917 struct btrfs_path path;
10918 struct btrfs_key key;
10919 struct btrfs_dev_extent *ptr;
10925 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10926 dev_id = btrfs_device_id(eb, dev_item);
10927 used = btrfs_device_bytes_used(eb, dev_item);
/* Search key for the dev extents that belong to this device */
10929 key.objectid = dev_id;
10930 key.type = BTRFS_DEV_EXTENT_KEY;
10933 btrfs_init_path(&path);
10934 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10936 btrfs_item_key_to_cpu(eb, &key, slot);
10937 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10938 key.objectid, key.type, key.offset);
10939 btrfs_release_path(&path);
10940 return REFERENCER_MISSING;
10943 /* Iterate dev_extents to calculate the used space of a device */
10945 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10948 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10949 if (key.objectid > dev_id)
10951 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10954 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10955 struct btrfs_dev_extent);
10956 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10958 ret = btrfs_next_item(dev_root, &path);
10962 btrfs_release_path(&path);
/* The summed dev extent lengths must equal bytes_used of the dev item */
10964 if (used != total) {
10965 btrfs_item_key_to_cpu(eb, &key, slot);
10967 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10968 total, used, BTRFS_ROOT_TREE_OBJECTID,
10969 BTRFS_DEV_EXTENT_KEY, dev_id);
10970 return ACCOUNTING_MISMATCH;
10976 * Check a block group item with its referencer (chunk) and its used space
10977 * with extent/metadata item
/*
 * check_block_group_item - verify one block group item.
 *
 * @fs_info: filesystem being checked; chunk_root and extent_root are searched
 * @eb:      leaf that holds the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Two checks are performed and their failures are OR-ed into err:
 *  - the chunk tree is searched for the chunk item keyed by the block group
 *    bytenr (REFERENCER_MISSING in the not-found path) and its length is
 *    compared (REFERENCER_MISMATCH);
 *  - the extent tree is walked from the block group bytenr up to its end,
 *    EXTENT_ITEM / METADATA_ITEM entries are accounted into total, and the
 *    DATA / TREE_BLOCK flag of each extent is checked against the block
 *    group flags (CHUNK_TYPE_MISMATCH).  Finally total is compared with the
 *    used value of the block group item (ACCOUNTING_MISMATCH).
 *
 * NOTE(review): lines are missing from this listing (declarations, several
 * conditions and branch bodies -- including what is added to total for a
 * METADATA_ITEM -- and the return statement).
 */
10979 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10980 struct extent_buffer *eb, int slot)
10982 struct btrfs_root *extent_root = fs_info->extent_root;
10983 struct btrfs_root *chunk_root = fs_info->chunk_root;
10984 struct btrfs_block_group_item *bi;
10985 struct btrfs_block_group_item bg_item;
10986 struct btrfs_path path;
10987 struct btrfs_key bg_key;
10988 struct btrfs_key chunk_key;
10989 struct btrfs_key extent_key;
10990 struct btrfs_chunk *chunk;
10991 struct extent_buffer *leaf;
10992 struct btrfs_extent_item *ei;
10993 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11001 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11002 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
11003 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11004 used = btrfs_block_group_used(&bg_item);
11005 bg_flags = btrfs_block_group_flags(&bg_item);
/* Key of the chunk item that should back this block group */
11007 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11008 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11009 chunk_key.offset = bg_key.objectid;
11011 btrfs_init_path(&path);
11012 /* Search for the referencer chunk */
11013 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11016 "block group[%llu %llu] did not find the related chunk item",
11017 bg_key.objectid, bg_key.offset);
11018 err |= REFERENCER_MISSING;
11020 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11021 struct btrfs_chunk);
11022 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11025 "block group[%llu %llu] related chunk item length does not match",
11026 bg_key.objectid, bg_key.offset);
11027 err |= REFERENCER_MISMATCH;
11030 btrfs_release_path(&path);
11032 /* Search from the block group bytenr */
11033 extent_key.objectid = bg_key.objectid;
11034 extent_key.type = 0;
11035 extent_key.offset = 0;
11037 btrfs_init_path(&path);
11038 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11042 /* Iterate extent tree to account used space */
11044 leaf = path.nodes[0];
11046 /* Search slot can point to the last item beyond leaf nritems */
11047 if (path.slots[0] >= btrfs_header_nritems(leaf))
11050 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past this block group */
11051 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11054 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11055 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11057 if (extent_key.objectid < bg_key.objectid)
11060 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11063 total += extent_key.offset;
11065 ei = btrfs_item_ptr(leaf, path.slots[0],
11066 struct btrfs_extent_item);
11067 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group, tree blocks SYSTEM or METADATA */
11068 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11069 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11071 "bad extent[%llu, %llu) type mismatch with chunk",
11072 extent_key.objectid,
11073 extent_key.objectid + extent_key.offset);
11074 err |= CHUNK_TYPE_MISMATCH;
11076 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11077 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11078 BTRFS_BLOCK_GROUP_METADATA))) {
11080 "bad extent[%llu, %llu) type mismatch with chunk",
11081 extent_key.objectid,
11082 extent_key.objectid + nodesize);
11083 err |= CHUNK_TYPE_MISMATCH;
11087 ret = btrfs_next_item(extent_root, &path);
11093 btrfs_release_path(&path);
11095 if (total != used) {
11097 "block group[%llu %llu] used %llu but extent items used %llu",
11098 bg_key.objectid, bg_key.offset, used, total);
11099 err |= ACCOUNTING_MISMATCH;
11105 * Check a chunk item.
11106 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - verify one chunk item and everything it refers to.
 *
 * @fs_info: filesystem being checked; extent_root and dev_root are searched
 * @eb:      leaf that holds the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Checks, with failures OR-ed into err:
 *  - the chunk length is aligned to the sector size (BYTES_UNALIGNED);
 *  - the type has at least one BTRFS_BLOCK_GROUP_TYPE_MASK bit and at most
 *    one profile bit (UNKNOWN_TYPE);
 *  - a block group item keyed (chunk offset, BLOCK_GROUP_ITEM, length) exists
 *    in the extent tree and carries the same flags (REFERENCER_MISSING);
 *  - every stripe has a dev extent in the dev tree whose chunk objectid,
 *    chunk offset and length match this chunk (BACKREF_MISSING).
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * conditions on the search results, labels and the return statement).
 * NOTE(review): a flags mismatch sets REFERENCER_MISSING although the block
 * group item was found; a MISMATCH code may have been intended.
 * NOTE(review): the stripe message prints chunk_key.objectid as the start of
 * the range while every other message here prints chunk_key.offset.
 */
11108 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11109 struct extent_buffer *eb, int slot)
11111 struct btrfs_root *extent_root = fs_info->extent_root;
11112 struct btrfs_root *dev_root = fs_info->dev_root;
11113 struct btrfs_path path;
11114 struct btrfs_key chunk_key;
11115 struct btrfs_key bg_key;
11116 struct btrfs_key devext_key;
11117 struct btrfs_chunk *chunk;
11118 struct extent_buffer *leaf;
11119 struct btrfs_block_group_item *bi;
11120 struct btrfs_block_group_item bg_item;
11121 struct btrfs_dev_extent *ptr;
11122 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11134 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11135 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11136 length = btrfs_chunk_length(eb, chunk);
/* The chunk covers the logical range [chunk_key.offset, chunk_end) */
11137 chunk_end = chunk_key.offset + length;
11138 if (!IS_ALIGNED(length, sectorsize)) {
11139 error("chunk[%llu %llu) not aligned to %u",
11140 chunk_key.offset, chunk_end, sectorsize);
11141 err |= BYTES_UNALIGNED;
11145 type = btrfs_chunk_type(eb, chunk);
11146 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11147 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11148 error("chunk[%llu %llu) has no chunk type",
11149 chunk_key.offset, chunk_end);
11150 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11152 if (profile && (profile & (profile - 1))) {
11153 error("chunk[%llu %llu) multiple profiles detected: %llx",
11154 chunk_key.offset, chunk_end, profile);
11155 err |= UNKNOWN_TYPE;
/* Key of the block group item expected for this chunk */
11158 bg_key.objectid = chunk_key.offset;
11159 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11160 bg_key.offset = length;
11162 btrfs_init_path(&path);
11163 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11166 "chunk[%llu %llu) did not find the related block group item",
11167 chunk_key.offset, chunk_end);
11168 err |= REFERENCER_MISSING;
11170 leaf = path.nodes[0];
11171 bi = btrfs_item_ptr(leaf, path.slots[0],
11172 struct btrfs_block_group_item);
11173 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11175 if (btrfs_block_group_flags(&bg_item) != type) {
11177 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11178 chunk_key.offset, chunk_end, type,
11179 btrfs_block_group_flags(&bg_item));
11180 err |= REFERENCER_MISSING;
/* Every stripe must have a dev extent that points back at this chunk */
11184 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11185 for (i = 0; i < num_stripes; i++) {
11186 btrfs_release_path(&path);
11187 btrfs_init_path(&path);
11188 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11189 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11190 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11192 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11195 goto not_match_dev;
11197 leaf = path.nodes[0];
11198 ptr = btrfs_item_ptr(leaf, path.slots[0],
11199 struct btrfs_dev_extent);
11200 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11201 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11202 if (objectid != chunk_key.objectid ||
11203 offset != chunk_key.offset ||
11204 btrfs_dev_extent_length(leaf, ptr) != length)
11205 goto not_match_dev;
11208 err |= BACKREF_MISSING;
11210 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11211 chunk_key.objectid, chunk_end, i);
11214 btrfs_release_path(&path);
11220 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - dispatch every item of a leaf to its checker.
 *
 * @root: tree the leaf belongs to (passed on to check_extent_data_item)
 * @eb:   leaf whose items are checked
 *
 * The key at the current slot is read and the key type selects one of the
 * check_*() helpers of this file.  EXTENT_CSUM items are only accounted:
 * their item size is added to total_csum_bytes.  The slot is advanced while
 * it stays below the number of items in the leaf.
 *
 * NOTE(review): lines are missing from this listing (the switch header, the
 * trailing arguments of some calls, how ret is folded into the result and
 * the return statement).
 */
11222 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11224 struct btrfs_fs_info *fs_info = root->fs_info;
11225 struct btrfs_key key;
11228 struct btrfs_extent_data_ref *dref;
11233 btrfs_item_key_to_cpu(eb, &key, slot);
11237 case BTRFS_EXTENT_DATA_KEY:
11238 ret = check_extent_data_item(root, eb, slot);
11241 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11242 ret = check_block_group_item(fs_info, eb, slot);
11245 case BTRFS_DEV_ITEM_KEY:
11246 ret = check_dev_item(fs_info, eb, slot);
11249 case BTRFS_CHUNK_ITEM_KEY:
11250 ret = check_chunk_item(fs_info, eb, slot);
11253 case BTRFS_DEV_EXTENT_KEY:
11254 ret = check_dev_extent_item(fs_info, eb, slot);
11257 case BTRFS_EXTENT_ITEM_KEY:
11258 case BTRFS_METADATA_ITEM_KEY:
11259 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are accounted by size only */
11262 case BTRFS_EXTENT_CSUM_KEY:
11263 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items use the same checkers as inline refs */
11265 case BTRFS_TREE_BLOCK_REF_KEY:
11266 ret = check_tree_block_backref(fs_info, key.offset,
11270 case BTRFS_EXTENT_DATA_REF_KEY:
11271 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11272 ret = check_extent_data_backref(fs_info,
11273 btrfs_extent_data_ref_root(eb, dref),
11274 btrfs_extent_data_ref_objectid(eb, dref),
11275 btrfs_extent_data_ref_offset(eb, dref),
11277 btrfs_extent_data_ref_count(eb, dref));
11280 case BTRFS_SHARED_BLOCK_REF_KEY:
11281 ret = check_shared_block_backref(fs_info, key.offset,
11285 case BTRFS_SHARED_DATA_REF_KEY:
11286 ret = check_shared_data_backref(fs_info, key.offset,
11294 if (++slot < btrfs_header_nritems(eb))
11301 * Helper function for later fs/subvol tree check. To determine if a tree
11302 * block should be checked.
11303 * This function ensures that only the direct referencer with the lowest
11304 * rootid checks a fs/subvolume tree block.
11306 * Backref check at extent tree would detect errors like missing subvolume
11307 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that checks tree block @eb.
 *
 * @root: fs/subvolume root that is currently being traversed
 * @eb:   tree block reached through @root
 *
 * The extent item of the block is located in the extent tree (search with a
 * METADATA_ITEM key at offset -1, then btrfs_previous_extent_item on the
 * block bytenr).  The inline refs that follow the extent item -- directly for
 * a METADATA_ITEM, after the btrfs_tree_block_info otherwise -- are walked.
 * When a TREE_BLOCK_REF whose offset is lower than root->objectid is found
 * the path is released; per the comment in the loop this is the case where
 * another root with a lower id is the one that checks the block.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * handling of the two lookups and every return statement), so the exact
 * return values are not visible here -- the caller treats a zero result as
 * skip.
 */
11309 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11311 struct btrfs_root *extent_root = root->fs_info->extent_root;
11312 struct btrfs_key key;
11313 struct btrfs_path path;
11314 struct extent_buffer *leaf;
11316 struct btrfs_extent_item *ei;
11322 struct btrfs_extent_inline_ref *iref;
11325 btrfs_init_path(&path);
/* Largest possible key for this bytenr; the item itself sorts before it */
11326 key.objectid = btrfs_header_bytenr(eb);
11327 key.type = BTRFS_METADATA_ITEM_KEY;
11328 key.offset = (u64)-1;
11331 * Any failure in backref resolving means we can't determine
11332 * whom the tree block belongs to.
11333 * So in that case, we need to check that tree block
11335 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11339 ret = btrfs_previous_extent_item(extent_root, &path,
11340 btrfs_header_bytenr(eb));
11344 leaf = path.nodes[0];
11345 slot = path.slots[0];
11346 btrfs_item_key_to_cpu(leaf, &key, slot);
11347 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11349 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11350 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11352 struct btrfs_tree_block_info *info;
11354 info = (struct btrfs_tree_block_info *)(ei + 1);
11355 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11358 item_size = btrfs_item_size_nr(leaf, slot);
11359 ptr = (unsigned long)iref;
11360 end = (unsigned long)ei + item_size;
11361 while (ptr < end) {
11362 iref = (struct btrfs_extent_inline_ref *)ptr;
11363 type = btrfs_extent_inline_ref_type(leaf, iref);
11364 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11367 * We only check the tree block if current root is
11368 * the lowest referencer of it.
11370 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11371 offset < root->objectid) {
11372 btrfs_release_path(&path);
11376 ptr += btrfs_extent_inline_ref_size(type);
11379 * Normally we should also check keyed tree block ref, but that may be
11380 * very time consuming. Inlined ref should already make us skip a lot
11381 * of refs now. So skip search keyed tree block ref.
11385 btrfs_release_path(&path);
11390 * Traversal function for tree block. We will do:
11391 * 1) Skip shared fs/subvolume tree blocks
11392 * 2) Update related bytes accounting
11393 * 3) Pre-order traversal
/*
 * traverse_tree_block - check a tree block and, recursively, its children.
 *
 * @root: tree that is being traversed
 * @node: tree block to check
 *
 * Steps visible in this listing:
 *  - for fs trees, should_check() decides whether this root handles the
 *    block at all;
 *  - the global byte counters (total_btree_bytes, total_fs_tree_bytes,
 *    total_extent_tree_bytes) are updated and found_old_backref is set for a
 *    TREE_RELOC-owned block with mixed backref revision and no RELOC flag;
 *  - check_tree_block_ref() verifies the block itself; a failure is reported
 *    with the text force continue check;
 *  - a leaf adds its free space to btree_space_waste and is passed to
 *    check_leaf_items();
 *  - a node adds its unused key pointer slots to btree_space_waste and every
 *    child is read with read_tree_block() and traversed by a recursive call.
 *    The loop also tests whether the child key is behind the drop_progress
 *    of the root (is_dropped_key at drop_level).
 *
 * NOTE(review): lines are missing from this listing (declarations, branch
 * bodies, error accumulation and return statements).
 */
11395 static int traverse_tree_block(struct btrfs_root *root,
11396 struct extent_buffer *node)
11398 struct extent_buffer *eb;
11399 struct btrfs_key key;
11400 struct btrfs_key drop_key;
11408 * Skip shared fs/subvolume tree block, in that case they will
11409 * be checked by referencer with lowest rootid
11411 if (is_fstree(root->objectid) && !should_check(root, node))
11414 /* Update bytes accounting */
11415 total_btree_bytes += node->len;
11416 if (fs_root_objectid(btrfs_header_owner(node)))
11417 total_fs_tree_bytes += node->len;
11418 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11419 total_extent_tree_bytes += node->len;
11420 if (!found_old_backref &&
11421 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11422 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11423 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11424 found_old_backref = 1;
11426 /* pre-order traversal, check itself first */
11427 level = btrfs_header_level(node);
11428 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11429 btrfs_header_level(node),
11430 btrfs_header_owner(node));
11434 "check %s failed root %llu bytenr %llu level %d, force continue check",
11435 level ? "node":"leaf", root->objectid,
11436 btrfs_header_bytenr(node), btrfs_header_level(node));
11439 btree_space_waste += btrfs_leaf_free_space(root, node);
11440 ret = check_leaf_items(root, node);
11445 nr = btrfs_header_nritems(node);
11446 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of a node count as wasted btree space */
11447 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11448 sizeof(struct btrfs_key_ptr);
11450 /* Then check all its children */
11451 for (i = 0; i < nr; i++) {
11452 u64 blocknr = btrfs_node_blockptr(node, i);
11454 btrfs_node_key_to_cpu(node, &key, i);
11455 if (level == root->root_item.drop_level &&
11456 is_dropped_key(&key, &drop_key))
11460 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11461 * to call the function itself.
11463 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11464 if (extent_buffer_uptodate(eb)) {
11465 ret = traverse_tree_block(root, eb);
11468 free_extent_buffer(eb);
11475 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - traverse every tree of the filesystem.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * The chunk tree and the tree root are traversed first.  Then the ROOT_ITEM
 * entries of the tree root are iterated, starting at the extent tree
 * objectid; each root is read (without caching for TREE_RELOC roots),
 * traversed with traverse_tree_block() and, for TREE_RELOC roots, freed
 * again.
 *
 * NOTE(review): lines are missing from this listing (declarations, how the
 * results of the traversals are accumulated, loop control and the return
 * statement).
 * NOTE(review): the first error message contains the typo treet, and the
 * second one prints the objectid with a signed conversion; both are runtime
 * strings and are left untouched here.
 */
11477 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11479 struct btrfs_path path;
11480 struct btrfs_key key;
11481 struct btrfs_root *root1;
11482 struct btrfs_root *cur_root;
11486 root1 = root->fs_info->chunk_root;
11487 ret = traverse_tree_block(root1, root1->node);
11490 root1 = root->fs_info->tree_root;
11491 ret = traverse_tree_block(root1, root1->node);
/* Iterate the root items stored in the tree root, extent tree first */
11494 btrfs_init_path(&path);
11495 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11497 key.type = BTRFS_ROOT_ITEM_KEY;
11499 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11501 error("cannot find extent treet in tree_root");
11506 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11507 if (key.type != BTRFS_ROOT_ITEM_KEY)
11509 key.offset = (u64)-1;
/* Reloc roots are read without the root cache and freed after use below */
11511 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11512 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11515 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11516 if (IS_ERR(cur_root) || !cur_root) {
11517 error("failed to read tree: %lld", key.objectid);
11521 ret = traverse_tree_block(cur_root, cur_root->node);
11524 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11525 btrfs_free_fs_root(cur_root);
11527 ret = btrfs_next_item(root1, &path);
11533 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a fresh, empty root block.
 *
 * @trans:     running transaction
 * @root:      root to re-initialise
 * @overwrite: NOTE(review): presumably selects re-using the current root
 *             block instead of allocating a new one; the branch that reads
 *             it is not visible in this listing -- confirm
 *
 * Visible steps: a block c is obtained (btrfs_alloc_free_block in one path),
 * its header is zeroed and filled in (level, bytenr, generation, backref
 * revision, owner, fsid, chunk tree uuid) and the buffer is marked dirty.
 * When the new block has the same start as the old root node the root item
 * generation and level are updated and written with btrfs_update_root().
 * The old node is released and the root is added to the dirty list.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * overwrite/allocate branch, error handling and the return statement).
 */
11537 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11538 struct btrfs_root *root, int overwrite)
11540 struct extent_buffer *c;
11541 struct extent_buffer *old = root->node;
11544 struct btrfs_disk_key disk_key = {0,0,0};
11550 extent_buffer_get(c);
11553 c = btrfs_alloc_free_block(trans, root,
11555 root->root_key.objectid,
11556 &disk_key, level, 0, 0);
11559 extent_buffer_get(c);
/* Rebuild the block header from scratch */
11563 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11564 btrfs_set_header_level(c, level);
11565 btrfs_set_header_bytenr(c, c->start);
11566 btrfs_set_header_generation(c, trans->transid);
11567 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11568 btrfs_set_header_owner(c, root->root_key.objectid);
11570 write_extent_buffer(c, root->fs_info->fsid,
11571 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11573 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11574 btrfs_header_chunk_tree_uuid(c),
11577 btrfs_mark_buffer_dirty(c);
11579 * this case can happen in the following case:
11581 * 1.overwrite previous root.
11583 * 2.reinit reloc data root, this is because we skip pin
11584 * down reloc data tree before which means we can allocate
11585 * same block bytenr here.
11587 if (old->start == c->start) {
11588 btrfs_set_root_generation(&root->root_item,
11590 root->root_item.level = btrfs_header_level(root->node);
11591 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11592 &root->root_key, &root->root_item);
11594 free_extent_buffer(c);
11598 free_extent_buffer(old);
11600 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - pin a tree block and the blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself, whose leaves hold
 *             the ROOT_ITEM entries that lead to the other trees
 *
 * A block whose range is already marked EXTENT_DIRTY in pinned_extents is
 * not processed again.  Otherwise the block is pinned and its entries are
 * walked:
 *  - ROOT_ITEM entries (except the extent tree, tree reloc and data reloc
 *    roots) have their root block read and pinned by a recursive call with
 *    tree_root set to 0;
 *  - child pointers of a node are followed; at level 1 outside the tree
 *    root the child is pinned by address only, without reading it.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * level test that separates the leaf and node handling, error handling and
 * return statements).
 */
11604 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11605 struct extent_buffer *eb, int tree_root)
11607 struct extent_buffer *tmp;
11608 struct btrfs_root_item *ri;
11609 struct btrfs_key key;
11612 int level = btrfs_header_level(eb);
11618 * If we have pinned this block before, don't pin it again.
11619 * This can not only avoid forever loop with broken filesystem
11620 * but also give us some speedups.
11622 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11623 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11626 btrfs_pin_extent(fs_info, eb->start, eb->len);
11628 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11629 nritems = btrfs_header_nritems(eb);
11630 for (i = 0; i < nritems; i++) {
11632 btrfs_item_key_to_cpu(eb, &key, i);
11633 if (key.type != BTRFS_ROOT_ITEM_KEY)
11635 /* Skip the extent root and reloc roots */
11636 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11637 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11638 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11640 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11641 bytenr = btrfs_disk_root_bytenr(eb, ri);
11644 * If at any point we start needing the real root we
11645 * will have to build a stump root for the root we are
11646 * in, but for now this doesn't actually use the root so
11647 * just pass in extent_root.
11649 tmp = read_tree_block(fs_info->extent_root, bytenr,
11651 if (!extent_buffer_uptodate(tmp)) {
11652 fprintf(stderr, "Error reading root block\n");
11655 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11656 free_extent_buffer(tmp);
11660 bytenr = btrfs_node_blockptr(eb, i);
11662 /* If we aren't the tree root don't read the block */
11663 if (level == 1 && !tree_root) {
11664 btrfs_pin_extent(fs_info, bytenr, nodesize);
11668 tmp = read_tree_block(fs_info->extent_root, bytenr,
11670 if (!extent_buffer_uptodate(tmp)) {
11671 fprintf(stderr, "Error reading tree block\n");
11674 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11675 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the tree blocks of the chunk tree and of the
 * tree root.
 *
 * @fs_info: filesystem to operate on
 *
 * The chunk tree is walked with tree_root == 0, then the tree root is walked
 * with tree_root == 1 and the result of that second walk is returned.
 *
 * NOTE(review): the handling of the first result is not visible in this
 * listing.
 */
11684 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11688 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11692 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from the chunk
 * tree.
 *
 * @fs_info: filesystem to operate on
 *
 * The three avail_*_alloc_bits fields are cleared, then the chunk tree is
 * iterated.  For every CHUNK_ITEM a block group is added with
 * btrfs_add_block_group() using the chunk type, key and length, and the
 * logical range of the chunk is marked dirty in fs_info->free_space_cache.
 * Afterwards the block groups are walked with
 * btrfs_lookup_first_block_group(), advancing start past each one.
 *
 * NOTE(review): lines are missing from this listing (declarations, the rest
 * of the search key, loop control, what is done with each block group in
 * the final loop and the return statement).
 */
11695 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11697 struct btrfs_block_group_cache *cache;
11698 struct btrfs_path path;
11699 struct extent_buffer *leaf;
11700 struct btrfs_chunk *chunk;
11701 struct btrfs_key key;
11705 btrfs_init_path(&path);
11707 key.type = BTRFS_CHUNK_ITEM_KEY;
11709 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11711 btrfs_release_path(&path);
11716 * We do this in case the block groups were screwed up and had alloc
11717 * bits that aren't actually set on the chunks. This happens with
11718 * restored images every time and could happen in real life I guess.
11720 fs_info->avail_data_alloc_bits = 0;
11721 fs_info->avail_metadata_alloc_bits = 0;
11722 fs_info->avail_system_alloc_bits = 0;
11724 /* First we need to create the in-memory block groups */
11726 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11727 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11729 btrfs_release_path(&path);
11737 leaf = path.nodes[0];
11738 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11739 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11744 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11745 btrfs_add_block_group(fs_info, 0,
11746 btrfs_chunk_type(leaf, chunk),
11747 key.objectid, key.offset,
11748 btrfs_chunk_length(leaf, chunk));
11749 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11750 key.offset + btrfs_chunk_length(leaf, chunk));
11755 cache = btrfs_lookup_first_block_group(fs_info, start);
11759 start = cache->key.objectid + cache->key.offset;
11762 btrfs_release_path(&path);
/*
 * reset_balance - remove the traces of a pending balance.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 *
 * Three stages are visible:
 *  1. the balance item (BALANCE_OBJECTID, BALANCE_ITEM) is searched in the
 *     tree root and deleted;
 *  2. the ROOT_ITEM entries with objectid TREE_RELOC are deleted from the
 *     tree root in batches described by del_slot / del_nr;
 *  3. the data reloc tree is read, recorded in the transaction,
 *     re-initialised with btrfs_fsck_reinit_root() and given a new root
 *     directory with btrfs_make_root_dir().
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * handling of the search results, loop control, labels and the return
 * statement).
 */
11766 static int reset_balance(struct btrfs_trans_handle *trans,
11767 struct btrfs_fs_info *fs_info)
11769 struct btrfs_root *root = fs_info->tree_root;
11770 struct btrfs_path path;
11771 struct extent_buffer *leaf;
11772 struct btrfs_key key;
11773 int del_slot, del_nr = 0;
/* Stage 1: drop the balance item */
11777 btrfs_init_path(&path);
11778 key.objectid = BTRFS_BALANCE_OBJECTID;
11779 key.type = BTRFS_BALANCE_ITEM_KEY;
11781 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11786 goto reinit_data_reloc;
11791 ret = btrfs_del_item(trans, root, &path);
11794 btrfs_release_path(&path);
/* Stage 2: drop the root items of the tree reloc roots */
11796 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11797 key.type = BTRFS_ROOT_ITEM_KEY;
11799 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11803 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11808 ret = btrfs_del_items(trans, root, &path,
11815 btrfs_release_path(&path);
11818 ret = btrfs_search_slot(trans, root, &key, &path,
11825 leaf = path.nodes[0];
11826 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11827 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11829 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11834 del_slot = path.slots[0];
11843 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11847 btrfs_release_path(&path);
/* Stage 3: re-initialise the data reloc tree */
11850 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11851 key.type = BTRFS_ROOT_ITEM_KEY;
11852 key.offset = (u64)-1;
11853 root = btrfs_read_fs_root(fs_info, &key);
11854 if (IS_ERR(root)) {
11855 fprintf(stderr, "Error reading data reloc tree\n");
11856 ret = PTR_ERR(root);
11859 record_root_in_trans(trans, root);
11860 ret = btrfs_fsck_reinit_root(trans, root, 0);
11863 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11865 btrfs_release_path(&path);
/*
 * reinit_extent_tree - throw away the extent tree and start a new one.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to operate on
 *
 * Visible sequence:
 *  1. filesystems with the MIXED_GROUPS incompat flag are rejected with a
 *     message;
 *  2. pin_metadata_blocks() pins the metadata that is in use;
 *  3. the block groups are freed and rebuilt by reset_block_groups();
 *  4. the extent root is re-initialised with btrfs_fsck_reinit_root();
 *  5. every in-memory block group is inserted as an item into the new
 *     extent tree, followed by btrfs_extent_post_op();
 *  6. reset_balance() clears a pending balance.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * tests on the individual results, loop control and return statements), so
 * the returned error codes are not visible here.
 */
11869 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11870 struct btrfs_fs_info *fs_info)
11876 * The only reason we don't do this is because right now we're just
11877 * walking the trees we find and pinning down their bytes, we don't look
11878 * at any of the leaves. In order to do mixed groups we'd have to check
11879 * the leaves of any fs roots and pin down the bytes for any file
11880 * extents we find. Not hard but why do it if we don't have to?
11882 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11883 fprintf(stderr, "We don't support re-initing the extent tree "
11884 "for mixed block groups yet, please notify a btrfs "
11885 "developer you want to do this so they can add this "
11886 "functionality.\n");
11891 * first we need to walk all of the trees except the extent tree and pin
11892 * down the bytes that are in use so we don't overwrite any existing
11895 ret = pin_metadata_blocks(fs_info);
11897 fprintf(stderr, "error pinning down used bytes\n");
11902 * Need to drop all the block groups since we're going to recreate all
11905 btrfs_free_block_groups(fs_info);
11906 ret = reset_block_groups(fs_info);
11908 fprintf(stderr, "error resetting the block groups\n");
11912 /* Ok we can allocate now, reinit the extent root */
11913 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11915 fprintf(stderr, "extent root initialization failed\n");
11917 * When the transaction code is updated we should end the
11918 * transaction, but for now progs only knows about commit so
11919 * just return an error.
11925 * Now we have all the in-memory block groups setup so we can make
11926 * allocations properly, and the metadata we care about is safe since we
11927 * pinned all of it above.
11930 struct btrfs_block_group_cache *cache;
11932 cache = btrfs_lookup_first_block_group(fs_info, start);
11935 start = cache->key.objectid + cache->key.offset;
11936 ret = btrfs_insert_item(trans, fs_info->extent_root,
11937 &cache->key, &cache->item,
11938 sizeof(cache->item));
11940 fprintf(stderr, "Error adding block group\n");
11943 btrfs_extent_post_op(trans, fs_info->extent_root);
11946 ret = reset_balance(trans, fs_info);
11948 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - re-COW one metadata block through its owner root.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * The owner root recorded in the block header is read, a transaction is
 * started, and a search for the first key of the block is run with
 * path.lowest_level set to the level of the block and the last argument of
 * btrfs_search_slot() set to 1.  The transaction is then committed.
 *
 * NOTE(review): presumably the search with that last argument is what COWs
 * the block; this relies on btrfs_search_slot semantics that are not shown
 * in this file section -- confirm.
 * NOTE(review): the result of btrfs_commit_transaction() is discarded.
 * NOTE(review): lines are missing from this listing (declarations, the test
 * on trans, an else line and the return statement).
 */
11953 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11955 struct btrfs_path path;
11956 struct btrfs_trans_handle *trans;
11957 struct btrfs_key key;
11960 printf("Recowing metadata block %llu\n", eb->start);
11961 key.objectid = btrfs_header_owner(eb);
11962 key.type = BTRFS_ROOT_ITEM_KEY;
11963 key.offset = (u64)-1;
11965 root = btrfs_read_fs_root(root->fs_info, &key);
11966 if (IS_ERR(root)) {
11967 fprintf(stderr, "Couldn't find owner root %llu\n",
11969 return PTR_ERR(root);
11972 trans = btrfs_start_transaction(root, 1);
11974 return PTR_ERR(trans);
11976 btrfs_init_path(&path);
/* Stop the search at the level of the block itself */
11977 path.lowest_level = btrfs_header_level(eb);
11978 if (path.lowest_level)
11979 btrfs_node_key_to_cpu(eb, &key, 0);
11981 btrfs_item_key_to_cpu(eb, &key, 0);
11983 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11984 btrfs_commit_transaction(trans, root);
11985 btrfs_release_path(&path);
/*
 * delete_bad_item - delete one item that was recorded as bad.
 *
 * @root: any root of the filesystem; replaced by the root named in @bad
 * @bad:  record holding the id of the owning root and the key of the item
 *
 * The root with id bad->root_id is read, a transaction is started, the key
 * bad->key is searched for modification and the item found is removed with
 * btrfs_del_item().  The transaction is then committed.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is discarded.
 * NOTE(review): lines are missing from this listing (declarations, the test
 * on trans, the handling of the search result and the return statement).
 */
11989 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11991 struct btrfs_path path;
11992 struct btrfs_trans_handle *trans;
11993 struct btrfs_key key;
11996 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11997 bad->key.type, bad->key.offset);
/* Key used to look up the root that owns the bad item */
11998 key.objectid = bad->root_id;
11999 key.type = BTRFS_ROOT_ITEM_KEY;
12000 key.offset = (u64)-1;
12002 root = btrfs_read_fs_root(root->fs_info, &key);
12003 if (IS_ERR(root)) {
12004 fprintf(stderr, "Couldn't find owner root %llu\n",
12006 return PTR_ERR(root);
12009 trans = btrfs_start_transaction(root, 1);
12011 return PTR_ERR(trans);
12013 btrfs_init_path(&path);
12014 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12020 ret = btrfs_del_item(trans, root, &path);
12022 btrfs_commit_transaction(trans, root);
12023 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root recorded in the super block.
 *
 * @root: root used to start and commit the transaction
 *
 * Inside a transaction the log_root and log_root_level fields of the super
 * block copy are set to 0; committing the transaction produces ret.
 *
 * NOTE(review): the declaration of ret and the return statements are not
 * visible in this listing.
 */
12027 static int zero_log_tree(struct btrfs_root *root)
12029 struct btrfs_trans_handle *trans;
12032 trans = btrfs_start_transaction(root, 1);
12033 if (IS_ERR(trans)) {
12034 ret = PTR_ERR(trans);
12037 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12038 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12039 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the checksums
 * @buf:       scratch buffer; one sector is read into it per iteration
 * @start:     first byte of the data range
 *
 * For every sector of the range the data is read with read_extent_data()
 * and handed to btrfs_csum_file_block() together with start + len as the
 * third argument.
 *
 * NOTE(review): the remaining parameter(s) of the signature, the local
 * declarations, the error checks and the return statement are not visible
 * in this listing; len is presumably the length parameter -- confirm.
 * NOTE(review): sectorsize is re-read from csum_root on every iteration
 * although nothing visible in the loop changes it.
 */
12043 static int populate_csum(struct btrfs_trans_handle *trans,
12044 struct btrfs_root *csum_root, char *buf, u64 start,
12051 while (offset < len) {
12052 sectorsize = csum_root->sectorsize;
12053 ret = read_extent_data(csum_root, buf, start + offset,
12057 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12058 start + offset, buf, sectorsize);
12061 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the regular file extents of one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose file extents are walked
 *
 * The items of @cur_root are iterated.  For every EXTENT_DATA item of type
 * BTRFS_FILE_EXTENT_REG the on-disk range (disk_bytenr, disk_num_bytes) is
 * passed to populate_csum().  A result of -EEXIST from populate_csum() is
 * tested separately.
 *
 * NOTE(review): the scratch buffer is sized from
 * cur_root->fs_info->csum_root rather than from the @csum_root parameter.
 * NOTE(review): lines are missing from this listing (declarations, the
 * search key, loop control, the -EEXIST branch body, cleanup of buf and the
 * return statement).
 */
12066 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12067 struct btrfs_root *csum_root,
12068 struct btrfs_root *cur_root)
12070 struct btrfs_path path;
12071 struct btrfs_key key;
12072 struct extent_buffer *node;
12073 struct btrfs_file_extent_item *fi;
/* One sector of scratch space for populate_csum() */
12080 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12084 btrfs_init_path(&path);
12088 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12091 /* Iterate all regular file extents and fill its csum */
12093 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12095 if (key.type != BTRFS_EXTENT_DATA_KEY)
12097 node = path.nodes[0];
12098 slot = path.slots[0];
12099 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12100 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12102 start = btrfs_file_extent_disk_bytenr(node, fi);
12103 len = btrfs_file_extent_disk_num_bytes(node, fi);
12105 ret = populate_csum(trans, csum_root, buf, start, len);
12106 if (ret == -EEXIST)
12112 * TODO: if next leaf is corrupted, jump to nearest next valid
12115 ret = btrfs_next_item(cur_root, &path);
12125 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - fill the checksum tree by walking every
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the checksums
 *
 * The ROOT_ITEM entries of the tree root are iterated starting at
 * BTRFS_FS_TREE_OBJECTID.  Entries are filtered on objectid (compared with
 * BTRFS_LAST_FREE_OBJECTID), key type and is_fstree(); each remaining root
 * is read with btrfs_read_fs_root() and processed by
 * fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * actions taken by the filters, error handling, loop control and the return
 * statement).
 */
12130 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12131 struct btrfs_root *csum_root)
12133 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12134 struct btrfs_path path;
12135 struct btrfs_root *tree_root = fs_info->tree_root;
12136 struct btrfs_root *cur_root;
12137 struct extent_buffer *node;
12138 struct btrfs_key key;
12142 btrfs_init_path(&path);
12143 key.objectid = BTRFS_FS_TREE_OBJECTID;
12145 key.type = BTRFS_ROOT_ITEM_KEY;
12146 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12155 node = path.nodes[0];
12156 slot = path.slots[0];
12157 btrfs_item_key_to_cpu(node, &key, slot);
12158 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12160 if (key.type != BTRFS_ROOT_ITEM_KEY)
12162 if (!is_fstree(key.objectid))
/* Same offset value the other root lookups in this file use */
12164 key.offset = (u64)-1;
12166 cur_root = btrfs_read_fs_root(fs_info, &key);
12167 if (IS_ERR(cur_root) || !cur_root) {
12168 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12172 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12177 ret = btrfs_next_item(tree_root, &path);
12187 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - fill the checksum tree by walking the extent
 * tree.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree that receives the checksums
 *
 * The extent tree is iterated leaf by leaf.  For every EXTENT_ITEM whose
 * flags contain BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the
 * extent bytenr (key.objectid) as start.
 *
 * NOTE(review): lines are missing from this listing (declarations, the rest
 * of the search key, the tests on the search and allocation results, loop
 * control, the last argument of populate_csum, cleanup of buf and the
 * return statement).
 */
12191 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12192 struct btrfs_root *csum_root)
12194 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12195 struct btrfs_path path;
12196 struct btrfs_extent_item *ei;
12197 struct extent_buffer *leaf;
12199 struct btrfs_key key;
12202 btrfs_init_path(&path);
12204 key.type = BTRFS_EXTENT_ITEM_KEY;
12206 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12208 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum() */
12212 buf = malloc(csum_root->sectorsize);
12214 btrfs_release_path(&path);
12219 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12220 ret = btrfs_next_leaf(extent_root, &path);
12228 leaf = path.nodes[0];
12230 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12231 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12236 ei = btrfs_item_ptr(leaf, path.slots[0],
12237 struct btrfs_extent_item);
12238 if (!(btrfs_extent_flags(leaf, ei) &
12239 BTRFS_EXTENT_FLAG_DATA)) {
12244 ret = populate_csum(trans, csum_root, buf, key.objectid,
12251 btrfs_release_path(&path);
12257 * Recalculate the csum and put it into the csum tree.
12259 * Extent tree init will wipe out all the extent info, so in that case we
12260 * can't depend on the extent tree and must use the fs trees instead. If
12261 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
/*
 * Dispatch to one of the two csum tree refill strategies.
 *
 * @trans:          transaction handle forwarded to the chosen helper
 * @csum_root:      csum tree root forwarded to the chosen helper
 * @search_fs_tree: nonzero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
12263 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12264 struct btrfs_root *csum_root,
12265 int search_fs_tree)
12267 if (search_fs_tree)
12268 return fill_csum_tree_from_fs(trans, csum_root);
12270 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Drains the cache tree entry by entry, then frees the tree itself and
 * resets the global pointer to NULL, so that build_roots_info_cache()
 * allocates a fresh tree on its next call.
 */
12273 static void free_roots_info_cache(void)
/* Cache was never allocated (or already freed). */
12275 if (!roots_info_cache)
12278 while (!cache_tree_empty(roots_info_cache)) {
12279 struct cache_extent *entry;
12280 struct root_item_info *rii;
12282 entry = first_cache_extent(roots_info_cache);
12285 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent. */
12286 rii = container_of(entry, struct root_item_info, cache_extent);
12290 free(roots_info_cache);
12291 roots_info_cache = NULL;
/*
 * Fill the global roots_info_cache from the extent tree.
 *
 * Every tree block item (a METADATA_ITEM, or an EXTENT_ITEM carrying
 * BTRFS_EXTENT_FLAG_TREE_BLOCK) whose first inline ref is of type
 * BTRFS_TREE_BLOCK_REF_KEY is attributed to the root id stored in that ref.
 * For each root id the cache keeps the highest level seen so far together
 * with the bytenr and generation of the block at that level; node_count is
 * reset to 1 whenever a new highest level is recorded.
 *
 * @info: filesystem whose extent_root is scanned
 *
 * NOTE(review): maybe_repair_root_item() rejects entries with
 * node_count != 1, so the equal-level branch at the end of the loop
 * presumably counts further blocks found at the current top level -- confirm.
 */
12294 static int build_roots_info_cache(struct btrfs_fs_info *info)
12297 struct btrfs_key key;
12298 struct extent_buffer *leaf;
12299 struct btrfs_path path;
/* Allocate and initialise the global cache tree on first use. */
12301 if (!roots_info_cache) {
12302 roots_info_cache = malloc(sizeof(*roots_info_cache));
12303 if (!roots_info_cache)
12305 cache_tree_init(roots_info_cache);
12308 btrfs_init_path(&path);
12310 key.type = BTRFS_EXTENT_ITEM_KEY;
12312 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12315 leaf = path.nodes[0];
12318 struct btrfs_key found_key;
12319 struct btrfs_extent_item *ei;
12320 struct btrfs_extent_inline_ref *iref;
12321 int slot = path.slots[0];
12326 struct cache_extent *entry;
12327 struct root_item_info *rii;
/* Past the last item of this leaf: advance and refresh leaf and slot. */
12329 if (slot >= btrfs_header_nritems(leaf)) {
12330 ret = btrfs_next_leaf(info->extent_root, &path);
12337 leaf = path.nodes[0];
12338 slot = path.slots[0];
12341 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are examined further. */
12343 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12344 found_key.type != BTRFS_METADATA_ITEM_KEY)
12347 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12348 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM lacking the TREE_BLOCK flag takes this branch. */
12350 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12351 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the level of the block.
 * METADATA_ITEM: the ref follows the extent item directly and the level is
 * taken from key.offset.  Otherwise a btrfs_tree_block_info sits between
 * the extent item and the first ref, and the level is read from it.
 */
12354 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12355 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12356 level = found_key.offset;
12358 struct btrfs_tree_block_info *binfo;
12360 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12361 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12362 level = btrfs_tree_block_level(leaf, binfo);
12366 * For a root extent, it must be of the following type and the
12367 * first (and only one) iref in the item.
12369 type = btrfs_extent_inline_ref_type(leaf, iref);
12370 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The offset of the inline ref is used as the owning root id. */
12373 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12374 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/*
 * New cache entry: keyed by root id with size 1; level starts as (u8)-1,
 * which the comparison further down treats as -- not set yet --.
 */
12376 rii = malloc(sizeof(struct root_item_info));
12381 rii->cache_extent.start = root_id;
12382 rii->cache_extent.size = 1;
12383 rii->level = (u8)-1;
12384 entry = &rii->cache_extent;
12385 ret = insert_cache_extent(roots_info_cache, entry);
12388 rii = container_of(entry, struct root_item_info,
12392 ASSERT(rii->cache_extent.start == root_id);
12393 ASSERT(rii->cache_extent.size == 1);
/* First block seen for this root, or a strictly higher level than before. */
12395 if (level > rii->level || rii->level == (u8)-1) {
12396 rii->level = level;
12397 rii->bytenr = found_key.objectid;
12398 rii->gen = btrfs_extent_generation(leaf, ei);
12399 rii->node_count = 1;
12400 } else if (level == rii->level) {
12408 btrfs_release_path(&path);
/*
 * Check one root item against the root node recorded in roots_info_cache
 * and optionally fix it.
 *
 * The item is considered outdated when its bytenr, level or generation
 * differs from the cached values.  Unless read_only_mode is set, those three
 * fields are overwritten in the leaf with the cached ones.
 *
 * @path:           positioned on the root item (nodes[0] / slots[0])
 * @root_key:       key of that item; its objectid is used as the root id
 * @read_only_mode: nonzero to only detect and report, never modify the leaf
 *
 * NOTE(review): presumably returns a negative value on error and a value
 * telling the caller whether the item was outdated -- confirm against the
 * return statements.
 */
12413 static int maybe_repair_root_item(struct btrfs_path *path,
12414 const struct btrfs_key *root_key,
12415 const int read_only_mode)
12417 const u64 root_id = root_key->objectid;
12418 struct cache_extent *entry;
12419 struct root_item_info *rii;
12420 struct btrfs_root_item ri;
12421 unsigned long offset;
/* Cache entries are keyed by root id with a size of 1. */
12423 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12426 "Error: could not find extent items for root %llu\n",
12427 root_key->objectid);
12431 rii = container_of(entry, struct root_item_info, cache_extent);
12432 ASSERT(rii->cache_extent.start == root_id);
12433 ASSERT(rii->cache_extent.size == 1);
/*
 * build_roots_info_cache() sets node_count to 1 when it records a new top
 * level; any other value is reported as a missing btree root extent.
 */
12435 if (rii->node_count != 1) {
12437 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
12442 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12443 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any mismatch in bytenr, level or generation marks the item as outdated. */
12445 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12446 btrfs_root_level(&ri) != rii->level ||
12447 btrfs_root_generation(&ri) != rii->gen) {
12450 * If we're in repair mode but our caller told us to not update
12451 * the root item, i.e. just check if it needs to be updated, don't
12452 * print this message, since the caller will call us again shortly
12453 * for the same root item without read only mode (the caller will
12454 * open a transaction first).
12456 if (!(read_only_mode && repair))
12458 "%sroot item for root %llu,"
12459 " current bytenr %llu, current gen %llu, current level %u,"
12460 " new bytenr %llu, new gen %llu, new level %u\n",
12461 (read_only_mode ? "" : "fixing "),
12463 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12464 btrfs_root_level(&ri),
12465 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported. */
12467 if (btrfs_root_generation(&ri) > rii->gen) {
12469 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12470 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back into the leaf buffer. */
12474 if (!read_only_mode) {
12475 btrfs_set_root_bytenr(&ri, rii->bytenr);
12476 btrfs_set_root_level(&ri, rii->level);
12477 btrfs_set_root_generation(&ri, rii->gen);
12478 write_extent_buffer(path->nodes[0], &ri,
12479 offset, sizeof(ri));
12489 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12490 * caused read-only snapshots to be corrupted if they were created at a moment
12491 * when the source subvolume/snapshot had orphan items. The issue was that the
12492 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12493 * node instead of the post orphan cleanup root node.
12494 * So this function, and its callees, just detect and fix those cases. Even
12495 * though the regression was for read-only snapshots, this function applies to
12496 * any snapshot/subvolume root.
12497 * This must be run before any other repair code - not doing so makes other
12498 * repair code delete or modify backrefs in the extent tree for example, which
12499 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the ROOT_ITEM keys of the tree root and check each one with
 * maybe_repair_root_item().
 *
 * Outline of the visible logic:
 *  - build_roots_info_cache() collects the expected root node per root id;
 *  - the search key starts at BTRFS_FIRST_FREE_OBJECTID with type
 *    BTRFS_ROOT_ITEM_KEY;
 *  - keys of another type and BTRFS_TREE_RELOC_OBJECTID are filtered out;
 *  - while no transaction is open, items are checked in read only mode
 *    (third argument is trans ? 0 : 1); when an item needs fixing and the
 *    global repair flag is set, the path is released so the walk can be
 *    redone with a transaction (see need_trans);
 *  - at the end of a leaf the transaction, if any, is committed and
 *    find_next_key() supplies the key to continue from;
 *  - the cache is freed before returning.
 *
 * @info: filesystem to operate on
 *
 * cmd_check() treats a negative return value as an error and a positive one
 * as the number of outdated (or fixed, in repair mode) root items.
 */
12501 static int repair_root_items(struct btrfs_fs_info *info)
12503 struct btrfs_path path;
12504 struct btrfs_key key;
12505 struct extent_buffer *leaf;
12506 struct btrfs_trans_handle *trans = NULL;
12509 int need_trans = 0;
12511 btrfs_init_path(&path);
/* Gather the expected root node of every root from the extent tree. */
12513 ret = build_roots_info_cache(info);
/* Start the walk at the first root item with this objectid or above. */
12517 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12518 key.type = BTRFS_ROOT_ITEM_KEY;
12523 * Avoid opening and committing transactions if a leaf doesn't have
12524 * any root items that need to be fixed, so that we avoid rotating
12525 * backup roots unnecessarily.
12528 trans = btrfs_start_transaction(info->tree_root, 1);
12529 if (IS_ERR(trans)) {
12530 ret = PTR_ERR(trans);
12535 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12539 leaf = path.nodes[0];
12542 struct btrfs_key found_key;
12544 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12545 int no_more_keys = find_next_key(&path, &key);
12547 btrfs_release_path(&path);
12549 ret = btrfs_commit_transaction(trans,
12561 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12563 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12565 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12568 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12572 if (!trans && repair) {
12575 btrfs_release_path(&path);
12585 free_roots_info_cache();
12586 btrfs_release_path(&path);
12588 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and invalidate the cache
 * generation stored in the super block.
 *
 * Block groups are visited via btrfs_lookup_first_block_group(), each one is
 * passed to btrfs_clear_free_space_cache(); afterwards the super block
 * cache_generation is set to (u64)-1 inside a transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) when the final transaction cannot be started.
 * NOTE(review): other return paths presumably follow the 0 / negative
 * convention -- confirm.
 */
12595 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12597 struct btrfs_trans_handle *trans;
12598 struct btrfs_block_group_cache *bg_cache;
12602 /* Clear all free space cache inodes and its extent data */
12604 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12607 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/*
 * Next lookup position: key.objectid plus key.offset of this block group,
 * presumably its start plus its length (TODO confirm).
 */
12610 current = bg_cache->key.objectid + bg_cache->key.offset;
12613 /* Don't forget to set cache_generation to -1 */
12614 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12615 if (IS_ERR(trans)) {
12616 error("failed to update super block cache generation");
12617 return PTR_ERR(trans);
12619 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not captured here. */
12620 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text of the check command, one string per output line.
 * cmd_check() passes this table to usage() when an unknown option is given
 * or when the number of positional arguments is wrong.
 * The entries should be kept in sync with the long_options table and the
 * option switch in cmd_check().
 */
12625 const char * const cmd_check_usage[] = {
12626 "btrfs check [options] <device>",
12627 "Check structural integrity of a filesystem (unmounted).",
12628 "Check structural integrity of an unmounted filesystem. Verify internal",
12629 "trees' consistency and item connectivity. In the repair mode try to",
12630 "fix the problems found. ",
12631 "WARNING: the repair mode is considered dangerous",
12633 "-s|--super <superblock> use this superblock copy",
12634 "-b|--backup use the first valid backup root copy",
12635 "--repair try to repair the filesystem",
12636 "--readonly run in read-only mode (default)",
12637 "--init-csum-tree create a new CRC tree",
12638 "--init-extent-tree create a new extent tree",
12639 "--mode <MODE> allows choice of memory/IO trade-offs",
12640 " where MODE is one of:",
12641 " original - read inodes and extents to memory (requires",
12642 " more memory, does less IO)",
12643 " lowmem - try to use less memory but read blocks again",
12645 "--check-data-csum verify checksums of data blocks",
12646 "-Q|--qgroup-report print a report on qgroup consistency",
12647 "-E|--subvol-extents <subvolid>",
12648 " print subvolume extents and sharing state",
12649 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12650 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12651 "-p|--progress indicate progress",
12652 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Visible phases, in order:
 *  1. parse options with getopt_long() and accumulate ctree_flags;
 *  2. reject incompatible combinations (readonly with repair, repair with
 *     lowmem mode) and refuse to run on a mounted device;
 *  3. open the filesystem with open_ctree_fs_info();
 *  4. handle the space cache clearing requests (v1 or v2);
 *  5. in repair mode, ask before zeroing an existing log tree;
 *  6. handle the qgroup report and subvolume extent state requests;
 *  7. optionally reinitialise the extent tree and/or the csum tree inside a
 *     transaction;
 *  8. run the checks: extents, root items, free space cache or tree,
 *     fs roots, csums, root refs, then recow and bad item cleanup, quota
 *     groups;
 *  9. print the collected statistics and release resources.
 *
 * @argc, @argv: command line; exactly one positional argument (the device)
 *               is expected after the options
 *
 * NOTE(review): the return statement is outside the lines reproduced here;
 * presumably nonzero when errors were found -- confirm.
 */
12656 int cmd_check(int argc, char **argv)
12658 struct cache_tree root_cache;
12659 struct btrfs_root *root;
12660 struct btrfs_fs_info *info;
12663 u64 tree_root_bytenr = 0;
12664 u64 chunk_root_bytenr = 0;
12665 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12669 int init_csum_tree = 0;
12671 int clear_space_cache = 0;
12672 int qgroup_report = 0;
12673 int qgroups_repaired = 0;
12674 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/*
 * Identifiers for options that only have a long form.  They start at 257,
 * presumably to stay clear of the single character option values.
 */
12678 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12679 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12680 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12681 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12682 static const struct option long_options[] = {
12683 { "super", required_argument, NULL, 's' },
12684 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12685 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12686 { "init-csum-tree", no_argument, NULL,
12687 GETOPT_VAL_INIT_CSUM },
12688 { "init-extent-tree", no_argument, NULL,
12689 GETOPT_VAL_INIT_EXTENT },
12690 { "check-data-csum", no_argument, NULL,
12691 GETOPT_VAL_CHECK_CSUM },
12692 { "backup", no_argument, NULL, 'b' },
12693 { "subvol-extents", required_argument, NULL, 'E' },
12694 { "qgroup-report", no_argument, NULL, 'Q' },
12695 { "tree-root", required_argument, NULL, 'r' },
12696 { "chunk-root", required_argument, NULL,
12697 GETOPT_VAL_CHUNK_TREE },
12698 { "progress", no_argument, NULL, 'p' },
12699 { "mode", required_argument, NULL,
12701 { "clear-space-cache", required_argument, NULL,
12702 GETOPT_VAL_CLEAR_SPACE_CACHE},
12703 { NULL, 0, NULL, 0}
/*
 * NOTE(review): the short option string below does not list E or Q although
 * long_options maps subvol-extents and qgroup-report to them and the usage
 * text advertises -E and -Q -- verify the short forms are accepted.
 */
12706 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12710 case 'a': /* ignored */ break;
12712 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12715 num = arg_strtou64(optarg);
12716 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12718 "super mirror should be less than %d",
12719 BTRFS_SUPER_MIRROR_MAX);
12722 bytenr = btrfs_sb_offset(((int)num));
12723 printf("using SB copy %llu, bytenr %llu\n", num,
12724 (unsigned long long)bytenr);
12730 subvolid = arg_strtou64(optarg);
12733 tree_root_bytenr = arg_strtou64(optarg);
12735 case GETOPT_VAL_CHUNK_TREE:
12736 chunk_root_bytenr = arg_strtou64(optarg);
12739 ctx.progress_enabled = true;
12743 usage(cmd_check_usage);
12744 case GETOPT_VAL_REPAIR:
12745 printf("enabling repair mode\n");
12747 ctree_flags |= OPEN_CTREE_WRITES;
12749 case GETOPT_VAL_READONLY:
12752 case GETOPT_VAL_INIT_CSUM:
12753 printf("Creating a new CRC tree\n");
12754 init_csum_tree = 1;
12756 ctree_flags |= OPEN_CTREE_WRITES;
12758 case GETOPT_VAL_INIT_EXTENT:
12759 init_extent_tree = 1;
12760 ctree_flags |= (OPEN_CTREE_WRITES |
12761 OPEN_CTREE_NO_BLOCK_GROUPS);
12764 case GETOPT_VAL_CHECK_CSUM:
12765 check_data_csum = 1;
12767 case GETOPT_VAL_MODE:
12768 check_mode = parse_check_mode(optarg);
12769 if (check_mode == CHECK_MODE_UNKNOWN) {
12770 error("unknown mode: %s", optarg);
12774 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12775 if (strcmp(optarg, "v1") == 0) {
12776 clear_space_cache = 1;
12777 } else if (strcmp(optarg, "v2") == 0) {
12778 clear_space_cache = 2;
12779 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12782 "invalid argument to --clear-space-cache, must be v1 or v2");
12785 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument, the device, must remain. */
12790 if (check_argc_exact(argc - optind, 1))
12791 usage(cmd_check_usage);
/* Set up the progress reporting task only when -p was given. */
12793 if (ctx.progress_enabled) {
12794 ctx.tp = TASK_NOTHING;
12795 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12798 /* This check is the only reason for --readonly to exist */
12799 if (readonly && repair) {
12800 error("repair options are not compatible with --readonly");
12805 * Not supported yet
12807 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12808 error("low memory mode doesn't support repair yet");
12813 cache_tree_init(&root_cache);
/* A negative result is a failure of the mount check itself. */
12815 if((ret = check_mounted(argv[optind])) < 0) {
12816 error("could not check mount status: %s", strerror(-ret));
12820 error("%s is currently mounted, aborting", argv[optind]);
12826 /* only allow partial opening under repair mode */
12828 ctree_flags |= OPEN_CTREE_PARTIAL;
12830 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12831 chunk_root_bytenr, ctree_flags);
12833 error("cannot open file system");
12839 global_info = info;
12840 root = info->fs_root;
/*
 * Space cache clearing: value 1 is the v1 cache and is refused when the
 * FREE_SPACE_TREE compat_ro bit is set; value 2 is the free space tree and
 * is only attempted when that bit is set.
 */
12841 if (clear_space_cache == 1) {
12842 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12844 "free space cache v2 detected, use --clear-space-cache v2");
12848 printf("Clearing free space cache\n");
12849 ret = clear_free_space_cache(info);
12851 error("failed to clear free space cache");
12854 printf("Free space cache cleared\n");
12857 } else if (clear_space_cache == 2) {
12858 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12859 printf("no free space cache v2 to clear\n");
12863 printf("Clear free space cache v2\n");
12864 ret = btrfs_clear_free_space_tree(info);
12866 error("failed to clear free space cache v2: %d", ret);
12869 printf("free space cache v2 cleared\n");
12875 * repair mode will force us to commit transaction which
12876 * will make us fail to load log tree when mounting.
12878 if (repair && btrfs_super_log_root(info->super_copy)) {
12879 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12885 ret = zero_log_tree(root);
12888 error("failed to zero log tree: %d", ret);
12893 uuid_unparse(info->super_copy->fsid, uuidbuf);
12894 if (qgroup_report) {
12895 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12897 ret = qgroup_verify_all(info);
12904 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12905 subvolid, argv[optind], uuidbuf);
12906 ret = print_extent_state(info, subvolid);
12910 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Tree root, device tree and chunk tree must be readable to go on. */
12912 if (!extent_buffer_uptodate(info->tree_root->node) ||
12913 !extent_buffer_uptodate(info->dev_root->node) ||
12914 !extent_buffer_uptodate(info->chunk_root->node)) {
12915 error("critical roots corrupted, unable to check the filesystem");
/* Tree reinitialisation runs inside a single transaction. */
12921 if (init_extent_tree || init_csum_tree) {
12922 struct btrfs_trans_handle *trans;
12924 trans = btrfs_start_transaction(info->extent_root, 0);
12925 if (IS_ERR(trans)) {
12926 error("error starting transaction");
12927 ret = PTR_ERR(trans);
12932 if (init_extent_tree) {
12933 printf("Creating a new extent tree\n");
12934 ret = reinit_extent_tree(trans, info);
12940 if (init_csum_tree) {
12941 printf("Reinitialize checksum tree\n");
12942 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12944 error("checksum tree initialization failed: %d",
12951 ret = fill_csum_tree(trans, info->csum_root,
12955 error("checksum tree refilling failed: %d", ret);
12960 * Ok now we commit and run the normal fsck, which will add
12961 * extent entries for all of the items it finds.
12963 ret = btrfs_commit_transaction(trans, info->extent_root);
12968 if (!extent_buffer_uptodate(info->extent_root->node)) {
12969 error("critical: extent_root, unable to check the filesystem");
12974 if (!extent_buffer_uptodate(info->csum_root->node)) {
12975 error("critical: csum_root, unable to check the filesystem");
12981 if (!ctx.progress_enabled)
12982 fprintf(stderr, "checking extents\n");
12983 if (check_mode == CHECK_MODE_LOWMEM)
12984 ret = check_chunks_and_extents_v2(root);
12986 ret = check_chunks_and_extents(root);
12990 "errors found in extent allocation tree or chunk allocation");
/*
 * Negative: failure.  Positive: number of root items that were fixed (repair
 * mode) or found outdated (read only), as the messages below show.
 */
12992 ret = repair_root_items(info);
12995 error("failed to repair root items: %s", strerror(-ret));
12999 fprintf(stderr, "Fixed %d roots.\n", ret);
13001 } else if (ret > 0) {
13003 "Found %d roots with an outdated root item.\n",
13006 "Please run a filesystem check with the option --repair to fix them.\n");
13012 if (!ctx.progress_enabled) {
13013 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13014 fprintf(stderr, "checking free space tree\n");
13016 fprintf(stderr, "checking free space cache\n");
13018 ret = check_space_cache(root);
13021 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13022 error("errors found in free space tree");
13024 error("errors found in free space cache");
13029 * We used to have to have these hole extents in between our real
13030 * extents so if we don't have this flag set we need to make sure there
13031 * are no gaps in the file extents for inodes, otherwise we can just
13032 * ignore it when this happens.
13034 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13035 if (!ctx.progress_enabled)
13036 fprintf(stderr, "checking fs roots\n");
13037 if (check_mode == CHECK_MODE_LOWMEM)
13038 ret = check_fs_roots_v2(root->fs_info);
13040 ret = check_fs_roots(root, &root_cache);
13043 error("errors found in fs roots");
13047 fprintf(stderr, "checking csums\n");
13048 ret = check_csums(root);
13051 error("errors found in csum tree");
13055 fprintf(stderr, "checking root refs\n");
13056 /* For low memory mode, check_fs_roots_v2 handles root refs */
13057 if (check_mode != CHECK_MODE_LOWMEM) {
13058 ret = check_root_refs(root, &root_cache);
13061 error("errors found in root refs");
13066 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13067 struct extent_buffer *eb;
13069 eb = list_first_entry(&root->fs_info->recow_ebs,
13070 struct extent_buffer, recow);
13071 list_del_init(&eb->recow);
13072 ret = recow_extent_buffer(root, eb);
13075 error("fails to fix transid errors");
13080 while (!list_empty(&delete_items)) {
13081 struct bad_item *bad;
13083 bad = list_first_entry(&delete_items, struct bad_item, list);
13084 list_del_init(&bad->list);
13086 ret = delete_bad_item(root, bad);
13092 if (info->quota_enabled) {
13093 fprintf(stderr, "checking quota groups\n");
13094 ret = qgroup_verify_all(info);
13097 error("failed to check quota groups");
13101 ret = repair_qgroups(info, &qgroups_repaired);
13104 error("failed to repair quota groups");
13110 if (!list_empty(&root->fs_info->recow_ebs)) {
13111 error("transid errors in file system");
13116 if (found_old_backref) { /*
13117 * there was a disk format change when mixed
13118 * backref was in testing tree. The old format
13119 * existed about one week.
13121 printf("\n * Found old mixed backref format. "
13122 "The old format is not supported! *"
13123 "\n * Please mount the FS in readonly mode, "
13124 "backup data and re-format the FS. *\n\n");
13127 printf("found %llu bytes used, ",
13128 (unsigned long long)bytes_used);
13130 printf("error(s) found\n");
13132 printf("no error found\n");
13133 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13134 printf("total tree bytes: %llu\n",
13135 (unsigned long long)total_btree_bytes);
13136 printf("total fs tree bytes: %llu\n",
13137 (unsigned long long)total_fs_tree_bytes);
13138 printf("total extent tree bytes: %llu\n",
13139 (unsigned long long)total_extent_tree_bytes);
13140 printf("btree space waste bytes: %llu\n",
13141 (unsigned long long)btree_space_waste);
13142 printf("file data blocks allocated: %llu\n referenced %llu\n",
13143 (unsigned long long)data_bytes_allocated,
13144 (unsigned long long)data_bytes_referenced);
13146 free_qgroup_counts();
13147 free_root_recs_tree(&root_cache);
13151 if (ctx.progress_enabled)
13152 task_deinit(ctx.info);