2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/* File-scope state of the checker; the counters and flags below start at zero. */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() never sets I_ERR_FILE_EXTENT_DISCOUNT. */
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bits describing problems with directory, inode, file extent, csum
 * and root ref items. Bit 0 is not assigned in this group.
 */
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM has no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Per-backref error bits. Code such as add_inode_backref() ORs them into a
 * backref's 'errors' field and print_ref_error() prints one message per set
 * bit.
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Per-inode error bits. They are ORed into inode_record 'errors' (see
 * maybe_free_inode_rec() and process_inode_item()) and print_inode_error()
 * prints one message per set bit.
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
/*
 * For kernel scrub workaround.  Needs a bit of its own: sharing bit 4 with
 * REFERENCER_MISMATCH would make the two errors indistinguishable once they
 * are ORed into the same error value.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper that allows old code to be reused */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This merges holes on behalf of copy_file_extent_holes(),
476 * which ensures there won't be contiguous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This splits the hole and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), called from walk_up_tree() when the
 * walk moves up out of the active shared node at @level.
 *
 * Nothing is looked up when @level is the root level. Otherwise the levels
 * above @level are scanned for the next active node, the old wc->nodes[] slot
 * is cleared, and the records collected so far are spliced into the new
 * active node unless the walk is at the root level of a root with zero refs.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
/* Search upwards for the level that becomes active next. */
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
/* Detach the node being left and switch the active level to i. */
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Two-step lookup in the tree root (root->fs_info->tree_root):
 *   1. search for the key (parent_root_id, ROOT_REF, child_root_id);
 *   2. scan the ROOT_BACKREF items of child_root_id, looking for one whose
 *      key offset equals parent_root_id.
 * The final fall-through result distinguishes "has some other parent" (0)
 * from "has no parent at all" (2) via has_parent.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Step 1: forward reference from the parent to the child. */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Step 2: walk the back references stored under the child. */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the range of backref items that belong to the child. */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX item at @slot of leaf @eb.
 *
 * The item may hold several btrfs_dir_item entries back to back. For each
 * entry the name length is added to the current inode record's found_size
 * and a backref is recorded via add_inode_backref(): in the inode cache when
 * the entry points at an inode item, in the root cache when it points at a
 * root item, and otherwise under BTRFS_MULTIPLE_OBJECTIDS after printing a
 * diagnostic. Names longer than BTRFS_NAME_LEN are truncated to that length
 * and flagged with REF_ERR_NAME_TOO_LONG. A DIR_INDEX item containing more
 * than one entry marks the record with I_ERR_DUP_DIR_INDEX.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
/* Iterate over every dir entry packed into this item. */
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
/* Clamp the copy to namebuf's capacity and remember the overflow. */
1515 if (name_len <= BTRFS_NAME_LEN) {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start immediately after the fixed-size header. */
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
/* Advance past header + name + data to the next packed entry. */
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at @slot of leaf @eb.
 *
 * The item may hold several btrfs_inode_ref entries back to back. For each
 * one the name (truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG when longer) and the index are recorded in the
 * active node's inode cache via add_inode_backref(), using key->objectid and
 * key->offset as the two ids.
 */
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
/* Iterate over every ref packed into this item. */
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
/* Clamp the copy to namebuf's capacity and remember the overflow. */
1572 if (name_len <= BTRFS_NAME_LEN) {
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start immediately after the fixed-size header. */
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
/* Advance past header + name to the next packed ref. */
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at @slot of leaf @eb.
 *
 * Same scheme as process_inode_ref(), except that the second id passed to
 * add_inode_backref() is read from the entry itself
 * (btrfs_inode_extref_parent) instead of being taken from the key offset.
 */
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
/* Iterate over every extref packed into this item. */
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
/* Clamp the copy to namebuf's capacity and remember the overflow. */
1613 if (name_len <= BTRFS_NAME_LEN) {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start immediately after the fixed-size header. */
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
/* Advance past header + name to the next packed extref. */
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree (root->fs_info->csum_root) over the byte range
 * [@start, @start + @len) and measure how much of it is covered by
 * EXTENT_CSUM items.
 *
 * Each csum item covers (item size / csum_size) sectors starting at its key
 * offset. The search may land just after an item that still overlaps @start,
 * so the previous slot is inspected as well.
 * NOTE(review): the result is presumably accumulated into *@found; the
 * statements doing so are not visible here.
 */
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: the preceding csum item may still cover @start. */
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1664 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one. */
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the requested range. */
1680 if (key.offset >= start + len)
1683 if (key.offset > start)
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* One checksum of csum_size bytes per sector. */
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1698 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at @slot of leaf @eb for the current inode
 * record of @active_node.
 *
 * Tracks the contiguous range covered by file extents (extent_start /
 * extent_end), recording overlaps as I_ERR_FILE_EXTENT_OVERLAP and gaps as
 * holes; validates the extent fields, flagging I_ERR_BAD_FILE_EXTENT on
 * inconsistencies; accumulates found_size; and, for extents with a non-zero
 * disk_bytenr outside the data reloc tree, compares the extent against the
 * checksum coverage reported by count_csum_range().
 */
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
/* sectorsize - 1: used below for alignment checks and rounding up. */
1714 u64 mask = root->sectorsize - 1;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this offset. */
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
/* Extents must not overlap; a gap before this one is recorded as a hole. */
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary for extent_end. */
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced window must fit inside the extent's ram_bytes. */
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents backed by disk space count towards found_size. */
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checked over their on-disk length. */
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents: note csum presence and partial coverage. */
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode fs tree check.
 *
 * Items are visited in key order. Whenever the key's objectid moves past the
 * inode currently tracked by the active shared node, that record is marked
 * checked, offered to maybe_free_inode_rec(), and a record for the new
 * objectid is obtained with get_inode_rec(). Each item is then dispatched by
 * key type to the matching process_*() helper. Free-space-object items and
 * orphan items get special treatment before the dispatch.
 */
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
/* Top level of a root whose root_item has zero refs. */
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: finish the old record, start a new one. */
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1833 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used by the tree walkers (see update_nodes_refs()): the
 * address of the last tree block looked up at each level, the reference
 * count found for it, and whether the current root has to check it.
 */
1861 u64 bytenr[BTRFS_MAX_LEVEL];
1862 u64 refs[BTRFS_MAX_LEVEL];
1863 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are used by process_one_leaf_v2() before their definitions. */
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * First skips forward to the first INODE_ITEM or to the first change of
 * inode number, then runs check_inode_item(), OR-ing its result into err.
 * check_inode_item() may advance @path into another leaf. When that happens
 * the path is re-walked from the root level down: for every level whose block
 * changed, the cached refs are refreshed through update_nodes_refs(), and a
 * level that does not need checking leads to the buffers below *@level being
 * released.
 */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1884 int root_level = btrfs_header_level(root->node);
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started in, to detect a leaf switch later. */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1902 path->slots[0] = nritems;
1908 err |= check_inode_item(root, path, ext_ref);
1910 if (err & LAST_ITEM)
1913 /* still have inode items in this leaf */
1914 if (cur->start == cur_bytenr)
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1923 for (i = root_level; i >= 0; i--) {
/* Same block as cached for this level: nothing to refresh. */
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1933 if (!nrefs->need_check[i]) {
/* Drop the buffers held for the levels below *level. */
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot and running
 * to the last pointer in the node. Each child is requested with the block
 * address and generation stored in the parent and a size of root->nodesize.
 */
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1960 level = btrfs_header_level(node);
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same with the one
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1982 * We hope to check leaf owner too, but since subvol may share leaves,
1983 * which makes leaf owner check not so strong, key check should be
1984 * sufficient enough for that case.
/*
 * Validate @child against the pointer stored in @parent at @slot.
 *
 * Three properties are compared and every mismatch is reported on stderr:
 *   - the first key of the child vs. the key in the parent slot,
 *   - the child's header bytenr vs. the parent's block pointer,
 *   - the child's header generation vs. the parent's pointer generation.
 * In every message "wanted" is the value recorded in the parent and "have"
 * is the value the child actually carries.
 */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
/* wanted = generation recorded in the parent, have = the child's own. */
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_node_ptr_generation(parent, slot),
2017 btrfs_header_generation(child));
2023 * For a tree node or leaf that is shared, we don't need to iterate it
2024 * in every fs or file tree check. Here we find all of its root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the one that should check a tree block referenced
 * by the set of roots in @roots.
 *
 * A block referenced by a single root is handled separately up front. For
 * the rest, the first entry of the ulist's rb-tree is compared with
 * root->objectid.
 * NOTE(review): this relies on rb_first() yielding the smallest root id, as
 * the existing comment states; confirm against the ulist implementation.
 */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2048 * for a tree node or leaf, we record its reference count, so later if we still
2049 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refresh the per-level cache in @nrefs for the tree block at @bytenr.
 *
 * Work is only done when @bytenr differs from the block cached for @level:
 * the extent's reference count is looked up and stored together with the
 * block address, then btrfs_find_all_roots() and need_check() decide the
 * value of nrefs->need_check[@level]. One path sets need_check[@level] to 1
 * without consulting need_check().
 */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2056 struct ulist *roots;
/* Cache miss for this level: look everything up again. */
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2072 check = need_check(root, roots);
2074 nrefs->need_check[level] = check;
2076 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting from path->nodes[*level], walk down the
 * tree, processing each leaf with process_one_leaf().
 *
 * Reference counts of visited blocks are cached per level in @nrefs so that
 * btrfs_lookup_extent_info() is only called when the block at a level
 * changes. Blocks are passed to enter_shared_node(), and one outcome of that
 * call advances the slot without descending. Every child is read (with
 * readahead of its siblings), validated with check_child_node() and
 * btrfs_check_leaf()/btrfs_check_node(), and only then installed in the
 * path. An unreadable child is recorded via
 * btrfs_add_corrupt_extent_record().
 */
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when the starting block is unchanged. */
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
/* Header level must agree with the level we believe we are at. */
2128 if (btrfs_header_level(cur) != *level)
/* All slots of this block are consumed. */
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 ret = process_one_leaf(root, cur, wc);
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
/* Same caching scheme for the child one level down. */
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2157 ret = enter_shared_node(root, bytenr, refs,
2160 path->slots[*level]++;
/* Use the cached buffer if it is current, otherwise read from disk. */
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent as a corrupt extent. */
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2186 ret = check_child_node(cur, path->slots[*level], next);
2188 free_extent_buffer(next);
2193 if (btrfs_is_leaf(next))
2194 status = btrfs_check_leaf(root, NULL, next);
2196 status = btrfs_check_node(root, NULL, next);
2197 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198 free_extent_buffer(next);
/* Child is valid: descend and install it in the path. */
2203 *level = *level - 1;
2204 free_extent_buffer(path->nodes[*level]);
2205 path->nodes[*level] = next;
2206 path->slots[*level] = 0;
/* Mark this level as fully consumed so the walk up moves on. */
2209 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; note that an identical prototype already appears earlier in this file. */
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214 unsigned int ext_ref);
2217 * Returns >0 Found error, should continue
2218 * Returns <0 Fatal error, must exit the whole check
2219 * Returns 0 No errors found
/*
 * Descent used together with process_one_leaf_v2(): starting from
 * path->nodes[*level], walk down the tree and check each leaf.
 *
 * The block at every level is validated with btrfs_check_leaf() or
 * btrfs_check_node() before use. update_nodes_refs() refreshes the per-level
 * cache in @nrefs, and children whose need_check flag is clear are skipped by
 * advancing the slot. Children that are checked are read (with readahead of
 * their siblings), validated with check_child_node() and the block checkers,
 * and then installed in the path. An unreadable child is recorded via
 * btrfs_add_corrupt_extent_record().
 */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222 int *level, struct node_refs *nrefs, int ext_ref)
2224 enum btrfs_tree_block_status status;
2227 struct extent_buffer *next;
2228 struct extent_buffer *cur;
2232 WARN_ON(*level < 0);
2233 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 ret = update_nodes_refs(root, path->nodes[*level]->start,
2240 while (*level >= 0) {
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243 cur = path->nodes[*level];
/* Header level must agree with the level we believe we are at. */
2245 if (btrfs_header_level(cur) != *level)
/* All slots of this block are consumed. */
2248 if (path->slots[*level] >= btrfs_header_nritems(cur))
2250 /* Don't forget to check leaf/node validation */
2252 ret = btrfs_check_leaf(root, NULL, cur);
2253 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = process_one_leaf_v2(root, path, nrefs,
2261 ret = btrfs_check_node(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2267 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269 blocksize = root->nodesize;
2271 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: skip it. */
2274 if (!nrefs->need_check[*level - 1]) {
2275 path->slots[*level]++;
/* Use the cached buffer if it is current, otherwise read from disk. */
2279 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent as a corrupt extent. */
2285 if (!extent_buffer_uptodate(next)) {
2286 struct btrfs_key node_key;
2288 btrfs_node_key_to_cpu(path->nodes[*level],
2290 path->slots[*level]);
2291 btrfs_add_corrupt_extent_record(root->fs_info,
2293 path->nodes[*level]->start,
2294 root->nodesize, *level);
2300 ret = check_child_node(cur, path->slots[*level], next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Child is valid: descend and install it in the path. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level towards the root looking for a
 * level that still has an unvisited slot. Levels that are exhausted have
 * their buffer released and their path entry cleared, and when such a level
 * is the active shared node, leave_shared_node() is called for it.
 */
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323 struct walk_control *wc, int *level)
/* Named "leaf" but holds the block at whatever level i is. */
2326 struct extent_buffer *leaf;
2328 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329 leaf = path->nodes[i];
/* This level still has a sibling to the right. */
2330 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = NULL;
2337 BUG_ON(*level > wc->active_node);
2338 if (*level == wc->active_node)
2339 leave_shared_node(root, wc, *level);
/*
 * Ascent counterpart of walk_down_tree_v2(): climb from *level towards the
 * root looking for a level that still has an unvisited slot, releasing the
 * buffer and clearing the path entry of each exhausted level. Unlike
 * walk_up_tree() there is no shared-node bookkeeping.
 */
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
/* Named "leaf" but holds the block at whatever level i is. */
2350 struct extent_buffer *leaf;
2352 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353 leaf = path->nodes[i];
/* This level still has a sibling to the right. */
2354 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2359 free_extent_buffer(path->nodes[*level]);
2360 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The conditions tested are: an inode item was found and no errors are
 * recorded; nlink is 1 and found_link is 0; the backref list is not empty;
 * and the first backref has an inode ref with index 0 and name "..", with
 * neither a dir index nor a dir item found for it.
 */
2367 static int check_root_dir(struct inode_record *rec)
2369 struct inode_backref *backref;
2372 if (!rec->found_inode_item || rec->errors)
2374 if (rec->nlink != 1 || rec->found_link != 0)
2376 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2378 backref = to_inode_backref(rec->backrefs.next);
2379 if (!backref->found_inode_ref)
2381 if (backref->index != 0 || backref->namelen != 2 ||
2382 memcmp(backref->name, "..", 2))
2384 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of inode rec->ino in @root with rec->found_size and
 * clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, (u64)-1) and
 * then inspecting the item found around that position; the key's objectid is
 * verified before the item is modified. @path is released before returning.
 */
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root, struct btrfs_path *path,
2393 struct inode_record *rec)
2395 struct btrfs_inode_item *ei;
2396 struct btrfs_key key;
/* Offset (u64)-1 sorts after the real inode item key. */
2399 key.objectid = rec->ino;
2400 key.type = BTRFS_INODE_ITEM_KEY;
2401 key.offset = (u64)-1;
2403 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2407 if (!path->slots[0]) {
2414 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Make sure the item we landed on really belongs to this inode. */
2415 if (key.objectid != rec->ino) {
2420 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421 struct btrfs_inode_item);
2422 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423 btrfs_mark_buffer_dirty(path->nodes[0]);
2424 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu, matching repair_inode_nbytes(); %Lu is not a standard integer conversion. */
2425 printf("reset isize for dir %llu root %llu\n", rec->ino,
2426 root->root_key.objectid);
2428 btrfs_release_path(path);
/*
 * Add an orphan item for inode rec->ino to @root via btrfs_add_orphan_item(),
 * release @path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433 struct btrfs_root *root,
2434 struct btrfs_path *path,
2435 struct inode_record *rec)
2439 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440 btrfs_release_path(path);
2442 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of inode rec->ino in @root with rec->found_size,
 * clear I_ERR_FILE_NBYTES_WRONG on the record and report the change on
 * stdout. @path is released before returning.
 */
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447 struct btrfs_root *root,
2448 struct btrfs_path *path,
2449 struct inode_record *rec)
2451 struct btrfs_inode_item *ei;
2452 struct btrfs_key key;
2455 key.objectid = rec->ino;
2456 key.type = BTRFS_INODE_ITEM_KEY;
2459 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2466 /* Since ret == 0, no need to check anything */
2467 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468 struct btrfs_inode_item);
2469 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470 btrfs_mark_buffer_dirty(path->nodes[0]);
2471 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472 printf("reset nbytes for ino %llu root %llu\n",
2473 rec->ino, root->root_key.objectid);
2475 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref.
 *
 * In its own transaction, inserts an item keyed
 * (backref->dir, DIR_INDEX, backref->index) whose payload is a
 * btrfs_dir_item pointing at inode rec->ino, with the type derived from
 * rec->imode, no data, and the backref's name. Afterwards the backref is
 * marked as having a dir index and the directory's record in @inode_cache
 * has the name length added to found_size, with I_ERR_DIR_ISIZE_WRONG
 * brought in line with whether found_size now equals isize.
 * NOTE(review): the return value of btrfs_commit_transaction() is not used.
 */
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480 struct cache_tree *inode_cache,
2481 struct inode_record *rec,
2482 struct inode_backref *backref)
2484 struct btrfs_path path;
2485 struct btrfs_trans_handle *trans;
2486 struct btrfs_dir_item *dir_item;
2487 struct extent_buffer *leaf;
2488 struct btrfs_key key;
2489 struct btrfs_disk_key disk_key;
2490 struct inode_record *dir_rec;
2491 unsigned long name_ptr;
/* Item payload: fixed header followed by the name, no extra data. */
2492 u32 data_size = sizeof(*dir_item) + backref->namelen;
2495 trans = btrfs_start_transaction(root, 1);
2497 return PTR_ERR(trans);
2499 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500 (unsigned long long)rec->ino);
2502 btrfs_init_path(&path);
2503 key.objectid = backref->dir;
2504 key.type = BTRFS_DIR_INDEX_KEY;
2505 key.offset = backref->index;
2506 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2509 leaf = path.nodes[0];
2510 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino. */
2512 disk_key.objectid = cpu_to_le64(rec->ino);
2513 disk_key.type = BTRFS_INODE_ITEM_KEY;
2514 disk_key.offset = 0;
2516 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518 btrfs_set_dir_data_len(leaf, dir_item, 0);
2519 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored immediately after the fixed-size header. */
2520 name_ptr = (unsigned long)(dir_item + 1);
2521 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522 btrfs_mark_buffer_dirty(leaf);
2523 btrfs_release_path(&path);
2524 btrfs_commit_transaction(trans, root);
/* Bring the in-memory records in line with what was just written. */
2526 backref->found_dir_index = 1;
2527 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528 BUG_ON(IS_ERR(dir_rec));
2531 dir_rec->found_size += backref->namelen;
2532 if (dir_rec->found_size == dir_rec->isize &&
2533 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535 if (dir_rec->found_size != dir_rec->isize)
2536 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root, in its own
 * transaction.
 *
 * The entry is looked up with btrfs_lookup_dir_index() using the backref's
 * directory, name and index. Two removal paths exist: btrfs_del_item() for
 * the whole item and btrfs_delete_one_dir_name() for a single name within
 * it. The transaction is committed on both the early-exit path and the
 * normal path.
 * NOTE(review): the return value of btrfs_commit_transaction() is not used.
 */
2541 static int delete_dir_index(struct btrfs_root *root,
2542 struct inode_backref *backref)
2544 struct btrfs_trans_handle *trans;
2545 struct btrfs_dir_item *di;
2546 struct btrfs_path path;
2549 trans = btrfs_start_transaction(root, 1);
2551 return PTR_ERR(trans);
2553 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554 (unsigned long long)backref->dir,
2555 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556 (unsigned long long)root->objectid);
2558 btrfs_init_path(&path);
2559 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560 backref->name, backref->namelen,
2561 backref->index, -1);
/* Early-exit path: release and commit without deleting anything here. */
2564 btrfs_release_path(&path);
2565 btrfs_commit_transaction(trans, root);
2572 ret = btrfs_del_item(trans, root, &path);
2574 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2576 btrfs_release_path(&path);
2577 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino in @root, in its own
 * transaction.
 *
 * The new item is zero-filled and then populated from what the check has
 * discovered: generation = current transid, nbytes = rec->found_size, and
 * nlink is either 1 or rec->found_link. When dir items were seen for the
 * inode it becomes a directory (S_IFDIR | 0755, size = found_size), with a
 * warning if file extents were seen too; otherwise it becomes a regular file
 * (S_IFREG | 0755, size = extent_end). atime/ctime/mtime are set to the
 * current time and otime to zero.
 */
2581 static int create_inode_item(struct btrfs_root *root,
2582 struct inode_record *rec,
2585 struct btrfs_trans_handle *trans;
2586 struct btrfs_inode_item inode_item;
2587 time_t now = time(NULL);
2590 trans = btrfs_start_transaction(root, 1);
2591 if (IS_ERR(trans)) {
2592 ret = PTR_ERR(trans);
2596 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597 "be incomplete, please check permissions and content after "
2598 "the fsck completes.\n", (unsigned long long)root->objectid,
2599 (unsigned long long)rec->ino);
2601 memset(&inode_item, 0, sizeof(inode_item));
2602 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2604 btrfs_set_stack_inode_nlink(&inode_item, 1);
2606 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Directory if any dir item referenced it, regular file otherwise. */
2608 if (rec->found_dir_item) {
2609 if (rec->found_file_extent)
2610 fprintf(stderr, "root %llu inode %llu has both a dir "
2611 "item and extents, unsure if it is a dir or a "
2612 "regular file so setting it as a directory\n",
2613 (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2615 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* This condition is always true here; it acts as a plain else. */
2617 } else if (!rec->found_dir_item) {
2618 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2621 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2630 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2632 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of inode record @rec in @root.
 *
 * The function runs in one of two modes selected by the delete flag. The
 * visible repairs are:
 *   - recreate the inode item of the root directory when it is missing
 *     (non-delete mode);
 *   - delete a dir index that has no matching inode ref, or whose index does
 *     not match (REF_ERR_INDEX_UNMATCH), and drop that backref;
 *   - add a missing dir index when the dir item and inode ref exist
 *     (non-delete mode);
 *   - insert a dir item/index pair when only the inode ref exists and
 *     check_dir_conflict() allows it (non-delete mode);
 *   - recreate the inode item when all three references agree but the inode
 *     item is missing (non-delete mode).
 * Backrefs that end up fully consistent are removed from the list.
 * Returns ret when it is non-zero, otherwise the value of repaired.
 */
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637 struct inode_record *rec,
2638 struct cache_tree *inode_cache,
2641 struct inode_backref *tmp, *backref;
2642 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe variant: entries may be unlinked from the list while iterating. */
2646 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647 if (!delete && rec->ino == root_dirid) {
2648 if (!rec->found_inode_item) {
2649 ret = create_inode_item(root, rec, 1);
2656 /* Index 0 for root dir's are special, don't mess with it */
2657 if (rec->ino == root_dirid && backref->index == 0)
2661 ((backref->found_dir_index && !backref->found_inode_ref) ||
2662 (backref->found_dir_index && backref->found_inode_ref &&
2663 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664 ret = delete_dir_index(root, backref);
2668 list_del(&backref->list);
/* Dir item and inode ref agree, only the dir index is missing. */
2673 if (!delete && !backref->found_dir_index &&
2674 backref->found_dir_item && backref->found_inode_ref) {
2675 ret = add_missing_dir_index(root, inode_cache, rec,
/* Everything present and error free: the backref is resolved. */
2680 if (backref->found_dir_item &&
2681 backref->found_dir_index) {
2682 if (!backref->errors &&
2683 backref->found_inode_ref) {
2684 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir item/index pair. */
2691 if (!delete && (!backref->found_dir_index &&
2692 !backref->found_dir_item &&
2693 backref->found_inode_ref)) {
2694 struct btrfs_trans_handle *trans;
2695 struct btrfs_key location;
2697 ret = check_dir_conflict(root, backref->name,
2703 * let nlink fixing routine to handle it,
2704 * which can do it better.
2709 location.objectid = rec->ino;
2710 location.type = BTRFS_INODE_ITEM_KEY;
2711 location.offset = 0;
2713 trans = btrfs_start_transaction(root, 1);
2714 if (IS_ERR(trans)) {
2715 ret = PTR_ERR(trans);
2718 fprintf(stderr, "adding missing dir index/item pair "
2720 (unsigned long long)rec->ino);
2721 ret = btrfs_insert_dir_item(trans, root, backref->name,
2723 backref->dir, &location,
2724 imode_to_type(rec->imode),
2727 btrfs_commit_transaction(trans, root);
/* All references agree but the inode item itself is missing. */
2731 if (!delete && (backref->found_inode_ref &&
2732 backref->found_dir_index &&
2733 backref->found_dir_item &&
2734 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2735 !rec->found_inode_item)) {
2736 ret = create_inode_item(root, rec, 0);
2743 return ret ? ret : repaired;
2747 * To determine the file type for nlink/inode_item repair
2749 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2750 * Return -ENOENT if file type is not found.
/*
 * Sources, in order of preference: the mode of the inode item when one was
 * found, otherwise the filetype of the first backref that has a dir index or
 * a dir item.
 */
2752 static int find_file_type(struct inode_record *rec, u8 *type)
2754 struct inode_backref *backref;
2756 /* For inode item recovered case */
2757 if (rec->found_inode_item) {
2758 *type = imode_to_type(rec->imode);
2762 list_for_each_entry(backref, &rec->backrefs, list) {
2763 if (backref->found_dir_index || backref->found_dir_item) {
2764 *type = backref->filetype;
2772 * To determine the file name for nlink repair
2774 * Return 0 if file name is found, set name and namelen.
2775 * Return -ENOENT if file name is not found.
2777 static int find_file_name(struct inode_record *rec,
2778 char *name, int *namelen)
2780 struct inode_backref *backref;
2782 list_for_each_entry(backref, &rec->backrefs, list) {
2783 if (backref->found_dir_index || backref->found_dir_item ||
2784 backref->found_inode_ref) {
2785 memcpy(name, backref->name, backref->namelen);
2786 *namelen = backref->namelen;
2793 /* Reset the nlink of the inode to the correct one */
2794 static int reset_nlink(struct btrfs_trans_handle *trans,
2795 struct btrfs_root *root,
2796 struct btrfs_path *path,
2797 struct inode_record *rec)
2799 struct inode_backref *backref;
2800 struct inode_backref *tmp;
2801 struct btrfs_key key;
2802 struct btrfs_inode_item *inode_item;
2805 /* We don't believe this either, reset it and iterate backref */
2806 rec->found_link = 0;
2808 /* Remove all backref including the valid ones */
2809 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2810 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2811 backref->index, backref->name,
2812 backref->namelen, 0);
2816 /* remove invalid backref, so it won't be added back */
2817 if (!(backref->found_dir_index &&
2818 backref->found_dir_item &&
2819 backref->found_inode_ref)) {
2820 list_del(&backref->list);
2827 /* Set nlink to 0 */
2828 key.objectid = rec->ino;
2829 key.type = BTRFS_INODE_ITEM_KEY;
2831 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2838 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2839 struct btrfs_inode_item);
2840 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2841 btrfs_mark_buffer_dirty(path->nodes[0]);
2842 btrfs_release_path(path);
2845 * Add back valid inode_ref/dir_item/dir_index,
2846 * add_link() will handle the nlink inc, so new nlink must be correct
2848 list_for_each_entry(backref, &rec->backrefs, list) {
2849 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2850 backref->name, backref->namelen,
2851 backref->filetype, &backref->index, 1);
2856 btrfs_release_path(path);
2860 static int get_highest_inode(struct btrfs_trans_handle *trans,
2861 struct btrfs_root *root,
2862 struct btrfs_path *path,
2865 struct btrfs_key key, found_key;
2868 btrfs_init_path(path);
2869 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2871 key.type = BTRFS_INODE_ITEM_KEY;
2872 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2874 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2875 path->slots[0] - 1);
2876 *highest_ino = found_key.objectid;
2879 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2881 btrfs_release_path(path);
2885 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2886 struct btrfs_root *root,
2887 struct btrfs_path *path,
2888 struct inode_record *rec)
2890 char *dir_name = "lost+found";
2891 char namebuf[BTRFS_NAME_LEN] = {0};
2896 int name_recovered = 0;
2897 int type_recovered = 0;
2901 * Get file name and type first before these invalid inode ref
2902 * are deleted by remove_all_invalid_backref()
2904 name_recovered = !find_file_name(rec, namebuf, &namelen);
2905 type_recovered = !find_file_type(rec, &type);
2907 if (!name_recovered) {
2908 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2909 rec->ino, rec->ino);
2910 namelen = count_digits(rec->ino);
2911 sprintf(namebuf, "%llu", rec->ino);
2914 if (!type_recovered) {
2915 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2917 type = BTRFS_FT_REG_FILE;
2921 ret = reset_nlink(trans, root, path, rec);
2924 "Failed to reset nlink for inode %llu: %s\n",
2925 rec->ino, strerror(-ret));
2929 if (rec->found_link == 0) {
2930 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2934 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2935 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2938 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2939 dir_name, strerror(-ret));
2942 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2943 namebuf, namelen, type, NULL, 1);
2945 * Add ".INO" suffix several times to handle case where
2946 * "FILENAME.INO" is already taken by another file.
2948 while (ret == -EEXIST) {
2950 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2952 if (namelen + count_digits(rec->ino) + 1 >
2957 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2959 namelen += count_digits(rec->ino) + 1;
2960 ret = btrfs_add_link(trans, root, rec->ino,
2961 lost_found_ino, namebuf,
2962 namelen, type, NULL, 1);
2966 "Failed to link the inode %llu to %s dir: %s\n",
2967 rec->ino, dir_name, strerror(-ret));
2971 * Just increase the found_link, don't actually add the
2972 * backref. This will make things easier and this inode
2973 * record will be freed after the repair is done.
2974 * So fsck will not report problem about this inode.
2977 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2978 namelen, namebuf, dir_name);
2980 printf("Fixed the nlink of inode %llu\n", rec->ino);
2983 * Clear the flag anyway, or we will loop forever for the same inode
2984 * as it will not be removed from the bad inode list and the dead loop
2987 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2988 btrfs_release_path(path);
2993 * Check if there is any normal(reg or prealloc) file extent for given
2995 * This is used to determine the file type when neither its dir_index/item nor
2996 * its inode_item exists.
2998 * This will *NOT* report errors: if any error happens, the inode is simply
2999 * considered to have no normal file extent.
3001 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3003 struct btrfs_path path;
3004 struct btrfs_key key;
3005 struct btrfs_key found_key;
3006 struct btrfs_file_extent_item *fi;
3010 btrfs_init_path(&path);
3012 key.type = BTRFS_EXTENT_DATA_KEY;
3015 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3020 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3021 ret = btrfs_next_leaf(root, &path);
3028 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3030 if (found_key.objectid != ino ||
3031 found_key.type != BTRFS_EXTENT_DATA_KEY)
3033 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3034 struct btrfs_file_extent_item);
3035 type = btrfs_file_extent_type(path.nodes[0], fi);
3036 if (type != BTRFS_FILE_EXTENT_INLINE) {
3042 btrfs_release_path(&path);
3046 static u32 btrfs_type_to_imode(u8 type)
3048 static u32 imode_by_btrfs_type[] = {
3049 [BTRFS_FT_REG_FILE] = S_IFREG,
3050 [BTRFS_FT_DIR] = S_IFDIR,
3051 [BTRFS_FT_CHRDEV] = S_IFCHR,
3052 [BTRFS_FT_BLKDEV] = S_IFBLK,
3053 [BTRFS_FT_FIFO] = S_IFIFO,
3054 [BTRFS_FT_SOCK] = S_IFSOCK,
3055 [BTRFS_FT_SYMLINK] = S_IFLNK,
3058 return imode_by_btrfs_type[(type)];
3061 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3062 struct btrfs_root *root,
3063 struct btrfs_path *path,
3064 struct inode_record *rec)
3068 int type_recovered = 0;
3071 printf("Trying to rebuild inode:%llu\n", rec->ino);
3073 type_recovered = !find_file_type(rec, &filetype);
3076 * Try to determine inode type if type not found.
3078 * For found regular file extent, it must be FILE.
3079 * For found dir_item/index, it must be DIR.
3081 * For undetermined one, use FILE as fallback.
3084 * 1. If found backref(inode_index/item is already handled) to it,
3086 * Need new inode-inode ref structure to allow search for that.
3088 if (!type_recovered) {
3089 if (rec->found_file_extent &&
3090 find_normal_file_extent(root, rec->ino)) {
3092 filetype = BTRFS_FT_REG_FILE;
3093 } else if (rec->found_dir_item) {
3095 filetype = BTRFS_FT_DIR;
3096 } else if (!list_empty(&rec->orphan_extents)) {
3098 filetype = BTRFS_FT_REG_FILE;
3100 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3103 filetype = BTRFS_FT_REG_FILE;
3107 ret = btrfs_new_inode(trans, root, rec->ino,
3108 mode | btrfs_type_to_imode(filetype));
3113 * Here inode rebuild is done, we only rebuild the inode item,
3114 * don't repair the nlink(like move to lost+found).
3115 * That is the job of nlink repair.
3117 * We just fill the record and return
3119 rec->found_dir_item = 1;
3120 rec->imode = mode | btrfs_type_to_imode(filetype);
3122 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3123 /* Ensure the inode_nlinks repair function will be called */
3124 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3129 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3130 struct btrfs_root *root,
3131 struct btrfs_path *path,
3132 struct inode_record *rec)
3134 struct orphan_data_extent *orphan;
3135 struct orphan_data_extent *tmp;
3138 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3140 * Check for conflicting file extents
3142 * Here we don't know whether the extents is compressed or not,
3143 * so we can only assume it not compressed nor data offset,
3144 * and use its disk_len as extent length.
3146 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3147 orphan->offset, orphan->disk_len, 0);
3148 btrfs_release_path(path);
3153 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3154 orphan->disk_bytenr, orphan->disk_len);
3155 ret = btrfs_free_extent(trans,
3156 root->fs_info->extent_root,
3157 orphan->disk_bytenr, orphan->disk_len,
3158 0, root->objectid, orphan->objectid,
3163 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3164 orphan->offset, orphan->disk_bytenr,
3165 orphan->disk_len, orphan->disk_len);
3169 /* Update file size info */
3170 rec->found_size += orphan->disk_len;
3171 if (rec->found_size == rec->nbytes)
3172 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3174 /* Update the file extent hole info too */
3175 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3179 if (RB_EMPTY_ROOT(&rec->holes))
3180 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3182 list_del(&orphan->list);
3185 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3190 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3191 struct btrfs_root *root,
3192 struct btrfs_path *path,
3193 struct inode_record *rec)
3195 struct rb_node *node;
3196 struct file_extent_hole *hole;
3200 node = rb_first(&rec->holes);
3204 hole = rb_entry(node, struct file_extent_hole, node);
3205 ret = btrfs_punch_hole(trans, root, rec->ino,
3206 hole->start, hole->len);
3209 ret = del_file_extent_hole(&rec->holes, hole->start,
3213 if (RB_EMPTY_ROOT(&rec->holes))
3214 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3215 node = rb_first(&rec->holes);
3217 /* special case for a file losing all its file extent */
3219 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3220 round_up(rec->isize, root->sectorsize));
3224 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3225 rec->ino, root->objectid);
3230 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3232 struct btrfs_trans_handle *trans;
3233 struct btrfs_path path;
3236 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3237 I_ERR_NO_ORPHAN_ITEM |
3238 I_ERR_LINK_COUNT_WRONG |
3239 I_ERR_NO_INODE_ITEM |
3240 I_ERR_FILE_EXTENT_ORPHAN |
3241 I_ERR_FILE_EXTENT_DISCOUNT|
3242 I_ERR_FILE_NBYTES_WRONG)))
3246 * For nlink repair, it may create a dir and add link, so
3247 * 2 for parent(256)'s dir_index and dir_item
3248 * 2 for lost+found dir's inode_item and inode_ref
3249 * 1 for the new inode_ref of the file
3250 * 2 for lost+found dir's dir_index and dir_item for the file
3252 trans = btrfs_start_transaction(root, 7);
3254 return PTR_ERR(trans);
3256 btrfs_init_path(&path);
3257 if (rec->errors & I_ERR_NO_INODE_ITEM)
3258 ret = repair_inode_no_item(trans, root, &path, rec);
3259 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3260 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3261 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3262 ret = repair_inode_discount_extent(trans, root, &path, rec);
3263 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3264 ret = repair_inode_isize(trans, root, &path, rec);
3265 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3266 ret = repair_inode_orphan_item(trans, root, &path, rec);
3267 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3268 ret = repair_inode_nlinks(trans, root, &path, rec);
3269 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3270 ret = repair_inode_nbytes(trans, root, &path, rec);
3271 btrfs_commit_transaction(trans, root);
3272 btrfs_release_path(&path);
3276 static int check_inode_recs(struct btrfs_root *root,
3277 struct cache_tree *inode_cache)
3279 struct cache_extent *cache;
3280 struct ptr_node *node;
3281 struct inode_record *rec;
3282 struct inode_backref *backref;
3287 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3289 if (btrfs_root_refs(&root->root_item) == 0) {
3290 if (!cache_tree_empty(inode_cache))
3291 fprintf(stderr, "warning line %d\n", __LINE__);
3296 * We need to repair backrefs first because we could change some of the
3297 * errors in the inode recs.
3299 * We also need to go through and delete invalid backrefs first and then
3300 * add the correct ones second. We do this because we may get EEXIST
3301 * when adding back the correct index because we hadn't yet deleted the
3304 * For example, if we were missing a dir index then the directories
3305 * isize would be wrong, so if we fixed the isize to what we thought it
3306 * would be and then fixed the backref we'd still have a invalid fs, so
3307 * we need to add back the dir index and then check to see if the isize
3312 if (stage == 3 && !err)
3315 cache = search_cache_extent(inode_cache, 0);
3316 while (repair && cache) {
3317 node = container_of(cache, struct ptr_node, cache);
3319 cache = next_cache_extent(cache);
3321 /* Need to free everything up and rescan */
3323 remove_cache_extent(inode_cache, &node->cache);
3325 free_inode_rec(rec);
3329 if (list_empty(&rec->backrefs))
3332 ret = repair_inode_backrefs(root, rec, inode_cache,
3346 rec = get_inode_rec(inode_cache, root_dirid, 0);
3347 BUG_ON(IS_ERR(rec));
3349 ret = check_root_dir(rec);
3351 fprintf(stderr, "root %llu root dir %llu error\n",
3352 (unsigned long long)root->root_key.objectid,
3353 (unsigned long long)root_dirid);
3354 print_inode_error(root, rec);
3359 struct btrfs_trans_handle *trans;
3361 trans = btrfs_start_transaction(root, 1);
3362 if (IS_ERR(trans)) {
3363 err = PTR_ERR(trans);
3368 "root %llu missing its root dir, recreating\n",
3369 (unsigned long long)root->objectid);
3371 ret = btrfs_make_root_dir(trans, root, root_dirid);
3374 btrfs_commit_transaction(trans, root);
3378 fprintf(stderr, "root %llu root dir %llu not found\n",
3379 (unsigned long long)root->root_key.objectid,
3380 (unsigned long long)root_dirid);
3384 cache = search_cache_extent(inode_cache, 0);
3387 node = container_of(cache, struct ptr_node, cache);
3389 remove_cache_extent(inode_cache, &node->cache);
3391 if (rec->ino == root_dirid ||
3392 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3393 free_inode_rec(rec);
3397 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3398 ret = check_orphan_item(root, rec->ino);
3400 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3401 if (can_free_inode_rec(rec)) {
3402 free_inode_rec(rec);
3407 if (!rec->found_inode_item)
3408 rec->errors |= I_ERR_NO_INODE_ITEM;
3409 if (rec->found_link != rec->nlink)
3410 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3412 ret = try_repair_inode(root, rec);
3413 if (ret == 0 && can_free_inode_rec(rec)) {
3414 free_inode_rec(rec);
3420 if (!(repair && ret == 0))
3422 print_inode_error(root, rec);
3423 list_for_each_entry(backref, &rec->backrefs, list) {
3424 if (!backref->found_dir_item)
3425 backref->errors |= REF_ERR_NO_DIR_ITEM;
3426 if (!backref->found_dir_index)
3427 backref->errors |= REF_ERR_NO_DIR_INDEX;
3428 if (!backref->found_inode_ref)
3429 backref->errors |= REF_ERR_NO_INODE_REF;
3430 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3431 " namelen %u name %s filetype %d errors %x",
3432 (unsigned long long)backref->dir,
3433 (unsigned long long)backref->index,
3434 backref->namelen, backref->name,
3435 backref->filetype, backref->errors);
3436 print_ref_error(backref->errors);
3438 free_inode_rec(rec);
3440 return (error > 0) ? -1 : 0;
3443 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3446 struct cache_extent *cache;
3447 struct root_record *rec = NULL;
3450 cache = lookup_cache_extent(root_cache, objectid, 1);
3452 rec = container_of(cache, struct root_record, cache);
3454 rec = calloc(1, sizeof(*rec));
3456 return ERR_PTR(-ENOMEM);
3457 rec->objectid = objectid;
3458 INIT_LIST_HEAD(&rec->backrefs);
3459 rec->cache.start = objectid;
3460 rec->cache.size = 1;
3462 ret = insert_cache_extent(root_cache, &rec->cache);
3464 return ERR_PTR(-EEXIST);
3469 static struct root_backref *get_root_backref(struct root_record *rec,
3470 u64 ref_root, u64 dir, u64 index,
3471 const char *name, int namelen)
3473 struct root_backref *backref;
3475 list_for_each_entry(backref, &rec->backrefs, list) {
3476 if (backref->ref_root != ref_root || backref->dir != dir ||
3477 backref->namelen != namelen)
3479 if (memcmp(name, backref->name, namelen))
3484 backref = calloc(1, sizeof(*backref) + namelen + 1);
3487 backref->ref_root = ref_root;
3489 backref->index = index;
3490 backref->namelen = namelen;
3491 memcpy(backref->name, name, namelen);
3492 backref->name[namelen] = '\0';
3493 list_add_tail(&backref->list, &rec->backrefs);
3497 static void free_root_record(struct cache_extent *cache)
3499 struct root_record *rec;
3500 struct root_backref *backref;
3502 rec = container_of(cache, struct root_record, cache);
3503 while (!list_empty(&rec->backrefs)) {
3504 backref = to_root_backref(rec->backrefs.next);
3505 list_del(&backref->list);
3512 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3514 static int add_root_backref(struct cache_tree *root_cache,
3515 u64 root_id, u64 ref_root, u64 dir, u64 index,
3516 const char *name, int namelen,
3517 int item_type, int errors)
3519 struct root_record *rec;
3520 struct root_backref *backref;
3522 rec = get_root_rec(root_cache, root_id);
3523 BUG_ON(IS_ERR(rec));
3524 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3527 backref->errors |= errors;
3529 if (item_type != BTRFS_DIR_ITEM_KEY) {
3530 if (backref->found_dir_index || backref->found_back_ref ||
3531 backref->found_forward_ref) {
3532 if (backref->index != index)
3533 backref->errors |= REF_ERR_INDEX_UNMATCH;
3535 backref->index = index;
3539 if (item_type == BTRFS_DIR_ITEM_KEY) {
3540 if (backref->found_forward_ref)
3542 backref->found_dir_item = 1;
3543 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3544 backref->found_dir_index = 1;
3545 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3546 if (backref->found_forward_ref)
3547 backref->errors |= REF_ERR_DUP_ROOT_REF;
3548 else if (backref->found_dir_item)
3550 backref->found_forward_ref = 1;
3551 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3552 if (backref->found_back_ref)
3553 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3554 backref->found_back_ref = 1;
3559 if (backref->found_forward_ref && backref->found_dir_item)
3560 backref->reachable = 1;
3564 static int merge_root_recs(struct btrfs_root *root,
3565 struct cache_tree *src_cache,
3566 struct cache_tree *dst_cache)
3568 struct cache_extent *cache;
3569 struct ptr_node *node;
3570 struct inode_record *rec;
3571 struct inode_backref *backref;
3574 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3575 free_inode_recs_tree(src_cache);
3580 cache = search_cache_extent(src_cache, 0);
3583 node = container_of(cache, struct ptr_node, cache);
3585 remove_cache_extent(src_cache, &node->cache);
3588 ret = is_child_root(root, root->objectid, rec->ino);
3594 list_for_each_entry(backref, &rec->backrefs, list) {
3595 BUG_ON(backref->found_inode_ref);
3596 if (backref->found_dir_item)
3597 add_root_backref(dst_cache, rec->ino,
3598 root->root_key.objectid, backref->dir,
3599 backref->index, backref->name,
3600 backref->namelen, BTRFS_DIR_ITEM_KEY,
3602 if (backref->found_dir_index)
3603 add_root_backref(dst_cache, rec->ino,
3604 root->root_key.objectid, backref->dir,
3605 backref->index, backref->name,
3606 backref->namelen, BTRFS_DIR_INDEX_KEY,
3610 free_inode_rec(rec);
3617 static int check_root_refs(struct btrfs_root *root,
3618 struct cache_tree *root_cache)
3620 struct root_record *rec;
3621 struct root_record *ref_root;
3622 struct root_backref *backref;
3623 struct cache_extent *cache;
3629 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3630 BUG_ON(IS_ERR(rec));
3633 /* fixme: this can not detect circular references */
3636 cache = search_cache_extent(root_cache, 0);
3640 rec = container_of(cache, struct root_record, cache);
3641 cache = next_cache_extent(cache);
3643 if (rec->found_ref == 0)
3646 list_for_each_entry(backref, &rec->backrefs, list) {
3647 if (!backref->reachable)
3650 ref_root = get_root_rec(root_cache,
3652 BUG_ON(IS_ERR(ref_root));
3653 if (ref_root->found_ref > 0)
3656 backref->reachable = 0;
3658 if (rec->found_ref == 0)
3664 cache = search_cache_extent(root_cache, 0);
3668 rec = container_of(cache, struct root_record, cache);
3669 cache = next_cache_extent(cache);
3671 if (rec->found_ref == 0 &&
3672 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3673 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3674 ret = check_orphan_item(root->fs_info->tree_root,
3680 * If we don't have a root item then we likely just have
3681 * a dir item in a snapshot for this root but no actual
3682 * ref key or anything so it's meaningless.
3684 if (!rec->found_root_item)
3687 fprintf(stderr, "fs tree %llu not referenced\n",
3688 (unsigned long long)rec->objectid);
3692 if (rec->found_ref > 0 && !rec->found_root_item)
3694 list_for_each_entry(backref, &rec->backrefs, list) {
3695 if (!backref->found_dir_item)
3696 backref->errors |= REF_ERR_NO_DIR_ITEM;
3697 if (!backref->found_dir_index)
3698 backref->errors |= REF_ERR_NO_DIR_INDEX;
3699 if (!backref->found_back_ref)
3700 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3701 if (!backref->found_forward_ref)
3702 backref->errors |= REF_ERR_NO_ROOT_REF;
3703 if (backref->reachable && backref->errors)
3710 fprintf(stderr, "fs tree %llu refs %u %s\n",
3711 (unsigned long long)rec->objectid, rec->found_ref,
3712 rec->found_root_item ? "" : "not found");
3714 list_for_each_entry(backref, &rec->backrefs, list) {
3715 if (!backref->reachable)
3717 if (!backref->errors && rec->found_root_item)
3719 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3720 " index %llu namelen %u name %s errors %x\n",
3721 (unsigned long long)backref->ref_root,
3722 (unsigned long long)backref->dir,
3723 (unsigned long long)backref->index,
3724 backref->namelen, backref->name,
3726 print_ref_error(backref->errors);
3729 return errors > 0 ? 1 : 0;
3732 static int process_root_ref(struct extent_buffer *eb, int slot,
3733 struct btrfs_key *key,
3734 struct cache_tree *root_cache)
3740 struct btrfs_root_ref *ref;
3741 char namebuf[BTRFS_NAME_LEN];
3744 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3746 dirid = btrfs_root_ref_dirid(eb, ref);
3747 index = btrfs_root_ref_sequence(eb, ref);
3748 name_len = btrfs_root_ref_name_len(eb, ref);
3750 if (name_len <= BTRFS_NAME_LEN) {
3754 len = BTRFS_NAME_LEN;
3755 error = REF_ERR_NAME_TOO_LONG;
3757 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3759 if (key->type == BTRFS_ROOT_REF_KEY) {
3760 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3761 index, namebuf, len, key->type, error);
3763 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3764 index, namebuf, len, key->type, error);
3769 static void free_corrupt_block(struct cache_extent *cache)
3771 struct btrfs_corrupt_block *corrupt;
3773 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3777 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3780 * Repair the btree of the given root.
3782 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3783 * and then rebalance the tree.
3784 * After the fix, the btree should be writable.
3786 static int repair_btree(struct btrfs_root *root,
3787 struct cache_tree *corrupt_blocks)
3789 struct btrfs_trans_handle *trans;
3790 struct btrfs_path path;
3791 struct btrfs_corrupt_block *corrupt;
3792 struct cache_extent *cache;
3793 struct btrfs_key key;
3798 if (cache_tree_empty(corrupt_blocks))
3801 trans = btrfs_start_transaction(root, 1);
3802 if (IS_ERR(trans)) {
3803 ret = PTR_ERR(trans);
3804 fprintf(stderr, "Error starting transaction: %s\n",
3808 btrfs_init_path(&path);
3809 cache = first_cache_extent(corrupt_blocks);
3811 corrupt = container_of(cache, struct btrfs_corrupt_block,
3813 level = corrupt->level;
3814 path.lowest_level = level;
3815 key.objectid = corrupt->key.objectid;
3816 key.type = corrupt->key.type;
3817 key.offset = corrupt->key.offset;
3820 * Here we don't want to do any tree balance, since it may
3821 * cause a balance with corrupted brother leaf/node,
3822 * so ins_len set to 0 here.
3823 * Balance will be done after all corrupt node/leaf is deleted.
3825 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3828 offset = btrfs_node_blockptr(path.nodes[level],
3831 /* Remove the ptr */
3832 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3836 * Remove the corresponding extent
3837 * return value is not concerned.
3839 btrfs_release_path(&path);
3840 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3841 0, root->root_key.objectid,
3843 cache = next_cache_extent(cache);
3846 /* Balance the btree using btrfs_search_slot() */
3847 cache = first_cache_extent(corrupt_blocks);
3849 corrupt = container_of(cache, struct btrfs_corrupt_block,
3851 memcpy(&key, &corrupt->key, sizeof(key));
3852 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3855 /* return will always >0 since it won't find the item */
3857 btrfs_release_path(&path);
3858 cache = next_cache_extent(cache);
3861 btrfs_commit_transaction(trans, root);
3862 btrfs_release_path(&path);
3866 static int check_fs_root(struct btrfs_root *root,
3867 struct cache_tree *root_cache,
3868 struct walk_control *wc)
3874 struct btrfs_path path;
3875 struct shared_node root_node;
3876 struct root_record *rec;
3877 struct btrfs_root_item *root_item = &root->root_item;
3878 struct cache_tree corrupt_blocks;
3879 struct orphan_data_extent *orphan;
3880 struct orphan_data_extent *tmp;
3881 enum btrfs_tree_block_status status;
3882 struct node_refs nrefs;
3885 * Reuse the corrupt_block cache tree to record corrupted tree block
3887 * Unlike the usage in extent tree check, here we do it in a per
3888 * fs/subvol tree base.
3890 cache_tree_init(&corrupt_blocks);
3891 root->fs_info->corrupt_blocks = &corrupt_blocks;
3893 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3894 rec = get_root_rec(root_cache, root->root_key.objectid);
3895 BUG_ON(IS_ERR(rec));
3896 if (btrfs_root_refs(root_item) > 0)
3897 rec->found_root_item = 1;
3900 btrfs_init_path(&path);
3901 memset(&root_node, 0, sizeof(root_node));
3902 cache_tree_init(&root_node.root_cache);
3903 cache_tree_init(&root_node.inode_cache);
3904 memset(&nrefs, 0, sizeof(nrefs));
3906 /* Move the orphan extent record to corresponding inode_record */
3907 list_for_each_entry_safe(orphan, tmp,
3908 &root->orphan_data_extents, list) {
3909 struct inode_record *inode;
3911 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3913 BUG_ON(IS_ERR(inode));
3914 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3915 list_move(&orphan->list, &inode->orphan_extents);
3918 level = btrfs_header_level(root->node);
3919 memset(wc->nodes, 0, sizeof(wc->nodes));
3920 wc->nodes[level] = &root_node;
3921 wc->active_node = level;
3922 wc->root_level = level;
3924 /* We may not have checked the root block, lets do that now */
3925 if (btrfs_is_leaf(root->node))
3926 status = btrfs_check_leaf(root, NULL, root->node);
3928 status = btrfs_check_node(root, NULL, root->node);
3929 if (status != BTRFS_TREE_BLOCK_CLEAN)
3932 if (btrfs_root_refs(root_item) > 0 ||
3933 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3934 path.nodes[level] = root->node;
3935 extent_buffer_get(root->node);
3936 path.slots[level] = 0;
3938 struct btrfs_key key;
3939 struct btrfs_disk_key found_key;
3941 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3942 level = root_item->drop_level;
3943 path.lowest_level = level;
3944 if (level > btrfs_header_level(root->node) ||
3945 level >= BTRFS_MAX_LEVEL) {
3946 error("ignoring invalid drop level: %u", level);
3949 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3952 btrfs_node_key(path.nodes[level], &found_key,
3954 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3955 sizeof(found_key)));
3959 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3965 wret = walk_up_tree(root, &path, wc, &level);
3972 btrfs_release_path(&path);
3974 if (!cache_tree_empty(&corrupt_blocks)) {
3975 struct cache_extent *cache;
3976 struct btrfs_corrupt_block *corrupt;
3978 printf("The following tree block(s) is corrupted in tree %llu:\n",
3979 root->root_key.objectid);
3980 cache = first_cache_extent(&corrupt_blocks);
3982 corrupt = container_of(cache,
3983 struct btrfs_corrupt_block,
3985 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3986 cache->start, corrupt->level,
3987 corrupt->key.objectid, corrupt->key.type,
3988 corrupt->key.offset);
3989 cache = next_cache_extent(cache);
3992 printf("Try to repair the btree for root %llu\n",
3993 root->root_key.objectid);
3994 ret = repair_btree(root, &corrupt_blocks);
3996 fprintf(stderr, "Failed to repair btree: %s\n",
3999 printf("Btree for root %llu is fixed\n",
4000 root->root_key.objectid);
4004 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4008 if (root_node.current) {
4009 root_node.current->checked = 1;
4010 maybe_free_inode_rec(&root_node.inode_cache,
4014 err = check_inode_recs(root, &root_node.inode_cache);
4018 free_corrupt_blocks_tree(&corrupt_blocks);
4019 root->fs_info->corrupt_blocks = NULL;
4020 free_orphan_data_extents(&root->orphan_data_extents);
4024 static int fs_root_objectid(u64 objectid)
4026 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4027 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4029 return is_fstree(objectid);
/*
 * Scan the tree root and check every fs/subvolume tree found there.
 *
 * The search runs on root->fs_info->tree_root.  For each ROOT_ITEM whose
 * objectid passes fs_root_objectid() the tree is read and handed to
 * check_fs_root(); ROOT_REF and ROOT_BACKREF items are passed to
 * process_root_ref().
 *
 * @root:       a root of the filesystem being checked; the visible code only
 *              uses root->fs_info
 * @root_cache: root record cache, passed to check_fs_root() and dropped with
 *              free_root_recs_tree() on the two release paths noted below
 *
 * Notes:
 * - BTRFS_TREE_RELOC_OBJECTID roots are read with
 *   btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root()
 *   once checked; for the other branch key.offset is set to (u64)-1 before
 *   btrfs_read_fs_root() is called.
 * - When check_fs_root() returns -EAGAIN, or when tree_root->node no longer
 *   equals the pointer saved in tree_node, the root record cache and the
 *   path are released.  NOTE(review): the jump that follows is not visible
 *   in this excerpt - presumably the scan restarts; confirm.
 * - NOTE(review): cache_tree_empty(&wc.shared) is tested right after
 *   free_extent_cache_tree(&wc.shared); confirm whether the warning can
 *   still fire at that point.
 * - Progress reporting uses the file-scope ctx: task_start() is called when
 *   ctx.progress_enabled is set and task_stop() near the end.
 */
4032 static int check_fs_roots(struct btrfs_root *root,
4033 struct cache_tree *root_cache)
4035 struct btrfs_path path;
4036 struct btrfs_key key;
4037 struct walk_control wc;
4038 struct extent_buffer *leaf, *tree_node;
4039 struct btrfs_root *tmp_root;
4040 struct btrfs_root *tree_root = root->fs_info->tree_root;
4044 if (ctx.progress_enabled) {
4045 ctx.tp = TASK_FS_ROOTS;
4046 task_start(ctx.info);
4050 * Just in case we made any changes to the extent tree that weren't
4051 * reflected into the free space cache yet.
4054 reset_cached_block_groups(root->fs_info);
4055 memset(&wc, 0, sizeof(wc));
4056 cache_tree_init(&wc.shared);
4057 btrfs_init_path(&path);
4062 key.type = BTRFS_ROOT_ITEM_KEY;
4063 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4068 tree_node = tree_root->node;
4070 if (tree_node != tree_root->node) {
4071 free_root_recs_tree(root_cache);
4072 btrfs_release_path(&path);
4075 leaf = path.nodes[0];
4076 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4077 ret = btrfs_next_leaf(tree_root, &path);
4083 leaf = path.nodes[0];
4085 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4086 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4087 fs_root_objectid(key.objectid)) {
4088 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4089 tmp_root = btrfs_read_fs_root_no_cache(
4090 root->fs_info, &key);
4092 key.offset = (u64)-1;
4093 tmp_root = btrfs_read_fs_root(
4094 root->fs_info, &key);
4096 if (IS_ERR(tmp_root)) {
4100 ret = check_fs_root(tmp_root, root_cache, &wc);
4101 if (ret == -EAGAIN) {
4102 free_root_recs_tree(root_cache);
4103 btrfs_release_path(&path);
4108 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4109 btrfs_free_fs_root(tmp_root);
4110 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4111 key.type == BTRFS_ROOT_BACKREF_KEY) {
4112 process_root_ref(leaf, path.slots[0], &key,
4119 btrfs_release_path(&path);
4121 free_extent_cache_tree(&wc.shared);
4122 if (!cache_tree_empty(&wc.shared))
4123 fprintf(stderr, "warning line %d\n", __LINE__);
4125 task_stop(ctx.info);
4131 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4132 * INODE_REF/INODE_EXTREF match.
4134 * @root: the root of the fs/file tree
4135 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4136 * @key: the key of the DIR_ITEM/DIR_INDEX
4137 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4138 * distinguish root_dir from a normal dir/file
4139 * @name: the name in the INODE_REF/INODE_EXTREF
4140 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4141 * @mode: the st_mode of INODE_ITEM
4143 * Return 0 if no error occurred.
4144 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4145 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4147 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4148 * do not match for normal dir/file.
/*
 * Look up the DIR_ITEM/DIR_INDEX stored at @key and compare it with one
 * INODE_REF/INODE_EXTREF entry described by @ref_key, @name/@namelen and
 * @mode.
 *
 * The lookup is a btrfs_search_slot() call with a NULL transaction handle.
 * Every btrfs_dir_item packed into the found item is compared in turn
 * against @ref_key->objectid, the file type derived from @mode, and the
 * name.  The result reported on the mismatch path is DIR_ITEM_MISMATCH.
 *
 * NOTE(review): @name is printed with %s in the error messages below.  The
 * callers visible in this file pass a BTRFS_NAME_LEN byte buffer filled by
 * read_extent_buffer() and never explicitly NUL-terminated; confirm that a
 * maximum-length name cannot be read past the end of that buffer.
 */
4150 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4151 struct btrfs_key *key, u64 index, char *name,
4152 u32 namelen, u32 mode)
4154 struct btrfs_path path;
4155 struct extent_buffer *node;
4156 struct btrfs_dir_item *di;
4157 struct btrfs_key location;
4158 char namebuf[BTRFS_NAME_LEN] = {0};
4168 btrfs_init_path(&path);
4169 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4171 ret = DIR_ITEM_MISSING;
4175 /* Process root dir and goto out*/
4178 ret = ROOT_DIR_ERROR;
4180 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4182 ref_key->type == BTRFS_INODE_REF_KEY ?
4184 ref_key->objectid, ref_key->offset,
4185 key->type == BTRFS_DIR_ITEM_KEY ?
4186 "DIR_ITEM" : "DIR_INDEX");
4194 /* Process normal file/dir */
4196 ret = DIR_ITEM_MISSING;
4198 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4200 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4201 ref_key->objectid, ref_key->offset,
4202 key->type == BTRFS_DIR_ITEM_KEY ?
4203 "DIR_ITEM" : "DIR_INDEX",
4204 key->objectid, key->offset, namelen, name,
4205 imode_to_type(mode));
4209 /* Check whether inode_id/filetype/name match */
4210 node = path.nodes[0];
4211 slot = path.slots[0];
4212 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4213 total = btrfs_item_size_nr(node, slot);
/* One item can pack several dir entries; they are walked one by one. */
4214 while (cur < total) {
/* Assume a mismatch until this entry passes every check below. */
4215 ret = DIR_ITEM_MISMATCH;
4216 name_len = btrfs_dir_name_len(node, di);
4217 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at INODE_ITEM (ref_key->objectid, 0). */
4219 btrfs_dir_item_key_to_cpu(node, di, &location);
4220 if (location.objectid != ref_key->objectid ||
4221 location.type != BTRFS_INODE_ITEM_KEY ||
4222 location.offset != 0)
4225 filetype = btrfs_dir_type(node, di);
4226 if (imode_to_type(mode) != filetype)
/* Never read more than namebuf can hold; longer names are trimmed. */
4229 if (name_len <= BTRFS_NAME_LEN) {
4232 len = BTRFS_NAME_LEN;
4233 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4235 key->type == BTRFS_DIR_ITEM_KEY ?
4236 "DIR_ITEM" : "DIR_INDEX",
4237 key->objectid, key->offset, name_len);
/* The name bytes follow the btrfs_dir_item header directly. */
4239 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
/* Length-bounded compare: the read above does not terminate namebuf. */
4240 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry: header, then name, then data. */
4246 len = sizeof(*di) + name_len + data_len;
4247 di = (struct btrfs_dir_item *)((char *)di + len);
4250 if (ret == DIR_ITEM_MISMATCH)
4252 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4254 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4255 ref_key->objectid, ref_key->offset,
4256 key->type == BTRFS_DIR_ITEM_KEY ?
4257 "DIR_ITEM" : "DIR_INDEX",
4258 key->objectid, key->offset, namelen, name,
4259 imode_to_type(mode));
4261 btrfs_release_path(&path);
4266 * Traverse the given INODE_REF and call find_dir_item() to find related
4267 * DIR_ITEM/DIR_INDEX.
4269 * @root: the root of the fs/file tree
4270 * @ref_key: the key of the INODE_REF
4271 * @refs: the count of INODE_REF
4272 * @mode: the st_mode of INODE_ITEM
4274 * Return 0 if no error occurred.
/*
 * Walk every btrfs_inode_ref packed into the INODE_REF item at @node/@slot
 * and, for each one, ask find_dir_item() for the matching DIR_INDEX and
 * DIR_ITEM.
 *
 * For both lookups key.objectid is taken from @ref_key->offset.  The
 * DIR_ITEM lookup uses btrfs_name_hash() of the name as key.offset.
 *
 * NOTE(review):
 * - The root dir name test calls strncmp() with name_len, the unclamped
 *   on-disk length, while namebuf only holds BTRFS_NAME_LEN bytes and only
 *   len bytes were read into it; confirm the bound is intended.
 * - namebuf is not explicitly NUL-terminated after read_extent_buffer();
 *   the error message below ends in %s, so confirm which argument is
 *   printed there (not visible in this excerpt) and that it is terminated.
 * - How *refs is updated is not visible in this excerpt.
 */
4276 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4277 struct extent_buffer *node, int slot, u64 *refs,
4280 struct btrfs_key key;
4281 struct btrfs_inode_ref *ref;
4282 char namebuf[BTRFS_NAME_LEN] = {0};
4290 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4291 total = btrfs_item_size_nr(node, slot);
4294 /* Update inode ref count */
/* Decode the current packed entry: directory index, then name. */
4297 index = btrfs_inode_ref_index(node, ref);
4298 name_len = btrfs_inode_ref_name_len(node, ref);
/* Clamp to the buffer size; an over-long name is only warned about. */
4299 if (name_len <= BTRFS_NAME_LEN) {
4302 len = BTRFS_NAME_LEN;
4303 warning("root %llu INODE_REF[%llu %llu] name too long",
4304 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the btrfs_inode_ref header directly. */
4307 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4309 /* Check root dir ref name */
4310 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4311 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4312 root->objectid, ref_key->objectid, ref_key->offset,
4314 err |= ROOT_DIR_ERROR;
4317 /* Find related DIR_INDEX */
4318 key.objectid = ref_key->offset;
4319 key.type = BTRFS_DIR_INDEX_KEY;
4321 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4324 /* Find related dir_item */
4325 key.objectid = ref_key->offset;
4326 key.type = BTRFS_DIR_ITEM_KEY;
4327 key.offset = btrfs_name_hash(namebuf, len);
4328 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the on-disk size of this entry, not by the clamped len. */
4331 len = sizeof(*ref) + name_len;
4332 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4341 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4342 * DIR_ITEM/DIR_INDEX.
4344 * @root: the root of the fs/file tree
4345 * @ref_key: the key of the INODE_EXTREF
4346 * @refs: the count of INODE_EXTREF
4347 * @mode: the st_mode of INODE_ITEM
4349 * Return 0 if no error occurred.
/*
 * Walk every btrfs_inode_extref packed into the INODE_EXTREF item at
 * @node/@slot and, for each one, ask find_dir_item() for the matching
 * DIR_INDEX and DIR_ITEM.
 *
 * Unlike check_inode_ref(), the objectid of the directory keys is read from
 * the entry itself (btrfs_inode_extref_parent()) instead of from
 * @ref_key->offset.
 *
 * NOTE(review):
 * - The root dir name test calls strncmp() with name_len, the unclamped
 *   on-disk length, while only len bytes were read into the BTRFS_NAME_LEN
 *   byte namebuf; confirm the bound is intended.
 * - namebuf is not explicitly NUL-terminated after read_extent_buffer();
 *   the error message below ends in %s, so confirm which argument is
 *   printed there (not visible in this excerpt) and that it is terminated.
 * - How *refs is updated is not visible in this excerpt.
 */
4351 static int check_inode_extref(struct btrfs_root *root,
4352 struct btrfs_key *ref_key,
4353 struct extent_buffer *node, int slot, u64 *refs,
4356 struct btrfs_key key;
4357 struct btrfs_inode_extref *extref;
4358 char namebuf[BTRFS_NAME_LEN] = {0};
4368 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4369 total = btrfs_item_size_nr(node, slot);
4372 /* update inode ref count */
/* Decode the current packed entry: name length, index and parent. */
4374 name_len = btrfs_inode_extref_name_len(node, extref);
4375 index = btrfs_inode_extref_index(node, extref);
4376 parent = btrfs_inode_extref_parent(node, extref);
/* Clamp to the buffer size; an over-long name is only warned about. */
4377 if (name_len <= BTRFS_NAME_LEN) {
4380 len = BTRFS_NAME_LEN;
4381 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4382 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the btrfs_inode_extref header directly. */
4384 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4386 /* Check root dir ref name */
4387 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4388 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4389 root->objectid, ref_key->objectid, ref_key->offset,
4391 err |= ROOT_DIR_ERROR;
4394 /* find related dir_index */
4395 key.objectid = parent;
4396 key.type = BTRFS_DIR_INDEX_KEY;
4398 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4401 /* find related dir_item */
4402 key.objectid = parent;
4403 key.type = BTRFS_DIR_ITEM_KEY;
4404 key.offset = btrfs_name_hash(namebuf, len);
4405 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the on-disk size of this entry, not by the clamped len. */
4408 len = sizeof(*extref) + name_len;
4409 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4419 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4420 * DIR_ITEM/DIR_INDEX match.
4422 * @root: the root of the fs/file tree
4423 * @key: the key of the INODE_REF/INODE_EXTREF
4424 * @name: the name in the INODE_REF/INODE_EXTREF
4425 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4426 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4428 * @ext_ref: the EXTENDED_IREF feature
4430 * Return 0 if no error occurred.
4431 * Return >0 for error bitmap
/*
 * Search for an INODE_REF, and then an INODE_EXTREF, entry that matches
 * @name/@namelen and, unless @index is (u64)-1, @index.
 *
 * Pass 1 searches @key as given and walks the packed btrfs_inode_ref
 * entries.  Pass 2 rewrites @key into an INODE_EXTREF key whose offset is
 * btrfs_extref_hash() of the original key->offset (saved in dir_id) and the
 * name, then walks the packed btrfs_inode_extref entries; an entry must also
 * have parent equal to dir_id.
 *
 * NOTE(review):
 * - @key is modified in place (type and offset) before the second search,
 *   so the caller's key does not keep its original value.
 * - The name too long warning in the second loop still tests key->type
 *   against BTRFS_INODE_REF_KEY after key->type was set to
 *   BTRFS_INODE_EXTREF_KEY; confirm which label is meant to be printed.
 * - The condition guarding the second pass (@ext_ref) is not visible in
 *   this excerpt.
 */
4433 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4434 char *name, int namelen, u64 index,
4435 unsigned int ext_ref)
4437 struct btrfs_path path;
4438 struct btrfs_inode_ref *ref;
4439 struct btrfs_inode_extref *extref;
4440 struct extent_buffer *node;
4441 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4452 btrfs_init_path(&path);
4453 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4455 ret = INODE_REF_MISSING;
4459 node = path.nodes[0];
4460 slot = path.slots[0];
4462 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4463 total = btrfs_item_size_nr(node, slot);
4465 /* Iterate over all entries of INODE_REF */
4466 while (cur < total) {
/* Assume missing until this entry passes every check below. */
4467 ret = INODE_REF_MISSING;
4469 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4470 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the index is not compared. */
4471 if (index != (u64)-1 && index != ref_index)
/* Clamp to the buffer size; an over-long name is only warned about. */
4474 if (ref_namelen <= BTRFS_NAME_LEN) {
4477 len = BTRFS_NAME_LEN;
4478 warning("root %llu INODE %s[%llu %llu] name too long",
4480 key->type == BTRFS_INODE_REF_KEY ?
4482 key->objectid, key->offset);
4484 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Length-bounded compare: ref_namebuf is not terminated by the read. */
4487 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over this entry: header plus on-disk name length. */
4493 len = sizeof(*ref) + ref_namelen;
4494 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4499 /* Skip if not support EXTENDED_IREF feature */
4503 btrfs_release_path(&path);
4504 btrfs_init_path(&path);
/* Remember the original offset before the key is rewritten below. */
4506 dir_id = key->offset;
4507 key->type = BTRFS_INODE_EXTREF_KEY;
4508 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4510 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4512 ret = INODE_REF_MISSING;
4516 node = path.nodes[0];
4517 slot = path.slots[0];
4519 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4521 total = btrfs_item_size_nr(node, slot);
4523 /* Iterate over all entries of INODE_EXTREF */
4524 while (cur < total) {
/* Assume missing until this entry passes every check below. */
4525 ret = INODE_REF_MISSING;
4527 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4528 ref_index = btrfs_inode_extref_index(node, extref);
4529 parent = btrfs_inode_extref_parent(node, extref);
4530 if (index != (u64)-1 && index != ref_index)
/* Entries whose parent differs from dir_id are not a match. */
4533 if (parent != dir_id)
4536 if (ref_namelen <= BTRFS_NAME_LEN) {
4539 len = BTRFS_NAME_LEN;
4540 warning("root %llu INODE %s[%llu %llu] name too long",
4542 key->type == BTRFS_INODE_REF_KEY ?
4544 key->objectid, key->offset);
4546 read_extent_buffer(node, ref_namebuf,
4547 (unsigned long)(extref + 1), len);
4549 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over this entry: header plus on-disk name length. */
4556 len = sizeof(*extref) + ref_namelen;
4557 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4562 btrfs_release_path(&path);
4567 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4568 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4570 * @root: the root of the fs/file tree
4571 * @key: the key of the DIR_ITEM/DIR_INDEX
4572 * @size: the st_size of the INODE_ITEM
4573 * @ext_ref: the EXTENDED_IREF feature
4575 * Return 0 if no error occurred.
/*
 * Walk every btrfs_dir_item packed into the DIR_ITEM/DIR_INDEX at
 * @node/@slot.  For each entry the code below:
 *   - reports a data_len value (the guarding condition is not visible in
 *     this excerpt),
 *   - adds the on-disk name length to *size,
 *   - skips entries whose location is a ROOT_ITEM,
 *   - looks up the INODE_ITEM at the entry location and compares its file
 *     type with the one recorded in the entry,
 *   - calls find_inode_ref() to look for the matching INODE_REF or
 *     INODE_EXTREF.
 *
 * @key is the key of the item being walked: its type selects the
 * DIR_ITEM/DIR_INDEX label in messages, and for DIR_INDEX its offset is the
 * index handed to find_inode_ref().
 *
 * NOTE(review): namebuf is printed with %s but is not explicitly
 * NUL-terminated after read_extent_buffer(); confirm a BTRFS_NAME_LEN byte
 * name cannot be read past the buffer.
 */
4577 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4578 struct extent_buffer *node, int slot, u64 *size,
4579 unsigned int ext_ref)
4581 struct btrfs_dir_item *di;
4582 struct btrfs_inode_item *ii;
4583 struct btrfs_path path;
4584 struct btrfs_key location;
4585 char namebuf[BTRFS_NAME_LEN] = {0};
4598 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4599 * ignore index check.
4601 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4603 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4604 total = btrfs_item_size_nr(node, slot);
4606 while (cur < total) {
4607 data_len = btrfs_dir_data_len(node, di);
4609 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4610 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4611 "DIR_ITEM" : "DIR_INDEX",
4612 key->objectid, key->offset, data_len);
4614 name_len = btrfs_dir_name_len(node, di);
/* Clamp to the buffer size; an over-long name is only warned about. */
4615 if (name_len <= BTRFS_NAME_LEN) {
4618 len = BTRFS_NAME_LEN;
4619 warning("root %llu %s[%llu %llu] name too long",
4621 key->type == BTRFS_DIR_ITEM_KEY ?
4622 "DIR_ITEM" : "DIR_INDEX",
4623 key->objectid, key->offset);
/* The on-disk name length, not the clamped len, is accumulated. */
4625 (*size) += name_len;
4627 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4628 filetype = btrfs_dir_type(node, di);
4630 btrfs_init_path(&path);
4631 btrfs_dir_item_key_to_cpu(node, di, &location);
4633 /* Ignore related ROOT_ITEM check */
4634 if (location.type == BTRFS_ROOT_ITEM_KEY)
4637 /* Check relative INODE_ITEM(existence/filetype) */
4638 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4640 err |= INODE_ITEM_MISSING;
4641 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4642 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4643 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4644 key->offset, location.objectid, name_len,
4649 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4650 struct btrfs_inode_item);
4651 mode = btrfs_inode_mode(path.nodes[0], ii);
4653 if (imode_to_type(mode) != filetype) {
4654 err |= INODE_ITEM_MISMATCH;
4655 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4656 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4657 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4658 key->offset, name_len, namebuf, filetype);
4661 /* Check relative INODE_REF/INODE_EXTREF */
/* location is reused as the INODE_REF key: (inode, INODE_REF, this dir). */
4662 location.type = BTRFS_INODE_REF_KEY;
4663 location.offset = key->objectid;
4664 ret = find_inode_ref(root, &location, namebuf, len,
4667 if (ret & INODE_REF_MISSING)
4668 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4669 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4670 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4671 key->offset, name_len, namebuf, filetype);
4674 btrfs_release_path(&path);
/* Step over this entry: header, then name, then data. */
4675 len = sizeof(*di) + name_len + data_len;
4676 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after one entry is reported. */
4679 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4680 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4681 root->objectid, key->objectid, key->offset);
4690 * Check file extent datasum/hole, update the size of the file extents,
4691 * check and update the last offset of the file extent.
4693 * @root: the root of fs/file tree.
4694 * @fkey: the key of the file extent.
4695 * @nodatasum: INODE_NODATASUM feature.
4696 * @size: the sum of all EXTENT_DATA items size for this inode.
4697 * @end: the offset of the last extent.
4699 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item and accumulate its length into *size and *end.
 *
 * Inline extents: an empty extent, or an uncompressed extent whose length
 * differs from the inline item length, is flagged with FILE_EXTENT_ERROR;
 * the length is added to *size.
 *
 * Other extents must be of type REG or PREALLOC.  The checksum range is
 * counted with count_csum_range() and then checked:
 *   - any csum found while @nodatasum is set        -> ODD_CSUM_ITEM
 *   - REG, not nodatasum, not a hole, and either the
 *     count failed or fewer bytes than search_len   -> CSUM_ITEM_MISSING
 *   - PREALLOC with any csum found                  -> ODD_CSUM_ITEM
 * A hole is an extent whose disk_bytenr and disk_num_bytes are both zero.
 * With the file-scope no_holes set, a hole is an error; without it, the
 * extent must start exactly at *end.
 *
 * NOTE(review):
 * - csum_found is read even when count_csum_range() returned an error (ret
 *   is only consulted in the REG branch); confirm the helper always
 *   initializes it.
 * - The condition selecting between the two search_start/search_len
 *   assignments is not visible in this excerpt; the comment above them
 *   describes uncompressed versus compressed extents.
 * - The guard, if any, on the final *size update is not visible here.
 */
4701 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4702 struct extent_buffer *node, int slot,
4703 unsigned int nodatasum, u64 *size, u64 *end)
4705 struct btrfs_file_extent_item *fi;
4708 u64 extent_num_bytes;
4710 u64 csum_found; /* In byte size, sectorsize aligned */
4711 u64 search_start; /* Logical range start we search for csum */
4712 u64 search_len; /* Logical range len we search for csum */
4713 unsigned int extent_type;
4714 unsigned int is_hole;
4719 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4721 /* Check inline extent */
4722 extent_type = btrfs_file_extent_type(node, fi);
4723 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4724 struct btrfs_item *e = btrfs_item_nr(slot);
4725 u32 item_inline_len;
4727 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4728 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4729 compressed = btrfs_file_extent_compression(node, fi);
4730 if (extent_num_bytes == 0) {
4732 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4733 root->objectid, fkey->objectid, fkey->offset);
4734 err |= FILE_EXTENT_ERROR;
/* The length comparison is only made for uncompressed inline data. */
4736 if (!compressed && extent_num_bytes != item_inline_len) {
4738 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4739 root->objectid, fkey->objectid, fkey->offset,
4740 extent_num_bytes, item_inline_len);
4741 err |= FILE_EXTENT_ERROR;
4743 *size += extent_num_bytes;
4747 /* Check extent type */
4748 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4749 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4750 err |= FILE_EXTENT_ERROR;
4751 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4752 root->objectid, fkey->objectid, fkey->offset);
4756 /* Check REG_EXTENT/PREALLOC_EXTENT */
4757 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4758 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4759 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4760 extent_offset = btrfs_file_extent_offset(node, fi);
4761 compressed = btrfs_file_extent_compression(node, fi);
4762 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4765 * Check EXTENT_DATA csum
4767 * For plain (uncompressed) extent, we should only check the range
4768 * we're referring to, as it's possible that part of prealloc extent
4769 * has been written, and has csum:
4771 * |<--- Original large preallocated extent A ---->|
4772 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4775 * For compressed extent, we should check the whole range.
4778 search_start = disk_bytenr + extent_offset;
4779 search_len = extent_num_bytes;
4781 search_start = disk_bytenr;
4782 search_len = disk_num_bytes;
4784 ret = count_csum_range(root, search_start, search_len, &csum_found);
4785 if (csum_found > 0 && nodatasum) {
4786 err |= ODD_CSUM_ITEM;
4787 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4788 root->objectid, fkey->objectid, fkey->offset);
4789 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4790 !is_hole && (ret < 0 || csum_found < search_len)) {
4791 err |= CSUM_ITEM_MISSING;
4792 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4793 root->objectid, fkey->objectid, fkey->offset,
4794 csum_found, search_len);
4795 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4796 err |= ODD_CSUM_ITEM;
4797 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4798 root->objectid, fkey->objectid, fkey->offset, csum_found);
4801 /* Check EXTENT_DATA hole */
4802 if (no_holes && is_hole) {
4803 err |= FILE_EXTENT_ERROR;
4804 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4805 root->objectid, fkey->objectid, fkey->offset);
4806 } else if (!no_holes && *end != fkey->offset) {
4807 err |= FILE_EXTENT_ERROR;
4808 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4809 root->objectid, fkey->objectid, fkey->offset);
4812 *end += extent_num_bytes;
4814 *size += extent_num_bytes;
4820 * Check INODE_ITEM and related ITEMs (the same inode number)
4821 * 1. check link count
4822 * 2. check inode ref/extref
4823 * 3. check dir item/index
4825 * @ext_ref: the EXTENDED_IREF feature
4827 * Return 0 if no error occurred.
4828 * Return >0 (error bitmap) on error or when the traversal is done
/*
 * Check the INODE_ITEM that @path points at together with the items that
 * follow it and share its objectid.
 *
 * The inode fields (size, nbytes, mode, nlink, flags) are read first.  The
 * path is then advanced with btrfs_next_item() and each following item is
 * dispatched on key.type:
 *   INODE_REF              -> check_inode_ref()
 *   INODE_EXTREF           -> check_inode_extref()
 *   DIR_ITEM / DIR_INDEX   -> check_dir_item()
 *   EXTENT_DATA            -> check_file_extent()
 *   XATTR_ITEM             -> handling not visible in this excerpt
 * A later error() call reports an item as UNKNOWN TYPE.
 * Items whose objectid differs from inode_id are tested for explicitly; the
 * statement taken in that case is not visible here.
 *
 * After the walk the collected counters are compared with the inode: link
 * count against refs, directory isize against size, nbytes against
 * extent_size.
 *
 * NOTE(review):
 * - Inside case BTRFS_INODE_EXTREF_KEY the test key.type ==
 *   BTRFS_INODE_EXTREF_KEY is always true there; only !ext_ref matters.
 * - @path is advanced by this function, so the caller sees it moved.
 */
4830 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4831 unsigned int ext_ref)
4833 struct extent_buffer *node;
4834 struct btrfs_inode_item *ii;
4835 struct btrfs_key key;
4844 u64 extent_size = 0;
4846 unsigned int nodatasum;
4851 node = path->nodes[0];
4852 slot = path->slots[0];
4854 btrfs_item_key_to_cpu(node, &key, slot);
4855 inode_id = key.objectid;
/* The orphan objectid is special-cased: the path is simply advanced. */
4857 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4858 ret = btrfs_next_item(root, path);
4864 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4865 isize = btrfs_inode_size(node, ii);
4866 nbytes = btrfs_inode_nbytes(node, ii);
4867 mode = btrfs_inode_mode(node, ii);
4868 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4869 nlink = btrfs_inode_nlink(node, ii);
4870 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4873 ret = btrfs_next_item(root, path);
4875 /* out will fill 'err' using current statistics */
4877 } else if (ret > 0) {
/* The path may now be in another leaf, so node and slot are reloaded. */
4882 node = path->nodes[0];
4883 slot = path->slots[0];
4884 btrfs_item_key_to_cpu(node, &key, slot);
4885 if (key.objectid != inode_id)
4889 case BTRFS_INODE_REF_KEY:
4890 ret = check_inode_ref(root, &key, node, slot, &refs,
4894 case BTRFS_INODE_EXTREF_KEY:
4895 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4896 warning("root %llu EXTREF[%llu %llu] isn't supported",
4897 root->objectid, key.objectid,
4899 ret = check_inode_extref(root, &key, node, slot, &refs,
4903 case BTRFS_DIR_ITEM_KEY:
4904 case BTRFS_DIR_INDEX_KEY:
4906 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4907 root->objectid, inode_id,
4908 imode_to_type(mode), key.objectid,
4911 ret = check_dir_item(root, &key, node, slot, &size,
4915 case BTRFS_EXTENT_DATA_KEY:
4917 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4918 root->objectid, inode_id, key.objectid,
4921 ret = check_file_extent(root, &key, node, slot,
4922 nodatasum, &extent_size,
4926 case BTRFS_XATTR_ITEM_KEY:
4929 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4930 key.objectid, key.type, key.offset);
4935 /* verify INODE_ITEM nlink/isize/nbytes */
4938 err |= LINK_COUNT_ERROR;
4939 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4940 root->objectid, inode_id, nlink);
4944 * Just a warning, as dir inode nbytes is just an
4945 * instructive value.
4947 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4948 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4949 root->objectid, inode_id, root->nodesize);
/* size was accumulated by check_dir_item() from entry name lengths. */
4952 if (isize != size) {
4954 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4955 root->objectid, inode_id, isize, size);
4958 if (nlink != refs) {
4959 err |= LINK_COUNT_ERROR;
4960 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4961 root->objectid, inode_id, nlink, refs);
4962 } else if (!nlink) {
4966 if (!nbytes && !no_holes && extent_end < isize) {
4967 err |= NBYTES_ERROR;
4968 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4969 root->objectid, inode_id, isize);
4972 if (nbytes != extent_size) {
4973 err |= NBYTES_ERROR;
4974 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4975 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode item of @root, i.e. the INODE_ITEM whose objectid
 * is BTRFS_FIRST_FREE_OBJECTID.
 *
 * The item is looked up with btrfs_search_slot() and then handed to
 * check_inode_item(); INODE_ITEM_MISSING is recorded and an error printed
 * on the path where the item is reported missing.
 *
 * @ext_ref is passed through to check_inode_item().
 *
 * NOTE(review): the right-hand side of the drop_progress comparison and the
 * statement executed for a root being dropped are not visible in this
 * excerpt; the existing comment says the first inode need not be checked in
 * that case.
 */
4982 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4984 struct btrfs_path path;
4985 struct btrfs_key key;
4989 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4990 key.type = BTRFS_INODE_ITEM_KEY;
4993 /* For root being dropped, we don't need to check first inode */
4994 if (btrfs_root_refs(&root->root_item) == 0 &&
4995 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4999 btrfs_init_path(&path);
5001 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5006 err |= INODE_ITEM_MISSING;
5007 error("first inode item of root %llu is missing",
/* Error bits from the inode check are merged into err. */
5011 err |= check_inode_item(root, &path, ext_ref);
5016 btrfs_release_path(&path);
5021 * Iterate over all items in the tree and call check_inode_item() to check them.
5023 * @root: the root of the tree to be checked.
5024 * @ext_ref: the EXTENDED_IREF feature
5026 * Return 0 if no error found.
5027 * Return <0 for error.
/*
 * Check one fs/subvolume tree.
 *
 * Steps visible below:
 *  1. check_fs_first_inode() is called first (see the existing comment for
 *     why the first inode needs a dedicated check).
 *  2. The starting path is set up.  If the root item has references, or its
 *     drop_progress objectid is zero, the walk starts at root->node and an
 *     extra reference is taken with extent_buffer_get().  Otherwise the
 *     drop_progress key is converted and searched for with
 *     path.lowest_level set to root_item->drop_level.
 *  3. walk_down_tree_v2() and walk_up_tree_v2() are called; the loop that
 *     surrounds them is not visible in this excerpt.
 *  4. The path is released.
 *
 * @ext_ref is passed through to check_fs_first_inode() and
 * walk_down_tree_v2().
 */
5029 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5031 struct btrfs_path path;
5032 struct node_refs nrefs;
5033 struct btrfs_root_item *root_item = &root->root_item;
5039 * We need to manually check the first inode item(256)
5040 * As the following traversal function will only start from
5041 * the first inode item in the leaf, if inode item(256) is missing
5042 * we will just skip it forever.
5044 ret = check_fs_first_inode(root, ext_ref);
5048 memset(&nrefs, 0, sizeof(nrefs));
5049 level = btrfs_header_level(root->node);
5050 btrfs_init_path(&path);
5052 if (btrfs_root_refs(root_item) > 0 ||
5053 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Start at the root node; the path holds its own reference to it. */
5054 path.nodes[level] = root->node;
5055 path.slots[level] = 0;
5056 extent_buffer_get(root->node);
5058 struct btrfs_key key;
/* Otherwise start from the recorded drop_progress key and level. */
5060 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5061 level = root_item->drop_level;
5062 path.lowest_level = level;
5063 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5070 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5073 /* if ret is negative, walk shall stop */
5079 ret = walk_up_tree_v2(root, &path, &level);
5081 /* Normal exit, reset ret to err */
5088 btrfs_release_path(&path);
5093 * Find the relative ref for root_ref and root_backref.
5095 * @root: the root of the root tree.
5096 * @ref_key: the key of the root ref.
5098 * Return 0 if no error occurred.
/*
 * Verify that a ROOT_REF or ROOT_BACKREF item has a matching counterpart.
 *
 * The item at @node/@slot is decoded (dirid, sequence, name).  The
 * counterpart key is built by swapping objectid and offset of @ref_key and
 * flipping the key type, then searched for in @root.  ROOT_REF_MISSING is
 * recorded and an error printed when the counterpart is reported missing;
 * if dirid, sequence, name length or name differ, ROOT_REF_MISMATCH is
 * recorded.
 *
 * Names longer than BTRFS_NAME_LEN are clamped for reading and a warning is
 * printed.
 */
5100 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5101 struct extent_buffer *node, int slot)
5103 struct btrfs_path path;
5104 struct btrfs_key key;
5105 struct btrfs_root_ref *ref;
5106 struct btrfs_root_ref *backref;
5107 char ref_name[BTRFS_NAME_LEN] = {0};
5108 char backref_name[BTRFS_NAME_LEN] = {0};
5114 u32 backref_namelen;
5119 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5120 ref_dirid = btrfs_root_ref_dirid(node, ref);
5121 ref_seq = btrfs_root_ref_sequence(node, ref);
5122 ref_namelen = btrfs_root_ref_name_len(node, ref);
5124 if (ref_namelen <= BTRFS_NAME_LEN) {
5127 len = BTRFS_NAME_LEN;
5128 warning("%s[%llu %llu] ref_name too long",
5129 ref_key->type == BTRFS_ROOT_REF_KEY ?
5130 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the btrfs_root_ref header directly. */
5133 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5135 /* Find relative root_ref */
5136 key.objectid = ref_key->offset;
/* Sum minus current type yields the other one: REF <-> BACKREF. */
5137 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5138 key.offset = ref_key->objectid;
5140 btrfs_init_path(&path);
5141 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5143 err |= ROOT_REF_MISSING;
5144 error("%s[%llu %llu] couldn't find relative ref",
5145 ref_key->type == BTRFS_ROOT_REF_KEY ?
5146 "ROOT_REF" : "ROOT_BACKREF",
5147 ref_key->objectid, ref_key->offset);
5151 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5152 struct btrfs_root_ref);
5153 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5154 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5155 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5157 if (backref_namelen <= BTRFS_NAME_LEN) {
5158 len = backref_namelen;
5160 len = BTRFS_NAME_LEN;
5161 warning("%s[%llu %llu] ref_name too long",
5162 key.type == BTRFS_ROOT_REF_KEY ?
5163 "ROOT_REF" : "ROOT_BACKREF",
5164 key.objectid, key.offset);
5166 read_extent_buffer(path.nodes[0], backref_name,
5167 (unsigned long)(backref + 1), len);
/*
 * len now holds the clamped counterpart length.  The name compare only
 * runs when both on-disk lengths are equal, because the length test
 * comes first in the || chain.
 */
5169 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5170 ref_namelen != backref_namelen ||
5171 strncmp(ref_name, backref_name, len)) {
5172 err |= ROOT_REF_MISMATCH;
5173 error("%s[%llu %llu] mismatch relative ref",
5174 ref_key->type == BTRFS_ROOT_REF_KEY ?
5175 "ROOT_REF" : "ROOT_BACKREF",
5176 ref_key->objectid, ref_key->offset);
5179 btrfs_release_path(&path);
5184 * Check all fs/file trees in low_memory mode.
5186 * 1. for fs tree root item, call check_fs_root_v2()
5187 * 2. for fs tree root ref/backref, call check_root_ref()
5189 * Return 0 if no error occurred.
/*
 * Iterate over the tree root starting at the ROOT_ITEM of
 * BTRFS_FS_TREE_OBJECTID and check every fs/subvolume tree and root ref.
 *
 * For each item:
 *   - ROOT_ITEM whose objectid passes fs_root_objectid(): the tree is read
 *     and checked with check_fs_root_v2().  BTRFS_TREE_RELOC_OBJECTID roots
 *     are read with btrfs_read_fs_root_no_cache() and freed with
 *     btrfs_free_fs_root() afterwards; for the other branch key.offset is
 *     set to (u64)-1 before btrfs_read_fs_root() is called.
 *   - ROOT_REF / ROOT_BACKREF: checked with check_root_ref().
 * Items with objectid above BTRFS_LAST_FREE_OBJECTID are tested for; the
 * statement taken in that case is not visible in this excerpt.
 *
 * ext_ref caches whether the EXTENDED_IREF incompat feature is set and is
 * handed to check_fs_root_v2().
 *
 * NOTE(review): key.offset is overwritten with (u64)-1 before
 * btrfs_next_item() is called on the same path; the path, not key, drives
 * the iteration, but confirm nothing later relies on the original offset.
 */
5191 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5193 struct btrfs_root *tree_root = fs_info->tree_root;
5194 struct btrfs_root *cur_root = NULL;
5195 struct btrfs_path path;
5196 struct btrfs_key key;
5197 struct extent_buffer *node;
5198 unsigned int ext_ref;
5203 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5205 btrfs_init_path(&path);
5206 key.objectid = BTRFS_FS_TREE_OBJECTID;
5208 key.type = BTRFS_ROOT_ITEM_KEY;
5210 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5214 } else if (ret > 0) {
5220 node = path.nodes[0];
5221 slot = path.slots[0];
5222 btrfs_item_key_to_cpu(node, &key, slot);
5223 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5225 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5226 fs_root_objectid(key.objectid)) {
5227 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5228 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5231 key.offset = (u64)-1;
5232 cur_root = btrfs_read_fs_root(fs_info, &key);
5235 if (IS_ERR(cur_root)) {
5236 error("Fail to read fs/subvol tree: %lld",
5242 ret = check_fs_root_v2(cur_root, ext_ref);
/* Only the uncached reloc root is freed here. */
5245 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5246 btrfs_free_fs_root(cur_root);
5247 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5248 key.type == BTRFS_ROOT_BACKREF_KEY) {
5249 ret = check_root_ref(tree_root, &key, node, slot);
5253 ret = btrfs_next_item(tree_root, &path);
5263 btrfs_release_path(&path);
/*
 * Walk the backref list of @rec and report every inconsistency between the
 * backrefs and the extent record.
 *
 * Per backref, the visible checks are:
 *   - found_extent_tree not set: the backref is reported as not found in
 *     the extent tree;
 *   - tree backref with found_ref not set: the backref is reported as not
 *     referenced back;
 *   - data backref: found_ref must equal num_refs, disk_bytenr must equal
 *     rec->start, and bytes must equal rec->nr.
 * Data backrefs add their found_ref to a running total, which is finally
 * compared with rec->refs.
 *
 * NOTE(review): the use of @print_errs, the per-check error bookkeeping and
 * the return statement are not visible in this excerpt.  The caller
 * maybe_free_extent_rec() treats a zero return as no problem found.  How
 * tree backrefs contribute to the running total is not visible either.
 */
5267 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5269 struct list_head *cur = rec->backrefs.next;
5270 struct extent_backref *back;
5271 struct tree_backref *tback;
5272 struct data_backref *dback;
5276 while(cur != &rec->backrefs) {
5277 back = to_extent_backref(cur);
5279 if (!back->found_extent_tree) {
5283 if (back->is_data) {
5284 dback = to_data_backref(back);
5285 fprintf(stderr, "Backref %llu %s %llu"
5286 " owner %llu offset %llu num_refs %lu"
5287 " not found in extent tree\n",
5288 (unsigned long long)rec->start,
5289 back->full_backref ?
5291 back->full_backref ?
5292 (unsigned long long)dback->parent:
5293 (unsigned long long)dback->root,
5294 (unsigned long long)dback->owner,
5295 (unsigned long long)dback->offset,
5296 (unsigned long)dback->num_refs);
5298 tback = to_tree_backref(back);
5299 fprintf(stderr, "Backref %llu parent %llu"
5300 " root %llu not found in extent tree\n",
5301 (unsigned long long)rec->start,
5302 (unsigned long long)tback->parent,
5303 (unsigned long long)tback->root);
5306 if (!back->is_data && !back->found_ref) {
5310 tback = to_tree_backref(back);
5311 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5312 (unsigned long long)rec->start,
5313 back->full_backref ? "parent" : "root",
5314 back->full_backref ?
5315 (unsigned long long)tback->parent :
5316 (unsigned long long)tback->root, back);
5318 if (back->is_data) {
5319 dback = to_data_backref(back);
5320 if (dback->found_ref != dback->num_refs) {
5324 fprintf(stderr, "Incorrect local backref count"
5325 " on %llu %s %llu owner %llu"
5326 " offset %llu found %u wanted %u back %p\n",
5327 (unsigned long long)rec->start,
5328 back->full_backref ?
5330 back->full_backref ?
5331 (unsigned long long)dback->parent:
5332 (unsigned long long)dback->root,
5333 (unsigned long long)dback->owner,
5334 (unsigned long long)dback->offset,
5335 dback->found_ref, dback->num_refs, back);
5337 if (dback->disk_bytenr != rec->start) {
5341 fprintf(stderr, "Backref disk bytenr does not"
5342 " match extent record, bytenr=%llu, "
5343 "ref bytenr=%llu\n",
5344 (unsigned long long)rec->start,
5345 (unsigned long long)dback->disk_bytenr);
5348 if (dback->bytes != rec->nr) {
5352 fprintf(stderr, "Backref bytes do not match "
5353 "extent backref, bytenr=%llu, ref "
5354 "bytes=%llu, backref bytes=%llu\n",
5355 (unsigned long long)rec->start,
5356 (unsigned long long)rec->nr,
5357 (unsigned long long)dback->bytes);
/* Accumulate the number of references found across all backrefs. */
5360 if (!back->is_data) {
5363 dback = to_data_backref(back);
5364 found += dback->found_ref;
/* The accumulated total must equal the count stored in the record. */
5367 if (found != rec->refs) {
5371 fprintf(stderr, "Incorrect global backref count "
5372 "on %llu found %llu wanted %llu\n",
5373 (unsigned long long)rec->start,
5374 (unsigned long long)found,
5375 (unsigned long long)rec->refs);
/*
 * Drain the backref list of @rec: while the list is not empty, its first
 * entry is converted to its extent_backref.
 *
 * NOTE(review): the statements that unlink and release each entry, and the
 * return value, are not visible in this excerpt.
 */
5381 static int free_all_extent_backrefs(struct extent_record *rec)
5383 struct extent_backref *back;
5384 struct list_head *cur;
5385 while (!list_empty(&rec->backrefs)) {
5386 cur = rec->backrefs.next;
5387 back = to_extent_backref(cur);
/*
 * Empty @extent_cache: the first cache extent is fetched, converted to its
 * enclosing extent_record, removed from the tree, and its backrefs are
 * freed with free_all_extent_backrefs().
 *
 * NOTE(review): the loop construct, its termination test and the release of
 * the record itself are not visible in this excerpt.
 */
5394 static void free_extent_record_cache(struct cache_tree *extent_cache)
5396 struct cache_extent *cache;
5397 struct extent_record *rec;
5400 cache = first_cache_extent(extent_cache);
5403 rec = container_of(cache, struct extent_record, cache);
5404 remove_cache_extent(extent_cache, cache);
5405 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is removed only when all of the following hold: content and
 * owner ref were checked, the extent item ref count equals the non-zero
 * rec->refs, there are no duplicates, all_backpointers_checked() returns
 * zero (called with print_errs == 0), and none of the bad_full_backref,
 * crossing_stripes or wrong_chunk_type flags are set.
 *
 * On removal the cache extent is unlinked, the backrefs are freed and the
 * record is taken off its list.
 *
 * NOTE(review): the release of the record itself and the return value are
 * not visible in this excerpt.
 */
5410 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5411 struct extent_record *rec)
5413 if (rec->content_checked && rec->owner_ref_checked &&
5414 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5415 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5416 !rec->bad_full_backref && !rec->crossing_stripes &&
5417 !rec->wrong_chunk_type) {
5418 remove_cache_extent(extent_cache, &rec->cache);
5419 free_all_extent_backrefs(rec);
5420 list_del_init(&rec->list);
/*
 * Check whether the owner stored in the header of @buf is backed by a
 * reference for the extent record @rec.
 *
 * Two strategies are visible:
 *  1. scan rec->backrefs for a tree backref whose root equals
 *     btrfs_header_owner(buf);
 *  2. read the fs root of the header owner, search for the first key of
 *     @buf one level above the block and compare the block pointer stored
 *     in the parent with buf->start.
 *
 * The final return yields 0 when a match was found and 1 otherwise.
 * NOTE(review): the early exits of the scan and the error paths are not
 * among the visible lines - confirm them in the full source.
 */
5426 static int check_owner_ref(struct btrfs_root *root,
5427 struct extent_record *rec,
5428 struct extent_buffer *buf)
5430 struct extent_backref *node;
5431 struct tree_backref *back;
5432 struct btrfs_root *ref_root;
5433 struct btrfs_key key;
5434 struct btrfs_path path;
5435 struct extent_buffer *parent;
/* Strategy 1: look through the backrefs already recorded for this extent. */
5440 list_for_each_entry(node, &rec->backrefs, list) {
5443 if (!node->found_ref)
5445 if (node->full_backref)
5447 back = to_tree_backref(node);
5448 if (btrfs_header_owner(buf) == back->root)
5451 BUG_ON(rec->is_root);
5453 /* try to find the block by search corresponding fs tree */
5454 key.objectid = btrfs_header_owner(buf);
5455 key.type = BTRFS_ROOT_ITEM_KEY;
5456 key.offset = (u64)-1;
5458 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5459 if (IS_ERR(ref_root))
5462 level = btrfs_header_level(buf);
/* Use the first key of the block (item key or node key) as search key. */
5464 btrfs_item_key_to_cpu(buf, &key, 0);
5466 btrfs_node_key_to_cpu(buf, &key, 0);
5468 btrfs_init_path(&path);
/* Stop the search one level above the block so its parent is in the path. */
5469 path.lowest_level = level + 1;
5470 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5474 parent = path.nodes[level + 1];
/* The owner is confirmed when the parent slot points at this block. */
5475 if (parent && buf->start == btrfs_node_blockptr(parent,
5476 path.slots[level + 1]))
5479 btrfs_release_path(&path);
5480 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec and inspect them as tree backrefs; the visible
 * test compares back->root with BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the return statements and the handling of full backrefs are
 * not among the visible lines; presumably the function reports whether the
 * record is referenced by the extent tree - confirm.
 */
5483 static int is_extent_tree_record(struct extent_record *rec)
5485 struct list_head *cur = rec->backrefs.next;
5486 struct extent_backref *node;
5487 struct tree_backref *back;
5490 while(cur != &rec->backrefs) {
5491 node = to_extent_backref(cur);
5495 back = to_tree_backref(node);
5496 if (node->full_backref)
5498 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block that could not be read as a corrupt extent.
 *
 * Looks up the extent record covering the range (start, len) in
 * @extent_cache, consults is_extent_tree_record() and, on the visible final
 * path, converts rec->parent_key to CPU order and returns the result of
 * btrfs_add_corrupt_extent_record() with a last argument of 0.
 * NOTE(review): the outcome for a missing cache entry or for a record that
 * is_extent_tree_record() rejects is not among the visible lines.
 */
5505 static int record_bad_block_io(struct btrfs_fs_info *info,
5506 struct cache_tree *extent_cache,
5509 struct extent_record *rec;
5510 struct cache_extent *cache;
5511 struct btrfs_key key;
5513 cache = lookup_cache_extent(extent_cache, start, len);
5517 rec = container_of(cache, struct extent_record, cache);
5518 if (!is_extent_tree_record(rec))
5521 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5522 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (non-zero header level): the two btrfs_key_ptr structures are read
 * into temporaries and written back to the opposite slots.
 * Leaf: both item payloads are copied into heap buffers and written back,
 * the item offsets and sizes are exchanged, and the two keys are rewritten
 * through btrfs_set_item_key_unsafe() with path->slots[0] pointed at each
 * slot in turn.
 *
 * NOTE(review): allocation failure handling, the release of the temporary
 * buffers and the return value are not among the visible lines.
 */
5525 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5526 struct extent_buffer *buf, int slot)
5528 if (btrfs_header_level(buf)) {
5529 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers first, then write them back crosswise. */
5531 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5532 sizeof(struct btrfs_key_ptr));
5533 read_extent_buffer(buf, &ptr2,
5534 btrfs_node_key_ptr_offset(slot + 1),
5535 sizeof(struct btrfs_key_ptr));
5536 write_extent_buffer(buf, &ptr1,
5537 btrfs_node_key_ptr_offset(slot + 1),
5538 sizeof(struct btrfs_key_ptr));
5539 write_extent_buffer(buf, &ptr2,
5540 btrfs_node_key_ptr_offset(slot),
5541 sizeof(struct btrfs_key_ptr));
/*
 * Propagate the first key of the node to the levels above.
 * NOTE(review): presumably only done when slot 0 was involved - confirm.
 */
5543 struct btrfs_disk_key key;
5544 btrfs_node_key(buf, &key, 0);
5545 btrfs_fixup_low_keys(root, path, &key,
5546 btrfs_header_level(buf) + 1);
/* Leaf case: swap payloads, then offsets and sizes, then the keys. */
5549 struct btrfs_item *item1, *item2;
5550 struct btrfs_key k1, k2;
5551 char *item1_data, *item2_data;
5552 u32 item1_offset, item2_offset, item1_size, item2_size;
5554 item1 = btrfs_item_nr(slot);
5555 item2 = btrfs_item_nr(slot + 1);
5556 btrfs_item_key_to_cpu(buf, &k1, slot);
5557 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5558 item1_offset = btrfs_item_offset(buf, item1);
5559 item2_offset = btrfs_item_offset(buf, item2);
5560 item1_size = btrfs_item_size(buf, item1);
5561 item2_size = btrfs_item_size(buf, item2);
5563 item1_data = malloc(item1_size);
5566 item2_data = malloc(item2_size);
5572 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5573 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): the writes below pair each buffer with the offset and size
 * of the other item; confirm this is intended when the two sizes differ.
 */
5575 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5576 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5580 btrfs_set_item_offset(buf, item1, item2_offset);
5581 btrfs_set_item_offset(buf, item2, item1_offset);
5582 btrfs_set_item_size(buf, item1, item2_size);
5583 btrfs_set_item_size(buf, item2, item1_size);
/* Rewrite the keys: slot receives k2, slot + 1 receives k1. */
5585 path->slots[0] = slot;
5586 btrfs_set_item_key_unsafe(root, path, &k2);
5587 path->slots[0] = slot + 1;
5588 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key order inside the block found at path->lowest_level.
 *
 * Every adjacent pair of keys (i, i + 1) is read, as node keys or as item
 * keys, and compared with btrfs_comp_cpu_keys(); swap_values() is called on
 * a pair and the buffer is marked dirty.
 * NOTE(review): presumably pairs that compare as less-than are skipped and
 * only out-of-order pairs are swapped; the branch bodies, loop exits and the
 * return value are not among the visible lines.
 */
5593 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5595 struct extent_buffer *buf;
5596 struct btrfs_key k1, k2;
5598 int level = path->lowest_level;
5601 buf = path->nodes[level];
5602 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5604 btrfs_node_key_to_cpu(buf, &k1, i);
5605 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5607 btrfs_item_key_to_cpu(buf, &k1, i);
5608 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5610 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5612 ret = swap_values(root, path, buf, i);
5615 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is tested against a fixed set: DIR_INDEX,
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF.
 * The removal is announced on stdout, the item headers after @slot are
 * moved down by one entry, nritems is decremented and the buffer is marked
 * dirty.  The visible code moves item headers only, not item data.
 * NOTE(review): the outcome for other key types and the return values are
 * not among the visible lines.
 */
5621 static int delete_bogus_item(struct btrfs_root *root,
5622 struct btrfs_path *path,
5623 struct extent_buffer *buf, int slot)
5625 struct btrfs_key key;
5626 int nritems = btrfs_header_nritems(buf);
5628 btrfs_item_key_to_cpu(buf, &key, slot);
5630 /* These are all the keys we can deal with missing. */
5631 if (key.type != BTRFS_DIR_INDEX_KEY &&
5632 key.type != BTRFS_EXTENT_ITEM_KEY &&
5633 key.type != BTRFS_METADATA_ITEM_KEY &&
5634 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5635 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5638 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5639 (unsigned long long)key.objectid, key.type,
5640 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
5641 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5642 btrfs_item_nr_offset(slot + 1),
5643 sizeof(struct btrfs_item) *
5644 (nritems - slot - 1));
5645 btrfs_set_header_nritems(buf, nritems - 1);
/*
 * Push the new first key of the leaf up to level 1.
 * NOTE(review): presumably guarded by a check that slot 0 was removed.
 */
5647 struct btrfs_disk_key disk_key;
5649 btrfs_item_key(buf, &disk_key, 0);
5650 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5652 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout checked by the loop: item 0 ends at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item ends where the previous
 * item starts.
 *  - An item ending beyond its limit is handed to delete_bogus_item(); the
 *    messages printed afterwards report the cases that cannot be fixed.
 *  - An item ending short of its limit is moved towards the limit by the
 *    computed shift and its offset is updated.
 * NOTE(review): the control flow after delete_bogus_item(), the skip for a
 * zero shift and the return value are not among the visible lines.
 */
5656 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5658 struct extent_buffer *buf;
5662 /* We should only get this for leaves */
5663 BUG_ON(path->lowest_level);
5664 buf = path->nodes[0];
5666 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5667 unsigned int shift = 0, offset;
/* First item: its end must coincide with the end of the leaf data area. */
5669 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5670 BTRFS_LEAF_DATA_SIZE(root)) {
5671 if (btrfs_item_end_nr(buf, i) >
5672 BTRFS_LEAF_DATA_SIZE(root)) {
5673 ret = delete_bogus_item(root, path, buf, i);
5676 fprintf(stderr, "item is off the end of the "
5677 "leaf, can't fix\n");
5681 shift = BTRFS_LEAF_DATA_SIZE(root) -
5682 btrfs_item_end_nr(buf, i);
/* Later items: the end must coincide with the start of the previous item. */
5683 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5684 btrfs_item_offset_nr(buf, i - 1)) {
5685 if (btrfs_item_end_nr(buf, i) >
5686 btrfs_item_offset_nr(buf, i - 1)) {
5687 ret = delete_bogus_item(root, path, buf, i);
5690 fprintf(stderr, "items overlap, can't fix\n");
5694 shift = btrfs_item_offset_nr(buf, i - 1) -
5695 btrfs_item_end_nr(buf, i);
/* Move the item data up by shift bytes and record the new offset. */
5700 printf("Shifting item nr %d by %u bytes in block %llu\n",
5701 i, shift, (unsigned long long)buf->start);
5702 offset = btrfs_item_offset_nr(buf, i);
5703 memmove_extent_buffer(buf,
5704 btrfs_leaf_data(buf) + offset + shift,
5705 btrfs_leaf_data(buf) + offset,
5706 btrfs_item_size_nr(buf, i));
5707 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5709 btrfs_mark_buffer_dirty(buf);
5713 * We may have moved things, in which case we want to exit so we don't
5714 * write those changes out. Once we have proper abort functionality in
5715 * progs this can be changed to something nicer.
5722 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5723 * then just return -EIO.
/*
 * Repair driver for a tree block that failed validation.
 *
 * Only two statuses are named by the check at the top:
 * BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS.
 * btrfs_find_all_roots() collects the roots that reference buf->start.  For
 * every root in that list the fs root is read, a transaction is started and
 * the block is looked up again through btrfs_search_slot() with
 * skip_check_block set and a final argument of 1 (presumably requesting
 * copy-on-write - confirm).  Then fix_key_order() or fix_item_offset() is
 * applied and the transaction is committed.
 * NOTE(review): the error branches, the release of the roots list and the
 * return value are not among the visible lines.
 */
5725 static int try_to_fix_bad_block(struct btrfs_root *root,
5726 struct extent_buffer *buf,
5727 enum btrfs_tree_block_status status)
5729 struct btrfs_trans_handle *trans;
5730 struct ulist *roots;
5731 struct ulist_node *node;
5732 struct btrfs_root *search_root;
5733 struct btrfs_path path;
5734 struct ulist_iterator iter;
5735 struct btrfs_key root_key, key;
5738 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5739 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5742 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5746 btrfs_init_path(&path);
5747 ULIST_ITER_INIT(&iter);
/* Apply the repair once per root that references the block. */
5748 while ((node = ulist_next(roots, &iter))) {
5749 root_key.objectid = node->val;
5750 root_key.type = BTRFS_ROOT_ITEM_KEY;
5751 root_key.offset = (u64)-1;
5753 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5760 trans = btrfs_start_transaction(search_root, 0);
5761 if (IS_ERR(trans)) {
5762 ret = PTR_ERR(trans);
/* Search down to the level of the bad block using its first key. */
5766 path.lowest_level = btrfs_header_level(buf);
5767 path.skip_check_block = 1;
5768 if (path.lowest_level)
5769 btrfs_node_key_to_cpu(buf, &key, 0);
5771 btrfs_item_key_to_cpu(buf, &key, 0);
5772 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5775 btrfs_commit_transaction(trans, search_root);
/* Dispatch to the repair routine that matches the reported status. */
5778 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5779 ret = fix_key_order(search_root, &path);
5780 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5781 ret = fix_item_offset(search_root, &path);
5783 btrfs_commit_transaction(trans, search_root);
5786 btrfs_release_path(&path);
5787 btrfs_commit_transaction(trans, search_root);
5790 btrfs_release_path(&path);
/*
 * Validate one tree block and update the extent record that covers it.
 *
 * Steps visible in the code:
 *  - look up the record for the range (buf->start, buf->len);
 *  - store the header generation, the objectid of the first key (when the
 *    block has items) and the level in the record;
 *  - run btrfs_check_leaf() or btrfs_check_node() against rec->parent_key;
 *  - on a status other than BTRFS_TREE_BLOCK_CLEAN call
 *    try_to_fix_bad_block() and report the block on stderr if it is still
 *    not clean;
 *  - set content_checked, decide owner_ref_checked (set directly when
 *    @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise after calling
 *    check_owner_ref()) and finally call maybe_free_extent_rec().
 * NOTE(review): the return values and the condition guarding the repair
 * attempt are not among the visible lines.
 */
5794 static int check_block(struct btrfs_root *root,
5795 struct cache_tree *extent_cache,
5796 struct extent_buffer *buf, u64 flags)
5798 struct extent_record *rec;
5799 struct cache_extent *cache;
5800 struct btrfs_key key;
5801 enum btrfs_tree_block_status status;
5805 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5808 rec = container_of(cache, struct extent_record, cache);
5809 rec->generation = btrfs_header_generation(buf);
5811 level = btrfs_header_level(buf);
/* Remember the objectid of the first key only when the block has items. */
5812 if (btrfs_header_nritems(buf) > 0) {
5815 btrfs_item_key_to_cpu(buf, &key, 0);
5817 btrfs_node_key_to_cpu(buf, &key, 0);
5819 rec->info_objectid = key.objectid;
5821 rec->info_level = level;
5823 if (btrfs_is_leaf(buf))
5824 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5826 status = btrfs_check_node(root, &rec->parent_key, buf);
5828 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5830 status = try_to_fix_bad_block(root, buf, status);
5831 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5833 fprintf(stderr, "bad block %llu\n",
5834 (unsigned long long)buf->start);
5837 * Signal to callers we need to start the scan over
5838 * again since we'll have cowed blocks.
5843 rec->content_checked = 1;
5844 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5845 rec->owner_ref_checked = 1;
5847 ret = check_owner_ref(root, rec, buf);
5849 rec->owner_ref_checked = 1;
5853 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: one tests back->parent against @parent and
 * is preceded by a check on a cleared full_backref flag, the other tests
 * back->root against @root and is preceded by a check on a set flag.
 * NOTE(review): presumably the checks skip entries of the wrong kind and the
 * lookup mode is chosen by whether @parent is non-zero; the return
 * statements are not among the visible lines.
 */
5857 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5858 u64 parent, u64 root)
5860 struct list_head *cur = rec->backrefs.next;
5861 struct extent_backref *node;
5862 struct tree_backref *back;
5864 while(cur != &rec->backrefs) {
5865 node = to_extent_backref(cur);
5869 back = to_tree_backref(node);
5871 if (!node->full_backref)
5873 if (parent == back->parent)
5876 if (node->full_backref)
5878 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * Only the embedded node is zeroed by the memset.  One visible path stores
 * @parent and sets full_backref to 1, the other clears full_backref.
 * NOTE(review): the allocation failure check, the assignment of @root and
 * the returned pointer are not among the visible lines.
 */
5885 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5886 u64 parent, u64 root)
5888 struct tree_backref *ref = malloc(sizeof(*ref));
5892 memset(&ref->node, 0, sizeof(ref->node));
5894 ref->parent = parent;
5895 ref->node.full_backref = 1;
5898 ref->node.full_backref = 0;
5900 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref.
 *
 * Visible comparisons: back->parent against @parent, and the triple
 * (root, owner, offset) against the arguments.  For a triple match the code
 * additionally tests whether @found_ref is set, the candidate already has
 * found_ref, and its bytes or disk_bytenr differ from the arguments.
 * NOTE(review): presumably such a conflicting candidate is skipped so the
 * caller allocates a separate backref; the branch bodies and return
 * statements are not among the visible lines.
 */
5905 static struct data_backref *find_data_backref(struct extent_record *rec,
5906 u64 parent, u64 root,
5907 u64 owner, u64 offset,
5909 u64 disk_bytenr, u64 bytes)
5911 struct list_head *cur = rec->backrefs.next;
5912 struct extent_backref *node;
5913 struct data_backref *back;
5915 while(cur != &rec->backrefs) {
5916 node = to_extent_backref(cur);
5920 back = to_data_backref(node);
5922 if (!node->full_backref)
5924 if (parent == back->parent)
5927 if (node->full_backref)
5929 if (back->root == root && back->owner == owner &&
5930 back->offset == offset) {
5931 if (found_ref && node->found_ref &&
5932 (back->bytes != bytes ||
5933 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged is_data.  One visible path stores
 * @parent with full_backref set, the other stores @offset with full_backref
 * cleared.  bytes is initialised from max_size and rec->max_size is raised
 * when max_size is larger.
 * NOTE(review): the allocation failure check, the remaining field
 * assignments and the returned pointer are not among the visible lines.
 */
5942 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5943 u64 parent, u64 root,
5944 u64 owner, u64 offset,
5947 struct data_backref *ref = malloc(sizeof(*ref));
5951 memset(&ref->node, 0, sizeof(ref->node));
5952 ref->node.is_data = 1;
5955 ref->parent = parent;
5958 ref->node.full_backref = 1;
5962 ref->offset = offset;
5963 ref->node.full_backref = 0;
5965 ref->bytes = max_size;
5968 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the largest size seen for this extent on the record. */
5969 if (max_size > rec->max_size)
5970 rec->max_size = max_size;
5974 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent sits in a block group whose
 * flags do not fit the extent.
 *
 * The block group is found with btrfs_lookup_first_block_group() on
 * rec->start.  Rules visible in the code:
 *  - a data extent needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata extent needs SYSTEM or METADATA;
 *  - when backrefs exist, the first one decides the precise type: a data
 *    backref on a metadata extent is wrong, a tree backref rooted in
 *    BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, any other root requires
 *    METADATA.
 * NOTE(review): the handling of a failed block group lookup is not among
 * the visible lines.
 */
5975 static void check_extent_type(struct extent_record *rec)
5977 struct btrfs_block_group_cache *bg_cache;
5979 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5983 /* data extent, check chunk directly*/
5984 if (!rec->metadata) {
5985 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5986 rec->wrong_chunk_type = 1;
5990 /* metadata extent, check the obvious case first */
5991 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5992 BTRFS_BLOCK_GROUP_METADATA))) {
5993 rec->wrong_chunk_type = 1;
5998 * Check SYSTEM extent, as it's also marked as metadata, we can only
5999 * make sure it's a SYSTEM extent by its backref
6001 if (!list_empty(&rec->backrefs)) {
6002 struct extent_backref *node;
6003 struct tree_backref *tback;
/* Only the first backref of the record is consulted. */
6006 node = to_extent_backref(rec->backrefs.next);
6007 if (node->is_data) {
6008 /* tree block shouldn't have data backref */
6009 rec->wrong_chunk_type = 1;
6012 tback = container_of(node, struct tree_backref, node);
6014 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6015 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6017 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6018 if (!(bg_cache->flags & bg_type))
6019 rec->wrong_chunk_type = 1;
6024 * Allocate a new extent record, fill default values from @tmpl and insert into
6025 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6026 * the cache, otherwise it fails.
/*
 * Create an extent record from @tmpl and insert it into @extent_cache
 * without looking for an existing entry first.
 *
 * tmpl->max_size must be non-zero (BUG_ON).  Fields copied from the
 * template: start, max_size, found_rec, content_checked, owner_ref_checked,
 * metadata, is_root, refs, extent_item_refs, parent_generation and
 * parent_key.  Flags describing detected problems start cleared.
 * After a successful insert the global bytes_used grows by rec->nr, the
 * stripe crossing check is evaluated against the nodesize of the tree root
 * and check_extent_type() is run.
 * NOTE(review): the allocation failure check, the insert failure path and
 * the condition guarding the stripe check are not among the visible lines.
 */
6028 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6029 struct extent_record *tmpl)
6031 struct extent_record *rec;
6034 BUG_ON(tmpl->max_size == 0);
6035 rec = malloc(sizeof(*rec));
6038 rec->start = tmpl->start;
6039 rec->max_size = tmpl->max_size;
/* nr is the larger of the template length and its max_size. */
6040 rec->nr = max(tmpl->nr, tmpl->max_size);
6041 rec->found_rec = tmpl->found_rec;
6042 rec->content_checked = tmpl->content_checked;
6043 rec->owner_ref_checked = tmpl->owner_ref_checked;
6044 rec->num_duplicates = 0;
6045 rec->metadata = tmpl->metadata;
6046 rec->flag_block_full_backref = FLAG_UNSET;
6047 rec->bad_full_backref = 0;
6048 rec->crossing_stripes = 0;
6049 rec->wrong_chunk_type = 0;
6050 rec->is_root = tmpl->is_root;
6051 rec->refs = tmpl->refs;
6052 rec->extent_item_refs = tmpl->extent_item_refs;
6053 rec->parent_generation = tmpl->parent_generation;
6054 INIT_LIST_HEAD(&rec->backrefs);
6055 INIT_LIST_HEAD(&rec->dups);
6056 INIT_LIST_HEAD(&rec->list);
6057 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache entry is sized by tmpl->nr, not by rec->nr computed above. */
6058 rec->cache.start = tmpl->start;
6059 rec->cache.size = tmpl->nr;
6060 ret = insert_cache_extent(extent_cache, &rec->cache);
6065 bytes_used += rec->nr;
6068 rec->crossing_stripes = check_crossing_stripes(global_info,
6069 rec->start, global_info->tree_root->nodesize);
6070 check_extent_type(rec);
6075 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6077 * - refs - if found, increase refs
6078 * - is_root - if found, set
6079 * - content_checked - if found, set
6080 * - owner_ref_checked - if found, set
6082 * If not found, create a new one, initialize and insert.
/*
 * Merge the information in @tmpl into the extent record covering
 * (tmpl->start, tmpl->nr), or create a new record through
 * add_extent_rec_nolookup().
 *
 * Behaviour visible for an existing record:
 *  - rec->nr may be reset to the larger of tmpl->nr and tmpl->max_size;
 *  - when the template comes from an extent item (found_rec) and either the
 *    start differs or the record already has found_rec, a duplicate entry
 *    is allocated, queued on rec->dups, num_duplicates is incremented and
 *    the record is put on the global duplicate_extents list if not queued;
 *  - extent_item_refs is taken from the template, with a message on stderr
 *    when the record already carried a value;
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are updated from the template;
 *  - the stripe crossing check and check_extent_type() are evaluated and
 *    maybe_free_extent_rec() is called.
 * NOTE(review): several conditions, the duplicate bookkeeping details and
 * the return values are not among the visible lines.
 */
6084 static int add_extent_rec(struct cache_tree *extent_cache,
6085 struct extent_record *tmpl)
6087 struct extent_record *rec;
6088 struct cache_extent *cache;
6092 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6094 rec = container_of(cache, struct extent_record, cache);
6098 rec->nr = max(tmpl->nr, tmpl->max_size);
6101 * We need to make sure to reset nr to whatever the extent
6102 * record says was the real size, this way we can compare it to
6105 if (tmpl->found_rec) {
6106 if (tmpl->start != rec->start || rec->found_rec) {
6107 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
6110 if (list_empty(&rec->list))
6111 list_add_tail(&rec->list,
6112 &duplicate_extents);
6115 * We have to do this song and dance in case we
6116 * find an extent record that falls inside of
6117 * our current extent record but does not have
6118 * the same objectid.
6120 tmp = malloc(sizeof(*tmp));
6123 tmp->start = tmpl->start;
6124 tmp->max_size = tmpl->max_size;
6127 tmp->metadata = tmpl->metadata;
6128 tmp->extent_item_refs = tmpl->extent_item_refs;
6129 INIT_LIST_HEAD(&tmp->list);
6130 list_add_tail(&tmp->list, &rec->dups);
6131 rec->num_duplicates++;
6138 if (tmpl->extent_item_refs && !dup) {
6139 if (rec->extent_item_refs) {
6140 fprintf(stderr, "block %llu rec "
6141 "extent_item_refs %llu, passed %llu\n",
6142 (unsigned long long)tmpl->start,
6143 (unsigned long long)
6144 rec->extent_item_refs,
6145 (unsigned long long)tmpl->extent_item_refs);
6147 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever raised here, never cleared. */
6151 if (tmpl->content_checked)
6152 rec->content_checked = 1;
6153 if (tmpl->owner_ref_checked)
6154 rec->owner_ref_checked = 1;
6155 memcpy(&rec->parent_key, &tmpl->parent_key,
6156 sizeof(tmpl->parent_key));
6157 if (tmpl->parent_generation)
6158 rec->parent_generation = tmpl->parent_generation;
6159 if (rec->max_size < tmpl->max_size)
6160 rec->max_size = tmpl->max_size;
6163 * A metadata extent can't cross stripe_len boundary, otherwise
6164 * kernel scrub won't be able to handle it.
6165 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6169 rec->crossing_stripes = check_crossing_stripes(
6170 global_info, rec->start,
6171 global_info->tree_root->nodesize);
6172 check_extent_type(rec);
6173 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: fall back to plain creation. */
6177 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree block backref for the extent at @bytenr.
 *
 * The extent record is looked up with a length of 1; when missing, a record
 * is created from a zeroed template and looked up again.  The backref
 * matching (@parent, @root) is found or allocated.  Depending on the source
 * of the reference either found_ref or found_extent_tree is set on the
 * backref node, and a message is printed to stderr when the flag was
 * already set.  Finally check_extent_type() and maybe_free_extent_rec() are
 * run on the record.
 * NOTE(review): the branch selecting between the two flags presumably tests
 * @found_ref; that line, the remaining template fields and the return
 * values are not among the visible lines.
 */
6182 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6183 u64 parent, u64 root, int found_ref)
6185 struct extent_record *rec;
6186 struct tree_backref *back;
6187 struct cache_extent *cache;
6190 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6192 struct extent_record tmpl;
6194 memset(&tmpl, 0, sizeof(tmpl));
6195 tmpl.start = bytenr;
6200 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6204 /* really a bug in cache_extent implement now */
6205 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6210 rec = container_of(cache, struct extent_record, cache);
6211 if (rec->start != bytenr) {
6213 * Several cause, from unaligned bytenr to over lapping extents
/* Reuse an existing backref for (parent, root) or allocate a new one. */
6218 back = find_tree_backref(rec, parent, root);
6220 back = alloc_tree_backref(rec, parent, root);
6226 if (back->node.found_ref) {
6227 fprintf(stderr, "Extent back ref already exists "
6228 "for %llu parent %llu root %llu \n",
6229 (unsigned long long)bytenr,
6230 (unsigned long long)parent,
6231 (unsigned long long)root);
6233 back->node.found_ref = 1;
6235 if (back->node.found_extent_tree) {
6236 fprintf(stderr, "Extent back ref already exists "
6237 "for %llu parent %llu root %llu \n",
6238 (unsigned long long)bytenr,
6239 (unsigned long long)parent,
6240 (unsigned long long)root);
6242 back->node.found_extent_tree = 1;
6244 check_extent_type(rec);
6245 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a length of 1 and created from a
 * template carrying @max_size when missing; rec->max_size is raised to
 * @max_size if smaller.  The matching data backref is found or allocated.
 *
 * Two update paths are visible:
 *  - one requires num_refs == 1 (BUG_ON), asserts that an already found
 *    backref has the same byte count, then sets found_ref, increments the
 *    found_ref counter, stores bytes and disk_bytenr and marks the record
 *    as content and owner checked;
 *  - the other stores num_refs and sets found_extent_tree, printing a
 *    message to stderr when that flag was already set.
 * NOTE(review): the branch between the two paths presumably tests
 * @found_ref; that line and the return values are not among the visible
 * lines.
 */
6249 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6250 u64 parent, u64 root, u64 owner, u64 offset,
6251 u32 num_refs, int found_ref, u64 max_size)
6253 struct extent_record *rec;
6254 struct data_backref *back;
6255 struct cache_extent *cache;
6258 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6260 struct extent_record tmpl;
6262 memset(&tmpl, 0, sizeof(tmpl));
6263 tmpl.start = bytenr;
6265 tmpl.max_size = max_size;
6267 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6271 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6276 rec = container_of(cache, struct extent_record, cache);
6277 if (rec->max_size < max_size)
6278 rec->max_size = max_size;
6281 * If found_ref is set then max_size is the real size and must match the
6282 * existing refs. So if we have already found a ref then we need to
6283 * make sure that this ref matches the existing one, otherwise we need
6284 * to add a new backref so we can notice that the backrefs don't match
6285 * and we need to figure out who is telling the truth. This is to
6286 * account for that awful fsync bug I introduced where we'd end up with
6287 * a btrfs_file_extent_item that would have its length include multiple
6288 * prealloc extents or point inside of a prealloc extent.
6290 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6293 back = alloc_data_backref(rec, parent, root, owner, offset,
6299 BUG_ON(num_refs != 1);
6300 if (back->node.found_ref)
6301 BUG_ON(back->bytes != max_size);
6302 back->node.found_ref = 1;
6303 back->found_ref += 1;
6304 back->bytes = max_size;
6305 back->disk_bytenr = bytenr;
6307 rec->content_checked = 1;
6308 rec->owner_ref_checked = 1;
6310 if (back->node.found_extent_tree) {
6311 fprintf(stderr, "Extent back ref already exists "
6312 "for %llu parent %llu root %llu "
6313 "owner %llu offset %llu num_refs %lu\n",
6314 (unsigned long long)bytenr,
6315 (unsigned long long)parent,
6316 (unsigned long long)root,
6317 (unsigned long long)owner,
6318 (unsigned long long)offset,
6319 (unsigned long)num_refs);
6321 back->num_refs = num_refs;
6322 back->node.found_extent_tree = 1;
6324 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (bytenr, size) for processing: it is added to @seen and
 * to @pending.  The result of the second insertion is not stored.
 * NOTE(review): presumably the function returns early when the insertion
 * into @seen fails, so a range is queued once only - confirm.
 */
6328 static int add_pending(struct cache_tree *pending,
6329 struct cache_tree *seen, u64 bytenr, u32 size)
6332 ret = add_cache_extent(seen, bytenr, size);
6335 add_cache_extent(pending, bytenr, size);
/*
 * Select the next batch of blocks to process and store their start/size
 * pairs in @bits (capacity @bits_nr).
 *
 * Sources visible in the code, in order of appearance:
 *  - the first extent of @reada, stored in bits[0];
 *  - extents of @nodes searched from a position up to 32768 bytes before
 *    @last, then from 0;
 *  - extents of @pending searched from 0.
 * Batches are filled by walking next_cache_extent() until the source ends
 * or @bits_nr entries are stored.  When more than 8 slots remain free,
 * extents of @pending lying within 32768 bytes after the current end are
 * appended as well.
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last; on targets where unsigned long is narrower than 64 bits the value
 * is truncated.  The branch conditions between the sources and the return
 * value are not among the visible lines.
 */
6339 static int pick_next_pending(struct cache_tree *pending,
6340 struct cache_tree *reada,
6341 struct cache_tree *nodes,
6342 u64 last, struct block_info *bits, int bits_nr,
6345 unsigned long node_start = last;
6346 struct cache_extent *cache;
6349 cache = search_cache_extent(reada, 0);
6351 bits[0].start = cache->start;
6352 bits[0].size = cache->size;
/* Back up a little so nodes just before last are considered too. */
6357 if (node_start > 32768)
6358 node_start -= 32768;
6360 cache = search_cache_extent(nodes, node_start);
6362 cache = search_cache_extent(nodes, 0);
6365 cache = search_cache_extent(pending, 0);
6370 bits[ret].start = cache->start;
6371 bits[ret].size = cache->size;
6372 cache = next_cache_extent(cache);
6374 } while (cache && ret < bits_nr);
6380 bits[ret].start = cache->start;
6381 bits[ret].size = cache->size;
6382 cache = next_cache_extent(cache);
6384 } while (cache && ret < bits_nr);
/* Spare capacity: also pick up pending extents close to the batch. */
6386 if (bits_nr - ret > 8) {
6387 u64 lookup = bits[0].start + bits[0].size;
6388 struct cache_extent *next;
6389 next = search_cache_extent(pending, lookup);
6391 if (next->start - lookup > 32768)
6393 bits[ret].start = next->start;
6394 bits[ret].size = next->size;
6395 lookup = next->start + next->size;
6399 next = next_cache_extent(next);
/*
 * Release callback for one chunk record: maps @cache back to the embedding
 * chunk_record and detaches its list and dextents list heads.
 * NOTE(review): presumably the record memory is freed afterwards; that
 * statement is not among the visible lines.
 */
6407 static void free_chunk_record(struct cache_extent *cache)
6409 struct chunk_record *rec;
6411 rec = container_of(cache, struct chunk_record, cache);
6412 list_del_init(&rec->list);
6413 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing free_chunk_record() to
 * cache_tree_free_extents().
 */
6417 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6419 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device record stored in an rb-tree: maps @node
 * back to the embedding device_record.
 * NOTE(review): presumably the record is freed afterwards; that statement
 * is not among the visible lines.
 */
6422 static void free_device_record(struct rb_node *node)
6424 struct device_record *rec;
6426 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with free_device_record.
 * NOTE(review): presumably expands to the tree teardown function for the
 * device cache; the macro definition is outside this excerpt - confirm.
 */
6430 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * block_groups list of the tree.
 * NOTE(review): presumably the list append is skipped when the cache insert
 * fails and the insert result is returned - confirm.
 */
6432 int insert_block_group_record(struct block_group_tree *tree,
6433 struct block_group_record *bg_rec)
6437 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6441 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group record: maps @cache back to the
 * embedding block_group_record and detaches it from its list.
 * NOTE(review): presumably the record is freed afterwards; that statement
 * is not among the visible lines.
 */
6445 static void free_block_group_record(struct cache_extent *cache)
6447 struct block_group_record *rec;
6449 rec = container_of(cache, struct block_group_record, cache);
6450 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree in @tree by handing
 * free_block_group_record() to cache_tree_free_extents().
 */
6454 void free_block_group_tree(struct block_group_tree *tree)
6456 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists of the tree (no_chunk_orphans and no_device_orphans).
 * insert_cache_extent2() is used instead of insert_cache_extent(); the
 * existing remark below gives the reason.
 * NOTE(review): the handling of an insert failure and the return value are
 * not among the visible lines.
 */
6459 int insert_device_extent_record(struct device_extent_tree *tree,
6460 struct device_extent_record *de_rec)
6465 * Device extent is a bit different from the other extents, because
6466 * the extents which belong to the different devices may have the
6467 * same start and size, so we need use the special extent cache
6468 * search/insert functions.
6470 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6474 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6475 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent record: maps @cache back to the
 * embedding device_extent_record and detaches chunk_list and device_list
 * when they are linked.
 * NOTE(review): presumably the record is freed afterwards; that statement
 * is not among the visible lines.
 */
6479 static void free_device_extent_record(struct cache_extent *cache)
6481 struct device_extent_record *rec;
6483 rec = container_of(cache, struct device_extent_record, cache);
6484 if (!list_empty(&rec->chunk_list))
6485 list_del_init(&rec->chunk_list);
6486 if (!list_empty(&rec->device_list))
6487 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree in @tree by handing
 * free_device_extent_record() to cache_tree_free_extents().
 */
6491 void free_device_extent_tree(struct device_extent_tree *tree)
6493 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Compiled only with BTRFS_COMPAT_EXTENT_TREE_V0: turn a v0 extent ref item
 * at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded through
 * add_tree_backref(); the other visible call records a data backref with
 * key.objectid as bytenr, key.offset as parent and the v0 ref count.
 * NOTE(review): the remaining arguments of the tree backref call and the
 * return value are not among the visible lines.
 */
6496 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6497 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6498 struct extent_buffer *leaf, int slot)
6500 struct btrfs_extent_ref_v0 *ref0;
6501 struct btrfs_key key;
6504 btrfs_item_key_to_cpu(leaf, &key, slot);
6505 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6506 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6507 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6510 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6511 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes, as
 * computed by btrfs_chunk_record_size().  The cache range is
 * (key->offset, chunk length); generation comes from the leaf header; the
 * key fields and the chunk attributes (owner, stripe_len, type, io_width,
 * io_align, sector_size, sub_stripes) are copied.  For every stripe the
 * devid, the offset and the device uuid are read from the leaf.
 * An allocation failure is reported on stderr.
 * NOTE(review): what follows the failure message and the returned pointer
 * are not among the visible lines.
 */
6517 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6518 struct btrfs_key *key,
6521 struct btrfs_chunk *ptr;
6522 struct chunk_record *rec;
6525 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6526 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6528 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6530 fprintf(stderr, "memory allocation failed\n");
6534 INIT_LIST_HEAD(&rec->list);
6535 INIT_LIST_HEAD(&rec->dextents);
6538 rec->cache.start = key->offset;
6539 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6541 rec->generation = btrfs_header_generation(leaf);
6543 rec->objectid = key->objectid;
6544 rec->type = key->type;
6545 rec->offset = key->offset;
6547 rec->length = rec->cache.size;
6548 rec->owner = btrfs_chunk_owner(leaf, ptr);
6549 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6550 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6551 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6552 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6553 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6554 rec->num_stripes = num_stripes;
6555 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe device id, offset and device uuid. */
6557 for (i = 0; i < rec->num_stripes; ++i) {
6558 rec->stripes[i].devid =
6559 btrfs_stripe_devid_nr(leaf, ptr, i);
6560 rec->stripes[i].offset =
6561 btrfs_stripe_offset_nr(leaf, ptr, i);
6562 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6563 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Handle one chunk item found at @slot of @eb.
 *
 * The item is validated with btrfs_check_chunk_valid(); an invalid chunk is
 * reported through error().  A chunk record is then built with
 * btrfs_new_chunk_record() and inserted into @chunk_cache; the message
 * printed for an insert failure states that the chunk already existed.
 * NOTE(review): the remaining arguments of the validity check, the cleanup
 * after a failed insert and the return values are not among the visible
 * lines.
 */
6570 static int process_chunk_item(struct cache_tree *chunk_cache,
6571 struct btrfs_key *key, struct extent_buffer *eb,
6574 struct chunk_record *rec;
6575 struct btrfs_chunk *chunk;
6578 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6580 * Do extra check for this chunk item,
6582 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6583 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6584 * and owner<->key_type check.
6586 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6589 error("chunk(%llu, %llu) is not valid, ignore it",
6590 key->offset, btrfs_chunk_length(eb, chunk));
6593 rec = btrfs_new_chunk_record(eb, key, slot);
6594 ret = insert_cache_extent(chunk_cache, &rec->cache);
6596 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6597 rec->offset, rec->length);
/*
 * Handle one device item found at @slot of @eb: allocate a device_record,
 * fill it from the key and the item, and insert it into the rb-tree
 * @dev_cache ordered by device_record_compare.
 *
 * devid is first taken from key->offset and later overwritten with the
 * device id stored in the item.  The message printed for an insert failure
 * states that the device already existed.
 * NOTE(review): the record comes from malloc and only the fields assigned
 * below are initialised in the visible lines.  What follows the allocation
 * failure message, the cleanup after a failed insert and the return value
 * are not among the visible lines.
 */
6604 static int process_device_item(struct rb_root *dev_cache,
6605 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6607 struct btrfs_dev_item *ptr;
6608 struct device_record *rec;
6611 ptr = btrfs_item_ptr(eb,
6612 slot, struct btrfs_dev_item);
6614 rec = malloc(sizeof(*rec));
6616 fprintf(stderr, "memory allocation failed\n");
6620 rec->devid = key->offset;
6621 rec->generation = btrfs_header_generation(eb);
6623 rec->objectid = key->objectid;
6624 rec->type = key->type;
6625 rec->offset = key->offset;
6627 rec->devid = btrfs_device_id(eb, ptr);
6628 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6629 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6631 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6633 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The record is allocated zeroed.  The cache range is
 * (key->objectid, key->offset); generation comes from the leaf header; the
 * key fields are copied and flags are read from the item.  An allocation
 * failure is reported on stderr.
 * NOTE(review): what follows the failure message and the returned pointer
 * are not among the visible lines.
 */
6640 struct block_group_record *
6641 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6644 struct btrfs_block_group_item *ptr;
6645 struct block_group_record *rec;
6647 rec = calloc(1, sizeof(*rec));
6649 fprintf(stderr, "memory allocation failed\n");
6653 rec->cache.start = key->objectid;
6654 rec->cache.size = key->offset;
6656 rec->generation = btrfs_header_generation(leaf);
6658 rec->objectid = key->objectid;
6659 rec->type = key->type;
6660 rec->offset = key->offset;
6662 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6663 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6665 INIT_LIST_HEAD(&rec->list);
/*
 * Handle one block group item found at @slot of @eb: build a record with
 * btrfs_new_block_group_record() and insert it into @block_group_cache.
 * The message printed for an insert failure states that the block group
 * already existed.
 * NOTE(review): the cleanup after a failed insert and the return value are
 * not among the visible lines.
 */
6670 static int process_block_group_item(struct block_group_tree *block_group_cache,
6671 struct btrfs_key *key,
6672 struct extent_buffer *eb, int slot)
6674 struct block_group_record *rec;
6677 rec = btrfs_new_block_group_record(eb, key, slot);
6678 ret = insert_block_group_record(block_group_cache, rec);
6680 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6681 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is allocated zeroed.  The cache entry is keyed by
 * key->objectid and key->offset, and its size is the extent length read
 * from the item.  Generation comes from the leaf header; the key fields and
 * the chunk objectid of the extent are copied.  Both list heads are
 * initialised.  An allocation failure is reported on stderr.
 * NOTE(review): the destination of the chunk offset value, what follows
 * the failure message and the returned pointer are not among the visible
 * lines.
 */
6688 struct device_extent_record *
6689 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6690 struct btrfs_key *key, int slot)
6692 struct device_extent_record *rec;
6693 struct btrfs_dev_extent *ptr;
6695 rec = calloc(1, sizeof(*rec));
6697 fprintf(stderr, "memory allocation failed\n");
6701 rec->cache.objectid = key->objectid;
6702 rec->cache.start = key->offset;
6704 rec->generation = btrfs_header_generation(leaf);
6706 rec->objectid = key->objectid;
6707 rec->type = key->type;
6708 rec->offset = key->offset;
6710 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6711 rec->chunk_objecteid =
6712 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6714 btrfs_dev_extent_chunk_offset(leaf, ptr);
6715 rec->length = btrfs_dev_extent_length(leaf, ptr);
6716 rec->cache.size = rec->length;
6718 INIT_LIST_HEAD(&rec->chunk_list);
6719 INIT_LIST_HEAD(&rec->device_list);
/*
 * Handle one dev extent item found at @slot of @eb: build a record with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache.
 * The message emitted for an insert failure states that the device extent
 * already existed.
 * NOTE(review): the return type line, the cleanup after a failed insert and
 * the return value are not among the visible lines.
 */
6725 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6726 struct btrfs_key *key, struct extent_buffer *eb,
6729 struct device_extent_record *rec;
6732 rec = btrfs_new_device_extent_record(eb, key, slot);
6733 ret = insert_device_extent_record(dev_extent_cache, rec);
6736 "Device extent[%llu, %llu, %llu] existed.\n",
6737 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM from the extent tree into
 * @extent_cache.
 *
 * Steps visible here:
 *  - read the item key; for a METADATA_ITEM key the length is
 *    root->nodesize, otherwise it is key.offset;
 *  - complain about a bytenr that is not aligned to root->sectorsize;
 *  - an item smaller than struct btrfs_extent_item is treated as the old v0
 *    format (under BTRFS_COMPAT_EXTENT_TREE_V0): only the ref count is read
 *    and the record is added straight away;
 *  - otherwise read the ref count and flags, complain about a metadata
 *    length other than nodesize or a data length not aligned to sectorsize,
 *    and add the extent record;
 *  - finally walk the inline references that follow the extent item and
 *    register each one as a tree or data backref.
 *
 * @root:         supplies nodesize and sectorsize
 * @extent_cache: cache receiving the extent record and its backrefs
 * @eb, @slot:    leaf and slot holding the item
 */
6744 static int process_extent_item(struct btrfs_root *root,
6745 struct cache_tree *extent_cache,
6746 struct extent_buffer *eb, int slot)
6748 struct btrfs_extent_item *ei;
6749 struct btrfs_extent_inline_ref *iref;
6750 struct btrfs_extent_data_ref *dref;
6751 struct btrfs_shared_data_ref *sref;
6752 struct btrfs_key key;
6753 struct extent_record tmpl;
6758 u32 item_size = btrfs_item_size_nr(eb, slot);
6764 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys do not carry the length; it is always one node. */
6766 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6768 num_bytes = root->nodesize;
6770 num_bytes = key.offset;
6773 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6774 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6775 key.objectid, root->sectorsize);
/* Item too small for the current layout: legacy v0 extent item. */
6778 if (item_size < sizeof(*ei)) {
6779 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6780 struct btrfs_extent_item_v0 *ei0;
6781 BUG_ON(item_size != sizeof(*ei0));
6782 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6783 refs = btrfs_extent_refs_v0(eb, ei0);
6787 memset(&tmpl, 0, sizeof(tmpl));
6788 tmpl.start = key.objectid;
6789 tmpl.nr = num_bytes;
6790 tmpl.extent_item_refs = refs;
6791 tmpl.metadata = metadata;
6793 tmpl.max_size = num_bytes;
6795 return add_extent_rec(extent_cache, &tmpl);
6798 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6799 refs = btrfs_extent_refs(eb, ei);
6800 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6804 if (metadata && num_bytes != root->nodesize) {
6805 error("ignore invalid metadata extent, length %llu does not equal to %u",
6806 num_bytes, root->nodesize);
6809 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6810 error("ignore invalid data extent, length %llu is not aligned to %u",
6811 num_bytes, root->sectorsize);
6815 memset(&tmpl, 0, sizeof(tmpl));
6816 tmpl.start = key.objectid;
6817 tmpl.nr = num_bytes;
6818 tmpl.extent_item_refs = refs;
6819 tmpl.metadata = metadata;
6821 tmpl.max_size = num_bytes;
/*
 * NOTE(review): unlike the v0 path above, the result of add_extent_rec()
 * is not used on this line -- confirm that ignoring it is intended.
 */
6822 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  A tree block stored under
 * an EXTENT_ITEM key additionally has a btrfs_tree_block_info in front of
 * them, which is skipped here.
 */
6824 ptr = (unsigned long)(ei + 1);
6825 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6826 key.type == BTRFS_EXTENT_ITEM_KEY)
6827 ptr += sizeof(struct btrfs_tree_block_info);
6829 end = (unsigned long)ei + item_size;
6831 iref = (struct btrfs_extent_inline_ref *)ptr;
6832 type = btrfs_extent_inline_ref_type(eb, iref);
6833 offset = btrfs_extent_inline_ref_offset(eb, iref);
6835 case BTRFS_TREE_BLOCK_REF_KEY:
6836 ret = add_tree_backref(extent_cache, key.objectid,
6840 "add_tree_backref failed (extent items tree block): %s",
6843 case BTRFS_SHARED_BLOCK_REF_KEY:
6844 ret = add_tree_backref(extent_cache, key.objectid,
6848 "add_tree_backref failed (extent items shared block): %s",
6851 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
6852 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6853 add_data_backref(extent_cache, key.objectid, 0,
6854 btrfs_extent_data_ref_root(eb, dref),
6855 btrfs_extent_data_ref_objectid(eb,
6857 btrfs_extent_data_ref_offset(eb, dref),
6858 btrfs_extent_data_ref_count(eb, dref),
6861 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref count is stored right behind the inline ref. */
6862 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6863 add_data_backref(extent_cache, key.objectid, offset,
6865 btrfs_shared_data_ref_count(eb, sref),
6869 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6870 key.objectid, key.type, num_bytes);
/* Advance by the size of this ref type to reach the next inline ref. */
6873 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [@offset, @offset + @bytes) of block group @cache is described
 * by exactly one free space entry, then remove that entry.
 *
 * Super block copies are not free space, so the range is first adjusted
 * around them: for every super mirror the super offset is mapped back to
 * logical addresses inside this block group with btrfs_rmap_block(), and any
 * stripe overlapping the range is cut off the front, cut off the end, or
 * (when it sits in the middle) handled by checking the left part recursively
 * and continuing with the right part.
 *
 * What is left is looked up with btrfs_find_free_space().  A missing entry,
 * a different start offset or a different length is reported on stderr.  On
 * a match the entry is unlinked from the free space ctl, so entries still
 * present after all ranges were checked have no matching gap.
 */
6880 static int check_cache_range(struct btrfs_root *root,
6881 struct btrfs_block_group_cache *cache,
6882 u64 offset, u64 bytes)
6884 struct btrfs_free_space *entry;
6890 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6891 bytenr = btrfs_sb_offset(i);
6892 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6893 cache->key.objectid, bytenr, 0,
6894 &logical, &nr, &stripe_len);
/* Stripe ends before the range, or starts after it: no overlap. */
6899 if (logical[nr] + stripe_len <= offset)
6901 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: trim the front. */
6903 if (logical[nr] == offset) {
6904 if (stripe_len >= bytes) {
6908 bytes -= stripe_len;
6909 offset += stripe_len;
/* Stripe starts before the range: keep only what lies behind it. */
6910 } else if (logical[nr] < offset) {
6911 if (logical[nr] + stripe_len >=
6916 bytes = (offset + bytes) -
6917 (logical[nr] + stripe_len);
6918 offset = logical[nr] + stripe_len;
6921 * Could be tricky, the super may land in the
6922 * middle of the area we're checking. First
6923 * check the easiest case, it's at the end.
6925 if (logical[nr] + stripe_len >=
6927 bytes = logical[nr] - offset;
6931 /* Check the left side */
6932 ret = check_cache_range(root, cache,
6934 logical[nr] - offset);
6940 /* Now we continue with the right side */
6941 bytes = (offset + bytes) -
6942 (logical[nr] + stripe_len);
6943 offset = logical[nr] + stripe_len;
/* The adjusted range must match one free space entry exactly. */
6950 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6952 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6953 offset, offset+bytes);
6957 if (entry->offset != offset) {
6958 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6963 if (entry->bytes != bytes) {
6964 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6965 bytes, entry->bytes, offset);
/* Consume the entry so it cannot be matched a second time. */
6969 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from the start of the block group (but not below
 * BTRFS_SUPER_INFO_OFFSET) up to its end.  'last' tracks the end of the
 * previous extent.  Every gap between 'last' and the next EXTENT_ITEM /
 * METADATA_ITEM, and the gap between the final extent and the end of the
 * block group, is passed to check_cache_range().  An EXTENT_ITEM advances
 * 'last' by key.offset, a METADATA_ITEM by root->nodesize.
 *
 * After the walk, entries still present in the free_space_offset tree are
 * reported on stderr.
 */
6974 static int verify_space_cache(struct btrfs_root *root,
6975 struct btrfs_block_group_cache *cache)
6977 struct btrfs_path path;
6978 struct extent_buffer *leaf;
6979 struct btrfs_key key;
/* All lookups below run against the extent tree. */
6983 root = root->fs_info->extent_root;
6985 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6987 btrfs_init_path(&path);
6988 key.objectid = last;
6990 key.type = BTRFS_EXTENT_ITEM_KEY;
6991 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6996 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6997 ret = btrfs_next_leaf(root, &path);
7005 leaf = path.nodes[0];
7006 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
7007 if (key.objectid >= cache->key.offset + cache->key.objectid)
7009 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7010 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts where the previous one ended: no gap to verify. */
7015 if (last == key.objectid) {
7016 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7017 last = key.objectid + key.offset;
7019 last = key.objectid + root->nodesize;
/* Gap [last, key.objectid) has to be covered by the free space cache. */
7024 ret = check_cache_range(root, cache, last,
7025 key.objectid - last);
7028 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7029 last = key.objectid + key.offset;
7031 last = key.objectid + root->nodesize;
/* Tail gap between the last extent and the end of the block group. */
7035 if (last < cache->key.objectid + cache->key.offset)
7036 ret = check_cache_range(root, cache, last,
7037 cache->key.objectid +
7038 cache->key.offset - last);
7041 btrfs_release_path(&path);
7044 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7045 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups found from
 * just past the primary super block onwards.
 *
 * If the super block carries a cache generation other than -1 that differs
 * from the super generation, a message saying the space cache will be
 * invalidated is printed.  When progress reporting is enabled the task
 * position is set to TASK_FREE_SPACE and the task is started.
 *
 * Per block group: make sure a free_space_ctl exists, load the free space
 * either from the free space tree (when the FREE_SPACE_TREE compat_ro bit
 * is set; super stripes are excluded around the load) or from the free
 * space cache, then compare it with the extent tree through
 * verify_space_cache().
 *
 * Returns -EINVAL when 'error' is non-zero, 0 otherwise.
 */
7053 static int check_space_cache(struct btrfs_root *root)
7055 struct btrfs_block_group_cache *cache;
7056 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7060 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7061 btrfs_super_generation(root->fs_info->super_copy) !=
7062 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7063 printf("cache and super generation don't match, space cache "
7064 "will be invalidated\n");
7068 if (ctx.progress_enabled) {
7069 ctx.tp = TASK_FREE_SPACE;
7070 task_start(ctx.info);
7074 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup continues right behind this block group. */
7078 start = cache->key.objectid + cache->key.offset;
7079 if (!cache->free_space_ctl) {
7080 if (btrfs_init_free_space_ctl(cache,
7081 root->sectorsize)) {
7086 btrfs_remove_free_space_cache(cache);
7089 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7090 ret = exclude_super_stripes(root, cache);
7092 fprintf(stderr, "could not exclude super stripes: %s\n",
7097 ret = load_free_space_tree(root->fs_info, cache);
7098 free_excluded_extents(root, cache);
7100 fprintf(stderr, "could not load free space tree: %s\n",
7107 ret = load_free_space_cache(root->fs_info, cache);
7112 ret = verify_space_cache(root, cache);
7114 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7115 cache->key.objectid);
7120 task_stop(ctx.info);
7122 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare every sector against
 * the checksum stored in the item.
 *
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; it is tested against being a multiple
 *               of root->sectorsize
 * @leaf_offset: offset inside @eb at which the item's checksums start
 * @eb:          leaf containing the csum item
 *
 * A buffer of @num_bytes is allocated and filled through
 * read_extent_data().  For each sectorsize piece the checksum is computed
 * with btrfs_csum_data()/btrfs_csum_final() and compared with the expected
 * value read from @eb at
 *     leaf_offset + (byte offset / sectorsize) * csum_size.
 * A mismatch is printed together with the mirror number, and the number of
 * copies is queried so that further mirrors can be tried.
 */
7125 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7126 u64 num_bytes, unsigned long leaf_offset,
7127 struct extent_buffer *eb) {
7130 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7132 unsigned long csum_offset;
7136 u64 data_checked = 0;
7142 if (num_bytes % root->sectorsize)
7145 data = malloc(num_bytes);
7149 while (offset < num_bytes) {
7152 read_len = num_bytes - offset;
7153 /* read as much space once a time */
7154 ret = read_extent_data(root, data + offset,
7155 bytenr + offset, &read_len, mirror);
7159 /* verify every 4k data's checksum */
7160 while (data_checked < read_len) {
/* tmp is the byte offset of the current sector within the whole range. */
7162 tmp = offset + data_checked;
7164 csum = btrfs_csum_data((char *)data + tmp,
7165 csum, root->sectorsize);
7166 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sector, starting at leaf_offset. */
7168 csum_offset = leaf_offset +
7169 tmp / root->sectorsize * csum_size;
7170 read_extent_buffer(eb, (char *)&csum_expected,
7171 csum_offset, csum_size);
7172 /* try another mirror */
7173 if (csum != csum_expected) {
7174 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7175 mirror, bytenr + tmp,
7176 csum, csum_expected);
7177 num_copies = btrfs_num_copies(
7178 &root->fs_info->mapping_tree,
7180 if (mirror < num_copies - 1) {
7185 data_checked += root->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The extent tree is searched for (@bytenr, EXTENT_ITEM, -1) and the path is
 * moved backwards so the walk starts at or before the range.  While walking
 * forward, every EXTENT_ITEM that overlaps the range shrinks it:
 *  - an extent starting at @bytenr removes the front of the range;
 *  - an extent starting before @bytenr removes whatever it overlaps;
 *  - an extent starting inside the range and ending before the range end
 *    splits it: the right part is checked by a recursive call and the
 *    function continues with the left part; otherwise the range is cut down
 *    to the part in front of the extent.
 *
 * If bytes are left uncovered (and no other error occurred) the remaining
 * range is reported on stderr as having no extents.
 */
7194 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7197 struct btrfs_path path;
7198 struct extent_buffer *leaf;
7199 struct btrfs_key key;
7202 btrfs_init_path(&path);
7203 key.objectid = bytenr;
7204 key.type = BTRFS_EXTENT_ITEM_KEY;
7205 key.offset = (u64)-1;
7208 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7211 fprintf(stderr, "Error looking up extent record %d\n", ret);
7212 btrfs_release_path(&path);
7215 if (path.slots[0] > 0) {
7218 ret = btrfs_prev_leaf(root, &path);
7221 } else if (ret > 0) {
7228 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7231 * Block group items come before extent items if they have the same
7232 * bytenr, so walk back one more just in case. Dear future traveller,
7233 * first congrats on mastering time travel. Now if it's not too much
7234 * trouble could you go back to 2006 and tell Chris to make the
7235 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7236 * EXTENT_ITEM_KEY please?
7238 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7239 if (path.slots[0] > 0) {
7242 ret = btrfs_prev_leaf(root, &path);
7245 } else if (ret > 0) {
7250 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7254 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7255 ret = btrfs_next_leaf(root, &path);
7257 fprintf(stderr, "Error going to next leaf "
7259 btrfs_release_path(&path);
7265 leaf = path.nodes[0];
7266 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7267 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7271 if (key.objectid + key.offset < bytenr) {
/* Extent starts behind the range. */
7275 if (key.objectid > bytenr + num_bytes)
7278 if (key.objectid == bytenr) {
7279 if (key.offset >= num_bytes) {
7283 num_bytes -= key.offset;
7284 bytenr += key.offset;
7285 } else if (key.objectid < bytenr) {
7286 if (key.objectid + key.offset >= bytenr + num_bytes) {
7290 num_bytes = (bytenr + num_bytes) -
7291 (key.objectid + key.offset);
7292 bytenr = key.objectid + key.offset;
7294 if (key.objectid + key.offset < bytenr + num_bytes) {
7295 u64 new_start = key.objectid + key.offset;
7296 u64 new_bytes = bytenr + num_bytes - new_start;
7299 * Weird case, the extent is in the middle of
7300 * our range, we'll have to search one side
7301 * and then the other. Not sure if this happens
7302 * in real life, but no harm in coding it up
7303 * anyway just in case.
7305 btrfs_release_path(&path);
7306 ret = check_extent_exists(root, new_start,
7309 fprintf(stderr, "Right section didn't "
7313 num_bytes = key.objectid - bytenr;
7316 num_bytes = key.objectid - bytenr;
/* Anything still left in the range has no extent item behind it. */
7323 if (num_bytes && !ret) {
7324 fprintf(stderr, "There are no extents for csum range "
7325 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7329 btrfs_release_path(&path);
/*
 * Walk the csum tree and check that every checksummed range is backed by
 * extent records.
 *
 * The csum root must have an up-to-date root node, otherwise
 * "No valid csum tree found" is printed.  For every EXTENT_CSUM item the
 * covered data length is
 *     (item size / csum size) * sectorsize.
 * When the global check_data_csum is set, the data itself is verified with
 * check_extent_csums(); otherwise that step is skipped via the
 * skip_csum_check label.
 *
 * Adjacent csum items are merged into one running range
 * [offset, offset + num_bytes).  When an item does not continue the running
 * range, the accumulated range is passed to check_extent_exists() and a new
 * range starts at the item's offset.
 */
7333 static int check_csums(struct btrfs_root *root)
7335 struct btrfs_path path;
7336 struct extent_buffer *leaf;
7337 struct btrfs_key key;
7338 u64 offset = 0, num_bytes = 0;
7339 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7343 unsigned long leaf_offset;
/* From here on 'root' is the csum tree. */
7345 root = root->fs_info->csum_root;
7346 if (!extent_buffer_uptodate(root->node)) {
7347 fprintf(stderr, "No valid csum tree found\n");
7351 btrfs_init_path(&path);
7352 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7353 key.type = BTRFS_EXTENT_CSUM_KEY;
7355 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7357 fprintf(stderr, "Error searching csum tree %d\n", ret);
7358 btrfs_release_path(&path);
7362 if (ret > 0 && path.slots[0])
7367 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7368 ret = btrfs_next_leaf(root, &path);
7370 fprintf(stderr, "Error going to next leaf "
7377 leaf = path.nodes[0];
7379 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7380 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of the item cover one sector of data. */
7385 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7386 csum_size) * root->sectorsize;
7387 if (!check_data_csum)
7388 goto skip_csum_check;
7389 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7390 ret = check_extent_csums(root, key.offset, data_len,
7396 offset = key.offset;
/* This item does not continue the running range: verify it, start anew. */
7397 } else if (key.offset != offset + num_bytes) {
7398 ret = check_extent_exists(root, offset, num_bytes);
7400 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7401 "there is no extent record\n",
7402 offset, offset+num_bytes);
7405 offset = key.offset;
7408 num_bytes += data_len;
7412 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset: a later field is only looked at when all earlier ones are equal.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the result is non-zero when @key sorts strictly before
 * @drop_key and zero otherwise -- confirm.
 */
7416 static int is_dropped_key(struct btrfs_key *key,
7417 struct btrfs_key *drop_key) {
7418 if (key->objectid < drop_key->objectid)
7420 else if (key->objectid == drop_key->objectid) {
7421 if (key->type < drop_key->type)
7423 else if (key->type == drop_key->type) {
7424 if (key->offset < drop_key->offset)
7432 * Here are the rules for FULL_BACKREF.
7434 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7435 * 2) If btrfs_header_owner(buf) no longer points to buf then we have FULL_BACKREF set.
7437 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7438 * if it happened after the relocation occurred since we'll have dropped the
7439 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7440 * have no real way to know for sure.
7442 * We process the blocks one root at a time, and we start from the lowest root
7443 * objectid and go to the highest. So we can just lookup the owner backref for
7444 * the record and if we don't find it then we know it doesn't exist and we have a FULL BACKREF.
7447 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7448 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7449 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, using only cached state.
 *
 * The extent record for buf->start is looked up in @extent_cache (the
 * record is expected to have been added before).  These conditions are
 * tested in order:
 *  - the root described by @ri has an objectid below
 *    BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf is the root node of @ri (buf->start == ri->bytenr);
 *  - @buf has BTRFS_HEADER_FLAG_RELOC set;
 *  - the header owner of @buf equals ri->objectid;
 *  - a tree backref for (parent 0, owner) exists on the record.
 *
 * There are two outcomes.  One leaves *@flags without FULL_BACKREF and sets
 * rec->bad_full_backref when the flag cached on the record is set to
 * something other than 0.  The other ORs FULL_BACKREF into *@flags and sets
 * rec->bad_full_backref when the cached flag is set to something other
 * than 1.  Which condition leads to which outcome is decided on lines not
 * visible in this excerpt.
 */
7451 static int calc_extent_flag(struct cache_tree *extent_cache,
7452 struct extent_buffer *buf,
7453 struct root_item_record *ri,
7456 struct extent_record *rec;
7457 struct cache_extent *cache;
7458 struct tree_backref *tback;
7461 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7462 /* we have added this extent before */
7466 rec = container_of(cache, struct extent_record, cache);
7469 * Except file/reloc tree, we can not have
7472 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7477 if (buf->start == ri->bytenr)
7480 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7483 owner = btrfs_header_owner(buf);
7484 if (owner == ri->objectid)
/* Look for a normal (parent == 0) backref from the header owner. */
7487 tback = find_tree_backref(rec, 0, owner);
/* Cached flag disagrees with "no FULL_BACKREF": remember the conflict. */
7492 if (rec->flag_block_full_backref != FLAG_UNSET &&
7493 rec->flag_block_full_backref != 0)
7494 rec->bad_full_backref = 1;
7497 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cached flag disagrees with "FULL_BACKREF": remember the conflict. */
7498 if (rec->flag_block_full_backref != FLAG_UNSET &&
7499 rec->flag_block_full_backref != 1)
7500 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 *     Invalid key type(<key type>) found in root(<root objectid>)
 * where the key type and the root objectid are rendered by print_key_type()
 * and print_objectid().
 */
7504 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7506 fprintf(stderr, "Invalid key type(");
7507 print_key_type(stderr, 0, key_type);
7508 fprintf(stderr, ") found in root(");
7509 print_objectid(stderr, rootid, 0);
7510 fprintf(stderr, ")\n");
7514 * Check if the key is valid with its extent buffer.
7516 * This is an early check in case an invalid key exists in an extent buffer.
7517 * This is not comprehensive yet, but should prevent a wrong key/item from being passed on.
/*
 * Check that an item of type @key_type may live in the tree whose objectid
 * is @rootid.
 *
 * Visible rules:
 *  - DEV_ITEM and CHUNK_ITEM:                   chunk tree only
 *  - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM: extent tree only
 *  - ROOT_ITEM:                                 root tree only
 *  - DEV_EXTENT:                                device tree only
 *
 * A mismatch is reported through report_mismatch_key_root().
 */
7520 static int check_type_with_root(u64 rootid, u8 key_type)
7523 /* Only valid in chunk tree */
7524 case BTRFS_DEV_ITEM_KEY:
7525 case BTRFS_CHUNK_ITEM_KEY:
7526 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7529 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a key type, while
 * BTRFS_CSUM_TREE_OBJECTID is named like a tree objectid.  If the (not
 * visible) switch is on key_type this label looks wrong and
 * BTRFS_EXTENT_CSUM_KEY may have been intended -- confirm.
 */
7530 case BTRFS_CSUM_TREE_OBJECTID:
7531 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7535 case BTRFS_EXTENT_ITEM_KEY:
7536 case BTRFS_METADATA_ITEM_KEY:
7537 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7538 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7541 case BTRFS_ROOT_ITEM_KEY:
7542 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7545 case BTRFS_DEV_EXTENT_KEY:
7546 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7552 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the root described by @ri.
 *
 * Outline of the visible steps:
 *  1. pick_next_pending() fills @bits with candidate blocks; when it asks
 *     for readahead they are added to @reada and readahead is issued.
 *  2. The first candidate becomes the current block.  It is removed from
 *     @pending, @reada and @nodes, and its extent record (if cached)
 *     supplies the expected parent generation.
 *  3. The block is read.  A block that is not up to date is recorded via
 *     record_bad_block_io().
 *  4. The FULL_BACKREF flag is determined: from the extent tree through
 *     btrfs_lookup_extent_info() unless init_extent_tree is set, with
 *     calc_extent_flag() as the alternative source.  The result is
 *     cross-checked against owner, RELOC header flag and ri->last_snapshot,
 *     and inconsistencies set rec->bad_full_backref.
 *  5. check_block() validates the block.
 *  6. Leaves: every item is checked with check_type_with_root() and then
 *     dispatched by key type to the caches (@extent_cache, @chunk_cache,
 *     @dev_cache, @block_group_cache, @dev_extent_cache), the backref
 *     helpers or the delete_items list.  File extent items update the
 *     data byte counters and add a data backref.
 *     Nodes: every child pointer gets an extent record and a tree backref
 *     and is queued in @nodes or @pending.  Children at ri->drop_level whose
 *     key is_dropped_key() matches are tested separately.
 *  7. Global statistics (btree bytes, fs tree bytes, extent tree bytes,
 *     wasted space, found_old_backref) are updated and the buffer is freed.
 */
7556 static int run_next_block(struct btrfs_root *root,
7557 struct block_info *bits,
7560 struct cache_tree *pending,
7561 struct cache_tree *seen,
7562 struct cache_tree *reada,
7563 struct cache_tree *nodes,
7564 struct cache_tree *extent_cache,
7565 struct cache_tree *chunk_cache,
7566 struct rb_root *dev_cache,
7567 struct block_group_tree *block_group_cache,
7568 struct device_extent_tree *dev_extent_cache,
7569 struct root_item_record *ri)
7571 struct extent_buffer *buf;
7572 struct extent_record *rec = NULL;
7583 struct btrfs_key key;
7584 struct cache_extent *cache;
7587 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7588 bits_nr, &reada_bits);
7593 for(i = 0; i < nritems; i++) {
7594 ret = add_cache_extent(reada, bits[i].start,
7599 /* fixme, get the parent transid */
7600 readahead_tree_block(root, bits[i].start,
/* The first candidate is the block processed by this call. */
7604 *last = bits[0].start;
7605 bytenr = bits[0].start;
7606 size = bits[0].size;
/* Drop the block from every queue it may still sit in. */
7608 cache = lookup_cache_extent(pending, bytenr, size);
7610 remove_cache_extent(pending, cache);
7613 cache = lookup_cache_extent(reada, bytenr, size);
7615 remove_cache_extent(reada, cache);
7618 cache = lookup_cache_extent(nodes, bytenr, size);
7620 remove_cache_extent(nodes, cache);
7623 cache = lookup_cache_extent(extent_cache, bytenr, size);
7625 rec = container_of(cache, struct extent_record, cache);
7626 gen = rec->parent_generation;
7629 /* fixme, get the real parent transid */
7630 buf = read_tree_block(root, bytenr, size, gen);
7631 if (!extent_buffer_uptodate(buf)) {
7632 record_bad_block_io(root->fs_info,
7633 extent_cache, bytenr, size);
7637 nritems = btrfs_header_nritems(buf);
/* Determine the block flags, from the extent tree or from cached state. */
7640 if (!init_extent_tree) {
7641 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7642 btrfs_header_level(buf), 1, NULL,
7645 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7647 fprintf(stderr, "Couldn't calc extent flags\n");
7648 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7653 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7655 fprintf(stderr, "Couldn't calc extent flags\n");
7656 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7660 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7662 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7663 ri->objectid == btrfs_header_owner(buf)) {
7665 * Ok we got to this block from it's original owner and
7666 * we have FULL_BACKREF set. Relocation can leave
7667 * converted blocks over so this is altogether possible,
7668 * however it's not possible if the generation > the
7669 * last snapshot, so check for this case.
7671 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7672 btrfs_header_generation(buf) > ri->last_snapshot) {
7673 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7674 rec->bad_full_backref = 1;
7679 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7680 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7681 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7682 rec->bad_full_backref = 1;
/* Cache the final decision on the extent record. */
7686 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7687 rec->flag_block_full_backref = 1;
7691 rec->flag_block_full_backref = 0;
7693 owner = btrfs_header_owner(buf);
7696 ret = check_block(root, extent_cache, buf, flags);
7700 if (btrfs_is_leaf(buf)) {
7701 btree_space_waste += btrfs_leaf_free_space(root, buf);
7702 for (i = 0; i < nritems; i++) {
7703 struct btrfs_file_extent_item *fi;
7704 btrfs_item_key_to_cpu(buf, &key, i);
7706 * Check key type against the leaf owner.
7707 * Could filter quite a lot of early error if
7710 if (check_type_with_root(btrfs_header_owner(buf),
7712 fprintf(stderr, "ignoring invalid key\n");
7715 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7716 process_extent_item(root, extent_cache, buf,
7720 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7721 process_extent_item(root, extent_cache, buf,
7725 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7727 btrfs_item_size_nr(buf, i);
7730 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7731 process_chunk_item(chunk_cache, &key, buf, i);
7734 if (key.type == BTRFS_DEV_ITEM_KEY) {
7735 process_device_item(dev_cache, &key, buf, i);
7738 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7739 process_block_group_item(block_group_cache,
7743 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7744 process_device_extent_item(dev_extent_cache,
7749 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7750 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7751 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: key.objectid is the extent bytenr. */
7758 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7759 ret = add_tree_backref(extent_cache,
7760 key.objectid, 0, key.offset, 0);
7763 "add_tree_backref failed (leaf tree block): %s",
7767 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7768 ret = add_tree_backref(extent_cache,
7769 key.objectid, key.offset, 0, 0);
7772 "add_tree_backref failed (leaf shared block): %s",
7776 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7777 struct btrfs_extent_data_ref *ref;
7778 ref = btrfs_item_ptr(buf, i,
7779 struct btrfs_extent_data_ref);
7780 add_data_backref(extent_cache,
7782 btrfs_extent_data_ref_root(buf, ref),
7783 btrfs_extent_data_ref_objectid(buf,
7785 btrfs_extent_data_ref_offset(buf, ref),
7786 btrfs_extent_data_ref_count(buf, ref),
7787 0, root->sectorsize);
7790 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7791 struct btrfs_shared_data_ref *ref;
7792 ref = btrfs_item_ptr(buf, i,
7793 struct btrfs_shared_data_ref);
7794 add_data_backref(extent_cache,
7795 key.objectid, key.offset, 0, 0, 0,
7796 btrfs_shared_data_ref_count(buf, ref),
7797 0, root->sectorsize);
/* Orphan items are copied onto the global delete_items list. */
7800 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7801 struct bad_item *bad;
7803 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7807 bad = malloc(sizeof(struct bad_item));
7810 INIT_LIST_HEAD(&bad->list);
7811 memcpy(&bad->key, &key,
7812 sizeof(struct btrfs_key));
7813 bad->root_id = owner;
7814 list_add_tail(&bad->list, &delete_items);
/* Everything below only concerns file extent items. */
7817 if (key.type != BTRFS_EXTENT_DATA_KEY)
7819 fi = btrfs_item_ptr(buf, i,
7820 struct btrfs_file_extent_item);
7821 if (btrfs_file_extent_type(buf, fi) ==
7822 BTRFS_FILE_EXTENT_INLINE)
7824 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7827 data_bytes_allocated +=
7828 btrfs_file_extent_disk_num_bytes(buf, fi);
7829 if (data_bytes_allocated < root->sectorsize) {
7832 data_bytes_referenced +=
7833 btrfs_file_extent_num_bytes(buf, fi);
7834 add_data_backref(extent_cache,
7835 btrfs_file_extent_disk_bytenr(buf, fi),
7836 parent, owner, key.objectid, key.offset -
7837 btrfs_file_extent_offset(buf, fi), 1, 1,
7838 btrfs_file_extent_disk_num_bytes(buf, fi));
7842 struct btrfs_key first_key;
7844 first_key.objectid = 0;
7847 btrfs_item_key_to_cpu(buf, &first_key, 0);
7848 level = btrfs_header_level(buf);
7849 for (i = 0; i < nritems; i++) {
7850 struct extent_record tmpl;
7852 ptr = btrfs_node_blockptr(buf, i);
7853 size = root->nodesize;
7854 btrfs_node_key_to_cpu(buf, &key, i);
7856 if ((level == ri->drop_level)
7857 && is_dropped_key(&key, &ri->drop_key)) {
7862 memset(&tmpl, 0, sizeof(tmpl));
7863 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7864 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7869 tmpl.max_size = size;
7870 ret = add_extent_rec(extent_cache, &tmpl);
7874 ret = add_tree_backref(extent_cache, ptr, parent,
7878 "add_tree_backref failed (non-leaf block): %s",
7884 add_pending(nodes, seen, ptr, size);
7886 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots of a node count as wasted btree space. */
7889 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7890 nritems) * sizeof(struct btrfs_key_ptr);
7892 total_btree_bytes += buf->len;
7893 if (fs_root_objectid(btrfs_header_owner(buf)))
7894 total_fs_tree_bytes += buf->len;
7895 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7896 total_extent_tree_bytes += buf->len;
7897 if (!found_old_backref &&
7898 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7899 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7900 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7901 found_old_backref = 1;
7903 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for scanning and seed its bookkeeping.
 *
 * The block is queued in @nodes when its level is above 0, otherwise in
 * @pending (@seen is passed along to add_pending()).  An extent record
 * starting at buf->start with max_size buf->len is added to @extent_cache,
 * followed by one tree backref:
 *  - for the reloc tree objectid, or a block whose backref revision is older
 *    than BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as parent;
 *  - otherwise the backref uses parent 0 and @objectid as root.
 */
7907 static int add_root_to_pending(struct extent_buffer *buf,
7908 struct cache_tree *extent_cache,
7909 struct cache_tree *pending,
7910 struct cache_tree *seen,
7911 struct cache_tree *nodes,
7914 struct extent_record tmpl;
7917 if (btrfs_header_level(buf) > 0)
7918 add_pending(nodes, seen, buf->start, buf->len);
7920 add_pending(pending, seen, buf->start, buf->len);
7922 memset(&tmpl, 0, sizeof(tmpl));
7923 tmpl.start = buf->start;
7928 tmpl.max_size = buf->len;
7929 add_extent_rec(extent_cache, &tmpl);
7931 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7932 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7933 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7936 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7941 /* as we fix the tree, we might be deleting blocks that
7942 * we're tracking for repair. This hook makes sure we
7943 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent reference is dropped.
 *
 * The cache is taken from root->fs_info->fsck_extent_cache and the record
 * covering [@bytenr, @bytenr + @num_bytes) is looked up.  @owner at or
 * above BTRFS_FIRST_FREE_OBJECTID marks the reference as a data reference.
 *
 * Data reference: the matching data backref is located with
 * find_data_backref().  The dropped ref count is subtracted from the
 * backref's found_ref / num_refs and from the record's refs /
 * extent_item_refs, and the found_ref / found_extent_tree bits are cleared
 * once the respective counter reaches zero.
 *
 * Tree reference: the matching tree backref is located with
 * find_tree_backref().  Its found_ref and found_extent_tree bits are
 * cleared, and extent_item_refs is decremented when non-zero.
 *
 * Finally maybe_free_extent_rec() is called on the record.
 */
7945 static int free_extent_hook(struct btrfs_trans_handle *trans,
7946 struct btrfs_root *root,
7947 u64 bytenr, u64 num_bytes, u64 parent,
7948 u64 root_objectid, u64 owner, u64 offset,
7951 struct extent_record *rec;
7952 struct cache_extent *cache;
7954 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7956 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7957 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7961 rec = container_of(cache, struct extent_record, cache);
7963 struct data_backref *back;
7964 back = find_data_backref(rec, parent, root_objectid, owner,
7965 offset, 1, bytenr, num_bytes);
7968 if (back->node.found_ref) {
7969 back->found_ref -= refs_to_drop;
7971 rec->refs -= refs_to_drop;
7973 if (back->node.found_extent_tree) {
7974 back->num_refs -= refs_to_drop;
7975 if (rec->extent_item_refs)
7976 rec->extent_item_refs -= refs_to_drop;
7978 if (back->found_ref == 0)
7979 back->node.found_ref = 0;
7980 if (back->num_refs == 0)
7981 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked when it has no extent tree entry
 * but still has found_ref set -- confirm this is the intended condition
 * (the same test is used for tree backrefs below).
 */
7983 if (!back->node.found_extent_tree && back->node.found_ref) {
7984 list_del(&back->node.list);
7988 struct tree_backref *back;
7989 back = find_tree_backref(rec, parent, root_objectid);
7992 if (back->node.found_ref) {
7995 back->node.found_ref = 0;
7997 if (back->node.found_extent_tree) {
7998 if (rec->extent_item_refs)
7999 rec->extent_item_refs--;
8000 back->node.found_extent_tree = 0;
8002 if (!back->node.found_extent_tree && back->node.found_ref) {
8003 list_del(&back->node.list);
8007 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that describe the extent starting at
 * 'bytenr'.
 *
 * The extent tree is searched repeatedly, starting from the highest possible
 * offset for 'bytenr'.  Only items whose objectid equals 'bytenr' are
 * considered.  An item of any type other than EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF and
 * SHARED_DATA_REF is skipped by moving the search key just below it: the
 * offset is lowered when its type is 0, otherwise the type is lowered and
 * the offset reset to the maximum.
 *
 * Each matching item is announced on stderr and removed with
 * btrfs_del_item().  When the removed item was an EXTENT_ITEM or
 * METADATA_ITEM, btrfs_update_block_group() is called with the extent
 * length (key offset for EXTENT_ITEM, root->nodesize for METADATA_ITEM).
 */
8012 static int delete_extent_records(struct btrfs_trans_handle *trans,
8013 struct btrfs_root *root,
8014 struct btrfs_path *path,
8017 struct btrfs_key key;
8018 struct btrfs_key found_key;
8019 struct extent_buffer *leaf;
8024 key.objectid = bytenr;
8026 key.offset = (u64)-1;
8029 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8036 if (path->slots[0] == 0)
8042 leaf = path->nodes[0];
8043 slot = path->slots[0];
8045 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8046 if (found_key.objectid != bytenr)
/* Not an extent-related item: step the search key just below it. */
8049 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8050 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8051 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8052 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8053 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8054 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8055 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8056 btrfs_release_path(path);
8057 if (found_key.type == 0) {
8058 if (found_key.offset == 0)
8060 key.offset = found_key.offset - 1;
8061 key.type = found_key.type;
8063 key.type = found_key.type - 1;
8064 key.offset = (u64)-1;
8068 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8069 found_key.objectid, found_key.type, found_key.offset);
8071 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8074 btrfs_release_path(path);
/* Removing the extent item itself also updates block group accounting. */
8076 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8077 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8078 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8079 found_key.offset : root->nodesize;
8081 ret = btrfs_update_block_group(trans, root, bytenr,
8088 btrfs_release_path(path);
8093 * for a single backref, this will allocate a new extent
8094 * and add the backref to it.
/*
 * Write the extent tree entries needed for one backref of @rec.
 *
 * rec->max_size is raised to at least the extent root's nodesize (the
 * guarding condition is not visible in this excerpt).
 *
 * Extent item: a new item keyed (rec->start, EXTENT_ITEM, rec->max_size) is
 * inserted with a ref count of 0 and rec->generation.  For a data backref
 * the flags are BTRFS_EXTENT_FLAG_DATA.  For a tree backref the item is
 * followed by a zeroed btrfs_tree_block_info that receives rec->info_level
 * and a key built from rec->info_objectid, and the flags are
 * BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  The block group accounting is
 * updated afterwards.  @allocated presumably tells whether this item
 * already exists and the step can be skipped -- the test is not visible,
 * confirm.
 *
 * References: for a data backref btrfs_inc_extent_ref() is called once per
 * dback->found_ref; for a tree backref it is called once.  A full backref
 * passes the backref's parent.  A message describing the added backref is
 * printed to stderr in both cases.
 *
 * Fix in this revision: the address-of expressions on the local copy_key had
 * been corrupted into a copyright sign followed by "_key" (an HTML-entity
 * decoding artifact) and are restored to &copy_key; a doubled semicolon on
 * the copy_key declaration is removed as well.
 */
8096 static int record_extent(struct btrfs_trans_handle *trans,
8097 struct btrfs_fs_info *info,
8098 struct btrfs_path *path,
8099 struct extent_record *rec,
8100 struct extent_backref *back,
8101 int allocated, u64 flags)
8104 struct btrfs_root *extent_root = info->extent_root;
8105 struct extent_buffer *leaf;
8106 struct btrfs_key ins_key;
8107 struct btrfs_extent_item *ei;
8108 struct data_backref *dback;
8109 struct btrfs_tree_block_info *bi;
8112 rec->max_size = max_t(u64, rec->max_size,
8113 info->extent_root->nodesize);
8116 u32 item_size = sizeof(*ei);
/* Room for the btrfs_tree_block_info that follows a tree block's item. */
8119 item_size += sizeof(*bi);
8121 ins_key.objectid = rec->start;
8122 ins_key.offset = rec->max_size;
8123 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8125 ret = btrfs_insert_empty_item(trans, extent_root, path,
8126 &ins_key, item_size);
8130 leaf = path->nodes[0];
8131 ei = btrfs_item_ptr(leaf, path->slots[0],
8132 struct btrfs_extent_item);
/* The item starts with 0 refs; btrfs_inc_extent_ref() below adds them. */
8134 btrfs_set_extent_refs(leaf, ei, 0);
8135 btrfs_set_extent_generation(leaf, ei, rec->generation);
8137 if (back->is_data) {
8138 btrfs_set_extent_flags(leaf, ei,
8139 BTRFS_EXTENT_FLAG_DATA);
8141 struct btrfs_disk_key copy_key;
8143 bi = (struct btrfs_tree_block_info *)(ei + 1);
8144 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8147 btrfs_set_disk_key_objectid(&copy_key,
8148 rec->info_objectid);
8149 btrfs_set_disk_key_type(&copy_key, 0);
8150 btrfs_set_disk_key_offset(&copy_key, 0);
8152 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8153 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8155 btrfs_set_extent_flags(leaf, ei,
8156 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8159 btrfs_mark_buffer_dirty(leaf);
8160 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8161 rec->max_size, 1, 0);
8164 btrfs_release_path(path);
8167 if (back->is_data) {
8171 dback = to_data_backref(back);
8172 if (back->full_backref)
8173 parent = dback->parent;
8177 for (i = 0; i < dback->found_ref; i++) {
8178 /* if parent != 0, we're doing a full backref
8179 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8180 * just makes the backref allocator create a data
8183 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8184 rec->start, rec->max_size,
8188 BTRFS_FIRST_FREE_OBJECTID :
8194 fprintf(stderr, "adding new data backref"
8195 " on %llu %s %llu owner %llu"
8196 " offset %llu found %d\n",
8197 (unsigned long long)rec->start,
8198 back->full_backref ?
8200 back->full_backref ?
8201 (unsigned long long)parent :
8202 (unsigned long long)dback->root,
8203 (unsigned long long)dback->owner,
8204 (unsigned long long)dback->offset,
8208 struct tree_backref *tback;
8210 tback = to_tree_backref(back);
8211 if (back->full_backref)
8212 parent = tback->parent;
8216 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8217 rec->start, rec->max_size,
8218 parent, tback->root, 0, 0);
8219 fprintf(stderr, "adding new tree backref on "
8220 "start %llu len %llu parent %llu root %llu\n",
8221 rec->start, rec->max_size, parent, tback->root);
8224 btrfs_release_path(path);
/*
 * Scan the @entries list for an extent_entry whose bytenr and bytes both
 * equal the given @bytenr and @bytes.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry is returned, or NULL when the list holds no
 * match -- confirm.
 */
8228 static struct extent_entry *find_entry(struct list_head *entries,
8229 u64 bytenr, u64 bytes)
8231 struct extent_entry *entry = NULL;
8233 list_for_each_entry(entry, entries, list) {
8234 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from @entries, judged by how many
 * references agree on it (entry->count).
 *
 * Visible rules while walking the list:
 *  - an entry whose broken count equals its count is tested first (per the
 *    original comment such an entry is not to be trusted);
 *  - a tie between the current best and the entry is tested, as the best
 *    cannot be relied on in that case;
 *  - a tie between the previous and the current entry (previous not broken)
 *    is likewise tested;
 *  - otherwise the entry with the higher count wins, either between prev and
 *    entry or between best and entry.
 *
 * NOTE(review): the statements executed for the tie cases and the final
 * return are not visible in this excerpt.
 */
8241 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8243 struct extent_entry *entry, *best = NULL, *prev = NULL;
8245 list_for_each_entry(entry, entries, list) {
8247 * If there are as many broken entries as entries then we know
8248 * not to trust this particular entry.
8250 if (entry->broken == entry->count)
8254 * Special case, when there are only two entries and 'best' is
8264 * If our current entry == best then we can't be sure our best
8265 * is really the best, so we need to keep searching.
8267 if (best && best->count == entry->count) {
8273 /* Prev == entry, not good enough, have to keep searching */
8274 if (!prev->broken && prev->count == entry->count)
8278 best = (prev->count > entry->count) ? prev : entry;
8279 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with @entry.
 * @info:  filesystem the reference lives in
 * @path:  caller-supplied path; released again before returning
 * @dback: data backref naming the root, owner and file offset to fix
 * @entry: the bytenr/bytes pair the file extent item should describe
 *
 * The root named by dback->root is read, then a read-only search starting at
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) walks forward until a
 * file extent item whose disk_bytenr and disk_num_bytes equal
 * dback->disk_bytenr and dback->bytes is found.  The same key is then searched
 * again with COW inside a new transaction and the item is edited in place:
 * disk_bytenr and the extent offset are adjusted towards entry->bytenr,
 * disk_num_bytes is set to entry->bytes, and ram_bytes is set to entry->bytes
 * too when the extent is not compressed.  The leaf is marked dirty and the
 * transaction is committed.
 *
 * A compressed extent whose bytenr differs from the entry, and a reference
 * lying before the entry start or past its end, only produce a message on
 * stderr (presumably followed by an error exit - confirm).
 *
 * Returns ret when it is non-zero, otherwise the result of the commit.
 */
8287 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8288 struct data_backref *dback, struct extent_entry *entry)
8290 struct btrfs_trans_handle *trans;
8291 struct btrfs_root *root;
8292 struct btrfs_file_extent_item *fi;
8293 struct extent_buffer *leaf;
8294 struct btrfs_key key;
8298 key.objectid = dback->root;
8299 key.type = BTRFS_ROOT_ITEM_KEY;
8300 key.offset = (u64)-1;
8301 root = btrfs_read_fs_root(info, &key);
8303 fprintf(stderr, "Couldn't find root for our ref\n");
8308 * The backref points to the original offset of the extent if it was
8309 * split, so we need to search down to the offset we have and then walk
8310 * forward until we find the backref we're looking for.
8312 key.objectid = dback->owner;
8313 key.type = BTRFS_EXTENT_DATA_KEY;
8314 key.offset = dback->offset;
8315 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8317 fprintf(stderr, "Error looking up ref %d\n", ret);
8322 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8323 ret = btrfs_next_leaf(root, path);
8325 fprintf(stderr, "Couldn't find our ref, next\n");
8329 leaf = path->nodes[0];
8330 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8331 if (key.objectid != dback->owner ||
8332 key.type != BTRFS_EXTENT_DATA_KEY) {
8333 fprintf(stderr, "Couldn't find our ref, search\n");
8336 fi = btrfs_item_ptr(leaf, path->slots[0],
8337 struct btrfs_file_extent_item);
8338 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8339 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8341 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8346 btrfs_release_path(path);
8348 trans = btrfs_start_transaction(root, 1);
8350 return PTR_ERR(trans);
8353 * Ok we have the key of the file extent we want to fix, now we can cow
8354 * down to the thing and fix it.
8356 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8358 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8359 key.objectid, key.type, key.offset, ret);
8363 fprintf(stderr, "Well that's odd, we just found this key "
8364 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8369 leaf = path->nodes[0];
8370 fi = btrfs_item_ptr(leaf, path->slots[0],
8371 struct btrfs_file_extent_item);
8373 if (btrfs_file_extent_compression(leaf, fi) &&
8374 dback->disk_bytenr != entry->bytenr) {
8375 fprintf(stderr, "Ref doesn't match the record start and is "
8376 "compressed, please take a btrfs-image of this file "
8377 "system and send it to a btrfs developer so they can "
8378 "complete this functionality for bytenr %Lu\n",
8379 dback->disk_bytenr);
8384 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8385 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8386 } else if (dback->disk_bytenr > entry->bytenr) {
8387 u64 off_diff, offset;
8389 off_diff = dback->disk_bytenr - entry->bytenr;
8390 offset = btrfs_file_extent_offset(leaf, fi);
8391 if (dback->disk_bytenr + offset +
8392 btrfs_file_extent_num_bytes(leaf, fi) >
8393 entry->bytenr + entry->bytes) {
8394 fprintf(stderr, "Ref is past the entry end, please "
8395 "take a btrfs-image of this file system and "
8396 "send it to a btrfs developer, ref %Lu\n",
8397 dback->disk_bytenr);
8402 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8403 btrfs_set_file_extent_offset(leaf, fi, offset);
8404 } else if (dback->disk_bytenr < entry->bytenr) {
8407 offset = btrfs_file_extent_offset(leaf, fi);
8408 if (dback->disk_bytenr + offset < entry->bytenr) {
8409 fprintf(stderr, "Ref is before the entry start, please"
8410 " take a btrfs-image of this file system and "
8411 "send it to a btrfs developer, ref %Lu\n",
8412 dback->disk_bytenr);
8417 offset += dback->disk_bytenr;
8418 offset -= entry->bytenr;
8419 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8420 btrfs_set_file_extent_offset(leaf, fi, offset);
8423 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8426 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8427 * only do this if we aren't using compression, otherwise it's a
8430 if (!btrfs_file_extent_compression(leaf, fi))
8431 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8433 printf("ram bytes may be wrong?\n");
8434 btrfs_mark_buffer_dirty(leaf);
8436 err = btrfs_commit_transaction(trans, root);
8437 btrfs_release_path(path);
8438 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on one extent range.
 * @info: filesystem being checked
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * Pass one looks only at data backrefs that are not full backrefs and that
 * have found_ref set.  Each distinct (disk_bytenr, bytes) pair gets an
 * extent_entry on a local list; a backref that disagrees with
 * rec->start / rec->nr, or that is marked broken, flags a mismatch.  With at
 * most one entry and no mismatch there is nothing to repair.
 *
 * Otherwise find_most_right_entry() chooses the range to trust.  If the
 * backrefs cannot decide, the extent record's own range (rec->start, rec->nr)
 * is looked up, or added as an extra entry when broken backrefs were seen and
 * the record was found in the extent tree, and the choice is retried.  A best
 * entry whose bytenr differs from rec->start is reported as unsupported.
 *
 * Pass two calls repair_ref() on every eligible backref whose range differs
 * from the chosen entry.  The local entry list is unlinked before returning.
 *
 * NOTE(review): the return statements are not visible here; the in-body
 * comment about dropping the cache and rescanning suggests a "retry" style
 * result after a successful repair - confirm.
 */
8441 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8442 struct extent_record *rec)
8444 struct extent_backref *back;
8445 struct data_backref *dback;
8446 struct extent_entry *entry, *best = NULL;
8449 int broken_entries = 0;
8454 * Metadata is easy and the backrefs should always agree on bytenr and
8455 * size, if not we've got bigger issues.
8460 list_for_each_entry(back, &rec->backrefs, list) {
8461 if (back->full_backref || !back->is_data)
8464 dback = to_data_backref(back);
8467 * We only pay attention to backrefs that we found a real
8470 if (dback->found_ref == 0)
8474 * For now we only catch when the bytes don't match, not the
8475 * bytenr. We can easily do this at the same time, but I want
8476 * to have a fs image to test on before we just add repair
8477 * functionality willy-nilly so we know we won't screw up the
8481 entry = find_entry(&entries, dback->disk_bytenr,
8484 entry = malloc(sizeof(struct extent_entry));
8489 memset(entry, 0, sizeof(*entry));
8490 entry->bytenr = dback->disk_bytenr;
8491 entry->bytes = dback->bytes;
8492 list_add_tail(&entry->list, &entries);
8497 * If we only have on entry we may think the entries agree when
8498 * in reality they don't so we have to do some extra checking.
8500 if (dback->disk_bytenr != rec->start ||
8501 dback->bytes != rec->nr || back->broken)
8512 /* Yay all the backrefs agree, carry on good sir */
8513 if (nr_entries <= 1 && !mismatch)
8516 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8517 "%Lu\n", rec->start);
8520 * First we want to see if the backrefs can agree amongst themselves who
8521 * is right, so figure out which one of the entries has the highest
8524 best = find_most_right_entry(&entries);
8527 * Ok so we may have an even split between what the backrefs think, so
8528 * this is where we use the extent ref to see what it thinks.
8531 entry = find_entry(&entries, rec->start, rec->nr);
8532 if (!entry && (!broken_entries || !rec->found_rec)) {
8533 fprintf(stderr, "Backrefs don't agree with each other "
8534 "and extent record doesn't agree with anybody,"
8535 " so we can't fix bytenr %Lu bytes %Lu\n",
8536 rec->start, rec->nr);
8539 } else if (!entry) {
8541 * Ok our backrefs were broken, we'll assume this is the
8542 * correct value and add an entry for this range.
8544 entry = malloc(sizeof(struct extent_entry));
8549 memset(entry, 0, sizeof(*entry));
8550 entry->bytenr = rec->start;
8551 entry->bytes = rec->nr;
8552 list_add_tail(&entry->list, &entries);
8556 best = find_most_right_entry(&entries);
8558 fprintf(stderr, "Backrefs and extent record evenly "
8559 "split on who is right, this is going to "
8560 "require user input to fix bytenr %Lu bytes "
8561 "%Lu\n", rec->start, rec->nr);
8568 * I don't think this can happen currently as we'll abort() if we catch
8569 * this case higher up, but in case somebody removes that we still can't
8570 * deal with it properly here yet, so just bail out of that's the case.
8572 if (best->bytenr != rec->start) {
8573 fprintf(stderr, "Extent start and backref starts don't match, "
8574 "please use btrfs-image on this file system and send "
8575 "it to a btrfs developer so they can make fsck fix "
8576 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8577 rec->start, rec->nr);
8583 * Ok great we all agreed on an extent record, let's go find the real
8584 * references and fix up the ones that don't match.
8586 list_for_each_entry(back, &rec->backrefs, list) {
8587 if (back->full_backref || !back->is_data)
8590 dback = to_data_backref(back);
8593 * Still ignoring backrefs that don't have a real ref attached
8596 if (dback->found_ref == 0)
8599 if (dback->bytes == best->bytes &&
8600 dback->disk_bytenr == best->bytenr)
8603 ret = repair_ref(info, path, dback, best);
8609 * Ok we messed with the actual refs, which means we need to drop our
8610 * entire cache and go back and rescan. I know this is a huge pain and
8611 * adds a lot of extra work, but it's the only way to be safe. Once all
8612 * the backrefs agree we may not need to do anything to the extent
8617 while (!list_empty(&entries)) {
8618 entry = list_entry(entries.next, struct extent_entry, list);
8619 list_del_init(&entry->list);
/*
 * process_duplicates - decide whether @rec really has duplicate extent items.
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * Nothing is changed when @rec was itself found in the extent tree or has
 * more than one duplicate.  Otherwise @rec is removed from the cache and its
 * first duplicate ('good') is promoted in its place: good's lists and check
 * flags are reset, its cache range is set from good->start / good->nr, and it
 * takes over rec->refs and rec->backrefs.
 *
 * Records still in the cache that overlap good's range are then folded in.
 * One that has found_rec set or carries duplicates becomes a duplicate of
 * 'good', which is queued on duplicate_extents if not already listed; any
 * other one simply donates its refs and backrefs.  Finally 'good' is
 * inserted into the cache.
 *
 * Returns 0 when 'good' ends up with duplicates that still need deleting and
 * 1 when it does not.  NOTE(review): the values returned on the early exits
 * are not visible here - confirm.
 */
8625 static int process_duplicates(struct cache_tree *extent_cache,
8626 struct extent_record *rec)
8628 struct extent_record *good, *tmp;
8629 struct cache_extent *cache;
8633 * If we found a extent record for this extent then return, or if we
8634 * have more than one duplicate we are likely going to need to delete
8637 if (rec->found_rec || rec->num_duplicates > 1)
8640 /* Shouldn't happen but just in case */
8641 BUG_ON(!rec->num_duplicates);
8644 * So this happens if we end up with a backref that doesn't match the
8645 * actual extent entry. So either the backref is bad or the extent
8646 * entry is bad. Either way we want to have the extent_record actually
8647 * reflect what we found in the extent_tree, so we need to take the
8648 * duplicate out and use that as the extent_record since the only way we
8649 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8651 remove_cache_extent(extent_cache, &rec->cache);
8653 good = to_extent_record(rec->dups.next);
8654 list_del_init(&good->list);
8655 INIT_LIST_HEAD(&good->backrefs);
8656 INIT_LIST_HEAD(&good->dups);
8657 good->cache.start = good->start;
8658 good->cache.size = good->nr;
8659 good->content_checked = 0;
8660 good->owner_ref_checked = 0;
8661 good->num_duplicates = 0;
8662 good->refs = rec->refs;
8663 list_splice_init(&rec->backrefs, &good->backrefs);
8665 cache = lookup_cache_extent(extent_cache, good->start,
8669 tmp = container_of(cache, struct extent_record, cache);
8672 * If we find another overlapping extent and it's found_rec is
8673 * set then it's a duplicate and we need to try and delete
8676 if (tmp->found_rec || tmp->num_duplicates > 0) {
8677 if (list_empty(&good->list))
8678 list_add_tail(&good->list,
8679 &duplicate_extents);
8680 good->num_duplicates += tmp->num_duplicates + 1;
8681 list_splice_init(&tmp->dups, &good->dups);
8682 list_del_init(&tmp->list);
8683 list_add_tail(&tmp->list, &good->dups);
8684 remove_cache_extent(extent_cache, &tmp->cache);
8689 * Ok we have another non extent item backed extent rec, so lets
8690 * just add it to this extent and carry on like we did above.
8692 good->refs += tmp->refs;
8693 list_splice_init(&tmp->backrefs, &good->backrefs);
8694 remove_cache_extent(extent_cache, &tmp->cache);
8697 ret = insert_cache_extent(extent_cache, &good->cache);
8700 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - remove redundant extent items that overlap @rec.
 * @root: any root of the filesystem; replaced by fs_info->extent_root before
 *        the transaction is started
 * @rec:  record whose ->dups list holds the overlapping records
 *
 * The duplicates are scanned for the record that covers all the others.
 * Overlaps that are not fully contained in each other are reported and not
 * handled.  The remaining records are collected on a local delete_list and,
 * inside one transaction on the extent root, the BTRFS_EXTENT_ITEM_KEY item
 * (objectid = start, offset = nr) of each is searched for and deleted.
 * Entries with found_rec == 0 and metadata records get special treatment
 * whose details are not visible here.  The transaction is committed and both
 * delete_list and rec->dups are emptied.
 *
 * rec->num_duplicates is cleared when there was no error and nothing was
 * deleted.  Returns ret when it is non-zero, otherwise nr_del.
 *
 * NOTE(review): the "overlapping extents" message uses a "[%Lu-%Lu]" format
 * but passes start and nr, i.e. a length rather than an end offset - confirm
 * whether start + nr was intended.
 */
8703 static int delete_duplicate_records(struct btrfs_root *root,
8704 struct extent_record *rec)
8706 struct btrfs_trans_handle *trans;
8707 LIST_HEAD(delete_list);
8708 struct btrfs_path path;
8709 struct extent_record *tmp, *good, *n;
8712 struct btrfs_key key;
8714 btrfs_init_path(&path);
8717 /* Find the record that covers all of the duplicates. */
8718 list_for_each_entry(tmp, &rec->dups, list) {
8719 if (good->start < tmp->start)
8721 if (good->nr > tmp->nr)
8724 if (tmp->start + tmp->nr < good->start + good->nr) {
8725 fprintf(stderr, "Ok we have overlapping extents that "
8726 "aren't completely covered by each other, this "
8727 "is going to require more careful thought. "
8728 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8729 tmp->start, tmp->nr, good->start, good->nr);
8736 list_add_tail(&rec->list, &delete_list);
8738 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8741 list_move_tail(&tmp->list, &delete_list);
8744 root = root->fs_info->extent_root;
8745 trans = btrfs_start_transaction(root, 1);
8746 if (IS_ERR(trans)) {
8747 ret = PTR_ERR(trans);
8751 list_for_each_entry(tmp, &delete_list, list) {
8752 if (tmp->found_rec == 0)
8754 key.objectid = tmp->start;
8755 key.type = BTRFS_EXTENT_ITEM_KEY;
8756 key.offset = tmp->nr;
8758 /* Shouldn't happen but just in case */
8759 if (tmp->metadata) {
8760 fprintf(stderr, "Well this shouldn't happen, extent "
8761 "record overlaps but is metadata? "
8762 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8766 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8772 ret = btrfs_del_item(trans, root, &path);
8775 btrfs_release_path(&path);
8778 err = btrfs_commit_transaction(trans, root);
8782 while (!list_empty(&delete_list)) {
8783 tmp = to_extent_record(delete_list.next);
8784 list_del_init(&tmp->list);
8790 while (!list_empty(&rec->dups)) {
8791 tmp = to_extent_record(rec->dups.next);
8792 list_del_init(&tmp->list);
8796 btrfs_release_path(&path);
8798 if (!ret && !nr_del)
8799 rec->num_duplicates = 0;
8801 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate file extents for unresolved backrefs.
 * @info:         filesystem being checked
 * @path:         scratch path, released after every lookup
 * @extent_cache: cache tree of extent records, consulted by bytenr
 * @rec:          extent record whose backrefs are examined
 *
 * Only data backrefs that are not full backrefs and that have no found_ref
 * yet are considered.  For each one the root dback->root is read (a missing
 * root, -ENOENT, is skipped; any other error is returned) and the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is searched read-only.
 * When the item exists its disk_bytenr and disk_num_bytes are read and the
 * extent cache is probed at that bytenr; per the in-body comment, a backref
 * whose bytenr already has an extent record of its own is not adopted.
 * Otherwise found_ref is incremented and the backref's disk_bytenr and bytes
 * are overwritten with the values from the file extent item.
 *
 * NOTE(review): the final comment says the backref is then marked so that
 * verify_backrefs() will not trust it; that assignment and the return value
 * are not visible here - confirm.
 */
8804 static int find_possible_backrefs(struct btrfs_fs_info *info,
8805 struct btrfs_path *path,
8806 struct cache_tree *extent_cache,
8807 struct extent_record *rec)
8809 struct btrfs_root *root;
8810 struct extent_backref *back;
8811 struct data_backref *dback;
8812 struct cache_extent *cache;
8813 struct btrfs_file_extent_item *fi;
8814 struct btrfs_key key;
8818 list_for_each_entry(back, &rec->backrefs, list) {
8819 /* Don't care about full backrefs (poor unloved backrefs) */
8820 if (back->full_backref || !back->is_data)
8823 dback = to_data_backref(back);
8825 /* We found this one, we don't need to do a lookup */
8826 if (dback->found_ref)
8829 key.objectid = dback->root;
8830 key.type = BTRFS_ROOT_ITEM_KEY;
8831 key.offset = (u64)-1;
8833 root = btrfs_read_fs_root(info, &key);
8835 /* No root, definitely a bad ref, skip */
8836 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8838 /* Other err, exit */
8840 return PTR_ERR(root);
8842 key.objectid = dback->owner;
8843 key.type = BTRFS_EXTENT_DATA_KEY;
8844 key.offset = dback->offset;
8845 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8847 btrfs_release_path(path);
8850 /* Didn't find it, we can carry on */
8855 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8856 struct btrfs_file_extent_item);
8857 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8858 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8859 btrfs_release_path(path);
8860 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8862 struct extent_record *tmp;
8863 tmp = container_of(cache, struct extent_record, cache);
8866 * If we found an extent record for the bytenr for this
8867 * particular backref then we can't add it to our
8868 * current extent record. We only want to add backrefs
8869 * that don't have a corresponding extent item in the
8870 * extent tree since they likely belong to this record
8871 * and we need to fix it if it doesn't match bytenrs.
8877 dback->found_ref += 1;
8878 dback->disk_bytenr = bytenr;
8879 dback->bytes = bytes;
8882 * Set this so the verify backref code knows not to trust the
8883 * values in this backref.
8892 * Record orphan data ref into corresponding root.
8894 * Return 0 if the extent item contains data ref and recorded.
8895 * Return 1 if the extent item contains no useful data ref
8896 * In that case, it may contain only shared_dataref or metadata backref
8897 * or the file extent exists (this should be handled by the extent bytenr
8899 * Return <0 if something goes wrong.
/*
 * Implementation notes for record_orphan_data_extents():
 *
 * Only data backrefs that are not full backrefs, that were found in the
 * extent tree (found_extent_tree) and that have no found_ref are candidates.
 * For each candidate the owning root is read; a root that cannot be read is
 * skipped.  The key (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) is
 * then searched read-only in that root, and the outcome decides whether an
 * orphan_data_extent is allocated.  A recorded orphan carries the backref's
 * root/owner/offset plus the extent's range taken from rec->cache.
 *
 * The visible return statement yields 0 when at least one orphan was
 * recorded and 1 otherwise.
 *
 * NOTE(review): list_add() is called with &dest_root->orphan_data_extents as
 * its first argument and &orphan->list as its second.  With the usual
 * list_add(new, head) signature this looks reversed: the root's list head
 * would be linked into the freshly initialised orphan node, dropping any
 * orphans recorded earlier for the same root.  Confirm and, if so, swap the
 * arguments.
 */
8901 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8902 struct extent_record *rec)
8904 struct btrfs_key key;
8905 struct btrfs_root *dest_root;
8906 struct extent_backref *back;
8907 struct data_backref *dback;
8908 struct orphan_data_extent *orphan;
8909 struct btrfs_path path;
8910 int recorded_data_ref = 0;
8915 btrfs_init_path(&path);
8916 list_for_each_entry(back, &rec->backrefs, list) {
8917 if (back->full_backref || !back->is_data ||
8918 !back->found_extent_tree)
8920 dback = to_data_backref(back);
8921 if (dback->found_ref)
8923 key.objectid = dback->root;
8924 key.type = BTRFS_ROOT_ITEM_KEY;
8925 key.offset = (u64)-1;
8927 dest_root = btrfs_read_fs_root(fs_info, &key);
8929 /* For non-exist root we just skip it */
8930 if (IS_ERR(dest_root) || !dest_root)
8933 key.objectid = dback->owner;
8934 key.type = BTRFS_EXTENT_DATA_KEY;
8935 key.offset = dback->offset;
8937 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8938 btrfs_release_path(&path);
8940 * For ret < 0, it's OK since the fs-tree may be corrupted,
8941 * we need to record it for inode/file extent rebuild.
8942 * For ret > 0, we record it only for file extent rebuild.
8943 * For ret == 0, the file extent exists but only bytenr
8944 * mismatch, let the original bytenr fix routine to handle,
8950 orphan = malloc(sizeof(*orphan));
8955 INIT_LIST_HEAD(&orphan->list);
8956 orphan->root = dback->root;
8957 orphan->objectid = dback->owner;
8958 orphan->offset = dback->offset;
8959 orphan->disk_bytenr = rec->cache.start;
8960 orphan->disk_len = rec->cache.size;
8961 list_add(&dest_root->orphan_data_extents, &orphan->list);
8962 recorded_data_ref = 1;
8965 btrfs_release_path(&path);
8967 return !recorded_data_ref;
8973 * when an incorrect extent item is found, this will delete
8974 * all of the existing entries for it and recreate them
8975 * based on what the tree scan found.
/*
 * Implementation notes for fixup_extent_refs():
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, passed to
 *                find_possible_backrefs()
 * @rec:          the extent record to rebuild
 *
 * For a data extent whose counted refs differ from the extent item's refs,
 * find_possible_backrefs() is run first so that stray backrefs are attached
 * before verification.  The sequence is then:
 *   1. verify_backrefs() so all backrefs agree,
 *   2. delete_extent_records() inside a transaction on the extent root,
 *   3. unless the range is listed in info->corrupt_blocks, record_extent()
 *      for every backref that has found_ref set, after clearing
 *      rec->bad_full_backref.
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags handed to
 * record_extent() when rec->flag_block_full_backref is set.  The transaction
 * is committed, a "Repaired extent references" message is printed and the
 * path is released.
 *
 * NOTE(review): error branches and the return value are not visible here.
 */
8977 static int fixup_extent_refs(struct btrfs_fs_info *info,
8978 struct cache_tree *extent_cache,
8979 struct extent_record *rec)
8981 struct btrfs_trans_handle *trans = NULL;
8983 struct btrfs_path path;
8984 struct list_head *cur = rec->backrefs.next;
8985 struct cache_extent *cache;
8986 struct extent_backref *back;
8990 if (rec->flag_block_full_backref)
8991 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8993 btrfs_init_path(&path);
8994 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8996 * Sometimes the backrefs themselves are so broken they don't
8997 * get attached to any meaningful rec, so first go back and
8998 * check any of our backrefs that we couldn't find and throw
8999 * them into the list if we find the backref so that
9000 * verify_backrefs can figure out what to do.
9002 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9007 /* step one, make sure all of the backrefs agree */
9008 ret = verify_backrefs(info, &path, rec);
9012 trans = btrfs_start_transaction(info->extent_root, 1);
9013 if (IS_ERR(trans)) {
9014 ret = PTR_ERR(trans);
9018 /* step two, delete all the existing records */
9019 ret = delete_extent_records(trans, info->extent_root, &path,
9025 /* was this block corrupt? If so, don't add references to it */
9026 cache = lookup_cache_extent(info->corrupt_blocks,
9027 rec->start, rec->max_size);
9033 /* step three, recreate all the refs we did find */
9034 while(cur != &rec->backrefs) {
9035 back = to_extent_backref(cur);
9039 * if we didn't find any references, don't create a
9042 if (!back->found_ref)
9045 rec->bad_full_backref = 0;
9046 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9054 int err = btrfs_commit_transaction(trans, info->extent_root);
9060 fprintf(stderr, "Repaired extent references for %llu\n",
9061 (unsigned long long)rec->start);
9063 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear FULL_BACKREF on an extent item.
 * @fs_info: filesystem being repaired
 * @rec:     extent record describing the item and the wanted flag state
 *
 * The item is looked up in the extent root with COW.  Metadata records use
 * the key (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level); the other
 * branch uses (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size).  When the
 * item is found its flags are read, BTRFS_BLOCK_FLAG_FULL_BACKREF is set or
 * cleared to follow rec->flag_block_full_backref, the flags are written back,
 * the leaf is marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): on the two early exits after the search the result of
 * btrfs_commit_transaction() is not captured - confirm whether a commit
 * failure should be reported there.  The remaining return values are not
 * visible here.
 */
9067 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9068 struct extent_record *rec)
9070 struct btrfs_trans_handle *trans;
9071 struct btrfs_root *root = fs_info->extent_root;
9072 struct btrfs_path path;
9073 struct btrfs_extent_item *ei;
9074 struct btrfs_key key;
9078 key.objectid = rec->start;
9079 if (rec->metadata) {
9080 key.type = BTRFS_METADATA_ITEM_KEY;
9081 key.offset = rec->info_level;
9083 key.type = BTRFS_EXTENT_ITEM_KEY;
9084 key.offset = rec->max_size;
9087 trans = btrfs_start_transaction(root, 0);
9089 return PTR_ERR(trans);
9091 btrfs_init_path(&path);
9092 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9094 btrfs_release_path(&path);
9095 btrfs_commit_transaction(trans, root);
9098 fprintf(stderr, "Didn't find extent for %llu\n",
9099 (unsigned long long)rec->start);
9100 btrfs_release_path(&path);
9101 btrfs_commit_transaction(trans, root);
9105 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9106 struct btrfs_extent_item);
9107 flags = btrfs_extent_flags(path.nodes[0], ei);
9108 if (rec->flag_block_full_backref) {
9109 fprintf(stderr, "setting full backref on %llu\n",
9110 (unsigned long long)key.objectid);
9111 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9113 fprintf(stderr, "clearing full backref on %llu\n",
9114 (unsigned long long)key.objectid);
9115 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9117 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9118 btrfs_mark_buffer_dirty(path.nodes[0]);
9119 btrfs_release_path(&path);
9120 ret = btrfs_commit_transaction(trans, root);
9122 fprintf(stderr, "Repaired extent flags for %llu\n",
9123 (unsigned long long)rec->start);
9128 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - drop the parent's pointer to one corrupt tree block.
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the bad block (key, level and cached start)
 *
 * The extent root is searched for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the bad block.
 * If the slot the search landed on already points at corrupt->cache.start it
 * is used directly; otherwise every slot of that node is scanned for the
 * block pointer.  A matching slot is removed with btrfs_del_ptr() after a
 * "deleting pointer to block" message.
 *
 * NOTE(review): the handling when the pointer is not found (including the
 * branch taken when the node is the extent root's node) and the return
 * values are not visible here.
 */
9129 static int prune_one_block(struct btrfs_trans_handle *trans,
9130 struct btrfs_fs_info *info,
9131 struct btrfs_corrupt_block *corrupt)
9134 struct btrfs_path path;
9135 struct extent_buffer *eb;
9139 int level = corrupt->level + 1;
9141 btrfs_init_path(&path);
9143 /* we want to stop at the parent to our busted block */
9144 path.lowest_level = level;
9146 ret = btrfs_search_slot(trans, info->extent_root,
9147 &corrupt->key, &path, -1, 1);
9152 eb = path.nodes[level];
9159 * hopefully the search gave us the block we want to prune,
9160 * lets try that first
9162 slot = path.slots[level];
9163 found = btrfs_node_blockptr(eb, slot);
9164 if (found == corrupt->cache.start)
9167 nritems = btrfs_header_nritems(eb);
9169 /* the search failed, lets scan this node and hope we find it */
9170 for (slot = 0; slot < nritems; slot++) {
9171 found = btrfs_node_blockptr(eb, slot);
9172 if (found == corrupt->cache.start)
9176 * we couldn't find the bad block. TODO, search all the nodes for pointers
9179 if (eb == info->extent_root->node) {
9184 btrfs_release_path(&path);
9189 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9190 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9193 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block listed in info->corrupt_blocks.
 * @info: filesystem being repaired
 *
 * Entries are taken from the corrupt_blocks cache starting at offset 0; each
 * one is handed to prune_one_block() and then removed from the cache.  A
 * transaction on the extent root is started for this work and committed at
 * the end.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started, or the
 * result of btrfs_commit_transaction().
 * NOTE(review): the value returned by prune_one_block() is discarded, so a
 * failed prune is not reported to the caller - confirm this is intended.
 */
9197 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9199 struct btrfs_trans_handle *trans = NULL;
9200 struct cache_extent *cache;
9201 struct btrfs_corrupt_block *corrupt;
9204 cache = search_cache_extent(info->corrupt_blocks, 0);
9208 trans = btrfs_start_transaction(info->extent_root, 1);
9210 return PTR_ERR(trans);
9212 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9213 prune_one_block(trans, info, corrupt);
9214 remove_cache_extent(info->corrupt_blocks, cache);
9217 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - forget cached free-space state.
 * @fs_info: filesystem whose free_space_cache extent bits are reset
 *
 * Repeatedly finds the first EXTENT_DIRTY range in fs_info->free_space_cache
 * and clears it.  Block groups are then visited in order via
 * btrfs_lookup_first_block_group(), advancing 'start' past each group's
 * range (key.objectid + key.offset).
 * NOTE(review): what is changed on each visited block group is not visible
 * here - confirm against the full source.
 */
9221 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9223 struct btrfs_block_group_cache *cache;
9228 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9229 &start, &end, EXTENT_DIRTY);
9232 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9237 cache = btrfs_lookup_first_block_group(fs_info, start);
9242 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, extent reference errors.
 * @root:         any root of the filesystem (root->fs_info is what is used)
 * @extent_cache: cache tree of extent records built by the scan
 *
 * Preparation visible here: every cached record's range and every corrupt
 * block's range is marked dirty in fs_info->excluded_extents so that repairs
 * do not allocate from them; corrupt blocks are pruned and the cached block
 * group state is reset.
 *
 * While repairing, records on duplicate_extents are handled first through
 * process_duplicates() and delete_duplicate_records().
 *
 * Each record in the cache is then checked and a message is printed for:
 * multiple extent items, a ref count differing from the extent item (which
 * also triggers record_orphan_data_extents()), a backpointer mismatch, a
 * failed owner ref check, a bad full backref, metadata crossing a stripe
 * boundary, and a chunk type mismatch.  With repair enabled,
 * fixup_extent_refs() or fixup_extent_flags() is called for the fixable
 * cases.  The record is removed from the cache, its backrefs are freed, and
 * its range is cleared from excluded_extents when repairing outside of
 * init_extent_tree mode and the record was clean or fixed.
 *
 * After the loop a failure other than -EAGAIN prints "failed to repair
 * damaged filesystem"; another visible path starts a transaction on the
 * extent root, runs btrfs_fix_block_accounting() and commits.
 *
 * NOTE(review): the results of prune_corrupt_blocks() and
 * btrfs_fix_block_accounting() are not captured - confirm this is intended.
 * Loop control and return values are not visible here.
 */
9246 static int check_extent_refs(struct btrfs_root *root,
9247 struct cache_tree *extent_cache)
9249 struct extent_record *rec;
9250 struct cache_extent *cache;
9256 * if we're doing a repair, we have to make sure
9257 * we don't allocate from the problem extents.
9258 * In the worst case, this will be all the
9261 cache = search_cache_extent(extent_cache, 0);
9263 rec = container_of(cache, struct extent_record, cache);
9264 set_extent_dirty(root->fs_info->excluded_extents,
9266 rec->start + rec->max_size - 1);
9267 cache = next_cache_extent(cache);
9270 /* pin down all the corrupted blocks too */
9271 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9273 set_extent_dirty(root->fs_info->excluded_extents,
9275 cache->start + cache->size - 1);
9276 cache = next_cache_extent(cache);
9278 prune_corrupt_blocks(root->fs_info);
9279 reset_cached_block_groups(root->fs_info);
9282 reset_cached_block_groups(root->fs_info);
9285 * We need to delete any duplicate entries we find first otherwise we
9286 * could mess up the extent tree when we have backrefs that actually
9287 * belong to a different extent item and not the weird duplicate one.
9289 while (repair && !list_empty(&duplicate_extents)) {
9290 rec = to_extent_record(duplicate_extents.next);
9291 list_del_init(&rec->list);
9293 /* Sometimes we can find a backref before we find an actual
9294 * extent, so we need to process it a little bit to see if there
9295 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9296 * if this is a backref screwup. If we need to delete stuff
9297 * process_duplicates() will return 0, otherwise it will return
9300 if (process_duplicates(extent_cache, rec))
9302 ret = delete_duplicate_records(root, rec);
9306 * delete_duplicate_records will return the number of entries
9307 * deleted, so if it's greater than 0 then we know we actually
9308 * did something and we need to remove.
9321 cache = search_cache_extent(extent_cache, 0);
9324 rec = container_of(cache, struct extent_record, cache);
9325 if (rec->num_duplicates) {
9326 fprintf(stderr, "extent item %llu has multiple extent "
9327 "items\n", (unsigned long long)rec->start);
9331 if (rec->refs != rec->extent_item_refs) {
9332 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9333 (unsigned long long)rec->start,
9334 (unsigned long long)rec->nr);
9335 fprintf(stderr, "extent item %llu, found %llu\n",
9336 (unsigned long long)rec->extent_item_refs,
9337 (unsigned long long)rec->refs);
9338 ret = record_orphan_data_extents(root->fs_info, rec);
9344 if (all_backpointers_checked(rec, 1)) {
9345 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9346 (unsigned long long)rec->start,
9347 (unsigned long long)rec->nr);
9351 if (!rec->owner_ref_checked) {
9352 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9353 (unsigned long long)rec->start,
9354 (unsigned long long)rec->nr);
9359 if (repair && fix) {
9360 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9366 if (rec->bad_full_backref) {
9367 fprintf(stderr, "bad full backref, on [%llu]\n",
9368 (unsigned long long)rec->start);
9370 ret = fixup_extent_flags(root->fs_info, rec);
9378 * Although it's not a extent ref's problem, we reuse this
9379 * routine for error reporting.
9380 * No repair function yet.
9382 if (rec->crossing_stripes) {
9384 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9385 rec->start, rec->start + rec->max_size);
9389 if (rec->wrong_chunk_type) {
9391 "bad extent [%llu, %llu), type mismatch with chunk\n",
9392 rec->start, rec->start + rec->max_size);
9396 remove_cache_extent(extent_cache, cache);
9397 free_all_extent_backrefs(rec);
9398 if (!init_extent_tree && repair && (!cur_err || fix))
9399 clear_extent_dirty(root->fs_info->excluded_extents,
9401 rec->start + rec->max_size - 1);
9406 if (ret && ret != -EAGAIN) {
9407 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9410 struct btrfs_trans_handle *trans;
9412 root = root->fs_info->extent_root;
9413 trans = btrfs_start_transaction(root, 1);
9414 if (IS_ERR(trans)) {
9415 ret = PTR_ERR(trans);
9419 btrfs_fix_block_accounting(trans, root);
9420 ret = btrfs_commit_transaction(trans, root);
9429 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9433 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9434 stripe_size = length;
9435 stripe_size /= num_stripes;
9436 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9437 stripe_size = length * 2;
9438 stripe_size /= num_stripes;
9439 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9440 stripe_size = length;
9441 stripe_size /= (num_stripes - 1);
9442 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9443 stripe_size = length;
9444 stripe_size /= (num_stripes - 2);
9446 stripe_size = length;
9452 * Check the chunk with its block group/dev list ref:
9453 * Return 0 if all refs seems valid.
9454 * Return 1 if only part of the refs seem valid and a later check is needed to
9455 * rebuild them, e.g. a missing block group that requires searching the extent tree.
9456 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Implementation notes for check_chunk_refs():
 *
 * @chunk_rec:         chunk record being verified
 * @block_group_cache: tree of block group records
 * @dev_extent_cache:  tree of device extent records
 *
 * Block group check: the block group cache is searched for the chunk.  A
 * hit must agree with the chunk on length (block group key offset), start
 * (block group objectid) and, subject to a further condition not visible
 * here, type flags; otherwise a mismatch message is printed.  A matching
 * record is unlinked from its list and stored in chunk_rec->bg_rec.  A miss
 * prints "is not found in block group".
 *
 * Device extent check: the expected per-stripe length is obtained from
 * calc_stripe_length().  For every stripe the device extent cache is
 * searched by (devid, offset, length); a hit must match devid, offset,
 * chunk offset and length, and is then moved onto chunk_rec->dextents.
 * A miss prints "is not found in dev extent".
 *
 * NOTE(review): how the 'silent' argument and the metadump_v2 local gate the
 * messages, and how the return code is assembled, are not visible here.
 */
9458 static int check_chunk_refs(struct chunk_record *chunk_rec,
9459 struct block_group_tree *block_group_cache,
9460 struct device_extent_tree *dev_extent_cache,
9463 struct cache_extent *block_group_item;
9464 struct block_group_record *block_group_rec;
9465 struct cache_extent *dev_extent_item;
9466 struct device_extent_record *dev_extent_rec;
9470 int metadump_v2 = 0;
9474 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9477 if (block_group_item) {
9478 block_group_rec = container_of(block_group_item,
9479 struct block_group_record,
9481 if (chunk_rec->length != block_group_rec->offset ||
9482 chunk_rec->offset != block_group_rec->objectid ||
9484 chunk_rec->type_flags != block_group_rec->flags)) {
9487 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9488 chunk_rec->objectid,
9493 chunk_rec->type_flags,
9494 block_group_rec->objectid,
9495 block_group_rec->type,
9496 block_group_rec->offset,
9497 block_group_rec->offset,
9498 block_group_rec->objectid,
9499 block_group_rec->flags);
9502 list_del_init(&block_group_rec->list);
9503 chunk_rec->bg_rec = block_group_rec;
9508 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9509 chunk_rec->objectid,
9514 chunk_rec->type_flags);
9521 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9522 chunk_rec->num_stripes);
9523 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9524 devid = chunk_rec->stripes[i].devid;
9525 offset = chunk_rec->stripes[i].offset;
9526 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9527 devid, offset, length);
9528 if (dev_extent_item) {
9529 dev_extent_rec = container_of(dev_extent_item,
9530 struct device_extent_record,
9532 if (dev_extent_rec->objectid != devid ||
9533 dev_extent_rec->offset != offset ||
9534 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9535 dev_extent_rec->length != length) {
9538 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9539 chunk_rec->objectid,
9542 chunk_rec->stripes[i].devid,
9543 chunk_rec->stripes[i].offset,
9544 dev_extent_rec->objectid,
9545 dev_extent_rec->offset,
9546 dev_extent_rec->length);
9549 list_move(&dev_extent_rec->chunk_list,
9550 &chunk_rec->dextents);
9555 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9556 chunk_rec->objectid,
9559 chunk_rec->stripes[i].devid,
9560 chunk_rec->stripes[i].offset);
9567 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - classify every chunk and report orphaned references.
 * @chunk_cache:       cache tree of chunk records
 * @block_group_cache: block group records; those left on ->block_groups
 *                     after the scan are reported as having no chunk
 * @dev_extent_cache:  device extent records; those left on
 *                     ->no_chunk_orphans are reported as having no chunk
 * @good:    optional list receiving chunks for which check_chunk_refs()
 *           returned 0
 * @bad:     list receiving chunks on the remaining (error) path
 * @rebuild: optional list receiving chunks for which check_chunk_refs()
 *           returned a positive value
 * @silent:  forwarded to check_chunk_refs()
 *
 * NOTE(review): the conditions guarding the @bad insertion and the messages,
 * and the return value, are not visible here - confirm against the full
 * source.
 */
9568 int check_chunks(struct cache_tree *chunk_cache,
9569 struct block_group_tree *block_group_cache,
9570 struct device_extent_tree *dev_extent_cache,
9571 struct list_head *good, struct list_head *bad,
9572 struct list_head *rebuild, int silent)
9574 struct cache_extent *chunk_item;
9575 struct chunk_record *chunk_rec;
9576 struct block_group_record *bg_rec;
9577 struct device_extent_record *dext_rec;
9581 chunk_item = first_cache_extent(chunk_cache);
9582 while (chunk_item) {
9583 chunk_rec = container_of(chunk_item, struct chunk_record,
9585 err = check_chunk_refs(chunk_rec, block_group_cache,
9586 dev_extent_cache, silent);
9589 if (err == 0 && good)
9590 list_add_tail(&chunk_rec->list, good);
9591 if (err > 0 && rebuild)
9592 list_add_tail(&chunk_rec->list, rebuild);
9594 list_add_tail(&chunk_rec->list, bad);
9595 chunk_item = next_cache_extent(chunk_item);
9598 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9601 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9609 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9613 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the space covered by the extents of one device with the byte_used
 * value recorded for that device.
 *
 * The walk starts at the cache entry returned by search_cache_extent2() for
 * (dev_rec->devid, 0) and advances with next_cache_extent(). Every visited
 * record is unlinked from its device_list and its length is added to a
 * running total. The total is then compared with dev_rec->byte_used and a
 * mismatch message is formatted when the two differ.
 * NOTE(review): the loop header, the action taken when a record belongs to
 * a different device, and the return statements are not visible here.
 */
9624 static int check_device_used(struct device_record *dev_rec,
9625 struct device_extent_tree *dext_cache)
9627 struct cache_extent *cache;
9628 struct device_extent_record *dev_extent_rec;
9631 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9633 dev_extent_rec = container_of(cache,
9634 struct device_extent_record,
/* A record whose objectid differs from the devid belongs to another device. */
9636 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and account for its length. */
9639 list_del_init(&dev_extent_rec->device_list);
9640 total_byte += dev_extent_rec->length;
9641 cache = next_cache_extent(cache);
9644 if (total_byte != dev_rec->byte_used) {
9646 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9647 total_byte, dev_rec->byte_used, dev_rec->objectid,
9648 dev_rec->type, dev_rec->offset);
9655 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record stored in the @dev_cache
 * rb-tree, walking it with rb_first() and rb_next(). Afterwards the entries
 * of dev_extent_cache->no_device_orphans are iterated; going by the message
 * text they are reported as device extents without a device.
 * NOTE(review): how the per-device result is folded into the return value
 * is on lines not visible here.
 */
9656 static int check_devices(struct rb_root *dev_cache,
9657 struct device_extent_tree *dev_extent_cache)
9659 struct rb_node *dev_node;
9660 struct device_record *dev_rec;
9661 struct device_extent_record *dext_rec;
9665 dev_node = rb_first(dev_cache);
9667 dev_rec = container_of(dev_node, struct device_record, node);
9668 err = check_device_used(dev_rec, dev_extent_cache);
9672 dev_node = rb_next(dev_node);
/* Iterate the device extents on the no_device_orphans list. */
9674 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9677 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9678 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in the objectid field of the record
 * @bytenr:        stored in the bytenr field of the record
 * @last_snapshot: stored in the last_snapshot field of the record
 * @level:         stored in the level field of the record
 * @drop_level:    stored in the drop_level field of the record
 * @level_size:    stored in the level_size field of the record
 * @drop_key:      copied into the drop_key field of the record
 *
 * NOTE(review): callers in this file pass NULL for @drop_key, so the
 * memcpy() is presumably guarded by a check on a line not visible here;
 * the handling of a failed malloc() and the return value are not visible
 * either -- confirm.
 */
9685 static int add_root_item_to_list(struct list_head *head,
9686 u64 objectid, u64 bytenr, u64 last_snapshot,
9687 u8 level, u8 drop_level,
9688 int level_size, struct btrfs_key *drop_key)
9691 struct root_item_record *ri_rec;
9692 ri_rec = malloc(sizeof(*ri_rec));
9695 ri_rec->bytenr = bytenr;
9696 ri_rec->objectid = objectid;
9697 ri_rec->level = level;
9698 ri_rec->level_size = level_size;
9699 ri_rec->drop_level = drop_level;
9700 ri_rec->last_snapshot = last_snapshot;
9702 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9703 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take its first root_item_record and
 * unlink that record from the list.
 * NOTE(review): the statement that releases the record memory is
 * presumably on a line not visible here -- confirm.
 */
9708 static void free_root_item_list(struct list_head *list)
9710 struct root_item_record *ri_rec;
9712 while (!list_empty(list)) {
9713 ri_rec = list_first_entry(list, struct root_item_record,
9715 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list.
 *
 * For each record, taken from the head of the list, the tree block at
 * rec->bytenr is read with read_tree_block() using rec->level_size, handed
 * to add_root_to_pending() together with rec->objectid, and then
 * run_next_block() is called with the record. The buffer is released and
 * the record is unlinked from the list afterwards. Once the list has been
 * consumed, run_next_block() is called again with a NULL record.
 *
 * The cache, tree and bits arguments are only forwarded to
 * add_root_to_pending() and run_next_block().
 * NOTE(review): the conditions under which run_next_block() is invoked or
 * repeated, the error exits and the return value are on lines not visible
 * here.
 */
9720 static int deal_root_from_list(struct list_head *list,
9721 struct btrfs_root *root,
9722 struct block_info *bits,
9724 struct cache_tree *pending,
9725 struct cache_tree *seen,
9726 struct cache_tree *reada,
9727 struct cache_tree *nodes,
9728 struct cache_tree *extent_cache,
9729 struct cache_tree *chunk_cache,
9730 struct rb_root *dev_cache,
9731 struct block_group_tree *block_group_cache,
9732 struct device_extent_tree *dev_extent_cache)
9737 while (!list_empty(list)) {
9738 struct root_item_record *rec;
9739 struct extent_buffer *buf;
9740 rec = list_entry(list->next,
9741 struct root_item_record, list);
/* Read the root block of this tree; an unusable buffer is released. */
9743 buf = read_tree_block(root->fs_info->tree_root,
9744 rec->bytenr, rec->level_size, 0);
9745 if (!extent_buffer_uptodate(buf)) {
9746 free_extent_buffer(buf);
9750 ret = add_root_to_pending(buf, extent_cache, pending,
9751 seen, nodes, rec->objectid);
9755 * To rebuild extent tree, we need deal with snapshot
9756 * one by one, otherwise we deal with node firstly which
9757 * can maximize readahead.
9760 ret = run_next_block(root, bits, bits_nr, &last,
9761 pending, seen, reada, nodes,
9762 extent_cache, chunk_cache,
9763 dev_cache, block_group_cache,
9764 dev_extent_cache, rec);
/* This record is finished: drop the buffer and take it off the list. */
9768 free_extent_buffer(buf);
9769 list_del(&rec->list);
/* Same call as above, but without a root item record. */
9775 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9776 reada, nodes, extent_cache, chunk_cache,
9777 dev_cache, block_group_cache,
9778 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk, extent and device checks.
 *
 * Visible phases, in order:
 *  - initialise the local caches, trees and the two root lists, and publish
 *    excluded_extents, extent_cache, free_extent_hook and corrupt_blocks
 *    through root->fs_info;
 *  - queue the tree root and the chunk root on normal_trees;
 *  - scan the tree root for ROOT_ITEM keys and queue each one on
 *    normal_trees when the objectid of its drop_progress key is 0,
 *    otherwise on dropping_trees;
 *  - run deal_root_from_list() on normal_trees, then on dropping_trees;
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - stop the progress task, clear the fs_info pointers set at the start
 *    and free the caches and lists.
 * NOTE(review): error exits, labels, loop headers and the return value are
 * on lines not visible in this view.
 */
9788 static int check_chunks_and_extents(struct btrfs_root *root)
9790 struct rb_root dev_cache;
9791 struct cache_tree chunk_cache;
9792 struct block_group_tree block_group_cache;
9793 struct device_extent_tree dev_extent_cache;
9794 struct cache_tree extent_cache;
9795 struct cache_tree seen;
9796 struct cache_tree pending;
9797 struct cache_tree reada;
9798 struct cache_tree nodes;
9799 struct extent_io_tree excluded_extents;
9800 struct cache_tree corrupt_blocks;
9801 struct btrfs_path path;
9802 struct btrfs_key key;
9803 struct btrfs_key found_key;
9805 struct block_info *bits;
9807 struct extent_buffer *leaf;
9809 struct btrfs_root_item ri;
9810 struct list_head dropping_trees;
9811 struct list_head normal_trees;
9812 struct btrfs_root *root1;
/* Start from empty caches, trees and lists. */
9817 dev_cache = RB_ROOT;
9818 cache_tree_init(&chunk_cache);
9819 block_group_tree_init(&block_group_cache);
9820 device_extent_tree_init(&dev_extent_cache);
9822 cache_tree_init(&extent_cache);
9823 cache_tree_init(&seen);
9824 cache_tree_init(&pending);
9825 cache_tree_init(&nodes);
9826 cache_tree_init(&reada);
9827 cache_tree_init(&corrupt_blocks);
9828 extent_io_tree_init(&excluded_extents);
9829 INIT_LIST_HEAD(&dropping_trees);
9830 INIT_LIST_HEAD(&normal_trees);
/*
 * Expose the local state through fs_info; these pointers are reset to NULL
 * in the cleanup sequence further down.
 */
9833 root->fs_info->excluded_extents = &excluded_extents;
9834 root->fs_info->fsck_extent_cache = &extent_cache;
9835 root->fs_info->free_extent_hook = free_extent_hook;
9836 root->fs_info->corrupt_blocks = &corrupt_blocks;
9840 bits = malloc(bits_nr * sizeof(struct block_info));
/* Switch the progress indicator to the extents phase when it is enabled. */
9846 if (ctx.progress_enabled) {
9847 ctx.tp = TASK_EXTENTS;
9848 task_start(ctx.info);
/* Queue the tree root, then the chunk root, as normal trees. */
9852 root1 = root->fs_info->tree_root;
9853 level = btrfs_header_level(root1->node);
9854 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9855 root1->node->start, 0, level, 0,
9856 root1->nodesize, NULL);
9859 root1 = root->fs_info->chunk_root;
9860 level = btrfs_header_level(root1->node);
9861 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9862 root1->node->start, 0, level, 0,
9863 root1->nodesize, NULL);
/* Scan the tree root for ROOT_ITEM keys. */
9866 btrfs_init_path(&path);
9869 key.type = BTRFS_ROOT_ITEM_KEY;
9870 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9875 leaf = path.nodes[0];
9876 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf. */
9877 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9878 ret = btrfs_next_leaf(root, &path);
9881 leaf = path.nodes[0];
9882 slot = path.slots[0];
9884 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9885 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9886 unsigned long offset;
/* Copy the root item out of the leaf into the local ri. */
9889 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9890 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9891 last_snapshot = btrfs_root_last_snapshot(&ri);
/* A zero drop_progress objectid selects the normal_trees list. */
9892 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9893 level = btrfs_root_level(&ri);
9894 level_size = root->nodesize;
9895 ret = add_root_item_to_list(&normal_trees,
9897 btrfs_root_bytenr(&ri),
9898 last_snapshot, level,
9899 0, level_size, NULL);
/*
 * Otherwise found_key is reused as the CPU-order key handed to
 * add_root_item_to_list() and the root goes on dropping_trees.
 */
9903 level = btrfs_root_level(&ri);
9904 level_size = root->nodesize;
9905 objectid = found_key.objectid;
9906 btrfs_disk_key_to_cpu(&found_key,
9908 ret = add_root_item_to_list(&dropping_trees,
9910 btrfs_root_bytenr(&ri),
9911 last_snapshot, level,
9913 level_size, &found_key);
9920 btrfs_release_path(&path);
9923 * check_block can return -EAGAIN if it fixes something, please keep
9924 * this in mind when dealing with return values from these functions, if
9925 * we get -EAGAIN we want to fall through and restart the loop.
9927 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9928 &seen, &reada, &nodes, &extent_cache,
9929 &chunk_cache, &dev_cache, &block_group_cache,
9936 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9937 &pending, &seen, &reada, &nodes,
9938 &extent_cache, &chunk_cache, &dev_cache,
9939 &block_group_cache, &dev_extent_cache);
/* No good, bad or rebuild lists are requested, and silent is 0. */
9946 ret = check_chunks(&chunk_cache, &block_group_cache,
9947 &dev_extent_cache, NULL, NULL, NULL, 0);
9954 ret = check_extent_refs(root, &extent_cache);
9961 ret = check_devices(&dev_cache, &dev_extent_cache);
9966 task_stop(ctx.info);
/* Undo the fs_info wiring and free the caches and lists. */
9968 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9969 extent_io_tree_cleanup(&excluded_extents);
9970 root->fs_info->fsck_extent_cache = NULL;
9971 root->fs_info->free_extent_hook = NULL;
9972 root->fs_info->corrupt_blocks = NULL;
9973 root->fs_info->excluded_extents = NULL;
9976 free_chunk_cache_tree(&chunk_cache);
9977 free_device_cache_tree(&dev_cache);
9978 free_block_group_tree(&block_group_cache);
9979 free_device_extent_tree(&dev_extent_cache);
9980 free_extent_cache_tree(&seen);
9981 free_extent_cache_tree(&pending);
9982 free_extent_cache_tree(&reada);
9983 free_extent_cache_tree(&nodes);
9984 free_root_item_list(&normal_trees);
9985 free_root_item_list(&dropping_trees);
/*
 * A second cleanup sequence, which additionally frees the extent record
 * cache and leaves the fs_info pointers in place.
 * NOTE(review): the label or condition selecting this sequence is not
 * visible here; presumably it is the restart path mentioned in the -EAGAIN
 * note above -- confirm.
 */
9988 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9989 free_extent_cache_tree(&seen);
9990 free_extent_cache_tree(&pending);
9991 free_extent_cache_tree(&reada);
9992 free_extent_cache_tree(&nodes);
9993 free_chunk_cache_tree(&chunk_cache);
9994 free_block_group_tree(&block_group_cache);
9995 free_device_cache_tree(&dev_cache);
9996 free_device_extent_tree(&dev_extent_cache);
9997 free_extent_record_cache(&extent_cache);
9998 free_root_item_list(&normal_trees);
9999 free_root_item_list(&dropping_trees);
10000 extent_io_tree_cleanup(&excluded_extents);
10005 * Check backrefs of a tree block given by @bytenr or @eb.
10007 * @root: the root containing the @bytenr or @eb
10008 * @eb: tree block extent buffer, can be NULL
10009 * @bytenr: bytenr of the tree block to search
10010 * @level: tree level of the tree block
10011 * @owner: owner of the tree block
10013 * Return >0 for any error found and output error message
10014 * Return 0 for no error found
/*
 * Look up the extent tree entry of the tree block at @bytenr and verify
 * that it carries a backref for this root.
 *
 * Visible steps:
 *  - search the extent root for (bytenr, METADATA_ITEM or EXTENT_ITEM, -1),
 *    the key type depending on the SKINNY_METADATA incompat flag, then step
 *    back with btrfs_previous_extent_item();
 *  - take the level from key.offset for a METADATA_ITEM, otherwise from the
 *    btrfs_tree_block_info that follows the extent item;
 *  - compare extent flags, generation, level and reference count with the
 *    header of @eb and the expected @level, setting BACKREF_MISMATCH on a
 *    difference;
 *  - walk the inline refs for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or for a SHARED_BLOCK_REF whose parent is
 *    validated by a recursive call with level + 1;
 *  - fall back to a keyed TREE_BLOCK_REF item with offset root->objectid.
 * BACKREF_MISSING is reported through error() only when @eb is non-NULL.
 * NOTE(review): several conditions, among them the one that presumably
 * restricts the header comparisons to a non-NULL @eb, are on lines not
 * visible here.
 */
10016 static int check_tree_block_ref(struct btrfs_root *root,
10017 struct extent_buffer *eb, u64 bytenr,
10018 int level, u64 owner)
10020 struct btrfs_key key;
10021 struct btrfs_root *extent_root = root->fs_info->extent_root;
10022 struct btrfs_path path;
10023 struct btrfs_extent_item *ei;
10024 struct btrfs_extent_inline_ref *iref;
10025 struct extent_buffer *leaf;
10031 u32 nodesize = root->nodesize;
10034 int tree_reloc_root = 0;
/* Flag the case where @bytenr is the root node of a tree reloc root. */
10039 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10040 btrfs_header_bytenr(root->node) == bytenr)
10041 tree_reloc_root = 1;
10043 btrfs_init_path(&path);
10044 key.objectid = bytenr;
10045 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10046 key.type = BTRFS_METADATA_ITEM_KEY;
10048 key.type = BTRFS_EXTENT_ITEM_KEY;
10049 key.offset = (u64)-1;
10051 /* Search for the backref in extent tree */
10052 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10054 err |= BACKREF_MISSING;
10057 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10059 err |= BACKREF_MISSING;
10063 leaf = path.nodes[0];
10064 slot = path.slots[0];
10065 btrfs_item_key_to_cpu(leaf, &key, slot);
10067 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM key carries the level in key.offset and the inline refs
 * start right after the extent item; otherwise a btrfs_tree_block_info
 * sits between the extent item and the inline refs.
 */
10069 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10070 skinny_level = (int)key.offset;
10071 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10073 struct btrfs_tree_block_info *info;
10075 info = (struct btrfs_tree_block_info *)(ei + 1);
10076 skinny_level = btrfs_tree_block_level(leaf, info);
10077 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks of the extent item against the tree block itself. */
10084 if (!(btrfs_extent_flags(leaf, ei) &
10085 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10087 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10088 key.objectid, nodesize,
10089 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10090 err = BACKREF_MISMATCH;
10092 header_gen = btrfs_header_generation(eb);
10093 extent_gen = btrfs_extent_generation(leaf, ei);
10094 if (header_gen != extent_gen) {
10096 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10097 key.objectid, nodesize, header_gen,
10099 err = BACKREF_MISMATCH;
10101 if (level != skinny_level) {
10103 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10104 key.objectid, nodesize, level, skinny_level);
10105 err = BACKREF_MISMATCH;
/* An owner for which is_fstree() is false must have exactly one ref. */
10107 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10109 "extent[%llu %u] is referred by other roots than %llu",
10110 key.objectid, nodesize, root->objectid);
10111 err = BACKREF_MISMATCH;
10116 * Iterate the extent/metadata item to find the exact backref
10118 item_size = btrfs_item_size_nr(leaf, slot);
10119 ptr = (unsigned long)iref;
10120 end = (unsigned long)ei + item_size;
10121 while (ptr < end) {
10122 iref = (struct btrfs_extent_inline_ref *)ptr;
10123 type = btrfs_extent_inline_ref_type(leaf, iref);
10124 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10126 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10127 (offset == root->objectid || offset == owner)) {
10129 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10131 * Backref of tree reloc root points to itself, no need
10132 * to check backref any more.
10134 if (tree_reloc_root)
10137 /* Check if the backref points to valid referencer */
10138 found_ref = !check_tree_block_ref(root, NULL,
10139 offset, level + 1, owner);
10144 ptr += btrfs_extent_inline_ref_size(type);
10148 * Inlined extent item doesn't have what we need, check
10149 * TREE_BLOCK_REF_KEY
10152 btrfs_release_path(&path);
10153 key.objectid = bytenr;
10154 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10155 key.offset = root->objectid;
10157 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10162 err |= BACKREF_MISSING;
10164 btrfs_release_path(&path);
10165 if (eb && (err & BACKREF_MISSING))
10166 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10167 bytenr, nodesize, owner, level);
10172 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10174 * Return >0 any error found and output error message
10175 * Return 0 for no error found
/*
 * Check the file extent item at @slot of leaf @eb against the extent tree.
 *
 * Inline extents and extents whose disk bytenr is 0 are not examined
 * further. For the others the function:
 *  - checks that disk_num_bytes and num_bytes are aligned to
 *    root->sectorsize (BYTES_UNALIGNED) and updates the global
 *    data_bytes_allocated and data_bytes_referenced counters;
 *  - looks up the EXTENT_ITEM keyed (disk_bytenr, EXTENT_ITEM,
 *    disk_num_bytes) and checks that it has BTRFS_EXTENT_FLAG_DATA
 *    (BACKREF_MISMATCH otherwise);
 *  - searches for a data backref, first among the inline refs, then as a
 *    keyed EXTENT_DATA_REF item, and finally as a keyed SHARED_DATA_REF
 *    item whose offset is eb->start.
 * BACKREF_MISSING is set and reported when no backref is found.
 * NOTE(review): the else branches and error exits are on lines not visible
 * here.
 */
10177 static int check_extent_data_item(struct btrfs_root *root,
10178 struct extent_buffer *eb, int slot)
10180 struct btrfs_file_extent_item *fi;
10181 struct btrfs_path path;
10182 struct btrfs_root *extent_root = root->fs_info->extent_root;
10183 struct btrfs_key fi_key;
10184 struct btrfs_key dbref_key;
10185 struct extent_buffer *leaf;
10186 struct btrfs_extent_item *ei;
10187 struct btrfs_extent_inline_ref *iref;
10188 struct btrfs_extent_data_ref *dref;
10191 u64 disk_num_bytes;
10192 u64 extent_num_bytes;
10199 int found_dbackref = 0;
10203 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10204 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10206 /* Nothing to check for hole and inline data extents */
10207 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10208 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10211 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10212 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10213 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10215 /* Check unaligned disk_num_bytes and num_bytes */
10216 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10218 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10219 fi_key.objectid, fi_key.offset, disk_num_bytes,
10221 err |= BYTES_UNALIGNED;
10223 data_bytes_allocated += disk_num_bytes;
10225 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10227 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10228 fi_key.objectid, fi_key.offset, extent_num_bytes,
10230 err |= BYTES_UNALIGNED;
10232 data_bytes_referenced += extent_num_bytes;
/* The owner recorded in the leaf header is accepted as a backref root. */
10234 owner = btrfs_header_owner(eb);
10236 /* Check the extent item of the file extent in extent tree */
10237 btrfs_init_path(&path);
10238 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10239 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10240 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10242 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on, slot indexes the extent tree leaf instead of @eb. */
10246 leaf = path.nodes[0];
10247 slot = path.slots[0];
10248 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10250 extent_flags = btrfs_extent_flags(leaf, ei);
10252 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10254 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10255 disk_bytenr, disk_num_bytes,
10256 BTRFS_EXTENT_FLAG_DATA);
10257 err |= BACKREF_MISMATCH;
10260 /* Check data backref inside that extent item */
10261 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10262 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10263 ptr = (unsigned long)iref;
10264 end = (unsigned long)ei + item_size;
10265 while (ptr < end) {
10266 iref = (struct btrfs_extent_inline_ref *)ptr;
10267 type = btrfs_extent_inline_ref_type(leaf, iref);
10268 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * An EXTENT_DATA_REF matches when its root is the leaf owner or
 * root->objectid; a SHARED_DATA_REF is validated by checking the tree block
 * it points at through check_tree_block_ref().
 */
10270 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10271 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10272 if (ref_root == owner || ref_root == root->objectid)
10273 found_dbackref = 1;
10274 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10275 found_dbackref = !check_tree_block_ref(root, NULL,
10276 btrfs_extent_inline_ref_offset(leaf, iref),
10280 if (found_dbackref)
10282 ptr += btrfs_extent_inline_ref_size(type);
10285 if (!found_dbackref) {
10286 btrfs_release_path(&path);
10288 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10289 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10290 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10291 dbref_key.offset = hash_extent_data_ref(root->objectid,
10292 fi_key.objectid, fi_key.offset);
10294 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10295 &dbref_key, &path, 0, 0);
10297 found_dbackref = 1;
10301 btrfs_release_path(&path);
10304 * Neither inlined nor EXTENT_DATA_REF found, try
10305 * SHARED_DATA_REF as last chance.
10307 dbref_key.objectid = disk_bytenr;
10308 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10309 dbref_key.offset = eb->start;
10311 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10312 &dbref_key, &path, 0, 0);
10314 found_dbackref = 1;
10320 if (!found_dbackref)
10321 err |= BACKREF_MISSING;
10322 btrfs_release_path(&path);
10323 if (err & BACKREF_MISSING) {
10324 error("data extent[%llu %llu] backref lost",
10325 disk_bytenr, disk_num_bytes);
10331 * Get real tree block level for the case like shared block
10332 * Return >= 0 as tree level
10333 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * return it only when they agree.
 *
 * The extent tree is searched for (bytenr, METADATA_ITEM, -1) followed by
 * btrfs_previous_extent_item(). The item found must carry
 * BTRFS_EXTENT_FLAG_TREE_BLOCK. Its generation is used as the transid for
 * reading the block, and its level comes from key.offset for a
 * METADATA_ITEM key or from the btrfs_tree_block_info that follows the
 * extent item otherwise. The block is then read and the level in its
 * header is compared with the backref level; header_level is returned on
 * the path where the mismatch branch is not taken.
 * NOTE(review): the error values returned on the failure paths are on
 * lines not visible here.
 */
10335 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10337 struct extent_buffer *eb;
10338 struct btrfs_path path;
10339 struct btrfs_key key;
10340 struct btrfs_extent_item *ei;
10343 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10348 /* Search extent tree for extent generation and level */
10349 key.objectid = bytenr;
10350 key.type = BTRFS_METADATA_ITEM_KEY;
10351 key.offset = (u64)-1;
10353 btrfs_init_path(&path);
10354 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10357 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10365 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10366 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10367 struct btrfs_extent_item);
/* Only extent items flagged as tree blocks are of interest. */
10368 flags = btrfs_extent_flags(path.nodes[0], ei);
10369 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10374 /* Get transid for later read_tree_block() check */
10375 transid = btrfs_extent_generation(path.nodes[0], ei);
10377 /* Get backref level as one source */
10378 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10379 backref_level = key.offset;
10381 struct btrfs_tree_block_info *info;
10383 info = (struct btrfs_tree_block_info *)(ei + 1);
10384 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10386 btrfs_release_path(&path);
10388 /* Get level from tree block as an alternative source */
10389 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10390 if (!extent_buffer_uptodate(eb)) {
10391 free_extent_buffer(eb);
10394 header_level = btrfs_header_level(eb);
10395 free_extent_buffer(eb);
/* The two sources must agree for the level to be trusted. */
10397 if (header_level != backref_level)
10399 return header_level;
10402 btrfs_release_path(&path);
10407 * Check if a tree block backref is valid (points to a valid tree block)
10408 * if level == -1, level will be resolved
10409 * Return >0 for any error found and print error message
/*
 * Verify that the tree block at @bytenr is reachable from the root with id
 * @root_id at the expected @level.
 *
 * The level is obtained from query_tree_block_level() in the special case
 * handled at the top. The root is read with btrfs_read_fs_root(), the block
 * itself is read to fetch its first key (node key or item key), and the
 * tree is then searched for that key with path.lowest_level set to @level.
 * The node found at that level must have the same bytenr and level,
 * otherwise REFERENCER_MISMATCH is set. Failures to resolve the level, read
 * the root, read the block or complete the search set REFERENCER_MISSING,
 * which is reported at the end.
 * A block with no items at level 0 is treated as an empty tree and its key
 * is not checked.
 * NOTE(review): the conditions selecting between the two final error
 * messages, and the return statements, are on lines not visible here.
 */
10411 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10412 u64 bytenr, int level)
10414 struct btrfs_root *root;
10415 struct btrfs_key key;
10416 struct btrfs_path path;
10417 struct extent_buffer *eb;
10418 struct extent_buffer *node;
10419 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10423 /* Query level for level == -1 special case */
10425 level = query_tree_block_level(fs_info, bytenr);
10427 err |= REFERENCER_MISSING;
/* Look up the referencing root by its id. */
10431 key.objectid = root_id;
10432 key.type = BTRFS_ROOT_ITEM_KEY;
10433 key.offset = (u64)-1;
10435 root = btrfs_read_fs_root(fs_info, &key);
10436 if (IS_ERR(root)) {
10437 err |= REFERENCER_MISSING;
10441 /* Read out the tree block to get item/node key */
10442 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10443 if (!extent_buffer_uptodate(eb)) {
10444 err |= REFERENCER_MISSING;
10445 free_extent_buffer(eb);
10449 /* Empty tree, no need to check key */
10450 if (!btrfs_header_nritems(eb) && !level) {
10451 free_extent_buffer(eb);
/* key is reused to hold the first key stored in the block. */
10456 btrfs_node_key_to_cpu(eb, &key, 0);
10458 btrfs_item_key_to_cpu(eb, &key, 0);
10460 free_extent_buffer(eb);
10462 btrfs_init_path(&path);
10463 path.lowest_level = level;
10464 /* Search with the first key, to ensure we can reach it */
10465 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10467 err |= REFERENCER_MISSING;
/* The node reached at @level must be the very block being checked. */
10471 node = path.nodes[level];
10472 if (btrfs_header_bytenr(node) != bytenr) {
10474 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10475 bytenr, nodesize, bytenr,
10476 btrfs_header_bytenr(node));
10477 err |= REFERENCER_MISMATCH;
10479 if (btrfs_header_level(node) != level) {
10481 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10482 bytenr, nodesize, level,
10483 btrfs_header_level(node));
10484 err |= REFERENCER_MISMATCH;
10488 btrfs_release_path(&path);
/* Two message variants exist: without and with the level. */
10490 if (err & REFERENCER_MISSING) {
10492 error("extent [%llu %d] lost referencer (owner: %llu)",
10493 bytenr, nodesize, root_id);
10496 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10497 bytenr, nodesize, root_id, level);
10504 * Check if tree block @eb is tree reloc root.
10505 * Return 0 if it's not or any problem happens
10506 * Return 1 if it's a tree reloc root
/*
 * Test whether @eb is the root node of the tree reloc root that belongs to
 * the owner recorded in the header of @eb.
 *
 * The root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner) is read with
 * btrfs_read_fs_root_no_cache(); the bytenr of its node is compared with
 * the bytenr from the header of @eb, and the root is released with
 * btrfs_free_fs_root() afterwards.
 * NOTE(review): the assignments of the result and the return statements
 * are on lines not visible here.
 */
10508 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10509 struct extent_buffer *eb)
10511 struct btrfs_root *tree_reloc_root;
10512 struct btrfs_key key;
10513 u64 bytenr = btrfs_header_bytenr(eb);
10514 u64 owner = btrfs_header_owner(eb);
10517 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10518 key.offset = owner;
10519 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read a private copy of the root so it can be freed below. */
10521 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10522 if (IS_ERR(tree_reloc_root))
10525 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10527 btrfs_free_fs_root(tree_reloc_root);
10532 * Check referencer for shared block backref
10533 * If level == -1, this function will resolve the level.
/*
 * Verify that the block at @parent really references the tree block at
 * @bytenr.
 *
 * The parent block is read with read_tree_block_fs_info(). The level can be
 * resolved through query_tree_block_level(). When @parent equals @bytenr
 * the block is tested with is_tree_reloc_root(). Otherwise the parent must
 * sit exactly one level above @level, and its block pointers are scanned
 * for @bytenr. REFERENCER_MISSING is returned, after an error message,
 * when no parent reference was found.
 * NOTE(review): the statements that set found_parent, the jumps out of the
 * checks and the final return are on lines not visible here.
 */
10535 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10536 u64 parent, u64 bytenr, int level)
10538 struct extent_buffer *eb;
10539 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10541 int found_parent = 0;
10544 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10545 if (!extent_buffer_uptodate(eb))
10549 level = query_tree_block_level(fs_info, bytenr);
10553 /* It's possible it's a tree reloc root */
10554 if (parent == bytenr) {
10555 if (is_tree_reloc_root(fs_info, eb))
/* A genuine parent sits exactly one level above its child. */
10560 if (level + 1 != btrfs_header_level(eb))
/* Scan every block pointer of the parent for @bytenr. */
10563 nr = btrfs_header_nritems(eb);
10564 for (i = 0; i < nr; i++) {
10565 if (bytenr == btrfs_node_blockptr(eb, i)) {
10571 free_extent_buffer(eb);
10572 if (!found_parent) {
10574 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10575 bytenr, nodesize, parent, level);
10576 return REFERENCER_MISSING;
10582 * Check referencer for normal (inlined) data ref
10583 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verify a data backref by counting the file extents that match it.
 *
 * @root_id:  id of the root expected to reference the extent
 * @objectid: inode whose EXTENT_DATA items are scanned
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk length of the data extent
 * @count:    number of references the backref claims
 *
 * The extent tree is consulted first: the item found before
 * (bytenr, EXTENT_ITEM, -1) is checked to be an EXTENT_ITEM for @bytenr.
 * The root @root_id is then read and its EXTENT_DATA items for @objectid
 * are iterated, comparing disk bytenr, disk num bytes and the backref
 * offset of each one. REFERENCER_MISSING is returned, after an error
 * message, when found_count differs from @count.
 * NOTE(review): the code resolving @len when it is 0, the increment of
 * found_count, the loop header and the error exits are on lines not
 * visible here.
 */
10585 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10586 u64 root_id, u64 objectid, u64 offset,
10587 u64 bytenr, u64 len, u32 count)
10589 struct btrfs_root *root;
10590 struct btrfs_root *extent_root = fs_info->extent_root;
10591 struct btrfs_key key;
10592 struct btrfs_path path;
10593 struct extent_buffer *leaf;
10594 struct btrfs_file_extent_item *fi;
10595 u32 found_count = 0;
/* Find the extent item that describes @bytenr. */
10600 key.objectid = bytenr;
10601 key.type = BTRFS_EXTENT_ITEM_KEY;
10602 key.offset = (u64)-1;
10604 btrfs_init_path(&path);
10605 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10608 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10611 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10612 if (key.objectid != bytenr ||
10613 key.type != BTRFS_EXTENT_ITEM_KEY)
10616 btrfs_release_path(&path);
/* Switch to the referencing root. */
10618 key.objectid = root_id;
10619 key.type = BTRFS_ROOT_ITEM_KEY;
10620 key.offset = (u64)-1;
10621 btrfs_init_path(&path);
10623 root = btrfs_read_fs_root(fs_info, &key);
10627 key.objectid = objectid;
10628 key.type = BTRFS_EXTENT_DATA_KEY;
10630 * It can be nasty as data backref offset is
10631 * file offset - file extent offset, which is smaller or
10632 * equal to original backref offset. The only special case is
10633 * overflow. So we need to special check and do further search.
/* An offset with bit 63 set is searched from file offset 0 instead. */
10635 key.offset = offset & (1ULL << 63) ? 0 : offset;
10637 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10642 * Search afterwards to get correct one
10643 * NOTE: As we must do a comprehensive check on the data backref to
10644 * make sure the dref count also matches, we must iterate all file
10645 * extents for that inode.
10648 leaf = path.nodes[0];
10649 slot = path.slots[0];
10651 if (slot >= btrfs_header_nritems(leaf))
10653 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of the requested inode are candidates. */
10654 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10656 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10658 * Except normal disk bytenr and disk num bytes, we still
10659 * need to do extra check on dbackref offset as
10660 * dbackref offset = file_offset - file_extent_offset
10662 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10663 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10664 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10669 ret = btrfs_next_item(root, &path);
10674 btrfs_release_path(&path);
10675 if (found_count != count) {
10677 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10678 bytenr, len, root_id, objectid, offset, count, found_count);
10679 return REFERENCER_MISSING;
10685 * Check if the referencer of a shared data backref exists
/*
 * Verify that the leaf at @parent holds a file extent pointing at the data
 * extent @bytenr.
 *
 * The parent block is read with read_tree_block_fs_info() and its items are
 * scanned: among EXTENT_DATA items, a disk bytenr equal to @bytenr counts
 * as the reference. REFERENCER_MISSING is returned, after an error message,
 * when no such item was found.
 * NOTE(review): the statements that skip non-matching and inline items,
 * set found_parent and return on success are on lines not visible here.
 */
10687 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10688 u64 parent, u64 bytenr)
10690 struct extent_buffer *eb;
10691 struct btrfs_key key;
10692 struct btrfs_file_extent_item *fi;
10693 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10695 int found_parent = 0;
10698 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10699 if (!extent_buffer_uptodate(eb))
/* Scan every item of the parent leaf. */
10702 nr = btrfs_header_nritems(eb);
10703 for (i = 0; i < nr; i++) {
10704 btrfs_item_key_to_cpu(eb, &key, i);
10705 if (key.type != BTRFS_EXTENT_DATA_KEY)
10708 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10709 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10712 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10719 free_extent_buffer(eb);
10720 if (!found_parent) {
10721 error("shared extent %llu referencer lost (parent: %llu)",
10723 return REFERENCER_MISSING;
10729 * This function will check a given extent item, including its backref and
10730 * itself (like crossing stripe boundary and type)
10732 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check the extent item at @slot of leaf @eb, together with every inline
 * backref it carries.
 *
 * Visible steps:
 *  - add key.offset (EXTENT_ITEM key) or nodesize (other key types) to the
 *    global bytes_used counter;
 *  - reject items smaller than struct btrfs_extent_item as the unsupported
 *    V0 format;
 *  - for metadata extents, flag CROSSING_STRIPE_BOUNDARY when
 *    check_crossing_stripes() reports a problem;
 *  - take the level from the btrfs_tree_block_info of an old style
 *    EXTENT_ITEM metadata entry, or from key.offset otherwise;
 *  - walk the inline refs up to the end of the item and dispatch on their
 *    type to check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); any other
 *    type sets UNKNOWN_TYPE and a walk running past the item end sets
 *    ITEM_SIZE_MISMATCH.
 * NOTE(review): the loop and switch headers, the way each result is merged
 * into the error bits and the return value are on lines not visible here.
 */
10734 static int check_extent_item(struct btrfs_fs_info *fs_info,
10735 struct extent_buffer *eb, int slot)
10737 struct btrfs_extent_item *ei;
10738 struct btrfs_extent_inline_ref *iref;
10739 struct btrfs_extent_data_ref *dref;
10743 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10744 u32 item_size = btrfs_item_size_nr(eb, slot);
10749 struct btrfs_key key;
/* Account for the extent size in the global usage counter. */
10753 btrfs_item_key_to_cpu(eb, &key, slot);
10754 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10755 bytes_used += key.offset;
10757 bytes_used += nodesize;
10759 if (item_size < sizeof(*ei)) {
10761 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10762 * old thing when on disk format is still un-determined.
10763 * No need to care about it anymore
10765 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10769 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10770 flags = btrfs_extent_flags(eb, ei);
10772 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10774 if (metadata && check_crossing_stripes(global_info, key.objectid,
10776 error("bad metadata [%llu, %llu) crossing stripe boundary",
10777 key.objectid, key.objectid + nodesize);
10778 err |= CROSSING_STRIPE_BOUNDARY;
10781 ptr = (unsigned long)(ei + 1);
10783 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10784 /* Old EXTENT_ITEM metadata */
10785 struct btrfs_tree_block_info *info;
10787 info = (struct btrfs_tree_block_info *)ptr;
10788 level = btrfs_tree_block_level(eb, info);
10789 ptr += sizeof(struct btrfs_tree_block_info);
10791 /* New METADATA_ITEM */
10792 level = key.offset;
10794 end = (unsigned long)ei + item_size;
10797 /* Reached extent item end normally */
10801 /* Beyond extent item end, wrong item size */
10803 err |= ITEM_SIZE_MISMATCH;
10804 error("extent item at bytenr %llu slot %d has wrong size",
10809 /* Now check every backref in this extent item */
10810 iref = (struct btrfs_extent_inline_ref *)ptr;
10811 type = btrfs_extent_inline_ref_type(eb, iref);
10812 offset = btrfs_extent_inline_ref_offset(eb, iref);
10814 case BTRFS_TREE_BLOCK_REF_KEY:
10815 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10819 case BTRFS_SHARED_BLOCK_REF_KEY:
10820 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10824 case BTRFS_EXTENT_DATA_REF_KEY:
10825 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10826 ret = check_extent_data_backref(fs_info,
10827 btrfs_extent_data_ref_root(eb, dref),
10828 btrfs_extent_data_ref_objectid(eb, dref),
10829 btrfs_extent_data_ref_offset(eb, dref),
10830 key.objectid, key.offset,
10831 btrfs_extent_data_ref_count(eb, dref));
10834 case BTRFS_SHARED_DATA_REF_KEY:
10835 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10839 error("extent[%llu %d %llu] has unknown ref type: %d",
10840 key.objectid, key.type, key.offset, type);
10841 err |= UNKNOWN_TYPE;
10845 ptr += btrfs_extent_inline_ref_size(type);
10853 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the device extent at @slot of leaf @eb is referenced by the
 * chunk it names.
 *
 * The chunk key is built from the chunk objectid and chunk offset stored in
 * the device extent and looked up in the chunk root. The chunk length is
 * compared with the length of the device extent, and the stripes of the
 * chunk are scanned for one whose devid and offset equal the objectid and
 * offset of the device extent key. REFERENCER_MISSING is returned, after an
 * error message, when no matching chunk stripe was found.
 * NOTE(review): the assignment of the chunk leaf to l, the statement
 * setting found_chunk, the jumps taken on failures and the final return
 * are on lines not visible here.
 */
10855 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10856 struct extent_buffer *eb, int slot)
10858 struct btrfs_root *chunk_root = fs_info->chunk_root;
10859 struct btrfs_dev_extent *ptr;
10860 struct btrfs_path path;
10861 struct btrfs_key chunk_key;
10862 struct btrfs_key devext_key;
10863 struct btrfs_chunk *chunk;
10864 struct extent_buffer *l;
10868 int found_chunk = 0;
10871 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10872 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10873 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this device extent claims to belong to. */
10875 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10876 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10877 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10879 btrfs_init_path(&path);
10880 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10885 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10886 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that maps to this device extent. */
10889 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10890 for (i = 0; i < num_stripes; i++) {
10891 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10892 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10894 if (devid == devext_key.objectid &&
10895 offset == devext_key.offset) {
10901 btrfs_release_path(&path);
10902 if (!found_chunk) {
10904 "device extent[%llu, %llu, %llu] did not find the related chunk",
10905 devext_key.objectid, devext_key.offset, length);
10906 return REFERENCER_MISSING;
10912 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of one DEV_ITEM with the summed length of the
 * DEV_EXTENT items of the same device id in fs_info->dev_root.
 *
 * @fs_info: filesystem whose dev_root is walked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item in @eb
 *
 * Returns REFERENCER_MISSING on the error path that follows the initial
 * search, and ACCOUNTING_MISMATCH when the summed length differs from
 * bytes_used.
 * NOTE(review): the success return value is not shown in this listing.
 */
10914 static int check_dev_item(struct btrfs_fs_info *fs_info,
10915 struct extent_buffer *eb, int slot)
10917 struct btrfs_root *dev_root = fs_info->dev_root;
10918 struct btrfs_dev_item *dev_item;
10919 struct btrfs_path path;
10920 struct btrfs_key key;
10921 struct btrfs_dev_extent *ptr;
10927 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10928 dev_id = btrfs_device_id(eb, dev_item);
10929 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed by the device id as objectid */
10931 key.objectid = dev_id;
10932 key.type = BTRFS_DEV_EXTENT_KEY;
10935 btrfs_init_path(&path);
10936 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item itself so the message names that item */
10938 btrfs_item_key_to_cpu(eb, &key, slot);
10939 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10940 key.objectid, key.type, key.offset);
10941 btrfs_release_path(&path);
10942 return REFERENCER_MISSING;
10945 /* Iterate dev_extents to calculate the used space of a device */
10947 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10950 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10951 if (key.objectid > dev_id)
10953 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10956 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10957 struct btrfs_dev_extent);
10958 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10960 ret = btrfs_next_item(dev_root, &path);
10964 btrfs_release_path(&path);
/*
 * NOTE(review): the message below prints the constants
 * BTRFS_ROOT_TREE_OBJECTID and BTRFS_DEV_EXTENT_KEY plus dev_id, although
 * key was just reloaded from the dev item and is otherwise unused here.
 * Printing key.objectid, key.type and key.offset looks like the intent;
 * confirm before changing the message.
 */
10966 if (used != total) {
10967 btrfs_item_key_to_cpu(eb, &key, slot);
10969 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10970 total, used, BTRFS_ROOT_TREE_OBJECTID,
10971 BTRFS_DEV_EXTENT_KEY, dev_id);
10972 return ACCOUNTING_MISMATCH;
10978 * Check a block group item with its referencer (chunk) and its used space
10979 * with extent/metadata item
/*
 * Verify one BLOCK_GROUP_ITEM in two passes.
 *
 * Pass 1: look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 * CHUNK_ITEM, block group bytenr) in the chunk tree and compare its length.
 * Pass 2: walk the extent tree from the block group bytenr, add up the
 * EXTENT_ITEM / METADATA_ITEM sizes into total, and check that each extent's
 * DATA / TREE_BLOCK flag is compatible with the block group flags.
 *
 * @fs_info: filesystem providing extent_root and chunk_root
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item in @eb
 *
 * Problems are OR-ed into err as REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.
 * NOTE(review): the return statement is not shown in this listing;
 * presumably err is returned - confirm.
 */
10981 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10982 struct extent_buffer *eb, int slot)
10984 struct btrfs_root *extent_root = fs_info->extent_root;
10985 struct btrfs_root *chunk_root = fs_info->chunk_root;
10986 struct btrfs_block_group_item *bi;
10987 struct btrfs_block_group_item bg_item;
10988 struct btrfs_path path;
10989 struct btrfs_key bg_key;
10990 struct btrfs_key chunk_key;
10991 struct btrfs_key extent_key;
10992 struct btrfs_chunk *chunk;
10993 struct extent_buffer *leaf;
10994 struct btrfs_extent_item *ei;
10995 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11003 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11004 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so its fields can be read locally */
11005 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11006 used = btrfs_block_group_used(&bg_item);
11007 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is looked up with the block group bytenr as key offset */
11009 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11010 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11011 chunk_key.offset = bg_key.objectid;
11013 btrfs_init_path(&path);
11014 /* Search for the referencer chunk */
11015 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11018 "block group[%llu %llu] did not find the related chunk item",
11019 bg_key.objectid, bg_key.offset);
11020 err |= REFERENCER_MISSING;
11022 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11023 struct btrfs_chunk);
11024 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11027 "block group[%llu %llu] related chunk item length does not match",
11028 bg_key.objectid, bg_key.offset);
11029 err |= REFERENCER_MISMATCH;
11032 btrfs_release_path(&path);
11034 /* Search from the block group bytenr */
11035 extent_key.objectid = bg_key.objectid;
11036 extent_key.type = 0;
11037 extent_key.offset = 0;
11039 btrfs_init_path(&path);
11040 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11044 /* Iterate extent tree to account used space */
11046 leaf = path.nodes[0];
11048 /* Search slot can point to the last item beyond leaf nritems */
11049 if (path.slots[0] >= btrfs_header_nritems(leaf))
/* bg_key.objectid + bg_key.offset is the first byte past the block group */
11052 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11053 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11056 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11057 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11059 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement executed for METADATA_ITEM keys is not shown
 * in this listing; only the key.offset accumulation is visible.
 */
11062 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11065 total += extent_key.offset;
11067 ei = btrfs_item_ptr(leaf, path.slots[0],
11068 struct btrfs_extent_item);
11069 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA */
11070 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11071 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11073 "bad extent[%llu, %llu) type mismatch with chunk",
11074 extent_key.objectid,
11075 extent_key.objectid + extent_key.offset);
11076 err |= CHUNK_TYPE_MISMATCH;
11078 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11079 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11080 BTRFS_BLOCK_GROUP_METADATA))) {
11082 "bad extent[%llu, %llu) type mismatch with chunk",
11083 extent_key.objectid,
11084 extent_key.objectid + nodesize);
11085 err |= CHUNK_TYPE_MISMATCH;
11089 ret = btrfs_next_item(extent_root, &path);
11095 btrfs_release_path(&path);
/* The summed extent sizes must equal the used field of the block group item */
11097 if (total != used) {
11099 "block group[%llu %llu] used %llu but extent items used %llu",
11100 bg_key.objectid, bg_key.offset, used, total);
11101 err |= ACCOUNTING_MISMATCH;
11107 * Check a chunk item.
11108 * Including checking all referred dev_extents and block group
/*
 * Verify one CHUNK_ITEM.
 *
 * Checks performed on the visible lines:
 *  - chunk length is aligned to the superblock sectorsize (BYTES_UNALIGNED)
 *  - the type has at least one BTRFS_BLOCK_GROUP_TYPE_MASK bit and at most
 *    one profile bit (UNKNOWN_TYPE)
 *  - a BLOCK_GROUP_ITEM keyed (chunk offset, BLOCK_GROUP_ITEM, length) exists
 *    in the extent tree and carries the same flags as the chunk type
 *  - every stripe has a DEV_EXTENT keyed (devid, DEV_EXTENT, stripe offset)
 *    that points back to this chunk with the same length (BACKREF_MISSING)
 *
 * @fs_info: filesystem providing extent_root and dev_root
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item in @eb
 *
 * NOTE(review): the return statement is not shown in this listing;
 * presumably err is returned - confirm.
 * NOTE(review): a block group flags mismatch sets REFERENCER_MISSING, while
 * the chunk length mismatch in check_block_group_item() uses
 * REFERENCER_MISMATCH - confirm which bit is intended.
 * NOTE(review): the stripe message prints chunk_key.objectid as the start of
 * the chunk range while every other message here prints chunk_key.offset.
 */
11110 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11111 struct extent_buffer *eb, int slot)
11113 struct btrfs_root *extent_root = fs_info->extent_root;
11114 struct btrfs_root *dev_root = fs_info->dev_root;
11115 struct btrfs_path path;
11116 struct btrfs_key chunk_key;
11117 struct btrfs_key bg_key;
11118 struct btrfs_key devext_key;
11119 struct btrfs_chunk *chunk;
11120 struct extent_buffer *leaf;
11121 struct btrfs_block_group_item *bi;
11122 struct btrfs_block_group_item bg_item;
11123 struct btrfs_dev_extent *ptr;
11124 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11136 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11137 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11138 length = btrfs_chunk_length(eb, chunk);
11139 chunk_end = chunk_key.offset + length;
11140 if (!IS_ALIGNED(length, sectorsize)) {
11141 error("chunk[%llu %llu) not aligned to %u",
11142 chunk_key.offset, chunk_end, sectorsize);
11143 err |= BYTES_UNALIGNED;
11147 type = btrfs_chunk_type(eb, chunk);
11148 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11149 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11150 error("chunk[%llu %llu) has no chunk type",
11151 chunk_key.offset, chunk_end);
11152 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11154 if (profile && (profile & (profile - 1))) {
11155 error("chunk[%llu %llu) multiple profiles detected: %llx",
11156 chunk_key.offset, chunk_end, profile);
11157 err |= UNKNOWN_TYPE;
/* Key of the block group item that should describe this chunk */
11160 bg_key.objectid = chunk_key.offset;
11161 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11162 bg_key.offset = length;
11164 btrfs_init_path(&path);
11165 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11168 "chunk[%llu %llu) did not find the related block group item",
11169 chunk_key.offset, chunk_end);
11170 err |= REFERENCER_MISSING;
11172 leaf = path.nodes[0];
11173 bi = btrfs_item_ptr(leaf, path.slots[0],
11174 struct btrfs_block_group_item);
11175 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11177 if (btrfs_block_group_flags(&bg_item) != type) {
11179 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11180 chunk_key.offset, chunk_end, type,
11181 btrfs_block_group_flags(&bg_item));
11182 err |= REFERENCER_MISSING;
/* One dev extent lookup per stripe; the path is reset on every iteration */
11186 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11187 for (i = 0; i < num_stripes; i++) {
11188 btrfs_release_path(&path);
11189 btrfs_init_path(&path);
11190 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11191 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11192 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11194 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11197 goto not_match_dev;
11199 leaf = path.nodes[0];
11200 ptr = btrfs_item_ptr(leaf, path.slots[0],
11201 struct btrfs_dev_extent);
11202 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11203 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must name this chunk and have the same length */
11204 if (objectid != chunk_key.objectid ||
11205 offset != chunk_key.offset ||
11206 btrfs_dev_extent_length(leaf, ptr) != length)
11207 goto not_match_dev;
11210 err |= BACKREF_MISSING;
11212 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11213 chunk_key.objectid, chunk_end, i);
11216 btrfs_release_path(&path);
11222 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of leaf @eb to the checker that matches its key type.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf to check
 *
 * Key type to handler, as visible below:
 *   EXTENT_DATA                -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM           -> check_block_group_item()
 *   DEV_ITEM                   -> check_dev_item()
 *   CHUNK_ITEM                 -> check_chunk_item()
 *   DEV_EXTENT                 -> check_dev_extent_item()
 *   EXTENT_ITEM, METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM                -> item size added to total_csum_bytes
 *   TREE_BLOCK_REF, EXTENT_DATA_REF, SHARED_BLOCK_REF, SHARED_DATA_REF
 *                              -> the matching keyed backref checker
 *
 * NOTE(review): how the per-item results are combined and returned is not
 * shown in this listing.
 */
11224 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11226 struct btrfs_fs_info *fs_info = root->fs_info;
11227 struct btrfs_key key;
11230 struct btrfs_extent_data_ref *dref;
11235 btrfs_item_key_to_cpu(eb, &key, slot);
11239 case BTRFS_EXTENT_DATA_KEY:
11240 ret = check_extent_data_item(root, eb, slot);
11243 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11244 ret = check_block_group_item(fs_info, eb, slot);
11247 case BTRFS_DEV_ITEM_KEY:
11248 ret = check_dev_item(fs_info, eb, slot);
11251 case BTRFS_CHUNK_ITEM_KEY:
11252 ret = check_chunk_item(fs_info, eb, slot);
11255 case BTRFS_DEV_EXTENT_KEY:
11256 ret = check_dev_extent_item(fs_info, eb, slot);
11259 case BTRFS_EXTENT_ITEM_KEY:
11260 case BTRFS_METADATA_ITEM_KEY:
11261 ret = check_extent_item(fs_info, eb, slot);
/* Csum items are only accounted here, not verified */
11264 case BTRFS_EXTENT_CSUM_KEY:
11265 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs: the referencer id is carried in key.offset */
11267 case BTRFS_TREE_BLOCK_REF_KEY:
11268 ret = check_tree_block_backref(fs_info, key.offset,
11272 case BTRFS_EXTENT_DATA_REF_KEY:
11273 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11274 ret = check_extent_data_backref(fs_info,
11275 btrfs_extent_data_ref_root(eb, dref),
11276 btrfs_extent_data_ref_objectid(eb, dref),
11277 btrfs_extent_data_ref_offset(eb, dref),
11279 btrfs_extent_data_ref_count(eb, dref));
11282 case BTRFS_SHARED_BLOCK_REF_KEY:
11283 ret = check_shared_block_backref(fs_info, key.offset,
11287 case BTRFS_SHARED_DATA_REF_KEY:
11288 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while one remains in the leaf */
11296 if (++slot < btrfs_header_nritems(eb))
11303 * Helper function for later fs/subvol tree check. To determine if a tree
11304 * block should be checked.
11305 * This function ensures that only the direct referencer with the lowest rootid
11306 * checks a fs/subvolume tree block.
11308 * Backref check at extent tree would detect errors like missing subvolume
11309 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the tree that should check tree block @eb.
 *
 * @root: tree currently being traversed; root->objectid is the candidate id
 * @eb:   tree block in question
 *
 * The extent item of @eb is located in the extent tree by searching for
 * (bytenr, METADATA_ITEM, -1) and stepping back with
 * btrfs_previous_extent_item(). Its inline refs are then scanned for a
 * TREE_BLOCK_REF whose offset is lower than root->objectid.
 *
 * NOTE(review): the return statements are not shown in this listing. The
 * caller treats a zero result as "skip this block"; presumably a lower
 * referencer yields 0 and every other path, lookup failures included,
 * yields non-zero - confirm.
 */
11311 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11313 struct btrfs_root *extent_root = root->fs_info->extent_root;
11314 struct btrfs_key key;
11315 struct btrfs_path path;
11316 struct extent_buffer *leaf;
11318 struct btrfs_extent_item *ei;
11324 struct btrfs_extent_inline_ref *iref;
11327 btrfs_init_path(&path);
11328 key.objectid = btrfs_header_bytenr(eb);
11329 key.type = BTRFS_METADATA_ITEM_KEY;
11330 key.offset = (u64)-1;
11333 * Any failure in backref resolving means we can't determine
11334 * whom the tree block belongs to.
11335 * So in that case, we need to check that tree block
11337 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11341 ret = btrfs_previous_extent_item(extent_root, &path,
11342 btrfs_header_bytenr(eb));
11346 leaf = path.nodes[0];
11347 slot = path.slots[0];
11348 btrfs_item_key_to_cpu(leaf, &key, slot);
11349 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11351 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11352 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11354 struct btrfs_tree_block_info *info;
11356 info = (struct btrfs_tree_block_info *)(ei + 1);
11357 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11360 item_size = btrfs_item_size_nr(leaf, slot);
11361 ptr = (unsigned long)iref;
11362 end = (unsigned long)ei + item_size;
11363 while (ptr < end) {
11364 iref = (struct btrfs_extent_inline_ref *)ptr;
11365 type = btrfs_extent_inline_ref_type(leaf, iref);
11366 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11369 * We only check the tree block if current root is
11370 * the lowest referencer of it.
11372 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11373 offset < root->objectid) {
11374 btrfs_release_path(&path);
11378 ptr += btrfs_extent_inline_ref_size(type);
11381 * Normally we should also check keyed tree block ref, but that may be
11382 * very time consuming. Inlined ref should already make us skip a lot
11383 * of refs now. So skip search keyed tree block ref.
11387 btrfs_release_path(&path);
11392 * Traversal function for tree block. We will do:
11393 * 1) Skip shared fs/subvolume tree blocks
11394 * 2) Update related bytes accounting
11395 * 3) Pre-order traversal
/*
 * Recursively check tree block @node and everything below it.
 *
 * @root: tree being traversed
 * @node: tree block to check; both leaves and internal nodes are handled
 *
 * Visible steps:
 *  - for fs/subvolume trees the block is tested with should_check()
 *  - the global byte counters and found_old_backref are updated
 *  - the block itself is checked with check_tree_block_ref()
 *  - check_leaf_items() is called and free leaf space is added to
 *    btree_space_waste
 *  - unused key pointer slots are added to btree_space_waste, then each
 *    child is read with read_tree_block() and traversed when the buffer is
 *    up to date
 *
 * NOTE(review): the branch conditions, return statements and error
 * accumulation are not shown in this listing.
 */
11397 static int traverse_tree_block(struct btrfs_root *root,
11398 struct extent_buffer *node)
11400 struct extent_buffer *eb;
11401 struct btrfs_key key;
11402 struct btrfs_key drop_key;
11410 * Skip shared fs/subvolume tree block, in that case they will
11411 * be checked by referencer with lowest rootid
11413 if (is_fstree(root->objectid) && !should_check(root, node))
11416 /* Update bytes accounting */
11417 total_btree_bytes += node->len;
11418 if (fs_root_objectid(btrfs_header_owner(node)))
11419 total_fs_tree_bytes += node->len;
11420 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11421 total_extent_tree_bytes += node->len;
/*
 * found_old_backref is latched for a reloc-tree-owned block that has the
 * mixed backref revision but lacks the RELOC header flag.
 */
11422 if (!found_old_backref &&
11423 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11424 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11425 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11426 found_old_backref = 1;
11428 /* pre-order traversal, check itself first */
11429 level = btrfs_header_level(node);
11430 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11431 btrfs_header_level(node),
11432 btrfs_header_owner(node));
11436 "check %s failed root %llu bytenr %llu level %d, force continue check",
11437 level ? "node":"leaf", root->objectid,
11438 btrfs_header_bytenr(node), btrfs_header_level(node));
11441 btree_space_waste += btrfs_leaf_free_space(root, node);
11442 ret = check_leaf_items(root, node);
11447 nr = btrfs_header_nritems(node);
11448 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of an internal node count as wasted space */
11449 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11450 sizeof(struct btrfs_key_ptr);
11452 /* Then check all its children */
11453 for (i = 0; i < nr; i++) {
11454 u64 blocknr = btrfs_node_blockptr(node, i);
11456 btrfs_node_key_to_cpu(node, &key, i);
/* Children at drop_level are tested against the recorded drop progress key */
11457 if (level == root->root_item.drop_level &&
11458 is_dropped_key(&key, &drop_key))
11462 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11463 * to call the function itself.
11465 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11466 if (extent_buffer_uptodate(eb)) {
11467 ret = traverse_tree_block(root, eb);
11470 free_extent_buffer(eb);
11477 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory variant of the chunk/extent check.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Traverses the chunk tree and the tree root first, then walks the
 * ROOT_ITEMs of the tree root starting at BTRFS_EXTENT_TREE_OBJECTID and
 * traverses each referenced tree with traverse_tree_block(). Roots whose
 * objectid is BTRFS_TREE_RELOC_OBJECTID are read with
 * btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root()
 * after traversal; all others are read with btrfs_read_fs_root().
 *
 * NOTE(review): error accumulation and the return statement are not shown in
 * this listing.
 * NOTE(review): the message "cannot find extent treet in tree_root" looks
 * like a typo for "tree"; it is a runtime string and is left unchanged.
 * NOTE(review): key.objectid is printed with %lld in "failed to read tree"
 * while other messages in this file print key objectids with %llu.
 */
11479 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11481 struct btrfs_path path;
11482 struct btrfs_key key;
11483 struct btrfs_root *root1;
11484 struct btrfs_root *cur_root;
11488 root1 = root->fs_info->chunk_root;
11489 ret = traverse_tree_block(root1, root1->node);
11492 root1 = root->fs_info->tree_root;
11493 ret = traverse_tree_block(root1, root1->node);
/* root1 is still the tree root here; it is searched for root items below */
11496 btrfs_init_path(&path);
11497 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11499 key.type = BTRFS_ROOT_ITEM_KEY;
11501 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11503 error("cannot find extent treet in tree_root");
11508 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11509 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* offset is set to (u64)-1 before the key is used to read the root */
11511 key.offset = (u64)-1;
11513 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11514 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11517 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11518 if (IS_ERR(cur_root) || !cur_root) {
11519 error("failed to read tree: %lld", key.objectid);
11523 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached, so they are freed here */
11526 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11527 btrfs_free_fs_root(cur_root);
11529 ret = btrfs_next_item(root1, &path);
11535 btrfs_release_path(&path);
/*
 * Replace the root node of @root with a freshly initialised tree block.
 *
 * @trans:     running transaction; trans->transid becomes the generation
 * @root:      tree whose root node is re-initialised
 * @overwrite: NOTE(review): presumably selects reusing the current root
 *             block instead of allocating a new one with
 *             btrfs_alloc_free_block(); the branch condition is not shown
 *             in this listing - confirm
 *
 * The block header is zeroed and refilled (level, bytenr, generation,
 * backref revision, owner, fsid, chunk tree uuid) and the buffer is marked
 * dirty. When the new block sits at the same bytenr as the old one, the root
 * item generation and level are refreshed and written with
 * btrfs_update_root(). Finally the old buffer reference is dropped and the
 * root is put on the dirty list.
 *
 * NOTE(review): the return statements and the root->node assignment are not
 * shown in this listing.
 */
11539 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11540 struct btrfs_root *root, int overwrite)
11542 struct extent_buffer *c;
11543 struct extent_buffer *old = root->node;
11546 struct btrfs_disk_key disk_key = {0,0,0};
11552 extent_buffer_get(c);
11555 c = btrfs_alloc_free_block(trans, root,
11557 root->root_key.objectid,
11558 &disk_key, level, 0, 0);
11561 extent_buffer_get(c);
/* Rebuild the block header from scratch */
11565 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11566 btrfs_set_header_level(c, level);
11567 btrfs_set_header_bytenr(c, c->start);
11568 btrfs_set_header_generation(c, trans->transid);
11569 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11570 btrfs_set_header_owner(c, root->root_key.objectid);
11572 write_extent_buffer(c, root->fs_info->fsid,
11573 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11575 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11576 btrfs_header_chunk_tree_uuid(c),
11579 btrfs_mark_buffer_dirty(c);
11581 * this case can happen in the following case:
11583 * 1.overwrite previous root.
11585 * 2.reinit reloc data root, this is because we skip pin
11586 * down reloc data tree before which means we can allocate
11587 * same block bytenr here.
11589 if (old->start == c->start) {
11590 btrfs_set_root_generation(&root->root_item,
11592 root->root_item.level = btrfs_header_level(root->node);
11593 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11594 &root->root_key, &root->root_item);
11596 free_extent_buffer(c);
11600 free_extent_buffer(old);
11602 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself
 *
 * A block already marked EXTENT_DIRTY in pinned_extents is tested first so
 * that it is not pinned twice. The visible code has two paths:
 *  - ROOT_ITEM items, other than the extent root and the two reloc roots,
 *    have their root block read and walked with tree_root == 0
 *  - node pointers are followed; for a level 1 node outside the tree root
 *    the child is pinned by bytenr without being read
 *
 * NOTE(review): the conditions selecting between these paths and the return
 * statements are not shown in this listing.
 */
11606 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11607 struct extent_buffer *eb, int tree_root)
11609 struct extent_buffer *tmp;
11610 struct btrfs_root_item *ri;
11611 struct btrfs_key key;
11614 int level = btrfs_header_level(eb);
11620 * If we have pinned this block before, don't pin it again.
11621 * This can not only avoid forever loop with broken filesystem
11622 * but also give us some speedups.
11624 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11625 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11628 btrfs_pin_extent(fs_info, eb->start, eb->len);
11630 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11631 nritems = btrfs_header_nritems(eb);
11632 for (i = 0; i < nritems; i++) {
11634 btrfs_item_key_to_cpu(eb, &key, i);
11635 if (key.type != BTRFS_ROOT_ITEM_KEY)
11637 /* Skip the extent root and reloc roots */
11638 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11639 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11640 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11642 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11643 bytenr = btrfs_disk_root_bytenr(eb, ri);
11646 * If at any point we start needing the real root we
11647 * will have to build a stump root for the root we are
11648 * in, but for now this doesn't actually use the root so
11649 * just pass in extent_root.
11651 tmp = read_tree_block(fs_info->extent_root, bytenr,
11653 if (!extent_buffer_uptodate(tmp)) {
11654 fprintf(stderr, "Error reading root block\n");
11657 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11658 free_extent_buffer(tmp);
11662 bytenr = btrfs_node_blockptr(eb, i);
11664 /* If we aren't the tree root don't read the block */
11665 if (level == 1 && !tree_root) {
11666 btrfs_pin_extent(fs_info, bytenr, nodesize);
11670 tmp = read_tree_block(fs_info->extent_root, bytenr,
11672 if (!extent_buffer_uptodate(tmp)) {
11673 fprintf(stderr, "Error reading tree block\n");
11676 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11677 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root (tree_root argument 0) and
 * then from the tree root (tree_root argument 1).
 *
 * Returns the result of the tree root walk on the visible return path.
 * NOTE(review): the handling of a failed chunk root walk is not shown in
 * this listing.
 */
11686 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11690 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11694 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem whose block group state is rebuilt
 *
 * The avail_*_alloc_bits masks are cleared. Then, for every CHUNK_ITEM
 * found, a block group is added with btrfs_add_block_group() using the
 * chunk type, key and length, and the chunk range is marked in
 * fs_info->free_space_cache with set_extent_dirty(). A second loop steps
 * through the block groups with btrfs_lookup_first_block_group().
 *
 * NOTE(review): the body of the second loop, the loop exits and the return
 * statements are not shown in this listing.
 */
11697 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11699 struct btrfs_block_group_cache *cache;
11700 struct btrfs_path path;
11701 struct extent_buffer *leaf;
11702 struct btrfs_chunk *chunk;
11703 struct btrfs_key key;
11707 btrfs_init_path(&path);
11709 key.type = BTRFS_CHUNK_ITEM_KEY;
11711 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11713 btrfs_release_path(&path);
11718 * We do this in case the block groups were screwed up and had alloc
11719 * bits that aren't actually set on the chunks. This happens with
11720 * restored images every time and could happen in real life I guess.
11722 fs_info->avail_data_alloc_bits = 0;
11723 fs_info->avail_metadata_alloc_bits = 0;
11724 fs_info->avail_system_alloc_bits = 0;
11726 /* First we need to create the in-memory block groups */
11728 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11729 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11731 btrfs_release_path(&path);
11739 leaf = path.nodes[0];
11740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11741 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11746 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11747 btrfs_add_block_group(fs_info, 0,
11748 btrfs_chunk_type(leaf, chunk),
11749 key.objectid, key.offset,
11750 btrfs_chunk_length(leaf, chunk));
11751 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11752 key.offset + btrfs_chunk_length(leaf, chunk));
/* start advances to the end of each block group that is found */
11757 cache = btrfs_lookup_first_block_group(fs_info, start);
11761 start = cache->key.objectid + cache->key.offset;
11764 btrfs_release_path(&path);
/*
 * Remove the state of a pending balance.
 *
 * @trans:   running transaction
 * @fs_info: filesystem to clean up
 *
 * Visible steps, all against the tree root unless noted:
 *  1. search for the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) key and delete
 *     the item with btrfs_del_item()
 *  2. search from (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM) and delete the
 *     matching items in batches with btrfs_del_items()
 *  3. under the reinit_data_reloc label: read the data reloc tree, record it
 *     in the transaction, reinitialise it with btrfs_fsck_reinit_root() and
 *     create its root directory with btrfs_make_root_dir()
 *
 * NOTE(review): loop structure, error jumps and return statements are not
 * shown in this listing.
 */
11768 static int reset_balance(struct btrfs_trans_handle *trans,
11769 struct btrfs_fs_info *fs_info)
11771 struct btrfs_root *root = fs_info->tree_root;
11772 struct btrfs_path path;
11773 struct extent_buffer *leaf;
11774 struct btrfs_key key;
11775 int del_slot, del_nr = 0;
11779 btrfs_init_path(&path);
11780 key.objectid = BTRFS_BALANCE_OBJECTID;
11781 key.type = BTRFS_BALANCE_ITEM_KEY;
11783 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11788 goto reinit_data_reloc;
11793 ret = btrfs_del_item(trans, root, &path);
11796 btrfs_release_path(&path);
11798 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11799 key.type = BTRFS_ROOT_ITEM_KEY;
11801 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11805 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11810 ret = btrfs_del_items(trans, root, &path,
11817 btrfs_release_path(&path);
11820 ret = btrfs_search_slot(trans, root, &key, &path,
11827 leaf = path.nodes[0];
11828 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11829 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11831 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11836 del_slot = path.slots[0];
11845 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11849 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
11852 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11853 key.type = BTRFS_ROOT_ITEM_KEY;
11854 key.offset = (u64)-1;
11855 root = btrfs_read_fs_root(fs_info, &key);
11856 if (IS_ERR(root)) {
11857 fprintf(stderr, "Error reading data reloc tree\n");
11858 ret = PTR_ERR(root);
11861 record_root_in_trans(trans, root);
11862 ret = btrfs_fsck_reinit_root(trans, root, 0);
11865 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11867 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose extent tree is re-initialised
 *
 * Visible sequence:
 *  1. filesystems with the MIXED_GROUPS incompat flag are tested for and a
 *     "not supported" message is printed
 *  2. pin_metadata_blocks() pins the metadata that is in use
 *  3. the block groups are freed and recreated with reset_block_groups()
 *  4. the extent root is reinitialised with btrfs_fsck_reinit_root()
 *  5. every in-memory block group item is inserted into the new extent
 *     tree, followed by btrfs_extent_post_op()
 *  6. reset_balance() clears a pending balance
 *
 * NOTE(review): the return statements and the failure checks around each
 * step are not shown in this listing.
 */
11871 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11872 struct btrfs_fs_info *fs_info)
11878 * The only reason we don't do this is because right now we're just
11879 * walking the trees we find and pinning down their bytes, we don't look
11880 * at any of the leaves. In order to do mixed groups we'd have to check
11881 * the leaves of any fs roots and pin down the bytes for any file
11882 * extents we find. Not hard but why do it if we don't have to?
11884 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11885 fprintf(stderr, "We don't support re-initing the extent tree "
11886 "for mixed block groups yet, please notify a btrfs "
11887 "developer you want to do this so they can add this "
11888 "functionality.\n");
11893 * first we need to walk all of the trees except the extent tree and pin
11894 * down the bytes that are in use so we don't overwrite any existing
11897 ret = pin_metadata_blocks(fs_info);
11899 fprintf(stderr, "error pinning down used bytes\n");
11904 * Need to drop all the block groups since we're going to recreate all
11907 btrfs_free_block_groups(fs_info);
11908 ret = reset_block_groups(fs_info);
11910 fprintf(stderr, "error resetting the block groups\n");
11914 /* Ok we can allocate now, reinit the extent root */
11915 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11917 fprintf(stderr, "extent root initialization failed\n");
11919 * When the transaction code is updated we should end the
11920 * transaction, but for now progs only knows about commit so
11921 * just return an error.
11927 * Now we have all the in-memory block groups setup so we can make
11928 * allocations properly, and the metadata we care about is safe since we
11929 * pinned all of it above.
11932 struct btrfs_block_group_cache *cache;
11934 cache = btrfs_lookup_first_block_group(fs_info, start);
11937 start = cache->key.objectid + cache->key.offset;
11938 ret = btrfs_insert_item(trans, fs_info->extent_root,
11939 &cache->key, &cache->item,
11940 sizeof(cache->item));
11942 fprintf(stderr, "Error adding block group\n");
11945 btrfs_extent_post_op(trans, fs_info->extent_root);
11948 ret = reset_balance(trans, fs_info);
11950 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Search for tree block @eb in its owner tree inside a transaction, with the
 * cow argument of btrfs_search_slot() set to 1.
 *
 * @root: any root of the filesystem; replaced below by the owner root of @eb
 * @eb:   tree block to rewrite
 *
 * The owner root is read from the header owner of @eb. The search uses the
 * first key of @eb and sets path.lowest_level to the level of @eb.
 *
 * Returns PTR_ERR() of the failed lookup or transaction start on the visible
 * error paths.
 * NOTE(review): the final return is not shown in this listing.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored here,
 * unlike in zero_log_tree() - confirm that ignoring it is intended.
 */
11955 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11957 struct btrfs_path path;
11958 struct btrfs_trans_handle *trans;
11959 struct btrfs_key key;
11962 printf("Recowing metadata block %llu\n", eb->start);
11963 key.objectid = btrfs_header_owner(eb);
11964 key.type = BTRFS_ROOT_ITEM_KEY;
11965 key.offset = (u64)-1;
11967 root = btrfs_read_fs_root(root->fs_info, &key);
11968 if (IS_ERR(root)) {
11969 fprintf(stderr, "Couldn't find owner root %llu\n",
11971 return PTR_ERR(root);
11974 trans = btrfs_start_transaction(root, 1);
11976 return PTR_ERR(trans);
11978 btrfs_init_path(&path);
/* A non-zero level means eb is a node, so its first key is a node key */
11979 path.lowest_level = btrfs_header_level(eb);
11980 if (path.lowest_level)
11981 btrfs_node_key_to_cpu(eb, &key, 0);
11983 btrfs_item_key_to_cpu(eb, &key, 0);
11985 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11986 btrfs_commit_transaction(trans, root);
11987 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree it was found in.
 *
 * @root: any root of the filesystem; replaced below by the root named by
 *        bad->root_id
 * @bad:  record holding the owning root id and the key of the item
 *
 * The item is located with btrfs_search_slot() inside a transaction and
 * removed with btrfs_del_item(); the transaction is then committed.
 *
 * Returns PTR_ERR() of the failed lookup or transaction start on the visible
 * error paths.
 * NOTE(review): the handling of the search result and the final return are
 * not shown in this listing.
 * NOTE(review): the result of btrfs_commit_transaction() is not stored here.
 */
11991 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11993 struct btrfs_path path;
11994 struct btrfs_trans_handle *trans;
11995 struct btrfs_key key;
11998 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11999 bad->key.type, bad->key.offset);
12000 key.objectid = bad->root_id;
12001 key.type = BTRFS_ROOT_ITEM_KEY;
12002 key.offset = (u64)-1;
12004 root = btrfs_read_fs_root(root->fs_info, &key);
12005 if (IS_ERR(root)) {
12006 fprintf(stderr, "Couldn't find owner root %llu\n",
12008 return PTR_ERR(root);
12011 trans = btrfs_start_transaction(root, 1);
12013 return PTR_ERR(trans);
12015 btrfs_init_path(&path);
12016 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12022 ret = btrfs_del_item(trans, root, &path);
12024 btrfs_commit_transaction(trans, root);
12025 btrfs_release_path(&path);
/*
 * Clear the log root pointer and log root level in the superblock copy and
 * commit a transaction.
 *
 * @root: root used to start and commit the transaction
 *
 * ret receives PTR_ERR() of a failed transaction start, otherwise the result
 * of btrfs_commit_transaction().
 * NOTE(review): the return statement is not shown in this listing.
 */
12029 static int zero_log_tree(struct btrfs_root *root)
12031 struct btrfs_trans_handle *trans;
12034 trans = btrfs_start_transaction(root, 1);
12035 if (IS_ERR(trans)) {
12036 ret = PTR_ERR(trans);
12039 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12040 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12041 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start, one
 * sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: csum tree; also supplies the sector size
 * @buf:       scratch buffer the sector is read into
 *             (presumably at least one sector large - confirm with callers)
 * @start:     logical start of the data extent
 *
 * Each iteration reads the sector at start + offset with read_extent_data()
 * and hands it to btrfs_csum_file_block(), which is also given start + len
 * on every call.
 *
 * NOTE(review): the remaining parameter declaration, the error checks and
 * the return statement are not shown in this listing.
 */
12045 static int populate_csum(struct btrfs_trans_handle *trans,
12046 struct btrfs_root *csum_root, char *buf, u64 start,
12053 while (offset < len) {
12054 sectorsize = csum_root->sectorsize;
12055 ret = read_extent_data(csum_root, buf, start + offset,
12059 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12060 start + offset, buf, sectorsize);
12063 offset += sectorsize;
/*
 * Regenerate checksums for the regular file extents referenced by one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree to scan
 *
 * Items are walked with btrfs_next_item(). For EXTENT_DATA items of type
 * BTRFS_FILE_EXTENT_REG the on-disk bytenr and on-disk length are passed to
 * populate_csum(); its -EEXIST result is tested separately.
 *
 * NOTE(review): the search key setup, the skip/exit statements, the release
 * of buf and the return statement are not shown in this listing.
 */
12068 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12069 struct btrfs_root *csum_root,
12070 struct btrfs_root *cur_root)
12072 struct btrfs_path path;
12073 struct btrfs_key key;
12074 struct extent_buffer *node;
12075 struct btrfs_file_extent_item *fi;
/* One sector of scratch space, sized from the csum root sector size */
12082 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12086 btrfs_init_path(&path);
12090 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12093 /* Iterate all regular file extents and fill its csum */
12095 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12097 if (key.type != BTRFS_EXTENT_DATA_KEY)
12099 node = path.nodes[0];
12100 slot = path.slots[0];
12101 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12102 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12104 start = btrfs_file_extent_disk_bytenr(node, fi);
12105 len = btrfs_file_extent_disk_num_bytes(node, fi);
12107 ret = populate_csum(trans, csum_root, buf, start, len);
12108 if (ret == -EEXIST)
12114 * TODO: if next leaf is corrupted, jump to nearest next valid
12117 ret = btrfs_next_item(cur_root, &path);
12127 btrfs_release_path(&path);
/*
 * Regenerate the csum tree by scanning every fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * ROOT_ITEMs of the tree root are walked starting at
 * BTRFS_FS_TREE_OBJECTID. Keys are filtered by objectid against
 * BTRFS_LAST_FREE_OBJECTID, by type and by is_fstree(); each accepted root
 * is read with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): the skip/exit statements and the return statement are not
 * shown in this listing.
 */
12132 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12133 struct btrfs_root *csum_root)
12135 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12136 struct btrfs_path path;
12137 struct btrfs_root *tree_root = fs_info->tree_root;
12138 struct btrfs_root *cur_root;
12139 struct extent_buffer *node;
12140 struct btrfs_key key;
12144 btrfs_init_path(&path);
12145 key.objectid = BTRFS_FS_TREE_OBJECTID;
12147 key.type = BTRFS_ROOT_ITEM_KEY;
12148 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12157 node = path.nodes[0];
12158 slot = path.slots[0];
12159 btrfs_item_key_to_cpu(node, &key, slot);
12160 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12162 if (key.type != BTRFS_ROOT_ITEM_KEY)
12164 if (!is_fstree(key.objectid))
/* offset is set to (u64)-1 before the key is used to read the root */
12166 key.offset = (u64)-1;
12168 cur_root = btrfs_read_fs_root(fs_info, &key);
12169 if (IS_ERR(cur_root) || !cur_root) {
12170 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12174 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12179 ret = btrfs_next_item(tree_root, &path);
12189 btrfs_release_path(&path);
/*
 * Regenerate the csum tree by scanning the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * EXTENT_ITEMs are walked leaf by leaf. Items whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA are passed to populate_csum(), with key.objectid as
 * the start of the range.
 *
 * NOTE(review): the skip/exit statements, the release of buf and the return
 * statement are not shown in this listing.
 */
12193 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12194 struct btrfs_root *csum_root)
12196 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12197 struct btrfs_path path;
12198 struct btrfs_extent_item *ei;
12199 struct extent_buffer *leaf;
12201 struct btrfs_key key;
12204 btrfs_init_path(&path);
12206 key.type = BTRFS_EXTENT_ITEM_KEY;
12208 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12210 btrfs_release_path(&path);
/* One sector of scratch space for populate_csum() */
12214 buf = malloc(csum_root->sectorsize);
12216 btrfs_release_path(&path);
12221 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12222 ret = btrfs_next_leaf(extent_root, &path);
12230 leaf = path.nodes[0];
12232 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12233 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12238 ei = btrfs_item_ptr(leaf, path.slots[0],
12239 struct btrfs_extent_item);
12240 if (!(btrfs_extent_flags(leaf, ei) &
12241 BTRFS_EXTENT_FLAG_DATA)) {
12246 ret = populate_csum(trans, csum_root, buf, key.objectid,
12253 btrfs_release_path(&path);
12259 * Recalculate the csum and put it into the csum tree.
12261 * Extent tree init will wipe out all the extent info, so in that case, we
12262 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12263 * will use fs/subvol trees to init the csum tree.
/*
 * Dispatcher for csum tree refilling.
 *
 * @trans:          transaction handle, forwarded to the selected helper
 * @csum_root:      csum tree root, forwarded to the selected helper
 * @search_fs_tree: non-zero selects fill_csum_tree_from_fs(), zero selects
 *                  fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
12265 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12266 struct btrfs_root *csum_root,
12267 int search_fs_tree)
12269 if (search_fs_tree)
12270 return fill_csum_tree_from_fs(trans, csum_root);
12272 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: every cache_extent is removed from
 * the tree, then the tree itself is freed and the pointer reset to NULL so
 * that build_roots_info_cache() allocates a fresh one next time.
 */
12275 static void free_roots_info_cache(void)
/*
 * Cache was never allocated; the body of this check is not visible here,
 * presumably an early return - TODO confirm.
 */
12277 if (!roots_info_cache)
12280 while (!cache_tree_empty(roots_info_cache)) {
12281 struct cache_extent *entry;
12282 struct root_item_info *rii;
12284 entry = first_cache_extent(roots_info_cache);
12287 remove_cache_extent(roots_info_cache, entry);
/* Map the removed cache_extent back to the root_item_info embedding it. */
12288 rii = container_of(entry, struct root_item_info, cache_extent);
12292 free(roots_info_cache);
12293 roots_info_cache = NULL;
/*
 * Scan the extent tree of info and collect, per root id, data about the
 * tree block with the highest level whose first inline ref is of type
 * BTRFS_TREE_BLOCK_REF_KEY. The results live in the global roots_info_cache
 * as root_item_info entries indexed by root id (cache_extent.start is the
 * root id, cache_extent.size is 1).
 *
 * NOTE(review): loop header, error paths and return statement are not
 * visible in this listing.
 */
12296 static int build_roots_info_cache(struct btrfs_fs_info *info)
12299 struct btrfs_key key;
12300 struct extent_buffer *leaf;
12301 struct btrfs_path path;
/* Allocate and initialize the global cache on first use. */
12303 if (!roots_info_cache) {
12304 roots_info_cache = malloc(sizeof(*roots_info_cache));
12305 if (!roots_info_cache)
12307 cache_tree_init(roots_info_cache);
12310 btrfs_init_path(&path);
12312 key.type = BTRFS_EXTENT_ITEM_KEY;
12314 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12317 leaf = path.nodes[0];
12320 struct btrfs_key found_key;
12321 struct btrfs_extent_item *ei;
12322 struct btrfs_extent_inline_ref *iref;
12323 int slot = path.slots[0];
12328 struct cache_extent *entry;
12329 struct root_item_info *rii;
/* End of the current leaf: step to the next leaf and reload leaf and slot. */
12331 if (slot >= btrfs_header_nritems(leaf)) {
12332 ret = btrfs_next_leaf(info->extent_root, &path);
12339 leaf = path.nodes[0];
12340 slot = path.slots[0];
12343 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/*
 * Only BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY items are examined
 * further; the action for other keys is not visible, presumably skip.
 */
12345 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12346 found_key.type != BTRFS_METADATA_ITEM_KEY)
12349 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12350 flags = btrfs_extent_flags(leaf, ei);
/* An extent item lacking BTRFS_EXTENT_FLAG_TREE_BLOCK is singled out here. */
12352 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12353 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Metadata item: the first inline ref directly follows the extent item and
 * the level is taken from the key offset.
 */
12356 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12357 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12358 level = found_key.offset;
/*
 * Layout with a btrfs_tree_block_info between the extent item and the first
 * inline ref; the level is read from that structure.
 */
12360 struct btrfs_tree_block_info *binfo;
12362 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12363 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12364 level = btrfs_tree_block_level(leaf, binfo);
12368 * For a root extent, it must be of the following type and the
12369 * first (and only one) iref in the item.
12371 type = btrfs_extent_inline_ref_type(leaf, iref);
12372 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The offset of the inline ref is used as the id of the owning root. */
12375 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12376 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/*
 * New entry for this root id; level (u8)-1 marks that no block has been
 * recorded yet (see the level comparison further down).
 */
12378 rii = malloc(sizeof(struct root_item_info));
12383 rii->cache_extent.start = root_id;
12384 rii->cache_extent.size = 1;
12385 rii->level = (u8)-1;
12386 entry = &rii->cache_extent;
12387 ret = insert_cache_extent(roots_info_cache, entry);
12390 rii = container_of(entry, struct root_item_info,
12394 ASSERT(rii->cache_extent.start == root_id);
12395 ASSERT(rii->cache_extent.size == 1);
/*
 * A block with a higher level, or the first block seen for this root,
 * replaces the recorded level, bytenr and generation and resets node_count.
 */
12397 if (level > rii->level || rii->level == (u8)-1) {
12398 rii->level = level;
12399 rii->bytenr = found_key.objectid;
12400 rii->gen = btrfs_extent_generation(leaf, ei);
12401 rii->node_count = 1;
/* Same level as recorded; the body of this branch is not visible here. */
12402 } else if (level == rii->level) {
12410 btrfs_release_path(&path);
/*
 * Compare the root item located at path->nodes[0] / path->slots[0] with the
 * root_item_info cached for root_key->objectid. On a mismatch in bytenr,
 * level or generation a message is printed and, unless read_only_mode is
 * set, the three fields are rewritten in the leaf.
 *
 * @path:           path positioned on the root item to check
 * @root_key:       key of that item; its objectid is the root id looked up
 * @read_only_mode: non-zero means check only, do not modify the leaf
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the exact return values have to be confirmed against the full source.
 */
12415 static int maybe_repair_root_item(struct btrfs_path *path,
12416 const struct btrfs_key *root_key,
12417 const int read_only_mode)
12419 const u64 root_id = root_key->objectid;
12420 struct cache_extent *entry;
12421 struct root_item_info *rii;
12422 struct btrfs_root_item ri;
12423 unsigned long offset;
12425 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12428 "Error: could not find extent items for root %llu\n",
12429 root_key->objectid);
12433 rii = container_of(entry, struct root_item_info, cache_extent);
12434 ASSERT(rii->cache_extent.start == root_id);
12435 ASSERT(rii->cache_extent.size == 1);
/* Anything but exactly one candidate node is reported as an error. */
12437 if (rii->node_count != 1) {
12439 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into the local variable ri for inspection. */
12444 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12445 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* The root item is outdated if any of the three fields differs. */
12447 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12448 btrfs_root_level(&ri) != rii->level ||
12449 btrfs_root_generation(&ri) != rii->gen) {
12452 * If we're in repair mode but our caller told us to not update
12453 * the root item, i.e. just check if it needs to be updated, don't
12454 * print this message, since the caller will call us again shortly
12455 * for the same root item without read only mode (the caller will
12456 * open a transaction first).
12458 if (!(read_only_mode && repair))
12460 "%sroot item for root %llu,"
12461 " current bytenr %llu, current gen %llu, current level %u,"
12462 " new bytenr %llu, new gen %llu, new level %u\n",
12463 (read_only_mode ? "" : "fixing "),
12465 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12466 btrfs_root_level(&ri),
12467 rii->bytenr, rii->gen, rii->level);
/*
 * A root item with a newer generation than the node found in the extent
 * tree gets its own message; the follow-up action is not visible here.
 */
12469 if (btrfs_root_generation(&ri) > rii->gen) {
12471 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12472 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back over the item in the leaf. */
12476 if (!read_only_mode) {
12477 btrfs_set_root_bytenr(&ri, rii->bytenr);
12478 btrfs_set_root_level(&ri, rii->level);
12479 btrfs_set_root_generation(&ri, rii->gen);
12480 write_extent_buffer(path->nodes[0], &ri,
12481 offset, sizeof(ri));
12491 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12492 * caused read-only snapshots to be corrupted if they were created at a moment
12493 * when the source subvolume/snapshot had orphan items. The issue was that the
12494 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12495 * node instead of the post orphan cleanup root node.
12496 * So this function, and its callees, just detects and fixes those cases. Even
12497 * though the regression was for read-only snapshots, this function applies to
12498 * any snapshot/subvolume root.
12499 * This must be run before any other repair code - failing to do so makes other
12500 * repair code delete or modify backrefs in the extent tree, for example, which
12501 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the tree root.
 * The roots_info_cache is built first, then root items are visited starting
 * at objectid BTRFS_FIRST_FREE_OBJECTID and each one is handed to
 * maybe_repair_root_item(). The cache is released before leaving.
 *
 * NOTE(review): labels, gotos, the conditions around the transaction start
 * and the return statement are not visible in this listing. Judging by the
 * messages in cmd_check(), a negative result is an error code and a positive
 * one a number of roots - TODO confirm.
 */
12503 static int repair_root_items(struct btrfs_fs_info *info)
12505 struct btrfs_path path;
12506 struct btrfs_key key;
12507 struct extent_buffer *leaf;
12508 struct btrfs_trans_handle *trans = NULL;
12511 int need_trans = 0;
12513 btrfs_init_path(&path);
12515 ret = build_roots_info_cache(info);
12519 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12520 key.type = BTRFS_ROOT_ITEM_KEY;
12525 * Avoid opening and committing transactions if a leaf doesn't have
12526 * any root items that need to be fixed, so that we avoid rotating
12527 * backup roots unnecessarily.
12530 trans = btrfs_start_transaction(info->tree_root, 1);
12531 if (IS_ERR(trans)) {
12532 ret = PTR_ERR(trans);
/* The search uses trans, which is still NULL if no transaction was started. */
12537 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12541 leaf = path.nodes[0];
12544 struct btrfs_key found_key;
/*
 * Leaf exhausted: find_next_key() stores the key to continue from in key,
 * then the path is dropped and the transaction committed.
 */
12546 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12547 int no_more_keys = find_next_key(&path, &key);
12549 btrfs_release_path(&path);
12551 ret = btrfs_commit_transaction(trans,
12563 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Items that are not root items, and the reloc tree, are singled out. */
12565 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12567 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without a transaction the item is only checked (read_only_mode is 1). */
12570 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair requested but no transaction yet: the path is dropped here. */
12574 if (!trans && repair) {
12577 btrfs_release_path(&path);
12587 free_roots_info_cache();
12588 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is discarded - check whether a failure should be propagated. */
12590 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache (the v1 variant, see the caller in cmd_check):
 * btrfs_clear_free_space_cache() is called for block groups returned by
 * btrfs_lookup_first_block_group(), afterwards the cache generation in the
 * super block copy is set to (u64)-1 inside a transaction.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the loop header, its exit conditions and the final return
 * are not visible in this listing.
 */
12597 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12599 struct btrfs_trans_handle *trans;
12600 struct btrfs_block_group_cache *bg_cache;
12604 /* Clear all free space cache inodes and its extent data */
12606 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12609 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Next lookup starts at objectid + offset, presumably the end of this block group. */
12612 current = bg_cache->key.objectid + bg_cache->key.offset;
12615 /* Don't forget to set cache_generation to -1 */
12616 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12617 if (IS_ERR(trans)) {
12618 error("failed to update super block cache generation");
12619 return PTR_ERR(trans);
12621 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is discarded - check whether a failure should be returned. */
12622 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text of the check command, one string per output line. The array is
 * passed to usage() from cmd_check() on invalid invocations.
 * Layout as visible here: synopsis, short description, long description,
 * then one entry per option.
 */
12627 const char * const cmd_check_usage[] = {
12628 "btrfs check [options] <device>",
12629 "Check structural integrity of a filesystem (unmounted).",
12630 "Check structural integrity of an unmounted filesystem. Verify internal",
12631 "trees' consistency and item connectivity. In the repair mode try to",
12632 "fix the problems found. ",
12633 "WARNING: the repair mode is considered dangerous",
/* Option descriptions follow. */
12635 "-s|--super <superblock> use this superblock copy",
12636 "-b|--backup use the first valid backup root copy",
12637 "--repair try to repair the filesystem",
12638 "--readonly run in read-only mode (default)",
12639 "--init-csum-tree create a new CRC tree",
12640 "--init-extent-tree create a new extent tree",
12641 "--mode <MODE> allows choice of memory/IO trade-offs",
12642 " where MODE is one of:",
12643 " original - read inodes and extents to memory (requires",
12644 " more memory, does less IO)",
12645 " lowmem - try to use less memory but read blocks again",
12647 "--check-data-csum verify checksums of data blocks",
12648 "-Q|--qgroup-report print a report on qgroup consistency",
12649 "-E|--subvol-extents <subvolid>",
12650 " print subvolume extents and sharing state",
12651 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12652 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12653 "-p|--progress indicate progress",
12654 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command. Visible stages, in order: option
 * parsing, sanity checks on option combinations, mount check, opening the
 * filesystem, the optional clear-space-cache / qgroup-report /
 * subvol-extents modes, optional reinitialization of the extent and csum
 * trees, and then the individual checks (extents, root items, free space,
 * fs roots, csums, root refs, quota groups) followed by a summary.
 *
 * @argc, @argv: command line; exactly one non-option argument (the device)
 *               is expected, otherwise usage() is called
 *
 * NOTE(review): braces, case labels, gotos and return statements are not
 * visible in this listing, so the comments cover visible statements only.
 */
12658 int cmd_check(int argc, char **argv)
12660 struct cache_tree root_cache;
12661 struct btrfs_root *root;
12662 struct btrfs_fs_info *info;
12665 u64 tree_root_bytenr = 0;
12666 u64 chunk_root_bytenr = 0;
12667 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12671 int init_csum_tree = 0;
12673 int clear_space_cache = 0;
12674 int qgroup_report = 0;
12675 int qgroups_repaired = 0;
12676 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* getopt_long() return values for the long-only options, numbered from 257. */
12680 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12681 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12682 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12683 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12684 static const struct option long_options[] = {
12685 { "super", required_argument, NULL, 's' },
12686 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12687 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12688 { "init-csum-tree", no_argument, NULL,
12689 GETOPT_VAL_INIT_CSUM },
12690 { "init-extent-tree", no_argument, NULL,
12691 GETOPT_VAL_INIT_EXTENT },
12692 { "check-data-csum", no_argument, NULL,
12693 GETOPT_VAL_CHECK_CSUM },
12694 { "backup", no_argument, NULL, 'b' },
12695 { "subvol-extents", required_argument, NULL, 'E' },
12696 { "qgroup-report", no_argument, NULL, 'Q' },
12697 { "tree-root", required_argument, NULL, 'r' },
12698 { "chunk-root", required_argument, NULL,
12699 GETOPT_VAL_CHUNK_TREE },
12700 { "progress", no_argument, NULL, 'p' },
12701 { "mode", required_argument, NULL,
12703 { "clear-space-cache", required_argument, NULL,
12704 GETOPT_VAL_CLEAR_SPACE_CACHE},
12705 { NULL, 0, NULL, 0}
/* Short options: a (accepted and ignored), s and r take an argument, b, p. */
12708 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12712 case 'a': /* ignored */ break;
12714 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy selection: the index must be below BTRFS_SUPER_MIRROR_MAX. */
12717 num = arg_strtou64(optarg);
12718 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12720 "super mirror should be less than %d",
12721 BTRFS_SUPER_MIRROR_MAX);
12724 bytenr = btrfs_sb_offset(((int)num));
12725 printf("using SB copy %llu, bytenr %llu\n", num,
12726 (unsigned long long)bytenr);
12732 subvolid = arg_strtou64(optarg);
12735 tree_root_bytenr = arg_strtou64(optarg);
12737 case GETOPT_VAL_CHUNK_TREE:
12738 chunk_root_bytenr = arg_strtou64(optarg);
12741 ctx.progress_enabled = true;
12745 usage(cmd_check_usage);
/* The options below that modify the filesystem add OPEN_CTREE_WRITES. */
12746 case GETOPT_VAL_REPAIR:
12747 printf("enabling repair mode\n");
12749 ctree_flags |= OPEN_CTREE_WRITES;
12751 case GETOPT_VAL_READONLY:
12754 case GETOPT_VAL_INIT_CSUM:
12755 printf("Creating a new CRC tree\n");
12756 init_csum_tree = 1;
12758 ctree_flags |= OPEN_CTREE_WRITES;
12760 case GETOPT_VAL_INIT_EXTENT:
12761 init_extent_tree = 1;
12762 ctree_flags |= (OPEN_CTREE_WRITES |
12763 OPEN_CTREE_NO_BLOCK_GROUPS);
12766 case GETOPT_VAL_CHECK_CSUM:
12767 check_data_csum = 1;
12769 case GETOPT_VAL_MODE:
12770 check_mode = parse_check_mode(optarg);
12771 if (check_mode == CHECK_MODE_UNKNOWN) {
12772 error("unknown mode: %s", optarg);
/* clear_space_cache records the requested version: 1 for v1, 2 for v2. */
12776 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12777 if (strcmp(optarg, "v1") == 0) {
12778 clear_space_cache = 1;
12779 } else if (strcmp(optarg, "v2") == 0) {
12780 clear_space_cache = 2;
12781 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12784 "invalid argument to --clear-space-cache, must be v1 or v2");
12787 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) must remain. */
12792 if (check_argc_exact(argc - optind, 1))
12793 usage(cmd_check_usage);
/* Set up the progress reporting task when progress output was enabled. */
12795 if (ctx.progress_enabled) {
12796 ctx.tp = TASK_NOTHING;
12797 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12800 /* This check is the only reason for --readonly to exist */
12801 if (readonly && repair) {
12802 error("repair options are not compatible with --readonly");
12807 * Not supported yet
12809 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12810 error("low memory mode doesn't support repair yet");
12815 cache_tree_init(&root_cache);
/* Refuse to operate on a mounted filesystem. */
12817 if((ret = check_mounted(argv[optind])) < 0) {
12818 error("could not check mount status: %s", strerror(-ret));
12822 error("%s is currently mounted, aborting", argv[optind]);
12828 /* only allow partial opening under repair mode */
12830 ctree_flags |= OPEN_CTREE_PARTIAL;
12832 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12833 chunk_root_bytenr, ctree_flags);
12835 error("cannot open file system");
12841 global_info = info;
12842 root = info->fs_root;
/* v1 clearing is rejected when the FREE_SPACE_TREE compat_ro bit is set. */
12843 if (clear_space_cache == 1) {
12844 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12846 "free space cache v2 detected, use --clear-space-cache v2");
12850 printf("Clearing free space cache\n");
12851 ret = clear_free_space_cache(info);
12853 error("failed to clear free space cache");
12856 printf("Free space cache cleared\n");
/* v2 clearing only applies when the FREE_SPACE_TREE compat_ro bit is set. */
12859 } else if (clear_space_cache == 2) {
12860 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12861 printf("no free space cache v2 to clear\n");
12865 printf("Clear free space cache v2\n");
12866 ret = btrfs_clear_free_space_tree(info);
12868 error("failed to clear free space cache v2: %d", ret);
12871 printf("free space cache v2 cleared\n");
12877 * repair mode will force us to commit transaction which
12878 * will make us fail to load log tree when mounting.
12880 if (repair && btrfs_super_log_root(info->super_copy)) {
12881 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12887 ret = zero_log_tree(root);
12890 error("failed to zero log tree: %d", ret);
12895 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Quota group report mode: print the header and run qgroup_verify_all(). */
12896 if (qgroup_report) {
12897 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12899 ret = qgroup_verify_all(info);
/* Subvolume extent report for the subvolid parsed from the options. */
12906 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12907 subvolid, argv[optind], uuidbuf);
12908 ret = print_extent_state(info, subvolid);
12912 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Tree root, device tree and chunk tree nodes must all be up to date. */
12914 if (!extent_buffer_uptodate(info->tree_root->node) ||
12915 !extent_buffer_uptodate(info->dev_root->node) ||
12916 !extent_buffer_uptodate(info->chunk_root->node)) {
12917 error("critical roots corrupted, unable to check the filesystem");
/* Reinitialize the extent tree and/or the csum tree within one transaction. */
12923 if (init_extent_tree || init_csum_tree) {
12924 struct btrfs_trans_handle *trans;
12926 trans = btrfs_start_transaction(info->extent_root, 0);
12927 if (IS_ERR(trans)) {
12928 error("error starting transaction");
12929 ret = PTR_ERR(trans);
12934 if (init_extent_tree) {
12935 printf("Creating a new extent tree\n");
12936 ret = reinit_extent_tree(trans, info);
12942 if (init_csum_tree) {
12943 printf("Reinitialize checksum tree\n");
12944 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12946 error("checksum tree initialization failed: %d",
12953 ret = fill_csum_tree(trans, info->csum_root,
12957 error("checksum tree refilling failed: %d", ret);
12962 * Ok now we commit and run the normal fsck, which will add
12963 * extent entries for all of the items it finds.
12965 ret = btrfs_commit_transaction(trans, info->extent_root);
12970 if (!extent_buffer_uptodate(info->extent_root->node)) {
12971 error("critical: extent_root, unable to check the filesystem");
12976 if (!extent_buffer_uptodate(info->csum_root->node)) {
12977 error("critical: csum_root, unable to check the filesystem");
/* Extent and chunk checks; lowmem mode uses the _v2 implementation. */
12983 if (!ctx.progress_enabled)
12984 fprintf(stderr, "checking extents\n");
12985 if (check_mode == CHECK_MODE_LOWMEM)
12986 ret = check_chunks_and_extents_v2(root);
12988 ret = check_chunks_and_extents(root);
12992 "errors found in extent allocation tree or chunk allocation");
/* Root item check; the result is reported as an error or as a root count. */
12994 ret = repair_root_items(info);
12997 error("failed to repair root items: %s", strerror(-ret));
13001 fprintf(stderr, "Fixed %d roots.\n", ret);
13003 } else if (ret > 0) {
13005 "Found %d roots with an outdated root item.\n",
13008 "Please run a filesystem check with the option --repair to fix them.\n");
/* Free space check; wording depends on the FREE_SPACE_TREE compat_ro bit. */
13014 if (!ctx.progress_enabled) {
13015 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13016 fprintf(stderr, "checking free space tree\n");
13018 fprintf(stderr, "checking free space cache\n");
13020 ret = check_space_cache(root);
13023 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13024 error("errors found in free space tree");
13026 error("errors found in free space cache");
13031 * We used to have to have these hole extents in between our real
13032 * extents so if we don't have this flag set we need to make sure there
13033 * are no gaps in the file extents for inodes, otherwise we can just
13034 * ignore it when this happens.
13036 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13037 if (!ctx.progress_enabled)
13038 fprintf(stderr, "checking fs roots\n");
13039 if (check_mode == CHECK_MODE_LOWMEM)
13040 ret = check_fs_roots_v2(root->fs_info);
13042 ret = check_fs_roots(root, &root_cache);
13045 error("errors found in fs roots");
13049 fprintf(stderr, "checking csums\n");
13050 ret = check_csums(root);
13053 error("errors found in csum tree");
13057 fprintf(stderr, "checking root refs\n");
13058 /* For low memory mode, check_fs_roots_v2 handles root refs */
13059 if (check_mode != CHECK_MODE_LOWMEM) {
13060 ret = check_root_refs(root, &root_cache);
13063 error("errors found in root refs");
/* In repair mode, recow every extent buffer queued on the recow_ebs list. */
13068 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13069 struct extent_buffer *eb;
13071 eb = list_first_entry(&root->fs_info->recow_ebs,
13072 struct extent_buffer, recow);
13073 list_del_init(&eb->recow);
13074 ret = recow_extent_buffer(root, eb);
13077 error("fails to fix transid errors");
/* Process and unlink every entry collected on the delete_items list. */
13082 while (!list_empty(&delete_items)) {
13083 struct bad_item *bad;
13085 bad = list_first_entry(&delete_items, struct bad_item, list);
13086 list_del_init(&bad->list);
13088 ret = delete_bad_item(root, bad);
/* Quota groups are verified, and repaired via repair_qgroups(), if enabled. */
13094 if (info->quota_enabled) {
13095 fprintf(stderr, "checking quota groups\n");
13096 ret = qgroup_verify_all(info);
13099 error("failed to check quota groups");
13103 ret = repair_qgroups(info, &qgroups_repaired);
13106 error("failed to repair quota groups");
/* Buffers still left on recow_ebs are reported as transid errors. */
13112 if (!list_empty(&root->fs_info->recow_ebs)) {
13113 error("transid errors in file system");
13118 if (found_old_backref) { /*
13119 * there was a disk format change when mixed
13120 * backref was in testing tree. The old format
13121 * existed about one week.
13123 printf("\n * Found old mixed backref format. "
13124 "The old format is not supported! *"
13125 "\n * Please mount the FS in readonly mode, "
13126 "backup data and re-format the FS. *\n\n");
13129 printf("found %llu bytes used, ",
13130 (unsigned long long)bytes_used);
13132 printf("error(s) found\n");
13134 printf("no error found\n");
13135 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13136 printf("total tree bytes: %llu\n",
13137 (unsigned long long)total_btree_bytes);
13138 printf("total fs tree bytes: %llu\n",
13139 (unsigned long long)total_fs_tree_bytes);
13140 printf("total extent tree bytes: %llu\n",
13141 (unsigned long long)total_extent_tree_bytes);
13142 printf("btree space waste bytes: %llu\n",
13143 (unsigned long long)btree_space_waste);
13144 printf("file data blocks allocated: %llu\n referenced %llu\n",
13145 (unsigned long long)data_bytes_allocated,
13146 (unsigned long long)data_bytes_referenced);
13148 free_qgroup_counts();
13149 free_root_recs_tree(&root_cache);
13153 if (ctx.progress_enabled)
13154 task_deinit(ctx.info);