2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state of the checker: running byte counters, lists of items
 * queued for later handling, option flags and cached handles.  Everything
 * here is static, i.e. private to this file.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
/* Consulted by maybe_free_inode_rec(): when set, gaps in file extents are not reported. */
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
/* Context handed to the status printer (see print_status_check()). */
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Selectable checker implementations.  parse_check_mode() maps a user
 * supplied string to one of these; the default is the original mode.
 */
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode used for this run; starts out as the default. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common backref header.  It is embedded as the `node` member of
 * struct data_backref and struct tree_backref and linked into a list
 * through `list` (see to_extent_backref()).  The one-bit fields are
 * independent flags describing this backref.
 */
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
/*
 * Backref variant that embeds the common struct extent_backref header as
 * `node`; to_data_backref() recovers the outer structure from that member.
 */
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bits, one distinct bit per condition (bits 1..17 are visible in
 * this listing), so several conditions can be combined in one integer.
 */
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and linked through a list_head instead.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/*
 * One entry of an orphan-extent list, linked through `list`.  The fields
 * printed by print_orphan_data_extents() are objectid, offset and
 * disk_bytenr.
 */
146 struct orphan_data_extent {
147 struct list_head list;
/*
 * Backref variant that embeds the common struct extent_backref header as
 * `node`; to_tree_backref() recovers the outer structure from that member.
 */
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
/*
 * The value 2 fits in that two-bit field and differs from the plain
 * boolean values 0 and 1.
 */
169 enum { FLAG_UNSET = 2 };
/*
 * Bookkeeping record for one extent.  It carries three list heads
 * (backrefs, dups, list), an embedded cache_extent, a parent key and
 * generation, a reference count and a set of bit flags.
 * to_extent_record() maps the `list` member back to the record.
 */
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
/* Two bits wide so that it can also hold FLAG_UNSET (2). */
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, linked into
 * inode_record::backrefs through `list`.  The found_* bits record which
 * of the three item kinds (dir item, dir index, inode ref) has been seen
 * for this name; add_inode_backref() sets them.
 */
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
/*
 * Per-root record that can be queued on a list through `list` and that
 * carries a drop key (other members are not visible in this listing).
 */
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Error bits stored in a backref's `errors` field, one distinct bit each
 * (bits 0..12).  print_ref_error() turns them into text.
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * Node type kept in the `holes` rb-tree of an inode record.  The code in
 * this file accesses its `node`, `start` and `len` members.
 */
247 struct file_extent_hole {
/*
 * Everything collected about one inode while its items are scanned:
 * the list of name references (`backrefs`), state bits, the rb-tree of
 * file extent holes and the list of orphan data extents.  Records are
 * created by get_inode_rec() and released by free_inode_rec().
 */
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Error bits stored in inode_record::errors, one distinct bit each
 * (bits 0..14).  print_inode_error() turns them into text.
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root, linked into a list through `list` (see
 * to_root_backref()).  The one-bit fields are independent flags recording
 * which reference items were seen and whether the root is reachable.
 */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
/*
 * State of a tree walk.  `shared` caches shared_node entries keyed by
 * block address (see find_shared_node()/add_shared_node()); `nodes`
 * holds at most one shared node per tree level.  enter_shared_node() and
 * leave_shared_node() also use the active_node and root_level members.
 */
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
/* List element, linked through `list` (other members are not visible here). */
356 struct extent_entry {
361 struct list_head list;
/*
 * Summary kept per root item: the root's level and the number of nodes
 * found at that level, indexed through the embedded cache_extent.
 */
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are for
 * internal error bookkeeping only.  Every flag owns a distinct bit so that
 * an ORed result stays unambiguous.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable; it now uses the first free bit.
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
/*
 * Periodic status printer.  @p is a struct task_ctx.
 *
 * Starts a period of 1000 ms on priv->info, then prints the label that
 * belongs to the current position priv->tp followed by a spinner
 * character (work_indicator[count % 4]).  Each update ends in '\r' so the
 * next one overwrites it.  TASK_NOTHING is tested as a special case; how
 * it is handled is not visible in this listing.
 */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
/* Block until the next period tick before printing again. */
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper that allows older code to be reused */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
/* An empty tree has no gap at all and is handled separately. */
442 if (RB_EMPTY_ROOT(holes))
/* Otherwise the answer comes from the leftmost (first) hole. */
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback used when inserting into the holes rb-tree.
 *
 * Holes are ordered by `start`.  When both starts are equal the lengths
 * decide, so two nodes are never reported as equal (the insert site in
 * add_file_extent_hole() relies on that).
 */
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
/* A fresh node is allocated for the new range. */
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/*
 * The previous hole touches or overlaps the new one: grow the new node
 * backwards to prev->start and drop prev from the tree.
 */
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
/*
 * The next hole starts at or before the end of the new one.  The new
 * node is extended only when next reaches at least as far as it does;
 * next is then removed from the tree.
 */
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search() on the holes tree.
 *
 * @node: tree node being visited.
 * @data: a struct file_extent_hole describing what is searched for
 *        (del_file_extent_hole() passes a stack temporary).
 *
 * The searched start offset is compared against the half-open range
 * [hole->start, hole->start + hole->len) of the visited node.
 */
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
/* From here on `hole` refers to the tree node, no longer to @data. */
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much more restrictive than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
/* Locate the hole that contains the start of the range to delete. */
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
/* The range to delete must lie completely inside that single hole. */
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
/* Remainder in front of the deleted range. */
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
/* Remainder behind the deleted range. */
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of @src to @dst.
 *
 * The source tree is walked in order from its first node and each hole is
 * inserted with add_file_extent_hole(), so holes are merged into whatever
 * @dst already contains.  @src itself is only read.
 */
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
/*
 * Empty the holes tree: take the first node, erase it from the tree and
 * fetch the new first node, until nothing is left.
 */
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
/* Restart from the (new) first node rather than following a stale link. */
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Note that @root takes part in transaction @trans.
 *
 * Only acts the first time a root is seen in a given transaction
 * (root->last_trans differs from trans->transid): dirty tracking is
 * switched on, the transid is remembered, and the current root node
 * becomes the commit root, for which an extra buffer reference is taken.
 */
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree ordering callback for struct device_record nodes: records are
 * ordered by their devid.
 */
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
/*
 * Make a private deep copy of @orig_rec.
 *
 * The record is first copied bitwise.  Its backrefs list, orphan_extents
 * list and holes tree are then re-initialised and filled with newly
 * allocated copies, so the clone shares no nodes with the original.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself
 * cannot be allocated.  The tail of the function tears down whatever was
 * built on the clone (holes, backrefs, orphan extents).
 */
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy copied the original's list/tree pointers; reset them. */
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Each backref is followed by its name plus a terminating NUL. */
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Teardown of the partially built clone starts here. */
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
/*
 * NOTE(review): orig/tmp are struct inode_backref pointers, but this list
 * holds struct orphan_data_extent entries (see the copy loop above).  It
 * only works if `list` sits at the same offset in both types; consider
 * cursors of the orphan type.
 */
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
/*
 * Print the orphan data extents on @orphan_extents to stdout: one header
 * line naming the tree, then one line per extent with its inode, offset,
 * disk bytenr and disk length.  Prints nothing for an empty list.
 */
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded on @rec to stderr.
 *
 * Output is a single line "root <id> inode <ino> errors <hex>" followed
 * by one ", <description>" fragment per I_ERR_* bit that is set.  For a
 * reloc tree the line is prefixed with "reloc" and the id printed is the
 * root key offset instead of the root key objectid.  Orphan extents and
 * file extent holes are listed afterwards when the matching bit is set.
 */
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid, not the root_objectid local
 * that was adjusted for reloc trees above; confirm which id is intended.
 */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
/*
 * NOTE(review): u64 values are passed to %llu without the
 * (unsigned long long) cast used earlier in this function.
 */
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
/* Fallback line: one hole from 0 up to the sector-aligned inode size. */
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
/*
 * Look up, and optionally create, the inode record for @ino in
 * @inode_cache.
 *
 * For an existing entry that is shared (refs > 1) and requested for
 * modification (`mod` set), the cached pointer is replaced by a private
 * clone made with clone_inode_rec().
 *
 * A new record is zero-allocated, given extent_start = (u64)-1, empty
 * backref and orphan lists and an empty holes tree, and inserted under a
 * ptr_node keyed by [ino, size 1].  BTRFS_FREE_INO_OBJECTID is
 * special-cased before insertion.
 *
 * Returns the record, ERR_PTR(-ENOMEM) on allocation failure or
 * ERR_PTR(-EEXIST) when the cache insertion fails.
 */
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
/*
 * NOTE(review): the clone result is stored into node->data before the
 * IS_ERR() check, so a failed clone leaves an error pointer in the cache
 * entry; confirm callers never look at it again.
 */
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it. */
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
/*
 * NOTE(review): rec and node were allocated above; verify they are
 * released on this failure path.
 */
911 return ERR_PTR(-EEXIST);
/*
 * Empty @orphan_extents: repeatedly take the first entry and unlink it
 * until the list is empty.
 */
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
/*
 * Release an inode record together with everything hanging off it: all
 * backrefs are unlinked first, then the orphan extent list and the holes
 * tree are emptied.
 */
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
/*
 * Decide whether @rec holds nothing worth keeping.  That is the case when
 * it has no errors, has been checked, its inode item was found, the link
 * count stored in the inode equals the number of links found, and no
 * backrefs are left on its list.
 */
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate @rec and drop it from @inode_cache once nothing remains to
 * report for it.
 *
 * Visible steps:
 *  - records without an inode item are treated separately up front;
 *  - every backref that has both a dir item and a dir index is compared
 *    with the file type derived from the inode mode, and backrefs that
 *    are complete and error free are unlinked when nlink == found_link;
 *  - records that are not yet checked, or are being merged, are treated
 *    separately before the consistency rules run;
 *  - directory, regular file and symlink consistency rules add I_ERR_*
 *    bits;
 *  - if can_free_inode_rec() holds, the cache entry is removed and the
 *    record freed.
 *
 * The record must not be shared at that point (refs == 1 is asserted).
 */
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
/* Directories: the accumulated size must match and no file extents may exist. */
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
/* Files and symlinks: no dir items, nbytes must match, extents must cover isize. */
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
/* Checksum presence has to agree with the inode's nodatasum setting. */
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
/*
 * Search @root for an orphan item: key objectid BTRFS_ORPHAN_OBJECTID and
 * type BTRFS_ORPHAN_ITEM_KEY.  The search is done without a transaction
 * handle and the path is released again straight after the search.
 */
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
/*
 * Record the inode item found at @slot of leaf @eb in the record that is
 * current for @active_node.
 *
 * The current record must belong to key->objectid and must not be shared
 * (both asserted).  A second inode item for the same record sets
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are
 * copied from the item, the NODATASUM inode flag is examined, and a link
 * count of zero sets I_ERR_NO_ORPHAN_ITEM.  Finally the record is offered
 * to maybe_free_inode_rec().
 */
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches (@dir, name, @namelen), or create
 * it.
 *
 * Existing backrefs are compared by directory, name length and name
 * bytes; records for BTRFS_MULTIPLE_OBJECTIDS are special-cased in the
 * search loop.  A new backref is allocated with room for the name plus a
 * terminating NUL, its fixed part is zeroed, the name is copied in, and
 * it is appended to rec->backrefs.
 */
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed-size header is cleared; the name bytes are written below. */
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that inode @ino is referenced from directory @dir under @name by
 * an item of type @itemtype.
 *
 * The inode record is fetched for modification and the matching backref
 * is looked up or created.  @errors is ORed into the backref.  Depending
 * on @itemtype (dir index, dir item, inode ref/extref) the matching
 * found_* bit is set, duplicates are flagged, and index and file type are
 * cross-checked against what the other item kinds reported earlier.  The
 * record is then offered to maybe_free_inode_rec().
 *
 * Failure to obtain the inode record is fatal (BUG_ON).
 */
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
/* Remember whether this came from a plain ref or an extref item. */
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, which lives in @dst_cache.
 *
 * Each backref of @src is replayed through add_inode_backref(), once per
 * item kind that was found for it.  The found_* bits, link and size
 * counters, extent range, holes and error bits are then combined, and
 * the inode item fields are taken from @src when @dst has none yet.  An
 * inode item present on both sides sets I_ERR_DUP_INODE_ITEM.
 */
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
/* Holes are copied only when src's first gap lies before dst's. */
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted so the links it stands for are not counted again. */
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
/* (u64)-1 means "no extent seen" on that side. */
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
/* A gap between the two extent ranges becomes a hole in dst. */
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the cached records of @src_node into @dst_node.
 *
 * One reference is dropped from @src_node first.  Both caches are
 * processed in turn: root_cache first, then inode_cache.  For each
 * source entry a new ptr_node with the same [start, size] is inserted
 * into the destination.  If the destination already has an entry for
 * that inode (-EEXIST), the source record is merged into it with
 * merge_inode_recs() and freed.
 *
 * Afterwards the destination's `current` record is moved forward to the
 * inode that was current in the source, when that inode number is higher
 * or the destination had none.
 */
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the tree. */
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
/* The merged record may be freed below, so it must not stay "current". */
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
/* Second pass: switch from the root caches to the inode caches. */
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for caches of ptr_node entries: maps @cache back
 * to its ptr_node and releases the inode record with free_inode_rec().
 */
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
/*
 * Presumably expands to free_inode_recs_tree(), the function that
 * enter_shared_node() calls, using free_inode_ptr for each entry;
 * confirm against the macro definition.
 */
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared node registered for a block address in @shared
 * (lookup range [bytenr, size 1]) and map the cache entry back to its
 * struct shared_node.
 */
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
/*
 * Register a new shared node for block @bytenr in @shared.
 *
 * The node is zero-allocated, keyed by [bytenr, size 1], gets empty root
 * and inode caches and is inserted into the tree.  @refs is the reference
 * count the node is meant to start with.
 */
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the walk reaches the shared block @bytenr at @level.
 *
 * Nothing needs doing when @level already is the active level; otherwise
 * the active level must be above @level (asserted).  An unknown block is
 * registered with add_shared_node() and becomes the active node for its
 * level.  For a known block one of two things happens:
 *  - when the active level is the root level of a root whose reference
 *    count is 0, one reference is dropped and, on reaching zero, its
 *    caches are freed and the node is removed;
 *  - otherwise its records are spliced into the currently active node
 *    and the node is removed once its reference count hits zero.
 */
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Called when the walk leaves the shared block that is active at @level.
 *
 * Leaving the root level is handled separately up front.  Otherwise the
 * levels above @level are scanned to pick the new active level, which
 * must exist (asserted).  The old active node is detached from wc->nodes
 * and, unless the new active level is the root level of a root whose
 * reference count is 0, its records are spliced into the new active
 * node.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
/* The node must still be referenced elsewhere when it is spliced away. */
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine the parent/child relation between two roots by searching the
 * tree root (root->fs_info->tree_root).
 *
 * Step 1 searches for the exact key (parent_root_id, ROOT_REF, child_root_id).
 * Step 2 searches the ROOT_BACKREF items of child_root_id and walks them,
 * comparing each key.offset with parent_root_id.
 * The final return distinguishes "has some other parent" (0) from "has no
 * parent at all" (2) through has_parent.
 * NOTE(review): the remaining parameter line, error handling and the
 * intermediate return statements are not visible in this excerpt.
 */
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Step 1: direct lookup of the ROOT_REF item parent -> child. */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Step 2: iterate the ROOT_BACKREF items of the child. */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: move on to the next leaf. */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for leaving the range of this child's backref items. */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into item @slot of leaf @eb (the caller
 * in this file passes DIR_ITEM and DIR_INDEX items).
 *
 * For each btrfs_dir_item in the item: the name length is added to the
 * directory record's found_size, the name is copied into a local buffer
 * (capped at BTRFS_NAME_LEN, flagging REF_ERR_NAME_TOO_LONG when capped), and
 * a backref is added to the inode cache or the root cache depending on the
 * type of the entry's location key.  Entries with any other location type are
 * reported on stderr and recorded under BTRFS_MULTIPLE_OBJECTIDS.
 * NOTE(review): local declarations, loop counter updates and the return
 * statement are not visible in this excerpt.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
/* Walk the variable-length entries until the item size is consumed. */
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
1515 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: cap the copy at the buffer size and flag the backref. */
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size dir item header. */
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
/* Advance past header, name and data to the next packed entry. */
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a DIR_INDEX key is flagged as a duplicate index. */
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into item @slot of leaf @eb as a
 * backref in the active node's inode cache.
 *
 * key->objectid is passed as the inode and key->offset as the directory.
 * Names longer than BTRFS_NAME_LEN are capped and flagged with
 * REF_ERR_NAME_TOO_LONG.
 * NOTE(review): local declarations, loop counter updates and the return
 * statement are not visible in this excerpt.
 */
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1572 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: cap the copy at the buffer size and flag the backref. */
1576 len = BTRFS_NAME_LEN;
1577 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size inode ref header. */
1579 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1580 add_inode_backref(inode_cache, key->objectid, key->offset,
1581 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref. */
1583 len = sizeof(*ref) + name_len;
1584 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into item @slot of leaf @eb as a
 * backref in the active node's inode cache.
 *
 * Unlike process_inode_ref(), the directory comes from the extref's own
 * parent field rather than from the item key.  Names longer than
 * BTRFS_NAME_LEN are capped and flagged with REF_ERR_NAME_TOO_LONG.
 * NOTE(review): local declarations, loop counter updates and the return
 * statement are not visible in this excerpt.
 */
1590 static int process_inode_extref(struct extent_buffer *eb,
1591 int slot, struct btrfs_key *key,
1592 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_extref *extref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_extref_name_len(eb, extref);
1611 index = btrfs_inode_extref_index(eb, extref);
1612 parent = btrfs_inode_extref_parent(eb, extref);
1613 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: cap the copy at the buffer size and flag the backref. */
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header. */
1620 read_extent_buffer(eb, namebuf,
1621 (unsigned long)(extref + 1), len);
1622 add_inode_backref(inode_cache, key->objectid, parent,
1623 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref. */
1625 len = sizeof(*extref) + name_len;
1626 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree (root->fs_info->csum_root) for EXTENT_CSUM items
 * overlapping the byte range [@start, @start + @len).
 *
 * Each csum item covers (item size / csum size) sectors starting at its
 * key.offset; the covered end is compared against @start to find the overlap.
 * NOTE(review): how @found is accumulated, the loop structure, error handling
 * and the return statement are not visible in this excerpt.
 */
1633 static int count_csum_range(struct btrfs_root *root, u64 start,
1634 u64 len, u64 *found)
1636 struct btrfs_key key;
1637 struct btrfs_path path;
1638 struct extent_buffer *leaf;
1643 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1645 btrfs_init_path(&path);
1647 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1649 key.type = BTRFS_EXTENT_CSUM_KEY;
1651 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: inspect the preceding item, which may be a csum item too. */
1655 if (ret > 0 && path.slots[0] > 0) {
1656 leaf = path.nodes[0];
1657 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1658 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1659 key.type == BTRFS_EXTENT_CSUM_KEY)
1664 leaf = path.nodes[0];
/* Slot ran past the end of this leaf: move on to the next leaf. */
1665 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1666 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1671 leaf = path.nodes[0];
1674 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1675 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1676 key.type != BTRFS_EXTENT_CSUM_KEY)
1679 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for a csum item that begins at or beyond the end of the range. */
1680 if (key.offset >= start + len)
1683 if (key.offset > start)
1686 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the data range covered by this csum item. */
1687 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1688 if (csum_end > start) {
1689 size = min(csum_end - start, len);
1698 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item (@slot in leaf @eb) against the inode record
 * currently being built in @active_node, and update that record.
 *
 * Visible checks:
 *  - extents must not overlap the previous extent (I_ERR_FILE_EXTENT_OVERLAP);
 *    a gap before this extent is recorded via add_file_extent_hole();
 *  - inline and regular/prealloc extents are sanity checked and may set
 *    I_ERR_BAD_FILE_EXTENT;
 *  - for extents with a disk bytenr outside the data reloc tree, checksum
 *    coverage is measured with count_csum_range() and recorded on the record.
 * NOTE(review): some conditions, else branches and the return statement are
 * not visible in this excerpt.
 */
1704 static int process_file_extent(struct btrfs_root *root,
1705 struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1709 struct inode_record *rec;
1710 struct btrfs_file_extent_item *fi;
1712 u64 disk_bytenr = 0;
1713 u64 extent_offset = 0;
/* Low bits that must be clear for a sector-aligned value. */
1714 u64 mask = root->sectorsize - 1;
1718 rec = active_node->current;
1719 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1720 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any extent yet. */
1722 if (rec->extent_start == (u64)-1) {
1723 rec->extent_start = key->offset;
1724 rec->extent_end = key->offset;
1727 if (rec->extent_end > key->offset)
1728 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1729 else if (rec->extent_end < key->offset) {
1730 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1731 key->offset - rec->extent_end);
1736 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1737 extent_type = btrfs_file_extent_type(eb, fi);
1739 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1740 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1742 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1743 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1744 num_bytes = (num_bytes + mask) & ~mask;
1745 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1746 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1747 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1748 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1749 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1750 if (num_bytes == 0 || (num_bytes & mask))
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must fit inside the extent's ram_bytes. */
1752 if (num_bytes + extent_offset >
1753 btrfs_file_extent_ram_bytes(eb, fi))
1754 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1755 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1756 (btrfs_file_extent_compression(eb, fi) ||
1757 btrfs_file_extent_encryption(eb, fi) ||
1758 btrfs_file_extent_other_encoding(eb, fi)))
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk bytenr contribute to found_size here. */
1760 if (disk_bytenr > 0)
1761 rec->found_size += num_bytes;
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1765 rec->extent_end = key->offset + num_bytes;
1768 * The data reloc tree will copy full extents into its inode and then
1769 * copy the corresponding csums. Because the extent it copied could be
1770 * a preallocated extent that hasn't been written to yet there may be no
1771 * csums to copy, ergo we won't have csums for our file extent. This is
1772 * ok so just don't bother checking csums if the inode belongs to the
1775 if (disk_bytenr > 0 &&
1776 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents: use the on-disk length for the csum lookup. */
1778 if (btrfs_file_extent_compression(eb, fi))
1779 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1781 disk_bytenr += extent_offset;
1783 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1786 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1788 rec->found_csum_item = 1;
1789 if (found < num_bytes)
1790 rec->some_csum_missing = 1;
1791 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1793 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode check, dispatching by
 * key type to the process_*() helpers and feeding the active shared node's
 * inode cache.
 *
 * When the key's objectid moves past the current inode record, the previous
 * record is marked checked and handed to maybe_free_inode_rec(), and a record
 * for the new objectid is fetched with get_inode_rec().
 * NOTE(review): the switch header, the actions taken for free-space and
 * orphan keys, and the return statements are not visible in this excerpt.
 */
1799 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1800 struct walk_control *wc)
1802 struct btrfs_key key;
1806 struct cache_tree *inode_cache;
1807 struct shared_node *active_node;
1809 if (wc->root_level == wc->active_node &&
1810 btrfs_root_refs(&root->root_item) == 0)
1813 active_node = wc->nodes[wc->active_node];
1814 inode_cache = &active_node->inode_cache;
1815 nritems = btrfs_header_nritems(eb);
1816 for (i = 0; i < nritems; i++) {
1817 btrfs_item_key_to_cpu(eb, &key, i);
1819 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1821 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: finish the old record, start a new one. */
1824 if (active_node->current == NULL ||
1825 active_node->current->ino < key.objectid) {
1826 if (active_node->current) {
1827 active_node->current->checked = 1;
1828 maybe_free_inode_rec(inode_cache,
1829 active_node->current);
1831 active_node->current = get_inode_rec(inode_cache,
1833 BUG_ON(IS_ERR(active_node->current));
1836 case BTRFS_DIR_ITEM_KEY:
1837 case BTRFS_DIR_INDEX_KEY:
1838 ret = process_dir_item(eb, i, &key, active_node);
1840 case BTRFS_INODE_REF_KEY:
1841 ret = process_inode_ref(eb, i, &key, active_node);
1843 case BTRFS_INODE_EXTREF_KEY:
1844 ret = process_inode_extref(eb, i, &key, active_node);
1846 case BTRFS_INODE_ITEM_KEY:
1847 ret = process_inode_item(eb, i, &key, active_node);
1849 case BTRFS_EXTENT_DATA_KEY:
1850 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache used by the tree walkers (the struct header line is not
 * visible in this excerpt; the walkers refer to it as struct node_refs).
 */
/* bytenr of the tree block most recently looked up at each level */
1861 u64 bytenr[BTRFS_MAX_LEVEL];
/* reference count recorded for that block */
1862 u64 refs[BTRFS_MAX_LEVEL];
/* whether the block at that level is to be checked in the current tree */
1863 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both functions are used before their definitions. */
1866 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1867 struct node_refs *nrefs, u64 level);
1868 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1869 unsigned int ext_ref);
1872 * Returns >0 Found error, not fatal, should continue
1873 * Returns <0 Fatal error, must exit the whole check
1874 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0].
 *
 * The leaf is first scanned for the first INODE_ITEM key or the first change
 * of inode number; check_inode_item() is then run and its result OR-ed into
 * the error bitmap.  check_inode_item() may advance @path to another leaf; in
 * that case the nodes from the root level down are re-examined with
 * update_nodes_refs() to see whether they still need checking.
 * NOTE(review): loop exits, gotos/labels and the return statement are not
 * visible in this excerpt.
 */
1876 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1877 struct node_refs *nrefs, int *level, int ext_ref)
1879 struct extent_buffer *cur = path->nodes[0];
1880 struct btrfs_key key;
1884 int root_level = btrfs_header_level(root->node);
1886 int ret = 0; /* Final return value */
1887 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a later leaf switch. */
1889 cur_bytenr = cur->start;
1891 /* skip to first inode item or the first inode number change */
1892 nritems = btrfs_header_nritems(cur);
1893 for (i = 0; i < nritems; i++) {
1894 btrfs_item_key_to_cpu(cur, &key, i);
1896 first_ino = key.objectid;
1897 if (key.type == BTRFS_INODE_ITEM_KEY ||
1898 (first_ino && first_ino != key.objectid))
1902 path->slots[0] = nritems;
1908 err |= check_inode_item(root, path, ext_ref);
1910 if (err & LAST_ITEM)
1913 /* still have inode items in this leaf */
1914 if (cur->start == cur_bytenr)
1918 * we have switched to another leaf, above nodes may
1919 * have changed, here walk down the path, if a node
1920 * or leaf is shared, check whether we can skip this
1923 for (i = root_level; i >= 0; i--) {
/* Compare against the block already recorded for this level. */
1924 if (path->nodes[i]->start == nrefs->bytenr[i])
1927 ret = update_nodes_refs(root,
1928 path->nodes[i]->start,
1933 if (!nrefs->need_check[i]) {
/* Release the path entries below *level. */
1939 for (i = 0; i < *level; i++) {
1940 free_extent_buffer(path->nodes[i]);
1941 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot and covering
 * every remaining pointer in the node.
 * NOTE(review): local declarations and any early return based on the node's
 * level are not visible in this excerpt.
 */
1950 static void reada_walk_down(struct btrfs_root *root,
1951 struct extent_buffer *node, int slot)
1960 level = btrfs_header_level(node);
1964 nritems = btrfs_header_nritems(node);
1965 blocksize = root->nodesize;
1966 for (i = slot; i < nritems; i++) {
1967 bytenr = btrfs_node_blockptr(node, i);
1968 ptr_gen = btrfs_node_ptr_generation(node, i);
1969 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1974 * Check the child node/leaf by the following condition:
1975 * 1. the first item key of the node/leaf should be the same as the one
1977 * 2. block in parent node should match the child node/leaf.
1978 * 3. generation of parent node and child's header should be consistent.
1980 * Or the child node/leaf pointed by the key in parent is not valid.
1982 * We would like to check the leaf owner too, but since subvolumes may share
1983 * leaves, the leaf owner check is not very strong; the key check should be
1984 * sufficient for that case.
/*
 * Validate @child against the key pointer stored at @slot of @parent.
 *
 * Three properties are compared: the first key of the child against the key
 * in the parent, the child's header bytenr against the parent's block
 * pointer, and the child's header generation against the parent's pointer
 * generation.  Each mismatch is reported on stderr as "wanted" (what the
 * parent records) versus "have" (what the child block actually contains).
 * NOTE(review): the local result variable, its assignments and the return
 * statement are not visible in this excerpt.
 */
1986 static int check_child_node(struct extent_buffer *parent, int slot,
1987 struct extent_buffer *child)
1989 struct btrfs_key parent_key;
1990 struct btrfs_key child_key;
1993 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys. */
1994 if (btrfs_header_level(child) == 0)
1995 btrfs_item_key_to_cpu(child, &child_key, 0);
1997 btrfs_node_key_to_cpu(child, &child_key, 0);
1999 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2002 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2003 parent_key.objectid, parent_key.type, parent_key.offset,
2004 child_key.objectid, child_key.type, child_key.offset);
2006 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2008 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2009 btrfs_node_blockptr(parent, slot),
2010 btrfs_header_bytenr(child));
2012 if (btrfs_node_ptr_generation(parent, slot) !=
2013 btrfs_header_generation(child)) {
/* wanted = generation recorded in the parent, have = child's own header. */
2015 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2016 btrfs_node_ptr_generation(parent, slot),
2017 btrfs_header_generation(child));
2023 * If a tree node or leaf is shared, we don't need to iterate it in every
2024 * fs or file tree check. Here we find all of its root ids, and only check
2025 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a shared tree block should be checked while walking @root,
 * given the list @roots of roots that reference the block.
 *
 * A list with a single entry is handled up front; otherwise the first entry
 * of the ulist's rb-tree is compared with root->objectid.
 * NOTE(review): the return values are not visible in this excerpt.
 */
2027 static int need_check(struct btrfs_root *root, struct ulist *roots)
2029 struct rb_node *node;
2030 struct ulist_node *u;
2032 if (roots->nnodes == 1)
/* rb_first() yields the leftmost node of the ulist's rb-tree. */
2035 node = rb_first(&roots->root);
2036 u = rb_entry(node, struct ulist_node, rb_node);
2038 * current root id is not smallest, we skip it and let it be checked
2039 * in the fs or file tree who hash the smallest root id.
2041 if (root->objectid != u->val)
2048 * For a tree node or leaf, we record its reference count, so that if we
2049 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the cached information in @nrefs for the tree block at @bytenr on
 * @level.
 *
 * When the cached bytenr for the level differs, the block's reference count
 * is looked up with btrfs_lookup_extent_info() and stored; the referencing
 * roots can then be collected with btrfs_find_all_roots() and passed to
 * need_check() to fill nrefs->need_check[level], which is otherwise set to 1.
 * NOTE(review): the conditions selecting between those two assignments, error
 * handling and the return statement are not visible in this excerpt.
 */
2051 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2052 struct node_refs *nrefs, u64 level)
2056 struct ulist *roots;
/* Only hit the extent tree when this level's cache is for another block. */
2058 if (nrefs->bytenr[level] != bytenr) {
2059 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2060 level, 1, &refs, NULL);
2064 nrefs->bytenr[level] = bytenr;
2065 nrefs->refs[level] = refs;
2067 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2072 check = need_check(root, roots);
2074 nrefs->need_check[level] = check;
2076 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: walk down from path->nodes[*level] towards the
 * leaves, processing each leaf with process_one_leaf().
 *
 * For every block visited the reference count is taken from @nrefs when the
 * cached bytenr matches, otherwise it is looked up and cached.  Blocks may be
 * registered through enter_shared_node().  Child blocks are read (with
 * readahead of their siblings), validated with check_child_node() and
 * btrfs_check_leaf()/btrfs_check_node(), and then installed in @path one
 * level down.  An unreadable child is recorded with
 * btrfs_add_corrupt_extent_record().
 * NOTE(review): several conditions, error paths, labels and the return
 * statement are not visible in this excerpt.
 */
2083 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2084 struct walk_control *wc, int *level,
2085 struct node_refs *nrefs)
2087 enum btrfs_tree_block_status status;
2090 struct extent_buffer *next;
2091 struct extent_buffer *cur;
2096 WARN_ON(*level < 0);
2097 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when it belongs to the starting block. */
2099 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2100 refs = nrefs->refs[*level];
2103 ret = btrfs_lookup_extent_info(NULL, root,
2104 path->nodes[*level]->start,
2105 *level, 1, &refs, NULL);
2110 nrefs->bytenr[*level] = path->nodes[*level]->start;
2111 nrefs->refs[*level] = refs;
2115 ret = enter_shared_node(root, path->nodes[*level]->start,
2123 while (*level >= 0) {
2124 WARN_ON(*level < 0);
2125 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2126 cur = path->nodes[*level];
2128 if (btrfs_header_level(cur) != *level)
2131 if (path->slots[*level] >= btrfs_header_nritems(cur))
2134 ret = process_one_leaf(root, cur, wc);
/* Child pointer at the current slot. */
2139 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2140 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2141 blocksize = root->nodesize;
/* Same caching scheme as above, for the child level. */
2143 if (bytenr == nrefs->bytenr[*level - 1]) {
2144 refs = nrefs->refs[*level - 1];
2146 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2147 *level - 1, 1, &refs, NULL);
2151 nrefs->bytenr[*level - 1] = bytenr;
2152 nrefs->refs[*level - 1] = refs;
2157 ret = enter_shared_node(root, bytenr, refs,
2160 path->slots[*level]++;
/* Try the block cache first; fall back to reading from disk. */
2165 next = btrfs_find_tree_block(root, bytenr, blocksize);
2166 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2167 free_extent_buffer(next);
2168 reada_walk_down(root, cur, path->slots[*level]);
2169 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent block as a corrupt extent. */
2171 if (!extent_buffer_uptodate(next)) {
2172 struct btrfs_key node_key;
2174 btrfs_node_key_to_cpu(path->nodes[*level],
2176 path->slots[*level]);
2177 btrfs_add_corrupt_extent_record(root->fs_info,
2179 path->nodes[*level]->start,
2180 root->nodesize, *level);
2186 ret = check_child_node(cur, path->slots[*level], next);
2188 free_extent_buffer(next);
2193 if (btrfs_is_leaf(next))
2194 status = btrfs_check_leaf(root, NULL, next);
2196 status = btrfs_check_node(root, NULL, next);
2197 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2198 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2203 *level = *level - 1;
2204 free_extent_buffer(path->nodes[*level]);
2205 path->nodes[*level] = next;
2206 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2209 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration.
 * NOTE(review): an identical declaration of check_inode_item() already
 * appears earlier in this file; this one looks redundant.
 */
2213 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2214 unsigned int ext_ref);
2217 * Returns >0 Found error, should continue
2218 * Returns <0 Fatal error, must exit the whole check
2219 * Returns 0 No errors found
/*
 * Second-generation descent: walk down from path->nodes[*level], validating
 * each node/leaf and processing leaves with process_one_leaf_v2().
 *
 * Reference information for each block is maintained through
 * update_nodes_refs(); a child whose need_check flag is clear is skipped by
 * advancing the slot.  Children are read (with readahead of their siblings),
 * validated with check_child_node() and btrfs_check_leaf()/btrfs_check_node()
 * and then installed in @path one level down.  An unreadable child is
 * recorded with btrfs_add_corrupt_extent_record().
 * NOTE(review): several conditions, error paths and the return statement are
 * not visible in this excerpt.
 */
2221 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2222 int *level, struct node_refs *nrefs, int ext_ref)
2224 enum btrfs_tree_block_status status;
2227 struct extent_buffer *next;
2228 struct extent_buffer *cur;
2232 WARN_ON(*level < 0);
2233 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235 ret = update_nodes_refs(root, path->nodes[*level]->start,
2240 while (*level >= 0) {
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2243 cur = path->nodes[*level];
2245 if (btrfs_header_level(cur) != *level)
2248 if (path->slots[*level] >= btrfs_header_nritems(cur))
2250 /* Don't forget to check leaf/node validity */
2252 ret = btrfs_check_leaf(root, NULL, cur);
2253 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2257 ret = process_one_leaf_v2(root, path, nrefs,
2261 ret = btrfs_check_node(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer at the current slot. */
2267 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2268 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2269 blocksize = root->nodesize;
2271 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: skip to the next slot. */
2274 if (!nrefs->need_check[*level - 1]) {
2275 path->slots[*level]++;
/* Try the block cache first; fall back to reading from disk. */
2279 next = btrfs_find_tree_block(root, bytenr, blocksize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent block as a corrupt extent. */
2285 if (!extent_buffer_uptodate(next)) {
2286 struct btrfs_key node_key;
2288 btrfs_node_key_to_cpu(path->nodes[*level],
2290 path->slots[*level]);
2291 btrfs_add_corrupt_extent_record(root->fs_info,
2293 path->nodes[*level]->start,
2294 root->nodesize, *level);
2300 ret = check_child_node(cur, path->slots[*level], next);
2304 if (btrfs_is_leaf(next))
2305 status = btrfs_check_leaf(root, NULL, next);
2307 status = btrfs_check_node(root, NULL, next);
2308 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309 free_extent_buffer(next);
/* Descend: install the child in the path one level down. */
2314 *level = *level - 1;
2315 free_extent_buffer(path->nodes[*level]);
2316 path->nodes[*level] = next;
2317 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level looking for a node that still has
 * an unvisited slot.
 *
 * Exhausted blocks are released from @path; when the released level is the
 * active shared node, leave_shared_node() is called for it.
 * NOTE(review): the slot/level updates inside the loop and the return
 * statements are not visible in this excerpt.
 */
2322 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2323 struct walk_control *wc, int *level)
2326 struct extent_buffer *leaf;
2328 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2329 leaf = path->nodes[i];
/* This block still has a following slot to visit. */
2330 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2335 free_extent_buffer(path->nodes[*level]);
2336 path->nodes[*level] = NULL;
2337 BUG_ON(*level > wc->active_node);
2338 if (*level == wc->active_node)
2339 leave_shared_node(root, wc, *level);
/*
 * Second-generation ascent: climb from *level looking for a node that still
 * has an unvisited slot, releasing exhausted blocks from @path.
 * NOTE(review): the remaining parameter line, the slot/level updates inside
 * the loop and the return statements are not visible in this excerpt.
 */
2346 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2350 struct extent_buffer *leaf;
2352 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2353 leaf = path->nodes[i];
/* This block still has a following slot to visit. */
2354 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2359 free_extent_buffer(path->nodes[*level]);
2360 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests cover: an inode item exists and no errors are recorded;
 * nlink is 1 and no links were found; a backref exists; the first backref is
 * an inode ref with index 0 and name ".."; that backref has no dir index or
 * dir item.
 * NOTE(review): the statements executed when a test fails and the return
 * values are not visible in this excerpt.
 */
2367 static int check_root_dir(struct inode_record *rec)
2369 struct inode_backref *backref;
2372 if (!rec->found_inode_item || rec->errors)
2374 if (rec->nlink != 1 || rec->found_link != 0)
2376 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2378 backref = to_inode_backref(rec->backrefs.next);
2379 if (!backref->found_inode_ref)
2381 if (backref->index != 0 || backref->namelen != 2 ||
2382 memcmp(backref->name, "..", 2))
2384 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of inode rec->ino with rec->found_size and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for (ino, INODE_ITEM, -1); the key
 * at the resulting slot is verified to belong to rec->ino before the item is
 * modified and the leaf marked dirty.
 * NOTE(review): the slot adjustment after the search, error handling and the
 * return statement are not visible in this excerpt.
 */
2391 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root, struct btrfs_path *path,
2393 struct inode_record *rec)
2395 struct btrfs_inode_item *ei;
2396 struct btrfs_key key;
2399 key.objectid = rec->ino;
2400 key.type = BTRFS_INODE_ITEM_KEY;
/* Search with the largest possible offset for this inode's item key. */
2401 key.offset = (u64)-1;
2403 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2407 if (!path->slots[0]) {
2414 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2415 if (key.objectid != rec->ino) {
2420 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2421 struct btrfs_inode_item);
2422 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2423 btrfs_mark_buffer_dirty(path->nodes[0]);
2424 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
/* %llu matches the other repair messages; %Lu is a non-standard extension. */
2425 printf("reset isize for dir %llu root %llu\n", rec->ino,
2426 root->root_key.objectid);
2428 btrfs_release_path(path);
/*
 * Add an orphan item for inode rec->ino with btrfs_add_orphan_item(), release
 * the path, and clear I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success, and the return
 * statement, are not visible in this excerpt.
 */
2432 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2433 struct btrfs_root *root,
2434 struct btrfs_path *path,
2435 struct inode_record *rec)
2439 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2440 btrfs_release_path(path);
2442 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of inode rec->ino with rec->found_size and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 * NOTE(review): the key offset assignment, the handling of the search result
 * and the return statement are not visible in this excerpt.
 */
2446 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2447 struct btrfs_root *root,
2448 struct btrfs_path *path,
2449 struct inode_record *rec)
2451 struct btrfs_inode_item *ei;
2452 struct btrfs_key key;
2455 key.objectid = rec->ino;
2456 key.type = BTRFS_INODE_ITEM_KEY;
2459 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2466 /* Since ret == 0, no need to check anything */
2467 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2468 struct btrfs_inode_item);
2469 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2470 btrfs_mark_buffer_dirty(path->nodes[0]);
2471 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2472 printf("reset nbytes for ino %llu root %llu\n",
2473 rec->ino, root->root_key.objectid);
2475 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec.
 *
 * Within its own transaction, an item keyed (backref->dir, DIR_INDEX,
 * backref->index) is created and filled with a dir item pointing at
 * (rec->ino, INODE_ITEM, 0), typed from rec->imode, with no data and the
 * backref's name.  Afterwards the backref is marked as having a dir index and
 * the parent directory's record is updated: its found_size grows by the name
 * length and I_ERR_DIR_ISIZE_WRONG is cleared or set depending on whether
 * found_size now equals isize.
 * NOTE(review): the result of btrfs_commit_transaction() is not used on the
 * visible line; error checks and the return statement are not visible in
 * this excerpt.
 */
2479 static int add_missing_dir_index(struct btrfs_root *root,
2480 struct cache_tree *inode_cache,
2481 struct inode_record *rec,
2482 struct inode_backref *backref)
2484 struct btrfs_path path;
2485 struct btrfs_trans_handle *trans;
2486 struct btrfs_dir_item *dir_item;
2487 struct extent_buffer *leaf;
2488 struct btrfs_key key;
2489 struct btrfs_disk_key disk_key;
2490 struct inode_record *dir_rec;
2491 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name. */
2492 u32 data_size = sizeof(*dir_item) + backref->namelen;
2495 trans = btrfs_start_transaction(root, 1);
2497 return PTR_ERR(trans);
2499 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2500 (unsigned long long)rec->ino);
2502 btrfs_init_path(&path);
2503 key.objectid = backref->dir;
2504 key.type = BTRFS_DIR_INDEX_KEY;
2505 key.offset = backref->index;
2506 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2509 leaf = path.nodes[0];
2510 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry, built directly in on-disk form. */
2512 disk_key.objectid = cpu_to_le64(rec->ino);
2513 disk_key.type = BTRFS_INODE_ITEM_KEY;
2514 disk_key.offset = 0;
2516 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2517 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2518 btrfs_set_dir_data_len(leaf, dir_item, 0);
2519 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored right after the dir item header. */
2520 name_ptr = (unsigned long)(dir_item + 1);
2521 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2522 btrfs_mark_buffer_dirty(leaf);
2523 btrfs_release_path(&path);
2524 btrfs_commit_transaction(trans, root);
2526 backref->found_dir_index = 1;
2527 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2528 BUG_ON(IS_ERR(dir_rec));
/* Account for the new entry in the parent directory's computed size. */
2531 dir_rec->found_size += backref->namelen;
2532 if (dir_rec->found_size == dir_rec->isize &&
2533 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2534 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2535 if (dir_rec->found_size != dir_rec->isize)
2536 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root, inside its own
 * transaction.
 *
 * The entry is located with btrfs_lookup_dir_index() using the backref's
 * directory, name and index.  Two removal calls are visible: deleting the
 * whole item with btrfs_del_item() and deleting a single name with
 * btrfs_delete_one_dir_name().
 * NOTE(review): the conditions choosing between the visible paths, error
 * handling and the return statements are not visible in this excerpt.
 */
2541 static int delete_dir_index(struct btrfs_root *root,
2542 struct inode_backref *backref)
2544 struct btrfs_trans_handle *trans;
2545 struct btrfs_dir_item *di;
2546 struct btrfs_path path;
2549 trans = btrfs_start_transaction(root, 1);
2551 return PTR_ERR(trans);
2553 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2554 (unsigned long long)backref->dir,
2555 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2556 (unsigned long long)root->objectid);
2558 btrfs_init_path(&path);
2559 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2560 backref->name, backref->namelen,
2561 backref->index, -1);
/* Path taken without deleting anything: release and commit. */
2564 btrfs_release_path(&path);
2565 btrfs_commit_transaction(trans, root);
2572 ret = btrfs_del_item(trans, root, &path);
2574 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2576 btrfs_release_path(&path);
2577 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec in @root, inside its own
 * transaction.
 *
 * The new item is zeroed and then filled from what the check has observed:
 * generation from the transaction id, nlink either 1 or rec->found_link,
 * nbytes from rec->found_size.  A record with a dir item becomes a directory
 * (S_IFDIR | 0755, size = found_size); otherwise it becomes a regular file
 * (S_IFREG | 0755, size = extent_end).  atime/ctime/mtime are set to the
 * current time and otime to 0.
 * NOTE(review): the third parameter line, the condition selecting the nlink
 * value, error handling and the return statements are not visible in this
 * excerpt.
 */
2581 static int create_inode_item(struct btrfs_root *root,
2582 struct inode_record *rec,
2585 struct btrfs_trans_handle *trans;
2586 struct btrfs_inode_item inode_item;
2587 time_t now = time(NULL);
2590 trans = btrfs_start_transaction(root, 1);
2591 if (IS_ERR(trans)) {
2592 ret = PTR_ERR(trans);
2596 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2597 "be incomplete, please check permissions and content after "
2598 "the fsck completes.\n", (unsigned long long)root->objectid,
2599 (unsigned long long)rec->ino);
2601 memset(&inode_item, 0, sizeof(inode_item));
2602 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2604 btrfs_set_stack_inode_nlink(&inode_item, 1);
2606 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2607 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2608 if (rec->found_dir_item) {
2609 if (rec->found_file_extent)
2610 fprintf(stderr, "root %llu inode %llu has both a dir "
2611 "item and extents, unsure if it is a dir or a "
2612 "regular file so setting it as a directory\n",
2613 (unsigned long long)root->objectid,
2614 (unsigned long long)rec->ino);
2615 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2616 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here; a plain else would do. */
2617 } else if (!rec->found_dir_item) {
2618 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2619 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2621 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2622 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2623 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2624 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2625 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2626 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2627 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2628 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2630 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2632 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of inode record @rec and repair inconsistencies between
 * dir index, dir item and inode ref.
 *
 * Visible repairs, selected by the delete flag and the backref's found_*
 * bits:
 *  - recreate a missing inode item for the root directory;
 *  - delete a dir index that has no matching inode ref, or whose index does
 *    not match (REF_ERR_INDEX_UNMATCH);
 *  - add a missing dir index when dir item and inode ref exist;
 *  - drop backrefs that are fully consistent from the list;
 *  - add a missing dir index/item pair when only the inode ref exists;
 *  - recreate a missing inode item when all three references agree.
 * Returns the error code when one is set, otherwise the repaired counter.
 * NOTE(review): the last parameter line, error handling between the steps and
 * the updates of the repaired counter are not visible in this excerpt.
 */
2636 static int repair_inode_backrefs(struct btrfs_root *root,
2637 struct inode_record *rec,
2638 struct cache_tree *inode_cache,
2641 struct inode_backref *tmp, *backref;
2642 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* _safe variant: entries may be removed from the list while iterating. */
2646 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2647 if (!delete && rec->ino == root_dirid) {
2648 if (!rec->found_inode_item) {
2649 ret = create_inode_item(root, rec, 1);
2656 /* Index 0 for root dir's are special, don't mess with it */
2657 if (rec->ino == root_dirid && backref->index == 0)
2661 ((backref->found_dir_index && !backref->found_inode_ref) ||
2662 (backref->found_dir_index && backref->found_inode_ref &&
2663 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2664 ret = delete_dir_index(root, backref);
2668 list_del(&backref->list);
2672 if (!delete && !backref->found_dir_index &&
2673 backref->found_dir_item && backref->found_inode_ref) {
2674 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice in this condition. */
2679 if (backref->found_dir_item &&
2680 backref->found_dir_index &&
2681 backref->found_dir_index) {
2682 if (!backref->errors &&
2683 backref->found_inode_ref) {
2684 list_del(&backref->list);
2690 if (!delete && (!backref->found_dir_index &&
2691 !backref->found_dir_item &&
2692 backref->found_inode_ref)) {
2693 struct btrfs_trans_handle *trans;
2694 struct btrfs_key location;
2696 ret = check_dir_conflict(root, backref->name,
2702 * let nlink fixing routine to handle it,
2703 * which can do it better.
2708 location.objectid = rec->ino;
2709 location.type = BTRFS_INODE_ITEM_KEY;
2710 location.offset = 0;
2712 trans = btrfs_start_transaction(root, 1);
2713 if (IS_ERR(trans)) {
2714 ret = PTR_ERR(trans);
2717 fprintf(stderr, "adding missing dir index/item pair "
2719 (unsigned long long)rec->ino);
2720 ret = btrfs_insert_dir_item(trans, root, backref->name,
2722 backref->dir, &location,
2723 imode_to_type(rec->imode),
2726 btrfs_commit_transaction(trans, root);
2730 if (!delete && (backref->found_inode_ref &&
2731 backref->found_dir_index &&
2732 backref->found_dir_item &&
2733 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2734 !rec->found_inode_item)) {
2735 ret = create_inode_item(root, rec, 0);
2742 return ret ? ret : repaired;
2746 * To determine the file type for nlink/inode_item repair
2748 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2749 * Return -ENOENT if file type is not found.
/*
 * Derive the BTRFS_FT_* file type of @rec and store it in *type.
 *
 * The inode item's mode is used when the inode item was found; otherwise the
 * filetype of a backref that has a dir index or dir item is used.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2751 static int find_file_type(struct inode_record *rec, u8 *type)
2753 struct inode_backref *backref;
2755 /* For inode item recovered case */
2756 if (rec->found_inode_item) {
2757 *type = imode_to_type(rec->imode);
2761 list_for_each_entry(backref, &rec->backrefs, list) {
2762 if (backref->found_dir_index || backref->found_dir_item) {
2763 *type = backref->filetype;
2771 * To determine the file name for nlink repair
2773 * Return 0 if file name is found, set name and namelen.
2774 * Return -ENOENT if file name is not found.
2776 static int find_file_name(struct inode_record *rec,
2777 char *name, int *namelen)
2779 struct inode_backref *backref;
2781 list_for_each_entry(backref, &rec->backrefs, list) {
2782 if (backref->found_dir_index || backref->found_dir_item ||
2783 backref->found_inode_ref) {
2784 memcpy(name, backref->name, backref->namelen);
2785 *namelen = backref->namelen;
2792 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of the inode described by @rec.
 *
 * Visible sequence:
 *   1. rec->found_link is cleared.
 *   2. Every backref on rec->backrefs is passed to btrfs_unlink(); a backref
 *      that lacks any of dir_index, dir_item or inode_ref is also removed
 *      from the list so that it is not re-added in step 4.
 *   3. The inode item is looked up and its nlink is set to 0.
 *   4. Each backref still on the list is re-added with btrfs_add_link().
 *
 * NOTE(review): the handling of the ret values and the final return are on
 * lines not visible in this chunk.
 */
2793 static int reset_nlink(struct btrfs_trans_handle *trans,
2794 struct btrfs_root *root,
2795 struct btrfs_path *path,
2796 struct inode_record *rec)
2798 struct inode_backref *backref;
2799 struct inode_backref *tmp;
2800 struct btrfs_key key;
2801 struct btrfs_inode_item *inode_item;
2804 /* We don't believe this either, reset it and iterate backref */
2805 rec->found_link = 0;
2807 /* Remove all backref including the valid ones */
2808 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2809 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2810 backref->index, backref->name,
2811 backref->namelen, 0);
2815 /* remove invalid backref, so it won't be added back */
2816 if (!(backref->found_dir_index &&
2817 backref->found_dir_item &&
2818 backref->found_inode_ref)) {
2819 list_del(&backref->list);
2826 /* Set nlink to 0 */
2827 key.objectid = rec->ino;
2828 key.type = BTRFS_INODE_ITEM_KEY;
/* Locate the inode item; the leaf found here is modified and dirtied below */
2830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2837 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2838 struct btrfs_inode_item);
2839 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2840 btrfs_mark_buffer_dirty(path->nodes[0]);
2841 btrfs_release_path(path);
2844 * Add back valid inode_ref/dir_item/dir_index,
2845 * add_link() will handle the nlink inc, so new nlink must be correct
2847 list_for_each_entry(backref, &rec->backrefs, list) {
2848 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2849 backref->name, backref->namelen,
2850 backref->filetype, &backref->index, 1);
2855 btrfs_release_path(path);
/*
 * Find the highest inode number currently present in @root.
 *
 * Searches for an INODE_ITEM key with objectid BTRFS_LAST_FREE_OBJECTID and
 * reads the key of the slot just before the returned position; the objectid
 * of that key is stored through the output pointer (*highest_ino).
 *
 * A result at or beyond BTRFS_LAST_FREE_OBJECTID is treated specially.
 * NOTE(review): the statement under that check, the condition guarding the
 * key read and the return value are on lines not visible in this chunk.
 */
2859 static int get_highest_inode(struct btrfs_trans_handle *trans,
2860 struct btrfs_root *root,
2861 struct btrfs_path *path,
2864 struct btrfs_key key, found_key;
2867 btrfs_init_path(path);
2868 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2870 key.type = BTRFS_INODE_ITEM_KEY;
2871 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2873 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2874 path->slots[0] - 1);
2875 *highest_ino = found_key.objectid;
2878 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2880 btrfs_release_path(path);
/*
 * Repair a wrong link count on the inode described by @rec.
 *
 * Visible sequence:
 *   - The file name and type are recovered from the backrefs before they are
 *     touched. Fallbacks: the decimal inode number as name, and
 *     BTRFS_FT_REG_FILE as type.
 *   - reset_nlink() rebuilds the links from the valid backrefs.
 *   - If rec->found_link is still 0 afterwards, a "lost+found" directory is
 *     created with btrfs_mkdir() under BTRFS_FIRST_FREE_OBJECTID and the
 *     inode is linked into it. On -EEXIST a suffix built from the inode
 *     number is appended to the name and the link is retried, as long as the
 *     result still fits in BTRFS_NAME_LEN.
 *   - I_ERR_LINK_COUNT_WRONG is cleared from rec->errors.
 *
 * NOTE(review): the error branches, goto targets and the return statement
 * are on lines not visible in this chunk.
 */
2884 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root,
2886 struct btrfs_path *path,
2887 struct inode_record *rec)
2889 char *dir_name = "lost+found";
2890 char namebuf[BTRFS_NAME_LEN] = {0};
2895 int name_recovered = 0;
2896 int type_recovered = 0;
2900 * Get file name and type first before these invalid inode ref
2901 * are deleted by remove_all_invalid_backref()
2903 name_recovered = !find_file_name(rec, namebuf, &namelen);
2904 type_recovered = !find_file_type(rec, &type);
2906 if (!name_recovered) {
2907 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2908 rec->ino, rec->ino);
2909 namelen = count_digits(rec->ino);
2910 sprintf(namebuf, "%llu", rec->ino);
2913 if (!type_recovered) {
2914 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2916 type = BTRFS_FT_REG_FILE;
2920 ret = reset_nlink(trans, root, path, rec);
2923 "Failed to reset nlink for inode %llu: %s\n",
2924 rec->ino, strerror(-ret));
/* No valid link survived the reset: park the inode under lost+found */
2928 if (rec->found_link == 0) {
2929 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2933 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2934 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2937 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2938 dir_name, strerror(-ret));
2941 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2942 namebuf, namelen, type, NULL, 1);
2944 * Add ".INO" suffix several times to handle case where
2945 * "FILENAME.INO" is already taken by another file.
2947 while (ret == -EEXIST) {
2949 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2951 if (namelen + count_digits(rec->ino) + 1 >
2956 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2958 namelen += count_digits(rec->ino) + 1;
2959 ret = btrfs_add_link(trans, root, rec->ino,
2960 lost_found_ino, namebuf,
2961 namelen, type, NULL, 1);
2965 "Failed to link the inode %llu to %s dir: %s\n",
2966 rec->ino, dir_name, strerror(-ret));
2970 * Just increase the found_link, don't actually add the
2971 * backref. This will make things easier and this inode
2972 * record will be freed after the repair is done.
2973 * So fsck will not report problem about this inode.
2976 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2977 namelen, namebuf, dir_name);
2979 printf("Fixed the nlink of inode %llu\n", rec->ino);
2982 * Clear the flag anyway, or we will loop forever for the same inode
2983 * as it will not be removed from the bad inode list and the dead loop
2986 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2987 btrfs_release_path(path);
2992 * Check if there is any normal(reg or prealloc) file extent for given
2994 * This is used to determine the file type when neither its dir_index/item nor
2995 * inode_item exists.
2997 * This will *NOT* report error, if any error happens, just consider it does
2998 * not have any normal file extent.
/*
 * Scan the EXTENT_DATA items of inode @ino in @root, looking for a file
 * extent whose type is not BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is read-only (no transaction handle is passed). Iteration moves
 * to the next leaf when the slot runs past the current one and stops at the
 * first key that does not belong to @ino or is not an EXTENT_DATA key.
 *
 * NOTE(review): the loop construct, the value set when a non-inline extent
 * is found and the return statement are on lines not visible in this chunk.
 */
3000 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3002 struct btrfs_path path;
3003 struct btrfs_key key;
3004 struct btrfs_key found_key;
3005 struct btrfs_file_extent_item *fi;
3009 btrfs_init_path(&path);
3011 key.type = BTRFS_EXTENT_DATA_KEY;
3014 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* The search may land one past the last item of a leaf */
3019 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3020 ret = btrfs_next_leaf(root, &path);
3027 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3029 if (found_key.objectid != ino ||
3030 found_key.type != BTRFS_EXTENT_DATA_KEY)
3032 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3033 struct btrfs_file_extent_item);
3034 type = btrfs_file_extent_type(path.nodes[0], fi);
3035 if (type != BTRFS_FILE_EXTENT_INLINE) {
3041 btrfs_release_path(&path);
3045 static u32 btrfs_type_to_imode(u8 type)
3047 static u32 imode_by_btrfs_type[] = {
3048 [BTRFS_FT_REG_FILE] = S_IFREG,
3049 [BTRFS_FT_DIR] = S_IFDIR,
3050 [BTRFS_FT_CHRDEV] = S_IFCHR,
3051 [BTRFS_FT_BLKDEV] = S_IFBLK,
3052 [BTRFS_FT_FIFO] = S_IFIFO,
3053 [BTRFS_FT_SOCK] = S_IFSOCK,
3054 [BTRFS_FT_SYMLINK] = S_IFLNK,
3057 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for the inode described by @rec.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed in this order: a regular (non-inline) file extent means
 * REG_FILE, a found dir item means DIR, orphan extents mean REG_FILE, and
 * REG_FILE is the final fallback. The inode is then created with
 * btrfs_new_inode() using mode | btrfs_type_to_imode(filetype).
 *
 * On the record, I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is
 * set so that the nlink repair runs afterwards.
 *
 * NOTE(review): the initial value of mode, the error check after
 * btrfs_new_inode() and the return statement are on lines not visible here.
 */
3060 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3061 struct btrfs_root *root,
3062 struct btrfs_path *path,
3063 struct inode_record *rec)
3067 int type_recovered = 0;
3070 printf("Trying to rebuild inode:%llu\n", rec->ino);
3072 type_recovered = !find_file_type(rec, &filetype);
3075 * Try to determine inode type if type not found.
3077 * For found regular file extent, it must be FILE.
3078 * For found dir_item/index, it must be DIR.
3080 * For undetermined one, use FILE as fallback.
3083 * 1. If found backref(inode_index/item is already handled) to it,
3085 * Need new inode-inode ref structure to allow search for that.
3087 if (!type_recovered) {
3088 if (rec->found_file_extent &&
3089 find_normal_file_extent(root, rec->ino)) {
3091 filetype = BTRFS_FT_REG_FILE;
3092 } else if (rec->found_dir_item) {
3094 filetype = BTRFS_FT_DIR;
3095 } else if (!list_empty(&rec->orphan_extents)) {
3097 filetype = BTRFS_FT_REG_FILE;
3099 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3102 filetype = BTRFS_FT_REG_FILE;
3106 ret = btrfs_new_inode(trans, root, rec->ino,
3107 mode | btrfs_type_to_imode(filetype));
3112 * Here inode rebuild is done, we only rebuild the inode item,
3113 * don't repair the nlink(like move to lost+found).
3114 * That is the job of nlink repair.
3116 * We just fill the record and return
3118 rec->found_dir_item = 1;
3119 rec->imode = mode | btrfs_type_to_imode(filetype);
3121 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3122 /* Ensure the inode_nlinks repair function will be called */
3123 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded on @rec to the inode.
 *
 * For each entry on rec->orphan_extents:
 *   - btrfs_get_extent() probes the file range (offset, disk_len). The
 *     message printed on the conflict path says the orphan is deleted, and
 *     btrfs_free_extent() is called there.
 *   - Otherwise a file extent item is inserted with disk_len used both as
 *     the on-disk and the in-file length, rec->found_size is increased, the
 *     range is removed from rec->holes, and the related error bits are
 *     cleared once found_size matches nbytes or no hole remains.
 *   - The entry is unlinked from the list.
 * Finally I_ERR_FILE_EXTENT_ORPHAN is cleared.
 *
 * NOTE(review): the branch conditions on ret, the freeing of the orphan
 * entry and the return statement are on lines not visible in this chunk.
 */
3128 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3129 struct btrfs_root *root,
3130 struct btrfs_path *path,
3131 struct inode_record *rec)
3133 struct orphan_data_extent *orphan;
3134 struct orphan_data_extent *tmp;
3137 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3139 * Check for conflicting file extents
3141 * Here we don't know whether the extents is compressed or not,
3142 * so we can only assume it not compressed nor data offset,
3143 * and use its disk_len as extent length.
3145 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3146 orphan->offset, orphan->disk_len, 0);
3147 btrfs_release_path(path);
3152 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3153 orphan->disk_bytenr, orphan->disk_len);
3154 ret = btrfs_free_extent(trans,
3155 root->fs_info->extent_root,
3156 orphan->disk_bytenr, orphan->disk_len,
3157 0, root->objectid, orphan->objectid,
3162 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3163 orphan->offset, orphan->disk_bytenr,
3164 orphan->disk_len, orphan->disk_len);
3168 /* Update file size info */
3169 rec->found_size += orphan->disk_len;
3170 if (rec->found_size == rec->nbytes)
3171 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3173 /* Update the file extent hole info too */
3174 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3178 if (RB_EMPTY_ROOT(&rec->holes))
3179 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3181 list_del(&orphan->list);
3184 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps between file extents of the inode described by @rec.
 *
 * Walks rec->holes from rb_first(): each hole is punched with
 * btrfs_punch_hole() and then removed from the tree, and
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A separate call punches the range [0, round_up(isize, sectorsize)) for a
 * file that lost all of its file extents.
 * NOTE(review): the condition selecting that special case, the loop
 * construct, the error checks and the return are on lines not visible here.
 */
3189 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3190 struct btrfs_root *root,
3191 struct btrfs_path *path,
3192 struct inode_record *rec)
3194 struct rb_node *node;
3195 struct file_extent_hole *hole;
3199 node = rb_first(&rec->holes);
3203 hole = rb_entry(node, struct file_extent_hole, node);
3204 ret = btrfs_punch_hole(trans, root, rec->ino,
3205 hole->start, hole->len);
3208 ret = del_file_extent_hole(&rec->holes, hole->start,
3212 if (RB_EMPTY_ROOT(&rec->holes))
3213 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from its first node */
3214 node = rb_first(&rec->holes);
3216 /* special case for a file losing all its file extent */
3218 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3219 round_up(rec->isize, root->sectorsize));
3223 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3224 rec->ino, root->objectid);
/*
 * Run the repair routines that match the error bits set on @rec, all inside
 * one transaction.
 *
 * Nothing is attempted unless at least one of the repairable bits listed in
 * the first test is set (the value returned in that case is not visible in
 * this chunk). The repairs run in a fixed order: missing inode item, orphan
 * extents, discount extents, dir isize, orphan item, nlinks, nbytes. Every
 * step after the first is skipped once an earlier step left ret non-zero.
 * The error bits are re-read before each step, so a bit set by an earlier
 * repair is seen by the later ones.
 *
 * The transaction reserves 7 items, sized for the nlink repair case. The
 * commit call at the end is not guarded by ret in the visible code.
 */
3229 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3231 struct btrfs_trans_handle *trans;
3232 struct btrfs_path path;
3235 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3236 I_ERR_NO_ORPHAN_ITEM |
3237 I_ERR_LINK_COUNT_WRONG |
3238 I_ERR_NO_INODE_ITEM |
3239 I_ERR_FILE_EXTENT_ORPHAN |
3240 I_ERR_FILE_EXTENT_DISCOUNT|
3241 I_ERR_FILE_NBYTES_WRONG)))
3245 * For nlink repair, it may create a dir and add link, so
3246 * 2 for parent(256)'s dir_index and dir_item
3247 * 2 for lost+found dir's inode_item and inode_ref
3248 * 1 for the new inode_ref of the file
3249 * 2 for lost+found dir's dir_index and dir_item for the file
3251 trans = btrfs_start_transaction(root, 7);
3253 return PTR_ERR(trans);
3255 btrfs_init_path(&path);
3256 if (rec->errors & I_ERR_NO_INODE_ITEM)
3257 ret = repair_inode_no_item(trans, root, &path, rec);
3258 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3259 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3260 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3261 ret = repair_inode_discount_extent(trans, root, &path, rec);
3262 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3263 ret = repair_inode_isize(trans, root, &path, rec);
3264 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3265 ret = repair_inode_orphan_item(trans, root, &path, rec);
3266 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3267 ret = repair_inode_nlinks(trans, root, &path, rec);
3268 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3269 ret = repair_inode_nbytes(trans, root, &path, rec);
3270 btrfs_commit_transaction(trans, root);
3271 btrfs_release_path(&path);
/*
 * Check, and in repair mode fix, every inode record collected for @root.
 *
 * Visible phases:
 *   1. A root with zero refs only gets a warning when @inode_cache is not
 *      empty.
 *   2. In repair mode the records are walked and repair_inode_backrefs() is
 *      called for those with a non-empty backref list; a stage counter
 *      drives repeated passes (see the in-function notes on ordering).
 *   3. The root directory record is looked up and verified with
 *      check_root_dir(). In repair mode a missing root dir is recreated with
 *      btrfs_make_root_dir() in its own transaction.
 *   4. Every record is removed from the cache. The root dir and the orphan
 *      objectid are freed directly. For the others the missing-orphan-item
 *      flag is re-checked with check_orphan_item(), the NO_INODE_ITEM and
 *      LINK_COUNT_WRONG bits are set where they apply, try_repair_inode() is
 *      run, and whatever remains is reported together with each unresolved
 *      backref.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 * NOTE(review): several conditions, loop headers and early returns are on
 * lines not visible in this chunk.
 */
3275 static int check_inode_recs(struct btrfs_root *root,
3276 struct cache_tree *inode_cache)
3278 struct cache_extent *cache;
3279 struct ptr_node *node;
3280 struct inode_record *rec;
3281 struct inode_backref *backref;
3286 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3288 if (btrfs_root_refs(&root->root_item) == 0) {
3289 if (!cache_tree_empty(inode_cache))
3290 fprintf(stderr, "warning line %d\n", __LINE__);
3295 * We need to repair backrefs first because we could change some of the
3296 * errors in the inode recs.
3298 * We also need to go through and delete invalid backrefs first and then
3299 * add the correct ones second. We do this because we may get EEXIST
3300 * when adding back the correct index because we hadn't yet deleted the
3303 * For example, if we were missing a dir index then the directories
3304 * isize would be wrong, so if we fixed the isize to what we thought it
3305 * would be and then fixed the backref we'd still have a invalid fs, so
3306 * we need to add back the dir index and then check to see if the isize
3311 if (stage == 3 && !err)
3314 cache = search_cache_extent(inode_cache, 0);
3315 while (repair && cache) {
3316 node = container_of(cache, struct ptr_node, cache);
3318 cache = next_cache_extent(cache);
3320 /* Need to free everything up and rescan */
3322 remove_cache_extent(inode_cache, &node->cache);
3324 free_inode_rec(rec);
3328 if (list_empty(&rec->backrefs))
3331 ret = repair_inode_backrefs(root, rec, inode_cache,
3345 rec = get_inode_rec(inode_cache, root_dirid, 0);
3346 BUG_ON(IS_ERR(rec));
3348 ret = check_root_dir(rec);
3350 fprintf(stderr, "root %llu root dir %llu error\n",
3351 (unsigned long long)root->root_key.objectid,
3352 (unsigned long long)root_dirid);
3353 print_inode_error(root, rec);
3358 struct btrfs_trans_handle *trans;
3360 trans = btrfs_start_transaction(root, 1);
3361 if (IS_ERR(trans)) {
3362 err = PTR_ERR(trans);
3367 "root %llu missing its root dir, recreating\n",
3368 (unsigned long long)root->objectid);
3370 ret = btrfs_make_root_dir(trans, root, root_dirid);
3373 btrfs_commit_transaction(trans, root);
3377 fprintf(stderr, "root %llu root dir %llu not found\n",
3378 (unsigned long long)root->root_key.objectid,
3379 (unsigned long long)root_dirid);
3383 cache = search_cache_extent(inode_cache, 0);
3386 node = container_of(cache, struct ptr_node, cache);
3388 remove_cache_extent(inode_cache, &node->cache);
3390 if (rec->ino == root_dirid ||
3391 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3392 free_inode_rec(rec);
3396 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3397 ret = check_orphan_item(root, rec->ino);
3399 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3400 if (can_free_inode_rec(rec)) {
3401 free_inode_rec(rec);
3406 if (!rec->found_inode_item)
3407 rec->errors |= I_ERR_NO_INODE_ITEM;
3408 if (rec->found_link != rec->nlink)
3409 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3411 ret = try_repair_inode(root, rec);
3412 if (ret == 0 && can_free_inode_rec(rec)) {
3413 free_inode_rec(rec);
3419 if (!(repair && ret == 0))
3421 print_inode_error(root, rec);
3422 list_for_each_entry(backref, &rec->backrefs, list) {
3423 if (!backref->found_dir_item)
3424 backref->errors |= REF_ERR_NO_DIR_ITEM;
3425 if (!backref->found_dir_index)
3426 backref->errors |= REF_ERR_NO_DIR_INDEX;
3427 if (!backref->found_inode_ref)
3428 backref->errors |= REF_ERR_NO_INODE_REF;
3429 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3430 " namelen %u name %s filetype %d errors %x",
3431 (unsigned long long)backref->dir,
3432 (unsigned long long)backref->index,
3433 backref->namelen, backref->name,
3434 backref->filetype, backref->errors);
3435 print_ref_error(backref->errors);
3437 free_inode_rec(rec);
3439 return (error > 0) ? -1 : 0;
3442 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3445 struct cache_extent *cache;
3446 struct root_record *rec = NULL;
3449 cache = lookup_cache_extent(root_cache, objectid, 1);
3451 rec = container_of(cache, struct root_record, cache);
3453 rec = calloc(1, sizeof(*rec));
3455 return ERR_PTR(-ENOMEM);
3456 rec->objectid = objectid;
3457 INIT_LIST_HEAD(&rec->backrefs);
3458 rec->cache.start = objectid;
3459 rec->cache.size = 1;
3461 ret = insert_cache_extent(root_cache, &rec->cache);
3463 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), or create
 * a new one.
 *
 * An existing entry matches when ref_root, dir and namelen are equal and the
 * name bytes compare equal; @index is not part of the comparison. A new
 * entry is allocated with room for the name plus a terminating NUL, filled
 * in and appended to rec->backrefs.
 *
 * NOTE(review): the handling of a failed calloc() and the return statements
 * are on lines not visible in this chunk.
 */
3468 static struct root_backref *get_root_backref(struct root_record *rec,
3469 u64 ref_root, u64 dir, u64 index,
3470 const char *name, int namelen)
3472 struct root_backref *backref;
3474 list_for_each_entry(backref, &rec->backrefs, list) {
3475 if (backref->ref_root != ref_root || backref->dir != dir ||
3476 backref->namelen != namelen)
3478 if (memcmp(name, backref->name, namelen))
/* One extra byte beyond the name for the NUL written below */
3483 backref = calloc(1, sizeof(*backref) + namelen + 1);
3486 backref->ref_root = ref_root;
3488 backref->index = index;
3489 backref->namelen = namelen;
3490 memcpy(backref->name, name, namelen);
3491 backref->name[namelen] = '\0';
3492 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: converts the cache
 * entry back to its root_record and unlinks every backref from the list.
 *
 * NOTE(review): the calls that release each backref and the record itself
 * are on lines not visible in this chunk.
 */
3496 static void free_root_record(struct cache_extent *cache)
3498 struct root_record *rec;
3499 struct root_backref *backref;
3501 rec = container_of(cache, struct root_record, cache);
3502 while (!list_empty(&rec->backrefs)) {
3503 backref = to_root_backref(rec->backrefs.next);
3504 list_del(&backref->list);
/*
 * Presumably expands to free_root_recs_tree(), which check_fs_roots() calls;
 * confirm against the macro definition.
 */
3511 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference item (DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF)
 * that points at subvolume @root_id in the root record cache.
 *
 * The matching backref is found or created, @errors is OR-ed into it, and
 * the found_* flag for @item_type is set. For every type except DIR_ITEM the
 * index is cross-checked: when an index-carrying item was already seen and
 * the index differs, REF_ERR_INDEX_UNMATCH is flagged. A second ROOT_REF or
 * ROOT_BACKREF for the same backref flags the matching DUP error.
 *
 * The backref becomes reachable once both the forward ref and the dir item
 * have been seen.
 * NOTE(review): the statements under three of the visible conditions
 * (presumably found_ref accounting; TODO confirm) and the return statement
 * are on lines not visible in this chunk.
 */
3513 static int add_root_backref(struct cache_tree *root_cache,
3514 u64 root_id, u64 ref_root, u64 dir, u64 index,
3515 const char *name, int namelen,
3516 int item_type, int errors)
3518 struct root_record *rec;
3519 struct root_backref *backref;
3521 rec = get_root_rec(root_cache, root_id);
3522 BUG_ON(IS_ERR(rec));
3523 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3526 backref->errors |= errors;
/* A DIR_ITEM is skipped by the index cross-check below */
3528 if (item_type != BTRFS_DIR_ITEM_KEY) {
3529 if (backref->found_dir_index || backref->found_back_ref ||
3530 backref->found_forward_ref) {
3531 if (backref->index != index)
3532 backref->errors |= REF_ERR_INDEX_UNMATCH;
3534 backref->index = index;
3538 if (item_type == BTRFS_DIR_ITEM_KEY) {
3539 if (backref->found_forward_ref)
3541 backref->found_dir_item = 1;
3542 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3543 backref->found_dir_index = 1;
3544 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3545 if (backref->found_forward_ref)
3546 backref->errors |= REF_ERR_DUP_ROOT_REF;
3547 else if (backref->found_dir_item)
3549 backref->found_forward_ref = 1;
3550 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3551 if (backref->found_back_ref)
3552 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3553 backref->found_back_ref = 1;
3558 if (backref->found_forward_ref && backref->found_dir_item)
3559 backref->reachable = 1;
/*
 * Move the subvolume references collected while walking @root from
 * @src_cache into the global root record cache @dst_cache.
 *
 * Records of the tree reloc root are simply discarded. Otherwise each inode
 * record is removed from @src_cache and tested with is_child_root(). Its
 * backrefs must not carry an inode ref (BUG_ON); every found dir item and
 * dir index is forwarded to add_root_backref() with @root as the referencing
 * root. The record is then freed.
 *
 * NOTE(review): the loop header, the branches on the is_child_root() result
 * and the return statements are on lines not visible in this chunk.
 */
3563 static int merge_root_recs(struct btrfs_root *root,
3564 struct cache_tree *src_cache,
3565 struct cache_tree *dst_cache)
3567 struct cache_extent *cache;
3568 struct ptr_node *node;
3569 struct inode_record *rec;
3570 struct inode_backref *backref;
3573 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3574 free_inode_recs_tree(src_cache);
3579 cache = search_cache_extent(src_cache, 0);
3582 node = container_of(cache, struct ptr_node, cache);
3584 remove_cache_extent(src_cache, &node->cache);
3587 ret = is_child_root(root, root->objectid, rec->ino);
3593 list_for_each_entry(backref, &rec->backrefs, list) {
3594 BUG_ON(backref->found_inode_ref);
3595 if (backref->found_dir_item)
3596 add_root_backref(dst_cache, rec->ino,
3597 root->root_key.objectid, backref->dir,
3598 backref->index, backref->name,
3599 backref->namelen, BTRFS_DIR_ITEM_KEY,
3601 if (backref->found_dir_index)
3602 add_root_backref(dst_cache, rec->ino,
3603 root->root_key.objectid, backref->dir,
3604 backref->index, backref->name,
3605 backref->namelen, BTRFS_DIR_INDEX_KEY,
3609 free_inode_rec(rec);
/*
 * Verify the reference graph between subvolume roots recorded in
 * @root_cache and report the problems found on stderr.
 *
 * Visible phases:
 *   1. The record of BTRFS_FS_TREE_OBJECTID is fetched first.
 *   2. Reachability pass: for records with found_ref == 0, each reachable
 *      backref is looked at through the record of its referencing root and
 *      may be marked unreachable. As the in-function fixme says, circular
 *      references are not detected.
 *   3. Reporting pass: an unreferenced record in the free objectid range is
 *      checked with check_orphan_item() on the tree root and reported as
 *      "not referenced" only when it has a root item. For every record the
 *      missing-item bits of each backref are filled in, and reachable
 *      backrefs with errors are printed as unresolved refs.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 * NOTE(review): loop headers, counters and several branch bodies are on
 * lines not visible in this chunk.
 */
3616 static int check_root_refs(struct btrfs_root *root,
3617 struct cache_tree *root_cache)
3619 struct root_record *rec;
3620 struct root_record *ref_root;
3621 struct root_backref *backref;
3622 struct cache_extent *cache;
3628 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3629 BUG_ON(IS_ERR(rec));
3632 /* fixme: this can not detect circular references */
3635 cache = search_cache_extent(root_cache, 0);
3639 rec = container_of(cache, struct root_record, cache);
3640 cache = next_cache_extent(cache);
3642 if (rec->found_ref == 0)
3645 list_for_each_entry(backref, &rec->backrefs, list) {
3646 if (!backref->reachable)
3649 ref_root = get_root_rec(root_cache,
3651 BUG_ON(IS_ERR(ref_root));
3652 if (ref_root->found_ref > 0)
3655 backref->reachable = 0;
3657 if (rec->found_ref == 0)
3663 cache = search_cache_extent(root_cache, 0);
3667 rec = container_of(cache, struct root_record, cache);
3668 cache = next_cache_extent(cache);
3670 if (rec->found_ref == 0 &&
3671 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3672 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3673 ret = check_orphan_item(root->fs_info->tree_root,
3679 * If we don't have a root item then we likely just have
3680 * a dir item in a snapshot for this root but no actual
3681 * ref key or anything so it's meaningless.
3683 if (!rec->found_root_item)
3686 fprintf(stderr, "fs tree %llu not referenced\n",
3687 (unsigned long long)rec->objectid);
3691 if (rec->found_ref > 0 && !rec->found_root_item)
3693 list_for_each_entry(backref, &rec->backrefs, list) {
3694 if (!backref->found_dir_item)
3695 backref->errors |= REF_ERR_NO_DIR_ITEM;
3696 if (!backref->found_dir_index)
3697 backref->errors |= REF_ERR_NO_DIR_INDEX;
3698 if (!backref->found_back_ref)
3699 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3700 if (!backref->found_forward_ref)
3701 backref->errors |= REF_ERR_NO_ROOT_REF;
3702 if (backref->reachable && backref->errors)
3709 fprintf(stderr, "fs tree %llu refs %u %s\n",
3710 (unsigned long long)rec->objectid, rec->found_ref,
3711 rec->found_root_item ? "" : "not found");
3713 list_for_each_entry(backref, &rec->backrefs, list) {
3714 if (!backref->reachable)
3716 if (!backref->errors && rec->found_root_item)
3718 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3719 " index %llu namelen %u name %s errors %x\n",
3720 (unsigned long long)backref->ref_root,
3721 (unsigned long long)backref->dir,
3722 (unsigned long long)backref->index,
3723 backref->namelen, backref->name,
3725 print_ref_error(backref->errors);
3728 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item at @slot of leaf @eb and record
 * it with add_root_backref().
 *
 * The name stored after the btrfs_root_ref structure is read into a buffer
 * of BTRFS_NAME_LEN bytes. When the recorded name length exceeds that limit,
 * only BTRFS_NAME_LEN bytes are read and REF_ERR_NAME_TOO_LONG is reported.
 *
 * The objectid and offset of @key swap roles between the two item types:
 * for a ROOT_REF the offset is passed as the referenced root and the
 * objectid as the referencing one, and the other way round otherwise.
 *
 * NOTE(review): the assignments in the in-range branch and the return
 * statement are on lines not visible in this chunk.
 */
3731 static int process_root_ref(struct extent_buffer *eb, int slot,
3732 struct btrfs_key *key,
3733 struct cache_tree *root_cache)
3739 struct btrfs_root_ref *ref;
3740 char namebuf[BTRFS_NAME_LEN];
3743 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3745 dirid = btrfs_root_ref_dirid(eb, ref);
3746 index = btrfs_root_ref_sequence(eb, ref);
3747 name_len = btrfs_root_ref_name_len(eb, ref);
3749 if (name_len <= BTRFS_NAME_LEN) {
3753 len = BTRFS_NAME_LEN;
3754 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size ref structure */
3756 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3758 if (key->type == BTRFS_ROOT_REF_KEY) {
3759 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3760 index, namebuf, len, key->type, error);
3762 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3763 index, namebuf, len, key->type, error);
3768 static void free_corrupt_block(struct cache_extent *cache)
3770 struct btrfs_corrupt_block *corrupt;
3772 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3776 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3779 * Repair the btree of the given root.
3781 * The fix is to remove the node key in corrupt_blocks cache_tree.
3782 * and rebalance the tree.
3783 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in @corrupt_blocks from the btree of @root.
 *
 * Nothing is done when the cache tree is empty. Otherwise, inside one
 * transaction:
 *   - First pass: for each corrupt block the parent slot is located by
 *     searching for the recorded key with path.lowest_level set to the
 *     recorded level, the block pointer is read, the pointer is removed with
 *     btrfs_del_ptr(), and the extent is released with btrfs_free_extent().
 *   - Second pass: the same keys are searched again with ins_len -1, which
 *     per the in-function comment is what rebalances the tree.
 * The transaction is then committed.
 *
 * NOTE(review): loop headers, the error checks after each call and the
 * return statement are on lines not visible in this chunk.
 */
3785 static int repair_btree(struct btrfs_root *root,
3786 struct cache_tree *corrupt_blocks)
3788 struct btrfs_trans_handle *trans;
3789 struct btrfs_path path;
3790 struct btrfs_corrupt_block *corrupt;
3791 struct cache_extent *cache;
3792 struct btrfs_key key;
3797 if (cache_tree_empty(corrupt_blocks))
3800 trans = btrfs_start_transaction(root, 1);
3801 if (IS_ERR(trans)) {
3802 ret = PTR_ERR(trans);
3803 fprintf(stderr, "Error starting transaction: %s\n",
3807 btrfs_init_path(&path);
3808 cache = first_cache_extent(corrupt_blocks);
3810 corrupt = container_of(cache, struct btrfs_corrupt_block,
3812 level = corrupt->level;
3813 path.lowest_level = level;
3814 key.objectid = corrupt->key.objectid;
3815 key.type = corrupt->key.type;
3816 key.offset = corrupt->key.offset;
3819 * Here we don't want to do any tree balance, since it may
3820 * cause a balance with corrupted brother leaf/node,
3821 * so ins_len set to 0 here.
3822 * Balance will be done after all corrupt node/leaf is deleted.
3824 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3827 offset = btrfs_node_blockptr(path.nodes[level],
3830 /* Remove the ptr */
3831 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3835 * Remove the corresponding extent
3836 * return value is not concerned.
3838 btrfs_release_path(&path);
3839 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3840 0, root->root_key.objectid,
3842 cache = next_cache_extent(cache);
3845 /* Balance the btree using btrfs_search_slot() */
3846 cache = first_cache_extent(corrupt_blocks);
3848 corrupt = container_of(cache, struct btrfs_corrupt_block,
3850 memcpy(&key, &corrupt->key, sizeof(key));
3851 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3854 /* return will always >0 since it won't find the item */
3856 btrfs_release_path(&path);
3857 cache = next_cache_extent(cache);
3860 btrfs_commit_transaction(trans, root);
3861 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible sequence:
 *   - A per-root corrupt_blocks cache tree is installed in fs_info for the
 *     duration of the check and removed again at the end.
 *   - Unless this is the tree reloc root, the root record is fetched and
 *     marked as having a root item when the root item has refs.
 *   - Orphan data extents queued on the root are moved to the matching inode
 *     records, which get I_ERR_FILE_EXTENT_ORPHAN.
 *   - The root block itself is verified with btrfs_check_leaf() or
 *     btrfs_check_node().
 *   - The walk starts at the root node, or at the recorded drop_progress key
 *     for a partially dropped root (an invalid drop level is reported), and
 *     alternates walk_down_tree() and walk_up_tree().
 *   - Corrupted tree blocks found during the walk are listed and handed to
 *     repair_btree().
 *   - Root references are merged into @root_cache with merge_root_recs() and
 *     the inode records are checked with check_inode_recs().
 *
 * NOTE(review): the conditions guarding the repair step, the walk loop, the
 * error accumulation and the return statement are on lines not visible in
 * this chunk.
 */
3865 static int check_fs_root(struct btrfs_root *root,
3866 struct cache_tree *root_cache,
3867 struct walk_control *wc)
3873 struct btrfs_path path;
3874 struct shared_node root_node;
3875 struct root_record *rec;
3876 struct btrfs_root_item *root_item = &root->root_item;
3877 struct cache_tree corrupt_blocks;
3878 struct orphan_data_extent *orphan;
3879 struct orphan_data_extent *tmp;
3880 enum btrfs_tree_block_status status;
3881 struct node_refs nrefs;
3884 * Reuse the corrupt_block cache tree to record corrupted tree block
3886 * Unlike the usage in extent tree check, here we do it in a per
3887 * fs/subvol tree base.
3889 cache_tree_init(&corrupt_blocks);
3890 root->fs_info->corrupt_blocks = &corrupt_blocks;
3892 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3893 rec = get_root_rec(root_cache, root->root_key.objectid);
3894 BUG_ON(IS_ERR(rec));
3895 if (btrfs_root_refs(root_item) > 0)
3896 rec->found_root_item = 1;
3899 btrfs_init_path(&path);
3900 memset(&root_node, 0, sizeof(root_node));
3901 cache_tree_init(&root_node.root_cache);
3902 cache_tree_init(&root_node.inode_cache);
3903 memset(&nrefs, 0, sizeof(nrefs));
3905 /* Move the orphan extent record to corresponding inode_record */
3906 list_for_each_entry_safe(orphan, tmp,
3907 &root->orphan_data_extents, list) {
3908 struct inode_record *inode;
3910 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3912 BUG_ON(IS_ERR(inode));
3913 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3914 list_move(&orphan->list, &inode->orphan_extents);
3917 level = btrfs_header_level(root->node);
3918 memset(wc->nodes, 0, sizeof(wc->nodes));
3919 wc->nodes[level] = &root_node;
3920 wc->active_node = level;
3921 wc->root_level = level;
3923 /* We may not have checked the root block, lets do that now */
3924 if (btrfs_is_leaf(root->node))
3925 status = btrfs_check_leaf(root, NULL, root->node);
3927 status = btrfs_check_node(root, NULL, root->node);
3928 if (status != BTRFS_TREE_BLOCK_CLEAN)
3931 if (btrfs_root_refs(root_item) > 0 ||
3932 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3933 path.nodes[level] = root->node;
3934 extent_buffer_get(root->node);
3935 path.slots[level] = 0;
3937 struct btrfs_key key;
3938 struct btrfs_disk_key found_key;
3940 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3941 level = root_item->drop_level;
3942 path.lowest_level = level;
3943 if (level > btrfs_header_level(root->node) ||
3944 level >= BTRFS_MAX_LEVEL) {
3945 error("ignoring invalid drop level: %u", level);
3948 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3951 btrfs_node_key(path.nodes[level], &found_key,
3953 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3954 sizeof(found_key)));
3958 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3964 wret = walk_up_tree(root, &path, wc, &level);
3971 btrfs_release_path(&path);
3973 if (!cache_tree_empty(&corrupt_blocks)) {
3974 struct cache_extent *cache;
3975 struct btrfs_corrupt_block *corrupt;
3977 printf("The following tree block(s) is corrupted in tree %llu:\n",
3978 root->root_key.objectid);
3979 cache = first_cache_extent(&corrupt_blocks);
3981 corrupt = container_of(cache,
3982 struct btrfs_corrupt_block,
3984 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3985 cache->start, corrupt->level,
3986 corrupt->key.objectid, corrupt->key.type,
3987 corrupt->key.offset);
3988 cache = next_cache_extent(cache);
3991 printf("Try to repair the btree for root %llu\n",
3992 root->root_key.objectid);
3993 ret = repair_btree(root, &corrupt_blocks);
3995 fprintf(stderr, "Failed to repair btree: %s\n",
3998 printf("Btree for root %llu is fixed\n",
3999 root->root_key.objectid);
4003 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4007 if (root_node.current) {
4008 root_node.current->checked = 1;
4009 maybe_free_inode_rec(&root_node.inode_cache,
4013 err = check_inode_recs(root, &root_node.inode_cache);
4017 free_corrupt_blocks_tree(&corrupt_blocks);
4018 root->fs_info->corrupt_blocks = NULL;
4019 free_orphan_data_extents(&root->orphan_data_extents);
4023 static int fs_root_objectid(u64 objectid)
4025 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4026 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4028 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree listed in the tree root.
 *
 * Visible sequence:
 *   - The progress task is started when progress reporting is enabled and
 *     stopped at the end.
 *   - Cached block groups are reset and the walk control is initialised.
 *   - The tree root is scanned for ROOT_ITEM keys. Each one accepted by
 *     fs_root_objectid() is read (uncached for the tree reloc root, through
 *     btrfs_read_fs_root() otherwise) and passed to check_fs_root().
 *     ROOT_REF and ROOT_BACKREF items go to process_root_ref().
 *   - When the tree root node changed, or check_fs_root() returned -EAGAIN,
 *     the root record cache is dropped and the path released; presumably the
 *     scan restarts from the top (the jump itself is not visible here).
 *   - A warning is printed if the shared cache is still not empty after it
 *     was freed.
 *
 * NOTE(review): the loop construct, the error accumulation and the return
 * statement are on lines not visible in this chunk.
 */
4031 static int check_fs_roots(struct btrfs_root *root,
4032 struct cache_tree *root_cache)
4034 struct btrfs_path path;
4035 struct btrfs_key key;
4036 struct walk_control wc;
4037 struct extent_buffer *leaf, *tree_node;
4038 struct btrfs_root *tmp_root;
4039 struct btrfs_root *tree_root = root->fs_info->tree_root;
4043 if (ctx.progress_enabled) {
4044 ctx.tp = TASK_FS_ROOTS;
4045 task_start(ctx.info);
4049 * Just in case we made any changes to the extent tree that weren't
4050 * reflected into the free space cache yet.
4053 reset_cached_block_groups(root->fs_info);
4054 memset(&wc, 0, sizeof(wc));
4055 cache_tree_init(&wc.shared);
4056 btrfs_init_path(&path);
4061 key.type = BTRFS_ROOT_ITEM_KEY;
4062 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4067 tree_node = tree_root->node;
4069 if (tree_node != tree_root->node) {
4070 free_root_recs_tree(root_cache);
4071 btrfs_release_path(&path);
4074 leaf = path.nodes[0];
4075 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4076 ret = btrfs_next_leaf(tree_root, &path);
4082 leaf = path.nodes[0];
4084 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4085 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4086 fs_root_objectid(key.objectid)) {
4087 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4088 tmp_root = btrfs_read_fs_root_no_cache(
4089 root->fs_info, &key);
4091 key.offset = (u64)-1;
4092 tmp_root = btrfs_read_fs_root(
4093 root->fs_info, &key);
4095 if (IS_ERR(tmp_root)) {
4099 ret = check_fs_root(tmp_root, root_cache, &wc);
4100 if (ret == -EAGAIN) {
4101 free_root_recs_tree(root_cache);
4102 btrfs_release_path(&path);
4107 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4108 btrfs_free_fs_root(tmp_root);
4109 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4110 key.type == BTRFS_ROOT_BACKREF_KEY) {
4111 process_root_ref(leaf, path.slots[0], &key,
4118 btrfs_release_path(&path);
4120 free_extent_cache_tree(&wc.shared);
4121 if (!cache_tree_empty(&wc.shared))
4122 fprintf(stderr, "warning line %d\n", __LINE__);
4124 task_stop(ctx.info);
4130 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches the
4131 * specified INODE_REF/INODE_EXTREF.
4133 * @root: the root of the fs/file tree
4134 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4135 * @key: the key of the DIR_ITEM/DIR_INDEX
4136 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4137 * distinguish root_dir from a normal dir/file
4138 * @name: the name in the INODE_REF/INODE_EXTREF
4139 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4140 * @mode: the st_mode of INODE_ITEM
4142 * Return 0 if no error occurred.
4143 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4144 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4146 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4147 * do not match for a normal dir/file.
/*
 * Implementation outline: @key is looked up with btrfs_search_slot(), then
 * every btrfs_dir_item packed in the found leaf item is compared against the
 * reference - first the location key, then the file type derived from @mode,
 * finally the name. The path is released before returning.
 */
4149 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4150 struct btrfs_key *key, u64 index, char *name,
4151 u32 namelen, u32 mode)
4153 struct btrfs_path path;
4154 struct extent_buffer *node;
4155 struct btrfs_dir_item *di;
4156 struct btrfs_key location;
4157 char namebuf[BTRFS_NAME_LEN] = {0};
4167 btrfs_init_path(&path);
4168 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170 ret = DIR_ITEM_MISSING;
4174 /* Process root dir and goto out*/
4177 ret = ROOT_DIR_ERROR;
4179 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4181 ref_key->type == BTRFS_INODE_REF_KEY ?
4183 ref_key->objectid, ref_key->offset,
4184 key->type == BTRFS_DIR_ITEM_KEY ?
4185 "DIR_ITEM" : "DIR_INDEX");
4193 /* Process normal file/dir */
4195 ret = DIR_ITEM_MISSING;
4197 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4199 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4200 ref_key->objectid, ref_key->offset,
4201 key->type == BTRFS_DIR_ITEM_KEY ?
4202 "DIR_ITEM" : "DIR_INDEX",
4203 key->objectid, key->offset, namelen, name,
4204 imode_to_type(mode));
4208 /* Check whether inode_id/filetype/name match */
4209 node = path.nodes[0];
4210 slot = path.slots[0];
4211 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4212 total = btrfs_item_size_nr(node, slot);
4213 while (cur < total) {
/* Each entry starts out as a mismatch until all comparisons pass. */
4214 ret = DIR_ITEM_MISMATCH;
4215 name_len = btrfs_dir_name_len(node, di);
4216 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode. */
4218 btrfs_dir_item_key_to_cpu(node, di, &location);
4219 if (location.objectid != ref_key->objectid ||
4220 location.type != BTRFS_INODE_ITEM_KEY ||
4221 location.offset != 0)
4224 filetype = btrfs_dir_type(node, di);
4225 if (imode_to_type(mode) != filetype)
/*
 * namebuf holds BTRFS_NAME_LEN bytes, so an over-long on-disk name is
 * read and compared on its first BTRFS_NAME_LEN bytes only.
 */
4228 if (name_len <= BTRFS_NAME_LEN) {
4231 len = BTRFS_NAME_LEN;
4232 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4234 key->type == BTRFS_DIR_ITEM_KEY ?
4235 "DIR_ITEM" : "DIR_INDEX",
4236 key->objectid, key->offset, name_len);
4238 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4239 if (len != namelen || strncmp(namebuf, name, len))
/* Step over this entry: header, name and data are stored back to back. */
4245 len = sizeof(*di) + name_len + data_len;
4246 di = (struct btrfs_dir_item *)((char *)di + len);
4249 if (ret == DIR_ITEM_MISMATCH)
4251 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4253 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4254 ref_key->objectid, ref_key->offset,
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, namelen, name,
4258 imode_to_type(mode));
4260 btrfs_release_path(&path);
4265 * Traverse the given INODE_REF and call find_dir_item() to find related
4266 * DIR_ITEM/DIR_INDEX.
4268 * @root: the root of the fs/file tree
4269 * @ref_key: the key of the INODE_REF
4270 * @refs: the count of INODE_REF
4271 * @mode: the st_mode of INODE_ITEM
4273 * Return 0 if no error occurred.
/*
 * Implementation outline: walks every btrfs_inode_ref packed in the item at
 * @slot. For each entry the name is copied into a local buffer (clamped to
 * BTRFS_NAME_LEN) and find_dir_item() is called twice, once with a DIR_INDEX
 * key and once with a DIR_ITEM key, both keyed on ref_key->offset as the
 * directory objectid.
 */
4275 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4276 struct extent_buffer *node, int slot, u64 *refs,
4279 struct btrfs_key key;
4280 struct btrfs_inode_ref *ref;
4281 char namebuf[BTRFS_NAME_LEN] = {0};
4289 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4290 total = btrfs_item_size_nr(node, slot);
4293 /* Update inode ref count */
4296 index = btrfs_inode_ref_index(node, ref);
4297 name_len = btrfs_inode_ref_name_len(node, ref);
/* namebuf is BTRFS_NAME_LEN bytes; longer on-disk names are truncated. */
4298 if (name_len <= BTRFS_NAME_LEN) {
4301 len = BTRFS_NAME_LEN;
4302 warning("root %llu INODE_REF[%llu %llu] name too long",
4303 root->objectid, ref_key->objectid, ref_key->offset);
4306 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4308 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len, so a one-byte name "." also compares equal to "..".
 * Confirm whether that is intended.
 */
4309 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4310 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4311 root->objectid, ref_key->objectid, ref_key->offset,
4313 err |= ROOT_DIR_ERROR;
4316 /* Find related DIR_INDEX */
4317 key.objectid = ref_key->offset;
4318 key.type = BTRFS_DIR_INDEX_KEY;
4320 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4323 /* Find related dir_item */
4324 key.objectid = ref_key->offset;
4325 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys use the hash of the (possibly truncated) name as offset. */
4326 key.offset = btrfs_name_hash(namebuf, len);
4327 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the full on-disk entry size, using the unclamped name_len. */
4330 len = sizeof(*ref) + name_len;
4331 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4340 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4341 * DIR_ITEM/DIR_INDEX.
4343 * @root: the root of the fs/file tree
4344 * @ref_key: the key of the INODE_EXTREF
4345 * @refs: the count of INODE_EXTREF
4346 * @mode: the st_mode of INODE_ITEM
4348 * Return 0 if no error occurred.
/*
 * Implementation outline: same walk as for INODE_REF items, but over
 * btrfs_inode_extref entries. The directory objectid used for the DIR_INDEX
 * and DIR_ITEM lookups comes from each entry's parent field instead of the
 * key offset.
 */
4350 static int check_inode_extref(struct btrfs_root *root,
4351 struct btrfs_key *ref_key,
4352 struct extent_buffer *node, int slot, u64 *refs,
4355 struct btrfs_key key;
4356 struct btrfs_inode_extref *extref;
4357 char namebuf[BTRFS_NAME_LEN] = {0};
4367 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4368 total = btrfs_item_size_nr(node, slot);
4371 /* update inode ref count */
4373 name_len = btrfs_inode_extref_name_len(node, extref);
4374 index = btrfs_inode_extref_index(node, extref);
4375 parent = btrfs_inode_extref_parent(node, extref);
/* namebuf is BTRFS_NAME_LEN bytes; longer on-disk names are truncated. */
4376 if (name_len <= BTRFS_NAME_LEN) {
4379 len = BTRFS_NAME_LEN;
4380 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4381 root->objectid, ref_key->objectid, ref_key->offset);
4383 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4385 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is the on-disk name_len, not the
 * clamped len, so a one-byte name "." also compares equal to "..".
 * Confirm whether that is intended.
 */
4386 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4387 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4388 root->objectid, ref_key->objectid, ref_key->offset,
4390 err |= ROOT_DIR_ERROR;
4393 /* find related dir_index */
4394 key.objectid = parent;
4395 key.type = BTRFS_DIR_INDEX_KEY;
4397 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4400 /* find related dir_item */
4401 key.objectid = parent;
4402 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys use the hash of the (possibly truncated) name as offset. */
4403 key.offset = btrfs_name_hash(namebuf, len);
4404 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance by the full on-disk entry size, using the unclamped name_len. */
4407 len = sizeof(*extref) + name_len;
4408 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4418 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4419 * the specified DIR_ITEM/DIR_INDEX.
4421 * @root: the root of the fs/file tree
4422 * @key: the key of the INODE_REF/INODE_EXTREF
4423 * @name: the name in the INODE_REF/INODE_EXTREF
4424 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4425 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4427 * @ext_ref: the EXTENDED_IREF feature
4429 * Return 0 if no error occurred.
4430 * Return >0 for error bitmap
/*
 * Implementation outline: the INODE_REF item found for @key is scanned first.
 * The second half rewrites @key into an INODE_EXTREF key (offset becomes
 * btrfs_extref_hash(dir_id, name, namelen)) and scans that item the same way,
 * additionally requiring the entry's parent to equal dir_id.
 *
 * Note that *@key is modified in place by the INODE_EXTREF lookup.
 */
4432 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4433 char *name, int namelen, u64 index,
4434 unsigned int ext_ref)
4436 struct btrfs_path path;
4437 struct btrfs_inode_ref *ref;
4438 struct btrfs_inode_extref *extref;
4439 struct extent_buffer *node;
4440 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4451 btrfs_init_path(&path);
4452 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4454 ret = INODE_REF_MISSING;
4458 node = path.nodes[0];
4459 slot = path.slots[0];
4461 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4462 total = btrfs_item_size_nr(node, slot);
4464 /* Iterate all entry of INODE_REF */
4465 while (cur < total) {
4466 ret = INODE_REF_MISSING;
4468 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4469 ref_index = btrfs_inode_ref_index(node, ref);
/* index == (u64)-1 is a wildcard: the index comparison is skipped. */
4470 if (index != (u64)-1 && index != ref_index)
/* ref_namebuf is BTRFS_NAME_LEN bytes; longer names are truncated. */
4473 if (ref_namelen <= BTRFS_NAME_LEN) {
4476 len = BTRFS_NAME_LEN;
4477 warning("root %llu INODE %s[%llu %llu] name too long",
4479 key->type == BTRFS_INODE_REF_KEY ?
4481 key->objectid, key->offset);
4483 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4486 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance by the full on-disk entry size. */
4492 len = sizeof(*ref) + ref_namelen;
4493 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4498 /* Skip if not support EXTENDED_IREF feature */
4502 btrfs_release_path(&path);
4503 btrfs_init_path(&path);
/* Save the directory id before key->offset is replaced by the extref hash. */
4505 dir_id = key->offset;
4506 key->type = BTRFS_INODE_EXTREF_KEY;
4507 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4509 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4511 ret = INODE_REF_MISSING;
4515 node = path.nodes[0];
4516 slot = path.slots[0];
4518 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4520 total = btrfs_item_size_nr(node, slot);
4522 /* Iterate all entry of INODE_EXTREF */
4523 while (cur < total) {
4524 ret = INODE_REF_MISSING;
4526 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4527 ref_index = btrfs_inode_extref_index(node, extref);
4528 parent = btrfs_inode_extref_parent(node, extref);
4529 if (index != (u64)-1 && index != ref_index)
/* The item is keyed by a hash, so the parent must be compared explicitly. */
4532 if (parent != dir_id)
4535 if (ref_namelen <= BTRFS_NAME_LEN) {
4538 len = BTRFS_NAME_LEN;
4539 warning("root %llu INODE %s[%llu %llu] name too long",
4541 key->type == BTRFS_INODE_REF_KEY ?
4543 key->objectid, key->offset);
4545 read_extent_buffer(node, ref_namebuf,
4546 (unsigned long)(extref + 1), len);
4548 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance by the full on-disk entry size. */
4555 len = sizeof(*extref) + ref_namelen;
4556 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4561 btrfs_release_path(&path);
4566 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4567 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4569 * @root: the root of the fs/file tree
4570 * @key: the key of the DIR_ITEM/DIR_INDEX
4571 * @size: the st_size of the INODE_ITEM
4572 * @ext_ref: the EXTENDED_IREF feature
4574 * Return 0 if no error occurred.
/*
 * Implementation outline: walks every btrfs_dir_item packed in the item at
 * @slot. Each entry's name length is added to *@size, the INODE_ITEM named by
 * the entry's location key is looked up and its file type compared, and
 * find_inode_ref() is asked for the matching INODE_REF/INODE_EXTREF. Entries
 * whose location is a ROOT_ITEM skip the inode checks.
 */
4576 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4577 struct extent_buffer *node, int slot, u64 *size,
4578 unsigned int ext_ref)
4580 struct btrfs_dir_item *di;
4581 struct btrfs_inode_item *ii;
4582 struct btrfs_path path;
4583 struct btrfs_key location;
4584 char namebuf[BTRFS_NAME_LEN] = {0};
4597 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4598 * ignore index check.
4600 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4602 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4603 total = btrfs_item_size_nr(node, slot);
4605 while (cur < total) {
4606 data_len = btrfs_dir_data_len(node, di);
4608 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4609 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4610 "DIR_ITEM" : "DIR_INDEX",
4611 key->objectid, key->offset, data_len);
4613 name_len = btrfs_dir_name_len(node, di);
/* namebuf is BTRFS_NAME_LEN bytes; longer on-disk names are truncated. */
4614 if (name_len <= BTRFS_NAME_LEN) {
4617 len = BTRFS_NAME_LEN;
4618 warning("root %llu %s[%llu %llu] name too long",
4620 key->type == BTRFS_DIR_ITEM_KEY ?
4621 "DIR_ITEM" : "DIR_INDEX",
4622 key->objectid, key->offset);
/* The on-disk (unclamped) name length is what gets accumulated. */
4624 (*size) += name_len;
4626 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4627 filetype = btrfs_dir_type(node, di);
4629 btrfs_init_path(&path);
4630 btrfs_dir_item_key_to_cpu(node, di, &location);
4632 /* Ignore related ROOT_ITEM check */
4633 if (location.type == BTRFS_ROOT_ITEM_KEY)
4636 /* Check relative INODE_ITEM(existence/filetype) */
4637 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4639 err |= INODE_ITEM_MISSING;
4640 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4641 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4642 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4643 key->offset, location.objectid, name_len,
4648 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4649 struct btrfs_inode_item);
4650 mode = btrfs_inode_mode(path.nodes[0], ii);
4652 if (imode_to_type(mode) != filetype) {
4653 err |= INODE_ITEM_MISMATCH;
4654 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4655 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4656 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4657 key->offset, name_len, namebuf, filetype);
4660 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * location is reused as the INODE_REF key: objectid stays the target
 * inode, offset becomes the objectid of this directory.
 */
4661 location.type = BTRFS_INODE_REF_KEY;
4662 location.offset = key->objectid;
4663 ret = find_inode_ref(root, &location, namebuf, len,
4666 if (ret & INODE_REF_MISSING)
4667 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4668 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4669 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4670 key->offset, name_len, namebuf, filetype);
4673 btrfs_release_path(&path);
/* Step over this entry: header, name and data are stored back to back. */
4674 len = sizeof(*di) + name_len + data_len;
4675 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after an entry of a DIR_INDEX item are reported as an error. */
4678 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4679 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4680 root->objectid, key->objectid, key->offset);
4689 * Check file extent datasum/hole, update the size of the file extents,
4690 * check and update the last offset of the file extent.
4692 * @root: the root of fs/file tree.
4693 * @fkey: the key of the file extent.
4694 * @nodatasum: INODE_NODATASUM feature.
4695 * @size: the sum of all EXTENT_DATA items size for this inode.
4696 * @end: the offset of the last extent.
4698 * Return 0 if no error occurred.
/*
 * Implementation outline: inline extents are validated on their own (non-zero
 * length, and for uncompressed data a length equal to the inline item
 * length). REG and PREALLOC extents get their csum coverage counted with
 * count_csum_range() and are then checked against the hole rules selected by
 * the file-scope no_holes flag. *@size and *@end are advanced by the extent's
 * num_bytes.
 */
4700 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4701 struct extent_buffer *node, int slot,
4702 unsigned int nodatasum, u64 *size, u64 *end)
4704 struct btrfs_file_extent_item *fi;
4707 u64 extent_num_bytes;
4709 u64 csum_found; /* In byte size, sectorsize aligned */
4710 u64 search_start; /* Logical range start we search for csum */
4711 u64 search_len; /* Logical range len we search for csum */
4712 unsigned int extent_type;
4713 unsigned int is_hole;
4718 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4720 /* Check inline extent */
4721 extent_type = btrfs_file_extent_type(node, fi);
4722 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4723 struct btrfs_item *e = btrfs_item_nr(slot);
4724 u32 item_inline_len;
4726 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4727 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4728 compressed = btrfs_file_extent_compression(node, fi);
4729 if (extent_num_bytes == 0) {
4731 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4732 root->objectid, fkey->objectid, fkey->offset);
4733 err |= FILE_EXTENT_ERROR;
/* Only uncompressed inline data must match the item's inline length. */
4735 if (!compressed && extent_num_bytes != item_inline_len) {
4737 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4738 root->objectid, fkey->objectid, fkey->offset,
4739 extent_num_bytes, item_inline_len);
4740 err |= FILE_EXTENT_ERROR;
4742 *size += extent_num_bytes;
4746 /* Check extent type */
4747 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4748 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4749 err |= FILE_EXTENT_ERROR;
4750 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4751 root->objectid, fkey->objectid, fkey->offset);
4755 /* Check REG_EXTENT/PREALLOC_EXTENT */
4756 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4757 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4758 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4759 extent_offset = btrfs_file_extent_offset(node, fi);
4760 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk_bytenr and disk_num_bytes are both zero. */
4761 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4764 * Check EXTENT_DATA csum
4766 * For plain (uncompressed) extent, we should only check the range
4767 * we're referring to, as it's possible that part of prealloc extent
4768 * has been written, and has csum:
4770 * |<--- Original large preallocated extent A ---->|
4771 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4774 * For compressed extent, we should check the whole range.
4777 search_start = disk_bytenr + extent_offset;
4778 search_len = extent_num_bytes;
4780 search_start = disk_bytenr;
4781 search_len = disk_num_bytes;
/*
 * csum_found is filled in by count_csum_range(). The three branches below
 * are mutually exclusive: csum present on a nodatasum inode, csum missing
 * on a regular non-hole extent, csum present on a prealloc extent.
 */
4783 ret = count_csum_range(root, search_start, search_len, &csum_found);
4784 if (csum_found > 0 && nodatasum) {
4785 err |= ODD_CSUM_ITEM;
4786 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4787 root->objectid, fkey->objectid, fkey->offset);
4788 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4789 !is_hole && (ret < 0 || csum_found < search_len)) {
4790 err |= CSUM_ITEM_MISSING;
4791 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4792 root->objectid, fkey->objectid, fkey->offset,
4793 csum_found, search_len);
4794 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4795 err |= ODD_CSUM_ITEM;
4796 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4797 root->objectid, fkey->objectid, fkey->offset, csum_found);
4800 /* Check EXTENT_DATA hole */
/*
 * With no_holes set, explicit hole extents are an error; without it,
 * each extent must start exactly where the previous one ended (*end).
 */
4801 if (no_holes && is_hole) {
4802 err |= FILE_EXTENT_ERROR;
4803 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4804 root->objectid, fkey->objectid, fkey->offset);
4805 } else if (!no_holes && *end != fkey->offset) {
4806 err |= FILE_EXTENT_ERROR;
4807 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4808 root->objectid, fkey->objectid, fkey->offset);
4811 *end += extent_num_bytes;
4813 *size += extent_num_bytes;
4819 * Check INODE_ITEM and related ITEMs (the same inode number)
4820 * 1. check link count
4821 * 2. check inode ref/extref
4822 * 3. check dir item/index
4824 * @ext_ref: the EXTENDED_IREF feature
4826 * Return 0 if no error occurred.
4827 * Return >0 (error bitmap) on error or when the end of the traversal is hit
/*
 * Implementation outline: reads the INODE_ITEM at the current path position,
 * then advances item by item with btrfs_next_item() and dispatches each item
 * of the same objectid to the matching checker (inode ref/extref, dir
 * item/index, file extent). Afterwards nlink, isize and nbytes from the
 * INODE_ITEM are compared with the values accumulated by those checkers.
 */
4829 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4830 unsigned int ext_ref)
4832 struct extent_buffer *node;
4833 struct btrfs_inode_item *ii;
4834 struct btrfs_key key;
4843 u64 extent_size = 0;
4845 unsigned int nodatasum;
4850 node = path->nodes[0];
4851 slot = path->slots[0];
4853 btrfs_item_key_to_cpu(node, &key, slot);
4854 inode_id = key.objectid;
/* Entries keyed by BTRFS_ORPHAN_OBJECTID are not checked; only advance. */
4856 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4857 ret = btrfs_next_item(root, path);
4863 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4864 isize = btrfs_inode_size(node, ii);
4865 nbytes = btrfs_inode_nbytes(node, ii);
4866 mode = btrfs_inode_mode(node, ii);
4867 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4868 nlink = btrfs_inode_nlink(node, ii);
4869 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4872 ret = btrfs_next_item(root, path);
4874 /* out will fill 'err' using current statistics */
4876 } else if (ret > 0) {
4881 node = path->nodes[0];
4882 slot = path->slots[0];
4883 btrfs_item_key_to_cpu(node, &key, slot);
/* Only items sharing inode_id belong to the inode being checked. */
4884 if (key.objectid != inode_id)
4888 case BTRFS_INODE_REF_KEY:
4889 ret = check_inode_ref(root, &key, node, slot, &refs,
4893 case BTRFS_INODE_EXTREF_KEY:
4894 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4895 warning("root %llu EXTREF[%llu %llu] isn't supported",
4896 root->objectid, key.objectid,
4898 ret = check_inode_extref(root, &key, node, slot, &refs,
4902 case BTRFS_DIR_ITEM_KEY:
4903 case BTRFS_DIR_INDEX_KEY:
4905 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4906 root->objectid, inode_id,
4907 imode_to_type(mode), key.objectid,
4910 ret = check_dir_item(root, &key, node, slot, &size,
4914 case BTRFS_EXTENT_DATA_KEY:
4916 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4917 root->objectid, inode_id, key.objectid,
4920 ret = check_file_extent(root, &key, node, slot,
4921 nodatasum, &extent_size,
4925 case BTRFS_XATTR_ITEM_KEY:
4928 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4929 key.objectid, key.type, key.offset);
4934 /* verify INODE_ITEM nlink/isize/nbytes */
4937 err |= LINK_COUNT_ERROR;
4938 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4939 root->objectid, inode_id, nlink);
4943 * Just a warning, as dir inode nbytes is just an
4944 * instructive value.
4946 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4947 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4948 root->objectid, inode_id, root->nodesize);
/* size was accumulated by check_dir_item() from the entries' name lengths. */
4951 if (isize != size) {
4953 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4954 root->objectid, inode_id, isize, size);
/* refs was accumulated by the INODE_REF/INODE_EXTREF checkers. */
4957 if (nlink != refs) {
4958 err |= LINK_COUNT_ERROR;
4959 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4960 root->objectid, inode_id, nlink, refs);
4961 } else if (!nlink) {
4965 if (!nbytes && !no_holes && extent_end < isize) {
4966 err |= NBYTES_ERROR;
4967 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4968 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
4971 if (nbytes != extent_size) {
4972 err |= NBYTES_ERROR;
4973 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4974 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM of BTRFS_FIRST_FREE_OBJECTID in @root and run
 * check_inode_item() on it. A missing item is recorded as INODE_ITEM_MISSING.
 * Roots with zero refs are special-cased via their drop_progress key before
 * the lookup.
 *
 * @root:    the fs/file tree to inspect
 * @ext_ref: the EXTENDED_IREF feature flag, passed on to check_inode_item()
 */
4981 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
4983 struct btrfs_path path;
4984 struct btrfs_key key;
4988 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4989 key.type = BTRFS_INODE_ITEM_KEY;
4992 /* For root being dropped, we don't need to check first inode */
4993 if (btrfs_root_refs(&root->root_item) == 0 &&
4994 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
4998 btrfs_init_path(&path);
5000 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5005 err |= INODE_ITEM_MISSING;
5006 error("first inode item of root %llu is missing",
/* The error bits reported by check_inode_item() are merged into err. */
5010 err |= check_inode_item(root, &path, ext_ref);
5015 btrfs_release_path(&path);
5020 * Iterate all items in the tree and call check_inode_item() to check them.
5022 * @root: the root of the tree to be checked.
5023 * @ext_ref: the EXTENDED_IREF feature
5025 * Return 0 if no error found.
5026 * Return <0 for error.
/*
 * Implementation outline: the first inode is checked separately through
 * check_fs_first_inode(), then the tree is traversed with
 * walk_down_tree_v2()/walk_up_tree_v2(). The walk starts at the root node
 * when the root has refs or its drop_progress objectid is zero; otherwise it
 * starts from the drop_progress key at root_item->drop_level.
 */
5028 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5030 struct btrfs_path path;
5031 struct node_refs nrefs;
5032 struct btrfs_root_item *root_item = &root->root_item;
5038 * We need to manually check the first inode item(256)
5039 * As the following traversal function will only start from
5040 * the first inode item in the leaf, if inode item(256) is missing
5041 * we will just skip it forever.
5043 ret = check_fs_first_inode(root, ext_ref);
5047 memset(&nrefs, 0, sizeof(nrefs));
5048 level = btrfs_header_level(root->node);
5049 btrfs_init_path(&path);
5051 if (btrfs_root_refs(root_item) > 0 ||
5052 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
/* Seed the path by hand with the root node at its own level, slot 0. */
5053 path.nodes[level] = root->node;
5054 path.slots[level] = 0;
/*
 * The path now points at root->node, so take an extra reference on it.
 * NOTE(review): presumably dropped by btrfs_release_path() - confirm.
 */
5055 extent_buffer_get(root->node);
5057 struct btrfs_key key;
/* Start from the recorded drop_progress key and drop_level instead. */
5059 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5060 level = root_item->drop_level;
5061 path.lowest_level = level;
5062 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5069 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5072 /* if ret is negative, walk shall stop */
5078 ret = walk_up_tree_v2(root, &path, &level);
5080 /* Normal exit, reset ret to err */
5087 btrfs_release_path(&path);
5092 * Find the relative ref for root_ref and root_backref.
5094 * @root: the root of the root tree.
5095 * @ref_key: the key of the root ref.
5097 * Return 0 if no error occurred.
/*
 * Implementation outline: reads dirid, sequence and name from the root ref at
 * @slot, builds the key of the counterpart item (ROOT_REF <-> ROOT_BACKREF,
 * objectid and offset swapped), looks it up in @root and compares the same
 * three fields. A missing counterpart sets ROOT_REF_MISSING, a differing one
 * ROOT_REF_MISMATCH.
 */
5099 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5100 struct extent_buffer *node, int slot)
5102 struct btrfs_path path;
5103 struct btrfs_key key;
5104 struct btrfs_root_ref *ref;
5105 struct btrfs_root_ref *backref;
5106 char ref_name[BTRFS_NAME_LEN] = {0};
5107 char backref_name[BTRFS_NAME_LEN] = {0};
5113 u32 backref_namelen;
5118 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5119 ref_dirid = btrfs_root_ref_dirid(node, ref);
5120 ref_seq = btrfs_root_ref_sequence(node, ref);
5121 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* ref_name is BTRFS_NAME_LEN bytes; longer on-disk names are truncated. */
5123 if (ref_namelen <= BTRFS_NAME_LEN) {
5126 len = BTRFS_NAME_LEN;
5127 warning("%s[%llu %llu] ref_name too long",
5128 ref_key->type == BTRFS_ROOT_REF_KEY ?
5129 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5132 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5134 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset. The type is flipped
 * arithmetically: the sum of both type values minus the current type
 * yields the other one.
 */
5135 key.objectid = ref_key->offset;
5136 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5137 key.offset = ref_key->objectid;
5139 btrfs_init_path(&path);
5140 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5142 err |= ROOT_REF_MISSING;
5143 error("%s[%llu %llu] couldn't find relative ref",
5144 ref_key->type == BTRFS_ROOT_REF_KEY ?
5145 "ROOT_REF" : "ROOT_BACKREF",
5146 ref_key->objectid, ref_key->offset);
5150 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5151 struct btrfs_root_ref);
5152 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5153 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5154 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5156 if (backref_namelen <= BTRFS_NAME_LEN) {
5157 len = backref_namelen;
5159 len = BTRFS_NAME_LEN;
5160 warning("%s[%llu %llu] ref_name too long",
5161 key.type == BTRFS_ROOT_REF_KEY ?
5162 "ROOT_REF" : "ROOT_BACKREF",
5163 key.objectid, key.offset);
5165 read_extent_buffer(path.nodes[0], backref_name,
5166 (unsigned long)(backref + 1), len);
/*
 * The on-disk name lengths are compared unclamped; the name bytes are
 * compared over len, the clamped length of the counterpart's name.
 */
5168 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5169 ref_namelen != backref_namelen ||
5170 strncmp(ref_name, backref_name, len)) {
5171 err |= ROOT_REF_MISMATCH;
5172 error("%s[%llu %llu] mismatch relative ref",
5173 ref_key->type == BTRFS_ROOT_REF_KEY ?
5174 "ROOT_REF" : "ROOT_BACKREF",
5175 ref_key->objectid, ref_key->offset);
5178 btrfs_release_path(&path);
5183 * Check all fs/file tree in low_memory mode.
5185 * 1. for fs tree root item, call check_fs_root_v2()
5186 * 2. for fs tree root ref/backref, call check_root_ref()
5188 * Return 0 if no error occurred.
/*
 * Implementation outline: searches the root tree starting at
 * BTRFS_FS_TREE_OBJECTID and steps through it with btrfs_next_item().
 * ROOT_ITEMs accepted by fs_root_objectid() are read and passed to
 * check_fs_root_v2(); ROOT_REF/ROOT_BACKREF items go to check_root_ref().
 * Whether EXTENDED_IREF is enabled is read once from the incompat flags and
 * passed down as ext_ref.
 */
5190 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5192 struct btrfs_root *tree_root = fs_info->tree_root;
5193 struct btrfs_root *cur_root = NULL;
5194 struct btrfs_path path;
5195 struct btrfs_key key;
5196 struct extent_buffer *node;
5197 unsigned int ext_ref;
5202 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5204 btrfs_init_path(&path);
5205 key.objectid = BTRFS_FS_TREE_OBJECTID;
5207 key.type = BTRFS_ROOT_ITEM_KEY;
5209 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5213 } else if (ret > 0) {
5219 node = path.nodes[0];
5220 slot = path.slots[0];
5221 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys beyond BTRFS_LAST_FREE_OBJECTID are outside the range of interest. */
5222 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5224 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5225 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read without the cache and freed again after the
 * check; every other root goes through btrfs_read_fs_root() with
 * key.offset set to (u64)-1.
 */
5226 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5227 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5230 key.offset = (u64)-1;
5231 cur_root = btrfs_read_fs_root(fs_info, &key);
5234 if (IS_ERR(cur_root)) {
5235 error("Fail to read fs/subvol tree: %lld",
5241 ret = check_fs_root_v2(cur_root, ext_ref);
5244 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5245 btrfs_free_fs_root(cur_root);
5246 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5247 key.type == BTRFS_ROOT_BACKREF_KEY) {
5248 ret = check_root_ref(tree_root, &key, node, slot);
5252 ret = btrfs_next_item(tree_root, &path);
5262 btrfs_release_path(&path);
/*
 * Walk rec->backrefs and cross-check every backref: whether it was seen in
 * the extent tree, whether it was referenced back, and for data backrefs
 * whether the found count, disk bytenr and byte length agree with the extent
 * record. Finally the running total of found references is compared with
 * rec->refs. Diagnostics are written to stderr.
 *
 * NOTE(review): @print_errs presumably gates the fprintf() calls; the
 * guarding statements are not part of the lines shown here - confirm.
 */
5266 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5268 struct list_head *cur = rec->backrefs.next;
5269 struct extent_backref *back;
5270 struct tree_backref *tback;
5271 struct data_backref *dback;
5275 while(cur != &rec->backrefs) {
5276 back = to_extent_backref(cur);
/* Backref that never showed up in the extent tree. */
5278 if (!back->found_extent_tree) {
5282 if (back->is_data) {
5283 dback = to_data_backref(back);
5284 fprintf(stderr, "Backref %llu %s %llu"
5285 " owner %llu offset %llu num_refs %lu"
5286 " not found in extent tree\n",
5287 (unsigned long long)rec->start,
5288 back->full_backref ?
5290 back->full_backref ?
5291 (unsigned long long)dback->parent:
5292 (unsigned long long)dback->root,
5293 (unsigned long long)dback->owner,
5294 (unsigned long long)dback->offset,
5295 (unsigned long)dback->num_refs);
5297 tback = to_tree_backref(back);
5298 fprintf(stderr, "Backref %llu parent %llu"
5299 " root %llu not found in extent tree\n",
5300 (unsigned long long)rec->start,
5301 (unsigned long long)tback->parent,
5302 (unsigned long long)tback->root);
/* Tree backref recorded but never referenced back. */
5305 if (!back->is_data && !back->found_ref) {
5309 tback = to_tree_backref(back);
5310 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5311 (unsigned long long)rec->start,
5312 back->full_backref ? "parent" : "root",
5313 back->full_backref ?
5314 (unsigned long long)tback->parent :
5315 (unsigned long long)tback->root, back);
/* Data backrefs: per-backref count, disk bytenr and length must agree. */
5317 if (back->is_data) {
5318 dback = to_data_backref(back);
5319 if (dback->found_ref != dback->num_refs) {
5323 fprintf(stderr, "Incorrect local backref count"
5324 " on %llu %s %llu owner %llu"
5325 " offset %llu found %u wanted %u back %p\n",
5326 (unsigned long long)rec->start,
5327 back->full_backref ?
5329 back->full_backref ?
5330 (unsigned long long)dback->parent:
5331 (unsigned long long)dback->root,
5332 (unsigned long long)dback->owner,
5333 (unsigned long long)dback->offset,
5334 dback->found_ref, dback->num_refs, back);
5336 if (dback->disk_bytenr != rec->start) {
5340 fprintf(stderr, "Backref disk bytenr does not"
5341 " match extent record, bytenr=%llu, "
5342 "ref bytenr=%llu\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)dback->disk_bytenr);
5347 if (dback->bytes != rec->nr) {
5351 fprintf(stderr, "Backref bytes do not match "
5352 "extent backref, bytenr=%llu, ref "
5353 "bytes=%llu, backref bytes=%llu\n",
5354 (unsigned long long)rec->start,
5355 (unsigned long long)rec->nr,
5356 (unsigned long long)dback->bytes);
/* Data backrefs contribute their found_ref count to the running total. */
5359 if (!back->is_data) {
5362 dback = to_data_backref(back);
5363 found += dback->found_ref;
/* The accumulated total must equal the extent record's reference count. */
5366 if (found != rec->refs) {
5370 fprintf(stderr, "Incorrect global backref count "
5371 "on %llu found %llu wanted %llu\n",
5372 (unsigned long long)rec->start,
5373 (unsigned long long)found,
5374 (unsigned long long)rec->refs);
/*
 * Process rec->backrefs from the head for as long as the list is non-empty,
 * converting each list node to its extent_backref.
 * NOTE(review): confirm that each entry is unlinked and freed inside the loop.
 */
5380 static int free_all_extent_backrefs(struct extent_record *rec)
5382 struct extent_backref *back;
5383 struct list_head *cur;
5384 while (!list_empty(&rec->backrefs)) {
5385 cur = rec->backrefs.next;
5386 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: the first cache extent is fetched, its enclosing
 * extent_record is recovered with container_of(), the extent is removed from
 * the tree and the record's backrefs are freed.
 */
5393 static void free_extent_record_cache(struct cache_tree *extent_cache)
5395 struct cache_extent *cache;
5396 struct extent_record *rec;
5399 cache = first_cache_extent(extent_cache);
5402 rec = container_of(cache, struct extent_record, cache);
5403 remove_cache_extent(extent_cache, cache);
5404 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it. All of
 * the following must hold: content and owner ref were checked, the extent
 * item ref count equals the non-zero found ref count, there are no
 * duplicates, all_backpointers_checked() returns 0, and none of the
 * bad_full_backref / crossing_stripes / wrong_chunk_type flags is set.
 */
5409 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5410 struct extent_record *rec)
5412 if (rec->content_checked && rec->owner_ref_checked &&
5413 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5414 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5415 !rec->bad_full_backref && !rec->crossing_stripes &&
5416 !rec->wrong_chunk_type) {
/* Unlink from the cache tree, release the backrefs, then leave rec->list. */
5417 remove_cache_extent(extent_cache, &rec->cache);
5418 free_all_extent_backrefs(rec);
5419 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is referenced by the tree named in its
 * header owner field.
 *
 * Step 1: scan the backrefs of @rec, looking at found, non-full tree
 * backrefs and comparing back->root with btrfs_header_owner(buf).
 * Step 2: read the owner root (key: owner objectid, ROOT_ITEM, offset -1),
 * take the first key of @buf (item key for a leaf, node key otherwise) and
 * search the owner tree with path.lowest_level set to level + 1. The block
 * counts as found when the parent node at level + 1 exists and its block
 * pointer at the returned slot equals buf->start.
 *
 * The final return yields 0 when the reference was found and 1 otherwise.
 * NOTE(review): the values returned on the early-exit paths (backref match,
 * IS_ERR root, failed search) are not visible here; confirm.
 */
5425 static int check_owner_ref(struct btrfs_root *root,
5426 struct extent_record *rec,
5427 struct extent_buffer *buf)
5429 struct extent_backref *node;
5430 struct tree_backref *back;
5431 struct btrfs_root *ref_root;
5432 struct btrfs_key key;
5433 struct btrfs_path path;
5434 struct extent_buffer *parent;
5439 list_for_each_entry(node, &rec->backrefs, list) {
5442 if (!node->found_ref)
5444 if (node->full_backref)
5446 back = to_tree_backref(node);
5447 if (btrfs_header_owner(buf) == back->root)
5450 BUG_ON(rec->is_root);
5452 /* try to find the block by search corresponding fs tree */
5453 key.objectid = btrfs_header_owner(buf);
5454 key.type = BTRFS_ROOT_ITEM_KEY;
5455 key.offset = (u64)-1;
5457 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5458 if (IS_ERR(ref_root))
5461 level = btrfs_header_level(buf);
5463 btrfs_item_key_to_cpu(buf, &key, 0);
5465 btrfs_node_key_to_cpu(buf, &key, 0);
5467 btrfs_init_path(&path);
5468 path.lowest_level = level + 1;
5469 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5473 parent = path.nodes[level + 1];
5474 if (parent && buf->start == btrfs_node_blockptr(parent,
5475 path.slots[level + 1]))
5478 btrfs_release_path(&path);
5479 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec and test each one, viewed as a tree backref,
 * for a root equal to BTRFS_EXTENT_TREE_OBJECTID. Full backrefs get separate
 * handling (the action taken for them is not visible here).
 * NOTE(review): presumably returns non-zero when the record belongs to the
 * extent tree; the return statements are not visible, confirm.
 */
5482 static int is_extent_tree_record(struct extent_record *rec)
5484 struct list_head *cur = rec->backrefs.next;
5485 struct extent_backref *node;
5486 struct tree_backref *back;
5489 while(cur != &rec->backrefs) {
5490 node = to_extent_backref(cur);
5494 back = to_tree_backref(node);
5495 if (node->full_backref)
5497 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block range that could not be read as a corrupt extent.
 *
 * The extent record covering (start, len) is looked up in @extent_cache and
 * tested with is_extent_tree_record(). On the path that reaches the end of
 * the function, the parent key of the record is converted to CPU byte order
 * and handed to btrfs_add_corrupt_extent_record() together with the range;
 * the result of that call is returned.
 * NOTE(review): the early-exit paths (no cache hit, not an extent tree
 * record) are not visible here; confirm their return values.
 */
5504 static int record_bad_block_io(struct btrfs_fs_info *info,
5505 struct cache_tree *extent_cache,
5508 struct extent_record *rec;
5509 struct cache_extent *cache;
5510 struct btrfs_key key;
5512 cache = lookup_cache_extent(extent_cache, start, len);
5516 rec = container_of(cache, struct extent_record, cache);
5517 if (!is_extent_tree_record(rec))
5520 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5521 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * Node (non-zero header level): both struct btrfs_key_ptr entries are read
 * out and written back in exchanged positions. The key of slot 0 is then
 * pushed to the parent levels with btrfs_fixup_low_keys() (the condition
 * guarding that step is not visible here).
 *
 * Leaf: the data of both items is copied into two temporary buffers and
 * written back at the exchanged offsets, the offset and size fields of the
 * two item headers are exchanged, and the two keys are exchanged through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot in
 * turn.
 *
 * NOTE(review): the write-back copies item2_size bytes out of item1_data
 * (allocated with item1_size) and item1_size bytes out of item2_data
 * (allocated with item2_size). This looks like it reads past the smaller
 * buffer whenever the two items differ in size; confirm and fix.
 */
5524 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5525 struct extent_buffer *buf, int slot)
5527 if (btrfs_header_level(buf)) {
5528 struct btrfs_key_ptr ptr1, ptr2;
5530 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5531 sizeof(struct btrfs_key_ptr));
5532 read_extent_buffer(buf, &ptr2,
5533 btrfs_node_key_ptr_offset(slot + 1),
5534 sizeof(struct btrfs_key_ptr));
5535 write_extent_buffer(buf, &ptr1,
5536 btrfs_node_key_ptr_offset(slot + 1),
5537 sizeof(struct btrfs_key_ptr));
5538 write_extent_buffer(buf, &ptr2,
5539 btrfs_node_key_ptr_offset(slot),
5540 sizeof(struct btrfs_key_ptr));
5542 struct btrfs_disk_key key;
5543 btrfs_node_key(buf, &key, 0);
5544 btrfs_fixup_low_keys(root, path, &key,
5545 btrfs_header_level(buf) + 1);
5548 struct btrfs_item *item1, *item2;
5549 struct btrfs_key k1, k2;
5550 char *item1_data, *item2_data;
5551 u32 item1_offset, item2_offset, item1_size, item2_size;
5553 item1 = btrfs_item_nr(slot);
5554 item2 = btrfs_item_nr(slot + 1);
5555 btrfs_item_key_to_cpu(buf, &k1, slot);
5556 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5557 item1_offset = btrfs_item_offset(buf, item1);
5558 item2_offset = btrfs_item_offset(buf, item2);
5559 item1_size = btrfs_item_size(buf, item1);
5560 item2_size = btrfs_item_size(buf, item2);
5562 item1_data = malloc(item1_size);
5565 item2_data = malloc(item2_size);
5571 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5572 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5574 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5575 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5579 btrfs_set_item_offset(buf, item1, item2_offset);
5580 btrfs_set_item_offset(buf, item2, item1_offset);
5581 btrfs_set_item_size(buf, item1, item2_size);
5582 btrfs_set_item_size(buf, item2, item1_size);
5584 path->slots[0] = slot;
5585 btrfs_set_item_key_unsafe(root, path, &k2);
5586 path->slots[0] = slot + 1;
5587 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering inside the block at path->nodes[path->lowest_level].
 *
 * Adjacent keys (i, i + 1) are read as node keys or item keys and compared
 * with btrfs_comp_cpu_keys(); swap_values() is called for slot i and the
 * buffer is marked dirty.
 * NOTE(review): presumably pairs that already compare as k1 < k2 are skipped
 * and only the others are swapped; the statement after the comparison is not
 * visible here.
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1. If that
 * accessor returns an unsigned type, an empty block makes the bound wrap to
 * a huge value; confirm that callers never pass an empty block.
 */
5592 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5594 struct extent_buffer *buf;
5595 struct btrfs_key k1, k2;
5597 int level = path->lowest_level;
5600 buf = path->nodes[level];
5601 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5603 btrfs_node_key_to_cpu(buf, &k1, i);
5604 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5606 btrfs_item_key_to_cpu(buf, &k1, i);
5607 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5609 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5611 ret = swap_values(root, path, buf, i);
5614 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type at @slot is tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF (presumably any other
 * type is rejected; the rejection statement is not visible here).
 *
 * The deletion is announced on stdout, the item headers following @slot are
 * moved down by one header with memmove_extent_buffer(), the item count in
 * the block header is decremented and the buffer is marked dirty. Only the
 * item header array is moved in the visible code; no item data is moved.
 * The first item key is also propagated to level 1 of @path through
 * btrfs_fixup_low_keys() (the guarding condition is not visible here).
 */
5620 static int delete_bogus_item(struct btrfs_root *root,
5621 struct btrfs_path *path,
5622 struct extent_buffer *buf, int slot)
5624 struct btrfs_key key;
5625 int nritems = btrfs_header_nritems(buf);
5627 btrfs_item_key_to_cpu(buf, &key, slot);
5629 /* These are all the keys we can deal with missing. */
5630 if (key.type != BTRFS_DIR_INDEX_KEY &&
5631 key.type != BTRFS_EXTENT_ITEM_KEY &&
5632 key.type != BTRFS_METADATA_ITEM_KEY &&
5633 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5634 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5637 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5638 (unsigned long long)key.objectid, key.type,
5639 (unsigned long long)key.offset, slot, buf->start);
5640 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5641 btrfs_item_nr_offset(slot + 1),
5642 sizeof(struct btrfs_item) *
5643 (nritems - slot - 1));
5644 btrfs_set_header_nritems(buf, nritems - 1);
5646 struct btrfs_disk_key disk_key;
5648 btrfs_item_key(buf, &disk_key, 0);
5649 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5651 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0]. Must only be
 * called with path->lowest_level == 0 (enforced with BUG_ON).
 *
 * For every item the end of its data is compared with the expected position:
 * BTRFS_LEAF_DATA_SIZE(root) for item 0, the data offset of the previous item
 * for every later item.
 *  - End beyond the expected position: delete_bogus_item() is tried for that
 *    slot and an error text is printed to stderr (presumably only when the
 *    deletion did not succeed; the check on ret is not visible here).
 *  - End short of the expected position: the gap is taken as the shift, the
 *    item data is moved up by that many bytes with memmove_extent_buffer(),
 *    the item offset is updated and the buffer is marked dirty.
 */
5655 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5657 struct extent_buffer *buf;
5661 /* We should only get this for leaves */
5662 BUG_ON(path->lowest_level);
5663 buf = path->nodes[0];
5665 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5666 unsigned int shift = 0, offset;
5668 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5669 BTRFS_LEAF_DATA_SIZE(root)) {
5670 if (btrfs_item_end_nr(buf, i) >
5671 BTRFS_LEAF_DATA_SIZE(root)) {
5672 ret = delete_bogus_item(root, path, buf, i);
5675 fprintf(stderr, "item is off the end of the "
5676 "leaf, can't fix\n");
5680 shift = BTRFS_LEAF_DATA_SIZE(root) -
5681 btrfs_item_end_nr(buf, i);
5682 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5683 btrfs_item_offset_nr(buf, i - 1)) {
5684 if (btrfs_item_end_nr(buf, i) >
5685 btrfs_item_offset_nr(buf, i - 1)) {
5686 ret = delete_bogus_item(root, path, buf, i);
5689 fprintf(stderr, "items overlap, can't fix\n");
5693 shift = btrfs_item_offset_nr(buf, i - 1) -
5694 btrfs_item_end_nr(buf, i);
5699 printf("Shifting item nr %d by %u bytes in block %llu\n",
5700 i, shift, (unsigned long long)buf->start);
5701 offset = btrfs_item_offset_nr(buf, i);
5702 memmove_extent_buffer(buf,
5703 btrfs_leaf_data(buf) + offset + shift,
5704 btrfs_leaf_data(buf) + offset,
5705 btrfs_item_size_nr(buf, i));
5706 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5708 btrfs_mark_buffer_dirty(buf);
5712 * We may have moved things, in which case we want to exit so we don't
5713 * write those changes out. Once we have proper abort functionality in
5714 * progs this can be changed to something nicer.
5721 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5722 * then just return -EIO.
/*
 * Repair driver for a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. All roots referencing buf->start are collected with
 * btrfs_find_all_roots() and iterated. For each root:
 *  - the root is read with btrfs_read_fs_root() (key: root id, ROOT_ITEM,
 *    offset -1) and a transaction is started on it,
 *  - the first key of @buf is searched with path.lowest_level set to the
 *    level of @buf and path.skip_check_block set, passing the transaction
 *    and 1 as the last argument (presumably requesting COW),
 *  - fix_key_order() or fix_item_offset() is run according to @status,
 *  - the transaction is committed and the path released.
 * NOTE(review): the error checks between these steps and the final return
 * value are not visible here; confirm against the complete body.
 */
5724 static int try_to_fix_bad_block(struct btrfs_root *root,
5725 struct extent_buffer *buf,
5726 enum btrfs_tree_block_status status)
5728 struct btrfs_trans_handle *trans;
5729 struct ulist *roots;
5730 struct ulist_node *node;
5731 struct btrfs_root *search_root;
5732 struct btrfs_path path;
5733 struct ulist_iterator iter;
5734 struct btrfs_key root_key, key;
5737 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5738 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5741 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5745 btrfs_init_path(&path);
5746 ULIST_ITER_INIT(&iter);
5747 while ((node = ulist_next(roots, &iter))) {
5748 root_key.objectid = node->val;
5749 root_key.type = BTRFS_ROOT_ITEM_KEY;
5750 root_key.offset = (u64)-1;
5752 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5759 trans = btrfs_start_transaction(search_root, 0);
5760 if (IS_ERR(trans)) {
5761 ret = PTR_ERR(trans);
5765 path.lowest_level = btrfs_header_level(buf);
5766 path.skip_check_block = 1;
5767 if (path.lowest_level)
5768 btrfs_node_key_to_cpu(buf, &key, 0);
5770 btrfs_item_key_to_cpu(buf, &key, 0);
5771 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5774 btrfs_commit_transaction(trans, search_root);
5777 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5778 ret = fix_key_order(search_root, &path);
5779 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5780 ret = fix_item_offset(search_root, &path);
5782 btrfs_commit_transaction(trans, search_root);
5785 btrfs_release_path(&path);
5786 btrfs_commit_transaction(trans, search_root);
5789 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record covering (buf->start, buf->len) is looked up and receives the
 * header generation of the block. For a non-empty block the objectid of its
 * first key is stored in info_objectid, and the header level in info_level.
 * The block is then checked with btrfs_check_leaf() or btrfs_check_node()
 * against rec->parent_key. A non-clean status is handed to
 * try_to_fix_bad_block(); if the status is still not clean, "bad block" is
 * reported on stderr.
 *
 * Afterwards content_checked is set. owner_ref_checked is set directly when
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF; otherwise check_owner_ref()
 * is consulted first (presumably it must return 0; the test on ret is not
 * visible here). maybe_free_extent_rec() is called last.
 */
5793 static int check_block(struct btrfs_root *root,
5794 struct cache_tree *extent_cache,
5795 struct extent_buffer *buf, u64 flags)
5797 struct extent_record *rec;
5798 struct cache_extent *cache;
5799 struct btrfs_key key;
5800 enum btrfs_tree_block_status status;
5804 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5807 rec = container_of(cache, struct extent_record, cache);
5808 rec->generation = btrfs_header_generation(buf);
5810 level = btrfs_header_level(buf);
5811 if (btrfs_header_nritems(buf) > 0) {
5814 btrfs_item_key_to_cpu(buf, &key, 0);
5816 btrfs_node_key_to_cpu(buf, &key, 0);
5818 rec->info_objectid = key.objectid;
5820 rec->info_level = level;
5822 if (btrfs_is_leaf(buf))
5823 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5825 status = btrfs_check_node(root, &rec->parent_key, buf);
5827 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5829 status = try_to_fix_bad_block(root, buf, status);
5830 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5832 fprintf(stderr, "bad block %llu\n",
5833 (unsigned long long)buf->start);
5836 * Signal to callers we need to start the scan over
5837 * again since we'll have cowed blocks.
5842 rec->content_checked = 1;
5843 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5844 rec->owner_ref_checked = 1;
5846 ret = check_owner_ref(root, rec, buf);
5848 rec->owner_ref_checked = 1;
5852 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref identified by @parent or
 * @root.
 *
 * Two matching modes are visible: one compares back->parent with @parent
 * after testing !node->full_backref, the other compares back->root with
 * @root after testing node->full_backref.
 * NOTE(review): presumably the first mode applies when @parent is non-zero
 * and entries failing the full_backref test are skipped; the selecting
 * condition and the return statements are not visible here.
 */
5856 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5857 u64 parent, u64 root)
5859 struct list_head *cur = rec->backrefs.next;
5860 struct extent_backref *node;
5861 struct tree_backref *back;
5863 while(cur != &rec->backrefs) {
5864 node = to_extent_backref(cur);
5868 back = to_tree_backref(node);
5870 if (!node->full_backref)
5872 if (parent == back->parent)
5875 if (node->full_backref)
5877 if (back->root == root)
/*
 * Allocate a tree backref, zero its embedded node and append it to the tail
 * of rec->backrefs.
 *
 * One branch stores @parent and marks the node as a full backref, the other
 * clears full_backref.
 * NOTE(review): presumably the branch is chosen by @parent being non-zero
 * and @root is stored in the other branch; the allocation failure check and
 * the return statement are not visible here.
 */
5884 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5885 u64 parent, u64 root)
5887 struct tree_backref *ref = malloc(sizeof(*ref));
5891 memset(&ref->node, 0, sizeof(ref->node));
5893 ref->parent = parent;
5894 ref->node.full_backref = 1;
5897 ref->node.full_backref = 0;
5899 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref.
 *
 * Two matching modes are visible: one compares back->parent with @parent
 * after testing !node->full_backref, the other requires root, owner and
 * offset to match after testing node->full_backref. In the second mode an
 * additional test applies when both the caller (found_ref) and the entry
 * (node->found_ref) have seen a reference: the entry is treated specially
 * if its bytes or disk_bytenr differ from the values passed in.
 * NOTE(review): presumably such a mismatching entry is skipped so that the
 * caller allocates a separate backref; the statements following the tests
 * and the return statements are not visible here.
 */
5904 static struct data_backref *find_data_backref(struct extent_record *rec,
5905 u64 parent, u64 root,
5906 u64 owner, u64 offset,
5908 u64 disk_bytenr, u64 bytes)
5910 struct list_head *cur = rec->backrefs.next;
5911 struct extent_backref *node;
5912 struct data_backref *back;
5914 while(cur != &rec->backrefs) {
5915 node = to_extent_backref(cur);
5919 back = to_data_backref(node);
5921 if (!node->full_backref)
5923 if (parent == back->parent)
5926 if (node->full_backref)
5928 if (back->root == root && back->owner == owner &&
5929 back->offset == offset) {
5930 if (found_ref && node->found_ref &&
5931 (back->bytes != bytes ||
5932 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, zero its embedded node, flag it as a data
 * backref and append it to the tail of rec->backrefs.
 *
 * One branch stores @parent and marks the node as a full backref, the other
 * stores @offset and clears full_backref. The bytes field is set to
 * max_size, and rec->max_size is raised when max_size exceeds it.
 * NOTE(review): presumably the branch is chosen by @parent being non-zero
 * and root/owner are stored next to @offset; those lines, the allocation
 * failure check and the return statement are not visible here.
 */
5941 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5942 u64 parent, u64 root,
5943 u64 owner, u64 offset,
5946 struct data_backref *ref = malloc(sizeof(*ref));
5950 memset(&ref->node, 0, sizeof(ref->node));
5951 ref->node.is_data = 1;
5954 ref->parent = parent;
5957 ref->node.full_backref = 1;
5961 ref->offset = offset;
5962 ref->node.full_backref = 0;
5964 ref->bytes = max_size;
5967 list_add_tail(&ref->node.list, &rec->backrefs);
5968 if (max_size > rec->max_size)
5969 rec->max_size = max_size;
/*
 * Set rec->wrong_chunk_type when the extent does not fit the flags of the
 * block group found for rec->start via btrfs_lookup_first_block_group().
 *
 *  - Data extent (rec->metadata clear): block group must carry
 *    BTRFS_BLOCK_GROUP_DATA.
 *  - Metadata extent: block group must carry SYSTEM or METADATA. If the
 *    record has backrefs, the first one refines the check: a data backref is
 *    always wrong for a tree block; a tree backref whose root is
 *    BTRFS_CHUNK_TREE_OBJECTID requires a SYSTEM block group, any other root
 *    requires a METADATA block group.
 */
5973 /* Check if the type of extent matches with its chunk */
5974 static void check_extent_type(struct extent_record *rec)
5976 struct btrfs_block_group_cache *bg_cache;
5978 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5982 /* data extent, check chunk directly*/
5983 if (!rec->metadata) {
5984 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5985 rec->wrong_chunk_type = 1;
5989 /* metadata extent, check the obvious case first */
5990 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5991 BTRFS_BLOCK_GROUP_METADATA))) {
5992 rec->wrong_chunk_type = 1;
5997 * Check SYSTEM extent, as it's also marked as metadata, we can only
5998 * make sure it's a SYSTEM extent by its backref
6000 if (!list_empty(&rec->backrefs)) {
6001 struct extent_backref *node;
6002 struct tree_backref *tback;
6005 node = to_extent_backref(rec->backrefs.next);
6006 if (node->is_data) {
6007 /* tree block shouldn't have data backref */
6008 rec->wrong_chunk_type = 1;
6011 tback = container_of(node, struct tree_backref, node);
6013 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6014 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6016 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6017 if (!(bg_cache->flags & bg_type))
6018 rec->wrong_chunk_type = 1;
6023 * Allocate a new extent record, fill default values from @tmpl and insert into
6024 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6025 * the cache, otherwise it fails.
/*
 * Build a new extent record from @tmpl and insert it into @extent_cache.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Copied from the template: start,
 * max_size, found_rec, content_checked, owner_ref_checked, metadata, is_root,
 * refs, extent_item_refs, parent_generation and parent_key. rec->nr is the
 * larger of tmpl->nr and tmpl->max_size, while the cache range is
 * (tmpl->start, tmpl->nr). num_duplicates and the bad_full_backref,
 * crossing_stripes and wrong_chunk_type flags start cleared, and
 * flag_block_full_backref starts as FLAG_UNSET.
 *
 * After insert_cache_extent(), bytes_used grows by rec->nr,
 * check_crossing_stripes() is evaluated with the tree root nodesize and
 * check_extent_type() is run.
 * NOTE(review): presumably the crossing-stripes check applies to metadata
 * only; its guard, the allocation and insertion failure handling and the
 * return value are not visible here.
 */
6027 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6028 struct extent_record *tmpl)
6030 struct extent_record *rec;
6033 BUG_ON(tmpl->max_size == 0);
6034 rec = malloc(sizeof(*rec));
6037 rec->start = tmpl->start;
6038 rec->max_size = tmpl->max_size;
6039 rec->nr = max(tmpl->nr, tmpl->max_size);
6040 rec->found_rec = tmpl->found_rec;
6041 rec->content_checked = tmpl->content_checked;
6042 rec->owner_ref_checked = tmpl->owner_ref_checked;
6043 rec->num_duplicates = 0;
6044 rec->metadata = tmpl->metadata;
6045 rec->flag_block_full_backref = FLAG_UNSET;
6046 rec->bad_full_backref = 0;
6047 rec->crossing_stripes = 0;
6048 rec->wrong_chunk_type = 0;
6049 rec->is_root = tmpl->is_root;
6050 rec->refs = tmpl->refs;
6051 rec->extent_item_refs = tmpl->extent_item_refs;
6052 rec->parent_generation = tmpl->parent_generation;
6053 INIT_LIST_HEAD(&rec->backrefs);
6054 INIT_LIST_HEAD(&rec->dups);
6055 INIT_LIST_HEAD(&rec->list);
6056 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6057 rec->cache.start = tmpl->start;
6058 rec->cache.size = tmpl->nr;
6059 ret = insert_cache_extent(extent_cache, &rec->cache);
6064 bytes_used += rec->nr;
6067 rec->crossing_stripes = check_crossing_stripes(global_info,
6068 rec->start, global_info->tree_root->nodesize);
6069 check_extent_type(rec);
6074 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6076 * - refs - if found, increase refs
6077 * - is_root - if found, set
6078 * - content_checked - if found, set
6079 * - owner_ref_checked - if found, set
6081 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering (tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when none exists.
 *
 * Visible merge steps on an existing record:
 *  - rec->nr may be reset to max(tmpl->nr, tmpl->max_size) (guard not
 *    visible here).
 *  - If tmpl->found_rec is set and the record either starts at a different
 *    bytenr or already has found_rec, a reduced copy of the template
 *    (start, max_size, metadata, extent_item_refs) is allocated and chained
 *    on rec->dups, rec is queued on duplicate_extents if not yet listed, and
 *    num_duplicates is incremented.
 *  - For a non-duplicate with tmpl->extent_item_refs set, the value is
 *    stored in the record; a message is printed when the record already had
 *    a value.
 *  - content_checked and owner_ref_checked are only ever set, never cleared.
 *  - parent_key is overwritten; parent_generation only when the template
 *    value is non-zero; max_size only grows.
 *  - check_crossing_stripes(), check_extent_type() and
 *    maybe_free_extent_rec() are run at the end of the merge.
 */
6083 static int add_extent_rec(struct cache_tree *extent_cache,
6084 struct extent_record *tmpl)
6086 struct extent_record *rec;
6087 struct cache_extent *cache;
6091 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6093 rec = container_of(cache, struct extent_record, cache);
6097 rec->nr = max(tmpl->nr, tmpl->max_size);
6100 * We need to make sure to reset nr to whatever the extent
6101 * record says was the real size, this way we can compare it to
6104 if (tmpl->found_rec) {
6105 if (tmpl->start != rec->start || rec->found_rec) {
6106 struct extent_record *tmp;
6109 if (list_empty(&rec->list))
6110 list_add_tail(&rec->list,
6111 &duplicate_extents);
6114 * We have to do this song and dance in case we
6115 * find an extent record that falls inside of
6116 * our current extent record but does not have
6117 * the same objectid.
6119 tmp = malloc(sizeof(*tmp));
6122 tmp->start = tmpl->start;
6123 tmp->max_size = tmpl->max_size;
6126 tmp->metadata = tmpl->metadata;
6127 tmp->extent_item_refs = tmpl->extent_item_refs;
6128 INIT_LIST_HEAD(&tmp->list);
6129 list_add_tail(&tmp->list, &rec->dups);
6130 rec->num_duplicates++;
6137 if (tmpl->extent_item_refs && !dup) {
6138 if (rec->extent_item_refs) {
6139 fprintf(stderr, "block %llu rec "
6140 "extent_item_refs %llu, passed %llu\n",
6141 (unsigned long long)tmpl->start,
6142 (unsigned long long)
6143 rec->extent_item_refs,
6144 (unsigned long long)tmpl->extent_item_refs);
6146 rec->extent_item_refs = tmpl->extent_item_refs;
6150 if (tmpl->content_checked)
6151 rec->content_checked = 1;
6152 if (tmpl->owner_ref_checked)
6153 rec->owner_ref_checked = 1;
6154 memcpy(&rec->parent_key, &tmpl->parent_key,
6155 sizeof(tmpl->parent_key));
6156 if (tmpl->parent_generation)
6157 rec->parent_generation = tmpl->parent_generation;
6158 if (rec->max_size < tmpl->max_size)
6159 rec->max_size = tmpl->max_size;
6162 * A metadata extent can't cross stripe_len boundary, otherwise
6163 * kernel scrub won't be able to handle it.
6164 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6168 rec->crossing_stripes = check_crossing_stripes(
6169 global_info, rec->start,
6170 global_info->tree_root->nodesize);
6171 check_extent_type(rec);
6172 maybe_free_extent_rec(extent_cache, rec);
6176 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent record is looked up with a 1-byte range at @bytenr. If none
 * exists, a template starting at @bytenr is inserted with
 * add_extent_rec_nolookup() and the lookup is repeated. The record start is
 * compared with @bytenr (presumably a mismatch is rejected; the statement is
 * not visible here).
 *
 * The matching backref is fetched with find_tree_backref() or created with
 * alloc_tree_backref(). Two marking paths are visible: one sets
 * node.found_ref, the other sets node.found_extent_tree; each prints
 * "Extent back ref already exists" when its flag was already set.
 * NOTE(review): presumably @found_ref selects between the two paths.
 * check_extent_type() and maybe_free_extent_rec() are run at the end.
 */
6181 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6182 u64 parent, u64 root, int found_ref)
6184 struct extent_record *rec;
6185 struct tree_backref *back;
6186 struct cache_extent *cache;
6189 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6191 struct extent_record tmpl;
6193 memset(&tmpl, 0, sizeof(tmpl));
6194 tmpl.start = bytenr;
6199 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6203 /* really a bug in cache_extent implement now */
6204 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6209 rec = container_of(cache, struct extent_record, cache);
6210 if (rec->start != bytenr) {
6212 * Several cause, from unaligned bytenr to over lapping extents
6217 back = find_tree_backref(rec, parent, root);
6219 back = alloc_tree_backref(rec, parent, root);
6225 if (back->node.found_ref) {
6226 fprintf(stderr, "Extent back ref already exists "
6227 "for %llu parent %llu root %llu \n",
6228 (unsigned long long)bytenr,
6229 (unsigned long long)parent,
6230 (unsigned long long)root);
6232 back->node.found_ref = 1;
6234 if (back->node.found_extent_tree) {
6235 fprintf(stderr, "Extent back ref already exists "
6236 "for %llu parent %llu root %llu \n",
6237 (unsigned long long)bytenr,
6238 (unsigned long long)parent,
6239 (unsigned long long)root);
6241 back->node.found_extent_tree = 1;
6243 check_extent_type(rec);
6244 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a 1-byte range at @bytenr. If none
 * exists, a template with start @bytenr and max_size @max_size is inserted
 * with add_extent_rec_nolookup() and the lookup is repeated. rec->max_size
 * is raised to @max_size when smaller. The backref is fetched with
 * find_data_backref() or created with alloc_data_backref().
 *
 * Two marking paths are visible (presumably selected by @found_ref):
 *  - reference seen in a file tree: @num_refs must be 1 (BUG_ON); if the
 *    backref was already found its bytes must equal @max_size (BUG_ON); the
 *    found_ref counter is incremented, bytes and disk_bytenr are stored, and
 *    the record is marked content_checked and owner_ref_checked.
 *  - reference seen in the extent tree: "Extent back ref already exists" is
 *    printed if found_extent_tree was already set; num_refs is stored and
 *    found_extent_tree is set.
 * maybe_free_extent_rec() is run at the end.
 */
6248 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6249 u64 parent, u64 root, u64 owner, u64 offset,
6250 u32 num_refs, int found_ref, u64 max_size)
6252 struct extent_record *rec;
6253 struct data_backref *back;
6254 struct cache_extent *cache;
6257 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6259 struct extent_record tmpl;
6261 memset(&tmpl, 0, sizeof(tmpl));
6262 tmpl.start = bytenr;
6264 tmpl.max_size = max_size;
6266 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6270 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6275 rec = container_of(cache, struct extent_record, cache);
6276 if (rec->max_size < max_size)
6277 rec->max_size = max_size;
6280 * If found_ref is set then max_size is the real size and must match the
6281 * existing refs. So if we have already found a ref then we need to
6282 * make sure that this ref matches the existing one, otherwise we need
6283 * to add a new backref so we can notice that the backrefs don't match
6284 * and we need to figure out who is telling the truth. This is to
6285 * account for that awful fsync bug I introduced where we'd end up with
6286 * a btrfs_file_extent_item that would have its length include multiple
6287 * prealloc extents or point inside of a prealloc extent.
6289 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6292 back = alloc_data_backref(rec, parent, root, owner, offset,
6298 BUG_ON(num_refs != 1);
6299 if (back->node.found_ref)
6300 BUG_ON(back->bytes != max_size);
6301 back->node.found_ref = 1;
6302 back->found_ref += 1;
6303 back->bytes = max_size;
6304 back->disk_bytenr = bytenr;
6306 rec->content_checked = 1;
6307 rec->owner_ref_checked = 1;
6309 if (back->node.found_extent_tree) {
6310 fprintf(stderr, "Extent back ref already exists "
6311 "for %llu parent %llu root %llu "
6312 "owner %llu offset %llu num_refs %lu\n",
6313 (unsigned long long)bytenr,
6314 (unsigned long long)parent,
6315 (unsigned long long)root,
6316 (unsigned long long)owner,
6317 (unsigned long long)offset,
6318 (unsigned long)num_refs);
6320 back->num_refs = num_refs;
6321 back->node.found_extent_tree = 1;
6323 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range (bytenr, size) to the @seen cache and to the @pending cache.
 * NOTE(review): presumably the range is queued on @pending only when the
 * insertion into @seen succeeded, i.e. the range was not seen before; the
 * test on ret and the return value are not visible here.
 */
6327 static int add_pending(struct cache_tree *pending,
6328 struct cache_tree *seen, u64 bytenr, u32 size)
6331 ret = add_cache_extent(seen, bytenr, size);
6334 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next block ranges to process and store them in @bits (at most
 * @bits_nr entries).
 *
 * Sources consulted, in the order they appear:
 *  - @reada: its first extent is copied into bits[0].
 *  - @nodes: searched from @last minus 32768 (when @last exceeds 32768),
 *    with a second search from offset 0; consecutive extents are copied
 *    until the cache ends or @bits is full.
 *  - @pending: searched from offset 0 and copied the same way.
 * When more than 8 slots of @bits remain free, further extents of @pending
 * following the end of bits[0] are appended; the gap between the running
 * end position and the next extent is tested against 32768 bytes.
 * NOTE(review): presumably each source is used only when the previous one is
 * empty, and a gap above 32768 ends the appending; the connecting
 * conditions and the return value are not visible here.
 */
6338 static int pick_next_pending(struct cache_tree *pending,
6339 struct cache_tree *reada,
6340 struct cache_tree *nodes,
6341 u64 last, struct block_info *bits, int bits_nr,
6344 unsigned long node_start = last;
6345 struct cache_extent *cache;
6348 cache = search_cache_extent(reada, 0);
6350 bits[0].start = cache->start;
6351 bits[0].size = cache->size;
6356 if (node_start > 32768)
6357 node_start -= 32768;
6359 cache = search_cache_extent(nodes, node_start);
6361 cache = search_cache_extent(nodes, 0);
6364 cache = search_cache_extent(pending, 0);
6369 bits[ret].start = cache->start;
6370 bits[ret].size = cache->size;
6371 cache = next_cache_extent(cache);
6373 } while (cache && ret < bits_nr);
6379 bits[ret].start = cache->start;
6380 bits[ret].size = cache->size;
6381 cache = next_cache_extent(cache);
6383 } while (cache && ret < bits_nr);
6385 if (bits_nr - ret > 8) {
6386 u64 lookup = bits[0].start + bits[0].size;
6387 struct cache_extent *next;
6388 next = search_cache_extent(pending, lookup);
6390 if (next->start - lookup > 32768)
6392 bits[ret].start = next->start;
6393 bits[ret].size = next->size;
6394 lookup = next->start + next->size;
6398 next = next_cache_extent(next);
/*
 * Per-entry release helper for chunk records: maps @cache back to its
 * enclosing struct chunk_record and unlinks the record from both its list
 * and dextents list heads.
 * NOTE(review): presumably the record is freed afterwards; that statement is
 * not visible here.
 */
6406 static void free_chunk_record(struct cache_extent *cache)
6408 struct chunk_record *rec;
6410 rec = container_of(cache, struct chunk_record, cache);
6411 list_del_init(&rec->list);
6412 list_del_init(&rec->dextents);
/*
 * Release @chunk_cache by calling cache_tree_free_extents() with
 * free_chunk_record as the per-entry helper.
 */
6416 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6418 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node release helper for device records: maps the rb-tree @node back to
 * its enclosing struct device_record (presumably to free it; that statement
 * is not visible here). The FREE_RB_BASED_TREE() invocation below is passed
 * the name device_cache and this helper; NOTE(review): presumably it expands
 * to the function that frees the whole device tree, confirm against the
 * macro definition.
 */
6421 static void free_device_record(struct rb_node *node)
6423 struct device_record *rec;
6425 rec = container_of(node, struct device_record, node);
6429 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 * NOTE(review): presumably the list insertion happens only when
 * insert_cache_extent() succeeded and ret is returned; those lines are not
 * visible here.
 */
6431 int insert_block_group_record(struct block_group_tree *tree,
6432 struct block_group_record *bg_rec)
6436 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6440 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry release helper for block group records: maps @cache back to its
 * enclosing struct block_group_record and unlinks the record from its list.
 * NOTE(review): presumably the record is freed afterwards; that statement is
 * not visible here.
 */
6444 static void free_block_group_record(struct cache_extent *cache)
6446 struct block_group_record *rec;
6448 rec = container_of(cache, struct block_group_record, cache);
6449 list_del_init(&rec->list);
/*
 * Release the cache tree of @tree by calling cache_tree_free_extents() with
 * free_block_group_record as the per-entry helper.
 */
6453 void free_block_group_tree(struct block_group_tree *tree)
6455 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and queue it on both orphan lists (no_chunk_orphans and
 * no_device_orphans).
 * NOTE(review): presumably the list insertions happen only when the cache
 * insertion succeeded and ret is returned; those lines are not visible here.
 */
6458 int insert_device_extent_record(struct device_extent_tree *tree,
6459 struct device_extent_record *de_rec)
6464 * Device extent is a bit different from the other extents, because
6465 * the extents which belong to the different devices may have the
6466 * same start and size, so we need use the special extent cache
6467 * search/insert functions.
6469 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6473 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6474 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry release helper for device extent records: maps @cache back to
 * its enclosing struct device_extent_record and unlinks the record from its
 * chunk_list and device_list when those are linked.
 * NOTE(review): presumably the record is freed afterwards; that statement is
 * not visible here.
 */
6478 static void free_device_extent_record(struct cache_extent *cache)
6480 struct device_extent_record *rec;
6482 rec = container_of(cache, struct device_extent_record, cache);
6483 if (!list_empty(&rec->chunk_list))
6484 list_del_init(&rec->chunk_list);
6485 if (!list_empty(&rec->device_list))
6486 list_del_init(&rec->device_list);
/*
 * Release the cache tree of @tree by calling cache_tree_free_extents() with
 * free_device_extent_record as the per-entry helper.
 */
6490 void free_device_extent_tree(struct device_extent_tree *tree)
6492 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6495 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent reference item at @slot of @leaf into a backref entry.
 *
 * The item key supplies the extent bytenr (key.objectid) and the parent
 * (key.offset). When the referenced objectid is below
 * BTRFS_FIRST_FREE_OBJECTID a tree backref is added; the other visible call
 * adds a data backref with root, owner and offset all zero, the ref count
 * read from the item, found_ref 0 and max_size 0.
 * NOTE(review): presumably the data backref call is the else branch; the
 * remaining arguments of the tree backref call and the return statement are
 * not visible here.
 */
6496 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6497 struct extent_buffer *leaf, int slot)
6499 struct btrfs_extent_ref_v0 *ref0;
6500 struct btrfs_key key;
6503 btrfs_item_key_to_cpu(leaf, &key, slot);
6504 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6505 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6506 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6509 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6510 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with a size derived from the stripe count of
 * the item (btrfs_chunk_record_size()). Its cache range is (key->offset,
 * chunk length). The key fields, the leaf generation and the chunk
 * attributes (owner, stripe_len, type, io_width, io_align, sector_size,
 * num_stripes, sub_stripes) are copied in, followed by devid, offset and
 * device UUID of every stripe.
 * NOTE(review): "memory allocation failed" is presumably printed when the
 * allocation fails; the failure handling and the return statement are not
 * visible here.
 */
6516 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6517 struct btrfs_key *key,
6520 struct btrfs_chunk *ptr;
6521 struct chunk_record *rec;
6524 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6525 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6527 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6529 fprintf(stderr, "memory allocation failed\n");
6533 INIT_LIST_HEAD(&rec->list);
6534 INIT_LIST_HEAD(&rec->dextents);
6537 rec->cache.start = key->offset;
6538 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6540 rec->generation = btrfs_header_generation(leaf);
6542 rec->objectid = key->objectid;
6543 rec->type = key->type;
6544 rec->offset = key->offset;
6546 rec->length = rec->cache.size;
6547 rec->owner = btrfs_chunk_owner(leaf, ptr);
6548 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6549 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6550 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6551 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6552 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6553 rec->num_stripes = num_stripes;
6554 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6556 for (i = 0; i < rec->num_stripes; ++i) {
6557 rec->stripes[i].devid =
6558 btrfs_stripe_devid_nr(leaf, ptr, i);
6559 rec->stripes[i].offset =
6560 btrfs_stripe_offset_nr(leaf, ptr, i);
6561 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6562 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is checked with btrfs_check_chunk_valid() against the tree root
 * of global_info; an error naming the chunk offset and length is reported
 * for an invalid chunk. Otherwise a record is built with
 * btrfs_new_chunk_record() and inserted into @chunk_cache; "Chunk[...]
 * existed." is printed for a failed insertion.
 * NOTE(review): presumably each message is guarded by a test on ret; those
 * tests and the return values are not visible here.
 */
6569 static int process_chunk_item(struct cache_tree *chunk_cache,
6570 struct btrfs_key *key, struct extent_buffer *eb,
6573 struct chunk_record *rec;
6574 struct btrfs_chunk *chunk;
6577 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6579 * Do extra check for this chunk item,
6581 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6582 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6583 * and owner<->key_type check.
6585 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6588 error("chunk(%llu, %llu) is not valid, ignore it",
6589 key->offset, btrfs_chunk_length(eb, chunk));
6592 rec = btrfs_new_chunk_record(eb, key, slot);
6593 ret = insert_cache_extent(chunk_cache, &rec->cache);
6595 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6596 rec->offset, rec->length);
/*
 * Build a device record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache using device_record_compare.
 *
 * The record takes the leaf generation, the three key fields, and the device
 * id, total bytes and bytes used read from the item. "Device[...] existed."
 * is printed for a failed insertion (presumably guarded by a test on ret).
 * NOTE(review): rec->devid is first set from key->offset and later
 * overwritten with btrfs_device_id(); the first assignment looks redundant.
 * NOTE(review): the record comes from malloc(), so fields not assigned here
 * are uninitialised unless lines not visible here set them.
 */
6603 static int process_device_item(struct rb_root *dev_cache,
6604 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6606 struct btrfs_dev_item *ptr;
6607 struct device_record *rec;
6610 ptr = btrfs_item_ptr(eb,
6611 slot, struct btrfs_dev_item);
6613 rec = malloc(sizeof(*rec));
6615 fprintf(stderr, "memory allocation failed\n");
6619 rec->devid = key->offset;
6620 rec->generation = btrfs_header_generation(eb);
6622 rec->objectid = key->objectid;
6623 rec->type = key->type;
6624 rec->offset = key->offset;
6626 rec->devid = btrfs_device_id(eb, ptr);
6627 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6628 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6630 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6632 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block group record from the block group item at @slot of @leaf.
 *
 * The record is zero-allocated. Its cache range is (key->objectid,
 * key->offset); the leaf generation, the three key fields and the flags read
 * from the item are stored, and the list head is initialised.
 * NOTE(review): "memory allocation failed" is presumably printed when the
 * allocation fails; the failure handling and the return statement are not
 * visible here.
 */
6639 struct block_group_record *
6640 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6643 struct btrfs_block_group_item *ptr;
6644 struct block_group_record *rec;
6646 rec = calloc(1, sizeof(*rec));
6648 fprintf(stderr, "memory allocation failed\n");
6652 rec->cache.start = key->objectid;
6653 rec->cache.size = key->offset;
6655 rec->generation = btrfs_header_generation(leaf);
6657 rec->objectid = key->objectid;
6658 rec->type = key->type;
6659 rec->offset = key->offset;
6661 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6662 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6664 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at @slot of @eb with
 * btrfs_new_block_group_record() and insert it into @block_group_cache.
 * "Block Group[...] existed." is printed for a failed insertion (presumably
 * guarded by a test on ret; that test and the return value are not visible
 * here).
 */
6669 static int process_block_group_item(struct block_group_tree *block_group_cache,
6670 struct btrfs_key *key,
6671 struct extent_buffer *eb, int slot)
6673 struct block_group_record *rec;
6676 rec = btrfs_new_block_group_record(eb, key, slot);
6677 ret = insert_block_group_record(block_group_cache, rec);
6679 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6680 rec->objectid, rec->offset);
/*
 * Build a device extent record from the dev extent item at @slot of @leaf.
 *
 * The record is zero-allocated. The cache entry takes key->objectid as its
 * objectid, key->offset as its start and the extent length as its size. The
 * leaf generation, the three key fields, the chunk objectid, the chunk
 * offset and the length read from the item are stored, and both list heads
 * are initialised.
 * NOTE(review): "memory allocation failed" is presumably printed when the
 * allocation fails; the failure handling, the field receiving the chunk
 * offset and the return statement are not visible here.
 */
6687 struct device_extent_record *
6688 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6689 struct btrfs_key *key, int slot)
6691 struct device_extent_record *rec;
6692 struct btrfs_dev_extent *ptr;
6694 rec = calloc(1, sizeof(*rec));
6696 fprintf(stderr, "memory allocation failed\n");
6700 rec->cache.objectid = key->objectid;
6701 rec->cache.start = key->offset;
6703 rec->generation = btrfs_header_generation(leaf);
6705 rec->objectid = key->objectid;
6706 rec->type = key->type;
6707 rec->offset = key->offset;
6709 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6710 rec->chunk_objecteid =
6711 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6713 btrfs_dev_extent_chunk_offset(leaf, ptr);
6714 rec->length = btrfs_dev_extent_length(leaf, ptr);
6715 rec->cache.size = rec->length;
6717 INIT_LIST_HEAD(&rec->chunk_list);
6718 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device extent record for the item at @slot of @eb with
 * btrfs_new_device_extent_record() and insert it into @dev_extent_cache.
 * "Device extent[...] existed." is printed for a failed insertion
 * (presumably guarded by a test on ret; that test and the return value are
 * not visible here).
 */
6724 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6725 struct btrfs_key *key, struct extent_buffer *eb,
6728 struct device_extent_record *rec;
6731 rec = btrfs_new_device_extent_record(eb, key, slot);
6732 ret = insert_device_extent_record(dev_extent_cache, rec);
6735 "Device extent[%llu, %llu, %llu] existed.\n",
6736 rec->objectid, rec->offset, rec->length);
/*
 * Record one EXTENT_ITEM or METADATA_ITEM from the extent tree, together
 * with its inline backrefs, in @extent_cache.
 *
 * @root:         supplies nodesize and sectorsize for the sanity checks
 * @extent_cache: cache tree that receives the extent record and backrefs
 * @eb, @slot:    leaf and slot of the item
 *
 * The extent length is root->nodesize for a METADATA_ITEM key and
 * key.offset otherwise. Items whose bytenr is not sectorsize aligned, whose
 * metadata length differs from nodesize, or whose data length is not
 * sectorsize aligned are reported with error().
 *
 * Items smaller than struct btrfs_extent_item are handled as v0 extent
 * items when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: only the reference
 * count is read and the result of add_extent_rec() is returned.
 *
 * For regular items the extent record is added first, then every inline
 * reference between the end of the extent item (plus the tree block info
 * for a tree block stored under an EXTENT_ITEM key) and the end of the item
 * is turned into a tree or data backref. An unknown inline ref type is
 * reported as a corrupt extent record.
 *
 * NOTE(review): the add_extent_rec() result is returned on the v0 path but
 * not used on the regular path -- confirm that this is intended.
 */
6743 static int process_extent_item(struct btrfs_root *root,
6744 struct cache_tree *extent_cache,
6745 struct extent_buffer *eb, int slot)
6747 struct btrfs_extent_item *ei;
6748 struct btrfs_extent_inline_ref *iref;
6749 struct btrfs_extent_data_ref *dref;
6750 struct btrfs_shared_data_ref *sref;
6751 struct btrfs_key key;
6752 struct extent_record tmpl;
6757 u32 item_size = btrfs_item_size_nr(eb, slot);
6763 btrfs_item_key_to_cpu(eb, &key, slot);
6765 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6767 num_bytes = root->nodesize;
6769 num_bytes = key.offset;
6772 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6773 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6774 key.objectid, root->sectorsize);
6777 if (item_size < sizeof(*ei)) {
6778 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6779 struct btrfs_extent_item_v0 *ei0;
6780 BUG_ON(item_size != sizeof(*ei0));
6781 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6782 refs = btrfs_extent_refs_v0(eb, ei0);
6786 memset(&tmpl, 0, sizeof(tmpl));
6787 tmpl.start = key.objectid;
6788 tmpl.nr = num_bytes;
6789 tmpl.extent_item_refs = refs;
6790 tmpl.metadata = metadata;
6792 tmpl.max_size = num_bytes;
6794 return add_extent_rec(extent_cache, &tmpl);
6797 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6798 refs = btrfs_extent_refs(eb, ei);
6799 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6803 if (metadata && num_bytes != root->nodesize) {
6804 error("ignore invalid metadata extent, length %llu does not equal to %u",
6805 num_bytes, root->nodesize);
6808 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6809 error("ignore invalid data extent, length %llu is not aligned to %u",
6810 num_bytes, root->sectorsize);
6814 memset(&tmpl, 0, sizeof(tmpl));
6815 tmpl.start = key.objectid;
6816 tmpl.nr = num_bytes;
6817 tmpl.extent_item_refs = refs;
6818 tmpl.metadata = metadata;
6820 tmpl.max_size = num_bytes;
6821 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline references start right after the extent item; a tree block stored
 * under an EXTENT_ITEM key carries a btrfs_tree_block_info in between.
 */
6823 ptr = (unsigned long)(ei + 1);
6824 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6825 key.type == BTRFS_EXTENT_ITEM_KEY)
6826 ptr += sizeof(struct btrfs_tree_block_info);
6828 end = (unsigned long)ei + item_size;
6830 iref = (struct btrfs_extent_inline_ref *)ptr;
6831 type = btrfs_extent_inline_ref_type(eb, iref);
6832 offset = btrfs_extent_inline_ref_offset(eb, iref);
6834 case BTRFS_TREE_BLOCK_REF_KEY:
6835 ret = add_tree_backref(extent_cache, key.objectid,
6839 "add_tree_backref failed (extent items tree block): %s",
6842 case BTRFS_SHARED_BLOCK_REF_KEY:
6843 ret = add_tree_backref(extent_cache, key.objectid,
6847 "add_tree_backref failed (extent items shared block): %s",
6850 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref is stored in place of the inline ref offset field. */
6851 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6852 add_data_backref(extent_cache, key.objectid, 0,
6853 btrfs_extent_data_ref_root(eb, dref),
6854 btrfs_extent_data_ref_objectid(eb,
6856 btrfs_extent_data_ref_offset(eb, dref),
6857 btrfs_extent_data_ref_count(eb, dref),
6860 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows the inline ref header. */
6861 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6862 add_data_backref(extent_cache, key.objectid, offset,
6864 btrfs_shared_data_ref_count(eb, sref),
6868 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6869 key.objectid, key.type, num_bytes);
6872 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range starting at @offset and spanning @bytes is covered
 * by exactly one entry of @cache->free_space_ctl, after carving out any
 * superblock mirror stripes that overlap the range.
 *
 * For every superblock mirror, btrfs_rmap_block() yields the logical
 * addresses (logical[]) and the stripe_len the mirror occupies in this
 * block group. Stripes entirely before or after the range are tested for
 * first; a stripe at the start or at the end trims the range; a stripe in
 * the middle makes the function recurse on the left part and carry on with
 * the right part.
 *
 * What is left of the range is looked up with btrfs_find_free_space(). A
 * missing entry, or an entry whose offset or size differs, is reported on
 * stderr. A matching entry is removed with unlink_free_space(), which lets
 * verify_space_cache() detect leftover entries afterwards.
 */
6879 static int check_cache_range(struct btrfs_root *root,
6880 struct btrfs_block_group_cache *cache,
6881 u64 offset, u64 bytes)
6883 struct btrfs_free_space *entry;
6889 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6890 bytenr = btrfs_sb_offset(i);
6891 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6892 cache->key.objectid, bytenr, 0,
6893 &logical, &nr, &stripe_len);
6898 if (logical[nr] + stripe_len <= offset)
6900 if (offset + bytes <= logical[nr])
6902 if (logical[nr] == offset) {
6903 if (stripe_len >= bytes) {
6907 bytes -= stripe_len;
6908 offset += stripe_len;
6909 } else if (logical[nr] < offset) {
6910 if (logical[nr] + stripe_len >=
6915 bytes = (offset + bytes) -
6916 (logical[nr] + stripe_len);
6917 offset = logical[nr] + stripe_len;
6920 * Could be tricky, the super may land in the
6921 * middle of the area we're checking. First
6922 * check the easiest case, it's at the end.
6924 if (logical[nr] + stripe_len >=
6926 bytes = logical[nr] - offset;
6930 /* Check the left side */
6931 ret = check_cache_range(root, cache,
6933 logical[nr] - offset);
6939 /* Now we continue with the right side */
6940 bytes = (offset + bytes) -
6941 (logical[nr] + stripe_len);
6942 offset = logical[nr] + stripe_len;
6949 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6951 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6952 offset, offset+bytes);
6956 if (entry->offset != offset) {
6957 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6962 if (entry->bytes != bytes) {
6963 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6964 bytes, entry->bytes, offset);
6968 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the free space entries loaded for @cache against the extent
 * tree.
 *
 * @root is replaced by fs_info->extent_root. The walk starts at the larger
 * of the block group start and BTRFS_SUPER_INFO_OFFSET and visits the
 * EXTENT_ITEM and METADATA_ITEM keys that lie inside the block group.
 * "last" tracks the end of the previous extent (key.offset bytes long for
 * an EXTENT_ITEM, root->nodesize otherwise). Every gap between "last" and
 * the next extent, and the tail up to the end of the block group, is
 * passed to check_cache_range().
 *
 * After the walk, entries still linked in
 * free_space_ctl->free_space_offset are reported on stderr.
 */
6973 static int verify_space_cache(struct btrfs_root *root,
6974 struct btrfs_block_group_cache *cache)
6976 struct btrfs_path path;
6977 struct extent_buffer *leaf;
6978 struct btrfs_key key;
6982 root = root->fs_info->extent_root;
6984 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6986 btrfs_init_path(&path);
6987 key.objectid = last;
6989 key.type = BTRFS_EXTENT_ITEM_KEY;
6990 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6995 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6996 ret = btrfs_next_leaf(root, &path);
7004 leaf = path.nodes[0];
7005 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or past the end of the block group end the walk. */
7006 if (key.objectid >= cache->key.offset + cache->key.objectid)
7008 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7009 key.type != BTRFS_METADATA_ITEM_KEY) {
/* An extent that starts exactly at "last" leaves no gap to check. */
7014 if (last == key.objectid) {
7015 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7016 last = key.objectid + key.offset;
7018 last = key.objectid + root->nodesize;
7023 ret = check_cache_range(root, cache, last,
7024 key.objectid - last);
7027 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7028 last = key.objectid + key.offset;
7030 last = key.objectid + root->nodesize;
/* Free space between the last extent and the end of the block group. */
7034 if (last < cache->key.objectid + cache->key.offset)
7035 ret = check_cache_range(root, cache, last,
7036 cache->key.objectid +
7037 cache->key.offset - last);
7040 btrfs_release_path(&path);
7043 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7044 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache (or the free space tree) of the block
 * groups of the filesystem.
 *
 * A notice is printed when the superblock cache generation is neither -1
 * nor equal to the superblock generation. With progress reporting enabled
 * the TASK_FREE_SPACE task is started, and task_stop() is called at the
 * end.
 *
 * Block groups are looked up one after another, starting past the primary
 * superblock and continuing at the end of the previous block group. For
 * each one the free space ctl is initialised when absent. The free space
 * is then loaded from the free space tree when the FREE_SPACE_TREE
 * compat_ro bit is set (with super stripes excluded around the load), or
 * from the free space cache otherwise. verify_space_cache() checks the
 * result, and a failure is reported with the block group start.
 *
 * Returns -EINVAL when "error" is non-zero, 0 otherwise.
 */
7052 static int check_space_cache(struct btrfs_root *root)
7054 struct btrfs_block_group_cache *cache;
7055 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7059 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7060 btrfs_super_generation(root->fs_info->super_copy) !=
7061 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7062 printf("cache and super generation don't match, space cache "
7063 "will be invalidated\n");
7067 if (ctx.progress_enabled) {
7068 ctx.tp = TASK_FREE_SPACE;
7069 task_start(ctx.info);
7073 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7077 start = cache->key.objectid + cache->key.offset;
7078 if (!cache->free_space_ctl) {
7079 if (btrfs_init_free_space_ctl(cache,
7080 root->sectorsize)) {
7085 btrfs_remove_free_space_cache(cache);
7088 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7089 ret = exclude_super_stripes(root, cache);
7091 fprintf(stderr, "could not exclude super stripes: %s\n",
7096 ret = load_free_space_tree(root->fs_info, cache);
7097 free_excluded_extents(root, cache);
7099 fprintf(stderr, "could not load free space tree: %s\n",
7106 ret = load_free_space_cache(root->fs_info, cache);
7111 ret = verify_space_cache(root, cache);
7113 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7114 cache->key.objectid);
7119 task_stop(ctx.info);
7121 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare the checksum of every
 * sector with the value stored in the item.
 *
 * @root:        supplies sectorsize, the superblock csum size and the
 *               mapping tree
 * @bytenr:      logical start of the data range
 * @num_bytes:   length of the range; tested against the sectorsize first
 * @leaf_offset: offset inside @eb where the checksums of the item start
 * @eb:          leaf that holds the csum item
 *
 * A buffer of @num_bytes is allocated and filled with read_extent_data()
 * from the current mirror. For each sector the checksum is computed with
 * btrfs_csum_data()/btrfs_csum_final() and compared with the expected
 * value read from @eb at leaf_offset + sector index * csum_size. A mismatch
 * is printed with mirror, bytenr and both checksums, and the number of
 * copies is queried to decide whether another mirror can be tried.
 */
7124 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7125 u64 num_bytes, unsigned long leaf_offset,
7126 struct extent_buffer *eb) {
7129 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7131 unsigned long csum_offset;
7135 u64 data_checked = 0;
7141 if (num_bytes % root->sectorsize)
7144 data = malloc(num_bytes);
7148 while (offset < num_bytes) {
7151 read_len = num_bytes - offset;
7152 /* read as much data as possible in one go */
7153 ret = read_extent_data(root, data + offset,
7154 bytenr + offset, &read_len, mirror);
7158 /* verify the checksum of every sector that was read */
7159 while (data_checked < read_len) {
7161 tmp = offset + data_checked;
7163 csum = btrfs_csum_data((char *)data + tmp,
7164 csum, root->sectorsize);
7165 btrfs_csum_final(csum, (u8 *)&csum);
7167 csum_offset = leaf_offset +
7168 tmp / root->sectorsize * csum_size;
7169 read_extent_buffer(eb, (char *)&csum_expected,
7170 csum_offset, csum_size);
7171 /* try another mirror */
7172 if (csum != csum_expected) {
7173 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7174 mirror, bytenr + tmp,
7175 csum, csum_expected);
7176 num_copies = btrfs_num_copies(
7177 &root->fs_info->mapping_tree,
7179 if (mirror < num_copies - 1) {
7184 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr and spanning num_bytes is
 * fully covered by EXTENT_ITEMs of the extent tree.
 *
 * The search key is (@bytenr, EXTENT_ITEM, -1); from the slot found the
 * path is stepped back, and stepped back further while the key type is
 * greater than EXTENT_ITEM. The items are then walked forward: keys that
 * are not EXTENT_ITEMs are tested for and skipped over, as are extents
 * ending before @bytenr, and an extent starting past the end of the range
 * ends the walk. Each overlapping extent trims the range. When an extent
 * sits in the middle, the right part is checked by a recursive call and
 * the walk goes on with the left part.
 *
 * If bytes remain uncovered and no error occurred, the remaining range is
 * reported with "There are no extents for csum range".
 */
7193 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7196 struct btrfs_path path;
7197 struct extent_buffer *leaf;
7198 struct btrfs_key key;
7201 btrfs_init_path(&path);
7202 key.objectid = bytenr;
7203 key.type = BTRFS_EXTENT_ITEM_KEY;
7204 key.offset = (u64)-1;
7207 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7210 fprintf(stderr, "Error looking up extent record %d\n", ret);
7211 btrfs_release_path(&path);
7214 if (path.slots[0] > 0) {
7217 ret = btrfs_prev_leaf(root, &path);
7220 } else if (ret > 0) {
7227 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7230 * Block group items come before extent items if they have the same
7231 * bytenr, so walk back one more just in case. Dear future traveller,
7232 * first congrats on mastering time travel. Now if it's not too much
7233 * trouble could you go back to 2006 and tell Chris to make the
7234 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7235 * EXTENT_ITEM_KEY please?
7237 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7238 if (path.slots[0] > 0) {
7241 ret = btrfs_prev_leaf(root, &path);
7244 } else if (ret > 0) {
7249 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7253 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7254 ret = btrfs_next_leaf(root, &path);
7256 fprintf(stderr, "Error going to next leaf "
7258 btrfs_release_path(&path);
7264 leaf = path.nodes[0];
7265 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7266 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7270 if (key.objectid + key.offset < bytenr) {
7274 if (key.objectid > bytenr + num_bytes)
7277 if (key.objectid == bytenr) {
7278 if (key.offset >= num_bytes) {
7282 num_bytes -= key.offset;
7283 bytenr += key.offset;
7284 } else if (key.objectid < bytenr) {
7285 if (key.objectid + key.offset >= bytenr + num_bytes) {
7289 num_bytes = (bytenr + num_bytes) -
7290 (key.objectid + key.offset);
7291 bytenr = key.objectid + key.offset;
7293 if (key.objectid + key.offset < bytenr + num_bytes) {
7294 u64 new_start = key.objectid + key.offset;
7295 u64 new_bytes = bytenr + num_bytes - new_start;
7298 * Weird case, the extent is in the middle of
7299 * our range, we'll have to search one side
7300 * and then the other. Not sure if this happens
7301 * in real life, but no harm in coding it up
7302 * anyway just in case.
7304 btrfs_release_path(&path);
7305 ret = check_extent_exists(root, new_start,
7308 fprintf(stderr, "Right section didn't "
7312 num_bytes = key.objectid - bytenr;
7315 num_bytes = key.objectid - bytenr;
7322 if (num_bytes && !ret) {
7323 fprintf(stderr, "There are no extents for csum range "
7324 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7328 btrfs_release_path(&path);
/*
 * Walk the csum tree and make sure that every checksummed range is backed
 * by extent records; optionally verify the data checksums as well.
 *
 * @root is replaced by fs_info->csum_root; the function reports
 * "No valid csum tree found" when the root node is not up to date.
 *
 * For every EXTENT_CSUM item the covered data length is
 * (item size / csum size) * sectorsize. When check_data_csum is set the
 * data is verified with check_extent_csums(); otherwise that step is
 * skipped through the skip_csum_check label.
 *
 * "offset" and "num_bytes" accumulate a run of adjacent csum items. When
 * the next item does not start at offset + num_bytes, the accumulated run
 * is passed to check_extent_exists() and a failure is reported as
 * "Csum exists for ... but there is no extent record".
 */
7332 static int check_csums(struct btrfs_root *root)
7334 struct btrfs_path path;
7335 struct extent_buffer *leaf;
7336 struct btrfs_key key;
7337 u64 offset = 0, num_bytes = 0;
7338 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7342 unsigned long leaf_offset;
7344 root = root->fs_info->csum_root;
7345 if (!extent_buffer_uptodate(root->node)) {
7346 fprintf(stderr, "No valid csum tree found\n");
7350 btrfs_init_path(&path);
7351 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7352 key.type = BTRFS_EXTENT_CSUM_KEY;
7354 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7356 fprintf(stderr, "Error searching csum tree %d\n", ret);
7357 btrfs_release_path(&path);
7361 if (ret > 0 && path.slots[0])
7366 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7367 ret = btrfs_next_leaf(root, &path);
7369 fprintf(stderr, "Error going to next leaf "
7376 leaf = path.nodes[0];
7378 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7379 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each stored checksum covers one sector of data. */
7384 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7385 csum_size) * root->sectorsize;
7386 if (!check_data_csum)
7387 goto skip_csum_check;
7388 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7389 ret = check_extent_csums(root, key.offset, data_len,
7395 offset = key.offset;
/* A gap in the csum items: check the run collected so far. */
7396 } else if (key.offset != offset + num_bytes) {
7397 ret = check_extent_exists(root, offset, num_bytes);
7399 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7400 "there is no extent record\n",
7401 offset, offset+num_bytes);
7404 offset = key.offset;
7407 num_bytes += data_len;
7411 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type,
 * then offset. Each "less than" test selects a result; presumably the
 * function reports whether @key sorts before @drop_key -- TODO confirm the
 * returned values.
 *
 * run_next_block() calls this for node keys when the node level equals
 * ri->drop_level.
 */
7415 static int is_dropped_key(struct btrfs_key *key,
7416 struct btrfs_key *drop_key) {
7417 if (key->objectid < drop_key->objectid)
7419 else if (key->objectid == drop_key->objectid) {
7420 if (key->type < drop_key->type)
7422 else if (key->type == drop_key->type) {
7423 if (key->offset < drop_key->offset)
7431 * Here are the rules for FULL_BACKREF.
7433 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7434 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7436 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7437 * if it happened after the relocation occurred since we'll have dropped the
7438 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7439 * have no real way to know for sure.
7441 * We process the blocks one root at a time, and we start from the lowest root
7442 * objectid and go to the highest. So we can just lookup the owner backref for
7443 * the record and if we don't find it then we know it doesn't exist and we have
7446 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7447 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7448 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and record the decision in its extent
 * record.
 *
 * @extent_cache: cache that must already contain a record for buf->start
 * @buf:          tree block being examined
 * @ri:           root item record of the tree currently being walked
 *
 * The flags output pointer is OR-ed with BTRFS_BLOCK_FLAG_FULL_BACKREF on
 * the full backref path.
 *
 * The tests are made in this order: root objectid below
 * BTRFS_FIRST_FREE_OBJECTID, block being the root node of @ri, RELOC
 * header flag, header owner equal to ri->objectid, and finally a lookup of
 * the tree backref of the header owner with find_tree_backref().
 *
 * On both outcomes rec->bad_full_backref is set when
 * rec->flag_block_full_backref was already set to the opposite value
 * (anything other than FLAG_UNSET and the value just computed).
 */
7450 static int calc_extent_flag(struct cache_tree *extent_cache,
7451 struct extent_buffer *buf,
7452 struct root_item_record *ri,
7455 struct extent_record *rec;
7456 struct cache_extent *cache;
7457 struct tree_backref *tback;
7460 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7461 /* we have added this extent before */
7465 rec = container_of(cache, struct extent_record, cache);
7468 * Except file/reloc tree, we can not have
7471 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7476 if (buf->start == ri->bytenr)
7479 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7482 owner = btrfs_header_owner(buf);
7483 if (owner == ri->objectid)
7486 tback = find_tree_backref(rec, 0, owner);
7491 if (rec->flag_block_full_backref != FLAG_UNSET &&
7492 rec->flag_block_full_backref != 0)
7493 rec->bad_full_backref = 1;
7496 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7497 if (rec->flag_block_full_backref != FLAG_UNSET &&
7498 rec->flag_block_full_backref != 1)
7499 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<root>)" on stderr.
 *
 * @key_type: key type, rendered by print_key_type()
 * @rootid:   root objectid, rendered by print_objectid()
 */
7503 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7505 fprintf(stderr, "Invalid key type(");
7506 print_key_type(stderr, 0, key_type);
7507 fprintf(stderr, ") found in root(");
7508 print_objectid(stderr, rootid, 0);
7509 fprintf(stderr, ")\n");
7513 * Check if the key is valid with its extent buffer.
7515 * This is an early check in case an invalid key exists in an extent buffer
7516 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type is allowed in the tree @rootid.
 *
 * Rules visible in the case labels:
 *   DEV_ITEM, CHUNK_ITEM                      -> chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP   -> extent tree only
 *   ROOT_ITEM                                 -> root tree only
 *   DEV_EXTENT                                -> device tree only
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the "valid in csum and log tree" case uses
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid, next to labels that
 * are all key types. This looks like it was meant to be
 * BTRFS_EXTENT_CSUM_KEY -- confirm, and re-check the root condition of that
 * case at the same time.
 */
7519 static int check_type_with_root(u64 rootid, u8 key_type)
7522 /* Only valid in chunk tree */
7523 case BTRFS_DEV_ITEM_KEY:
7524 case BTRFS_CHUNK_ITEM_KEY:
7525 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7528 /* valid in csum and log tree */
7529 case BTRFS_CSUM_TREE_OBJECTID:
7530 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7534 case BTRFS_EXTENT_ITEM_KEY:
7535 case BTRFS_METADATA_ITEM_KEY:
7536 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7537 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7540 case BTRFS_ROOT_ITEM_KEY:
7541 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7544 case BTRFS_DEV_EXTENT_KEY:
7545 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7551 report_mismatch_key_root(key_type, rootid);
/*
 * Read the next pending tree block and record everything it references.
 *
 * Steps, in the order they appear below:
 *
 * 1. pick_next_pending() fills @bits with candidate blocks; they are added
 *    to @reada and handed to readahead_tree_block().
 * 2. bits[0] becomes the block to process. Its start is stored through
 *    the "last" pointer and it is looked up in (and removed from)
 *    @pending, @reada and @nodes. If @extent_cache already has a record
 *    for it, the parent generation of the record is used for the read.
 * 3. The block is read with read_tree_block(); a block that is not up to
 *    date is recorded with record_bad_block_io().
 * 4. The backref flags are obtained from btrfs_lookup_extent_info() unless
 *    init_extent_tree is set, with calc_extent_flag() as the alternative.
 *    When calc_extent_flag() fails a message is printed and FULL_BACKREF
 *    is assumed. The flag is then cross-checked against the header owner,
 *    the RELOC header flag and ri->last_snapshot; an inconsistency flips
 *    the flag and sets rec->bad_full_backref. The final value is stored in
 *    rec->flag_block_full_backref.
 * 5. check_block() validates the block.
 * 6. Leaf: the free space is added to btree_space_waste and every item is
 *    checked with check_type_with_root() and dispatched on its key type to
 *    the matching process_*_item() / add_tree_backref() /
 *    add_data_backref() helper. ORPHAN items are handled by queueing a
 *    bad_item on delete_items. EXTENT_DATA items are tested for the inline
 *    type and for a zero disk bytenr; the remaining ones are added to
 *    data_bytes_allocated and data_bytes_referenced and get a data
 *    backref.
 *    Node: for every block pointer an extent record (with parent key and
 *    generation) and a tree backref are added and the child is queued on
 *    @nodes or @pending. Keys for which is_dropped_key() matches at
 *    ri->drop_level are tested for first. Unused key pointer slots are
 *    added to btree_space_waste.
 * 7. The total_* byte counters and found_old_backref are updated and the
 *    buffer is released with free_extent_buffer().
 */
7555 static int run_next_block(struct btrfs_root *root,
7556 struct block_info *bits,
7559 struct cache_tree *pending,
7560 struct cache_tree *seen,
7561 struct cache_tree *reada,
7562 struct cache_tree *nodes,
7563 struct cache_tree *extent_cache,
7564 struct cache_tree *chunk_cache,
7565 struct rb_root *dev_cache,
7566 struct block_group_tree *block_group_cache,
7567 struct device_extent_tree *dev_extent_cache,
7568 struct root_item_record *ri)
7570 struct extent_buffer *buf;
7571 struct extent_record *rec = NULL;
7582 struct btrfs_key key;
7583 struct cache_extent *cache;
7586 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7587 bits_nr, &reada_bits);
7592 for(i = 0; i < nritems; i++) {
7593 ret = add_cache_extent(reada, bits[i].start,
7598 /* fixme, get the parent transid */
7599 readahead_tree_block(root, bits[i].start,
/* The first picked block is the one processed by this call. */
7603 *last = bits[0].start;
7604 bytenr = bits[0].start;
7605 size = bits[0].size;
7607 cache = lookup_cache_extent(pending, bytenr, size);
7609 remove_cache_extent(pending, cache);
7612 cache = lookup_cache_extent(reada, bytenr, size);
7614 remove_cache_extent(reada, cache);
7617 cache = lookup_cache_extent(nodes, bytenr, size);
7619 remove_cache_extent(nodes, cache);
7622 cache = lookup_cache_extent(extent_cache, bytenr, size);
7624 rec = container_of(cache, struct extent_record, cache);
7625 gen = rec->parent_generation;
7628 /* fixme, get the real parent transid */
7629 buf = read_tree_block(root, bytenr, size, gen);
7630 if (!extent_buffer_uptodate(buf)) {
7631 record_bad_block_io(root->fs_info,
7632 extent_cache, bytenr, size);
7636 nritems = btrfs_header_nritems(buf);
7639 if (!init_extent_tree) {
7640 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7641 btrfs_header_level(buf), 1, NULL,
7644 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7646 fprintf(stderr, "Couldn't calc extent flags\n");
7647 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7652 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7654 fprintf(stderr, "Couldn't calc extent flags\n");
7655 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7659 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7661 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7662 ri->objectid == btrfs_header_owner(buf)) {
7664 * Ok we got to this block from it's original owner and
7665 * we have FULL_BACKREF set. Relocation can leave
7666 * converted blocks over so this is altogether possible,
7667 * however it's not possible if the generation > the
7668 * last snapshot, so check for this case.
7670 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7671 btrfs_header_generation(buf) > ri->last_snapshot) {
7672 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7673 rec->bad_full_backref = 1;
7678 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7679 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7680 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7681 rec->bad_full_backref = 1;
7685 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7686 rec->flag_block_full_backref = 1;
7690 rec->flag_block_full_backref = 0;
7692 owner = btrfs_header_owner(buf);
7695 ret = check_block(root, extent_cache, buf, flags);
7699 if (btrfs_is_leaf(buf)) {
7700 btree_space_waste += btrfs_leaf_free_space(root, buf);
7701 for (i = 0; i < nritems; i++) {
7702 struct btrfs_file_extent_item *fi;
7703 btrfs_item_key_to_cpu(buf, &key, i);
7705 * Check key type against the leaf owner.
7706 * Could filter quite a lot of early error if
7709 if (check_type_with_root(btrfs_header_owner(buf),
7711 fprintf(stderr, "ignoring invalid key\n");
7714 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7715 process_extent_item(root, extent_cache, buf,
7719 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7720 process_extent_item(root, extent_cache, buf,
7724 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7726 btrfs_item_size_nr(buf, i);
7729 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7730 process_chunk_item(chunk_cache, &key, buf, i);
7733 if (key.type == BTRFS_DEV_ITEM_KEY) {
7734 process_device_item(dev_cache, &key, buf, i);
7737 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7738 process_block_group_item(block_group_cache,
7742 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7743 process_device_extent_item(dev_extent_cache,
7748 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7749 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7750 process_extent_ref_v0(extent_cache, buf, i);
7757 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7758 ret = add_tree_backref(extent_cache,
7759 key.objectid, 0, key.offset, 0);
7762 "add_tree_backref failed (leaf tree block): %s",
7766 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7767 ret = add_tree_backref(extent_cache,
7768 key.objectid, key.offset, 0, 0);
7771 "add_tree_backref failed (leaf shared block): %s",
7775 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7776 struct btrfs_extent_data_ref *ref;
7777 ref = btrfs_item_ptr(buf, i,
7778 struct btrfs_extent_data_ref);
7779 add_data_backref(extent_cache,
7781 btrfs_extent_data_ref_root(buf, ref),
7782 btrfs_extent_data_ref_objectid(buf,
7784 btrfs_extent_data_ref_offset(buf, ref),
7785 btrfs_extent_data_ref_count(buf, ref),
7786 0, root->sectorsize);
7789 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7790 struct btrfs_shared_data_ref *ref;
7791 ref = btrfs_item_ptr(buf, i,
7792 struct btrfs_shared_data_ref);
7793 add_data_backref(extent_cache,
7794 key.objectid, key.offset, 0, 0, 0,
7795 btrfs_shared_data_ref_count(buf, ref),
7796 0, root->sectorsize);
7799 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7800 struct bad_item *bad;
7802 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7806 bad = malloc(sizeof(struct bad_item));
7809 INIT_LIST_HEAD(&bad->list);
7810 memcpy(&bad->key, &key,
7811 sizeof(struct btrfs_key));
7812 bad->root_id = owner;
7813 list_add_tail(&bad->list, &delete_items);
7816 if (key.type != BTRFS_EXTENT_DATA_KEY)
7818 fi = btrfs_item_ptr(buf, i,
7819 struct btrfs_file_extent_item);
7820 if (btrfs_file_extent_type(buf, fi) ==
7821 BTRFS_FILE_EXTENT_INLINE)
7823 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7826 data_bytes_allocated +=
7827 btrfs_file_extent_disk_num_bytes(buf, fi);
7828 if (data_bytes_allocated < root->sectorsize) {
7831 data_bytes_referenced +=
7832 btrfs_file_extent_num_bytes(buf, fi);
7833 add_data_backref(extent_cache,
7834 btrfs_file_extent_disk_bytenr(buf, fi),
7835 parent, owner, key.objectid, key.offset -
7836 btrfs_file_extent_offset(buf, fi), 1, 1,
7837 btrfs_file_extent_disk_num_bytes(buf, fi));
7841 struct btrfs_key first_key;
7843 first_key.objectid = 0;
7846 btrfs_item_key_to_cpu(buf, &first_key, 0);
7847 level = btrfs_header_level(buf);
7848 for (i = 0; i < nritems; i++) {
7849 struct extent_record tmpl;
7851 ptr = btrfs_node_blockptr(buf, i);
7852 size = root->nodesize;
7853 btrfs_node_key_to_cpu(buf, &key, i);
7855 if ((level == ri->drop_level)
7856 && is_dropped_key(&key, &ri->drop_key)) {
7861 memset(&tmpl, 0, sizeof(tmpl));
7862 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7863 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7868 tmpl.max_size = size;
7869 ret = add_extent_rec(extent_cache, &tmpl);
7873 ret = add_tree_backref(extent_cache, ptr, parent,
7877 "add_tree_backref failed (non-leaf block): %s",
7883 add_pending(nodes, seen, ptr, size);
7885 add_pending(pending, seen, ptr, size);
7888 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7889 nritems) * sizeof(struct btrfs_key_ptr);
7891 total_btree_bytes += buf->len;
7892 if (fs_root_objectid(btrfs_header_owner(buf)))
7893 total_fs_tree_bytes += buf->len;
7894 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7895 total_extent_tree_bytes += buf->len;
7896 if (!found_old_backref &&
7897 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7898 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7899 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7900 found_old_backref = 1;
7902 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for processing and register it in
 * @extent_cache.
 *
 * A block with header level > 0 is queued on @nodes, a leaf on @pending
 * (both through add_pending() with @seen). An extent record starting at
 * buf->start with max_size buf->len is added, followed by a tree backref:
 * the first form passes buf->start as second block argument and is used
 * for the reloc tree objectid or for a backref revision older than
 * BTRFS_MIXED_BACKREF_REV; otherwise the backref is keyed by the root
 * objectid.
 *
 * The result of the add_tree_backref() call is kept in ret.
 */
7906 static int add_root_to_pending(struct extent_buffer *buf,
7907 struct cache_tree *extent_cache,
7908 struct cache_tree *pending,
7909 struct cache_tree *seen,
7910 struct cache_tree *nodes,
7913 struct extent_record tmpl;
7916 if (btrfs_header_level(buf) > 0)
7917 add_pending(nodes, seen, buf->start, buf->len);
7919 add_pending(pending, seen, buf->start, buf->len);
7921 memset(&tmpl, 0, sizeof(tmpl));
7922 tmpl.start = buf->start;
7927 tmpl.max_size = buf->len;
7928 add_extent_rec(extent_cache, &tmpl);
7930 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7931 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7932 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7935 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7940 /* as we fix the tree, we might be deleting blocks that
7941 * we're tracking for repair. This hook makes sure we
7942 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the fsck extent cache in sync when an extent reference is dropped.
 *
 * The extent record covering (@bytenr, @num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache. @owner at or above
 * BTRFS_FIRST_FREE_OBJECTID marks a data extent.
 *
 * Data extents: the matching data backref has refs_to_drop subtracted from
 * found_ref (and from rec->refs) and from num_refs (and from
 * rec->extent_item_refs when that is non-zero); the found_ref and
 * found_extent_tree bits are cleared once their counters reach zero.
 * Tree blocks: the matching tree backref has its found_ref and
 * found_extent_tree bits cleared and rec->extent_item_refs is decremented
 * when non-zero.
 *
 * Finally maybe_free_extent_rec() gets a chance to drop the record.
 *
 * NOTE(review): both branches unlink the backref when found_extent_tree is
 * clear while found_ref is still set; dropping it when both bits are clear
 * would seem more natural -- confirm the intended condition.
 */
7944 static int free_extent_hook(struct btrfs_trans_handle *trans,
7945 struct btrfs_root *root,
7946 u64 bytenr, u64 num_bytes, u64 parent,
7947 u64 root_objectid, u64 owner, u64 offset,
7950 struct extent_record *rec;
7951 struct cache_extent *cache;
7953 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7955 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7956 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7960 rec = container_of(cache, struct extent_record, cache);
7962 struct data_backref *back;
7963 back = find_data_backref(rec, parent, root_objectid, owner,
7964 offset, 1, bytenr, num_bytes);
7967 if (back->node.found_ref) {
7968 back->found_ref -= refs_to_drop;
7970 rec->refs -= refs_to_drop;
7972 if (back->node.found_extent_tree) {
7973 back->num_refs -= refs_to_drop;
7974 if (rec->extent_item_refs)
7975 rec->extent_item_refs -= refs_to_drop;
7977 if (back->found_ref == 0)
7978 back->node.found_ref = 0;
7979 if (back->num_refs == 0)
7980 back->node.found_extent_tree = 0;
7982 if (!back->node.found_extent_tree && back->node.found_ref) {
7983 list_del(&back->node.list);
7987 struct tree_backref *back;
7988 back = find_tree_backref(rec, parent, root_objectid);
7991 if (back->node.found_ref) {
7994 back->node.found_ref = 0;
7996 if (back->node.found_extent_tree) {
7997 if (rec->extent_item_refs)
7998 rec->extent_item_refs--;
7999 back->node.found_extent_tree = 0;
8001 if (!back->node.found_extent_tree && back->node.found_ref) {
8002 list_del(&back->node.list);
8006 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to one bytenr.
 *
 * The search starts at (bytenr, <type>, -1) in the extent tree and moves
 * towards smaller keys. Only items whose objectid equals bytenr and whose
 * type is one of EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF are
 * deleted. For any other type the path is released and the search key is
 * moved just below the found key (offset - 1 for type 0, otherwise
 * type - 1 with offset -1).
 *
 * Every deletion is logged on stderr. After an EXTENT_ITEM or
 * METADATA_ITEM is removed, btrfs_update_block_group() is called with the
 * extent size (key offset for an EXTENT_ITEM, root->nodesize for a
 * METADATA_ITEM).
 */
8011 static int delete_extent_records(struct btrfs_trans_handle *trans,
8012 struct btrfs_root *root,
8013 struct btrfs_path *path,
8016 struct btrfs_key key;
8017 struct btrfs_key found_key;
8018 struct extent_buffer *leaf;
8023 key.objectid = bytenr;
8025 key.offset = (u64)-1;
8028 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8035 if (path->slots[0] == 0)
8041 leaf = path->nodes[0];
8042 slot = path->slots[0];
8044 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8045 if (found_key.objectid != bytenr)
8048 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8049 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8050 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8051 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8052 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8053 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8054 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8055 btrfs_release_path(path);
/* Not an extent-related item: continue just below the found key. */
8056 if (found_key.type == 0) {
8057 if (found_key.offset == 0)
8059 key.offset = found_key.offset - 1;
8060 key.type = found_key.type;
8062 key.type = found_key.type - 1;
8063 key.offset = (u64)-1;
8067 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8068 found_key.objectid, found_key.type, found_key.offset);
8070 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8073 btrfs_release_path(path);
8075 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8076 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8077 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8078 found_key.offset : root->nodesize;
8080 ret = btrfs_update_block_group(trans, root, bytenr,
8087 btrfs_release_path(path);
8092 * for a single backref, this will allocate a new extent
8093 * and add the backref to it.
/*
 * Write the extent item for @rec when needed and add the reference
 * described by @back.
 *
 * @trans, @info: transaction and filesystem being repaired
 * @path:         scratch path, released before returning
 * @rec:          extent record; max_size is raised to at least the extent
 *                root nodesize
 * @back:         backref to add, either data (back->is_data) or tree
 * @allocated:    presumably tells whether the extent item already exists,
 *                in which case the insertion part is skipped -- TODO
 *                confirm
 * @flags:        extra extent flags OR-ed into a tree block item
 *
 * Insertion part: an EXTENT_ITEM keyed (rec->start, rec->max_size) is
 * inserted with zero refs and rec->generation. Data extents get
 * BTRFS_EXTENT_FLAG_DATA. Tree blocks get a zeroed btrfs_tree_block_info
 * filled with rec->info_level and a key built from rec->info_objectid,
 * plus BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags. The block group accounting
 * is updated afterwards.
 *
 * Reference part: for a data backref btrfs_inc_extent_ref() is called once
 * per found reference (dback->found_ref times); for a tree backref it is
 * called once with tback->root. Each addition is logged on stderr.
 *
 * NOTE(review): the "©_key" arguments below look like a mis-encoded
 * "&copy_key", and the copy_key declaration carries a doubled ';' --
 * verify against the upstream source before building.
 */
8095 static int record_extent(struct btrfs_trans_handle *trans,
8096 struct btrfs_fs_info *info,
8097 struct btrfs_path *path,
8098 struct extent_record *rec,
8099 struct extent_backref *back,
8100 int allocated, u64 flags)
8103 struct btrfs_root *extent_root = info->extent_root;
8104 struct extent_buffer *leaf;
8105 struct btrfs_key ins_key;
8106 struct btrfs_extent_item *ei;
8107 struct data_backref *dback;
8108 struct btrfs_tree_block_info *bi;
8111 rec->max_size = max_t(u64, rec->max_size,
8112 info->extent_root->nodesize);
8115 u32 item_size = sizeof(*ei);
8118 item_size += sizeof(*bi);
8120 ins_key.objectid = rec->start;
8121 ins_key.offset = rec->max_size;
8122 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8124 ret = btrfs_insert_empty_item(trans, extent_root, path,
8125 &ins_key, item_size);
8129 leaf = path->nodes[0];
8130 ei = btrfs_item_ptr(leaf, path->slots[0],
8131 struct btrfs_extent_item);
8133 btrfs_set_extent_refs(leaf, ei, 0);
8134 btrfs_set_extent_generation(leaf, ei, rec->generation);
8136 if (back->is_data) {
8137 btrfs_set_extent_flags(leaf, ei,
8138 BTRFS_EXTENT_FLAG_DATA);
8140 struct btrfs_disk_key copy_key;;
8142 bi = (struct btrfs_tree_block_info *)(ei + 1);
8143 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8146 btrfs_set_disk_key_objectid(©_key,
8147 rec->info_objectid);
8148 btrfs_set_disk_key_type(©_key, 0);
8149 btrfs_set_disk_key_offset(©_key, 0);
8151 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8152 btrfs_set_tree_block_key(leaf, bi, ©_key);
8154 btrfs_set_extent_flags(leaf, ei,
8155 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8158 btrfs_mark_buffer_dirty(leaf);
8159 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8160 rec->max_size, 1, 0);
8163 btrfs_release_path(path);
8166 if (back->is_data) {
8170 dback = to_data_backref(back);
8171 if (back->full_backref)
8172 parent = dback->parent;
8176 for (i = 0; i < dback->found_ref; i++) {
8177 /* if parent != 0, we're doing a full backref
8178 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8179 * just makes the backref allocator create a data
8182 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8183 rec->start, rec->max_size,
8187 BTRFS_FIRST_FREE_OBJECTID :
8193 fprintf(stderr, "adding new data backref"
8194 " on %llu %s %llu owner %llu"
8195 " offset %llu found %d\n",
8196 (unsigned long long)rec->start,
8197 back->full_backref ?
8199 back->full_backref ?
8200 (unsigned long long)parent :
8201 (unsigned long long)dback->root,
8202 (unsigned long long)dback->owner,
8203 (unsigned long long)dback->offset,
8207 struct tree_backref *tback;
8209 tback = to_tree_backref(back);
8210 if (back->full_backref)
8211 parent = tback->parent;
8215 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8216 rec->start, rec->max_size,
8217 parent, tback->root, 0, 0);
8218 fprintf(stderr, "adding new tree backref on "
8219 "start %llu len %llu parent %llu root %llu\n",
8220 rec->start, rec->max_size, parent, tback->root);
8223 btrfs_release_path(path);
/*
 * Look for the extent_entry on @entries whose bytenr and bytes both equal
 * the given values.
 *
 * The list is walked front to back with list_for_each_entry(); presumably
 * the first match is returned and NULL stands for "not found" -- TODO
 * confirm the not-found value.
 */
8227 static struct extent_entry *find_entry(struct list_head *entries,
8228 u64 bytenr, u64 bytes)
8230 struct extent_entry *entry = NULL;
8232 list_for_each_entry(entry, entries, list) {
8233 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the extent_entry on @entries that is most likely to be correct,
 * judged by the count field of the entries.
 *
 * Entries whose broken counter equals their count are tested for first and
 * are not trusted. "best" is the current candidate and "prev" the entry
 * examined before the current one. A tie between the candidate and the
 * current entry, or between "prev" and the current entry, means the search
 * has to go on. Otherwise the candidate becomes whichever of prev/entry
 * has the larger count, or is replaced when the current entry has a larger
 * count than the candidate.
 *
 * Presumably returns the candidate, or NULL when no entry stands out --
 * TODO confirm.
 */
8240 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8242 struct extent_entry *entry, *best = NULL, *prev = NULL;
8244 list_for_each_entry(entry, entries, list) {
8246 * If there are as many broken entries as entries then we know
8247 * not to trust this particular entry.
8249 if (entry->broken == entry->count)
8253 * Special case, when there are only two entries and 'best' is
8263 * If our current entry == best then we can't be sure our best
8264 * is really the best, so we need to keep searching.
8266 if (best && best->count == entry->count) {
8272 /* Prev == entry, not good enough, have to keep searching */
8273 if (!prev->broken && prev->count == entry->count)
8277 best = (prev->count > entry->count) ? prev : entry;
8278 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so that it agrees with the chosen
 * extent entry.
 *
 * @info:  filesystem the root named by dback->root is read from
 * @path:  caller-provided path, used for the read-only lookup and again for
 *         the COW search; released before the final return
 * @dback: data backref whose file extent item is to be fixed
 * @entry: extent entry (bytenr, bytes) the item is made to match
 *
 * Visible steps: read the fs root for dback->root, search it for the
 * EXTENT_DATA item of dback->owner at dback->offset and examine items (moving
 * to the next leaf when the slot runs off the current one) until one with
 * dback->disk_bytenr and dback->bytes is seen. Then start a transaction,
 * search for that key again with COW, and update disk_bytenr, offset,
 * disk_num_bytes and, for uncompressed extents, ram_bytes from entry before
 * marking the leaf dirty and committing. Compressed extents whose bytenr
 * differs from entry, and refs that reach outside the entry range, produce a
 * message on stderr.
 *
 * Returns ret when it is non-zero, otherwise the result of the commit.
 * NOTE(review): the loop construct, the error exits and some assignments
 * (for example the adjustment of offset by off_diff) are not visible in this
 * excerpt.
 */
8286 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8287 struct data_backref *dback, struct extent_entry *entry)
8289 struct btrfs_trans_handle *trans;
8290 struct btrfs_root *root;
8291 struct btrfs_file_extent_item *fi;
8292 struct extent_buffer *leaf;
8293 struct btrfs_key key;
8297 key.objectid = dback->root;
8298 key.type = BTRFS_ROOT_ITEM_KEY;
8299 key.offset = (u64)-1;
8300 root = btrfs_read_fs_root(info, &key);
8302 fprintf(stderr, "Couldn't find root for our ref\n");
8307 * The backref points to the original offset of the extent if it was
8308 * split, so we need to search down to the offset we have and then walk
8309 * forward until we find the backref we're looking for.
8311 key.objectid = dback->owner;
8312 key.type = BTRFS_EXTENT_DATA_KEY;
8313 key.offset = dback->offset;
8314 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8316 fprintf(stderr, "Error looking up ref %d\n", ret);
8321 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8322 ret = btrfs_next_leaf(root, path);
8324 fprintf(stderr, "Couldn't find our ref, next\n");
8328 leaf = path->nodes[0];
8329 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8330 if (key.objectid != dback->owner ||
8331 key.type != BTRFS_EXTENT_DATA_KEY) {
8332 fprintf(stderr, "Couldn't find our ref, search\n");
8335 fi = btrfs_item_ptr(leaf, path->slots[0],
8336 struct btrfs_file_extent_item);
8337 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8338 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8340 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8345 btrfs_release_path(path);
8347 trans = btrfs_start_transaction(root, 1);
8349 return PTR_ERR(trans);
8352 * Ok we have the key of the file extent we want to fix, now we can cow
8353 * down to the thing and fix it.
8355 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8357 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8358 key.objectid, key.type, key.offset, ret);
8362 fprintf(stderr, "Well that's odd, we just found this key "
8363 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8368 leaf = path->nodes[0];
8369 fi = btrfs_item_ptr(leaf, path->slots[0],
8370 struct btrfs_file_extent_item);
8372 if (btrfs_file_extent_compression(leaf, fi) &&
8373 dback->disk_bytenr != entry->bytenr) {
8374 fprintf(stderr, "Ref doesn't match the record start and is "
8375 "compressed, please take a btrfs-image of this file "
8376 "system and send it to a btrfs developer so they can "
8377 "complete this functionality for bytenr %Lu\n",
8378 dback->disk_bytenr);
8383 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8384 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8385 } else if (dback->disk_bytenr > entry->bytenr) {
8386 u64 off_diff, offset;
8388 off_diff = dback->disk_bytenr - entry->bytenr;
8389 offset = btrfs_file_extent_offset(leaf, fi);
8390 if (dback->disk_bytenr + offset +
8391 btrfs_file_extent_num_bytes(leaf, fi) >
8392 entry->bytenr + entry->bytes) {
8393 fprintf(stderr, "Ref is past the entry end, please "
8394 "take a btrfs-image of this file system and "
8395 "send it to a btrfs developer, ref %Lu\n",
8396 dback->disk_bytenr);
8401 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8402 btrfs_set_file_extent_offset(leaf, fi, offset);
8403 } else if (dback->disk_bytenr < entry->bytenr) {
8406 offset = btrfs_file_extent_offset(leaf, fi);
8407 if (dback->disk_bytenr + offset < entry->bytenr) {
8408 fprintf(stderr, "Ref is before the entry start, please"
8409 " take a btrfs-image of this file system and "
8410 "send it to a btrfs developer, ref %Lu\n",
8411 dback->disk_bytenr);
8416 offset += dback->disk_bytenr;
8417 offset -= entry->bytenr;
8418 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8419 btrfs_set_file_extent_offset(leaf, fi, offset);
8422 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8425 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8426 * only do this if we aren't using compression, otherwise it's a
8429 if (!btrfs_file_extent_compression(leaf, fi))
8430 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8432 printf("ram bytes may be wrong?\n");
8433 btrfs_mark_buffer_dirty(leaf);
8435 err = btrfs_commit_transaction(trans, root);
8436 btrfs_release_path(path);
8437 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of an extent record agree on one
 * (bytenr, bytes) pair.
 *
 * @info: filesystem being checked, passed through to repair_ref()
 * @path: scratch path, passed through to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * Visible steps: every backref of rec that is a data backref, is not a full
 * backref and has found_ref set is looked up in a local list of extent_entry
 * objects by disk_bytenr; a new zeroed entry is allocated and appended when
 * needed. A backref that differs from rec->start or rec->nr, or that is
 * flagged broken, is checked for as a mismatch. If more than one entry exists
 * or a mismatch was seen, find_most_right_entry() is asked for a winner; the
 * entry for (rec->start, rec->nr) is then looked up, or added when the
 * backrefs were broken and rec->found_rec is set, and the vote is repeated.
 * Backrefs that differ from the winner are handed to repair_ref(). The
 * temporary entries are unlinked from the local list at the end.
 *
 * NOTE(review): the counters, the error paths, the freeing of entries and all
 * return statements are not visible in this excerpt, so the return values are
 * not documented here.
 */
8440 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8441 struct extent_record *rec)
8443 struct extent_backref *back;
8444 struct data_backref *dback;
8445 struct extent_entry *entry, *best = NULL;
8448 int broken_entries = 0;
8453 * Metadata is easy and the backrefs should always agree on bytenr and
8454 * size, if not we've got bigger issues.
8459 list_for_each_entry(back, &rec->backrefs, list) {
8460 if (back->full_backref || !back->is_data)
8463 dback = to_data_backref(back);
8466 * We only pay attention to backrefs that we found a real
8469 if (dback->found_ref == 0)
8473 * For now we only catch when the bytes don't match, not the
8474 * bytenr. We can easily do this at the same time, but I want
8475 * to have a fs image to test on before we just add repair
8476 * functionality willy-nilly so we know we won't screw up the
8480 entry = find_entry(&entries, dback->disk_bytenr,
8483 entry = malloc(sizeof(struct extent_entry));
8488 memset(entry, 0, sizeof(*entry));
8489 entry->bytenr = dback->disk_bytenr;
8490 entry->bytes = dback->bytes;
8491 list_add_tail(&entry->list, &entries);
8496 * If we only have on entry we may think the entries agree when
8497 * in reality they don't so we have to do some extra checking.
8499 if (dback->disk_bytenr != rec->start ||
8500 dback->bytes != rec->nr || back->broken)
8511 /* Yay all the backrefs agree, carry on good sir */
8512 if (nr_entries <= 1 && !mismatch)
8515 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8516 "%Lu\n", rec->start);
8519 * First we want to see if the backrefs can agree amongst themselves who
8520 * is right, so figure out which one of the entries has the highest
8523 best = find_most_right_entry(&entries);
8526 * Ok so we may have an even split between what the backrefs think, so
8527 * this is where we use the extent ref to see what it thinks.
8530 entry = find_entry(&entries, rec->start, rec->nr);
8531 if (!entry && (!broken_entries || !rec->found_rec)) {
8532 fprintf(stderr, "Backrefs don't agree with each other "
8533 "and extent record doesn't agree with anybody,"
8534 " so we can't fix bytenr %Lu bytes %Lu\n",
8535 rec->start, rec->nr);
8538 } else if (!entry) {
8540 * Ok our backrefs were broken, we'll assume this is the
8541 * correct value and add an entry for this range.
8543 entry = malloc(sizeof(struct extent_entry));
8548 memset(entry, 0, sizeof(*entry));
8549 entry->bytenr = rec->start;
8550 entry->bytes = rec->nr;
8551 list_add_tail(&entry->list, &entries);
8555 best = find_most_right_entry(&entries);
8557 fprintf(stderr, "Backrefs and extent record evenly "
8558 "split on who is right, this is going to "
8559 "require user input to fix bytenr %Lu bytes "
8560 "%Lu\n", rec->start, rec->nr);
8567 * I don't think this can happen currently as we'll abort() if we catch
8568 * this case higher up, but in case somebody removes that we still can't
8569 * deal with it properly here yet, so just bail out of that's the case.
8571 if (best->bytenr != rec->start) {
8572 fprintf(stderr, "Extent start and backref starts don't match, "
8573 "please use btrfs-image on this file system and send "
8574 "it to a btrfs developer so they can make fsck fix "
8575 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8576 rec->start, rec->nr);
8582 * Ok great we all agreed on an extent record, let's go find the real
8583 * references and fix up the ones that don't match.
8585 list_for_each_entry(back, &rec->backrefs, list) {
8586 if (back->full_backref || !back->is_data)
8589 dback = to_data_backref(back);
8592 * Still ignoring backrefs that don't have a real ref attached
8595 if (dback->found_ref == 0)
8598 if (dback->bytes == best->bytes &&
8599 dback->disk_bytenr == best->bytenr)
8602 ret = repair_ref(info, path, dback, best);
8608 * Ok we messed with the actual refs, which means we need to drop our
8609 * entire cache and go back and rescan. I know this is a huge pain and
8610 * adds a lot of extra work, but it's the only way to be safe. Once all
8611 * the backrefs agree we may not need to do anything to the extent
8616 while (!list_empty(&entries)) {
8617 entry = list_entry(entries.next, struct extent_entry, list);
8618 list_del_init(&entry->list);
/*
 * process_duplicates - replace an extent record that has no extent item of
 * its own with the duplicate record that does.
 *
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * Visible steps: nothing is done when rec->found_rec is set or rec has more
 * than one duplicate. Otherwise rec is removed from extent_cache and the
 * first record on rec->dups becomes the replacement (good): its lists and
 * check flags are reset, its cache range is set from its own start and nr,
 * and it inherits rec->refs and rec->backrefs. Records found in extent_cache
 * by a lookup at good->start are then merged into good: one with found_rec
 * or with duplicates is moved onto good->dups (queueing good on
 * duplicate_extents if it is not on a list yet), any other only contributes
 * its refs and backrefs. Finally good is inserted into extent_cache.
 *
 * Returns 0 when good ends up with duplicates and 1 otherwise (final
 * return statement).
 * NOTE(review): the value returned by the early exit, the loop around the
 * lookup, the freeing of records and the handling of the insert result are
 * not visible in this excerpt.
 */
8624 static int process_duplicates(struct cache_tree *extent_cache,
8625 struct extent_record *rec)
8627 struct extent_record *good, *tmp;
8628 struct cache_extent *cache;
8632 * If we found a extent record for this extent then return, or if we
8633 * have more than one duplicate we are likely going to need to delete
8636 if (rec->found_rec || rec->num_duplicates > 1)
8639 /* Shouldn't happen but just in case */
8640 BUG_ON(!rec->num_duplicates);
8643 * So this happens if we end up with a backref that doesn't match the
8644 * actual extent entry. So either the backref is bad or the extent
8645 * entry is bad. Either way we want to have the extent_record actually
8646 * reflect what we found in the extent_tree, so we need to take the
8647 * duplicate out and use that as the extent_record since the only way we
8648 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8650 remove_cache_extent(extent_cache, &rec->cache);
8652 good = to_extent_record(rec->dups.next);
8653 list_del_init(&good->list);
8654 INIT_LIST_HEAD(&good->backrefs);
8655 INIT_LIST_HEAD(&good->dups);
8656 good->cache.start = good->start;
8657 good->cache.size = good->nr;
8658 good->content_checked = 0;
8659 good->owner_ref_checked = 0;
8660 good->num_duplicates = 0;
8661 good->refs = rec->refs;
8662 list_splice_init(&rec->backrefs, &good->backrefs);
8664 cache = lookup_cache_extent(extent_cache, good->start,
8668 tmp = container_of(cache, struct extent_record, cache);
8671 * If we find another overlapping extent and it's found_rec is
8672 * set then it's a duplicate and we need to try and delete
8675 if (tmp->found_rec || tmp->num_duplicates > 0) {
8676 if (list_empty(&good->list))
8677 list_add_tail(&good->list,
8678 &duplicate_extents);
8679 good->num_duplicates += tmp->num_duplicates + 1;
8680 list_splice_init(&tmp->dups, &good->dups);
8681 list_del_init(&tmp->list);
8682 list_add_tail(&tmp->list, &good->dups);
8683 remove_cache_extent(extent_cache, &tmp->cache);
8688 * Ok we have another non extent item backed extent rec, so lets
8689 * just add it to this extent and carry on like we did above.
8691 good->refs += tmp->refs;
8692 list_splice_init(&tmp->backrefs, &good->backrefs);
8693 remove_cache_extent(extent_cache, &tmp->cache);
8696 ret = insert_cache_extent(extent_cache, &good->cache);
8699 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of duplicate extent
 * records from the extent tree.
 *
 * @root: a root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before any tree operation
 * @rec:  record whose dups list holds the duplicates
 *
 * Visible steps: compare the records on rec->dups by start and nr to find the
 * one that covers the others (good), queue rec and entries of rec->dups on a
 * local delete list, then, inside one transaction, search for and delete the
 * EXTENT_ITEM keyed (start, nr) of queued records. Both the delete list and
 * rec->dups are emptied before returning, and rec->num_duplicates is reset to
 * 0 when ret and nr_del are both zero.
 *
 * Returns ret when it is non-zero, otherwise nr_del.
 * NOTE(review): the overlap message prints start and nr under a range style
 * format, although nr is added to start as a length in the comparison just
 * above it - confirm which output is intended.
 * NOTE(review): the initialisation of good, the nr_del accounting, the error
 * exits and the freeing of records are not visible in this excerpt.
 */
8702 static int delete_duplicate_records(struct btrfs_root *root,
8703 struct extent_record *rec)
8705 struct btrfs_trans_handle *trans;
8706 LIST_HEAD(delete_list);
8707 struct btrfs_path path;
8708 struct extent_record *tmp, *good, *n;
8711 struct btrfs_key key;
8713 btrfs_init_path(&path);
8716 /* Find the record that covers all of the duplicates. */
8717 list_for_each_entry(tmp, &rec->dups, list) {
8718 if (good->start < tmp->start)
8720 if (good->nr > tmp->nr)
8723 if (tmp->start + tmp->nr < good->start + good->nr) {
8724 fprintf(stderr, "Ok we have overlapping extents that "
8725 "aren't completely covered by each other, this "
8726 "is going to require more careful thought. "
8727 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8728 tmp->start, tmp->nr, good->start, good->nr);
8735 list_add_tail(&rec->list, &delete_list);
8737 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8740 list_move_tail(&tmp->list, &delete_list);
/* All tree modifications below go through the extent root. */
8743 root = root->fs_info->extent_root;
8744 trans = btrfs_start_transaction(root, 1);
8745 if (IS_ERR(trans)) {
8746 ret = PTR_ERR(trans);
8750 list_for_each_entry(tmp, &delete_list, list) {
8751 if (tmp->found_rec == 0)
8753 key.objectid = tmp->start;
8754 key.type = BTRFS_EXTENT_ITEM_KEY;
8755 key.offset = tmp->nr;
8757 /* Shouldn't happen but just in case */
8758 if (tmp->metadata) {
8759 fprintf(stderr, "Well this shouldn't happen, extent "
8760 "record overlaps but is metadata? "
8761 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8765 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8771 ret = btrfs_del_item(trans, root, &path);
8774 btrfs_release_path(&path);
8777 err = btrfs_commit_transaction(trans, root);
8781 while (!list_empty(&delete_list)) {
8782 tmp = to_extent_record(delete_list.next);
8783 list_del_init(&tmp->list);
8789 while (!list_empty(&rec->dups)) {
8790 tmp = to_extent_record(rec->dups.next);
8791 list_del_init(&tmp->list);
8795 btrfs_release_path(&path);
8797 if (!ret && !nr_del)
8798 rec->num_duplicates = 0;
8800 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate file extent items for the data
 * backrefs of a record that were not matched to a real reference.
 *
 * @info:         filesystem the fs roots are read from
 * @path:         scratch path; released after each lookup
 * @extent_cache: cache tree of extent records, consulted by disk bytenr
 * @rec:          extent record whose backrefs are walked
 *
 * Visible steps: for each backref of rec that is a data backref, is not a
 * full backref and has found_ref unset, read the fs root named by
 * dback->root, search it for the EXTENT_DATA key (owner, offset), read
 * disk_bytenr and disk_num_bytes from the item and look that bytenr up in
 * extent_cache. For a backref that is kept, found_ref is incremented and
 * disk_bytenr and bytes are overwritten with the values read from the item.
 * An error other than -ENOENT from reading the root is returned through
 * PTR_ERR().
 *
 * NOTE(review): the skip statements, the test applied to the cached record,
 * the marking of the backref as untrusted and the final return are not
 * visible in this excerpt.
 */
8803 static int find_possible_backrefs(struct btrfs_fs_info *info,
8804 struct btrfs_path *path,
8805 struct cache_tree *extent_cache,
8806 struct extent_record *rec)
8808 struct btrfs_root *root;
8809 struct extent_backref *back;
8810 struct data_backref *dback;
8811 struct cache_extent *cache;
8812 struct btrfs_file_extent_item *fi;
8813 struct btrfs_key key;
8817 list_for_each_entry(back, &rec->backrefs, list) {
8818 /* Don't care about full backrefs (poor unloved backrefs) */
8819 if (back->full_backref || !back->is_data)
8822 dback = to_data_backref(back);
8824 /* We found this one, we don't need to do a lookup */
8825 if (dback->found_ref)
8828 key.objectid = dback->root;
8829 key.type = BTRFS_ROOT_ITEM_KEY;
8830 key.offset = (u64)-1;
8832 root = btrfs_read_fs_root(info, &key);
8834 /* No root, definitely a bad ref, skip */
8835 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8837 /* Other err, exit */
8839 return PTR_ERR(root);
8841 key.objectid = dback->owner;
8842 key.type = BTRFS_EXTENT_DATA_KEY;
8843 key.offset = dback->offset;
8844 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8846 btrfs_release_path(path);
8849 /* Didn't find it, we can carry on */
8854 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8855 struct btrfs_file_extent_item);
8856 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8857 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8858 btrfs_release_path(path);
8859 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8861 struct extent_record *tmp;
8862 tmp = container_of(cache, struct extent_record, cache);
8865 * If we found an extent record for the bytenr for this
8866 * particular backref then we can't add it to our
8867 * current extent record. We only want to add backrefs
8868 * that don't have a corresponding extent item in the
8869 * extent tree since they likely belong to this record
8870 * and we need to fix it if it doesn't match bytenrs.
8876 dback->found_ref += 1;
8877 dback->disk_bytenr = bytenr;
8878 dback->bytes = bytes;
8881 * Set this so the verify backref code knows not to trust the
8882 * values in this backref.
8891 * Record orphan data ref into corresponding root.
8893 * Return 0 if the extent item contains data ref and recorded.
8894 * Return 1 if the extent item contains no useful data ref
8895 * In that case, it may contain only shared_dataref or metadata backref
8896 * or the file extent exists(this should be handled by the extent bytenr
8898 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember, per fs root, the data backrefs of an
 * extent record for which no file extent item was located.
 *
 * @fs_info: filesystem the destination roots are read from
 * @rec:     extent record whose backrefs are walked
 *
 * Visible steps: for each backref of rec that is a data backref, is not a
 * full backref, has found_extent_tree set and has found_ref unset, read the
 * fs root named by dback->root (an error or NULL root is checked for as a
 * non-existent root), search it for the EXTENT_DATA key (owner, offset), and
 * allocate an orphan_data_extent carrying the root, owner, offset and the
 * cache range of rec, which is then linked with the orphan_data_extents list
 * of that root.
 *
 * Returns !recorded_data_ref on the visible return path, that is 0 once at
 * least one orphan was recorded and 1 otherwise.
 *
 * NOTE(review): the kernel-style list_add() helper takes (new, head), so
 * the call below looks like it has its two arguments swapped, which would
 * matter once a root receives more than one orphan - confirm against the
 * list helper used by this project.
 * NOTE(review): the skip statements, the handling of the search result, the
 * allocation failure path and any earlier return are not visible in this
 * excerpt.
 */
8900 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8901 struct extent_record *rec)
8903 struct btrfs_key key;
8904 struct btrfs_root *dest_root;
8905 struct extent_backref *back;
8906 struct data_backref *dback;
8907 struct orphan_data_extent *orphan;
8908 struct btrfs_path path;
8909 int recorded_data_ref = 0;
8914 btrfs_init_path(&path);
8915 list_for_each_entry(back, &rec->backrefs, list) {
8916 if (back->full_backref || !back->is_data ||
8917 !back->found_extent_tree)
8919 dback = to_data_backref(back);
8920 if (dback->found_ref)
8922 key.objectid = dback->root;
8923 key.type = BTRFS_ROOT_ITEM_KEY;
8924 key.offset = (u64)-1;
8926 dest_root = btrfs_read_fs_root(fs_info, &key);
8928 /* For non-exist root we just skip it */
8929 if (IS_ERR(dest_root) || !dest_root)
8932 key.objectid = dback->owner;
8933 key.type = BTRFS_EXTENT_DATA_KEY;
8934 key.offset = dback->offset;
8936 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8937 btrfs_release_path(&path);
8939 * For ret < 0, it's OK since the fs-tree may be corrupted,
8940 * we need to record it for inode/file extent rebuild.
8941 * For ret > 0, we record it only for file extent rebuild.
8942 * For ret == 0, the file extent exists but only bytenr
8943 * mismatch, let the original bytenr fix routine to handle,
8949 orphan = malloc(sizeof(*orphan));
8954 INIT_LIST_HEAD(&orphan->list);
8955 orphan->root = dback->root;
8956 orphan->objectid = dback->owner;
8957 orphan->offset = dback->offset;
8958 orphan->disk_bytenr = rec->cache.start;
8959 orphan->disk_len = rec->cache.size;
8960 list_add(&dest_root->orphan_data_extents, &orphan->list);
8961 recorded_data_ref = 1;
8964 btrfs_release_path(&path);
8966 return !recorded_data_ref;
8972 * when an incorrect extent item is found, this will delete
8973 * all of the existing entries for it and recreate them
8974 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree references of one extent record
 * from the backrefs collected during the scan.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache tree of extent records, passed to
 *                find_possible_backrefs()
 * @rec:          record whose references are rebuilt
 *
 * Visible steps: BTRFS_BLOCK_FLAG_FULL_BACKREF is added to flags when
 * rec->flag_block_full_backref is set. For a non-metadata record whose refs
 * differ from extent_item_refs, find_possible_backrefs() and then
 * verify_backrefs() are run first. A transaction is started on the extent
 * root, delete_extent_records() removes the existing records, the corrupt
 * block cache is consulted for the range of rec, and record_extent() is
 * called for backrefs of rec, with a check of found_ref and
 * rec->bad_full_backref cleared before each call. The transaction is
 * committed, a success message is printed and the path is released.
 *
 * NOTE(review): the error exits, the advance of the list cursor, the use of
 * the corrupt block lookup result and the return statement are not visible in
 * this excerpt.
 */
8976 static int fixup_extent_refs(struct btrfs_fs_info *info,
8977 struct cache_tree *extent_cache,
8978 struct extent_record *rec)
8980 struct btrfs_trans_handle *trans = NULL;
8982 struct btrfs_path path;
8983 struct list_head *cur = rec->backrefs.next;
8984 struct cache_extent *cache;
8985 struct extent_backref *back;
8989 if (rec->flag_block_full_backref)
8990 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8992 btrfs_init_path(&path);
8993 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8995 * Sometimes the backrefs themselves are so broken they don't
8996 * get attached to any meaningful rec, so first go back and
8997 * check any of our backrefs that we couldn't find and throw
8998 * them into the list if we find the backref so that
8999 * verify_backrefs can figure out what to do.
9001 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9006 /* step one, make sure all of the backrefs agree */
9007 ret = verify_backrefs(info, &path, rec);
9011 trans = btrfs_start_transaction(info->extent_root, 1);
9012 if (IS_ERR(trans)) {
9013 ret = PTR_ERR(trans);
9017 /* step two, delete all the existing records */
9018 ret = delete_extent_records(trans, info->extent_root, &path,
9024 /* was this block corrupt? If so, don't add references to it */
9025 cache = lookup_cache_extent(info->corrupt_blocks,
9026 rec->start, rec->max_size);
9032 /* step three, recreate all the refs we did find */
9033 while(cur != &rec->backrefs) {
9034 back = to_extent_backref(cur);
9038 * if we didn't find any references, don't create a
9041 if (!back->found_ref)
9044 rec->bad_full_backref = 0;
9045 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9053 int err = btrfs_commit_transaction(trans, info->extent_root);
9059 fprintf(stderr, "Repaired extent references for %llu\n",
9060 (unsigned long long)rec->start);
9062 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of a record so that it follows rec->flag_block_full_backref.
 *
 * @fs_info: filesystem whose extent root is modified
 * @rec:     extent record describing the item and the wanted flag state
 *
 * Visible steps: build the item key, start a transaction on the extent root,
 * search for the key with COW, read the flags of the item, update the
 * full backref bit, write the flags back, mark the leaf dirty and commit. On
 * the two visible failure paths the path is released and the transaction is
 * still committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): the conditions guarding the failure paths and the remaining
 * return statements are not visible in this excerpt.
 */
9066 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9067 struct extent_record *rec)
9069 struct btrfs_trans_handle *trans;
9070 struct btrfs_root *root = fs_info->extent_root;
9071 struct btrfs_path path;
9072 struct btrfs_extent_item *ei;
9073 struct btrfs_key key;
/*
 * Metadata records use a METADATA_ITEM key with info_level as offset;
 * other records use an EXTENT_ITEM key with max_size as offset.
 */
9077 key.objectid = rec->start;
9078 if (rec->metadata) {
9079 key.type = BTRFS_METADATA_ITEM_KEY;
9080 key.offset = rec->info_level;
9082 key.type = BTRFS_EXTENT_ITEM_KEY;
9083 key.offset = rec->max_size;
9086 trans = btrfs_start_transaction(root, 0);
9088 return PTR_ERR(trans);
9090 btrfs_init_path(&path);
9091 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9093 btrfs_release_path(&path);
9094 btrfs_commit_transaction(trans, root);
9097 fprintf(stderr, "Didn't find extent for %llu\n",
9098 (unsigned long long)rec->start);
9099 btrfs_release_path(&path);
9100 btrfs_commit_transaction(trans, root);
9104 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9105 struct btrfs_extent_item);
9106 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Make the flag stored in the item follow rec->flag_block_full_backref. */
9107 if (rec->flag_block_full_backref) {
9108 fprintf(stderr, "setting full backref on %llu\n",
9109 (unsigned long long)key.objectid);
9110 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9112 fprintf(stderr, "clearing full backref on %llu\n",
9113 (unsigned long long)key.objectid);
9114 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9116 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9117 btrfs_mark_buffer_dirty(path.nodes[0]);
9118 btrfs_release_path(&path);
9119 ret = btrfs_commit_transaction(trans, root);
9121 fprintf(stderr, "Repaired extent flags for %llu\n",
9122 (unsigned long long)rec->start);
9127 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the pointer to one corrupt block from its parent
 * node in the extent tree.
 *
 * @trans:   running transaction used for the COW search
 * @info:    filesystem whose extent root is searched
 * @corrupt: description of the corrupt block (key, level, cache.start)
 *
 * Visible steps: search the extent root for corrupt->key with lowest_level
 * set to corrupt->level + 1 so the search stops at the parent, compare the
 * block pointer in the slot found with corrupt->cache.start, and otherwise
 * scan every slot of that node for a matching pointer. When the pointer is
 * located it is removed with btrfs_del_ptr(). The path is released on the
 * visible exits.
 *
 * NOTE(review): the error checks, the jumps between the lookup and delete
 * stages, the handling of the root node case and the return statements are
 * not visible in this excerpt.
 */
9128 static int prune_one_block(struct btrfs_trans_handle *trans,
9129 struct btrfs_fs_info *info,
9130 struct btrfs_corrupt_block *corrupt)
9133 struct btrfs_path path;
9134 struct extent_buffer *eb;
9138 int level = corrupt->level + 1;
9140 btrfs_init_path(&path);
9142 /* we want to stop at the parent to our busted block */
9143 path.lowest_level = level;
9145 ret = btrfs_search_slot(trans, info->extent_root,
9146 &corrupt->key, &path, -1, 1);
9151 eb = path.nodes[level];
9158 * hopefully the search gave us the block we want to prune,
9159 * lets try that first
9161 slot = path.slots[level];
9162 found = btrfs_node_blockptr(eb, slot);
9163 if (found == corrupt->cache.start)
9166 nritems = btrfs_header_nritems(eb);
9168 /* the search failed, lets scan this node and hope we find it */
9169 for (slot = 0; slot < nritems; slot++) {
9170 found = btrfs_node_blockptr(eb, slot);
9171 if (found == corrupt->cache.start)
9175 * we couldn't find the bad block. TODO, search all the nodes for pointers
9178 if (eb == info->extent_root->node) {
9183 btrfs_release_path(&path);
9188 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9189 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9192 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on the entries of
 * info->corrupt_blocks and remove each processed entry from that cache.
 *
 * @info: filesystem whose corrupt block cache is drained
 *
 * Visible steps: fetch an entry with search_cache_extent() starting at 0,
 * start a transaction on the extent root, prune the block, remove the entry
 * from the cache, and commit the transaction.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started, and the
 * result of the commit on the visible commit path.
 * NOTE(review): the result of prune_one_block() is not used here.
 * NOTE(review): the loop construct and the path taken when the cache is empty
 * are not visible in this excerpt.
 */
9196 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9198 struct btrfs_trans_handle *trans = NULL;
9199 struct cache_extent *cache;
9200 struct btrfs_corrupt_block *corrupt;
9203 cache = search_cache_extent(info->corrupt_blocks, 0);
9207 trans = btrfs_start_transaction(info->extent_root, 1);
9209 return PTR_ERR(trans);
9211 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9212 prune_one_block(trans, info, corrupt);
9213 remove_cache_extent(info->corrupt_blocks, cache);
9216 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache and walk the block groups from the start of the
 * cleared range.
 *
 * @fs_info: filesystem whose free space cache state is reset
 *
 * Visible steps: find the first EXTENT_DIRTY range, clear it, look up the
 * first block group at start, and advance start to the end of that block
 * group (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs and whatever is done to each block group
 * are not visible in this excerpt.
 */
9220 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9222 struct btrfs_block_group_cache *cache;
9227 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9228 &start, &end, EXTENT_DIRTY);
9231 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9236 cache = btrfs_lookup_first_block_group(fs_info, start);
9241 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - walk the extent records in extent_cache, report
 * reference problems on stderr and, when repair is set, try to fix them.
 *
 * @root:         a root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of extent_record objects; each record is removed
 *                from it once it has been processed
 *
 * Visible steps:
 *  - mark the range of every cached record and of every corrupt block dirty
 *    in fs_info->excluded_extents, prune the corrupt blocks and reset the
 *    cached block groups (per the in-line comment this is the repair case);
 *  - while repair is set, drain duplicate_extents through
 *    process_duplicates() and delete_duplicate_records();
 *  - for each record report multiple extent items, ref count mismatches,
 *    backpointer mismatches, failed owner ref checks, bad full backrefs,
 *    metadata crossing a stripe boundary and chunk type mismatches, calling
 *    record_orphan_data_extents(), fixup_extent_refs() and
 *    fixup_extent_flags() along the way;
 *  - remove the record from the cache, free its backrefs and, under the
 *    visible condition, clear its range from excluded_extents again;
 *  - at the end either print the failed repair message or run
 *    btrfs_fix_block_accounting() inside a transaction on the extent root.
 *
 * NOTE(review): the loop constructs, the error accounting (cur_err, fix),
 * most branch bodies and the return statements are not visible in this
 * excerpt, so return values are not documented here.
 */
9245 static int check_extent_refs(struct btrfs_root *root,
9246 struct cache_tree *extent_cache)
9248 struct extent_record *rec;
9249 struct cache_extent *cache;
9255 * if we're doing a repair, we have to make sure
9256 * we don't allocate from the problem extents.
9257 * In the worst case, this will be all the
9260 cache = search_cache_extent(extent_cache, 0);
9262 rec = container_of(cache, struct extent_record, cache);
9263 set_extent_dirty(root->fs_info->excluded_extents,
9265 rec->start + rec->max_size - 1);
9266 cache = next_cache_extent(cache);
9269 /* pin down all the corrupted blocks too */
9270 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9272 set_extent_dirty(root->fs_info->excluded_extents,
9274 cache->start + cache->size - 1);
9275 cache = next_cache_extent(cache);
9277 prune_corrupt_blocks(root->fs_info);
9278 reset_cached_block_groups(root->fs_info);
9281 reset_cached_block_groups(root->fs_info);
9284 * We need to delete any duplicate entries we find first otherwise we
9285 * could mess up the extent tree when we have backrefs that actually
9286 * belong to a different extent item and not the weird duplicate one.
9288 while (repair && !list_empty(&duplicate_extents)) {
9289 rec = to_extent_record(duplicate_extents.next);
9290 list_del_init(&rec->list);
9292 /* Sometimes we can find a backref before we find an actual
9293 * extent, so we need to process it a little bit to see if there
9294 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9295 * if this is a backref screwup. If we need to delete stuff
9296 * process_duplicates() will return 0, otherwise it will return
9299 if (process_duplicates(extent_cache, rec))
9301 ret = delete_duplicate_records(root, rec);
9305 * delete_duplicate_records will return the number of entries
9306 * deleted, so if it's greater than 0 then we know we actually
9307 * did something and we need to remove.
9320 cache = search_cache_extent(extent_cache, 0);
9323 rec = container_of(cache, struct extent_record, cache);
9324 if (rec->num_duplicates) {
9325 fprintf(stderr, "extent item %llu has multiple extent "
9326 "items\n", (unsigned long long)rec->start);
9330 if (rec->refs != rec->extent_item_refs) {
9331 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9332 (unsigned long long)rec->start,
9333 (unsigned long long)rec->nr);
9334 fprintf(stderr, "extent item %llu, found %llu\n",
9335 (unsigned long long)rec->extent_item_refs,
9336 (unsigned long long)rec->refs);
9337 ret = record_orphan_data_extents(root->fs_info, rec);
9343 if (all_backpointers_checked(rec, 1)) {
9344 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9345 (unsigned long long)rec->start,
9346 (unsigned long long)rec->nr);
9350 if (!rec->owner_ref_checked) {
9351 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9352 (unsigned long long)rec->start,
9353 (unsigned long long)rec->nr);
9358 if (repair && fix) {
9359 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9365 if (rec->bad_full_backref) {
9366 fprintf(stderr, "bad full backref, on [%llu]\n",
9367 (unsigned long long)rec->start);
9369 ret = fixup_extent_flags(root->fs_info, rec);
9377 * Although it's not a extent ref's problem, we reuse this
9378 * routine for error reporting.
9379 * No repair function yet.
9381 if (rec->crossing_stripes) {
9383 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9384 rec->start, rec->start + rec->max_size);
9388 if (rec->wrong_chunk_type) {
9390 "bad extent [%llu, %llu), type mismatch with chunk\n",
9391 rec->start, rec->start + rec->max_size);
9395 remove_cache_extent(extent_cache, cache);
9396 free_all_extent_backrefs(rec);
9397 if (!init_extent_tree && repair && (!cur_err || fix))
9398 clear_extent_dirty(root->fs_info->excluded_extents,
9400 rec->start + rec->max_size - 1);
9405 if (ret && ret != -EAGAIN) {
9406 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9409 struct btrfs_trans_handle *trans;
9411 root = root->fs_info->extent_root;
9412 trans = btrfs_start_transaction(root, 1);
9413 if (IS_ERR(trans)) {
9414 ret = PTR_ERR(trans);
9418 btrfs_fix_block_accounting(trans, root);
9419 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - compute stripe_size for a chunk from its profile bits.
 *
 * @type:        chunk type flags; the BTRFS_BLOCK_GROUP_RAID* bits are tested
 * @length:      chunk length the stripe size is derived from
 * @num_stripes: number of stripes of the chunk
 *
 * Per profile bit, tested in this order:
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   any other type: length
 *
 * NOTE(review): no guard on num_stripes is visible, so a value of 0 (RAID0,
 * RAID10), 1 (RAID5) or 2 (RAID6) would make the divisor zero - confirm that
 * callers validate num_stripes first.
 * NOTE(review): the declaration of stripe_size and the return statement are
 * not visible in this excerpt.
 */
9428 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9432 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9433 stripe_size = length;
9434 stripe_size /= num_stripes;
9435 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9436 stripe_size = length * 2;
9437 stripe_size /= num_stripes;
9438 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9439 stripe_size = length;
9440 stripe_size /= (num_stripes - 1);
9441 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9442 stripe_size = length;
9443 stripe_size /= (num_stripes - 2);
9445 stripe_size = length;
9451 * Check the chunk with its block group/dev list ref:
9452 * Return 0 if all refs seems valid.
9453 * Return 1 if part of refs seems valid, need later check for rebuild ref
9454 * like missing block group and needs to search extent tree to rebuild them.
9455 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk being verified
 * @block_group_cache: block group records, looked up through their tree
 * @dev_extent_cache:  device extent records, looked up by devid and offset
 *
 * Visible steps: look up the block group for the chunk and compare length,
 * offset and type flags with it; a block group is unlinked from its list and
 * stored in chunk_rec->bg_rec. Then, for every stripe of the chunk, look up
 * the device extent at (devid, offset) with the length given by
 * calc_stripe_length() and compare objectid, offset, chunk_offset and length;
 * a device extent is moved onto chunk_rec->dextents. Mismatches and missing
 * items are described by the messages shown.
 *
 * NOTE(review): the remaining parameter, the branch structure around the
 * list updates, the printing calls and the return statements are not visible
 * in this excerpt.
 */
9457 static int check_chunk_refs(struct chunk_record *chunk_rec,
9458 struct block_group_tree *block_group_cache,
9459 struct device_extent_tree *dev_extent_cache,
9462 struct cache_extent *block_group_item;
9463 struct block_group_record *block_group_rec;
9464 struct cache_extent *dev_extent_item;
9465 struct device_extent_record *dev_extent_rec;
9469 int metadump_v2 = 0;
9473 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9476 if (block_group_item) {
9477 block_group_rec = container_of(block_group_item,
9478 struct block_group_record,
9480 if (chunk_rec->length != block_group_rec->offset ||
9481 chunk_rec->offset != block_group_rec->objectid ||
9483 chunk_rec->type_flags != block_group_rec->flags)) {
9486 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9487 chunk_rec->objectid,
9492 chunk_rec->type_flags,
9493 block_group_rec->objectid,
9494 block_group_rec->type,
9495 block_group_rec->offset,
9496 block_group_rec->offset,
9497 block_group_rec->objectid,
9498 block_group_rec->flags);
9501 list_del_init(&block_group_rec->list);
9502 chunk_rec->bg_rec = block_group_rec;
9507 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9508 chunk_rec->objectid,
9513 chunk_rec->type_flags);
/* Length each device extent of this chunk is compared against below. */
9520 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9521 chunk_rec->num_stripes);
9522 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9523 devid = chunk_rec->stripes[i].devid;
9524 offset = chunk_rec->stripes[i].offset;
9525 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9526 devid, offset, length);
9527 if (dev_extent_item) {
9528 dev_extent_rec = container_of(dev_extent_item,
9529 struct device_extent_record,
9531 if (dev_extent_rec->objectid != devid ||
9532 dev_extent_rec->offset != offset ||
9533 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9534 dev_extent_rec->length != length) {
9537 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9538 chunk_rec->objectid,
9541 chunk_rec->stripes[i].devid,
9542 chunk_rec->stripes[i].offset,
9543 dev_extent_rec->objectid,
9544 dev_extent_rec->offset,
9545 dev_extent_rec->length);
9548 list_move(&dev_extent_rec->chunk_list,
9549 &chunk_rec->dextents);
9554 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9555 chunk_rec->objectid,
9558 chunk_rec->stripes[i].devid,
9559 chunk_rec->stripes[i].offset);
9566 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every chunk record and sort the
 * chunks onto the caller lists, then report block groups and device extents
 * left without a chunk.
 *
 * @chunk_cache:       cache tree of chunk records to walk
 * @block_group_cache: block group records; those remaining on its
 *                     block_groups list afterwards are reported
 * @dev_extent_cache:  device extent records; those on its no_chunk_orphans
 *                     list are reported
 * @good:              list receiving chunks with err == 0, may be NULL
 * @bad:               list receiving the remaining chunks
 * @rebuild:           list receiving chunks with err > 0, may be NULL
 * @silent:            passed through to check_chunk_refs()
 *
 * NOTE(review): the condition guarding the bad list, the printing calls and
 * the return statement are not visible in this excerpt.
 */
9567 int check_chunks(struct cache_tree *chunk_cache,
9568 struct block_group_tree *block_group_cache,
9569 struct device_extent_tree *dev_extent_cache,
9570 struct list_head *good, struct list_head *bad,
9571 struct list_head *rebuild, int silent)
9573 struct cache_extent *chunk_item;
9574 struct chunk_record *chunk_rec;
9575 struct block_group_record *bg_rec;
9576 struct device_extent_record *dext_rec;
9580 chunk_item = first_cache_extent(chunk_cache);
9581 while (chunk_item) {
9582 chunk_rec = container_of(chunk_item, struct chunk_record,
9584 err = check_chunk_refs(chunk_rec, block_group_cache,
9585 dev_extent_cache, silent);
/* File the chunk on the caller list matching err. */
9588 if (err == 0 && good)
9589 list_add_tail(&chunk_rec->list, good);
9590 if (err > 0 && rebuild)
9591 list_add_tail(&chunk_rec->list, rebuild);
9593 list_add_tail(&chunk_rec->list, bad);
9594 chunk_item = next_cache_extent(chunk_item);
9597 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9600 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9608 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9612 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sums the length of the device extent records found in @dext_cache for
 * the device described by @dev_rec and compares the sum with
 * dev_rec->byte_used. Each visited record is unlinked from its
 * device_list along the way.
 * NOTE(review): the loop header, the action taken when a record's
 * objectid differs from the devid, and the return statements are not
 * visible in this listing.
 */
9623 static int check_device_used(struct device_record *dev_rec,
9624 struct device_extent_tree *dext_cache)
9626 struct cache_extent *cache;
9627 struct device_extent_record *dev_extent_rec;
/* Start the walk at the first cached extent found for (devid, 0) */
9630 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9632 dev_extent_rec = container_of(cache,
9633 struct device_extent_record,
9635 if (dev_extent_rec->objectid != dev_rec->devid)
9638 list_del_init(&dev_extent_rec->device_list);
9639 total_byte += dev_extent_rec->length;
9640 cache = next_cache_extent(cache);
/* The accumulated extent length must match the device's byte_used */
9643 if (total_byte != dev_rec->byte_used) {
9645 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9646 total_byte, dev_rec->byte_used, dev_rec->objectid,
9647 dev_rec->type, dev_rec->offset);
9654 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Iterates the device records stored in the rb-tree @dev_cache, calling
 * check_device_used() on each, then walks
 * dev_extent_cache->no_device_orphans, whose entries the message below
 * reports as device extents that did not find their device.
 * NOTE(review): how the per-device result is folded into the return value
 * is not visible in this listing.
 */
9655 static int check_devices(struct rb_root *dev_cache,
9656 struct device_extent_tree *dev_extent_cache)
9658 struct rb_node *dev_node;
9659 struct device_record *dev_rec;
9660 struct device_extent_record *dext_rec;
9664 dev_node = rb_first(dev_cache);
9666 dev_rec = container_of(dev_node, struct device_record, node);
9667 err = check_device_used(dev_rec, dev_extent_cache);
9671 dev_node = rb_next(dev_node);
/* Device extents left on the no_device_orphans list */
9673 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9676 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9677 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocates a root_item_record with malloc(), fills it from the
 * arguments and appends it to the tail of @head.
 *
 * @head:          list receiving the new record
 * @objectid:      stored in the record's objectid field
 * @bytenr:        stored in the record's bytenr field
 * @last_snapshot: stored in the record's last_snapshot field
 * @level:         stored in the record's level field
 * @drop_level:    stored in the record's drop_level field
 * @level_size:    stored in the record's level_size field
 * @drop_key:      copied into the record's drop_key field
 *
 * NOTE(review): callers in this file pass NULL for @drop_key; the guard
 * around the memcpy(), the malloc() failure handling and the return
 * statements are not visible in this listing.
 */
9684 static int add_root_item_to_list(struct list_head *head,
9685 u64 objectid, u64 bytenr, u64 last_snapshot,
9686 u8 level, u8 drop_level,
9687 int level_size, struct btrfs_key *drop_key)
9690 struct root_item_record *ri_rec;
9691 ri_rec = malloc(sizeof(*ri_rec));
9694 ri_rec->bytenr = bytenr;
9695 ri_rec->objectid = objectid;
9696 ri_rec->level = level;
9697 ri_rec->level_size = level_size;
9698 ri_rec->drop_level = drop_level;
9699 ri_rec->last_snapshot = last_snapshot;
9701 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9702 list_add_tail(&ri_rec->list, head);
/*
 * Empties @list by repeatedly taking its first root_item_record and
 * unlinking it.
 * NOTE(review): the statement releasing the record's memory is not
 * visible in this listing.
 */
9707 static void free_root_item_list(struct list_head *list)
9709 struct root_item_record *ri_rec;
9711 while (!list_empty(list)) {
9712 ri_rec = list_first_entry(list, struct root_item_record,
9714 list_del_init(&ri_rec->list);
/*
 * Consumes the root_item_record entries queued on @list. For each record
 * the tree block at rec->bytenr is read, handed to add_root_to_pending()
 * and, per the in-body comment, processed with run_next_block(); the
 * buffer is then released and the record unlinked. After the list is
 * drained run_next_block() is called again with a NULL record.
 * All cache arguments are passed through unchanged to
 * add_root_to_pending() / run_next_block().
 * NOTE(review): the error branches, any loop around the run_next_block()
 * calls and the return statement are not visible in this listing.
 */
9719 static int deal_root_from_list(struct list_head *list,
9720 struct btrfs_root *root,
9721 struct block_info *bits,
9723 struct cache_tree *pending,
9724 struct cache_tree *seen,
9725 struct cache_tree *reada,
9726 struct cache_tree *nodes,
9727 struct cache_tree *extent_cache,
9728 struct cache_tree *chunk_cache,
9729 struct rb_root *dev_cache,
9730 struct block_group_tree *block_group_cache,
9731 struct device_extent_tree *dev_extent_cache)
9736 while (!list_empty(list)) {
9737 struct root_item_record *rec;
9738 struct extent_buffer *buf;
9739 rec = list_entry(list->next,
9740 struct root_item_record, list);
/* Read the root node of the queued tree through the tree root */
9742 buf = read_tree_block(root->fs_info->tree_root,
9743 rec->bytenr, rec->level_size, 0);
9744 if (!extent_buffer_uptodate(buf)) {
9745 free_extent_buffer(buf);
9749 ret = add_root_to_pending(buf, extent_cache, pending,
9750 seen, nodes, rec->objectid);
9754 * To rebuild extent tree, we need deal with snapshot
9755 * one by one, otherwise we deal with node firstly which
9756 * can maximize readahead.
9759 ret = run_next_block(root, bits, bits_nr, &last,
9760 pending, seen, reada, nodes,
9761 extent_cache, chunk_cache,
9762 dev_cache, block_group_cache,
9763 dev_extent_cache, rec);
/* Done with this record: drop the buffer and unlink it from the list */
9767 free_extent_buffer(buf);
9768 list_del(&rec->list);
/* Same call as above but without a root_item_record */
9774 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9775 reada, nodes, extent_cache, chunk_cache,
9776 dev_cache, block_group_cache,
9777 dev_extent_cache, NULL);
/*
 * Driver for the chunk / extent / device checks. Visible steps:
 *  1. initialise the local caches, trees and the two root lists;
 *  2. publish some of them through root->fs_info for the duration of the
 *     run;
 *  3. queue the tree root, the chunk root and every ROOT_ITEM found in
 *     the tree root on normal_trees or dropping_trees;
 *  4. process both lists with deal_root_from_list();
 *  5. run check_chunks(), check_extent_refs() and check_devices();
 *  6. clear the fs_info hooks and free the caches.
 * NOTE(review): labels, error branches, loop headers and return
 * statements are not visible in this listing, so the exact control flow
 * between these steps cannot be confirmed from here.
 */
9787 static int check_chunks_and_extents(struct btrfs_root *root)
9789 struct rb_root dev_cache;
9790 struct cache_tree chunk_cache;
9791 struct block_group_tree block_group_cache;
9792 struct device_extent_tree dev_extent_cache;
9793 struct cache_tree extent_cache;
9794 struct cache_tree seen;
9795 struct cache_tree pending;
9796 struct cache_tree reada;
9797 struct cache_tree nodes;
9798 struct extent_io_tree excluded_extents;
9799 struct cache_tree corrupt_blocks;
9800 struct btrfs_path path;
9801 struct btrfs_key key;
9802 struct btrfs_key found_key;
9804 struct block_info *bits;
9806 struct extent_buffer *leaf;
9808 struct btrfs_root_item ri;
9809 struct list_head dropping_trees;
9810 struct list_head normal_trees;
9811 struct btrfs_root *root1;
9816 dev_cache = RB_ROOT;
9817 cache_tree_init(&chunk_cache);
9818 block_group_tree_init(&block_group_cache);
9819 device_extent_tree_init(&dev_extent_cache);
9821 cache_tree_init(&extent_cache);
9822 cache_tree_init(&seen);
9823 cache_tree_init(&pending);
9824 cache_tree_init(&nodes);
9825 cache_tree_init(&reada);
9826 cache_tree_init(&corrupt_blocks);
9827 extent_io_tree_init(&excluded_extents);
9828 INIT_LIST_HEAD(&dropping_trees);
9829 INIT_LIST_HEAD(&normal_trees);
/*
 * Expose the stack-local structures via fs_info; the same four fields
 * are reset to NULL in the teardown further down.
 */
9832 root->fs_info->excluded_extents = &excluded_extents;
9833 root->fs_info->fsck_extent_cache = &extent_cache;
9834 root->fs_info->free_extent_hook = free_extent_hook;
9835 root->fs_info->corrupt_blocks = &corrupt_blocks;
9839 bits = malloc(bits_nr * sizeof(struct block_info));
9845 if (ctx.progress_enabled) {
9846 ctx.tp = TASK_EXTENTS;
9847 task_start(ctx.info);
/* Queue the tree root, then the chunk root, on normal_trees */
9851 root1 = root->fs_info->tree_root;
9852 level = btrfs_header_level(root1->node);
9853 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9854 root1->node->start, 0, level, 0,
9855 root1->nodesize, NULL);
9858 root1 = root->fs_info->chunk_root;
9859 level = btrfs_header_level(root1->node);
9860 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9861 root1->node->start, 0, level, 0,
9862 root1->nodesize, NULL);
/* Scan the tree root for ROOT_ITEM entries */
9865 btrfs_init_path(&path);
9868 key.type = BTRFS_ROOT_ITEM_KEY;
9869 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9874 leaf = path.nodes[0];
9875 slot = path.slots[0];
/* Past the last item of this leaf: advance to the next leaf */
9876 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9877 ret = btrfs_next_leaf(root, &path);
9880 leaf = path.nodes[0];
9881 slot = path.slots[0];
9883 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9884 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9885 unsigned long offset;
9888 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9889 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9890 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A zero drop_progress objectid queues the root on normal_trees; the
 * other visible call queues on dropping_trees and passes a drop key.
 */
9891 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9892 level = btrfs_root_level(&ri);
9893 level_size = root->nodesize;
9894 ret = add_root_item_to_list(&normal_trees,
9896 btrfs_root_bytenr(&ri),
9897 last_snapshot, level,
9898 0, level_size, NULL);
9902 level = btrfs_root_level(&ri);
9903 level_size = root->nodesize;
9904 objectid = found_key.objectid;
9905 btrfs_disk_key_to_cpu(&found_key,
9907 ret = add_root_item_to_list(&dropping_trees,
9909 btrfs_root_bytenr(&ri),
9910 last_snapshot, level,
9912 level_size, &found_key);
9919 btrfs_release_path(&path);
9922 * check_block can return -EAGAIN if it fixes something, please keep
9923 * this in mind when dealing with return values from these functions, if
9924 * we get -EAGAIN we want to fall through and restart the loop.
9926 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9927 &seen, &reada, &nodes, &extent_cache,
9928 &chunk_cache, &dev_cache, &block_group_cache,
9935 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9936 &pending, &seen, &reada, &nodes,
9937 &extent_cache, &chunk_cache, &dev_cache,
9938 &block_group_cache, &dev_extent_cache);
/* Cross-checks over the caches filled by the tree walk above */
9945 ret = check_chunks(&chunk_cache, &block_group_cache,
9946 &dev_extent_cache, NULL, NULL, NULL, 0);
9953 ret = check_extent_refs(root, &extent_cache);
9960 ret = check_devices(&dev_cache, &dev_extent_cache);
9965 task_stop(ctx.info);
/* Teardown: detach the fs_info hooks and release the caches */
9967 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9968 extent_io_tree_cleanup(&excluded_extents);
9969 root->fs_info->fsck_extent_cache = NULL;
9970 root->fs_info->free_extent_hook = NULL;
9971 root->fs_info->corrupt_blocks = NULL;
9972 root->fs_info->excluded_extents = NULL;
9975 free_chunk_cache_tree(&chunk_cache);
9976 free_device_cache_tree(&dev_cache);
9977 free_block_group_tree(&block_group_cache);
9978 free_device_extent_tree(&dev_extent_cache);
9979 free_extent_cache_tree(&seen);
9980 free_extent_cache_tree(&pending);
9981 free_extent_cache_tree(&reada);
9982 free_extent_cache_tree(&nodes);
9983 free_root_item_list(&normal_trees);
9984 free_root_item_list(&dropping_trees);
/*
 * Second release sequence; unlike the one above it also calls
 * free_extent_record_cache() and does not reset the fs_info hooks.
 * NOTE(review): the label or condition selecting it is not visible in
 * this listing.
 */
9987 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9988 free_extent_cache_tree(&seen);
9989 free_extent_cache_tree(&pending);
9990 free_extent_cache_tree(&reada);
9991 free_extent_cache_tree(&nodes);
9992 free_chunk_cache_tree(&chunk_cache);
9993 free_block_group_tree(&block_group_cache);
9994 free_device_cache_tree(&dev_cache);
9995 free_device_extent_tree(&dev_extent_cache);
9996 free_extent_record_cache(&extent_cache);
9997 free_root_item_list(&normal_trees);
9998 free_root_item_list(&dropping_trees);
9999 extent_io_tree_cleanup(&excluded_extents);
10004 * Check backrefs of a tree block given by @bytenr or @eb.
10006 * @root: the root containing the @bytenr or @eb
10007 * @eb: tree block extent buffer, can be NULL
10008 * @bytenr: bytenr of the tree block to search
10009 * @level: tree level of the tree block
10010 * @owner: owner of the tree block
10012 * Return >0 for any error found and output error message
10013 * Return 0 for no error found
/*
 * Visible flow: look up the extent/metadata item for @bytenr in the
 * extent tree, compare its flags, generation, level and ref count with
 * the tree block, then scan the inline refs for a TREE_BLOCK_REF whose
 * offset equals root->objectid or @owner, or a SHARED_BLOCK_REF that is
 * validated by a recursive call. A keyed TREE_BLOCK_REF item is searched
 * as a fallback. Problems are accumulated as BACKREF_* bits in err.
 * NOTE(review): the guards between these steps, the goto targets and the
 * return statement are not visible in this listing.
 */
10015 static int check_tree_block_ref(struct btrfs_root *root,
10016 struct extent_buffer *eb, u64 bytenr,
10017 int level, u64 owner)
10019 struct btrfs_key key;
10020 struct btrfs_root *extent_root = root->fs_info->extent_root;
10021 struct btrfs_path path;
10022 struct btrfs_extent_item *ei;
10023 struct btrfs_extent_inline_ref *iref;
10024 struct extent_buffer *leaf;
10030 u32 nodesize = root->nodesize;
10033 int tree_reloc_root = 0;
/* Remember when @bytenr is the root node of a tree reloc root */
10038 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10039 btrfs_header_bytenr(root->node) == bytenr)
10040 tree_reloc_root = 1;
10042 btrfs_init_path(&path);
10043 key.objectid = bytenr;
10044 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10045 key.type = BTRFS_METADATA_ITEM_KEY;
10047 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Search with the maximum offset, then step back to the extent item */
10048 key.offset = (u64)-1;
10050 /* Search for the backref in extent tree */
10051 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10053 err |= BACKREF_MISSING;
10056 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10058 err |= BACKREF_MISSING;
10062 leaf = path.nodes[0];
10063 slot = path.slots[0];
10064 btrfs_item_key_to_cpu(leaf, &key, slot);
10066 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keeps the level in key.offset and the inline refs start
 * right after the extent item; otherwise a btrfs_tree_block_info holding
 * the level sits between the extent item and the inline refs.
 */
10068 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10069 skinny_level = (int)key.offset;
10070 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10072 struct btrfs_tree_block_info *info;
10074 info = (struct btrfs_tree_block_info *)(ei + 1);
10075 skinny_level = btrfs_tree_block_level(leaf, info);
10076 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/*
 * NOTE(review): the header says @eb can be NULL, yet
 * btrfs_header_generation(eb) is called below; the guard is presumably on
 * a line not visible in this listing - confirm.
 */
10083 if (!(btrfs_extent_flags(leaf, ei) &
10084 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10086 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10087 key.objectid, nodesize,
10088 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10089 err = BACKREF_MISMATCH;
10091 header_gen = btrfs_header_generation(eb);
10092 extent_gen = btrfs_extent_generation(leaf, ei);
10093 if (header_gen != extent_gen) {
10095 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10096 key.objectid, nodesize, header_gen,
10098 err = BACKREF_MISMATCH;
10100 if (level != skinny_level) {
10102 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10103 key.objectid, nodesize, level, skinny_level);
10104 err = BACKREF_MISMATCH;
/* Blocks not owned by an fs tree must carry exactly one reference */
10106 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10108 "extent[%llu %u] is referred by other roots than %llu",
10109 key.objectid, nodesize, root->objectid);
10110 err = BACKREF_MISMATCH;
10115 * Iterate the extent/metadata item to find the exact backref
10117 item_size = btrfs_item_size_nr(leaf, slot);
10118 ptr = (unsigned long)iref;
10119 end = (unsigned long)ei + item_size;
10120 while (ptr < end) {
10121 iref = (struct btrfs_extent_inline_ref *)ptr;
10122 type = btrfs_extent_inline_ref_type(leaf, iref);
10123 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10125 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10126 (offset == root->objectid || offset == owner)) {
10128 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10130 * Backref of tree reloc root points to itself, no need
10131 * to check backref any more.
10133 if (tree_reloc_root)
10136 /* Check if the backref points to valid referencer */
/* Recursive call on the parent block at @offset, one level up, no eb */
10137 found_ref = !check_tree_block_ref(root, NULL,
10138 offset, level + 1, owner);
10143 ptr += btrfs_extent_inline_ref_size(type);
10147 * Inlined extent item doesn't have what we need, check
10148 * TREE_BLOCK_REF_KEY
10151 btrfs_release_path(&path);
10152 key.objectid = bytenr;
10153 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10154 key.offset = root->objectid;
10156 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10161 err |= BACKREF_MISSING;
10163 btrfs_release_path(&path);
/* The lost-backref message is only printed when an eb was supplied */
10164 if (eb && (err & BACKREF_MISSING))
10165 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10166 bytenr, nodesize, owner, level);
10171 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10173 * Return >0 any error found and output error message
10174 * Return 0 for no error found
/*
 * Visible flow for the file extent item at @slot of @eb:
 *  - inline extents and extents with disk_bytenr 0 are singled out first;
 *  - disk_num_bytes and num_bytes are tested for alignment to
 *    root->sectorsize (BYTES_UNALIGNED) and added to the global
 *    data_bytes_allocated / data_bytes_referenced counters;
 *  - the EXTENT_ITEM keyed (disk_bytenr, disk_num_bytes) is looked up and
 *    must carry BTRFS_EXTENT_FLAG_DATA (BACKREF_MISMATCH);
 *  - a data backref is searched inline, then as a keyed EXTENT_DATA_REF,
 *    then as a keyed SHARED_DATA_REF; none found sets BACKREF_MISSING.
 * NOTE(review): else-branches, result checks of the searches and the
 * return statements are not visible in this listing.
 */
10176 static int check_extent_data_item(struct btrfs_root *root,
10177 struct extent_buffer *eb, int slot)
10179 struct btrfs_file_extent_item *fi;
10180 struct btrfs_path path;
10181 struct btrfs_root *extent_root = root->fs_info->extent_root;
10182 struct btrfs_key fi_key;
10183 struct btrfs_key dbref_key;
10184 struct extent_buffer *leaf;
10185 struct btrfs_extent_item *ei;
10186 struct btrfs_extent_inline_ref *iref;
10187 struct btrfs_extent_data_ref *dref;
10190 u64 disk_num_bytes;
10191 u64 extent_num_bytes;
10198 int found_dbackref = 0;
10202 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10203 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10205 /* Nothing to check for hole and inline data extents */
10206 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10207 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10210 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10211 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10212 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10214 /* Check unaligned disk_num_bytes and num_bytes */
10215 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10217 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10218 fi_key.objectid, fi_key.offset, disk_num_bytes,
10220 err |= BYTES_UNALIGNED;
10222 data_bytes_allocated += disk_num_bytes;
10224 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10226 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10227 fi_key.objectid, fi_key.offset, extent_num_bytes,
10229 err |= BYTES_UNALIGNED;
10231 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; compared with ref roots below */
10233 owner = btrfs_header_owner(eb);
10235 /* Check the extent item of the file extent in extent tree */
10236 btrfs_init_path(&path);
10237 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10238 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10239 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10241 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on @slot refers to the extent tree leaf, not to @eb */
10245 leaf = path.nodes[0];
10246 slot = path.slots[0];
10247 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10249 extent_flags = btrfs_extent_flags(leaf, ei);
10251 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10253 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10254 disk_bytenr, disk_num_bytes,
10255 BTRFS_EXTENT_FLAG_DATA);
10256 err |= BACKREF_MISMATCH;
10259 /* Check data backref inside that extent item */
10260 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10261 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10262 ptr = (unsigned long)iref;
10263 end = (unsigned long)ei + item_size;
10264 while (ptr < end) {
10265 iref = (struct btrfs_extent_inline_ref *)ptr;
10266 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload is read from the position of iref->offset */
10267 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10269 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10270 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10271 if (ref_root == owner || ref_root == root->objectid)
10272 found_dbackref = 1;
10273 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: accepted when check_tree_block_ref() returns 0 */
10274 found_dbackref = !check_tree_block_ref(root, NULL,
10275 btrfs_extent_inline_ref_offset(leaf, iref),
10279 if (found_dbackref)
10281 ptr += btrfs_extent_inline_ref_size(type);
10284 if (!found_dbackref) {
10285 btrfs_release_path(&path);
10287 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10288 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10289 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10290 dbref_key.offset = hash_extent_data_ref(root->objectid,
10291 fi_key.objectid, fi_key.offset);
10293 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10294 &dbref_key, &path, 0, 0);
10296 found_dbackref = 1;
10300 btrfs_release_path(&path);
10303 * Neither inlined nor EXTENT_DATA_REF found, try
10304 * SHARED_DATA_REF as last chance.
10306 dbref_key.objectid = disk_bytenr;
10307 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10308 dbref_key.offset = eb->start;
10310 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10311 &dbref_key, &path, 0, 0);
10313 found_dbackref = 1;
10319 if (!found_dbackref)
10320 err |= BACKREF_MISSING;
10321 btrfs_release_path(&path);
10322 if (err & BACKREF_MISSING) {
10323 error("data extent[%llu %llu] backref lost",
10324 disk_bytenr, disk_num_bytes);
10330 * Get real tree block level for the case like shared block
10331 * Return >= 0 as tree level
10332 * Return <0 for error
/*
 * Obtains the level of the tree block at @bytenr from two sources: the
 * extent tree item (key.offset for METADATA_ITEM, btrfs_tree_block_info
 * otherwise) and the block's own header, read with the generation stored
 * in the extent item. The two levels are compared and header_level is
 * returned on the visible success path.
 * NOTE(review): the error branches and the values they return are not
 * visible in this listing.
 */
10334 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10336 struct extent_buffer *eb;
10337 struct btrfs_path path;
10338 struct btrfs_key key;
10339 struct btrfs_extent_item *ei;
10342 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10347 /* Search extent tree for extent generation and level */
10348 key.objectid = bytenr;
10349 key.type = BTRFS_METADATA_ITEM_KEY;
10350 key.offset = (u64)-1;
10352 btrfs_init_path(&path);
10353 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10356 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10364 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10365 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10366 struct btrfs_extent_item);
10367 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only extents flagged as tree blocks pass this test */
10368 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10373 /* Get transid for later read_tree_block() check */
10374 transid = btrfs_extent_generation(path.nodes[0], ei);
10376 /* Get backref level as one source */
10377 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10378 backref_level = key.offset;
10380 struct btrfs_tree_block_info *info;
10382 info = (struct btrfs_tree_block_info *)(ei + 1);
10383 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10385 btrfs_release_path(&path);
10387 /* Get level from tree block as an alternative source */
10388 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10389 if (!extent_buffer_uptodate(eb)) {
10390 free_extent_buffer(eb);
10393 header_level = btrfs_header_level(eb);
10394 free_extent_buffer(eb);
/* Both sources have to agree on the level */
10396 if (header_level != backref_level)
10398 return header_level;
10401 btrfs_release_path(&path);
10406 * Check if a tree block backref is valid (points to a valid tree block)
10407 * if level == -1, level will be resolved
10408 * Return >0 for any error found and print error message
/*
 * Visible flow: resolve the level through query_tree_block_level() when
 * requested, read the root identified by @root_id, read the tree block at
 * @bytenr to obtain its first key, then search that key in the root with
 * path.lowest_level set to the level and verify that the node reached at
 * that level has the expected bytenr and level. Failures are collected as
 * REFERENCER_MISSING / REFERENCER_MISMATCH bits in err.
 * NOTE(review): guards, goto targets and the return statement are not
 * visible in this listing.
 */
10410 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10411 u64 bytenr, int level)
10413 struct btrfs_root *root;
10414 struct btrfs_key key;
10415 struct btrfs_path path;
10416 struct extent_buffer *eb;
10417 struct extent_buffer *node;
10418 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10422 /* Query level for level == -1 special case */
10424 level = query_tree_block_level(fs_info, bytenr);
10426 err |= REFERENCER_MISSING;
/* Look up the referencing root by its ROOT_ITEM key */
10430 key.objectid = root_id;
10431 key.type = BTRFS_ROOT_ITEM_KEY;
10432 key.offset = (u64)-1;
10434 root = btrfs_read_fs_root(fs_info, &key);
10435 if (IS_ERR(root)) {
10436 err |= REFERENCER_MISSING;
10440 /* Read out the tree block to get item/node key */
10441 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10442 if (!extent_buffer_uptodate(eb)) {
10443 err |= REFERENCER_MISSING;
10444 free_extent_buffer(eb);
10448 /* Empty tree, no need to check key */
10449 if (!btrfs_header_nritems(eb) && !level) {
10450 free_extent_buffer(eb);
/* First key of the block, read as node key or item key */
10455 btrfs_node_key_to_cpu(eb, &key, 0);
10457 btrfs_item_key_to_cpu(eb, &key, 0);
10459 free_extent_buffer(eb);
10461 btrfs_init_path(&path);
10462 path.lowest_level = level;
10463 /* Search with the first key, to ensure we can reach it */
10464 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10466 err |= REFERENCER_MISSING;
/* The node found at the requested level must be the block itself */
10470 node = path.nodes[level];
10471 if (btrfs_header_bytenr(node) != bytenr) {
10473 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10474 bytenr, nodesize, bytenr,
10475 btrfs_header_bytenr(node));
10476 err |= REFERENCER_MISMATCH;
10478 if (btrfs_header_level(node) != level) {
10480 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10481 bytenr, nodesize, level,
10482 btrfs_header_level(node));
10483 err |= REFERENCER_MISMATCH;
10487 btrfs_release_path(&path);
/* Two message variants exist: without and with the level */
10489 if (err & REFERENCER_MISSING) {
10491 error("extent [%llu %d] lost referencer (owner: %llu)",
10492 bytenr, nodesize, root_id);
10495 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10496 bytenr, nodesize, root_id, level);
10503 * Check if tree block @eb is tree reloc root.
10504 * Return 0 if it's not or any problem happens
10505 * Return 1 if it's a tree reloc root
/*
 * Reads, without caching, the root keyed
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) and compares the
 * bytenr of that root's node with the bytenr of @eb. The root obtained
 * here is released with btrfs_free_fs_root().
 */
10507 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10508 struct extent_buffer *eb)
10510 struct btrfs_root *tree_reloc_root;
10511 struct btrfs_key key;
10512 u64 bytenr = btrfs_header_bytenr(eb);
10513 u64 owner = btrfs_header_owner(eb);
10516 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10517 key.offset = owner;
10518 key.type = BTRFS_ROOT_ITEM_KEY;
10520 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10521 if (IS_ERR(tree_reloc_root))
10524 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10526 btrfs_free_fs_root(tree_reloc_root);
10531 * Check referencer for shared block backref
10532 * If level == -1, this function will resolve the level.
/*
 * Reads the tree block at @parent and looks for a node pointer equal to
 * @bytenr among its entries. Before scanning, the parent's header level
 * is compared with level + 1, and the case @parent == @bytenr is checked
 * with is_tree_reloc_root(). When no parent is found an error is printed
 * and REFERENCER_MISSING is returned.
 * NOTE(review): goto targets and the success return are not visible in
 * this listing.
 */
10534 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10535 u64 parent, u64 bytenr, int level)
10537 struct extent_buffer *eb;
10538 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10540 int found_parent = 0;
10543 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10544 if (!extent_buffer_uptodate(eb))
10548 level = query_tree_block_level(fs_info, bytenr);
10552 /* It's possible it's a tree reloc root */
10553 if (parent == bytenr) {
10554 if (is_tree_reloc_root(fs_info, eb))
/* The parent is expected exactly one level above the child */
10559 if (level + 1 != btrfs_header_level(eb))
10562 nr = btrfs_header_nritems(eb);
10563 for (i = 0; i < nr; i++) {
10564 if (bytenr == btrfs_node_blockptr(eb, i)) {
10570 free_extent_buffer(eb);
10571 if (!found_parent) {
10573 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10574 bytenr, nodesize, parent, level);
10575 return REFERENCER_MISSING;
10581 * Check referencer for normal (inlined) data ref
10582 * If len == 0, it will be resolved by searching in extent tree
/*
 * Visible flow: look up the EXTENT_ITEM for @bytenr in the extent tree,
 * read the root @root_id, then iterate the EXTENT_DATA items of inode
 * @objectid in that root and test each against @bytenr, @len and the
 * backref offset. found_count is finally compared with @count; a
 * difference is reported and REFERENCER_MISSING is returned.
 * NOTE(review): the guard for the len == 0 lookup, the statement that
 * increments found_count, the loop header and the success return are not
 * visible in this listing.
 */
10584 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10585 u64 root_id, u64 objectid, u64 offset,
10586 u64 bytenr, u64 len, u32 count)
10588 struct btrfs_root *root;
10589 struct btrfs_root *extent_root = fs_info->extent_root;
10590 struct btrfs_key key;
10591 struct btrfs_path path;
10592 struct extent_buffer *leaf;
10593 struct btrfs_file_extent_item *fi;
10594 u32 found_count = 0;
10599 key.objectid = bytenr;
10600 key.type = BTRFS_EXTENT_ITEM_KEY;
10601 key.offset = (u64)-1;
10603 btrfs_init_path(&path);
10604 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10607 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10610 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10611 if (key.objectid != bytenr ||
10612 key.type != BTRFS_EXTENT_ITEM_KEY)
10615 btrfs_release_path(&path);
/* Switch to the referencing root */
10617 key.objectid = root_id;
10618 key.type = BTRFS_ROOT_ITEM_KEY;
10619 key.offset = (u64)-1;
10620 btrfs_init_path(&path);
10622 root = btrfs_read_fs_root(fs_info, &key);
10626 key.objectid = objectid;
10627 key.type = BTRFS_EXTENT_DATA_KEY;
10629 * It can be nasty as data backref offset is
10630 * file offset - file extent offset, which is smaller or
10631 * equal to original backref offset. The only special case is
10632 * overflow. So we need to special check and do further search.
/* When bit 63 of @offset is set the search starts at offset 0 */
10634 key.offset = offset & (1ULL << 63) ? 0 : offset;
10636 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10641 * Search afterwards to get correct one
10642 * NOTE: As we must do a comprehensive check on the data backref to
10643 * make sure the dref count also matches, we must iterate all file
10644 * extents for that inode.
10647 leaf = path.nodes[0];
10648 slot = path.slots[0];
10650 if (slot >= btrfs_header_nritems(leaf))
10652 btrfs_item_key_to_cpu(leaf, &key, slot);
10653 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10655 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10657 * Except normal disk bytenr and disk num bytes, we still
10658 * need to do extra check on dbackref offset as
10659 * dbackref offset = file_offset - file_extent_offset
10661 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10662 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10663 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10668 ret = btrfs_next_item(root, &path);
10673 btrfs_release_path(&path);
10674 if (found_count != count) {
10676 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10677 bytenr, len, root_id, objectid, offset, count, found_count);
10678 return REFERENCER_MISSING;
10684 * Check if the referencer of a shared data backref exists
/*
 * Reads the tree block at @parent and scans its items for an EXTENT_DATA
 * item, other than an inline extent, whose disk_bytenr equals @bytenr.
 * When none is found an error is printed and REFERENCER_MISSING is
 * returned.
 * NOTE(review): the goto/continue statements and the success return are
 * not visible in this listing.
 */
10686 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10687 u64 parent, u64 bytenr)
10689 struct extent_buffer *eb;
10690 struct btrfs_key key;
10691 struct btrfs_file_extent_item *fi;
10692 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10694 int found_parent = 0;
10697 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10698 if (!extent_buffer_uptodate(eb))
10701 nr = btrfs_header_nritems(eb);
10702 for (i = 0; i < nr; i++) {
10703 btrfs_item_key_to_cpu(eb, &key, i);
10704 if (key.type != BTRFS_EXTENT_DATA_KEY)
10707 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10708 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10711 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10718 free_extent_buffer(eb);
10719 if (!found_parent) {
10720 error("shared extent %llu referencer lost (parent: %llu)",
10722 return REFERENCER_MISSING;
10728 * This function will check a given extent item, including its backref and
10729 * itself (like crossing stripe boundary and type)
10731 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Visible flow for the extent item at @slot of @eb:
 *  - add key.offset (EXTENT_ITEM) or nodesize to the global bytes_used;
 *  - report items smaller than struct btrfs_extent_item as unsupported
 *    COMPAT_EXTENT_TREE_V0;
 *  - for metadata, test check_crossing_stripes()
 *    (CROSSING_STRIPE_BOUNDARY);
 *  - derive the level from btrfs_tree_block_info (old EXTENT_ITEM
 *    metadata) or from key.offset (METADATA_ITEM);
 *  - walk the inline refs up to the item end and dispatch each one by
 *    type to the matching check_*_backref() helper; unknown types set
 *    UNKNOWN_TYPE, overrunning the item end sets ITEM_SIZE_MISMATCH.
 * NOTE(review): the loop construct, the switch header, the accumulation
 * of the helpers' results and the return statement are not visible in
 * this listing.
 */
10733 static int check_extent_item(struct btrfs_fs_info *fs_info,
10734 struct extent_buffer *eb, int slot)
10736 struct btrfs_extent_item *ei;
10737 struct btrfs_extent_inline_ref *iref;
10738 struct btrfs_extent_data_ref *dref;
10742 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10743 u32 item_size = btrfs_item_size_nr(eb, slot);
10748 struct btrfs_key key;
10752 btrfs_item_key_to_cpu(eb, &key, slot);
10753 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10754 bytes_used += key.offset;
10756 bytes_used += nodesize;
10758 if (item_size < sizeof(*ei)) {
10760 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10761 * old thing when on disk format is still un-determined.
10762 * No need to care about it anymore
10764 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10768 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10769 flags = btrfs_extent_flags(eb, ei);
10771 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10773 if (metadata && check_crossing_stripes(global_info, key.objectid,
10775 error("bad metadata [%llu, %llu) crossing stripe boundary",
10776 key.objectid, key.objectid + nodesize);
10777 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr tracks the current position inside the item, right after ei */
10780 ptr = (unsigned long)(ei + 1);
10782 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10783 /* Old EXTENT_ITEM metadata */
10784 struct btrfs_tree_block_info *info;
10786 info = (struct btrfs_tree_block_info *)ptr;
10787 level = btrfs_tree_block_level(eb, info);
10788 ptr += sizeof(struct btrfs_tree_block_info);
10790 /* New METADATA_ITEM */
10791 level = key.offset;
10793 end = (unsigned long)ei + item_size;
10796 /* Reached extent item end normally */
10800 /* Beyond extent item end, wrong item size */
10802 err |= ITEM_SIZE_MISMATCH;
10803 error("extent item at bytenr %llu slot %d has wrong size",
10808 /* Now check every backref in this extent item */
10809 iref = (struct btrfs_extent_inline_ref *)ptr;
10810 type = btrfs_extent_inline_ref_type(eb, iref);
10811 offset = btrfs_extent_inline_ref_offset(eb, iref);
10813 case BTRFS_TREE_BLOCK_REF_KEY:
10814 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10818 case BTRFS_SHARED_BLOCK_REF_KEY:
10819 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10823 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload is read from the position of iref->offset */
10824 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10825 ret = check_extent_data_backref(fs_info,
10826 btrfs_extent_data_ref_root(eb, dref),
10827 btrfs_extent_data_ref_objectid(eb, dref),
10828 btrfs_extent_data_ref_offset(eb, dref),
10829 key.objectid, key.offset,
10830 btrfs_extent_data_ref_count(eb, dref));
10833 case BTRFS_SHARED_DATA_REF_KEY:
10834 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10838 error("extent[%llu %d %llu] has unknown ref type: %d",
10839 key.objectid, key.type, key.offset, type);
10840 err |= UNKNOWN_TYPE;
/* Advance past the inline ref just handled */
10844 ptr += btrfs_extent_inline_ref_size(type);
10852 * Check if a dev extent item is referred correctly by its chunk
/*
 * Builds the CHUNK_ITEM key from the dev extent at @slot of @eb
 * (chunk objectid and chunk offset), searches it in the chunk root,
 * compares the chunk length with the dev extent length and then looks for
 * a stripe whose devid/offset equal the dev extent key's objectid/offset.
 * When no matching chunk is found an error is printed and
 * REFERENCER_MISSING is returned.
 * NOTE(review): the assignment of the leaf pointer l, the goto targets
 * and the success return are not visible in this listing.
 */
10854 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10855 struct extent_buffer *eb, int slot)
10857 struct btrfs_root *chunk_root = fs_info->chunk_root;
10858 struct btrfs_dev_extent *ptr;
10859 struct btrfs_path path;
10860 struct btrfs_key chunk_key;
10861 struct btrfs_key devext_key;
10862 struct btrfs_chunk *chunk;
10863 struct extent_buffer *l;
10867 int found_chunk = 0;
10870 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10871 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10872 length = btrfs_dev_extent_length(eb, ptr);
10874 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10875 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10876 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10878 btrfs_init_path(&path);
10879 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10884 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10885 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that sits on this device at this offset */
10888 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10889 for (i = 0; i < num_stripes; i++) {
10890 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10891 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10893 if (devid == devext_key.objectid &&
10894 offset == devext_key.offset) {
10900 btrfs_release_path(&path);
10901 if (!found_chunk) {
10903 "device extent[%llu, %llu, %llu] did not find the related chunk",
10904 devext_key.objectid, devext_key.offset, length);
10905 return REFERENCER_MISSING;
10911 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of a DEV_ITEM with the space covered by the
 * DEV_EXTENT items of the same device.
 *
 * @fs_info: filesystem handle; its dev_root is the tree that is walked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Dev extents keyed by the device id are iterated in fs_info->dev_root and
 * their lengths are accumulated into `total`.
 *
 * Returns REFERENCER_MISSING on the "cannot find any related dev extent"
 * path and ACCOUNTING_MISMATCH when the accumulated total differs from
 * bytes_used.
 */
10913 static int check_dev_item(struct btrfs_fs_info *fs_info,
10914 struct extent_buffer *eb, int slot)
10916 struct btrfs_root *dev_root = fs_info->dev_root;
10917 struct btrfs_dev_item *dev_item;
10918 struct btrfs_path path;
10919 struct btrfs_key key;
10920 struct btrfs_dev_extent *ptr;
/* Device id and the used-bytes value recorded in the dev item */
10926 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10927 dev_id = btrfs_device_id(eb, dev_item);
10928 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by (devid, DEV_EXTENT, ...) */
10930 key.objectid = dev_id;
10931 key.type = BTRFS_DEV_EXTENT_KEY;
10934 btrfs_init_path(&path);
10935 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/*
 * NOTE(review): the condition guarding this error path is not visible
 * here; presumably it is taken when the search above fails - confirm.
 * The key is re-read from the dev item so the message shows the item key.
 */
10937 btrfs_item_key_to_cpu(eb, &key, slot);
10938 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10939 key.objectid, key.type, key.offset);
10940 btrfs_release_path(&path);
10941 return REFERENCER_MISSING;
10944 /* Iterate dev_extents to calculate the used space of a device */
/* The search may leave the slot past the last item of the leaf */
10946 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10949 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10950 if (key.objectid > dev_id)
10952 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
/* Accumulate the length of this dev extent */
10955 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10956 struct btrfs_dev_extent);
10957 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10959 ret = btrfs_next_item(dev_root, &path);
10963 btrfs_release_path(&path);
/*
 * NOTE(review): the key is re-read from the dev item below, yet the
 * message prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * instead of key.objectid/type/offset - confirm whether that is intended.
 */
10965 if (used != total) {
10966 btrfs_item_key_to_cpu(eb, &key, slot);
10968 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10969 total, used, BTRFS_ROOT_TREE_OBJECTID,
10970 BTRFS_DEV_EXTENT_KEY, dev_id);
10971 return ACCOUNTING_MISMATCH;
10977 * Check a block group item with its referencer (chunk) and its used space
10978 * with extent/metadata item
/*
 * Verify one BLOCK_GROUP_ITEM.
 *
 * @fs_info: filesystem handle; chunk_root and extent_root are searched
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Two checks are done:
 *  - the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 *    block group start) is looked up in the chunk tree and its length is
 *    compared;
 *  - the extent tree is walked from the block group start, the space of the
 *    EXTENT_ITEM/METADATA_ITEM entries is summed and compared with the
 *    `used` value of the block group item, and each extent's DATA/TREE_BLOCK
 *    flag is checked against the block group flags.
 *
 * Problems are OR-ed into `err` as REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.  The return statement is not
 * visible here; presumably `err` is returned - confirm.
 */
10980 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10981 struct extent_buffer *eb, int slot)
10983 struct btrfs_root *extent_root = fs_info->extent_root;
10984 struct btrfs_root *chunk_root = fs_info->chunk_root;
10985 struct btrfs_block_group_item *bi;
10986 struct btrfs_block_group_item bg_item;
10987 struct btrfs_path path;
10988 struct btrfs_key bg_key;
10989 struct btrfs_key chunk_key;
10990 struct btrfs_key extent_key;
10991 struct btrfs_chunk *chunk;
10992 struct extent_buffer *leaf;
10993 struct btrfs_extent_item *ei;
10994 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the leaf so its fields can be read directly */
11002 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11003 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11004 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11005 used = btrfs_block_group_used(&bg_item);
11006 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item offset is the block group start (bg_key.objectid) */
11008 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11009 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11010 chunk_key.offset = bg_key.objectid;
11012 btrfs_init_path(&path);
11013 /* Search for the referencer chunk */
11014 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11017 "block group[%llu %llu] did not find the related chunk item",
11018 bg_key.objectid, bg_key.offset);
11019 err |= REFERENCER_MISSING;
/* Chunk found: its length is compared next */
11021 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11022 struct btrfs_chunk);
11023 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11026 "block group[%llu %llu] related chunk item length does not match",
11027 bg_key.objectid, bg_key.offset);
11028 err |= REFERENCER_MISMATCH;
11031 btrfs_release_path(&path);
11033 /* Search from the block group bytenr */
11034 extent_key.objectid = bg_key.objectid;
11035 extent_key.type = 0;
11036 extent_key.offset = 0;
11038 btrfs_init_path(&path);
11039 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11043 /* Iterate extent tree to account used space */
11045 leaf = path.nodes[0];
11047 /* Search slot can point to the last item beyond leaf nritems */
11048 if (path.slots[0] >= btrfs_header_nritems(leaf))
11051 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or beyond the end of the block group are outside its range */
11052 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only EXTENT_ITEM and METADATA_ITEM keys are examined */
11055 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11056 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11058 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statements between the METADATA_ITEM test and the
 * addition below are not visible; presumably metadata items add nodesize
 * and other extents add key.offset - confirm.
 */
11061 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11064 total += extent_key.offset;
/* Check the extent's flag against the block group flags */
11066 ei = btrfs_item_ptr(leaf, path.slots[0],
11067 struct btrfs_extent_item);
11068 flags = btrfs_extent_flags(leaf, ei);
11069 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11070 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11072 "bad extent[%llu, %llu) type mismatch with chunk",
11073 extent_key.objectid,
11074 extent_key.objectid + extent_key.offset);
11075 err |= CHUNK_TYPE_MISMATCH;
11077 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11078 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11079 BTRFS_BLOCK_GROUP_METADATA))) {
11081 "bad extent[%llu, %llu) type mismatch with chunk",
11082 extent_key.objectid,
11083 extent_key.objectid + nodesize);
11084 err |= CHUNK_TYPE_MISMATCH;
11088 ret = btrfs_next_item(extent_root, &path);
11094 btrfs_release_path(&path);
/* Summed extent space must equal the block group's recorded usage */
11096 if (total != used) {
11098 "block group[%llu %llu] used %llu but extent items used %llu",
11099 bg_key.objectid, bg_key.offset, used, total);
11100 err |= ACCOUNTING_MISMATCH;
11106 * Check a chunk item.
11107 * Including checking all referred dev_extents and block group
/*
 * Verify one CHUNK_ITEM.
 *
 * @fs_info: filesystem handle; extent_root and dev_root are searched
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Checks performed:
 *  - the chunk length is aligned to the sector size;
 *  - the chunk type has at least one bit of BTRFS_BLOCK_GROUP_TYPE_MASK and
 *    at most one profile bit;
 *  - a BLOCK_GROUP_ITEM keyed (chunk offset, BLOCK_GROUP_ITEM, length)
 *    exists in the extent tree and carries the same flags;
 *  - for every stripe, a DEV_EXTENT keyed (devid, DEV_EXTENT, stripe offset)
 *    exists in the device tree and points back to this chunk with the same
 *    length.
 *
 * Problems are OR-ed into `err` (BYTES_UNALIGNED, UNKNOWN_TYPE,
 * REFERENCER_MISSING, BACKREF_MISSING).  The return statement is not visible
 * here; presumably `err` is returned - confirm.
 */
11109 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11110 struct extent_buffer *eb, int slot)
11112 struct btrfs_root *extent_root = fs_info->extent_root;
11113 struct btrfs_root *dev_root = fs_info->dev_root;
11114 struct btrfs_path path;
11115 struct btrfs_key chunk_key;
11116 struct btrfs_key bg_key;
11117 struct btrfs_key devext_key;
11118 struct btrfs_chunk *chunk;
11119 struct extent_buffer *leaf;
11120 struct btrfs_block_group_item *bi;
11121 struct btrfs_block_group_item bg_item;
11122 struct btrfs_dev_extent *ptr;
11123 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* The chunk covers [chunk_key.offset, chunk_end) */
11135 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11136 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11137 length = btrfs_chunk_length(eb, chunk);
11138 chunk_end = chunk_key.offset + length;
11139 if (!IS_ALIGNED(length, sectorsize)) {
11140 error("chunk[%llu %llu) not aligned to %u",
11141 chunk_key.offset, chunk_end, sectorsize);
11142 err |= BYTES_UNALIGNED;
11146 type = btrfs_chunk_type(eb, chunk);
11147 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11148 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11149 error("chunk[%llu %llu) has no chunk type",
11150 chunk_key.offset, chunk_end);
11151 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
11153 if (profile && (profile & (profile - 1))) {
11154 error("chunk[%llu %llu) multiple profiles detected: %llx",
11155 chunk_key.offset, chunk_end, profile);
11156 err |= UNKNOWN_TYPE;
/* Key of the block group item that should describe this chunk */
11159 bg_key.objectid = chunk_key.offset;
11160 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11161 bg_key.offset = length;
11163 btrfs_init_path(&path);
11164 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11167 "chunk[%llu %llu) did not find the related block group item",
11168 chunk_key.offset, chunk_end);
11169 err |= REFERENCER_MISSING;
11171 leaf = path.nodes[0];
11172 bi = btrfs_item_ptr(leaf, path.slots[0],
11173 struct btrfs_block_group_item);
11174 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is recorded as REFERENCER_MISSING although
 * the block group item was found - confirm whether a mismatch code was
 * intended.
 */
11176 if (btrfs_block_group_flags(&bg_item) != type) {
11178 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11179 chunk_key.offset, chunk_end, type,
11180 btrfs_block_group_flags(&bg_item));
11181 err |= REFERENCER_MISSING;
/* Every stripe must have a dev extent that points back to this chunk */
11185 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11186 for (i = 0; i < num_stripes; i++) {
11187 btrfs_release_path(&path);
11188 btrfs_init_path(&path);
11189 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11190 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11191 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11193 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11196 goto not_match_dev;
11198 leaf = path.nodes[0];
11199 ptr = btrfs_item_ptr(leaf, path.slots[0],
11200 struct btrfs_dev_extent);
11201 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11202 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11203 if (objectid != chunk_key.objectid ||
11204 offset != chunk_key.offset ||
11205 btrfs_dev_extent_length(leaf, ptr) != length)
11206 goto not_match_dev;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start
 * while the other messages in this function print chunk_key.offset -
 * confirm.
 */
11209 err |= BACKREF_MISSING;
11211 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11212 chunk_key.objectid, chunk_end, i);
11215 btrfs_release_path(&path);
11221 * Main entry function to check known items and update related accounting info
/*
 * Run the per-item checker on the items of leaf @eb.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf to check
 *
 * The key of the item at `slot` is read and, according to the case labels
 * below, the item is passed to the checker for its key type.  EXTENT_CSUM
 * items only add their item size to the global total_csum_bytes.  `slot` is
 * advanced while it stays below the number of items in the leaf.
 */
11223 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11225 struct btrfs_fs_info *fs_info = root->fs_info;
11226 struct btrfs_key key;
11229 struct btrfs_extent_data_ref *dref;
11234 btrfs_item_key_to_cpu(eb, &key, slot);
11238 case BTRFS_EXTENT_DATA_KEY:
11239 ret = check_extent_data_item(root, eb, slot);
11242 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11243 ret = check_block_group_item(fs_info, eb, slot);
11246 case BTRFS_DEV_ITEM_KEY:
11247 ret = check_dev_item(fs_info, eb, slot);
11250 case BTRFS_CHUNK_ITEM_KEY:
11251 ret = check_chunk_item(fs_info, eb, slot);
11254 case BTRFS_DEV_EXTENT_KEY:
11255 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11258 case BTRFS_EXTENT_ITEM_KEY:
11259 case BTRFS_METADATA_ITEM_KEY:
11260 ret = check_extent_item(fs_info, eb, slot);
/* Csum items are only accounted, not verified here */
11263 case BTRFS_EXTENT_CSUM_KEY:
11264 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items */
11266 case BTRFS_TREE_BLOCK_REF_KEY:
11267 ret = check_tree_block_backref(fs_info, key.offset,
11271 case BTRFS_EXTENT_DATA_REF_KEY:
11272 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11273 ret = check_extent_data_backref(fs_info,
11274 btrfs_extent_data_ref_root(eb, dref),
11275 btrfs_extent_data_ref_objectid(eb, dref),
11276 btrfs_extent_data_ref_offset(eb, dref),
11278 btrfs_extent_data_ref_count(eb, dref));
11281 case BTRFS_SHARED_BLOCK_REF_KEY:
11282 ret = check_shared_block_backref(fs_info, key.offset,
11286 case BTRFS_SHARED_DATA_REF_KEY:
11287 ret = check_shared_data_backref(fs_info, key.offset,
/* Move on to the next item while one remains in the leaf */
11295 if (++slot < btrfs_header_nritems(eb))
11302 * Helper function for later fs/subvol tree check. To determine if a tree
11303 * block should be checked.
11304 * This function ensures that only the direct referencer with the lowest rootid
11305 * checks a fs/subvolume tree block.
11307 * Backref check at extent tree would detect errors like missing subvolume
11308 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether tree block @eb should be checked on behalf of @root.
 *
 * @root: tree currently being traversed
 * @eb:   tree block in question
 *
 * The extent item of @eb is located in the extent tree and its inline
 * references are walked.  When an inline TREE_BLOCK_REF names a root id
 * lower than root->objectid, the path is released and the walk stops early.
 * The values returned on each path are not visible here; the caller treats
 * a zero result as "skip this block".
 */
11310 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11312 struct btrfs_root *extent_root = root->fs_info->extent_root;
11313 struct btrfs_key key;
11314 struct btrfs_path path;
11315 struct extent_buffer *leaf;
11317 struct btrfs_extent_item *ei;
11323 struct btrfs_extent_inline_ref *iref;
/* Search with the largest offset, then step back to the extent item */
11326 btrfs_init_path(&path);
11327 key.objectid = btrfs_header_bytenr(eb);
11328 key.type = BTRFS_METADATA_ITEM_KEY;
11329 key.offset = (u64)-1;
11332 * Any failure in backref resolving means we can't determine
11333 * whom the tree block belongs to.
11334 * So in that case, we need to check that tree block
11336 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11340 ret = btrfs_previous_extent_item(extent_root, &path,
11341 btrfs_header_bytenr(eb));
11345 leaf = path.nodes[0];
11346 slot = path.slots[0];
11347 btrfs_item_key_to_cpu(leaf, &key, slot);
11348 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * in the other case a btrfs_tree_block_info sits in between.
 */
11350 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11351 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11353 struct btrfs_tree_block_info *info;
11355 info = (struct btrfs_tree_block_info *)(ei + 1);
11356 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk the inline refs up to the end of the item */
11359 item_size = btrfs_item_size_nr(leaf, slot);
11360 ptr = (unsigned long)iref;
11361 end = (unsigned long)ei + item_size;
11362 while (ptr < end) {
11363 iref = (struct btrfs_extent_inline_ref *)ptr;
11364 type = btrfs_extent_inline_ref_type(leaf, iref);
11365 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11368 * We only check the tree block if current root is
11369 * the lowest referencer of it.
11371 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11372 offset < root->objectid) {
11373 btrfs_release_path(&path);
/* Advance by the size of this ref type */
11377 ptr += btrfs_extent_inline_ref_size(type);
11380 * Normally we should also check keyed tree block ref, but that may be
11381 * very time consuming. Inlined ref should already make us skip a lot
11382 * of refs now. So skip search keyed tree block ref.
11386 btrfs_release_path(&path);
11391 * Traversal function for tree block. We will do:
11392 * 1) Skip shared fs/subvolume tree blocks
11393 * 2) Update related bytes accounting
11394 * 3) Pre-order traversal
/*
 * Check tree block @node of @root and then, recursively, its children.
 *
 * @root: tree being traversed
 * @node: tree block to check (leaf or internal node)
 *
 * For fs/subvolume trees the block is skipped when should_check() returns
 * zero.  Otherwise the global byte counters are updated, the block's own
 * backref is checked with check_tree_block_ref(), and then either the leaf
 * items are checked or every child block is read and traversed.
 */
11396 static int traverse_tree_block(struct btrfs_root *root,
11397 struct extent_buffer *node)
11399 struct extent_buffer *eb;
11400 struct btrfs_key key;
11401 struct btrfs_key drop_key;
11409 * Skip shared fs/subvolume tree block, in that case they will
11410 * be checked by referencer with lowest rootid
11412 if (is_fstree(root->objectid) && !should_check(root, node))
11415 /* Update bytes accounting */
11416 total_btree_bytes += node->len;
11417 if (fs_root_objectid(btrfs_header_owner(node)))
11418 total_fs_tree_bytes += node->len;
11419 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11420 total_extent_tree_bytes += node->len;
/*
 * Flag a reloc-tree block that has the mixed backref revision but lacks
 * the RELOC header flag; the flag is sticky once set.
 */
11421 if (!found_old_backref &&
11422 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11423 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11424 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11425 found_old_backref = 1;
11427 /* pre-order traversal, check itself first */
11428 level = btrfs_header_level(node);
11429 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11430 btrfs_header_level(node),
11431 btrfs_header_owner(node));
11435 "check %s failed root %llu bytenr %llu level %d, force continue check",
11436 level ? "node":"leaf", root->objectid,
11437 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account its free space and check its items */
11440 btree_space_waste += btrfs_leaf_free_space(root, node);
11441 ret = check_leaf_items(root, node);
/* Node handling: account the unused key pointer slots */
11446 nr = btrfs_header_nritems(node);
11447 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11448 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11449 sizeof(struct btrfs_key_ptr);
11451 /* Then check all its children */
11452 for (i = 0; i < nr; i++) {
11453 u64 blocknr = btrfs_node_blockptr(node, i);
/* Test the child key against the root's drop progress at drop_level */
11455 btrfs_node_key_to_cpu(node, &key, i);
11456 if (level == root->root_item.drop_level &&
11457 is_dropped_key(&key, &drop_key))
11461 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11462 * to call the function itself.
11464 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11465 if (extent_buffer_uptodate(eb)) {
11466 ret = traverse_tree_block(root, eb);
11469 free_extent_buffer(eb);
11476 * Low memory usage version of check_chunks_and_extents.
/*
 * Traverse the chunk tree, the tree root and then every tree that has a
 * ROOT_ITEM in the tree root, starting the search at
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Roots keyed BTRFS_TREE_RELOC_OBJECTID are read without the root cache and
 * freed again after their traversal.
 */
11478 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11480 struct btrfs_path path;
11481 struct btrfs_key key;
11482 struct btrfs_root *root1;
11483 struct btrfs_root *cur_root;
/* Chunk tree first, then the tree root itself */
11487 root1 = root->fs_info->chunk_root;
11488 ret = traverse_tree_block(root1, root1->node);
11491 root1 = root->fs_info->tree_root;
11492 ret = traverse_tree_block(root1, root1->node);
/* Locate the first root item to start iterating from */
11495 btrfs_init_path(&path);
11496 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11498 key.type = BTRFS_ROOT_ITEM_KEY;
11500 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11502 error("cannot find extent treet in tree_root");
11507 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11508 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* The root is read with offset (u64)-1 in the lookup key */
11510 key.offset = (u64)-1;
11512 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11513 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11516 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11517 if (IS_ERR(cur_root) || !cur_root) {
11518 error("failed to read tree: %lld", key.objectid);
11522 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached, so they are freed here */
11525 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11526 btrfs_free_fs_root(cur_root);
11528 ret = btrfs_next_item(root1, &path);
11534 btrfs_release_path(&path);
/*
 * Replace the node of @root with a freshly initialized tree block.
 *
 * @trans:     running transaction
 * @root:      root to re-initialize
 * @overwrite: NOTE(review): the code that reads this flag is not visible
 *             here; presumably it selects reusing the current root block
 *             instead of allocating a new one - confirm
 *
 * The header of the block `c` is zeroed and refilled (level, bytenr,
 * generation, backref revision, owner, fsid, chunk tree uuid) and the block
 * is marked dirty.  When the block occupies the same bytenr as the old
 * root node, the root item is updated in the tree root.
 */
11538 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11539 struct btrfs_root *root, int overwrite)
11541 struct extent_buffer *c;
11542 struct extent_buffer *old = root->node;
11545 struct btrfs_disk_key disk_key = {0,0,0};
11551 extent_buffer_get(c);
11554 c = btrfs_alloc_free_block(trans, root,
11556 root->root_key.objectid,
11557 &disk_key, level, 0, 0);
11560 extent_buffer_get(c);
/* Start from an all-zero header and fill in the fields one by one */
11564 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11565 btrfs_set_header_level(c, level);
11566 btrfs_set_header_bytenr(c, c->start);
11567 btrfs_set_header_generation(c, trans->transid);
11568 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11569 btrfs_set_header_owner(c, root->root_key.objectid);
11571 write_extent_buffer(c, root->fs_info->fsid,
11572 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11574 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11575 btrfs_header_chunk_tree_uuid(c),
11578 btrfs_mark_buffer_dirty(c);
11580 * this case can happen in the following case:
11582 * 1.overwrite previous root.
11584 * 2.reinit reloc data root, this is because we skip pin
11585 * down reloc data tree before which means we can allocate
11586 * same block bytenr here.
11588 if (old->start == c->start) {
11589 btrfs_set_root_generation(&root->root_item,
11591 root->root_item.level = btrfs_header_level(root->node);
11592 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11593 &root->root_key, &root->root_item);
11595 free_extent_buffer(c);
11599 free_extent_buffer(old);
11601 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and, recursively, of the blocks
 * reachable from it.
 *
 * @fs_info:   filesystem handle
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root; it is passed on
 *             unchanged when descending into child nodes and as 0 when
 *             descending into a root referenced by a ROOT_ITEM
 *
 * Two kinds of entries are handled in the loop: ROOT_ITEM entries, whose
 * root block is read and walked (extent root and reloc roots are skipped),
 * and node block pointers, which are pinned directly when level == 1 and
 * @tree_root is zero, or read and walked otherwise.
 */
11605 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11606 struct extent_buffer *eb, int tree_root)
11608 struct extent_buffer *tmp;
11609 struct btrfs_root_item *ri;
11610 struct btrfs_key key;
11613 int level = btrfs_header_level(eb);
11619 * If we have pinned this block before, don't pin it again.
11620 * This can not only avoid forever loop with broken filesystem
11621 * but also give us some speedups.
11623 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11624 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11627 btrfs_pin_extent(fs_info, eb->start, eb->len);
11629 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11630 nritems = btrfs_header_nritems(eb);
11631 for (i = 0; i < nritems; i++) {
/* Item path: only ROOT_ITEM entries are followed */
11633 btrfs_item_key_to_cpu(eb, &key, i);
11634 if (key.type != BTRFS_ROOT_ITEM_KEY)
11636 /* Skip the extent root and reloc roots */
11637 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11638 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11639 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11641 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11642 bytenr = btrfs_disk_root_bytenr(eb, ri);
11645 * If at any point we start needing the real root we
11646 * will have to build a stump root for the root we are
11647 * in, but for now this doesn't actually use the root so
11648 * just pass in extent_root.
11650 tmp = read_tree_block(fs_info->extent_root, bytenr,
11652 if (!extent_buffer_uptodate(tmp)) {
11653 fprintf(stderr, "Error reading root block\n");
11656 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11657 free_extent_buffer(tmp);
/* Node path: follow the block pointer of slot i */
11661 bytenr = btrfs_node_blockptr(eb, i);
11663 /* If we aren't the tree root don't read the block */
11664 if (level == 1 && !tree_root) {
11665 btrfs_pin_extent(fs_info, bytenr, nodesize);
11669 tmp = read_tree_block(fs_info->extent_root, bytenr,
11671 if (!extent_buffer_uptodate(tmp)) {
11672 fprintf(stderr, "Error reading tree block\n");
11675 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11676 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (walked with
 * tree_root = 0) and then from the tree root (walked with tree_root = 1).
 * Returns the result of the tree root walk on the final line.
 */
11685 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11689 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11693 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the CHUNK_ITEM entries of the
 * chunk tree.
 *
 * @fs_info: filesystem handle
 *
 * The avail_*_alloc_bits fields are cleared first.  For every chunk item a
 * block group is added with btrfs_add_block_group() and the chunk's byte
 * range is marked dirty in fs_info->free_space_cache.  Afterwards the block
 * groups are walked with btrfs_lookup_first_block_group(); what is done to
 * each group in that second walk is not visible here.
 */
11696 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11698 struct btrfs_block_group_cache *cache;
11699 struct btrfs_path path;
11700 struct extent_buffer *leaf;
11701 struct btrfs_chunk *chunk;
11702 struct btrfs_key key;
11706 btrfs_init_path(&path);
11708 key.type = BTRFS_CHUNK_ITEM_KEY;
11710 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11712 btrfs_release_path(&path);
11717 * We do this in case the block groups were screwed up and had alloc
11718 * bits that aren't actually set on the chunks. This happens with
11719 * restored images every time and could happen in real life I guess.
11721 fs_info->avail_data_alloc_bits = 0;
11722 fs_info->avail_metadata_alloc_bits = 0;
11723 fs_info->avail_system_alloc_bits = 0;
11725 /* First we need to create the in-memory block groups */
/* Move to the next leaf once the current one is exhausted */
11727 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11728 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11730 btrfs_release_path(&path);
11738 leaf = path.nodes[0];
11739 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11740 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One block group per chunk: start = key.offset, size = chunk length */
11745 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11746 btrfs_add_block_group(fs_info, 0,
11747 btrfs_chunk_type(leaf, chunk),
11748 key.objectid, key.offset,
11749 btrfs_chunk_length(leaf, chunk));
11750 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11751 key.offset + btrfs_chunk_length(leaf, chunk));
/* Second pass over the block groups; `start` moves past each one */
11756 cache = btrfs_lookup_first_block_group(fs_info, start);
11760 start = cache->key.objectid + cache->key.offset;
11763 btrfs_release_path(&path);
/*
 * Drop the state of a pending balance from the tree root.
 *
 * @trans:   running transaction
 * @fs_info: filesystem handle
 *
 * Steps visible below:
 *  - search for the balance item (BTRFS_BALANCE_OBJECTID,
 *    BTRFS_BALANCE_ITEM_KEY) and delete it; one path jumps straight to
 *    reinit_data_reloc;
 *  - search from BTRFS_TREE_RELOC_OBJECTID root items and delete them in
 *    batches of del_nr items starting at del_slot;
 *  - read the data reloc tree, re-initialize its root with
 *    btrfs_fsck_reinit_root() and create its root directory at
 *    BTRFS_FIRST_FREE_OBJECTID.
 */
11767 static int reset_balance(struct btrfs_trans_handle *trans,
11768 struct btrfs_fs_info *fs_info)
11770 struct btrfs_root *root = fs_info->tree_root;
11771 struct btrfs_path path;
11772 struct extent_buffer *leaf;
11773 struct btrfs_key key;
11774 int del_slot, del_nr = 0;
/* Step 1: the balance item */
11778 btrfs_init_path(&path);
11779 key.objectid = BTRFS_BALANCE_OBJECTID;
11780 key.type = BTRFS_BALANCE_ITEM_KEY;
11782 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11787 goto reinit_data_reloc;
11792 ret = btrfs_del_item(trans, root, &path);
11795 btrfs_release_path(&path);
/* Step 2: root items of the tree reloc roots */
11797 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11798 key.type = BTRFS_ROOT_ITEM_KEY;
11800 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11804 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11809 ret = btrfs_del_items(trans, root, &path,
11816 btrfs_release_path(&path);
11819 ret = btrfs_search_slot(trans, root, &key, &path,
11826 leaf = path.nodes[0];
11827 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11828 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11830 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11835 del_slot = path.slots[0];
11844 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11848 btrfs_release_path(&path);
/* Step 3: `root` is re-pointed at the data reloc tree from here on */
11851 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11852 key.type = BTRFS_ROOT_ITEM_KEY;
11853 key.offset = (u64)-1;
11854 root = btrfs_read_fs_root(fs_info, &key);
11855 if (IS_ERR(root)) {
11856 fprintf(stderr, "Error reading data reloc tree\n");
11857 ret = PTR_ERR(root);
11860 record_root_in_trans(trans, root);
11861 ret = btrfs_fsck_reinit_root(trans, root, 0);
11864 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11866 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem handle
 *
 * Sequence visible below: refuse filesystems with the MIXED_GROUPS incompat
 * flag, pin all metadata blocks, free and recreate the in-memory block
 * groups, re-initialize the extent root, insert one block group item per
 * in-memory block group, and finally reset a pending balance.
 */
11870 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11871 struct btrfs_fs_info *fs_info)
11877 * The only reason we don't do this is because right now we're just
11878 * walking the trees we find and pinning down their bytes, we don't look
11879 * at any of the leaves. In order to do mixed groups we'd have to check
11880 * the leaves of any fs roots and pin down the bytes for any file
11881 * extents we find. Not hard but why do it if we don't have to?
11883 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11884 fprintf(stderr, "We don't support re-initing the extent tree "
11885 "for mixed block groups yet, please notify a btrfs "
11886 "developer you want to do this so they can add this "
11887 "functionality.\n");
11892 * first we need to walk all of the trees except the extent tree and pin
11893 * down the bytes that are in use so we don't overwrite any existing
11896 ret = pin_metadata_blocks(fs_info);
11898 fprintf(stderr, "error pinning down used bytes\n");
11903 * Need to drop all the block groups since we're going to recreate all
11906 btrfs_free_block_groups(fs_info);
11907 ret = reset_block_groups(fs_info);
11909 fprintf(stderr, "error resetting the block groups\n");
11913 /* Ok we can allocate now, reinit the extent root */
11914 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11916 fprintf(stderr, "extent root initialization failed\n");
11918 * When the transaction code is updated we should end the
11919 * transaction, but for now progs only knows about commit so
11920 * just return an error.
11926 * Now we have all the in-memory block groups setup so we can make
11927 * allocations properly, and the metadata we care about is safe since we
11928 * pinned all of it above.
11931 struct btrfs_block_group_cache *cache;
/* Walk the block groups in order; `start` moves past each one */
11933 cache = btrfs_lookup_first_block_group(fs_info, start);
11936 start = cache->key.objectid + cache->key.offset;
/* Write the block group item into the new extent tree */
11937 ret = btrfs_insert_item(trans, fs_info->extent_root,
11938 &cache->key, &cache->item,
11939 sizeof(cache->item));
11941 fprintf(stderr, "Error adding block group\n");
11944 btrfs_extent_post_op(trans, fs_info->extent_root);
11947 ret = reset_balance(trans, fs_info);
11949 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed by searching for its first key with a
 * transaction and cow enabled.
 *
 * @root: any root of the filesystem; replaced below by the owner root of @eb
 * @eb:   tree block to recow
 *
 * The owner root is read from the header owner of @eb.  The search uses
 * lowest_level = level of @eb.
 *
 * Returns PTR_ERR() of the root or transaction on the error paths shown.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * stored.
 */
11954 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11956 struct btrfs_path path;
11957 struct btrfs_trans_handle *trans;
11958 struct btrfs_key key;
11961 printf("Recowing metadata block %llu\n", eb->start);
/* Look up the root that owns this block */
11962 key.objectid = btrfs_header_owner(eb);
11963 key.type = BTRFS_ROOT_ITEM_KEY;
11964 key.offset = (u64)-1;
11966 root = btrfs_read_fs_root(root->fs_info, &key);
11967 if (IS_ERR(root)) {
11968 fprintf(stderr, "Couldn't find owner root %llu\n",
11970 return PTR_ERR(root);
11973 trans = btrfs_start_transaction(root, 1);
11975 return PTR_ERR(trans);
/* Use the first key of the block: a node key for nodes, an item key for leaves */
11977 btrfs_init_path(&path);
11978 path.lowest_level = btrfs_header_level(eb);
11979 if (path.lowest_level)
11980 btrfs_node_key_to_cpu(eb, &key, 0);
11982 btrfs_item_key_to_cpu(eb, &key, 0);
/* Search with cow = 1 */
11984 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11985 btrfs_commit_transaction(trans, root);
11986 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree that owns it.
 *
 * @root: any root of the filesystem; replaced below by the root whose id
 *        is bad->root_id
 * @bad:  key and owning root id of the item to delete
 *
 * Returns PTR_ERR() of the root or transaction on the error paths shown.
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * stored.
 */
11990 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11992 struct btrfs_path path;
11993 struct btrfs_trans_handle *trans;
11994 struct btrfs_key key;
11997 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11998 bad->key.type, bad->key.offset);
/* Look up the root recorded for the bad item */
11999 key.objectid = bad->root_id;
12000 key.type = BTRFS_ROOT_ITEM_KEY;
12001 key.offset = (u64)-1;
12003 root = btrfs_read_fs_root(root->fs_info, &key);
12004 if (IS_ERR(root)) {
12005 fprintf(stderr, "Couldn't find owner root %llu\n",
12007 return PTR_ERR(root);
12010 trans = btrfs_start_transaction(root, 1);
12012 return PTR_ERR(trans);
/* Search with ins_len = -1 and cow = 1, then delete the item */
12014 btrfs_init_path(&path);
12015 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12021 ret = btrfs_del_item(trans, root, &path);
12023 btrfs_commit_transaction(trans, root);
12024 btrfs_release_path(&path);
/*
 * Clear the log root pointer and log root level in the super block copy
 * and commit the change.
 *
 * @root: root used to start and commit the transaction
 *
 * `ret` receives PTR_ERR(trans) when starting the transaction fails, and
 * otherwise the result of btrfs_commit_transaction().
 */
12028 static int zero_log_tree(struct btrfs_root *root)
12030 struct btrfs_trans_handle *trans;
12033 trans = btrfs_start_transaction(root, 1);
12034 if (IS_ERR(trans)) {
12035 ret = PTR_ERR(trans);
12038 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12039 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12040 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start, one
 * sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: csum tree; also supplies the sector size
 * @buf:       scratch buffer that receives each sector's data
 * @start:     first byte of the range
 *
 * Each iteration reads data at start + offset with read_extent_data() and
 * passes it to btrfs_csum_file_block().  `offset` advances by the sector
 * size until it reaches `len` (the remaining parameters are not visible
 * here).
 */
12044 static int populate_csum(struct btrfs_trans_handle *trans,
12045 struct btrfs_root *csum_root, char *buf, u64 start,
12052 while (offset < len) {
12053 sectorsize = csum_root->sectorsize;
12054 ret = read_extent_data(csum_root, buf, start + offset,
/* The third argument is the end of the whole range (start + len) */
12058 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12059 start + offset, buf, sectorsize);
12062 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the regular file extents of one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate
 * @cur_root:  fs/subvolume tree whose file extents are walked
 *
 * A sector-sized buffer is allocated, the tree is iterated item by item,
 * and for every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG the on-disk
 * range (disk_bytenr, disk_num_bytes) is handed to populate_csum().
 */
12067 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12068 struct btrfs_root *csum_root,
12069 struct btrfs_root *cur_root)
12071 struct btrfs_path path;
12072 struct btrfs_key key;
12073 struct extent_buffer *node;
12074 struct btrfs_file_extent_item *fi;
/* Scratch buffer of one sector, sized from the filesystem's csum root */
12081 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12085 btrfs_init_path(&path);
12089 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12092 /* Iterate all regular file extents and fill its csum */
12094 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12096 if (key.type != BTRFS_EXTENT_DATA_KEY)
12098 node = path.nodes[0];
12099 slot = path.slots[0];
12100 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12101 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12103 start = btrfs_file_extent_disk_bytenr(node, fi);
12104 len = btrfs_file_extent_disk_num_bytes(node, fi);
12106 ret = populate_csum(trans, csum_root, buf, start, len);
/* -EEXIST is singled out; how it is handled is not visible here */
12107 if (ret == -EEXIST)
12113 * TODO: if next leaf is corrupted, jump to nearest next valid
12116 ret = btrfs_next_item(cur_root, &path);
12126 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking every fs/subvolume tree found in the tree
 * root.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate; its fs_info supplies the tree root
 *
 * The tree root is searched from BTRFS_FS_TREE_OBJECTID root items on.
 * Each item is tested against BTRFS_LAST_FREE_OBJECTID, the ROOT_ITEM key
 * type and is_fstree(); the referenced root is read and passed to
 * fill_csum_tree_from_one_fs_root().
 */
12131 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12132 struct btrfs_root *csum_root)
12134 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12135 struct btrfs_path path;
12136 struct btrfs_root *tree_root = fs_info->tree_root;
12137 struct btrfs_root *cur_root;
12138 struct extent_buffer *node;
12139 struct btrfs_key key;
12143 btrfs_init_path(&path);
12144 key.objectid = BTRFS_FS_TREE_OBJECTID;
12146 key.type = BTRFS_ROOT_ITEM_KEY;
12147 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12156 node = path.nodes[0];
12157 slot = path.slots[0];
12158 btrfs_item_key_to_cpu(node, &key, slot);
/* Filters applied to each item of the tree root */
12159 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12161 if (key.type != BTRFS_ROOT_ITEM_KEY)
12163 if (!is_fstree(key.objectid))
/* The root is read with offset (u64)-1 in the lookup key */
12165 key.offset = (u64)-1;
12167 cur_root = btrfs_read_fs_root(fs_info, &key);
12168 if (IS_ERR(cur_root) || !cur_root) {
12169 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12173 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12178 ret = btrfs_next_item(tree_root, &path);
12188 btrfs_release_path(&path);
/*
 * Fill the csum tree by walking the EXTENT_ITEM entries of the extent tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree to populate; its fs_info supplies the extent root
 *
 * A sector-sized buffer is allocated.  Every EXTENT_ITEM whose flags
 * contain BTRFS_EXTENT_FLAG_DATA is handed to populate_csum() with
 * key.objectid as the start of the range.
 */
12192 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12193 struct btrfs_root *csum_root)
12195 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12196 struct btrfs_path path;
12197 struct btrfs_extent_item *ei;
12198 struct extent_buffer *leaf;
12200 struct btrfs_key key;
12203 btrfs_init_path(&path);
12205 key.type = BTRFS_EXTENT_ITEM_KEY;
12207 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12209 btrfs_release_path(&path);
/* Scratch buffer of one sector */
12213 buf = malloc(csum_root->sectorsize);
12215 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
12220 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12221 ret = btrfs_next_leaf(extent_root, &path);
12229 leaf = path.nodes[0];
12231 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12232 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Test for the DATA flag on the extent */
12237 ei = btrfs_item_ptr(leaf, path.slots[0],
12238 struct btrfs_extent_item);
12239 if (!(btrfs_extent_flags(leaf, ei) &
12240 BTRFS_EXTENT_FLAG_DATA)) {
12245 ret = populate_csum(trans, csum_root, buf, key.objectid,
12252 btrfs_release_path(&path);
12258 * Recalculate the csum and put it into the csum tree.
12260 * Extent tree init will wipe out all the extent info, so in that case, we
12261 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12262 * will use fs/subvol trees to init the csum tree.
/*
 * Select the source used to refill the checksum tree.
 *
 * trans:          transaction handle, forwarded unchanged to the helper
 * csum_root:      checksum tree root, forwarded unchanged to the helper
 * search_fs_tree: non-zero selects fill_csum_tree_from_fs(); zero selects
 *                 fill_csum_tree_from_extent()
 *
 * Returns whatever the selected helper returns.
 */
12264 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12265 struct btrfs_root *csum_root,
12266 int search_fs_tree)
12268 if (search_fs_tree)
12269 return fill_csum_tree_from_fs(trans, csum_root);
12271 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * While the cache tree is not empty, its first cache extent is removed from
 * the tree and mapped back to the enclosing struct root_item_info with
 * container_of(). Afterwards the tree object itself is freed and the global
 * pointer is reset to NULL, so build_roots_info_cache() allocates a fresh
 * tree on its next call.
 *
 * NOTE(review): the statement that releases each root_item_info after
 * removal is not visible in this excerpt - confirm entries are freed.
 */
12274 static void free_roots_info_cache(void)
/* A NULL cache pointer is tested first (branch body not shown) */
12276 if (!roots_info_cache)
12279 while (!cache_tree_empty(roots_info_cache)) {
12280 struct cache_extent *entry;
12281 struct root_item_info *rii;
12283 entry = first_cache_extent(roots_info_cache);
12286 remove_cache_extent(roots_info_cache, entry);
12287 rii = container_of(entry, struct root_item_info, cache_extent);
/* Release the tree object and clear the global so it can be rebuilt */
12291 free(roots_info_cache);
12292 roots_info_cache = NULL;
/*
 * Populate roots_info_cache with root_item_info entries keyed by root id,
 * derived from items in the extent tree.
 *
 * The cache tree is allocated and initialised on first use. The extent tree
 * is then walked; BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY items
 * are examined, and for extent items the BTRFS_EXTENT_FLAG_TREE_BLOCK flag
 * is tested. The type of the first inline ref is compared with
 * BTRFS_TREE_BLOCK_REF_KEY and its offset is used as the root id for the
 * cache lookup, with entries keyed as [root_id, 1].
 *
 * An entry records the highest level seen for its root together with the
 * bytenr and generation of that block; node_count is set to 1 whenever a
 * higher level is recorded. A level of (u8)-1 marks an entry that has
 * nothing recorded yet.
 *
 * info: filesystem whose extent_root is walked
 *
 * NOTE(review): error paths, return values, the bodies of the skip branches
 * and the body of the equal-level branch are not visible in this excerpt
 * (the latter presumably increments node_count) - confirm.
 */
12295 static int build_roots_info_cache(struct btrfs_fs_info *info)
12298 struct btrfs_key key;
12299 struct extent_buffer *leaf;
12300 struct btrfs_path path;
/* Create the global cache tree on the first call */
12302 if (!roots_info_cache) {
12303 roots_info_cache = malloc(sizeof(*roots_info_cache));
12304 if (!roots_info_cache)
12306 cache_tree_init(roots_info_cache);
12309 btrfs_init_path(&path);
12311 key.type = BTRFS_EXTENT_ITEM_KEY;
12313 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12316 leaf = path.nodes[0];
12319 struct btrfs_key found_key;
12320 struct btrfs_extent_item *ei;
12321 struct btrfs_extent_inline_ref *iref;
12322 int slot = path.slots[0];
12327 struct cache_extent *entry;
12328 struct root_item_info *rii;
/* Slot is past the last item of this leaf: move on to the next leaf */
12330 if (slot >= btrfs_header_nritems(leaf)) {
12331 ret = btrfs_next_leaf(info->extent_root, &path);
12338 leaf = path.nodes[0];
12339 slot = path.slots[0];
12342 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12344 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12345 found_key.type != BTRFS_METADATA_ITEM_KEY)
12348 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12349 flags = btrfs_extent_flags(leaf, ei);
12351 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12352 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * For METADATA_ITEM keys the level comes from the key offset and the inline
 * refs start directly after the extent item.
 */
12355 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12356 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12357 level = found_key.offset;
/*
 * Here a btrfs_tree_block_info follows the extent item; it supplies the
 * level and the inline refs start after it.
 */
12359 struct btrfs_tree_block_info *binfo;
12361 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12362 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12363 level = btrfs_tree_block_level(leaf, binfo);
12367 * For a root extent, it must be of the following type and the
12368 * first (and only one) iref in the item.
12370 type = btrfs_extent_inline_ref_type(leaf, iref);
12371 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset is used as the id of the owning root */
12374 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12375 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry: level (u8)-1 is the "nothing recorded" value tested below */
12377 rii = malloc(sizeof(struct root_item_info));
12382 rii->cache_extent.start = root_id;
12383 rii->cache_extent.size = 1;
12384 rii->level = (u8)-1;
12385 entry = &rii->cache_extent;
12386 ret = insert_cache_extent(roots_info_cache, entry);
12389 rii = container_of(entry, struct root_item_info,
12393 ASSERT(rii->cache_extent.start == root_id);
12394 ASSERT(rii->cache_extent.size == 1);
/* Record this block when it is higher than any seen so far for the root */
12396 if (level > rii->level || rii->level == (u8)-1) {
12397 rii->level = level;
12398 rii->bytenr = found_key.objectid;
12399 rii->gen = btrfs_extent_generation(leaf, ei);
12400 rii->node_count = 1;
12401 } else if (level == rii->level) {
12409 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the root block recorded in
 * roots_info_cache and, when allowed, rewrite the item to match.
 *
 * path:           path whose leaf (nodes[0]) and slot (slots[0]) locate the
 *                 root item to examine
 * root_key:       key of that root item; its objectid is the root id
 * read_only_mode: when non-zero the item is only compared, never written
 *
 * The root item is copied out of the leaf, and its bytenr, level and
 * generation are compared with the cached values. On a mismatch a message
 * is printed, unless both read_only_mode and the global repair flag are
 * set. When read_only_mode is zero the three fields are updated in the
 * local copy and written back into the leaf.
 *
 * Error conditions with messages visible here: no cache entry for the root,
 * a cached node_count other than 1, and an on-disk generation newer than
 * the cached one.
 *
 * NOTE(review): the return statements are not visible in this excerpt -
 * confirm the exact return contract against the caller.
 */
12414 static int maybe_repair_root_item(struct btrfs_path *path,
12415 const struct btrfs_key *root_key,
12416 const int read_only_mode)
12418 const u64 root_id = root_key->objectid;
12419 struct cache_extent *entry;
12420 struct root_item_info *rii;
12421 struct btrfs_root_item ri;
12422 unsigned long offset;
/* Cache entries are keyed by root id with a size of 1 */
12424 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12427 "Error: could not find extent items for root %llu\n",
12428 root_key->objectid);
12432 rii = container_of(entry, struct root_item_info, cache_extent);
12433 ASSERT(rii->cache_extent.start == root_id);
12434 ASSERT(rii->cache_extent.size == 1);
/* Any node_count other than 1 is reported as an error */
12436 if (rii->node_count != 1) {
12438 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
12443 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12444 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12446 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12447 btrfs_root_level(&ri) != rii->level ||
12448 btrfs_root_generation(&ri) != rii->gen) {
12451 * If we're in repair mode but our caller told us to not update
12452 * the root item, i.e. just check if it needs to be updated, don't
12453 * print this message, since the caller will call us again shortly
12454 * for the same root item without read only mode (the caller will
12455 * open a transaction first).
12457 if (!(read_only_mode && repair))
12459 "%sroot item for root %llu,"
12460 " current bytenr %llu, current gen %llu, current level %u,"
12461 " new bytenr %llu, new gen %llu, new level %u\n",
12462 (read_only_mode ? "" : "fixing "),
12464 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12465 btrfs_root_level(&ri),
12466 rii->bytenr, rii->gen, rii->level);
12468 if (btrfs_root_generation(&ri) > rii->gen) {
12470 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12471 root_id, btrfs_root_generation(&ri), rii->gen);
12475 if (!read_only_mode) {
12476 btrfs_set_root_bytenr(&ri, rii->bytenr);
12477 btrfs_set_root_level(&ri, rii->level);
12478 btrfs_set_root_generation(&ri, rii->gen);
12479 write_extent_buffer(path->nodes[0], &ri,
12480 offset, sizeof(ri));
12490 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12491 * caused read-only snapshots to be corrupted if they were created at a moment
12492 * when the source subvolume/snapshot had orphan items. The issue was that the
12493 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12494 * node instead of the post orphan cleanup root node.
12495 * So this function, and its callees, just detects and fixes those cases. Even
12496 * though the regression was for read-only snapshots, this function applies to
12497 * any snapshot/subvolume root.
12498 * This must be run before any other repair code - not doing so makes other
12499 * repair code delete or modify backrefs in the extent tree, for example, which
12500 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items stored in the tree root and run each one through
 * maybe_repair_root_item().
 *
 * info: filesystem to operate on
 *
 * The roots info cache is built first and freed near the end of the
 * function. The search starts at objectid BTRFS_FIRST_FREE_OBJECTID with
 * type BTRFS_ROOT_ITEM_KEY. Found keys are tested for type
 * BTRFS_ROOT_ITEM_KEY and for objectid BTRFS_TREE_RELOC_OBJECTID before
 * maybe_repair_root_item() is called. That call is made in read-only mode
 * while no transaction is open and in write mode once one is.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * cmd_check() reports a negative result through strerror() and prints a
 * positive result as a number of roots - confirm the contract.
 * NOTE(review): the result of the last btrfs_commit_transaction() call is
 * not stored on the visible line - confirm whether a failed commit is
 * reported.
 */
12502 static int repair_root_items(struct btrfs_fs_info *info)
12504 struct btrfs_path path;
12505 struct btrfs_key key;
12506 struct extent_buffer *leaf;
12507 struct btrfs_trans_handle *trans = NULL;
12510 int need_trans = 0;
12512 btrfs_init_path(&path);
/* Collect, per root id, the highest-level tree block seen in the extent tree */
12514 ret = build_roots_info_cache(info);
12518 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12519 key.type = BTRFS_ROOT_ITEM_KEY;
12524 * Avoid opening and committing transactions if a leaf doesn't have
12525 * any root items that need to be fixed, so that we avoid rotating
12526 * backup roots unnecessarily.
12529 trans = btrfs_start_transaction(info->tree_root, 1);
12530 if (IS_ERR(trans)) {
12531 ret = PTR_ERR(trans);
12536 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12540 leaf = path.nodes[0];
12543 struct btrfs_key found_key;
12545 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12546 int no_more_keys = find_next_key(&path, &key);
12548 btrfs_release_path(&path);
12550 ret = btrfs_commit_transaction(trans,
12562 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12564 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12566 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12569 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12573 if (!trans && repair) {
12576 btrfs_release_path(&path);
12586 free_roots_info_cache();
12587 btrfs_release_path(&path);
12589 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of the block groups and then set the super
 * block cache generation to (u64)-1.
 *
 * fs_info: filesystem to operate on
 *
 * Block groups are looked up starting at the running offset, which is moved
 * to the end (key.objectid + key.offset) of each block group after it has
 * been passed to btrfs_clear_free_space_cache(). After the walk a
 * transaction is started, the cache generation in super_copy is updated and
 * the transaction is committed.
 *
 * Returns PTR_ERR() of the transaction handle when the transaction cannot
 * be started.
 * NOTE(review): the other return paths are not visible in this excerpt, and
 * the result of btrfs_commit_transaction() is not stored on the visible
 * line - confirm whether a failed commit is reported to the caller.
 */
12596 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12598 struct btrfs_trans_handle *trans;
12599 struct btrfs_block_group_cache *bg_cache;
12603 /* Clear all free space cache inodes and its extent data */
12605 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12608 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12611 current = bg_cache->key.objectid + bg_cache->key.offset;
12614 /* Don't forget to set cache_generation to -1 */
12615 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12616 if (IS_ERR(trans)) {
12617 error("failed to update super block cache generation");
12618 return PTR_ERR(trans);
12620 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12621 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage and help text for "btrfs check": a synopsis line, a one-line
 * summary, a longer description and one entry per option. cmd_check()
 * passes this array to usage() when the command line is not acceptable.
 */
12626 const char * const cmd_check_usage[] = {
12627 "btrfs check [options] <device>",
12628 "Check structural integrity of a filesystem (unmounted).",
12629 "Check structural integrity of an unmounted filesystem. Verify internal",
12630 "trees' consistency and item connectivity. In the repair mode try to",
12631 "fix the problems found. ",
12632 "WARNING: the repair mode is considered dangerous",
12634 "-s|--super <superblock> use this superblock copy",
12635 "-b|--backup use the first valid backup root copy",
12636 "--repair try to repair the filesystem",
12637 "--readonly run in read-only mode (default)",
12638 "--init-csum-tree create a new CRC tree",
12639 "--init-extent-tree create a new extent tree",
12640 "--mode <MODE> allows choice of memory/IO trade-offs",
12641 " where MODE is one of:",
12642 " original - read inodes and extents to memory (requires",
12643 " more memory, does less IO)",
12644 " lowmem - try to use less memory but read blocks again",
12646 "--check-data-csum verify checksums of data blocks",
12647 "-Q|--qgroup-report print a report on qgroup consistency",
12648 "-E|--subvol-extents <subvolid>",
12649 " print subvolume extents and sharing state",
12650 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12651 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12652 "-p|--progress indicate progress",
12653 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, rejects a device that is currently mounted and
 * opens the filesystem. It then handles the optional actions visible below
 * (clearing the v1 or v2 space cache, zeroing the log tree in repair mode,
 * the qgroup report and the subvolume extent report) and runs the check
 * passes in this order: optional extent/csum tree re-initialisation,
 * extents, root items, free space cache or tree, fs roots, csums, root refs
 * and quota groups. A summary of the collected counters is printed at the
 * end and the progress task, if enabled, is shut down.
 *
 * argc, argv: command line; exactly one non-option argument (the device)
 *             is accepted
 *
 * NOTE(review): return statements, error labels and several branch bodies
 * are not visible in this excerpt - confirm the exit status contract.
 * NOTE(review): the usage text advertises -E and -Q, but the short option
 * string given to getopt_long() is "as:br:p"; only the long options map to
 * those cases here - confirm whether the short forms are meant to work.
 */
12657 int cmd_check(int argc, char **argv)
12659 struct cache_tree root_cache;
12660 struct btrfs_root *root;
12661 struct btrfs_fs_info *info;
12664 u64 tree_root_bytenr = 0;
12665 u64 chunk_root_bytenr = 0;
12666 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12670 int init_csum_tree = 0;
12672 int clear_space_cache = 0;
12673 int qgroup_report = 0;
12674 int qgroups_repaired = 0;
12675 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Codes for the long-only options, numbered from 257 upwards */
12679 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12680 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12681 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12682 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12683 static const struct option long_options[] = {
12684 { "super", required_argument, NULL, 's' },
12685 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12686 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12687 { "init-csum-tree", no_argument, NULL,
12688 GETOPT_VAL_INIT_CSUM },
12689 { "init-extent-tree", no_argument, NULL,
12690 GETOPT_VAL_INIT_EXTENT },
12691 { "check-data-csum", no_argument, NULL,
12692 GETOPT_VAL_CHECK_CSUM },
12693 { "backup", no_argument, NULL, 'b' },
12694 { "subvol-extents", required_argument, NULL, 'E' },
12695 { "qgroup-report", no_argument, NULL, 'Q' },
12696 { "tree-root", required_argument, NULL, 'r' },
12697 { "chunk-root", required_argument, NULL,
12698 GETOPT_VAL_CHUNK_TREE },
12699 { "progress", no_argument, NULL, 'p' },
12700 { "mode", required_argument, NULL,
12702 { "clear-space-cache", required_argument, NULL,
12703 GETOPT_VAL_CLEAR_SPACE_CACHE},
12704 { NULL, 0, NULL, 0}
12707 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12711 case 'a': /* ignored */ break;
12713 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12716 num = arg_strtou64(optarg);
12717 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12719 "super mirror should be less than %d",
12720 BTRFS_SUPER_MIRROR_MAX);
12723 bytenr = btrfs_sb_offset(((int)num));
12724 printf("using SB copy %llu, bytenr %llu\n", num,
12725 (unsigned long long)bytenr);
12731 subvolid = arg_strtou64(optarg);
12734 tree_root_bytenr = arg_strtou64(optarg);
12736 case GETOPT_VAL_CHUNK_TREE:
12737 chunk_root_bytenr = arg_strtou64(optarg);
12740 ctx.progress_enabled = true;
12744 usage(cmd_check_usage);
12745 case GETOPT_VAL_REPAIR:
12746 printf("enabling repair mode\n");
12748 ctree_flags |= OPEN_CTREE_WRITES;
12750 case GETOPT_VAL_READONLY:
12753 case GETOPT_VAL_INIT_CSUM:
12754 printf("Creating a new CRC tree\n");
12755 init_csum_tree = 1;
12757 ctree_flags |= OPEN_CTREE_WRITES;
12759 case GETOPT_VAL_INIT_EXTENT:
12760 init_extent_tree = 1;
12761 ctree_flags |= (OPEN_CTREE_WRITES |
12762 OPEN_CTREE_NO_BLOCK_GROUPS);
12765 case GETOPT_VAL_CHECK_CSUM:
12766 check_data_csum = 1;
12768 case GETOPT_VAL_MODE:
12769 check_mode = parse_check_mode(optarg);
12770 if (check_mode == CHECK_MODE_UNKNOWN) {
12771 error("unknown mode: %s", optarg);
12775 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12776 if (strcmp(optarg, "v1") == 0) {
12777 clear_space_cache = 1;
12778 } else if (strcmp(optarg, "v2") == 0) {
12779 clear_space_cache = 2;
12780 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12783 "invalid argument to --clear-space-cache, must be v1 or v2");
12786 ctree_flags |= OPEN_CTREE_WRITES;
12791 if (check_argc_exact(argc - optind, 1))
12792 usage(cmd_check_usage);
/* The progress task started here is shut down by task_deinit() at the end */
12794 if (ctx.progress_enabled) {
12795 ctx.tp = TASK_NOTHING;
12796 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12799 /* This check is the only reason for --readonly to exist */
12800 if (readonly && repair) {
12801 error("repair options are not compatible with --readonly");
12806 * Not supported yet
12808 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12809 error("low memory mode doesn't support repair yet");
12814 cache_tree_init(&root_cache);
12816 if((ret = check_mounted(argv[optind])) < 0) {
12817 error("could not check mount status: %s", strerror(-ret));
12821 error("%s is currently mounted, aborting", argv[optind]);
12827 /* only allow partial opening under repair mode */
12829 ctree_flags |= OPEN_CTREE_PARTIAL;
12831 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12832 chunk_root_bytenr, ctree_flags);
12834 error("cannot open file system");
12840 global_info = info;
12841 root = info->fs_root;
/* clear_space_cache is 1 for "v1" and 2 for "v2", as set during parsing */
12842 if (clear_space_cache == 1) {
12843 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12845 "free space cache v2 detected, use --clear-space-cache v2");
12849 printf("Clearing free space cache\n");
12850 ret = clear_free_space_cache(info);
12852 error("failed to clear free space cache");
12855 printf("Free space cache cleared\n");
12858 } else if (clear_space_cache == 2) {
12859 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12860 printf("no free space cache v2 to clear\n");
12864 printf("Clear free space cache v2\n");
12865 ret = btrfs_clear_free_space_tree(info);
12867 error("failed to clear free space cache v2: %d", ret);
12870 printf("free space cache v2 cleared\n");
12876 * repair mode will force us to commit transaction which
12877 * will make us fail to load log tree when mounting.
12879 if (repair && btrfs_super_log_root(info->super_copy)) {
12880 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12886 ret = zero_log_tree(root);
12889 error("failed to zero log tree: %d", ret);
12894 uuid_unparse(info->super_copy->fsid, uuidbuf);
12895 if (qgroup_report) {
12896 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12898 ret = qgroup_verify_all(info);
12905 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12906 subvolid, argv[optind], uuidbuf);
12907 ret = print_extent_state(info, subvolid);
12911 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12913 if (!extent_buffer_uptodate(info->tree_root->node) ||
12914 !extent_buffer_uptodate(info->dev_root->node) ||
12915 !extent_buffer_uptodate(info->chunk_root->node)) {
12916 error("critical roots corrupted, unable to check the filesystem");
/*
 * Extent and/or csum tree re-initialisation runs inside one transaction that
 * is committed before the regular check passes start.
 */
12922 if (init_extent_tree || init_csum_tree) {
12923 struct btrfs_trans_handle *trans;
12925 trans = btrfs_start_transaction(info->extent_root, 0);
12926 if (IS_ERR(trans)) {
12927 error("error starting transaction");
12928 ret = PTR_ERR(trans);
12933 if (init_extent_tree) {
12934 printf("Creating a new extent tree\n");
12935 ret = reinit_extent_tree(trans, info);
12941 if (init_csum_tree) {
12942 printf("Reinitialize checksum tree\n");
12943 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12945 error("checksum tree initialization failed: %d",
12952 ret = fill_csum_tree(trans, info->csum_root,
12956 error("checksum tree refilling failed: %d", ret);
12961 * Ok now we commit and run the normal fsck, which will add
12962 * extent entries for all of the items it finds.
12964 ret = btrfs_commit_transaction(trans, info->extent_root);
12969 if (!extent_buffer_uptodate(info->extent_root->node)) {
12970 error("critical: extent_root, unable to check the filesystem");
12975 if (!extent_buffer_uptodate(info->csum_root->node)) {
12976 error("critical: csum_root, unable to check the filesystem");
12982 if (!ctx.progress_enabled)
12983 fprintf(stderr, "checking extents\n");
12984 if (check_mode == CHECK_MODE_LOWMEM)
12985 ret = check_chunks_and_extents_v2(root);
12987 ret = check_chunks_and_extents(root);
12991 "errors found in extent allocation tree or chunk allocation");
12993 ret = repair_root_items(info);
12996 error("failed to repair root items: %s", strerror(-ret));
13000 fprintf(stderr, "Fixed %d roots.\n", ret);
13002 } else if (ret > 0) {
13004 "Found %d roots with an outdated root item.\n",
13007 "Please run a filesystem check with the option --repair to fix them.\n");
13013 if (!ctx.progress_enabled) {
13014 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13015 fprintf(stderr, "checking free space tree\n");
13017 fprintf(stderr, "checking free space cache\n");
13019 ret = check_space_cache(root);
13022 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13023 error("errors found in free space tree");
13025 error("errors found in free space cache");
13030 * We used to have to have these hole extents in between our real
13031 * extents so if we don't have this flag set we need to make sure there
13032 * are no gaps in the file extents for inodes, otherwise we can just
13033 * ignore it when this happens.
13035 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13036 if (!ctx.progress_enabled)
13037 fprintf(stderr, "checking fs roots\n");
13038 if (check_mode == CHECK_MODE_LOWMEM)
13039 ret = check_fs_roots_v2(root->fs_info);
13041 ret = check_fs_roots(root, &root_cache);
13044 error("errors found in fs roots");
13048 fprintf(stderr, "checking csums\n");
13049 ret = check_csums(root);
13052 error("errors found in csum tree");
13056 fprintf(stderr, "checking root refs\n");
13057 /* For low memory mode, check_fs_roots_v2 handles root refs */
13058 if (check_mode != CHECK_MODE_LOWMEM) {
13059 ret = check_root_refs(root, &root_cache);
13062 error("errors found in root refs");
13067 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13068 struct extent_buffer *eb;
13070 eb = list_first_entry(&root->fs_info->recow_ebs,
13071 struct extent_buffer, recow);
13072 list_del_init(&eb->recow);
13073 ret = recow_extent_buffer(root, eb);
13076 error("fails to fix transid errors");
13081 while (!list_empty(&delete_items)) {
13082 struct bad_item *bad;
13084 bad = list_first_entry(&delete_items, struct bad_item, list);
13085 list_del_init(&bad->list);
13087 ret = delete_bad_item(root, bad);
13093 if (info->quota_enabled) {
13094 fprintf(stderr, "checking quota groups\n");
13095 ret = qgroup_verify_all(info);
13098 error("failed to check quota groups");
13102 ret = repair_qgroups(info, &qgroups_repaired);
13105 error("failed to repair quota groups");
13111 if (!list_empty(&root->fs_info->recow_ebs)) {
13112 error("transid errors in file system");
13117 if (found_old_backref) { /*
13118 * there was a disk format change when mixed
13119 * backref was in testing tree. The old format
13120 * existed about one week.
13122 printf("\n * Found old mixed backref format. "
13123 "The old format is not supported! *"
13124 "\n * Please mount the FS in readonly mode, "
13125 "backup data and re-format the FS. *\n\n");
13128 printf("found %llu bytes used, ",
13129 (unsigned long long)bytes_used);
13131 printf("error(s) found\n");
13133 printf("no error found\n");
13134 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13135 printf("total tree bytes: %llu\n",
13136 (unsigned long long)total_btree_bytes);
13137 printf("total fs tree bytes: %llu\n",
13138 (unsigned long long)total_fs_tree_bytes);
13139 printf("total extent tree bytes: %llu\n",
13140 (unsigned long long)total_extent_tree_bytes);
13141 printf("btree space waste bytes: %llu\n",
13142 (unsigned long long)btree_space_waste);
13143 printf("file data blocks allocated: %llu\n referenced %llu\n",
13144 (unsigned long long)data_bytes_allocated,
13145 (unsigned long long)data_bytes_referenced);
13147 free_qgroup_counts();
13148 free_root_recs_tree(&root_cache);
13152 if (ctx.progress_enabled)
13153 task_deinit(ctx.info);