2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/* Item-check error bits; every flag owns one distinct bit (bits 1..17). */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/* Error bits recorded in a backref's errors field (one bit per flag). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/* Error bits recorded in an inode record's errors field (one bit per flag). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * For kernel scrub workaround.
 *
 * This flag used to be (1 << 4), the same bit as REFERENCER_MISMATCH, so
 * the two conditions could not be told apart once OR-ed into one error
 * mask.  It now owns the first free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility function to allow reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This also merges holes on behalf of copy_file_extent_holes(),
476 * which ensures there are never contiguous or overlapping holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exist.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* For reloc root errors, print the objectid of the corresponding fs root */
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
832 round_up(rec->isize, root->sectorsize));
836 static void print_ref_error(int errors)
838 if (errors & REF_ERR_NO_DIR_ITEM)
839 fprintf(stderr, ", no dir item");
840 if (errors & REF_ERR_NO_DIR_INDEX)
841 fprintf(stderr, ", no dir index");
842 if (errors & REF_ERR_NO_INODE_REF)
843 fprintf(stderr, ", no inode ref");
844 if (errors & REF_ERR_DUP_DIR_ITEM)
845 fprintf(stderr, ", dup dir item");
846 if (errors & REF_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & REF_ERR_DUP_INODE_REF)
849 fprintf(stderr, ", dup inode ref");
850 if (errors & REF_ERR_INDEX_UNMATCH)
851 fprintf(stderr, ", index mismatch");
852 if (errors & REF_ERR_FILETYPE_UNMATCH)
853 fprintf(stderr, ", filetype mismatch");
854 if (errors & REF_ERR_NAME_TOO_LONG)
855 fprintf(stderr, ", name too long");
856 if (errors & REF_ERR_NO_ROOT_REF)
857 fprintf(stderr, ", no root ref");
858 if (errors & REF_ERR_NO_ROOT_BACKREF)
859 fprintf(stderr, ", no root backref");
860 if (errors & REF_ERR_DUP_ROOT_REF)
861 fprintf(stderr, ", dup root ref");
862 if (errors & REF_ERR_DUP_ROOT_BACKREF)
863 fprintf(stderr, ", dup root backref");
864 fprintf(stderr, "\n");
867 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
870 struct ptr_node *node;
871 struct cache_extent *cache;
872 struct inode_record *rec = NULL;
875 cache = lookup_cache_extent(inode_cache, ino, 1);
877 node = container_of(cache, struct ptr_node, cache);
879 if (mod && rec->refs > 1) {
880 node->data = clone_inode_rec(rec);
881 if (IS_ERR(node->data))
887 rec = calloc(1, sizeof(*rec));
889 return ERR_PTR(-ENOMEM);
891 rec->extent_start = (u64)-1;
893 INIT_LIST_HEAD(&rec->backrefs);
894 INIT_LIST_HEAD(&rec->orphan_extents);
895 rec->holes = RB_ROOT;
897 node = malloc(sizeof(*node));
900 return ERR_PTR(-ENOMEM);
902 node->cache.start = ino;
903 node->cache.size = 1;
906 if (ino == BTRFS_FREE_INO_OBJECTID)
909 ret = insert_cache_extent(inode_cache, &node->cache);
911 return ERR_PTR(-EEXIST);
916 static void free_orphan_data_extents(struct list_head *orphan_extents)
918 struct orphan_data_extent *orphan;
920 while (!list_empty(orphan_extents)) {
921 orphan = list_entry(orphan_extents->next,
922 struct orphan_data_extent, list);
923 list_del(&orphan->list);
928 static void free_inode_rec(struct inode_record *rec)
930 struct inode_backref *backref;
935 while (!list_empty(&rec->backrefs)) {
936 backref = to_inode_backref(rec->backrefs.next);
937 list_del(&backref->list);
940 free_orphan_data_extents(&rec->orphan_extents);
941 free_file_extent_holes(&rec->holes);
945 static int can_free_inode_rec(struct inode_record *rec)
947 if (!rec->errors && rec->checked && rec->found_inode_item &&
948 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
953 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
954 struct inode_record *rec)
956 struct cache_extent *cache;
957 struct inode_backref *tmp, *backref;
958 struct ptr_node *node;
961 if (!rec->found_inode_item)
964 filetype = imode_to_type(rec->imode);
965 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
966 if (backref->found_dir_item && backref->found_dir_index) {
967 if (backref->filetype != filetype)
968 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
969 if (!backref->errors && backref->found_inode_ref &&
970 rec->nlink == rec->found_link) {
971 list_del(&backref->list);
977 if (!rec->checked || rec->merging)
980 if (S_ISDIR(rec->imode)) {
981 if (rec->found_size != rec->isize)
982 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
983 if (rec->found_file_extent)
984 rec->errors |= I_ERR_ODD_FILE_EXTENT;
985 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
986 if (rec->found_dir_item)
987 rec->errors |= I_ERR_ODD_DIR_ITEM;
988 if (rec->found_size != rec->nbytes)
989 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
990 if (rec->nlink > 0 && !no_holes &&
991 (rec->extent_end < rec->isize ||
992 first_extent_gap(&rec->holes) < rec->isize))
993 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
996 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
997 if (rec->found_csum_item && rec->nodatasum)
998 rec->errors |= I_ERR_ODD_CSUM_ITEM;
999 if (rec->some_csum_missing && !rec->nodatasum)
1000 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1003 BUG_ON(rec->refs != 1);
1004 if (can_free_inode_rec(rec)) {
1005 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1006 node = container_of(cache, struct ptr_node, cache);
1007 BUG_ON(node->data != rec);
1008 remove_cache_extent(inode_cache, &node->cache);
1010 free_inode_rec(rec);
1014 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1016 struct btrfs_path path;
1017 struct btrfs_key key;
1020 key.objectid = BTRFS_ORPHAN_OBJECTID;
1021 key.type = BTRFS_ORPHAN_ITEM_KEY;
1024 btrfs_init_path(&path);
1025 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1026 btrfs_release_path(&path);
1032 static int process_inode_item(struct extent_buffer *eb,
1033 int slot, struct btrfs_key *key,
1034 struct shared_node *active_node)
1036 struct inode_record *rec;
1037 struct btrfs_inode_item *item;
1039 rec = active_node->current;
1040 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1041 if (rec->found_inode_item) {
1042 rec->errors |= I_ERR_DUP_INODE_ITEM;
1045 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1046 rec->nlink = btrfs_inode_nlink(eb, item);
1047 rec->isize = btrfs_inode_size(eb, item);
1048 rec->nbytes = btrfs_inode_nbytes(eb, item);
1049 rec->imode = btrfs_inode_mode(eb, item);
1050 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1052 rec->found_inode_item = 1;
1053 if (rec->nlink == 0)
1054 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1055 maybe_free_inode_rec(&active_node->inode_cache, rec);
1059 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1061 int namelen, u64 dir)
1063 struct inode_backref *backref;
1065 list_for_each_entry(backref, &rec->backrefs, list) {
1066 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1068 if (backref->dir != dir || backref->namelen != namelen)
1070 if (memcmp(name, backref->name, namelen))
1075 backref = malloc(sizeof(*backref) + namelen + 1);
1078 memset(backref, 0, sizeof(*backref));
1080 backref->namelen = namelen;
1081 memcpy(backref->name, name, namelen);
1082 backref->name[namelen] = '\0';
1083 list_add_tail(&backref->list, &rec->backrefs);
1087 static int add_inode_backref(struct cache_tree *inode_cache,
1088 u64 ino, u64 dir, u64 index,
1089 const char *name, int namelen,
1090 u8 filetype, u8 itemtype, int errors)
1092 struct inode_record *rec;
1093 struct inode_backref *backref;
1095 rec = get_inode_rec(inode_cache, ino, 1);
1096 BUG_ON(IS_ERR(rec));
1097 backref = get_inode_backref(rec, name, namelen, dir);
1100 backref->errors |= errors;
1101 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1102 if (backref->found_dir_index)
1103 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1104 if (backref->found_inode_ref && backref->index != index)
1105 backref->errors |= REF_ERR_INDEX_UNMATCH;
1106 if (backref->found_dir_item && backref->filetype != filetype)
1107 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1109 backref->index = index;
1110 backref->filetype = filetype;
1111 backref->found_dir_index = 1;
1112 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1114 if (backref->found_dir_item)
1115 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1116 if (backref->found_dir_index && backref->filetype != filetype)
1117 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1119 backref->filetype = filetype;
1120 backref->found_dir_item = 1;
1121 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1122 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1123 if (backref->found_inode_ref)
1124 backref->errors |= REF_ERR_DUP_INODE_REF;
1125 if (backref->found_dir_index && backref->index != index)
1126 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 backref->index = index;
1130 backref->ref_type = itemtype;
1131 backref->found_inode_ref = 1;
1136 maybe_free_inode_rec(inode_cache, rec);
1140 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1141 struct cache_tree *dst_cache)
1143 struct inode_backref *backref;
1148 list_for_each_entry(backref, &src->backrefs, list) {
1149 if (backref->found_dir_index) {
1150 add_inode_backref(dst_cache, dst->ino, backref->dir,
1151 backref->index, backref->name,
1152 backref->namelen, backref->filetype,
1153 BTRFS_DIR_INDEX_KEY, backref->errors);
1155 if (backref->found_dir_item) {
1157 add_inode_backref(dst_cache, dst->ino,
1158 backref->dir, 0, backref->name,
1159 backref->namelen, backref->filetype,
1160 BTRFS_DIR_ITEM_KEY, backref->errors);
1162 if (backref->found_inode_ref) {
1163 add_inode_backref(dst_cache, dst->ino,
1164 backref->dir, backref->index,
1165 backref->name, backref->namelen, 0,
1166 backref->ref_type, backref->errors);
1170 if (src->found_dir_item)
1171 dst->found_dir_item = 1;
1172 if (src->found_file_extent)
1173 dst->found_file_extent = 1;
1174 if (src->found_csum_item)
1175 dst->found_csum_item = 1;
1176 if (src->some_csum_missing)
1177 dst->some_csum_missing = 1;
1178 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1179 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1184 BUG_ON(src->found_link < dir_count);
1185 dst->found_link += src->found_link - dir_count;
1186 dst->found_size += src->found_size;
1187 if (src->extent_start != (u64)-1) {
1188 if (dst->extent_start == (u64)-1) {
1189 dst->extent_start = src->extent_start;
1190 dst->extent_end = src->extent_end;
1192 if (dst->extent_end > src->extent_start)
1193 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1194 else if (dst->extent_end < src->extent_start) {
1195 ret = add_file_extent_hole(&dst->holes,
1197 src->extent_start - dst->extent_end);
1199 if (dst->extent_end < src->extent_end)
1200 dst->extent_end = src->extent_end;
1204 dst->errors |= src->errors;
1205 if (src->found_inode_item) {
1206 if (!dst->found_inode_item) {
1207 dst->nlink = src->nlink;
1208 dst->isize = src->isize;
1209 dst->nbytes = src->nbytes;
1210 dst->imode = src->imode;
1211 dst->nodatasum = src->nodatasum;
1212 dst->found_inode_item = 1;
1214 dst->errors |= I_ERR_DUP_INODE_ITEM;
1222 static int splice_shared_node(struct shared_node *src_node,
1223 struct shared_node *dst_node)
1225 struct cache_extent *cache;
1226 struct ptr_node *node, *ins;
1227 struct cache_tree *src, *dst;
1228 struct inode_record *rec, *conflict;
1229 u64 current_ino = 0;
1233 if (--src_node->refs == 0)
1235 if (src_node->current)
1236 current_ino = src_node->current->ino;
1238 src = &src_node->root_cache;
1239 dst = &dst_node->root_cache;
1241 cache = search_cache_extent(src, 0);
1243 node = container_of(cache, struct ptr_node, cache);
1245 cache = next_cache_extent(cache);
1248 remove_cache_extent(src, &node->cache);
1251 ins = malloc(sizeof(*ins));
1253 ins->cache.start = node->cache.start;
1254 ins->cache.size = node->cache.size;
1258 ret = insert_cache_extent(dst, &ins->cache);
1259 if (ret == -EEXIST) {
1260 conflict = get_inode_rec(dst, rec->ino, 1);
1261 BUG_ON(IS_ERR(conflict));
1262 merge_inode_recs(rec, conflict, dst);
1264 conflict->checked = 1;
1265 if (dst_node->current == conflict)
1266 dst_node->current = NULL;
1268 maybe_free_inode_rec(dst, conflict);
1269 free_inode_rec(rec);
1276 if (src == &src_node->root_cache) {
1277 src = &src_node->inode_cache;
1278 dst = &dst_node->inode_cache;
1282 if (current_ino > 0 && (!dst_node->current ||
1283 current_ino > dst_node->current->ino)) {
1284 if (dst_node->current) {
1285 dst_node->current->checked = 1;
1286 maybe_free_inode_rec(dst, dst_node->current);
1288 dst_node->current = get_inode_rec(dst, current_ino, 1);
1289 BUG_ON(IS_ERR(dst_node->current));
1294 static void free_inode_ptr(struct cache_extent *cache)
1296 struct ptr_node *node;
1297 struct inode_record *rec;
1299 node = container_of(cache, struct ptr_node, cache);
1301 free_inode_rec(rec);
1305 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1307 static struct shared_node *find_shared_node(struct cache_tree *shared,
1310 struct cache_extent *cache;
1311 struct shared_node *node;
1313 cache = lookup_cache_extent(shared, bytenr, 1);
1315 node = container_of(cache, struct shared_node, cache);
1321 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1324 struct shared_node *node;
1326 node = calloc(1, sizeof(*node));
1329 node->cache.start = bytenr;
1330 node->cache.size = 1;
1331 cache_tree_init(&node->root_cache);
1332 cache_tree_init(&node->inode_cache);
1335 ret = insert_cache_extent(shared, &node->cache);
1340 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1341 struct walk_control *wc, int level)
1343 struct shared_node *node;
1344 struct shared_node *dest;
1347 if (level == wc->active_node)
1350 BUG_ON(wc->active_node <= level);
1351 node = find_shared_node(&wc->shared, bytenr);
1353 ret = add_shared_node(&wc->shared, bytenr, refs);
1355 node = find_shared_node(&wc->shared, bytenr);
1356 wc->nodes[level] = node;
1357 wc->active_node = level;
1361 if (wc->root_level == wc->active_node &&
1362 btrfs_root_refs(&root->root_item) == 0) {
1363 if (--node->refs == 0) {
1364 free_inode_recs_tree(&node->root_cache);
1365 free_inode_recs_tree(&node->inode_cache);
1366 remove_cache_extent(&wc->shared, &node->cache);
1372 dest = wc->nodes[wc->active_node];
1373 splice_shared_node(node, dest);
1374 if (node->refs == 0) {
1375 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk leaves the
 * shared block at @level.
 *
 * Searches upward from @level + 1 for the next level to make active, clears
 * the slot of the node being left and, unless the new active level is the
 * root level of a root item with zero refs, splices the collected records
 * into the new active node.
 * NOTE(review): the loop body that selects i and the final bookkeeping are
 * not visible in this excerpt.
 */
1381 static int leave_shared_node(struct btrfs_root *root,
1382 struct walk_control *wc, int level)
1384 struct shared_node *node;
1385 struct shared_node *dest;
1388 if (level == wc->root_level)
1391 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The search above must have stopped at a valid level */
1395 BUG_ON(i >= BTRFS_MAX_LEVEL);
1397 node = wc->nodes[wc->active_node];
1398 wc->nodes[wc->active_node] = NULL;
1399 wc->active_node = i;
1401 dest = wc->nodes[wc->active_node];
1402 if (wc->active_node < wc->root_level ||
1403 btrfs_root_refs(&root->root_item) > 0) {
1404 BUG_ON(node->refs <= 1);
1405 splice_shared_node(node, dest);
1407 BUG_ON(node->refs < 2);
/*
 * is_child_root(): decide whether root @child_root_id is referenced from
 * root @parent_root_id by consulting the tree root.  The meaning of the
 * 0/1/2 results is listed in the lines that follow.
 */
1416 * 1 - if the root with id child_root_id is a child of root parent_root_id
1417 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1418 * has other root(s) as parent(s)
1419 * 2 - if the root child_root_id doesn't have any parent roots
1421 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1424 struct btrfs_path path;
1425 struct btrfs_key key;
1426 struct extent_buffer *leaf;
1430 btrfs_init_path(&path);
/* Fast path: search for the (parent, ROOT_REF, child) key directly */
1432 key.objectid = parent_root_id;
1433 key.type = BTRFS_ROOT_REF_KEY;
1434 key.offset = child_root_id;
1435 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1439 btrfs_release_path(&path);
/* Slow path: iterate the child's ROOT_BACKREF items looking for the parent */
1443 key.objectid = child_root_id;
1444 key.type = BTRFS_ROOT_BACKREF_KEY;
1446 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1452 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one */
1453 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1454 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1457 leaf = path.nodes[0];
1460 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for having left the child's backref items */
1461 if (key.objectid != child_root_id ||
1462 key.type != BTRFS_ROOT_BACKREF_KEY)
1467 if (key.offset == parent_root_id) {
1468 btrfs_release_path(&path);
1475 btrfs_release_path(&path);
1478 return has_parent ? 0 : 2;
/*
 * Walk every btrfs_dir_item packed into the item at @slot of @eb and record
 * a backref for each entry.
 *
 * Entries whose location is an inode item are added to the active node's
 * inode cache, entries pointing at a root item go to its root cache, and
 * any other location type is reported on stderr and recorded under
 * BTRFS_MULTIPLE_OBJECTIDS.  The current inode record is marked as having a
 * dir item and its found_size grows by each entry's name length.
 */
1481 static int process_dir_item(struct extent_buffer *eb,
1482 int slot, struct btrfs_key *key,
1483 struct shared_node *active_node)
1493 struct btrfs_dir_item *di;
1494 struct inode_record *rec;
1495 struct cache_tree *root_cache;
1496 struct cache_tree *inode_cache;
1497 struct btrfs_key location;
1498 char namebuf[BTRFS_NAME_LEN];
1500 root_cache = &active_node->root_cache;
1501 inode_cache = &active_node->inode_cache;
1502 rec = active_node->current;
1503 rec->found_dir_item = 1;
1505 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1506 total = btrfs_item_size_nr(eb, slot);
1507 while (cur < total) {
1509 btrfs_dir_item_key_to_cpu(eb, di, &location);
1510 name_len = btrfs_dir_name_len(eb, di);
1511 data_len = btrfs_dir_data_len(eb, di);
1512 filetype = btrfs_dir_type(eb, di);
1514 rec->found_size += name_len;
/*
 * Names longer than BTRFS_NAME_LEN are clamped to the size of namebuf and
 * flagged with REF_ERR_NAME_TOO_LONG.
 */
1515 if (name_len <= BTRFS_NAME_LEN) {
1519 len = BTRFS_NAME_LEN;
1520 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the dir item header */
1522 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1524 if (location.type == BTRFS_INODE_ITEM_KEY) {
1525 add_inode_backref(inode_cache, location.objectid,
1526 key->objectid, key->offset, namebuf,
1527 len, filetype, key->type, error);
1528 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1529 add_inode_backref(root_cache, location.objectid,
1530 key->objectid, key->offset,
1531 namebuf, len, filetype,
1534 fprintf(stderr, "invalid location in dir item %u\n",
1536 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1537 key->objectid, key->offset, namebuf,
1538 len, filetype, key->type, error);
/* Advance to the next packed entry: header + name + data */
1541 len = sizeof(*di) + name_len + data_len;
1542 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * More than one entry under a DIR_INDEX key is flagged as a duplicate index
 * (nritems presumably counts the entries seen in this item -- confirm).
 */
1545 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1546 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every btrfs_inode_ref packed into the item at @slot of @eb and add a
 * backref (inode = key->objectid, dir = key->offset) to the active node's
 * inode cache for each one.
 *
 * A ref whose name would cross the item boundary or exceed BTRFS_NAME_LEN
 * is flagged with REF_ERR_NAME_TOO_LONG, and only the part that can still
 * be read is copied.
 */
1551 static int process_inode_ref(struct extent_buffer *eb,
1552 int slot, struct btrfs_key *key,
1553 struct shared_node *active_node)
1561 struct cache_tree *inode_cache;
1562 struct btrfs_inode_ref *ref;
1563 char namebuf[BTRFS_NAME_LEN];
1565 inode_cache = &active_node->inode_cache;
1567 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1568 total = btrfs_item_size_nr(eb, slot);
1569 while (cur < total) {
1570 name_len = btrfs_inode_ref_name_len(eb, ref);
1571 index = btrfs_inode_ref_index(eb, ref);
1573 /* inode_ref + namelen should not cross item boundary */
1574 if (cur + sizeof(*ref) + name_len > total ||
1575 name_len > BTRFS_NAME_LEN) {
/* Test for the item being too small to hold even the ref header */
1576 if (total < cur + sizeof(*ref))
1579 /* Still try to read out the remaining part */
1580 len = min_t(u32, total - cur - sizeof(*ref),
1582 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the ref header */
1588 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1589 add_inode_backref(inode_cache, key->objectid, key->offset,
1590 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref: header + name */
1592 len = sizeof(*ref) + name_len;
1593 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk every btrfs_inode_extref packed into the item at @slot of @eb and add
 * a backref to the active node's inode cache for each one.
 *
 * Unlike a plain inode ref, the parent directory is read from the extref
 * itself rather than from the key offset.  Names longer than BTRFS_NAME_LEN
 * are clamped to the buffer size and flagged with REF_ERR_NAME_TOO_LONG.
 */
1599 static int process_inode_extref(struct extent_buffer *eb,
1600 int slot, struct btrfs_key *key,
1601 struct shared_node *active_node)
1610 struct cache_tree *inode_cache;
1611 struct btrfs_inode_extref *extref;
1612 char namebuf[BTRFS_NAME_LEN];
1614 inode_cache = &active_node->inode_cache;
1616 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1617 total = btrfs_item_size_nr(eb, slot);
1618 while (cur < total) {
1619 name_len = btrfs_inode_extref_name_len(eb, extref);
1620 index = btrfs_inode_extref_index(eb, extref);
1621 parent = btrfs_inode_extref_parent(eb, extref);
1622 if (name_len <= BTRFS_NAME_LEN) {
1626 len = BTRFS_NAME_LEN;
1627 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the extref header */
1629 read_extent_buffer(eb, namebuf,
1630 (unsigned long)(extref + 1), len);
1631 add_inode_backref(inode_cache, key->objectid, parent,
1632 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref: header + name */
1634 len = sizeof(*extref) + name_len;
1635 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for items overlapping the byte range
 * [@start, @start + @len) and report the covered amount through @found.
 * NOTE(review): the statements that advance start/len and accumulate into
 * *found are not visible in this excerpt -- confirm the exact accounting.
 */
1642 static int count_csum_range(struct btrfs_root *root, u64 start,
1643 u64 len, u64 *found)
1645 struct btrfs_key key;
1646 struct btrfs_path path;
1647 struct extent_buffer *leaf;
1652 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1654 btrfs_init_path(&path);
1656 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1658 key.type = BTRFS_EXTENT_CSUM_KEY;
1660 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, which may be a csum item
 * whose range reaches into the one we are looking for.
 */
1664 if (ret > 0 && path.slots[0] > 0) {
1665 leaf = path.nodes[0];
1666 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1667 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1668 key.type == BTRFS_EXTENT_CSUM_KEY)
1673 leaf = path.nodes[0];
/* Ran off the end of this leaf: continue in the next one */
1674 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1675 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1680 leaf = path.nodes[0];
1683 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1684 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1685 key.type != BTRFS_EXTENT_CSUM_KEY)
1688 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Test for a csum item that starts at or beyond the end of the range */
1689 if (key.offset >= start + len)
1692 if (key.offset > start)
/* Each csum_size bytes of item payload covers one sector of data */
1695 size = btrfs_item_size_nr(leaf, path.slots[0]);
1696 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1697 if (csum_end > start) {
1698 size = min(csum_end - start, len);
1707 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (@slot of @eb, keyed by @key) against the
 * active node's current inode record.
 *
 * Tracks the covered file range (recording holes and overlaps), validates
 * the extent fields for inline, regular and preallocated extents, adds the
 * extent size to found_size, and for extents with a disk address outside
 * the data reloc tree checks how much of the extent is covered by csums.
 */
1713 static int process_file_extent(struct btrfs_root *root,
1714 struct extent_buffer *eb,
1715 int slot, struct btrfs_key *key,
1716 struct shared_node *active_node)
1718 struct inode_record *rec;
1719 struct btrfs_file_extent_item *fi;
1721 u64 disk_bytenr = 0;
1722 u64 extent_offset = 0;
1723 u64 mask = root->sectorsize - 1;
1727 rec = active_node->current;
1728 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1729 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this offset */
1731 if (rec->extent_start == (u64)-1) {
1732 rec->extent_start = key->offset;
1733 rec->extent_end = key->offset;
/* Previous extent ends after this one starts: overlap; before: a hole */
1736 if (rec->extent_end > key->offset)
1737 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1738 else if (rec->extent_end < key->offset) {
1739 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1740 key->offset - rec->extent_end);
1745 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1746 extent_type = btrfs_file_extent_type(eb, fi);
1748 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1749 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1751 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1752 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary */
1753 num_bytes = (num_bytes + mask) & ~mask;
1754 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1755 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1756 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1757 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1758 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned */
1759 if (num_bytes == 0 || (num_bytes & mask))
1760 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes */
1761 if (num_bytes + extent_offset >
1762 btrfs_file_extent_ram_bytes(eb, fi))
1763 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry compression/encryption/encoding */
1764 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1765 (btrfs_file_extent_compression(eb, fi) ||
1766 btrfs_file_extent_encryption(eb, fi) ||
1767 btrfs_file_extent_other_encoding(eb, fi)))
1768 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk address count towards found_size */
1769 if (disk_bytenr > 0)
1770 rec->found_size += num_bytes;
1772 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1774 rec->extent_end = key->offset + num_bytes;
1777 * The data reloc tree will copy full extents into its inode and then
1778 * copy the corresponding csums. Because the extent it copied could be
1779 * a preallocated extent that hasn't been written to yet there may be no
1780 * csums to copy, ergo we won't have csums for our file extent. This is
1781 * ok so just don't bother checking csums if the inode belongs to the
1784 if (disk_bytenr > 0 &&
1785 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk size */
1787 if (btrfs_file_extent_compression(eb, fi))
1788 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1790 disk_bytenr += extent_offset;
1792 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents: note csum presence and partial coverage */
1795 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1797 rec->found_csum_item = 1;
1798 if (found < num_bytes)
1799 rec->some_csum_missing = 1;
1800 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1802 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode check.
 *
 * Items are dispatched by key type to the process_*() helpers, which update
 * the current inode record of the active shared node.  Whenever the
 * objectid moves past the current record's inode number, the previous
 * record is marked checked and a record for the new inode is fetched.
 */
1808 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1809 struct walk_control *wc)
1811 struct btrfs_key key;
1815 struct cache_tree *inode_cache;
1816 struct shared_node *active_node;
/* Active level is the root level of a root item with zero refs */
1818 if (wc->root_level == wc->active_node &&
1819 btrfs_root_refs(&root->root_item) == 0)
1822 active_node = wc->nodes[wc->active_node];
1823 inode_cache = &active_node->inode_cache;
1824 nritems = btrfs_header_nritems(eb);
1825 for (i = 0; i < nritems; i++) {
1826 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objectid and orphan items get special treatment (not visible) */
1828 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1830 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a higher inode number: close the old record, open a new one */
1833 if (active_node->current == NULL ||
1834 active_node->current->ino < key.objectid) {
1835 if (active_node->current) {
1836 active_node->current->checked = 1;
1837 maybe_free_inode_rec(inode_cache,
1838 active_node->current);
1840 active_node->current = get_inode_rec(inode_cache,
1842 BUG_ON(IS_ERR(active_node->current));
1845 case BTRFS_DIR_ITEM_KEY:
1846 case BTRFS_DIR_INDEX_KEY:
1847 ret = process_dir_item(eb, i, &key, active_node);
1849 case BTRFS_INODE_REF_KEY:
1850 ret = process_inode_ref(eb, i, &key, active_node);
1852 case BTRFS_INODE_EXTREF_KEY:
1853 ret = process_inode_extref(eb, i, &key, active_node);
1855 case BTRFS_INODE_ITEM_KEY:
1856 ret = process_inode_item(eb, i, &key, active_node);
1858 case BTRFS_EXTENT_DATA_KEY:
1859 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-tree-level cache used by the tree walkers (struct node_refs): for
 * each level, the address of the last block looked up, its reference count,
 * and whether that block needs to be checked from the current root.
 */
1870 u64 bytenr[BTRFS_MAX_LEVEL];
1871 u64 refs[BTRFS_MAX_LEVEL];
1872 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations: both are used by process_one_leaf_v2() before being defined */
1875 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1876 struct node_refs *nrefs, u64 level);
1877 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1878 unsigned int ext_ref);
/*
 * process_one_leaf_v2(): check the inodes found in the leaf at
 * path->nodes[0] by calling check_inode_item(), which may advance @path
 * into following leaves.  After a leaf switch the upper levels of the path
 * are re-validated against @nrefs so that shared blocks not needing a check
 * from this root can be skipped.  Return convention is listed below.
 */
1881 * Returns >0 Found error, not fatal, should continue
1882 * Returns <0 Fatal error, must exit the whole check
1883 * Returns 0 No errors found
1885 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1886 struct node_refs *nrefs, int *level, int ext_ref)
1888 struct extent_buffer *cur = path->nodes[0];
1889 struct btrfs_key key;
1893 int root_level = btrfs_header_level(root->node);
1895 int ret = 0; /* Final return value */
1896 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a later leaf switch */
1898 cur_bytenr = cur->start;
1900 /* skip to first inode item or the first inode number change */
1901 nritems = btrfs_header_nritems(cur);
1902 for (i = 0; i < nritems; i++) {
1903 btrfs_item_key_to_cpu(cur, &key, i);
1905 first_ino = key.objectid;
1906 if (key.type == BTRFS_INODE_ITEM_KEY ||
1907 (first_ino && first_ino != key.objectid))
1911 path->slots[0] = nritems;
1917 err |= check_inode_item(root, path, ext_ref);
1919 if (err & LAST_ITEM)
1922 /* still have inode items in this leaf */
1923 if (cur->start == cur_bytenr)
1927 * we have switched to another leaf, above nodes may
1928 * have changed, here walk down the path, if a node
1929 * or leaf is shared, check whether we can skip this
1932 for (i = root_level; i >= 0; i--) {
1933 if (path->nodes[i]->start == nrefs->bytenr[i])
1936 ret = update_nodes_refs(root,
1937 path->nodes[i]->start,
1942 if (!nrefs->need_check[i]) {
/* Drop the buffers held below *level and clear their path slots */
1948 for (i = 0; i < *level; i++) {
1949 free_extent_buffer(path->nodes[i]);
1950 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at pointer @slot and
 * continuing to the last pointer in the node.  Each child is requested with
 * the generation recorded in its parent pointer.
 * NOTE(review): the level test that follows btrfs_header_level() is not
 * visible in this excerpt.
 */
1959 static void reada_walk_down(struct btrfs_root *root,
1960 struct extent_buffer *node, int slot)
1969 level = btrfs_header_level(node);
1973 nritems = btrfs_header_nritems(node);
1974 blocksize = root->nodesize;
1975 for (i = slot; i < nritems; i++) {
1976 bytenr = btrfs_node_blockptr(node, i);
1977 ptr_gen = btrfs_node_ptr_generation(node, i);
1978 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
/*
 * check_child_node(): compare @child against the pointer at @slot in
 * @parent (first key, block address and generation) and print a diagnostic
 * on stderr for each mismatch.  The three conditions are described below.
 */
1983 * Check the child node/leaf by the following condition:
1984 * 1. the first item key of the node/leaf should be the same with the one
1986 * 2. block in parent node should match the child node/leaf.
1987 * 3. generation of parent node and child's header should be consistent.
1989 * Or the child node/leaf pointed by the key in parent is not valid.
1991 * We hope to check leaf owner too, but since subvol may share leaves,
1992 * which makes leaf owner check not so strong, key check should be
1993 * sufficient enough for that case.
1995 static int check_child_node(struct extent_buffer *parent, int slot,
1996 struct extent_buffer *child)
1998 struct btrfs_key parent_key;
1999 struct btrfs_key child_key;
2002 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf stores item keys, an internal node stores node keys */
2003 if (btrfs_header_level(child) == 0)
2004 btrfs_item_key_to_cpu(child, &child_key, 0);
2006 btrfs_node_key_to_cpu(child, &child_key, 0);
2008 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2011 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2012 parent_key.objectid, parent_key.type, parent_key.offset,
2013 child_key.objectid, child_key.type, child_key.offset);
2015 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2017 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2018 btrfs_node_blockptr(parent, slot),
2019 btrfs_header_bytenr(child));
2021 if (btrfs_node_ptr_generation(parent, slot) !=
2022 btrfs_header_generation(child)) {
/*
 * NOTE(review): the other two messages print the parent's value as "wanted"
 * and the child's as "have"; here the arguments are in the opposite order,
 * so the labels look swapped -- confirm and fix.
 */
2024 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2025 btrfs_header_generation(child),
2026 btrfs_node_ptr_generation(parent, slot));
/*
 * need_check(): given the list of roots referencing a block (@roots),
 * decide whether @root is the one that should check it.  Rationale below.
 */
2032 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2033 * in every fs or file tree check. Here we find its all root ids, and only check
2034 * it in the fs or file tree which has the smallest root id.
2036 static int need_check(struct btrfs_root *root, struct ulist *roots)
2038 struct rb_node *node;
2039 struct ulist_node *u;
/* Test for exactly one referencing root, i.e. the block is not shared */
2041 if (roots->nnodes == 1)
/* First rb-tree entry of the ulist (the smallest root id, per the comment below) */
2044 node = rb_first(&roots->root);
2045 u = rb_entry(node, struct ulist_node, rb_node);
2047 * current root id is not smallest, we skip it and let it be checked
2048 * in the fs or file tree who hash the smallest root id.
2050 if (root->objectid != u->val)
/*
 * update_nodes_refs(): refresh the @nrefs cache entry for @level if it does
 * not already describe @bytenr: look up the block's reference count, find
 * all roots referencing it and record whether this root should check it.
 */
2057 * for a tree node or leaf, we record its reference count, so later if we still
2058 * process this node or leaf, don't need to compute its reference count again.
2060 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2061 struct node_refs *nrefs, u64 level)
2065 struct ulist *roots;
/* Cache miss for this level: recompute refs and the need_check flag */
2067 if (nrefs->bytenr[level] != bytenr) {
2068 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2069 level, 1, &refs, NULL);
2073 nrefs->bytenr[level] = bytenr;
2074 nrefs->refs[level] = refs;
2076 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2081 check = need_check(root, roots);
2083 nrefs->need_check[level] = check;
/* Fallback: the block must be checked (the guarding condition is not visible) */
2085 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting at path->nodes[*level], walk down toward
 * the leaves, processing each leaf with process_one_leaf().
 *
 * Reference counts of visited blocks are looked up once and cached in
 * @nrefs.  Blocks are registered through enter_shared_node() (presumably
 * only when they have more than one reference -- the condition is not
 * visible here).  Before stepping into a child, it is read if necessary and
 * validated with check_child_node() and btrfs_check_leaf()/
 * btrfs_check_node(); unreadable children are recorded as corrupt extents.
 */
2092 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2093 struct walk_control *wc, int *level,
2094 struct node_refs *nrefs)
2096 enum btrfs_tree_block_status status;
2099 struct extent_buffer *next;
2100 struct extent_buffer *cur;
2105 WARN_ON(*level < 0);
2106 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block when it matches */
2108 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2109 refs = nrefs->refs[*level];
2112 ret = btrfs_lookup_extent_info(NULL, root,
2113 path->nodes[*level]->start,
2114 *level, 1, &refs, NULL);
2119 nrefs->bytenr[*level] = path->nodes[*level]->start;
2120 nrefs->refs[*level] = refs;
2124 ret = enter_shared_node(root, path->nodes[*level]->start,
2132 while (*level >= 0) {
2133 WARN_ON(*level < 0);
2134 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2135 cur = path->nodes[*level];
/* Header level must agree with the level we believe we are at */
2137 if (btrfs_header_level(cur) != *level)
/* Test for all slots of this block having been consumed */
2140 if (path->slots[*level] >= btrfs_header_nritems(cur))
2143 ret = process_one_leaf(root, cur, wc);
2148 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2149 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2150 blocksize = root->nodesize;
/* Same caching scheme for the child one level down */
2152 if (bytenr == nrefs->bytenr[*level - 1]) {
2153 refs = nrefs->refs[*level - 1];
2155 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2156 *level - 1, 1, &refs, NULL);
2160 nrefs->bytenr[*level - 1] = bytenr;
2161 nrefs->refs[*level - 1] = refs;
2166 ret = enter_shared_node(root, bytenr, refs,
2169 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read it from disk */
2174 next = btrfs_find_tree_block(root, bytenr, blocksize);
2175 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2176 free_extent_buffer(next);
2177 reada_walk_down(root, cur, path->slots[*level]);
2178 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent slot as a corrupt extent */
2180 if (!extent_buffer_uptodate(next)) {
2181 struct btrfs_key node_key;
2183 btrfs_node_key_to_cpu(path->nodes[*level],
2185 path->slots[*level]);
2186 btrfs_add_corrupt_extent_record(root->fs_info,
2188 path->nodes[*level]->start,
2189 root->nodesize, *level);
2195 ret = check_child_node(cur, path->slots[*level], next);
2197 free_extent_buffer(next);
2202 if (btrfs_is_leaf(next))
2203 status = btrfs_check_leaf(root, NULL, next);
2205 status = btrfs_check_node(root, NULL, next);
2206 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2207 free_extent_buffer(next);
/* Step down: install the child in the path and start at its first slot */
2212 *level = *level - 1;
2213 free_extent_buffer(path->nodes[*level]);
2214 path->nodes[*level] = next;
2215 path->slots[*level] = 0;
/* Mark the current block as fully consumed */
2218 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item().
 * NOTE(review): this repeats a declaration that already appears earlier in
 * the file; it is harmless but redundant.
 */
2222 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2223 unsigned int ext_ref);
/*
 * walk_down_tree_v2(): descent starting at path->nodes[*level].  Each block
 * is validated with btrfs_check_leaf()/btrfs_check_node(), leaves are
 * handed to process_one_leaf_v2(), and children whose @nrefs entry says
 * they need no check from this root are skipped.  Return convention is
 * listed below.
 */
2226 * Returns >0 Found error, should continue
2227 * Returns <0 Fatal error, must exit the whole check
2228 * Returns 0 No errors found
2230 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2231 int *level, struct node_refs *nrefs, int ext_ref)
2233 enum btrfs_tree_block_status status;
2236 struct extent_buffer *next;
2237 struct extent_buffer *cur;
2241 WARN_ON(*level < 0);
2242 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2244 ret = update_nodes_refs(root, path->nodes[*level]->start,
2249 while (*level >= 0) {
2250 WARN_ON(*level < 0);
2251 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2252 cur = path->nodes[*level];
/* Header level must agree with the level we believe we are at */
2254 if (btrfs_header_level(cur) != *level)
/* Test for all slots of this block having been consumed */
2257 if (path->slots[*level] >= btrfs_header_nritems(cur))
2259 /* Don't forget to check leaf/node validity */
2261 ret = btrfs_check_leaf(root, NULL, cur);
2262 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2266 ret = process_one_leaf_v2(root, path, nrefs,
2270 ret = btrfs_check_node(root, NULL, cur);
2271 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2276 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2277 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2278 blocksize = root->nodesize;
2280 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* The child needs no check from this root: move to the next slot */
2283 if (!nrefs->need_check[*level - 1]) {
2284 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read it from disk */
2288 next = btrfs_find_tree_block(root, bytenr, blocksize);
2289 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2290 free_extent_buffer(next);
2291 reada_walk_down(root, cur, path->slots[*level]);
2292 next = read_tree_block(root, bytenr, blocksize,
/* Read failed: remember the parent slot as a corrupt extent */
2294 if (!extent_buffer_uptodate(next)) {
2295 struct btrfs_key node_key;
2297 btrfs_node_key_to_cpu(path->nodes[*level],
2299 path->slots[*level]);
2300 btrfs_add_corrupt_extent_record(root->fs_info,
2302 path->nodes[*level]->start,
2303 root->nodesize, *level);
2309 ret = check_child_node(cur, path->slots[*level], next);
2313 if (btrfs_is_leaf(next))
2314 status = btrfs_check_leaf(root, NULL, next);
2316 status = btrfs_check_node(root, NULL, next);
2317 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2318 free_extent_buffer(next);
/* Step down: install the child in the path and start at its first slot */
2323 *level = *level - 1;
2324 free_extent_buffer(path->nodes[*level]);
2325 path->nodes[*level] = next;
2326 path->slots[*level] = 0;
/*
 * Original-mode ascent: climb from *level looking for the first block that
 * still has an unvisited slot.  Exhausted blocks are released from the path
 * and, when the level being left is the active shared level, the shared
 * node is closed with leave_shared_node().
 * NOTE(review): the slot/level updates and return values are not visible in
 * this excerpt.
 */
2331 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2332 struct walk_control *wc, int *level)
2335 struct extent_buffer *leaf;
2337 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* "leaf" is the block at level i; it may be an internal node */
2338 leaf = path->nodes[i];
2339 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2344 free_extent_buffer(path->nodes[*level]);
2345 path->nodes[*level] = NULL;
2346 BUG_ON(*level > wc->active_node);
2347 if (*level == wc->active_node)
2348 leave_shared_node(root, wc, *level);
/*
 * Ascent counterpart of walk_down_tree_v2(): climb from *level looking for
 * the first block that still has an unvisited slot, releasing exhausted
 * blocks from the path on the way.  No shared-node bookkeeping is visible
 * here, unlike walk_up_tree().
 * NOTE(review): the slot/level updates and return values are not visible in
 * this excerpt.
 */
2355 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2359 struct extent_buffer *leaf;
2361 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* "leaf" is the block at level i; it may be an internal node */
2362 leaf = path->nodes[i];
2363 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2368 free_extent_buffer(path->nodes[*level]);
2369 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests cover: a present, error-free inode item; nlink of 1
 * with no links found; and a first backref that has an inode ref with
 * index 0 and name "..", but no dir index or dir item.
 * NOTE(review): the action taken when a test fires and the return values
 * are not visible in this excerpt.
 */
2376 static int check_root_dir(struct inode_record *rec)
2378 struct inode_backref *backref;
2381 if (!rec->found_inode_item || rec->errors)
2383 if (rec->nlink != 1 || rec->found_link != 0)
2385 if (list_empty(&rec->backrefs))
2387 backref = to_inode_backref(rec->backrefs.next);
2388 if (!backref->found_inode_ref)
/* Test for the ".." entry at index 0 */
2390 if (backref->index != 0 || backref->namelen != 2 ||
2391 memcmp(backref->name, "..", 2))
2393 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory size: locate the inode item of rec->ino in
 * @root, set its size to rec->found_size, mark the buffer dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.  @path is released before returning.
 */
2400 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2401 struct btrfs_root *root, struct btrfs_path *path,
2402 struct inode_record *rec)
2404 struct btrfs_inode_item *ei;
2405 struct btrfs_key key;
/*
 * Search with the maximum offset so the slot lands after the inode item;
 * presumably the code then steps back one slot (not visible) -- confirm.
 */
2408 key.objectid = rec->ino;
2409 key.type = BTRFS_INODE_ITEM_KEY;
2410 key.offset = (u64)-1;
2412 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 has no preceding item to step back to */
2416 if (!path->slots[0]) {
2423 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired */
2424 if (key.objectid != rec->ino) {
2429 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2430 struct btrfs_inode_item);
2431 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2432 btrfs_mark_buffer_dirty(path->nodes[0]);
2433 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2434 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2435 root->root_key.objectid);
2437 btrfs_release_path(path);
/*
 * Repair a missing orphan item: insert one for rec->ino in @root via
 * btrfs_add_orphan_item(), release @path and clear I_ERR_NO_ORPHAN_ITEM.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this excerpt.
 */
2441 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2442 struct btrfs_root *root,
2443 struct btrfs_path *path,
2444 struct inode_record *rec)
2448 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2449 btrfs_release_path(path);
2451 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes value: find the inode item of rec->ino in @root,
 * set its nbytes to rec->found_size, mark the buffer dirty and clear
 * I_ERR_FILE_NBYTES_WRONG.  @path is released before returning.
 */
2455 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2456 struct btrfs_root *root,
2457 struct btrfs_path *path,
2458 struct inode_record *rec)
2460 struct btrfs_inode_item *ei;
2461 struct btrfs_key key;
2464 key.objectid = rec->ino;
2465 key.type = BTRFS_INODE_ITEM_KEY;
2468 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2475 /* Since ret == 0, no need to check anything */
2476 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2477 struct btrfs_inode_item);
2478 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2479 btrfs_mark_buffer_dirty(path->nodes[0]);
2480 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2481 printf("reset nbytes for ino %llu root %llu\n",
2482 rec->ino, root->root_key.objectid);
2484 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref in its own transaction.
 *
 * A new item keyed (backref->dir, DIR_INDEX, backref->index) is inserted
 * and filled with a dir item pointing at rec->ino, typed from rec->imode
 * and carrying the backref's name.  After the commit, the backref is marked
 * as having a dir index and the parent directory's record is updated: its
 * found_size grows by the name length and I_ERR_DIR_ISIZE_WRONG is cleared
 * or set depending on whether found_size now matches isize.
 */
2488 static int add_missing_dir_index(struct btrfs_root *root,
2489 struct cache_tree *inode_cache,
2490 struct inode_record *rec,
2491 struct inode_backref *backref)
2493 struct btrfs_path path;
2494 struct btrfs_trans_handle *trans;
2495 struct btrfs_dir_item *dir_item;
2496 struct extent_buffer *leaf;
2497 struct btrfs_key key;
2498 struct btrfs_disk_key disk_key;
2499 struct inode_record *dir_rec;
2500 unsigned long name_ptr;
/* Item payload: dir item header followed by the name, no extra data */
2501 u32 data_size = sizeof(*dir_item) + backref->namelen;
2504 trans = btrfs_start_transaction(root, 1);
2506 return PTR_ERR(trans);
2508 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2509 (unsigned long long)rec->ino);
2511 btrfs_init_path(&path);
2512 key.objectid = backref->dir;
2513 key.type = BTRFS_DIR_INDEX_KEY;
2514 key.offset = backref->index;
2515 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2518 leaf = path.nodes[0];
2519 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the entry: the inode item of rec->ino, in disk byte order */
2521 disk_key.objectid = cpu_to_le64(rec->ino);
2522 disk_key.type = BTRFS_INODE_ITEM_KEY;
2523 disk_key.offset = 0;
2525 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2526 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2527 btrfs_set_dir_data_len(leaf, dir_item, 0);
2528 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored immediately after the dir item header */
2529 name_ptr = (unsigned long)(dir_item + 1);
2530 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2531 btrfs_mark_buffer_dirty(leaf);
2532 btrfs_release_path(&path);
/* NOTE(review): the commit's return value is ignored here */
2533 btrfs_commit_transaction(trans, root);
2535 backref->found_dir_index = 1;
2536 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2537 BUG_ON(IS_ERR(dir_rec));
/* Keep the parent directory's size accounting in step with the new entry */
2540 dir_rec->found_size += backref->namelen;
2541 if (dir_rec->found_size == dir_rec->isize &&
2542 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2543 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2544 if (dir_rec->found_size != dir_rec->isize)
2545 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root in its own
 * transaction.
 *
 * The entry is looked up by directory, name and index; it is then removed
 * either by deleting the whole item or by deleting just this name from it.
 * NOTE(review): the conditions selecting between the early commit path,
 * btrfs_del_item() and btrfs_delete_one_dir_name() are not visible in this
 * excerpt.
 */
2550 static int delete_dir_index(struct btrfs_root *root,
2551 struct inode_backref *backref)
2553 struct btrfs_trans_handle *trans;
2554 struct btrfs_dir_item *di;
2555 struct btrfs_path path;
2558 trans = btrfs_start_transaction(root, 1);
2560 return PTR_ERR(trans);
2562 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2563 (unsigned long long)backref->dir,
2564 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2565 (unsigned long long)root->objectid);
2567 btrfs_init_path(&path);
2568 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2569 backref->name, backref->namelen,
2570 backref->index, -1);
/* Early exit path: release and commit without deleting anything */
2573 btrfs_release_path(&path);
2574 btrfs_commit_transaction(trans, root);
2581 ret = btrfs_del_item(trans, root, &path);
2583 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2585 btrfs_release_path(&path);
2586 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino in @root in its own
 * transaction.
 *
 * The item is built on the stack from what the checker observed: nlink from
 * the links found (or 1), nbytes from found_size, and a type guessed from
 * the record -- a 0755 directory if any dir item was seen, otherwise a 0755
 * regular file sized to the end of the last extent.  atime/ctime/mtime are
 * set to the current time and otime to zero.
 */
2590 static int create_inode_item(struct btrfs_root *root,
2591 struct inode_record *rec,
2594 struct btrfs_trans_handle *trans;
2595 struct btrfs_inode_item inode_item;
2596 time_t now = time(NULL);
2599 trans = btrfs_start_transaction(root, 1);
2600 if (IS_ERR(trans)) {
2601 ret = PTR_ERR(trans);
2605 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2606 "be incomplete, please check permissions and content after "
2607 "the fsck completes.\n", (unsigned long long)root->objectid,
2608 (unsigned long long)rec->ino);
2610 memset(&inode_item, 0, sizeof(inode_item));
2611 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either forced to 1 or taken from found_link (selector not visible) */
2613 btrfs_set_stack_inode_nlink(&inode_item, 1);
2615 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2616 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2617 if (rec->found_dir_item) {
2618 if (rec->found_file_extent)
2619 fprintf(stderr, "root %llu inode %llu has both a dir "
2620 "item and extents, unsure if it is a dir or a "
2621 "regular file so setting it as a directory\n",
2622 (unsigned long long)root->objectid,
2623 (unsigned long long)rec->ino);
2624 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2625 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here; a plain else would do */
2626 } else if (!rec->found_dir_item) {
2627 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2628 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2630 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2631 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2632 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2633 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2634 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2635 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2636 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2637 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2639 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2641 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the backrefs of @rec, one backref at a time.
 *
 * Depending on the delete flag and on which of dir index / dir item /
 * inode ref were found for a backref, this deletes a bad dir index, adds a
 * missing dir index, inserts a missing dir index/item pair, or recreates a
 * missing inode item.  Backrefs that are fully consistent are dropped from
 * the record's list.  Returns an error code if one occurred, otherwise the
 * value of "repaired".
 * NOTE(review): several conditions and the updates of "repaired" are not
 * visible in this excerpt.
 */
2645 static int repair_inode_backrefs(struct btrfs_root *root,
2646 struct inode_record *rec,
2647 struct cache_tree *inode_cache,
2650 struct inode_backref *tmp, *backref;
2651 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2655 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory without an inode item: recreate the item first */
2656 if (!delete && rec->ino == root_dirid) {
2657 if (!rec->found_inode_item) {
2658 ret = create_inode_item(root, rec, 1);
2665 /* Index 0 of the root dir is special, don't mess with it */
2666 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without a matching inode ref, or with a mismatching index */
2670 ((backref->found_dir_index && !backref->found_inode_ref) ||
2671 (backref->found_dir_index && backref->found_inode_ref &&
2672 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2673 ret = delete_dir_index(root, backref);
2677 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing */
2682 if (!delete && !backref->found_dir_index &&
2683 backref->found_dir_item && backref->found_inode_ref) {
2684 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing left to repair, drop it */
2689 if (backref->found_dir_item &&
2690 backref->found_dir_index) {
2691 if (!backref->errors &&
2692 backref->found_inode_ref) {
2693 list_del(&backref->list);
/* Only the inode ref exists: add the dir index/item pair */
2700 if (!delete && (!backref->found_dir_index &&
2701 !backref->found_dir_item &&
2702 backref->found_inode_ref)) {
2703 struct btrfs_trans_handle *trans;
2704 struct btrfs_key location;
2706 ret = check_dir_conflict(root, backref->name,
2712 * let nlink fixing routine to handle it,
2713 * which can do it better.
2718 location.objectid = rec->ino;
2719 location.type = BTRFS_INODE_ITEM_KEY;
2720 location.offset = 0;
2722 trans = btrfs_start_transaction(root, 1);
2723 if (IS_ERR(trans)) {
2724 ret = PTR_ERR(trans);
2727 fprintf(stderr, "adding missing dir index/item pair "
2729 (unsigned long long)rec->ino);
2730 ret = btrfs_insert_dir_item(trans, root, backref->name,
2732 backref->dir, &location,
2733 imode_to_type(rec->imode),
2736 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item itself is missing */
2740 if (!delete && (backref->found_inode_ref &&
2741 backref->found_dir_index &&
2742 backref->found_dir_item &&
2743 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2744 !rec->found_inode_item)) {
2745 ret = create_inode_item(root, rec, 0);
2752 return ret ? ret : repaired;
/*
 * find_file_type(): derive the BTRFS_FT_* type of @rec, preferring the
 * inode item's mode and falling back to the filetype stored in the first
 * backref that has a dir index or dir item.  Contract is listed below.
 */
2756 * To determine the file type for nlink/inode_item repair
2758 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2759 * Return -ENOENT if file type is not found.
2761 static int find_file_type(struct inode_record *rec, u8 *type)
2763 struct inode_backref *backref;
2765 /* For inode item recovered case */
2766 if (rec->found_inode_item) {
2767 *type = imode_to_type(rec->imode);
/* Otherwise trust the type recorded in a directory entry */
2771 list_for_each_entry(backref, &rec->backrefs, list) {
2772 if (backref->found_dir_index || backref->found_dir_item) {
2773 *type = backref->filetype;
2781 * To determine the file name for nlink repair
2783 * Return 0 if file name is found, set name and namelen.
2784 * Return -ENOENT if file name is not found.
/*
 * find_file_name() - copy a name recorded in one of the backrefs of @rec.
 *
 * A backref flagged with found_dir_index, found_dir_item or found_inode_ref
 * is accepted.  backref->namelen bytes are copied with memcpy(), so this
 * copy itself appends no NUL terminator; the length goes to *@namelen.
 *
 * NOTE(review): no check of backref->namelen against the size of @name is
 * visible, so the caller has to supply a large enough buffer.
 */
2786 static int find_file_name(struct inode_record *rec,
2787 char *name, int *namelen)
2789 struct inode_backref *backref;
2791 list_for_each_entry(backref, &rec->backrefs, list) {
2792 if (backref->found_dir_index || backref->found_dir_item ||
2793 backref->found_inode_ref) {
2794 memcpy(name, backref->name, backref->namelen);
2795 *namelen = backref->namelen;
2802 /* Reset the nlink of the inode to the correct one */
/*
 * reset_nlink() - rebuild the link count of @rec from its backrefs.
 *
 * Visible phases:
 *   1. rec->found_link is cleared and every backref is unlinked with
 *      btrfs_unlink(); a backref that lacks any of dir index, dir item or
 *      inode ref is also removed from the list.
 *   2. The inode item (objectid rec->ino, type BTRFS_INODE_ITEM_KEY) is looked
 *      up with btrfs_search_slot() and its nlink is set to 0.
 *   3. The backrefs still on the list are re-added with btrfs_add_link().
 *
 * @path is released after step 2 and again at the end.
 *
 * NOTE(review): error handling after each call, and any bookkeeping of
 * rec->found_link for the kept backrefs, is not visible here; confirm.
 */
2803 static int reset_nlink(struct btrfs_trans_handle *trans,
2804 struct btrfs_root *root,
2805 struct btrfs_path *path,
2806 struct inode_record *rec)
2808 struct inode_backref *backref;
2809 struct inode_backref *tmp;
2810 struct btrfs_key key;
2811 struct btrfs_inode_item *inode_item;
2814 /* We don't believe this either, reset it and iterate backref */
2815 rec->found_link = 0;
2817 /* Remove all backref including the valid ones */
2818 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2819 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2820 backref->index, backref->name,
2821 backref->namelen, 0);
2825 /* remove invalid backref, so it won't be added back */
2826 if (!(backref->found_dir_index &&
2827 backref->found_dir_item &&
2828 backref->found_inode_ref)) {
2829 list_del(&backref->list);
2836 /* Set nlink to 0 */
2837 key.objectid = rec->ino;
2838 key.type = BTRFS_INODE_ITEM_KEY;
2840 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2847 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2848 struct btrfs_inode_item);
2849 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2850 btrfs_mark_buffer_dirty(path->nodes[0]);
2851 btrfs_release_path(path);
2854 * Add back valid inode_ref/dir_item/dir_index,
2855 * add_link() will handle the nlink inc, so new nlink must be correct
2857 list_for_each_entry(backref, &rec->backrefs, list) {
2858 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2859 backref->name, backref->namelen,
2860 backref->filetype, &backref->index, 1);
2865 btrfs_release_path(path);
/*
 * get_highest_inode() - report the objectid of the item that precedes the
 * BTRFS_LAST_FREE_OBJECTID inode-item position in @root.
 *
 * @path is initialised, a search is done for a key with objectid
 * BTRFS_LAST_FREE_OBJECTID and type BTRFS_INODE_ITEM_KEY (ins_len -1, cow 1),
 * and the key found in the slot before the search position is read; its
 * objectid is stored in *highest_ino.  @path is released before returning.
 *
 * NOTE(review): the remaining parameter declaration, the check of the search
 * result and the action taken when *highest_ino >= BTRFS_LAST_FREE_OBJECTID
 * are not visible here; confirm.
 */
2869 static int get_highest_inode(struct btrfs_trans_handle *trans,
2870 struct btrfs_root *root,
2871 struct btrfs_path *path,
2874 struct btrfs_key key, found_key;
2877 btrfs_init_path(path);
2878 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2880 key.type = BTRFS_INODE_ITEM_KEY;
2881 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2883 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2884 path->slots[0] - 1);
2885 *highest_ino = found_key.objectid;
2888 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2890 btrfs_release_path(path);
/*
 * repair_inode_nlinks() - fix the link count of @rec, moving the inode to a
 * "lost+found" directory when no link is left.
 *
 * Visible flow:
 *   - The file name and type are recovered from the backrefs first
 *     (find_file_name() / find_file_type()).  Fallbacks: the decimal inode
 *     number as name, BTRFS_FT_REG_FILE as type.
 *   - reset_nlink() rebuilds the links from the backrefs.
 *   - If rec->found_link is still 0, get_highest_inode() is queried, the
 *     "lost+found" directory is created with btrfs_mkdir() under
 *     BTRFS_FIRST_FREE_OBJECTID, and the inode is linked into it with
 *     btrfs_add_link().  While that returns -EEXIST a suffix is appended to
 *     the name in namebuf (bounded by BTRFS_NAME_LEN) and the link is retried.
 *   - I_ERR_LINK_COUNT_WRONG is cleared and @path is released at the end.
 *
 * NOTE(review): the error branches, the format used for the suffix and the
 * update of rec->found_link are not visible here; confirm.
 */
2894 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2895 struct btrfs_root *root,
2896 struct btrfs_path *path,
2897 struct inode_record *rec)
2899 char *dir_name = "lost+found";
2900 char namebuf[BTRFS_NAME_LEN] = {0};
2905 int name_recovered = 0;
2906 int type_recovered = 0;
2910 * Get file name and type first before these invalid inode ref
2911 * are deleted by remove_all_invalid_backref()
2913 name_recovered = !find_file_name(rec, namebuf, &namelen);
2914 type_recovered = !find_file_type(rec, &type);
2916 if (!name_recovered) {
2917 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2918 rec->ino, rec->ino);
2919 namelen = count_digits(rec->ino);
2920 sprintf(namebuf, "%llu", rec->ino);
2923 if (!type_recovered) {
2924 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2926 type = BTRFS_FT_REG_FILE;
2930 ret = reset_nlink(trans, root, path, rec);
2933 "Failed to reset nlink for inode %llu: %s\n",
2934 rec->ino, strerror(-ret));
2938 if (rec->found_link == 0) {
2939 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2943 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2944 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2947 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2948 dir_name, strerror(-ret));
2951 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2952 namebuf, namelen, type, NULL, 1);
2954 * Add ".INO" suffix several times to handle case where
2955 * "FILENAME.INO" is already taken by another file.
2957 while (ret == -EEXIST) {
2959 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2961 if (namelen + count_digits(rec->ino) + 1 >
2966 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2968 namelen += count_digits(rec->ino) + 1;
2969 ret = btrfs_add_link(trans, root, rec->ino,
2970 lost_found_ino, namebuf,
2971 namelen, type, NULL, 1);
2975 "Failed to link the inode %llu to %s dir: %s\n",
2976 rec->ino, dir_name, strerror(-ret));
2980 * Just increase the found_link, don't actually add the
2981 * backref. This will make things easier and this inode
2982 * record will be freed after the repair is done.
2983 * So fsck will not report problem about this inode.
2986 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2987 namelen, namebuf, dir_name);
2989 printf("Fixed the nlink of inode %llu\n", rec->ino);
2992 * Clear the flag anyway, or we will loop forever for the same inode
2993 * as it will not be removed from the bad inode list and the dead loop
2996 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2997 btrfs_release_path(path);
3002 * Check if there is any normal(reg or prealloc) file extent for given
3004 * This is used to determine the file type when neither its dir_index/item or
3005 * inode_item exists.
3007 * This will *NOT* report error, if any error happens, just consider it does
3008 * not have any normal file extent.
/*
 * find_normal_file_extent() - look for a non-inline file extent of @ino.
 *
 * The search is read-only (NULL transaction, ins_len 0, cow 0) on a local
 * path.  If the search position is past the last item of the leaf, the next
 * leaf is loaded.  The key at the position must belong to @ino and have type
 * BTRFS_EXTENT_DATA_KEY; the extent type is then read and anything other than
 * BTRFS_FILE_EXTENT_INLINE is treated as a hit.  The path is released before
 * returning.
 *
 * NOTE(review): the loop structure, the key objectid/offset setup and the
 * returned values are not visible here; confirm.
 */
3010 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3012 struct btrfs_path path;
3013 struct btrfs_key key;
3014 struct btrfs_key found_key;
3015 struct btrfs_file_extent_item *fi;
3019 btrfs_init_path(&path);
3021 key.type = BTRFS_EXTENT_DATA_KEY;
3024 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3029 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3030 ret = btrfs_next_leaf(root, &path);
3037 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3039 if (found_key.objectid != ino ||
3040 found_key.type != BTRFS_EXTENT_DATA_KEY)
3042 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3043 struct btrfs_file_extent_item);
3044 type = btrfs_file_extent_type(path.nodes[0], fi);
3045 if (type != BTRFS_FILE_EXTENT_INLINE) {
3051 btrfs_release_path(&path);
/*
 * btrfs_type_to_imode() - translate a BTRFS_FT_* directory entry type into
 * the matching S_IF* file type bits.
 *
 * The translation is a lookup in a static table indexed by @type.
 *
 * NOTE(review): @type is used as the index without a visible range check;
 * callers presumably pass only the BTRFS_FT_* values listed in the table.
 */
3055 static u32 btrfs_type_to_imode(u8 type)
3057 static u32 imode_by_btrfs_type[] = {
3058 [BTRFS_FT_REG_FILE] = S_IFREG,
3059 [BTRFS_FT_DIR] = S_IFDIR,
3060 [BTRFS_FT_CHRDEV] = S_IFCHR,
3061 [BTRFS_FT_BLKDEV] = S_IFBLK,
3062 [BTRFS_FT_FIFO] = S_IFIFO,
3063 [BTRFS_FT_SOCK] = S_IFSOCK,
3064 [BTRFS_FT_SYMLINK] = S_IFLNK,
3067 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item() - recreate the missing inode item of @rec.
 *
 * The file type is taken from find_file_type() when possible.  Otherwise it
 * is guessed in this order:
 *   - a found file extent that find_normal_file_extent() confirms
 *     -> BTRFS_FT_REG_FILE
 *   - rec->found_dir_item            -> BTRFS_FT_DIR
 *   - a non-empty orphan extent list -> BTRFS_FT_REG_FILE
 *   - anything else                  -> BTRFS_FT_REG_FILE, with a message
 *
 * The inode is created with btrfs_new_inode() using the mode bits combined
 * with btrfs_type_to_imode(filetype).  Afterwards the record is updated:
 * rec->found_dir_item and rec->imode are set, I_ERR_NO_INODE_ITEM is cleared
 * and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs later.
 *
 * NOTE(review): the declaration and value of "mode" and the error handling
 * after btrfs_new_inode() are not visible here; confirm.
 */
3070 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3071 struct btrfs_root *root,
3072 struct btrfs_path *path,
3073 struct inode_record *rec)
3077 int type_recovered = 0;
3080 printf("Trying to rebuild inode:%llu\n", rec->ino);
3082 type_recovered = !find_file_type(rec, &filetype);
3085 * Try to determine inode type if type not found.
3087 * For found regular file extent, it must be FILE.
3088 * For found dir_item/index, it must be DIR.
3090 * For undetermined one, use FILE as fallback.
3093 * 1. If found backref(inode_index/item is already handled) to it,
3095 * Need new inode-inode ref structure to allow search for that.
3097 if (!type_recovered) {
3098 if (rec->found_file_extent &&
3099 find_normal_file_extent(root, rec->ino)) {
3101 filetype = BTRFS_FT_REG_FILE;
3102 } else if (rec->found_dir_item) {
3104 filetype = BTRFS_FT_DIR;
3105 } else if (!list_empty(&rec->orphan_extents)) {
3107 filetype = BTRFS_FT_REG_FILE;
3109 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3112 filetype = BTRFS_FT_REG_FILE;
3116 ret = btrfs_new_inode(trans, root, rec->ino,
3117 mode | btrfs_type_to_imode(filetype));
3122 * Here inode rebuild is done, we only rebuild the inode item,
3123 * don't repair the nlink(like move to lost+found).
3124 * That is the job of nlink repair.
3126 * We just fill the record and return
3128 rec->found_dir_item = 1;
3129 rec->imode = mode | btrfs_type_to_imode(filetype);
3131 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3132 /* Ensure the inode_nlinks repair function will be called */
3133 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent() - re-attach or drop the orphan data extents
 * recorded on @rec.
 *
 * For every entry of rec->orphan_extents:
 *   - btrfs_get_extent() is used to look for a conflicting file extent over
 *     the range starting at orphan->offset with length orphan->disk_len, and
 *     @path is released.
 *   - On the conflict path a message is printed and the extent is released
 *     with btrfs_free_extent() against the extent root.
 *   - Otherwise a file extent is inserted with btrfs_insert_file_extent(),
 *     passing orphan->disk_len for both trailing length arguments;
 *     rec->found_size grows by disk_len and I_ERR_FILE_NBYTES_WRONG is
 *     cleared once found_size equals rec->nbytes.  The matching hole is
 *     removed with del_file_extent_hole() and I_ERR_FILE_EXTENT_DISCOUNT is
 *     cleared when no hole is left.
 *   - The entry is unlinked from the list.
 *
 * I_ERR_FILE_EXTENT_ORPHAN is cleared after the loop.
 *
 * NOTE(review): the conditions selecting the conflict branch and the error
 * exits are not visible here; confirm.
 */
3138 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3139 struct btrfs_root *root,
3140 struct btrfs_path *path,
3141 struct inode_record *rec)
3143 struct orphan_data_extent *orphan;
3144 struct orphan_data_extent *tmp;
3147 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3149 * Check for conflicting file extents
3151 * Here we don't know whether the extents is compressed or not,
3152 * so we can only assume it not compressed nor data offset,
3153 * and use its disk_len as extent length.
3155 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3156 orphan->offset, orphan->disk_len, 0);
3157 btrfs_release_path(path);
3162 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3163 orphan->disk_bytenr, orphan->disk_len);
3164 ret = btrfs_free_extent(trans,
3165 root->fs_info->extent_root,
3166 orphan->disk_bytenr, orphan->disk_len,
3167 0, root->objectid, orphan->objectid,
3172 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3173 orphan->offset, orphan->disk_bytenr,
3174 orphan->disk_len, orphan->disk_len);
3178 /* Update file size info */
3179 rec->found_size += orphan->disk_len;
3180 if (rec->found_size == rec->nbytes)
3181 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3183 /* Update the file extent hole info too */
3184 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3188 if (RB_EMPTY_ROOT(&rec->holes))
3189 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3191 list_del(&orphan->list);
3194 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent() - fill the recorded gaps in the file extents
 * of @rec.
 *
 * The holes tree (rec->holes) is consumed from its first node: each hole is
 * passed to btrfs_punch_hole() and then removed with del_file_extent_hole();
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A separate call punches a hole from offset 0 up to rec->isize rounded up to
 * the sector size, for a file that lost all of its file extents.
 *
 * NOTE(review): the loop condition, the guard around the whole-file case and
 * the error exits are not visible here; confirm.
 */
3199 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3200 struct btrfs_root *root,
3201 struct btrfs_path *path,
3202 struct inode_record *rec)
3204 struct rb_node *node;
3205 struct file_extent_hole *hole;
3209 node = rb_first(&rec->holes);
3213 hole = rb_entry(node, struct file_extent_hole, node);
3214 ret = btrfs_punch_hole(trans, root, rec->ino,
3215 hole->start, hole->len);
3218 ret = del_file_extent_hole(&rec->holes, hole->start,
3222 if (RB_EMPTY_ROOT(&rec->holes))
3223 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3224 node = rb_first(&rec->holes);
3226 /* special case for a file losing all its file extent */
3228 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3229 round_up(rec->isize, root->sectorsize));
3233 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3234 rec->ino, root->objectid);
/*
 * try_repair_inode() - run the per-inode repair helpers for the error bits
 * set on @rec.
 *
 * Nothing is attempted unless at least one of the error bits tested at the
 * top is set.  Otherwise a transaction reserving 7 items is started and the
 * helpers run in this fixed order, each one only while ret is still 0:
 *   no inode item -> orphan extents -> discount extents -> dir isize ->
 *   orphan item -> nlinks -> nbytes
 * The order matters because earlier helpers set or clear bits that later ones
 * test (repair_inode_no_item() sets I_ERR_LINK_COUNT_WRONG, for example).
 *
 * The transaction is committed and the path released after the chain.
 *
 * NOTE(review): the initial value of ret and the returned value are not
 * visible here; confirm.
 */
3239 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3241 struct btrfs_trans_handle *trans;
3242 struct btrfs_path path;
3245 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3246 I_ERR_NO_ORPHAN_ITEM |
3247 I_ERR_LINK_COUNT_WRONG |
3248 I_ERR_NO_INODE_ITEM |
3249 I_ERR_FILE_EXTENT_ORPHAN |
3250 I_ERR_FILE_EXTENT_DISCOUNT|
3251 I_ERR_FILE_NBYTES_WRONG)))
3255 * For nlink repair, it may create a dir and add link, so
3256 * 2 for parent(256)'s dir_index and dir_item
3257 * 2 for lost+found dir's inode_item and inode_ref
3258 * 1 for the new inode_ref of the file
3259 * 2 for lost+found dir's dir_index and dir_item for the file
3261 trans = btrfs_start_transaction(root, 7);
3263 return PTR_ERR(trans);
3265 btrfs_init_path(&path);
3266 if (rec->errors & I_ERR_NO_INODE_ITEM)
3267 ret = repair_inode_no_item(trans, root, &path, rec);
3268 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3269 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3270 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3271 ret = repair_inode_discount_extent(trans, root, &path, rec);
3272 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3273 ret = repair_inode_isize(trans, root, &path, rec);
3274 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3275 ret = repair_inode_orphan_item(trans, root, &path, rec);
3276 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3277 ret = repair_inode_nlinks(trans, root, &path, rec);
3278 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3279 ret = repair_inode_nbytes(trans, root, &path, rec);
3280 btrfs_commit_transaction(trans, root);
3281 btrfs_release_path(&path);
/*
 * check_inode_recs() - validate the inode records gathered for @root and
 * report (or, with the global repair flag, try to fix) what is wrong.
 *
 * Visible flow:
 *   1. A root whose root item has zero refs is special-cased: a warning is
 *      printed if the inode cache is not empty.
 *   2. While repair is set, the cache is walked and repair_inode_backrefs()
 *      is called for records that have backrefs.  The walk is staged; a
 *      record can be removed from the cache and freed so that everything is
 *      rescanned.
 *   3. The record of the root directory (root_dirid) is fetched and checked
 *      with check_root_dir().  A path that recreates a missing root dir with
 *      btrfs_make_root_dir() in its own transaction is present.
 *   4. The cache is drained.  Records for the root dir and for
 *      BTRFS_ORPHAN_OBJECTID are freed directly.  I_ERR_NO_ORPHAN_ITEM is
 *      re-evaluated with check_orphan_item(), a missing inode item and a link
 *      count mismatch are flagged, and try_repair_inode() is called.  Records
 *      that still carry problems are printed together with their unresolved
 *      backrefs and then freed.
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the stage loop, the conditions guarding the repair calls and
 * the error counting are not visible here; confirm.
 */
3285 static int check_inode_recs(struct btrfs_root *root,
3286 struct cache_tree *inode_cache)
3288 struct cache_extent *cache;
3289 struct ptr_node *node;
3290 struct inode_record *rec;
3291 struct inode_backref *backref;
3296 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3298 if (btrfs_root_refs(&root->root_item) == 0) {
3299 if (!cache_tree_empty(inode_cache))
3300 fprintf(stderr, "warning line %d\n", __LINE__);
3305 * We need to repair backrefs first because we could change some of the
3306 * errors in the inode recs.
3308 * We also need to go through and delete invalid backrefs first and then
3309 * add the correct ones second. We do this because we may get EEXIST
3310 * when adding back the correct index because we hadn't yet deleted the
3313 * For example, if we were missing a dir index then the directories
3314 * isize would be wrong, so if we fixed the isize to what we thought it
3315 * would be and then fixed the backref we'd still have a invalid fs, so
3316 * we need to add back the dir index and then check to see if the isize
3321 if (stage == 3 && !err)
3324 cache = search_cache_extent(inode_cache, 0);
3325 while (repair && cache) {
3326 node = container_of(cache, struct ptr_node, cache);
3328 cache = next_cache_extent(cache);
3330 /* Need to free everything up and rescan */
3332 remove_cache_extent(inode_cache, &node->cache);
3334 free_inode_rec(rec);
3338 if (list_empty(&rec->backrefs))
3341 ret = repair_inode_backrefs(root, rec, inode_cache,
3355 rec = get_inode_rec(inode_cache, root_dirid, 0);
3356 BUG_ON(IS_ERR(rec));
3358 ret = check_root_dir(rec);
3360 fprintf(stderr, "root %llu root dir %llu error\n",
3361 (unsigned long long)root->root_key.objectid,
3362 (unsigned long long)root_dirid);
3363 print_inode_error(root, rec);
3368 struct btrfs_trans_handle *trans;
3370 trans = btrfs_start_transaction(root, 1);
3371 if (IS_ERR(trans)) {
3372 err = PTR_ERR(trans);
3377 "root %llu missing its root dir, recreating\n",
3378 (unsigned long long)root->objectid);
3380 ret = btrfs_make_root_dir(trans, root, root_dirid);
3383 btrfs_commit_transaction(trans, root);
3387 fprintf(stderr, "root %llu root dir %llu not found\n",
3388 (unsigned long long)root->root_key.objectid,
3389 (unsigned long long)root_dirid);
3393 cache = search_cache_extent(inode_cache, 0);
3396 node = container_of(cache, struct ptr_node, cache);
3398 remove_cache_extent(inode_cache, &node->cache);
3400 if (rec->ino == root_dirid ||
3401 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3402 free_inode_rec(rec);
3406 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3407 ret = check_orphan_item(root, rec->ino);
3409 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3410 if (can_free_inode_rec(rec)) {
3411 free_inode_rec(rec);
3416 if (!rec->found_inode_item)
3417 rec->errors |= I_ERR_NO_INODE_ITEM;
3418 if (rec->found_link != rec->nlink)
3419 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3421 ret = try_repair_inode(root, rec);
3422 if (ret == 0 && can_free_inode_rec(rec)) {
3423 free_inode_rec(rec);
3429 if (!(repair && ret == 0))
3431 print_inode_error(root, rec);
3432 list_for_each_entry(backref, &rec->backrefs, list) {
3433 if (!backref->found_dir_item)
3434 backref->errors |= REF_ERR_NO_DIR_ITEM;
3435 if (!backref->found_dir_index)
3436 backref->errors |= REF_ERR_NO_DIR_INDEX;
3437 if (!backref->found_inode_ref)
3438 backref->errors |= REF_ERR_NO_INODE_REF;
3439 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3440 " namelen %u name %s filetype %d errors %x",
3441 (unsigned long long)backref->dir,
3442 (unsigned long long)backref->index,
3443 backref->namelen, backref->name,
3444 backref->filetype, backref->errors);
3445 print_ref_error(backref->errors);
3447 free_inode_rec(rec);
3449 return (error > 0) ? -1 : 0;
/*
 * get_root_rec() - find the root_record for an objectid in @root_cache, or
 * create and insert a new one.
 *
 * An existing record is located with lookup_cache_extent() (start objectid,
 * size 1).  A new record is zero-allocated, gets an empty backref list and a
 * cache extent of [objectid, objectid + 1), and is inserted into the cache.
 *
 * Returns ERR_PTR(-ENOMEM) when the allocation fails and ERR_PTR(-EEXIST)
 * when the insertion fails.
 *
 * NOTE(review): no free(rec) is visible before the -EEXIST return; check
 * whether the freshly allocated record leaks on that path.
 */
3452 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3455 struct cache_extent *cache;
3456 struct root_record *rec = NULL;
3459 cache = lookup_cache_extent(root_cache, objectid, 1);
3461 rec = container_of(cache, struct root_record, cache);
3463 rec = calloc(1, sizeof(*rec));
3465 return ERR_PTR(-ENOMEM);
3466 rec->objectid = objectid;
3467 INIT_LIST_HEAD(&rec->backrefs);
3468 rec->cache.start = objectid;
3469 rec->cache.size = 1;
3471 ret = insert_cache_extent(root_cache, &rec->cache);
3473 return ERR_PTR(-EEXIST);
/*
 * get_root_backref() - find or create the backref of @rec that matches
 * (@ref_root, @dir, @name).
 *
 * Existing entries are compared on ref_root, dir, namelen and the name bytes
 * (memcmp over @namelen bytes); @index is not part of the comparison.
 *
 * A new entry is zero-allocated with @namelen + 1 extra bytes for the name,
 * which is copied and NUL-terminated, and is appended to rec->backrefs.
 *
 * NOTE(review): the handling of a failed calloc() and the returned values
 * are not visible here; confirm.
 */
3478 static struct root_backref *get_root_backref(struct root_record *rec,
3479 u64 ref_root, u64 dir, u64 index,
3480 const char *name, int namelen)
3482 struct root_backref *backref;
3484 list_for_each_entry(backref, &rec->backrefs, list) {
3485 if (backref->ref_root != ref_root || backref->dir != dir ||
3486 backref->namelen != namelen)
3488 if (memcmp(name, backref->name, namelen))
3493 backref = calloc(1, sizeof(*backref) + namelen + 1);
3496 backref->ref_root = ref_root;
3498 backref->index = index;
3499 backref->namelen = namelen;
3500 memcpy(backref->name, name, namelen);
3501 backref->name[namelen] = '\0';
3502 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record() - tear down the root_record that embeds @cache.
 *
 * The record is recovered with container_of() and its backref list is
 * emptied one entry at a time from the head.
 *
 * NOTE(review): the free() calls for the entries and for the record itself
 * are not visible here; confirm.
 */
3506 static void free_root_record(struct cache_extent *cache)
3508 struct root_record *rec;
3509 struct root_backref *backref;
3511 rec = container_of(cache, struct root_record, cache);
3512 while (!list_empty(&rec->backrefs)) {
3513 backref = to_root_backref(rec->backrefs.next);
3514 list_del(&backref->list);
/*
 * Macro instantiation that takes free_root_record as its per-entry callback;
 * free_root_recs_tree() is called elsewhere in this file and is presumably
 * what this generates - confirm against the macro definition.
 */
3521 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref() - record one sighting of a reference to root @root_id.
 *
 * The root record and the matching backref are looked up or created, @errors
 * is OR-ed into the backref, and the flag for @item_type is set:
 *   BTRFS_DIR_ITEM_KEY     -> found_dir_item
 *   BTRFS_DIR_INDEX_KEY    -> found_dir_index
 *   BTRFS_ROOT_REF_KEY     -> found_forward_ref (REF_ERR_DUP_ROOT_REF if it
 *                             was already set)
 *   BTRFS_ROOT_BACKREF_KEY -> found_back_ref (REF_ERR_DUP_ROOT_BACKREF if it
 *                             was already set)
 *
 * For every item type except DIR_ITEM, @index is compared with the index
 * already stored on the backref once a dir index, back ref or forward ref
 * has been seen; a mismatch sets REF_ERR_INDEX_UNMATCH.
 *
 * A backref with both a forward ref and a dir item is marked reachable.
 *
 * NOTE(review): several statements (the found_ref accounting among them) are
 * not visible here; confirm.
 */
3523 static int add_root_backref(struct cache_tree *root_cache,
3524 u64 root_id, u64 ref_root, u64 dir, u64 index,
3525 const char *name, int namelen,
3526 int item_type, int errors)
3528 struct root_record *rec;
3529 struct root_backref *backref;
3531 rec = get_root_rec(root_cache, root_id);
3532 BUG_ON(IS_ERR(rec));
3533 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3536 backref->errors |= errors;
3538 if (item_type != BTRFS_DIR_ITEM_KEY) {
3539 if (backref->found_dir_index || backref->found_back_ref ||
3540 backref->found_forward_ref) {
3541 if (backref->index != index)
3542 backref->errors |= REF_ERR_INDEX_UNMATCH;
3544 backref->index = index;
3548 if (item_type == BTRFS_DIR_ITEM_KEY) {
3549 if (backref->found_forward_ref)
3551 backref->found_dir_item = 1;
3552 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3553 backref->found_dir_index = 1;
3554 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3555 if (backref->found_forward_ref)
3556 backref->errors |= REF_ERR_DUP_ROOT_REF;
3557 else if (backref->found_dir_item)
3559 backref->found_forward_ref = 1;
3560 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3561 if (backref->found_back_ref)
3562 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3563 backref->found_back_ref = 1;
3568 if (backref->found_forward_ref && backref->found_dir_item)
3569 backref->reachable = 1;
/*
 * merge_root_recs() - move the records collected in @src_cache for @root
 * into the root record cache @dst_cache.
 *
 * For the tree reloc root the source records are simply freed.  Otherwise
 * @src_cache is drained: each record is removed, tested with
 * is_child_root(), and its backrefs are forwarded to add_root_backref() as
 * BTRFS_DIR_ITEM_KEY and/or BTRFS_DIR_INDEX_KEY sightings, keyed by rec->ino
 * as the referenced root and root->root_key.objectid as the referencing
 * root.  A backref with found_inode_ref set trips BUG_ON().  Each record is
 * freed afterwards.
 *
 * NOTE(review): the handling of the is_child_root() result and the returned
 * values are not visible here; confirm.
 */
3573 static int merge_root_recs(struct btrfs_root *root,
3574 struct cache_tree *src_cache,
3575 struct cache_tree *dst_cache)
3577 struct cache_extent *cache;
3578 struct ptr_node *node;
3579 struct inode_record *rec;
3580 struct inode_backref *backref;
3583 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3584 free_inode_recs_tree(src_cache);
3589 cache = search_cache_extent(src_cache, 0);
3592 node = container_of(cache, struct ptr_node, cache);
3594 remove_cache_extent(src_cache, &node->cache);
3597 ret = is_child_root(root, root->objectid, rec->ino);
3603 list_for_each_entry(backref, &rec->backrefs, list) {
3604 BUG_ON(backref->found_inode_ref);
3605 if (backref->found_dir_item)
3606 add_root_backref(dst_cache, rec->ino,
3607 root->root_key.objectid, backref->dir,
3608 backref->index, backref->name,
3609 backref->namelen, BTRFS_DIR_ITEM_KEY,
3611 if (backref->found_dir_index)
3612 add_root_backref(dst_cache, rec->ino,
3613 root->root_key.objectid, backref->dir,
3614 backref->index, backref->name,
3615 backref->namelen, BTRFS_DIR_INDEX_KEY,
3619 free_inode_rec(rec);
/*
 * check_root_refs() - verify that the fs trees recorded in @root_cache are
 * properly referenced and report the ones that are not.
 *
 * Visible flow:
 *   1. The record for BTRFS_FS_TREE_OBJECTID is fetched first.
 *   2. A pass over the cache inspects the reachable backrefs of each record:
 *      the referencing root is looked up, and when it has no found_ref the
 *      backref is marked unreachable.
 *   3. A second pass reports problems.  An unreferenced record whose
 *      objectid lies in [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID]
 *      is checked for an orphan item in the tree root; records without a root
 *      item are skipped, the rest are printed as not referenced.  Missing dir
 *      item, dir index, root backref and root ref are turned into REF_ERR_*
 *      bits on every backref, and reachable backrefs with errors are printed
 *      as unresolved refs.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 *
 * NOTE(review): the loop that repeats pass 2 and the error counting are not
 * visible here; the existing fixme says circular references are not
 * detected.
 */
3626 static int check_root_refs(struct btrfs_root *root,
3627 struct cache_tree *root_cache)
3629 struct root_record *rec;
3630 struct root_record *ref_root;
3631 struct root_backref *backref;
3632 struct cache_extent *cache;
3638 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3639 BUG_ON(IS_ERR(rec));
3642 /* fixme: this can not detect circular references */
3645 cache = search_cache_extent(root_cache, 0);
3649 rec = container_of(cache, struct root_record, cache);
3650 cache = next_cache_extent(cache);
3652 if (rec->found_ref == 0)
3655 list_for_each_entry(backref, &rec->backrefs, list) {
3656 if (!backref->reachable)
3659 ref_root = get_root_rec(root_cache,
3661 BUG_ON(IS_ERR(ref_root));
3662 if (ref_root->found_ref > 0)
3665 backref->reachable = 0;
3667 if (rec->found_ref == 0)
3673 cache = search_cache_extent(root_cache, 0);
3677 rec = container_of(cache, struct root_record, cache);
3678 cache = next_cache_extent(cache);
3680 if (rec->found_ref == 0 &&
3681 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3682 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3683 ret = check_orphan_item(root->fs_info->tree_root,
3689 * If we don't have a root item then we likely just have
3690 * a dir item in a snapshot for this root but no actual
3691 * ref key or anything so it's meaningless.
3693 if (!rec->found_root_item)
3696 fprintf(stderr, "fs tree %llu not referenced\n",
3697 (unsigned long long)rec->objectid);
3701 if (rec->found_ref > 0 && !rec->found_root_item)
3703 list_for_each_entry(backref, &rec->backrefs, list) {
3704 if (!backref->found_dir_item)
3705 backref->errors |= REF_ERR_NO_DIR_ITEM;
3706 if (!backref->found_dir_index)
3707 backref->errors |= REF_ERR_NO_DIR_INDEX;
3708 if (!backref->found_back_ref)
3709 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3710 if (!backref->found_forward_ref)
3711 backref->errors |= REF_ERR_NO_ROOT_REF;
3712 if (backref->reachable && backref->errors)
3719 fprintf(stderr, "fs tree %llu refs %u %s\n",
3720 (unsigned long long)rec->objectid, rec->found_ref,
3721 rec->found_root_item ? "" : "not found");
3723 list_for_each_entry(backref, &rec->backrefs, list) {
3724 if (!backref->reachable)
3726 if (!backref->errors && rec->found_root_item)
3728 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3729 " index %llu namelen %u name %s errors %x\n",
3730 (unsigned long long)backref->ref_root,
3731 (unsigned long long)backref->dir,
3732 (unsigned long long)backref->index,
3733 backref->namelen, backref->name,
3735 print_ref_error(backref->errors);
3738 return errors > 0 ? 1 : 0;
/*
 * process_root_ref() - decode one ROOT_REF / ROOT_BACKREF item and feed it to
 * add_root_backref().
 *
 * dirid, sequence (used as the index) and name length are read from the item
 * at @slot of @eb.  A name longer than BTRFS_NAME_LEN is truncated to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.  The name bytes
 * follow the btrfs_root_ref structure in the item and are copied into a
 * local buffer.
 *
 * For BTRFS_ROOT_REF_KEY the referenced root is key->offset and the
 * referencing root is key->objectid; in the other branch the two are
 * swapped.
 */
3741 static int process_root_ref(struct extent_buffer *eb, int slot,
3742 struct btrfs_key *key,
3743 struct cache_tree *root_cache)
3749 struct btrfs_root_ref *ref;
3750 char namebuf[BTRFS_NAME_LEN];
3753 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3755 dirid = btrfs_root_ref_dirid(eb, ref);
3756 index = btrfs_root_ref_sequence(eb, ref);
3757 name_len = btrfs_root_ref_name_len(eb, ref);
3759 if (name_len <= BTRFS_NAME_LEN) {
3763 len = BTRFS_NAME_LEN;
3764 error = REF_ERR_NAME_TOO_LONG;
3766 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3768 if (key->type == BTRFS_ROOT_REF_KEY) {
3769 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3770 index, namebuf, len, key->type, error);
3772 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3773 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block() - release the btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): only the container_of() lookup is visible here; the free()
 * call presumably follows - confirm.
 */
3778 static void free_corrupt_block(struct cache_extent *cache)
3780 struct btrfs_corrupt_block *corrupt;
3782 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Macro instantiation that takes free_corrupt_block as its per-entry
 * callback; free_corrupt_blocks_tree() is called elsewhere in this file and
 * is presumably what this generates - confirm against the macro definition.
 */
3786 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3789 * Repair the btree of the given root.
3791 * The fix is to remove the node key in corrupt_blocks cache_tree.
3792 * and rebalance the tree.
3793 * After the fix, the btree should be writeable.
/*
 * repair_btree() - drop the tree blocks listed in @corrupt_blocks from @root.
 *
 * Nothing is done for an empty list.  Otherwise, inside one transaction:
 *   Pass 1: for every corrupt block, the parent slot is located with
 *           btrfs_search_slot() at path.lowest_level = corrupt->level
 *           (ins_len 0, cow 1), the block pointer is read, the pointer is
 *           removed with btrfs_del_ptr(), the path is released and the
 *           extent of root->nodesize bytes is freed with
 *           btrfs_free_extent().
 *   Pass 2: the same keys are searched again with ins_len -1 so that
 *           btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): loop conditions and error exits are not visible here;
 * confirm.
 */
3795 static int repair_btree(struct btrfs_root *root,
3796 struct cache_tree *corrupt_blocks)
3798 struct btrfs_trans_handle *trans;
3799 struct btrfs_path path;
3800 struct btrfs_corrupt_block *corrupt;
3801 struct cache_extent *cache;
3802 struct btrfs_key key;
3807 if (cache_tree_empty(corrupt_blocks))
3810 trans = btrfs_start_transaction(root, 1);
3811 if (IS_ERR(trans)) {
3812 ret = PTR_ERR(trans);
3813 fprintf(stderr, "Error starting transaction: %s\n",
3817 btrfs_init_path(&path);
3818 cache = first_cache_extent(corrupt_blocks);
3820 corrupt = container_of(cache, struct btrfs_corrupt_block,
3822 level = corrupt->level;
3823 path.lowest_level = level;
3824 key.objectid = corrupt->key.objectid;
3825 key.type = corrupt->key.type;
3826 key.offset = corrupt->key.offset;
3829 * Here we don't want to do any tree balance, since it may
3830 * cause a balance with corrupted brother leaf/node,
3831 * so ins_len set to 0 here.
3832 * Balance will be done after all corrupt node/leaf is deleted.
3834 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3837 offset = btrfs_node_blockptr(path.nodes[level],
3840 /* Remove the ptr */
3841 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3845 * Remove the corresponding extent
3846 * return value is not concerned.
3848 btrfs_release_path(&path);
3849 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3850 0, root->root_key.objectid,
3852 cache = next_cache_extent(cache);
3855 /* Balance the btree using btrfs_search_slot() */
3856 cache = first_cache_extent(corrupt_blocks);
3858 corrupt = container_of(cache, struct btrfs_corrupt_block,
3860 memcpy(&key, &corrupt->key, sizeof(key));
3861 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3864 /* return will always >0 since it won't find the item */
3866 btrfs_release_path(&path);
3867 cache = next_cache_extent(cache);
3870 btrfs_commit_transaction(trans, root);
3871 btrfs_release_path(&path);
/*
 * check_fs_root() - walk one fs/subvolume tree and check everything found.
 *
 * Visible flow:
 *   - A local corrupt_blocks cache is installed in
 *     root->fs_info->corrupt_blocks for the duration of the call.
 *   - Unless @root is the tree reloc root, its root record is fetched and
 *     found_root_item is set when the root item has refs.
 *   - Orphan data extents queued on the root are moved onto the inode record
 *     of their owner, which is flagged I_ERR_FILE_EXTENT_ORPHAN.
 *   - The walk control @wc is reset so that the level of the root node is
 *     the active node, and the root block is checked with
 *     btrfs_check_leaf() / btrfs_check_node().
 *   - The walk starts at the root node when the root item has refs or the
 *     drop_progress objectid is 0; otherwise it resumes from the
 *     drop_progress key at drop_level, after rejecting an invalid level.
 *   - walk_down_tree() / walk_up_tree() traverse the tree.
 *   - Corrupted tree blocks recorded during the walk are listed and passed
 *     to repair_btree().
 *   - merge_root_recs() and check_inode_recs() process what was collected.
 *   - The corrupt block cache and the orphan extent list are freed and
 *     fs_info->corrupt_blocks is reset to NULL.
 *
 * NOTE(review): the walk loop, the guard around the repair attempt, the
 * error aggregation and the returned values are not visible here; confirm.
 */
3875 static int check_fs_root(struct btrfs_root *root,
3876 struct cache_tree *root_cache,
3877 struct walk_control *wc)
3883 struct btrfs_path path;
3884 struct shared_node root_node;
3885 struct root_record *rec;
3886 struct btrfs_root_item *root_item = &root->root_item;
3887 struct cache_tree corrupt_blocks;
3888 struct orphan_data_extent *orphan;
3889 struct orphan_data_extent *tmp;
3890 enum btrfs_tree_block_status status;
3891 struct node_refs nrefs;
3894 * Reuse the corrupt_block cache tree to record corrupted tree block
3896 * Unlike the usage in extent tree check, here we do it in a per
3897 * fs/subvol tree base.
3899 cache_tree_init(&corrupt_blocks);
3900 root->fs_info->corrupt_blocks = &corrupt_blocks;
3902 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3903 rec = get_root_rec(root_cache, root->root_key.objectid);
3904 BUG_ON(IS_ERR(rec));
3905 if (btrfs_root_refs(root_item) > 0)
3906 rec->found_root_item = 1;
3909 btrfs_init_path(&path);
3910 memset(&root_node, 0, sizeof(root_node));
3911 cache_tree_init(&root_node.root_cache);
3912 cache_tree_init(&root_node.inode_cache);
3913 memset(&nrefs, 0, sizeof(nrefs));
3915 /* Move the orphan extent record to corresponding inode_record */
3916 list_for_each_entry_safe(orphan, tmp,
3917 &root->orphan_data_extents, list) {
3918 struct inode_record *inode;
3920 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3922 BUG_ON(IS_ERR(inode));
3923 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3924 list_move(&orphan->list, &inode->orphan_extents);
3927 level = btrfs_header_level(root->node);
3928 memset(wc->nodes, 0, sizeof(wc->nodes));
3929 wc->nodes[level] = &root_node;
3930 wc->active_node = level;
3931 wc->root_level = level;
3933 /* We may not have checked the root block, lets do that now */
3934 if (btrfs_is_leaf(root->node))
3935 status = btrfs_check_leaf(root, NULL, root->node);
3937 status = btrfs_check_node(root, NULL, root->node);
3938 if (status != BTRFS_TREE_BLOCK_CLEAN)
3941 if (btrfs_root_refs(root_item) > 0 ||
3942 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3943 path.nodes[level] = root->node;
3944 extent_buffer_get(root->node);
3945 path.slots[level] = 0;
3947 struct btrfs_key key;
3948 struct btrfs_disk_key found_key;
3950 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3951 level = root_item->drop_level;
3952 path.lowest_level = level;
3953 if (level > btrfs_header_level(root->node) ||
3954 level >= BTRFS_MAX_LEVEL) {
3955 error("ignoring invalid drop level: %u", level);
3958 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3961 btrfs_node_key(path.nodes[level], &found_key,
3963 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3964 sizeof(found_key)));
3968 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3974 wret = walk_up_tree(root, &path, wc, &level);
3981 btrfs_release_path(&path);
3983 if (!cache_tree_empty(&corrupt_blocks)) {
3984 struct cache_extent *cache;
3985 struct btrfs_corrupt_block *corrupt;
3987 printf("The following tree block(s) is corrupted in tree %llu:\n",
3988 root->root_key.objectid);
3989 cache = first_cache_extent(&corrupt_blocks);
3991 corrupt = container_of(cache,
3992 struct btrfs_corrupt_block,
3994 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3995 cache->start, corrupt->level,
3996 corrupt->key.objectid, corrupt->key.type,
3997 corrupt->key.offset);
3998 cache = next_cache_extent(cache);
4001 printf("Try to repair the btree for root %llu\n",
4002 root->root_key.objectid);
4003 ret = repair_btree(root, &corrupt_blocks);
4005 fprintf(stderr, "Failed to repair btree: %s\n",
4008 printf("Btree for root %llu is fixed\n",
4009 root->root_key.objectid);
4013 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4017 if (root_node.current) {
4018 root_node.current->checked = 1;
4019 maybe_free_inode_rec(&root_node.inode_cache,
4023 err = check_inode_recs(root, &root_node.inode_cache);
4027 free_corrupt_blocks_tree(&corrupt_blocks);
4028 root->fs_info->corrupt_blocks = NULL;
4029 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * fs_root_objectid() - tell whether @objectid names a tree that
 * check_fs_roots() should walk.
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are handled by
 * an explicit branch; every other objectid is decided by is_fstree().
 *
 * NOTE(review): the value returned by the explicit branch is not visible
 * here (presumably non-zero); confirm.
 */
4033 static int fs_root_objectid(u64 objectid)
4035 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4036 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4038 return is_fstree(objectid);
/*
 * check_fs_roots() - iterate the tree root and check every fs/subvolume tree.
 *
 * Visible flow:
 *   - Progress reporting is switched to TASK_FS_ROOTS when enabled.
 *   - Cached block groups are reset, the walk control and path initialised.
 *   - Items of the tree root are scanned starting from a
 *     BTRFS_ROOT_ITEM_KEY search:
 *       * ROOT_ITEM of an objectid accepted by fs_root_objectid(): the root
 *         is read (uncached for the tree reloc root, otherwise via
 *         btrfs_read_fs_root() with key.offset = (u64)-1) and passed to
 *         check_fs_root().  On -EAGAIN the root record cache is freed and
 *         the path released.  Tree reloc roots are freed with
 *         btrfs_free_fs_root() afterwards.
 *       * ROOT_REF / ROOT_BACKREF items go to process_root_ref().
 *   - The current tree root node is compared with the one remembered in
 *     tree_node; if it changed, the root record cache is freed and the path
 *     released.
 *   - At the end the path is released, wc.shared is freed (a warning is
 *     printed if it is still not empty) and the progress task is stopped.
 *
 * NOTE(review): the loop/restart structure, the error accounting and the
 * returned values are not visible here; confirm.
 */
4041 static int check_fs_roots(struct btrfs_root *root,
4042 struct cache_tree *root_cache)
4044 struct btrfs_path path;
4045 struct btrfs_key key;
4046 struct walk_control wc;
4047 struct extent_buffer *leaf, *tree_node;
4048 struct btrfs_root *tmp_root;
4049 struct btrfs_root *tree_root = root->fs_info->tree_root;
4053 if (ctx.progress_enabled) {
4054 ctx.tp = TASK_FS_ROOTS;
4055 task_start(ctx.info);
4059 * Just in case we made any changes to the extent tree that weren't
4060 * reflected into the free space cache yet.
4063 reset_cached_block_groups(root->fs_info);
4064 memset(&wc, 0, sizeof(wc));
4065 cache_tree_init(&wc.shared);
4066 btrfs_init_path(&path);
4071 key.type = BTRFS_ROOT_ITEM_KEY;
4072 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4077 tree_node = tree_root->node;
4079 if (tree_node != tree_root->node) {
4080 free_root_recs_tree(root_cache);
4081 btrfs_release_path(&path);
4084 leaf = path.nodes[0];
4085 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4086 ret = btrfs_next_leaf(tree_root, &path);
4092 leaf = path.nodes[0];
4094 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4095 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4096 fs_root_objectid(key.objectid)) {
4097 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4098 tmp_root = btrfs_read_fs_root_no_cache(
4099 root->fs_info, &key);
4101 key.offset = (u64)-1;
4102 tmp_root = btrfs_read_fs_root(
4103 root->fs_info, &key);
4105 if (IS_ERR(tmp_root)) {
4109 ret = check_fs_root(tmp_root, root_cache, &wc);
4110 if (ret == -EAGAIN) {
4111 free_root_recs_tree(root_cache);
4112 btrfs_release_path(&path);
4117 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4118 btrfs_free_fs_root(tmp_root);
4119 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4120 key.type == BTRFS_ROOT_BACKREF_KEY) {
4121 process_root_ref(leaf, path.slots[0], &key,
4128 btrfs_release_path(&path);
4130 free_extent_cache_tree(&wc.shared);
4131 if (!cache_tree_empty(&wc.shared))
4132 fprintf(stderr, "warning line %d\n", __LINE__);
4134 task_stop(ctx.info);
4140 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4141 * INODE_REF/INODE_EXTREF match.
4143 * @root: the root of the fs/file tree
4144 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4145 * @key: the key of the DIR_ITEM/DIR_INDEX
4146 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4147 * distinguish the root dir from a normal dir/file
4148 * @name: the name in the INODE_REF/INODE_EXTREF
4149 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4150 * @mode: the st_mode of INODE_ITEM
4152 * Return 0 if no error occurred.
4153 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4154 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4156 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4157 * do not match for normal dir/file.
/*
 * Look up the DIR_ITEM/DIR_INDEX stored at @key and verify that one of its
 * entries points back at the inode described by @ref_key.
 *
 * An entry is compared on three things: its location must be
 * (ref_key->objectid, BTRFS_INODE_ITEM_KEY, 0), its dir type must equal
 * imode_to_type(mode), and its name (clamped to BTRFS_NAME_LEN) must equal
 * @name over @namelen bytes.
 *
 * The result is one of 0, ROOT_DIR_ERROR, DIR_ITEM_MISSING or
 * DIR_ITEM_MISMATCH; an error message is printed for the non-zero cases.
 *
 * NOTE(review): several short lines are absent from this listing (braces,
 * gotos, the conditions guarding the root-dir and missing-item branches, the
 * cur update and the final return). The comments below describe only what
 * is visible.
 */
4159 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4160 struct btrfs_key *key, u64 index, char *name,
4161 u32 namelen, u32 mode)
4163 struct btrfs_path path;
4164 struct extent_buffer *node;
4165 struct btrfs_dir_item *di;
4166 struct btrfs_key location;
4167 char namebuf[BTRFS_NAME_LEN] = {0};
4177 btrfs_init_path(&path);
4178 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4180 ret = DIR_ITEM_MISSING;
4184 /* Process root dir and goto out*/
4187 ret = ROOT_DIR_ERROR;
4189 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4191 ref_key->type == BTRFS_INODE_REF_KEY ?
4193 ref_key->objectid, ref_key->offset,
4194 key->type == BTRFS_DIR_ITEM_KEY ?
4195 "DIR_ITEM" : "DIR_INDEX");
4203 /* Process normal file/dir */
4205 ret = DIR_ITEM_MISSING;
4207 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4209 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4210 ref_key->objectid, ref_key->offset,
4211 key->type == BTRFS_DIR_ITEM_KEY ?
4212 "DIR_ITEM" : "DIR_INDEX",
4213 key->objectid, key->offset, namelen, name,
4214 imode_to_type(mode));
4218 /* Check whether inode_id/filetype/name match */
4219 node = path.nodes[0];
4220 slot = path.slots[0];
4221 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4222 total = btrfs_item_size_nr(node, slot);
/*
 * Walk the dir entries packed into this item. ret is preset to
 * DIR_ITEM_MISMATCH for every entry examined.
 */
4223 while (cur < total) {
4224 ret = DIR_ITEM_MISMATCH;
4225 name_len = btrfs_dir_name_len(node, di);
4226 data_len = btrfs_dir_data_len(node, di);
4228 btrfs_dir_item_key_to_cpu(node, di, &location);
4229 if (location.objectid != ref_key->objectid ||
4230 location.type != BTRFS_INODE_ITEM_KEY ||
4231 location.offset != 0)
4234 filetype = btrfs_dir_type(node, di);
4235 if (imode_to_type(mode) != filetype)
/* Over-long names are copied only up to BTRFS_NAME_LEN, the size of namebuf. */
4238 if (name_len <= BTRFS_NAME_LEN) {
4241 len = BTRFS_NAME_LEN;
4242 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4244 key->type == BTRFS_DIR_ITEM_KEY ?
4245 "DIR_ITEM" : "DIR_INDEX",
4246 key->objectid, key->offset, name_len);
4248 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4249 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry: entry header plus its name and data bytes. */
4255 len = sizeof(*di) + name_len + data_len;
4256 di = (struct btrfs_dir_item *)((char *)di + len);
4259 if (ret == DIR_ITEM_MISMATCH)
4261 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4263 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4264 ref_key->objectid, ref_key->offset,
4265 key->type == BTRFS_DIR_ITEM_KEY ?
4266 "DIR_ITEM" : "DIR_INDEX",
4267 key->objectid, key->offset, namelen, name,
4268 imode_to_type(mode));
4270 btrfs_release_path(&path);
4275 * Traverse the given INODE_REF and call find_dir_item() to find related
4276 * DIR_ITEM/DIR_INDEX.
4278 * @root: the root of the fs/file tree
4279 * @ref_key: the key of the INODE_REF
4280 * @refs: the count of INODE_REF
4281 * @mode: the st_mode of INODE_ITEM
4283 * Return 0 if no error occurred.
/*
 * Walk the entries of one INODE_REF item (at @node/@slot) and cross-check
 * each of them against the parent directory with find_dir_item(): once with
 * a DIR_INDEX key and once with a DIR_ITEM key whose offset is
 * btrfs_name_hash() of the name. The parent directory id is taken from
 * ref_key->offset.
 *
 * NOTE(review): the loop header, the cur update, the ref-count update on
 * *refs, the error accumulation after each find_dir_item() call and the
 * return are not visible in this listing.
 */
4285 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4286 struct extent_buffer *node, int slot, u64 *refs,
4289 struct btrfs_key key;
4290 struct btrfs_inode_ref *ref;
4291 char namebuf[BTRFS_NAME_LEN] = {0};
4299 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4300 total = btrfs_item_size_nr(node, slot);
4303 /* Update inode ref count */
4306 index = btrfs_inode_ref_index(node, ref);
4307 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name that would run past the end of the item, or that exceeds
 * BTRFS_NAME_LEN, is reported and clamped to what fits in both the item
 * and namebuf.
 */
4308 if (cur + sizeof(*ref) + name_len > total ||
4309 name_len > BTRFS_NAME_LEN) {
4310 warning("root %llu INODE_REF[%llu %llu] name too long",
4311 root->objectid, ref_key->objectid, ref_key->offset);
4313 if (total < cur + sizeof(*ref))
4315 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/*
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and len may equal
 * BTRFS_NAME_LEN, so the buffer is not guaranteed to be NUL-terminated;
 * find_dir_item() formats the name it is given with %s. Confirm whether a
 * terminator byte is needed.
 */
4320 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4322 /* Check root dir ref name */
/* NOTE(review): this compares over name_len, not the clamped len -- confirm. */
4323 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4324 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4325 root->objectid, ref_key->objectid, ref_key->offset,
4327 err |= ROOT_DIR_ERROR;
4330 /* Find related DIR_INDEX */
4331 key.objectid = ref_key->offset;
4332 key.type = BTRFS_DIR_INDEX_KEY;
4334 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4337 /* Find related dir_item */
4338 key.objectid = ref_key->offset;
4339 key.type = BTRFS_DIR_ITEM_KEY;
4340 key.offset = btrfs_name_hash(namebuf, len);
4341 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next packed ref: ref header plus the on-disk name length. */
4344 len = sizeof(*ref) + name_len;
4345 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4355 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4356 * DIR_ITEM/DIR_INDEX.
4358 * @root: the root of the fs/file tree
4359 * @ref_key: the key of the INODE_EXTREF
4360 * @refs: the count of INODE_EXTREF
4361 * @mode: the st_mode of INODE_ITEM
4363 * Return 0 if no error occurred.
/*
 * Walk the entries of one INODE_EXTREF item (at @node/@slot) and cross-check
 * each of them against its parent directory with find_dir_item(): once with
 * a DIR_INDEX key and once with a DIR_ITEM key whose offset is
 * btrfs_name_hash() of the name. Unlike INODE_REF, the parent directory id
 * is read from each extref entry.
 *
 * NOTE(review): the loop header, the cur update, the ref-count update on
 * *refs, the error accumulation after each find_dir_item() call and the
 * return are not visible in this listing.
 */
4365 static int check_inode_extref(struct btrfs_root *root,
4366 struct btrfs_key *ref_key,
4367 struct extent_buffer *node, int slot, u64 *refs,
4370 struct btrfs_key key;
4371 struct btrfs_inode_extref *extref;
4372 char namebuf[BTRFS_NAME_LEN] = {0};
4382 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4383 total = btrfs_item_size_nr(node, slot);
4386 /* update inode ref count */
4388 name_len = btrfs_inode_extref_name_len(node, extref);
4389 index = btrfs_inode_extref_index(node, extref);
4390 parent = btrfs_inode_extref_parent(node, extref);
/* Over-long names are reported and copied only up to BTRFS_NAME_LEN. */
4391 if (name_len <= BTRFS_NAME_LEN) {
4394 len = BTRFS_NAME_LEN;
4395 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4396 root->objectid, ref_key->objectid, ref_key->offset);
/*
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and len may equal
 * BTRFS_NAME_LEN, so the buffer is not guaranteed to be NUL-terminated;
 * find_dir_item() formats the name it is given with %s. Confirm whether a
 * terminator byte is needed.
 */
4398 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4400 /* Check root dir ref name */
/* NOTE(review): this compares over name_len, not the clamped len -- confirm. */
4401 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4402 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4403 root->objectid, ref_key->objectid, ref_key->offset,
4405 err |= ROOT_DIR_ERROR;
4408 /* find related dir_index */
4409 key.objectid = parent;
4410 key.type = BTRFS_DIR_INDEX_KEY;
4412 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4415 /* find related dir_item */
4416 key.objectid = parent;
4417 key.type = BTRFS_DIR_ITEM_KEY;
4418 key.offset = btrfs_name_hash(namebuf, len);
4419 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next packed extref: header plus the on-disk name length. */
4422 len = sizeof(*extref) + name_len;
4423 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4433 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4434 * DIR_ITEM/DIR_INDEX match.
4436 * @root: the root of the fs/file tree
4437 * @key: the key of the INODE_REF/INODE_EXTREF
4438 * @name: the name in the INODE_REF/INODE_EXTREF
4439 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4440 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4442 * @ext_ref: the EXTENDED_IREF feature
4444 * Return 0 if no error occurred.
4445 * Return >0 for error bitmap
/*
 * Search for an INODE_REF, and then an INODE_EXTREF, entry that matches a
 * directory entry given by (@name, @namelen, @index).
 *
 * The visible code runs in two phases. First the item at @key is scanned as
 * a list of INODE_REF entries. Then the path is released and the scan is
 * repeated on the INODE_EXTREF item, where an entry must additionally have
 * its parent equal to the directory id. An @index of (u64)-1 disables the
 * index comparison in both phases.
 *
 * Side effect: when the second phase is reached, *key is rewritten in place.
 * Its type becomes BTRFS_INODE_EXTREF_KEY and its offset becomes
 * btrfs_extref_hash(dir_id, name, namelen), where dir_id is the old
 * key->offset.
 *
 * NOTE(review): the exits between and inside the two phases (gotos on match,
 * the ext_ref test, the cur updates and the return) are not visible in this
 * listing.
 */
4447 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4448 char *name, int namelen, u64 index,
4449 unsigned int ext_ref)
4451 struct btrfs_path path;
4452 struct btrfs_inode_ref *ref;
4453 struct btrfs_inode_extref *extref;
4454 struct extent_buffer *node;
4455 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4466 btrfs_init_path(&path);
4467 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4469 ret = INODE_REF_MISSING;
4473 node = path.nodes[0];
4474 slot = path.slots[0];
4476 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4477 total = btrfs_item_size_nr(node, slot);
4479 /* Iterate over all entries of INODE_REF */
4480 while (cur < total) {
4481 ret = INODE_REF_MISSING;
4483 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4484 ref_index = btrfs_inode_ref_index(node, ref);
4485 if (index != (u64)-1 && index != ref_index)
/*
 * A name that would run past the end of the item, or that exceeds
 * BTRFS_NAME_LEN, is reported and clamped before it is copied.
 */
4488 if (cur + sizeof(*ref) + ref_namelen > total ||
4489 ref_namelen > BTRFS_NAME_LEN) {
4490 warning("root %llu INODE %s[%llu %llu] name too long",
4492 key->type == BTRFS_INODE_REF_KEY ?
4494 key->objectid, key->offset);
4496 if (cur + sizeof(*ref) > total)
4498 len = min_t(u32, total - cur - sizeof(*ref),
4504 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4507 if (len != namelen || strncmp(ref_namebuf, name, len))
4513 len = sizeof(*ref) + ref_namelen;
4514 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4519 /* Skip if not support EXTENDED_IREF feature */
4523 btrfs_release_path(&path);
4524 btrfs_init_path(&path);
/* Re-target the caller's key at the INODE_EXTREF item for this dir and name. */
4526 dir_id = key->offset;
4527 key->type = BTRFS_INODE_EXTREF_KEY;
4528 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4530 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4532 ret = INODE_REF_MISSING;
4536 node = path.nodes[0];
4537 slot = path.slots[0];
4539 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4541 total = btrfs_item_size_nr(node, slot);
4543 /* Iterate over all entries of INODE_EXTREF */
4544 while (cur < total) {
4545 ret = INODE_REF_MISSING;
4547 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4548 ref_index = btrfs_inode_extref_index(node, extref);
4549 parent = btrfs_inode_extref_parent(node, extref);
4550 if (index != (u64)-1 && index != ref_index)
4553 if (parent != dir_id)
4556 if (ref_namelen <= BTRFS_NAME_LEN) {
4559 len = BTRFS_NAME_LEN;
4560 warning("root %llu INODE %s[%llu %llu] name too long",
4562 key->type == BTRFS_INODE_REF_KEY ?
4564 key->objectid, key->offset);
4566 read_extent_buffer(node, ref_namebuf,
4567 (unsigned long)(extref + 1), len);
4569 if (len != namelen || strncmp(ref_namebuf, name, len))
4576 len = sizeof(*extref) + ref_namelen;
4577 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4582 btrfs_release_path(&path);
4587 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4588 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4590 * @root: the root of the fs/file tree
4591 * @key: the key of the DIR_ITEM/DIR_INDEX
4592 * @size: the st_size of the INODE_ITEM
4593 * @ext_ref: the EXTENDED_IREF feature
4595 * Return 0 if no error occurred.
/*
 * Walk the entries of one DIR_ITEM/DIR_INDEX item (at @node/@slot). For each
 * entry:
 *   - report a data_len that should not be there,
 *   - add the on-disk name length to *size,
 *   - unless the entry points at a ROOT_ITEM, look up the INODE_ITEM it
 *     points at and compare the inode mode with the entry file type,
 *   - call find_inode_ref() to look for the matching INODE_REF/INODE_EXTREF.
 * A DIR_INDEX item that still has bytes left after the walk is reported as
 * holding more than one entry.
 *
 * NOTE(review): the cur update, several conditions, the gotos and the return
 * are not visible in this listing.
 */
4597 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4598 struct extent_buffer *node, int slot, u64 *size,
4599 unsigned int ext_ref)
4601 struct btrfs_dir_item *di;
4602 struct btrfs_inode_item *ii;
4603 struct btrfs_path path;
4604 struct btrfs_key location;
4605 char namebuf[BTRFS_NAME_LEN] = {0};
4618 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4619 * ignore index check.
4621 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4623 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4624 total = btrfs_item_size_nr(node, slot);
4626 while (cur < total) {
4627 data_len = btrfs_dir_data_len(node, di);
4629 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4630 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4631 "DIR_ITEM" : "DIR_INDEX",
4632 key->objectid, key->offset, data_len);
4634 name_len = btrfs_dir_name_len(node, di);
/* Over-long names are reported and copied only up to BTRFS_NAME_LEN. */
4635 if (name_len <= BTRFS_NAME_LEN) {
4638 len = BTRFS_NAME_LEN;
4639 warning("root %llu %s[%llu %llu] name too long",
4641 key->type == BTRFS_DIR_ITEM_KEY ?
4642 "DIR_ITEM" : "DIR_INDEX",
4643 key->objectid, key->offset);
/* The directory size accounting uses the on-disk name_len, not the clamped len. */
4645 (*size) += name_len;
/*
 * NOTE(review): namebuf is exactly BTRFS_NAME_LEN bytes and len may equal
 * BTRFS_NAME_LEN, so the buffer is not guaranteed to be NUL-terminated,
 * yet it is formatted with %s in the error messages below. Confirm whether
 * a terminator byte is needed.
 */
4647 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4648 filetype = btrfs_dir_type(node, di);
4650 btrfs_init_path(&path);
4651 btrfs_dir_item_key_to_cpu(node, di, &location);
4653 /* Ignore related ROOT_ITEM check */
4654 if (location.type == BTRFS_ROOT_ITEM_KEY)
4657 /* Check relative INODE_ITEM(existence/filetype) */
4658 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4660 err |= INODE_ITEM_MISSING;
4661 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4662 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4663 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4664 key->offset, location.objectid, name_len,
4669 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4670 struct btrfs_inode_item);
4671 mode = btrfs_inode_mode(path.nodes[0], ii);
4673 if (imode_to_type(mode) != filetype) {
4674 err |= INODE_ITEM_MISMATCH;
4675 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4676 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4677 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4678 key->offset, name_len, namebuf, filetype);
4681 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * location is reused as the INODE_REF key: objectid stays the inode, the
 * offset becomes this directory. find_inode_ref() may rewrite it further.
 */
4682 location.type = BTRFS_INODE_REF_KEY;
4683 location.offset = key->objectid;
4684 ret = find_inode_ref(root, &location, namebuf, len,
4687 if (ret & INODE_REF_MISSING)
4688 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4689 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4690 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4691 key->offset, name_len, namebuf, filetype);
4694 btrfs_release_path(&path);
/* Step to the next entry: entry header plus its name and data bytes. */
4695 len = sizeof(*di) + name_len + data_len;
4696 di = (struct btrfs_dir_item *)((char *)di + len);
4699 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4700 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4701 root->objectid, key->objectid, key->offset);
4710 * Check file extent datasum/hole, update the size of the file extents,
4711 * check and update the last offset of the file extent.
4713 * @root: the root of fs/file tree.
4714 * @fkey: the key of the file extent.
4715 * @nodatasum: INODE_NODATASUM feature.
4716 * @size: the sum of all EXTENT_DATA items size for this inode.
4717 * @end: the offset of the last extent.
4719 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (at @node/@slot) of a file.
 *
 * Inline extents: an empty inline extent is an error, and for uncompressed
 * data the inline length must equal the inline item length. The inline
 * length is added to *size.
 *
 * Regular and prealloc extents (any other type is an error): the checksum
 * coverage of the extent is counted with count_csum_range() and compared
 * with what the extent type and the @nodatasum flag allow; then the
 * hole/continuity rules are checked against *end, and *end and *size are
 * advanced by the extent length.
 *
 * NOTE(review): the returns, the compressed/uncompressed branch heads that
 * select the csum search range, and any guard around the final *size update
 * are not visible in this listing.
 */
4721 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4722 struct extent_buffer *node, int slot,
4723 unsigned int nodatasum, u64 *size, u64 *end)
4725 struct btrfs_file_extent_item *fi;
4728 u64 extent_num_bytes;
4730 u64 csum_found; /* In byte size, sectorsize aligned */
4731 u64 search_start; /* Logical range start we search for csum */
4732 u64 search_len; /* Logical range len we search for csum */
4733 unsigned int extent_type;
4734 unsigned int is_hole;
4739 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4741 /* Check inline extent */
4742 extent_type = btrfs_file_extent_type(node, fi);
4743 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4744 struct btrfs_item *e = btrfs_item_nr(slot);
4745 u32 item_inline_len;
4747 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4748 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4749 compressed = btrfs_file_extent_compression(node, fi);
4750 if (extent_num_bytes == 0) {
4752 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4753 root->objectid, fkey->objectid, fkey->offset);
4754 err |= FILE_EXTENT_ERROR;
4756 if (!compressed && extent_num_bytes != item_inline_len) {
4758 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4759 root->objectid, fkey->objectid, fkey->offset,
4760 extent_num_bytes, item_inline_len);
4761 err |= FILE_EXTENT_ERROR;
4763 *size += extent_num_bytes;
4767 /* Check extent type */
4768 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4769 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4770 err |= FILE_EXTENT_ERROR;
4771 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4772 root->objectid, fkey->objectid, fkey->offset);
4776 /* Check REG_EXTENT/PREALLOC_EXTENT */
4777 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4778 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4779 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4780 extent_offset = btrfs_file_extent_offset(node, fi);
4781 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor a disk length is treated as a hole. */
4782 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4785 * Check EXTENT_DATA csum
4787 * For plain (uncompressed) extent, we should only check the range
4788 * we're referring to, as it's possible that part of prealloc extent
4789 * has been written, and has csum:
4791 * |<--- Original large preallocated extent A ---->|
4792 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4795 * For compressed extent, we should check the whole range.
4798 search_start = disk_bytenr + extent_offset;
4799 search_len = extent_num_bytes;
4801 search_start = disk_bytenr;
4802 search_len = disk_num_bytes;
4804 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive verdicts: csums present on a nodatasum inode,
 * csums missing or short on a regular non-hole extent, or csums present on
 * a prealloc extent.
 */
4805 if (csum_found > 0 && nodatasum) {
4806 err |= ODD_CSUM_ITEM;
4807 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4808 root->objectid, fkey->objectid, fkey->offset);
4809 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4810 !is_hole && (ret < 0 || csum_found < search_len)) {
4811 err |= CSUM_ITEM_MISSING;
4812 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4813 root->objectid, fkey->objectid, fkey->offset,
4814 csum_found, search_len);
4815 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4816 err |= ODD_CSUM_ITEM;
4817 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4818 root->objectid, fkey->objectid, fkey->offset, csum_found);
4821 /* Check EXTENT_DATA hole */
/*
 * With no_holes set, an explicit hole extent is an error. Without it, each
 * extent must start exactly where the previous one ended (*end).
 */
4822 if (no_holes && is_hole) {
4823 err |= FILE_EXTENT_ERROR;
4824 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4825 root->objectid, fkey->objectid, fkey->offset);
4826 } else if (!no_holes && *end != fkey->offset) {
4827 err |= FILE_EXTENT_ERROR;
4828 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4829 root->objectid, fkey->objectid, fkey->offset);
4832 *end += extent_num_bytes;
4834 *size += extent_num_bytes;
4840 * Check INODE_ITEM and related ITEMs (the same inode number)
4841 * 1. check link count
4842 * 2. check inode ref/extref
4843 * 3. check dir item/index
4845 * @ext_ref: the EXTENDED_IREF feature
4847 * Return 0 if no error occurred.
4848 * Return >0 for error or when the traversal is done (by error bitmap)
/*
 * Check the INODE_ITEM that @path points at, together with the items that
 * follow it and carry the same objectid.
 *
 * The inode size, nbytes, mode, nlink and NODATASUM flag are read first.
 * The path is then advanced with btrfs_next_item(); each following item
 * whose key.objectid equals the inode number is dispatched by key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() or
 * check_file_extent(). XATTR items are accepted, any other type is reported
 * as unknown. Finally the collected counters are compared with the
 * INODE_ITEM: link count, directory size, nbytes alignment (warning only),
 * missing file-extent hole, and nbytes against the summed extent size.
 *
 * An ORPHAN objectid is not checked; the path is just advanced.
 *
 * NOTE(review): the loop header, switch head, breaks, the dir/non-dir
 * branch heads, some error-bit updates and the return are not visible in
 * this listing.
 */
4850 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4851 unsigned int ext_ref)
4853 struct extent_buffer *node;
4854 struct btrfs_inode_item *ii;
4855 struct btrfs_key key;
4864 u64 extent_size = 0;
4866 unsigned int nodatasum;
4871 node = path->nodes[0];
4872 slot = path->slots[0];
4874 btrfs_item_key_to_cpu(node, &key, slot);
4875 inode_id = key.objectid;
4877 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4878 ret = btrfs_next_item(root, path);
4884 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4885 isize = btrfs_inode_size(node, ii);
4886 nbytes = btrfs_inode_nbytes(node, ii);
4887 mode = btrfs_inode_mode(node, ii);
4888 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4889 nlink = btrfs_inode_nlink(node, ii);
4890 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4893 ret = btrfs_next_item(root, path);
4895 /* out will fill 'err' using current statistics */
4897 } else if (ret > 0) {
/* The path may have moved to another leaf, so re-read node and slot. */
4902 node = path->nodes[0];
4903 slot = path->slots[0];
4904 btrfs_item_key_to_cpu(node, &key, slot);
4905 if (key.objectid != inode_id)
4909 case BTRFS_INODE_REF_KEY:
4910 ret = check_inode_ref(root, &key, node, slot, &refs,
4914 case BTRFS_INODE_EXTREF_KEY:
4915 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4916 warning("root %llu EXTREF[%llu %llu] isn't supported",
4917 root->objectid, key.objectid,
4919 ret = check_inode_extref(root, &key, node, slot, &refs,
4923 case BTRFS_DIR_ITEM_KEY:
4924 case BTRFS_DIR_INDEX_KEY:
4926 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4927 root->objectid, inode_id,
4928 imode_to_type(mode), key.objectid,
4931 ret = check_dir_item(root, &key, node, slot, &size,
4935 case BTRFS_EXTENT_DATA_KEY:
4937 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4938 root->objectid, inode_id, key.objectid,
4941 ret = check_file_extent(root, &key, node, slot,
4942 nodatasum, &extent_size,
4946 case BTRFS_XATTR_ITEM_KEY:
4949 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4950 key.objectid, key.type, key.offset);
4955 /* verify INODE_ITEM nlink/isize/nbytes */
4958 err |= LINK_COUNT_ERROR;
4959 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4960 root->objectid, inode_id, nlink);
4964 * Just a warning, as dir inode nbytes is just an
4965 * instructive value.
4967 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4968 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4969 root->objectid, inode_id, root->nodesize);
/* For directories, size is the sum of name lengths added by check_dir_item(). */
4972 if (isize != size) {
4974 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4975 root->objectid, inode_id, isize, size);
4978 if (nlink != refs) {
4979 err |= LINK_COUNT_ERROR;
4980 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4981 root->objectid, inode_id, nlink, refs);
4982 } else if (!nlink) {
4986 if (!nbytes && !no_holes && extent_end < isize) {
4987 err |= NBYTES_ERROR;
4988 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4989 root->objectid, inode_id, isize);
4992 if (nbytes != extent_size) {
4993 err |= NBYTES_ERROR;
4994 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4995 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of a fs tree, i.e. the INODE_ITEM whose objectid is
 * BTRFS_FIRST_FREE_OBJECTID.
 *
 * The item is looked up with btrfs_search_slot(). If it is missing,
 * INODE_ITEM_MISSING is recorded and an error is printed; otherwise the
 * result of check_inode_item() is OR-ed into the error bitmap. A root that
 * is being dropped (zero refs and drop progress already at or past some
 * threshold) is exempt.
 *
 * NOTE(review): the right-hand side of the drop_progress comparison, the
 * branch heads around the lookup result and the return are not visible in
 * this listing.
 */
5002 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5004 struct btrfs_path path;
5005 struct btrfs_key key;
5009 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5010 key.type = BTRFS_INODE_ITEM_KEY;
5013 /* For root being dropped, we don't need to check first inode */
5014 if (btrfs_root_refs(&root->root_item) == 0 &&
5015 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5019 btrfs_init_path(&path);
5021 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5026 err |= INODE_ITEM_MISSING;
5027 error("first inode item of root %llu is missing",
5031 err |= check_inode_item(root, &path, ext_ref);
5036 btrfs_release_path(&path);
5041 * Iterate all items on the tree and call check_inode_item() to check.
5043 * @root: the root of the tree to be checked.
5044 * @ext_ref: the EXTENDED_IREF feature
5046 * Return 0 if no error found.
5047 * Return <0 for error.
/*
 * Check one fs/subvolume tree by walking it.
 *
 * check_fs_first_inode() is called first. The walk then starts either at
 * the root node (root still referenced, or no drop progress recorded) or at
 * the position saved in root_item->drop_progress / drop_level for a root
 * that is partly dropped. The tree is traversed by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().
 *
 * NOTE(review): the loop around the two walk calls, the handling of their
 * return values, the error accumulation and the final return are not
 * visible in this listing.
 */
5049 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5051 struct btrfs_path path;
5052 struct node_refs nrefs;
5053 struct btrfs_root_item *root_item = &root->root_item;
5059 * We need to manually check the first inode item(256)
5060 * As the following traversal function will only start from
5061 * the first inode item in the leaf, if inode item(256) is missing
5062 * we will just skip it forever.
5064 ret = check_fs_first_inode(root, ext_ref);
5068 memset(&nrefs, 0, sizeof(nrefs));
5069 level = btrfs_header_level(root->node);
5070 btrfs_init_path(&path);
/*
 * Normal case: seed the path with the root node itself and take a
 * reference on it with extent_buffer_get(), since the path now holds it.
 */
5072 if (btrfs_root_refs(root_item) > 0 ||
5073 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5074 path.nodes[level] = root->node;
5075 path.slots[level] = 0;
5076 extent_buffer_get(root->node);
5078 struct btrfs_key key;
/* Partly dropped root: resume from the recorded drop key and level. */
5080 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5081 level = root_item->drop_level;
5082 path.lowest_level = level;
5083 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5090 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5093 /* if ret is negative, walk shall stop */
5099 ret = walk_up_tree_v2(root, &path, &level);
5101 /* Normal exit, reset ret to err */
5108 btrfs_release_path(&path);
5113 * Find the relative ref for root_ref and root_backref.
5115 * @root: the root of the root tree.
5116 * @ref_key: the key of the root ref.
5118 * Return 0 if no error occurred.
/*
 * Check that a ROOT_REF (or ROOT_BACKREF) item has a matching counterpart.
 *
 * The dirid, sequence and name are read from the item at @node/@slot. The
 * counterpart key is built by swapping objectid and offset and flipping the
 * type between ROOT_REF and ROOT_BACKREF. If the counterpart is not found,
 * ROOT_REF_MISSING is recorded. Otherwise dirid, sequence, name length and
 * name bytes are compared, and any difference records ROOT_REF_MISMATCH.
 *
 * NOTE(review): the branch heads around the search result, the gotos and
 * the return are not visible in this listing.
 */
5120 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5121 struct extent_buffer *node, int slot)
5123 struct btrfs_path path;
5124 struct btrfs_key key;
5125 struct btrfs_root_ref *ref;
5126 struct btrfs_root_ref *backref;
5127 char ref_name[BTRFS_NAME_LEN] = {0};
5128 char backref_name[BTRFS_NAME_LEN] = {0};
5134 u32 backref_namelen;
5139 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5140 ref_dirid = btrfs_root_ref_dirid(node, ref);
5141 ref_seq = btrfs_root_ref_sequence(node, ref);
5142 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are reported and copied only up to BTRFS_NAME_LEN. */
5144 if (ref_namelen <= BTRFS_NAME_LEN) {
5147 len = BTRFS_NAME_LEN;
5148 warning("%s[%llu %llu] ref_name too long",
5149 ref_key->type == BTRFS_ROOT_REF_KEY ?
5150 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5153 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5155 /* Find relative root_ref */
5156 key.objectid = ref_key->offset;
/*
 * Subtracting the current type from the sum of the two type values yields
 * the other one: ROOT_REF becomes ROOT_BACKREF and vice versa.
 */
5157 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5158 key.offset = ref_key->objectid;
5160 btrfs_init_path(&path);
5161 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5163 err |= ROOT_REF_MISSING;
5164 error("%s[%llu %llu] couldn't find relative ref",
5165 ref_key->type == BTRFS_ROOT_REF_KEY ?
5166 "ROOT_REF" : "ROOT_BACKREF",
5167 ref_key->objectid, ref_key->offset);
5171 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5172 struct btrfs_root_ref);
5173 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5174 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5175 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5177 if (backref_namelen <= BTRFS_NAME_LEN) {
5178 len = backref_namelen;
5180 len = BTRFS_NAME_LEN;
5181 warning("%s[%llu %llu] ref_name too long",
5182 key.type == BTRFS_ROOT_REF_KEY ?
5183 "ROOT_REF" : "ROOT_BACKREF",
5184 key.objectid, key.offset);
5186 read_extent_buffer(path.nodes[0], backref_name,
5187 (unsigned long)(backref + 1), len);
/*
 * The name bytes are compared over the counterpart's clamped length; the
 * on-disk lengths themselves are compared first.
 */
5189 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5190 ref_namelen != backref_namelen ||
5191 strncmp(ref_name, backref_name, len)) {
5192 err |= ROOT_REF_MISMATCH;
5193 error("%s[%llu %llu] mismatch relative ref",
5194 ref_key->type == BTRFS_ROOT_REF_KEY ?
5195 "ROOT_REF" : "ROOT_BACKREF",
5196 ref_key->objectid, ref_key->offset);
5199 btrfs_release_path(&path);
5204 * Check all fs/file tree in low_memory mode.
5206 * 1. for fs tree root item, call check_fs_root_v2()
5207 * 2. for fs tree root ref/backref, call check_root_ref()
5209 * Return 0 if no error occurred.
/*
 * Low-memory-mode driver: iterate the root tree starting at
 * BTRFS_FS_TREE_OBJECTID and check every fs/subvolume tree and root ref.
 *
 * For each ROOT_ITEM whose objectid passes fs_root_objectid() the root is
 * read and handed to check_fs_root_v2(); tree-reloc roots are read with
 * btrfs_read_fs_root_no_cache() and freed again afterwards. ROOT_REF and
 * ROOT_BACKREF items are handed to check_root_ref(). Iteration advances
 * with btrfs_next_item() and stops at keys whose objectid is above
 * BTRFS_LAST_FREE_OBJECTID.
 *
 * ext_ref caches whether the EXTENDED_IREF incompat feature is set and is
 * passed down to the per-root check.
 *
 * NOTE(review): the loop header, the error accumulation, the gotos and the
 * return are not visible in this listing.
 */
5211 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5213 struct btrfs_root *tree_root = fs_info->tree_root;
5214 struct btrfs_root *cur_root = NULL;
5215 struct btrfs_path path;
5216 struct btrfs_key key;
5217 struct extent_buffer *node;
5218 unsigned int ext_ref;
5223 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5225 btrfs_init_path(&path);
5226 key.objectid = BTRFS_FS_TREE_OBJECTID;
5228 key.type = BTRFS_ROOT_ITEM_KEY;
5230 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5234 } else if (ret > 0) {
5240 node = path.nodes[0];
5241 slot = path.slots[0];
5242 btrfs_item_key_to_cpu(node, &key, slot);
5243 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5245 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5246 fs_root_objectid(key.objectid)) {
5247 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5248 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5251 key.offset = (u64)-1;
5252 cur_root = btrfs_read_fs_root(fs_info, &key);
5255 if (IS_ERR(cur_root)) {
5256 error("Fail to read fs/subvol tree: %lld",
5262 ret = check_fs_root_v2(cur_root, ext_ref);
/* Only the uncached tree-reloc root is freed by this function. */
5265 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5266 btrfs_free_fs_root(cur_root);
5267 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5268 key.type == BTRFS_ROOT_BACKREF_KEY) {
5269 ret = check_root_ref(tree_root, &key, node, slot);
5273 ret = btrfs_next_item(tree_root, &path);
5283 btrfs_release_path(&path);
/*
 * Verify every backref attached to an extent record.
 *
 * For each entry on rec->backrefs the visible checks are:
 *   - the backref was found in the extent tree,
 *   - a tree backref was referenced back (found_ref),
 *   - a data backref has found_ref equal to num_refs, a disk_bytenr equal
 *     to rec->start and a byte count equal to rec->nr.
 * The found references are summed up and the total is compared with
 * rec->refs. Each failed check has a diagnostic printed to stderr.
 *
 * NOTE(review): the tests on @print_errs, the error flag updates, the
 * early exits, the list advance and the return are not visible in this
 * listing. Presumably @print_errs gates the fprintf calls and the result is
 * zero when everything matches (maybe_free_extent_rec() treats zero as
 * clean) -- confirm.
 */
5287 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5289 struct list_head *cur = rec->backrefs.next;
5290 struct extent_backref *back;
5291 struct tree_backref *tback;
5292 struct data_backref *dback;
5296 while(cur != &rec->backrefs) {
5297 back = to_extent_backref(cur);
/* Check 1: the backref has no counterpart in the extent tree. */
5299 if (!back->found_extent_tree) {
5303 if (back->is_data) {
5304 dback = to_data_backref(back);
5305 fprintf(stderr, "Backref %llu %s %llu"
5306 " owner %llu offset %llu num_refs %lu"
5307 " not found in extent tree\n",
5308 (unsigned long long)rec->start,
5309 back->full_backref ?
5311 back->full_backref ?
5312 (unsigned long long)dback->parent:
5313 (unsigned long long)dback->root,
5314 (unsigned long long)dback->owner,
5315 (unsigned long long)dback->offset,
5316 (unsigned long)dback->num_refs);
5318 tback = to_tree_backref(back);
5319 fprintf(stderr, "Backref %llu parent %llu"
5320 " root %llu not found in extent tree\n",
5321 (unsigned long long)rec->start,
5322 (unsigned long long)tback->parent,
5323 (unsigned long long)tback->root);
/* Check 2: a tree backref that nothing referenced back. */
5326 if (!back->is_data && !back->found_ref) {
5330 tback = to_tree_backref(back);
5331 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5332 (unsigned long long)rec->start,
5333 back->full_backref ? "parent" : "root",
5334 back->full_backref ?
5335 (unsigned long long)tback->parent :
5336 (unsigned long long)tback->root, back);
/* Check 3: per-data-backref count, disk bytenr and byte length. */
5338 if (back->is_data) {
5339 dback = to_data_backref(back);
5340 if (dback->found_ref != dback->num_refs) {
5344 fprintf(stderr, "Incorrect local backref count"
5345 " on %llu %s %llu owner %llu"
5346 " offset %llu found %u wanted %u back %p\n",
5347 (unsigned long long)rec->start,
5348 back->full_backref ?
5350 back->full_backref ?
5351 (unsigned long long)dback->parent:
5352 (unsigned long long)dback->root,
5353 (unsigned long long)dback->owner,
5354 (unsigned long long)dback->offset,
5355 dback->found_ref, dback->num_refs, back);
5357 if (dback->disk_bytenr != rec->start) {
5361 fprintf(stderr, "Backref disk bytenr does not"
5362 " match extent record, bytenr=%llu, "
5363 "ref bytenr=%llu\n",
5364 (unsigned long long)rec->start,
5365 (unsigned long long)dback->disk_bytenr);
5368 if (dback->bytes != rec->nr) {
5372 fprintf(stderr, "Backref bytes do not match "
5373 "extent backref, bytenr=%llu, ref "
5374 "bytes=%llu, backref bytes=%llu\n",
5375 (unsigned long long)rec->start,
5376 (unsigned long long)rec->nr,
5377 (unsigned long long)dback->bytes);
/* Accumulate the global reference count; data backrefs add their found_ref. */
5380 if (!back->is_data) {
5383 dback = to_data_backref(back);
5384 found += dback->found_ref;
/* Check 4: the summed count must equal the count recorded on the extent. */
5387 if (found != rec->refs) {
5391 fprintf(stderr, "Incorrect global backref count "
5392 "on %llu found %llu wanted %llu\n",
5393 (unsigned long long)rec->start,
5394 (unsigned long long)found,
5395 (unsigned long long)rec->refs);
/*
 * Drain the backref list of an extent record: loop while rec->backrefs is
 * not empty, taking the first list entry each time.
 *
 * NOTE(review): the statements that unlink and release each entry, and the
 * return, are not visible in this listing.
 */
5401 static int free_all_extent_backrefs(struct extent_record *rec)
5403 struct extent_backref *back;
5404 struct list_head *cur;
5405 while (!list_empty(&rec->backrefs)) {
5406 cur = rec->backrefs.next;
5407 back = to_extent_backref(cur);
/*
 * Empty an extent-record cache tree: repeatedly take the first cache
 * extent, map it back to its extent_record, remove it from the tree and
 * drop its backrefs with free_all_extent_backrefs().
 *
 * NOTE(review): the loop header, the termination test and the release of
 * the record itself are not visible in this listing.
 */
5414 static void free_extent_record_cache(struct cache_tree *extent_cache)
5416 struct cache_extent *cache;
5417 struct extent_record *rec;
5420 cache = first_cache_extent(extent_cache);
5423 rec = container_of(cache, struct extent_record, cache);
5424 remove_cache_extent(extent_cache, cache);
5425 free_all_extent_backrefs(rec);
/*
 * Drop an extent record from extent_cache once nothing is left to verify.
 *
 * The record is dropped only when all of the following hold:
 *  - its content and its owner reference have been checked,
 *  - the ref count from the extent item equals the counted refs and is > 0,
 *  - it has no duplicates,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 *
 * When dropped, the record is removed from the cache, its backrefs are
 * released and it is unlinked from whatever list rec->list is on.
 * NOTE(review): the release of rec itself and the return value are not
 * shown in this listing.
 */
5430 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5431 struct extent_record *rec)
5433 if (rec->content_checked && rec->owner_ref_checked &&
5434 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5435 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5436 !rec->bad_full_backref && !rec->crossing_stripes &&
5437 !rec->wrong_chunk_type) {
5438 remove_cache_extent(extent_cache, &rec->cache);
5439 free_all_extent_backrefs(rec);
5440 list_del_init(&rec->list);
/*
 * Check that the owner recorded in the header of buf is backed by a
 * reference to the tree block described by rec.
 *
 * First the backrefs of rec are scanned and the header owner is compared
 * with the root of each tree backref. If that does not settle it, the tree
 * named by the header owner is read and searched (read-only) for the first
 * key of buf, stopping one level above buf; the block counts as found when
 * the parent slot on that path points at buf->start.
 *
 * Returns 0 when the reference was found and 1 otherwise (final statement).
 * NOTE(review): the bodies of several if-statements below (the skip and
 * early-return statements) are not shown in this listing.
 */
5446 static int check_owner_ref(struct btrfs_root *root,
5447 struct extent_record *rec,
5448 struct extent_buffer *buf)
5450 struct extent_backref *node;
5451 struct tree_backref *back;
5452 struct btrfs_root *ref_root;
5453 struct btrfs_key key;
5454 struct btrfs_path path;
5455 struct extent_buffer *parent;
5460 list_for_each_entry(node, &rec->backrefs, list) {
/* The next two tests filter on found_ref and full_backref (bodies not shown). */
5463 if (!node->found_ref)
5465 if (node->full_backref)
5467 back = to_tree_backref(node);
/* A backref whose root equals the header owner matches directly. */
5468 if (btrfs_header_owner(buf) == back->root)
5471 BUG_ON(rec->is_root);
5473 /* try to find the block by search corresponding fs tree */
5474 key.objectid = btrfs_header_owner(buf);
5475 key.type = BTRFS_ROOT_ITEM_KEY;
5476 key.offset = (u64)-1;
5478 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5479 if (IS_ERR(ref_root))
5482 level = btrfs_header_level(buf);
/*
 * Take the first key of buf, as an item key or as a node key.
 * NOTE(review): the test selecting between the two calls is not shown -
 * presumably it depends on level.
 */
5484 btrfs_item_key_to_cpu(buf, &key, 0);
5486 btrfs_node_key_to_cpu(buf, &key, 0);
5488 btrfs_init_path(&path);
/* Stop the search one level above buf so that the parent slot can be read. */
5489 path.lowest_level = level + 1;
5490 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5494 parent = path.nodes[level + 1];
/* The owner is confirmed when the parent slot points at this very block. */
5495 if (parent && buf->start == btrfs_node_blockptr(parent,
5496 path.slots[level + 1]))
5499 btrfs_release_path(&path);
5500 return found ? 0 : 1;
/*
 * Scan the backrefs of rec looking for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID; the full_backref flag of each node is tested
 * before the root comparison.
 * NOTE(review): the statements taken on each test and the return values are
 * not shown in this listing.
 */
5503 static int is_extent_tree_record(struct extent_record *rec)
5505 struct list_head *cur = rec->backrefs.next;
5506 struct extent_backref *node;
5507 struct tree_backref *back;
/* Manual list walk that ends when the cursor is back at the list head. */
5510 while(cur != &rec->backrefs) {
5511 node = to_extent_backref(cur);
5515 back = to_tree_backref(node);
5516 if (node->full_backref)
5518 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an unreadable block as a corrupt extent.
 *
 * The record covering [start, start + len) is looked up in extent_cache and
 * tested with is_extent_tree_record(). On the path that reaches the end,
 * the parent key stored in the record is converted to CPU byte order and
 * passed to btrfs_add_corrupt_extent_record(), whose result is returned.
 * NOTE(review): the early exits (lookup miss, record rejected by the test)
 * are not shown in this listing.
 */
5525 static int record_bad_block_io(struct btrfs_fs_info *info,
5526 struct cache_tree *extent_cache,
5529 struct extent_record *rec;
5530 struct cache_extent *cache;
5531 struct btrfs_key key;
5533 cache = lookup_cache_extent(extent_cache, start, len);
5537 rec = container_of(cache, struct extent_record, cache);
5538 if (!is_extent_tree_record(rec))
5541 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5542 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at slot and slot + 1 of buf.
 *
 * Node (header level != 0): the two struct btrfs_key_ptr entries are read
 * out and written back in exchanged positions; the first node key is then
 * handed to btrfs_fixup_low_keys() for the level above.
 *
 * Leaf: keys, offsets and sizes of both items are read, both payloads are
 * copied into temporary buffers, written back, and the item offsets, sizes
 * and keys are exchanged.
 *
 * NOTE(review): the allocation failure handling, the release of the
 * temporary buffers, the condition guarding the low-key fixup and the
 * return value are not shown in this listing.
 */
5545 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5546 struct extent_buffer *buf, int slot)
5548 if (btrfs_header_level(buf)) {
5549 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers first, then write each into the other position. */
5551 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5552 sizeof(struct btrfs_key_ptr));
5553 read_extent_buffer(buf, &ptr2,
5554 btrfs_node_key_ptr_offset(slot + 1),
5555 sizeof(struct btrfs_key_ptr));
5556 write_extent_buffer(buf, &ptr1,
5557 btrfs_node_key_ptr_offset(slot + 1),
5558 sizeof(struct btrfs_key_ptr));
5559 write_extent_buffer(buf, &ptr2,
5560 btrfs_node_key_ptr_offset(slot),
5561 sizeof(struct btrfs_key_ptr));
/*
 * Propagate the first key of this node to the level above.
 * NOTE(review): the guard around this step is not shown - presumably it
 * applies only when the first slot changed.
 */
5563 struct btrfs_disk_key key;
5564 btrfs_node_key(buf, &key, 0);
5565 btrfs_fixup_low_keys(root, path, &key,
5566 btrfs_header_level(buf) + 1);
5569 struct btrfs_item *item1, *item2;
5570 struct btrfs_key k1, k2;
5571 char *item1_data, *item2_data;
5572 u32 item1_offset, item2_offset, item1_size, item2_size;
5574 item1 = btrfs_item_nr(slot);
5575 item2 = btrfs_item_nr(slot + 1);
5576 btrfs_item_key_to_cpu(buf, &k1, slot);
5577 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5578 item1_offset = btrfs_item_offset(buf, item1);
5579 item2_offset = btrfs_item_offset(buf, item2);
5580 item1_size = btrfs_item_size(buf, item1);
5581 item2_size = btrfs_item_size(buf, item2);
/* Temporary copies of both payloads, each sized for its own item. */
5583 item1_data = malloc(item1_size);
5586 item2_data = malloc(item2_size);
5592 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5593 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data was allocated with item1_size bytes but
 * item2_size bytes are copied out of it here (and the reverse for
 * item2_data). If the two sizes differ, one of these calls reads past the
 * end of its buffer - confirm whether equal sizes are guaranteed.
 */
5595 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5596 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset and size fields of the two item headers. */
5600 btrfs_set_item_offset(buf, item1, item2_offset);
5601 btrfs_set_item_offset(buf, item2, item1_offset);
5602 btrfs_set_item_size(buf, item1, item2_size);
5603 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so point it at each slot in turn. */
5605 path->slots[0] = slot;
5606 btrfs_set_item_key_unsafe(root, path, &k2);
5607 path->slots[0] = slot + 1;
5608 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key order of the block at path->lowest_level.
 *
 * Each pair of neighbouring keys in the block is compared; a pair for which
 * btrfs_comp_cpu_keys() is not negative is passed to swap_values(), and the
 * buffer is marked dirty.
 * NOTE(review): the test choosing node keys versus item keys, the statement
 * under the comparison, the handling of ret and the return value are not
 * shown in this listing.
 */
5613 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5615 struct extent_buffer *buf;
5616 struct btrfs_key k1, k2;
5618 int level = path->lowest_level;
5621 buf = path->nodes[level];
/* Stop one short of the last entry because entry i + 1 is read as well. */
5622 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5624 btrfs_node_key_to_cpu(buf, &k1, i);
5625 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5627 btrfs_item_key_to_cpu(buf, &k1, i);
5628 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* A negative result means k1 sorts before k2, i.e. the pair is in order. */
5630 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5632 ret = swap_values(root, path, buf, i);
5635 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at slot from leaf buf.
 *
 * Only keys of type DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF
 * and EXTENT_DATA_REF are handled. For those, a message is printed, the
 * item headers after slot are moved down by one position, the item count in
 * the header is decremented and the buffer is marked dirty. The first key
 * of the leaf is also handed to btrfs_fixup_low_keys() for level 1.
 * NOTE(review): the statement taken for other key types, the condition
 * guarding the low-key fixup and the return value are not shown in this
 * listing.
 */
5641 static int delete_bogus_item(struct btrfs_root *root,
5642 struct btrfs_path *path,
5643 struct extent_buffer *buf, int slot)
5645 struct btrfs_key key;
5646 int nritems = btrfs_header_nritems(buf);
5648 btrfs_item_key_to_cpu(buf, &key, slot);
5650 /* These are all the keys we can deal with missing. */
5651 if (key.type != BTRFS_DIR_INDEX_KEY &&
5652 key.type != BTRFS_EXTENT_ITEM_KEY &&
5653 key.type != BTRFS_METADATA_ITEM_KEY &&
5654 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5655 key.type != BTRFS_EXTENT_DATA_REF_KEY)
/*
 * NOTE(review): buf->start is passed to a llu conversion without the
 * (unsigned long long) cast used for the same field elsewhere in this
 * file - confirm the width of u64 on all supported targets.
 */
5658 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5659 (unsigned long long)key.objectid, key.type,
5660 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; only headers are moved here. */
5661 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5662 btrfs_item_nr_offset(slot + 1),
5663 sizeof(struct btrfs_item) *
5664 (nritems - slot - 1));
5665 btrfs_set_header_nritems(buf, nritems - 1);
5667 struct btrfs_disk_key disk_key;
5669 btrfs_item_key(buf, &disk_key, 0);
5670 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5672 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 must end exactly at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item must end exactly where the previous item starts. An item that ends
 * too far is handed to delete_bogus_item(); the messages printed below
 * cover the cases that cannot be fixed. An item that ends too early has its
 * data moved up by the size of the gap and its offset updated, after which
 * the buffer is marked dirty.
 * NOTE(review): the handling of ret after delete_bogus_item(), the loop
 * control statements, the new offset value passed to btrfs_set_item_offset()
 * and the return statement are not shown in this listing.
 */
5676 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5678 struct extent_buffer *buf;
5682 /* We should only get this for leaves */
5683 BUG_ON(path->lowest_level);
5684 buf = path->nodes[0];
5686 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5687 unsigned int shift = 0, offset;
/* First item: its end is compared against the leaf data size. */
5689 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5690 BTRFS_LEAF_DATA_SIZE(root)) {
5691 if (btrfs_item_end_nr(buf, i) >
5692 BTRFS_LEAF_DATA_SIZE(root)) {
5693 ret = delete_bogus_item(root, path, buf, i);
5696 fprintf(stderr, "item is off the end of the "
5697 "leaf, can't fix\n");
/* Size of the gap between the end of the item and the end of the data area. */
5701 shift = BTRFS_LEAF_DATA_SIZE(root) -
5702 btrfs_item_end_nr(buf, i);
/* Later items: the end is compared against the start of the previous item. */
5703 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5704 btrfs_item_offset_nr(buf, i - 1)) {
5705 if (btrfs_item_end_nr(buf, i) >
5706 btrfs_item_offset_nr(buf, i - 1)) {
5707 ret = delete_bogus_item(root, path, buf, i);
5710 fprintf(stderr, "items overlap, can't fix\n");
5714 shift = btrfs_item_offset_nr(buf, i - 1) -
5715 btrfs_item_end_nr(buf, i);
5720 printf("Shifting item nr %d by %u bytes in block %llu\n",
5721 i, shift, (unsigned long long)buf->start);
5722 offset = btrfs_item_offset_nr(buf, i);
/* Move the payload up by shift bytes; source and target may overlap. */
5723 memmove_extent_buffer(buf,
5724 btrfs_leaf_data(buf) + offset + shift,
5725 btrfs_leaf_data(buf) + offset,
5726 btrfs_item_size_nr(buf, i));
5727 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5729 btrfs_mark_buffer_dirty(buf);
5733 * We may have moved things, in which case we want to exit so we don't
5734 * write those changes out. Once we have proper abort functionality in
5735 * progs this can be changed to something nicer.
5742 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5743 * then just return -EIO.
/*
 * Try to repair a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are considered. btrfs_find_all_roots() collects the roots that reference
 * buf->start; for each of them the root is read, a transaction is started,
 * the tree is searched with COW enabled for the first key of buf down to
 * the level of buf, and fix_key_order() or fix_item_offset() is applied to
 * the resulting path before the transaction is committed.
 * NOTE(review): the error branches (which of the commit calls below belongs
 * to which path), the release of the roots list and the return statements
 * are not shown in this listing.
 */
5745 static int try_to_fix_bad_block(struct btrfs_root *root,
5746 struct extent_buffer *buf,
5747 enum btrfs_tree_block_status status)
5749 struct btrfs_trans_handle *trans;
5750 struct ulist *roots;
5751 struct ulist_node *node;
5752 struct btrfs_root *search_root;
5753 struct btrfs_path path;
5754 struct ulist_iterator iter;
5755 struct btrfs_key root_key, key;
5758 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5759 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5762 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5766 btrfs_init_path(&path);
5767 ULIST_ITER_INIT(&iter);
/* Each ulist value is used as the objectid of a root item key. */
5768 while ((node = ulist_next(roots, &iter))) {
5769 root_key.objectid = node->val;
5770 root_key.type = BTRFS_ROOT_ITEM_KEY;
5771 root_key.offset = (u64)-1;
5773 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5780 trans = btrfs_start_transaction(search_root, 0);
5781 if (IS_ERR(trans)) {
5782 ret = PTR_ERR(trans);
/*
 * Search down to the level of buf with block checking disabled on the
 * path, since the target block is already known to fail those checks.
 */
5786 path.lowest_level = btrfs_header_level(buf);
5787 path.skip_check_block = 1;
5788 if (path.lowest_level)
5789 btrfs_node_key_to_cpu(buf, &key, 0);
5791 btrfs_item_key_to_cpu(buf, &key, 0);
/* The last argument (cow) is 1, so the blocks on the path get COWed. */
5792 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5795 btrfs_commit_transaction(trans, search_root);
5798 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5799 ret = fix_key_order(search_root, &path);
5800 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5801 ret = fix_item_offset(search_root, &path);
5803 btrfs_commit_transaction(trans, search_root);
5806 btrfs_release_path(&path);
5807 btrfs_commit_transaction(trans, search_root);
5810 btrfs_release_path(&path);
/*
 * Run the basic checks on one tree block and update its extent record.
 *
 * The record covering buf is looked up in extent_cache and receives the
 * header generation, the objectid of the first key (when the block has
 * items) and the level. The block is validated with btrfs_check_leaf() or
 * btrfs_check_node(); a failing block is handed to try_to_fix_bad_block()
 * and reported as a bad block if that does not help. The record is then
 * marked content_checked; owner_ref_checked is set directly when flags has
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, otherwise check_owner_ref() is consulted.
 * Finally maybe_free_extent_rec() gets the chance to drop the record.
 *
 * NOTE(review): try_to_fix_bad_block() is declared to return int but its
 * result is stored in an enum btrfs_tree_block_status and compared with
 * BTRFS_TREE_BLOCK_CLEAN - this presumably relies on that enumerator being
 * 0; confirm.
 * NOTE(review): the lookup miss handling, several branch selectors and the
 * return statements are not shown in this listing.
 */
5814 static int check_block(struct btrfs_root *root,
5815 struct cache_tree *extent_cache,
5816 struct extent_buffer *buf, u64 flags)
5818 struct extent_record *rec;
5819 struct cache_extent *cache;
5820 struct btrfs_key key;
5821 enum btrfs_tree_block_status status;
5825 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5828 rec = container_of(cache, struct extent_record, cache);
5829 rec->generation = btrfs_header_generation(buf);
5831 level = btrfs_header_level(buf);
/* Only a block with at least one item has a first key to record. */
5832 if (btrfs_header_nritems(buf) > 0) {
5835 btrfs_item_key_to_cpu(buf, &key, 0);
5837 btrfs_node_key_to_cpu(buf, &key, 0);
5839 rec->info_objectid = key.objectid;
5841 rec->info_level = level;
5843 if (btrfs_is_leaf(buf))
5844 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5846 status = btrfs_check_node(root, &rec->parent_key, buf);
5848 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5850 status = try_to_fix_bad_block(root, buf, status);
5851 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5853 fprintf(stderr, "bad block %llu\n",
5854 (unsigned long long)buf->start);
5857 * Signal to callers we need to start the scan over
5858 * again since we'll have cowed blocks.
5863 rec->content_checked = 1;
5864 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5865 rec->owner_ref_checked = 1;
5867 ret = check_owner_ref(root, rec, buf);
5869 rec->owner_ref_checked = 1;
5873 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of rec for a tree backref matching parent or root.
 *
 * Two matching modes are visible: one tests the full_backref flag of the
 * node and compares parent with back->parent, the other tests the flag the
 * opposite way and compares root with back->root.
 * NOTE(review): the selector between the two modes, the statements taken on
 * each test and the return values are not shown in this listing -
 * presumably parent selects the mode; confirm.
 */
5877 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5878 u64 parent, u64 root)
5880 struct list_head *cur = rec->backrefs.next;
5881 struct extent_backref *node;
5882 struct tree_backref *back;
5884 while(cur != &rec->backrefs) {
5885 node = to_extent_backref(cur);
5889 back = to_tree_backref(node);
/* Parent-based comparison. */
5891 if (!node->full_backref)
5893 if (parent == back->parent)
/* Root-based comparison. */
5896 if (node->full_backref)
5898 if (back->root == root)
/*
 * Allocate a tree backref, clear its embedded node and append it to the
 * backrefs list of rec.
 *
 * One variant stores parent and sets full_backref to 1, the other sets
 * full_backref to 0.
 * NOTE(review): the allocation failure check, the selector between the two
 * variants, the store of root and the return statement are not shown in
 * this listing.
 */
5905 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5906 u64 parent, u64 root)
5908 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure. */
5912 memset(&ref->node, 0, sizeof(ref->node));
5914 ref->parent = parent;
5915 ref->node.full_backref = 1;
5918 ref->node.full_backref = 0;
5920 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of rec for a data backref.
 *
 * Two matching modes are visible: one compares parent with back->parent,
 * the other compares root, owner and offset. In the second mode an extra
 * test fires when found_ref is set, the node already has found_ref set, and
 * bytes or disk_bytenr differ from the stored values.
 * NOTE(review): the found_ref parameter declaration, the selector between
 * the modes, the statements taken on each test and the return values are
 * not shown in this listing.
 */
5925 static struct data_backref *find_data_backref(struct extent_record *rec,
5926 u64 parent, u64 root,
5927 u64 owner, u64 offset,
5929 u64 disk_bytenr, u64 bytes)
5931 struct list_head *cur = rec->backrefs.next;
5932 struct extent_backref *node;
5933 struct data_backref *back;
5935 while(cur != &rec->backrefs) {
5936 node = to_extent_backref(cur);
5940 back = to_data_backref(node);
/* Parent-based comparison. */
5942 if (!node->full_backref)
5944 if (parent == back->parent)
/* Comparison by root, owner and offset. */
5947 if (node->full_backref)
5949 if (back->root == root && back->owner == owner &&
5950 back->offset == offset) {
/* Same identity but a different size or disk location. */
5951 if (found_ref && node->found_ref &&
5952 (back->bytes != bytes ||
5953 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref, clear its embedded node, mark it as data and
 * append it to the backrefs list of rec.
 *
 * One variant stores parent and sets full_backref to 1, the other stores
 * offset and sets full_backref to 0. bytes is set to max_size, and
 * rec->max_size is raised when max_size is larger.
 * NOTE(review): the max_size parameter declaration, the allocation failure
 * check, the selector between the two variants, the remaining field stores
 * and the return statement are not shown in this listing.
 */
5962 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5963 u64 parent, u64 root,
5964 u64 owner, u64 offset,
5967 struct data_backref *ref = malloc(sizeof(*ref));
5971 memset(&ref->node, 0, sizeof(ref->node));
5972 ref->node.is_data = 1;
5975 ref->parent = parent;
5978 ref->node.full_backref = 1;
5982 ref->offset = offset;
5983 ref->node.full_backref = 0;
5985 ref->bytes = max_size;
5988 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the largest size seen so far on the record. */
5989 if (max_size > rec->max_size)
5990 rec->max_size = max_size;
5994 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent lies in a block group whose
 * flags do not fit the kind of extent.
 *
 * - Data extent: the block group must have BTRFS_BLOCK_GROUP_DATA.
 * - Metadata extent: the block group must have SYSTEM or METADATA; when the
 *   record has backrefs, the first one refines the check: a data backref is
 *   always wrong, a tree backref rooted in the chunk tree requires SYSTEM
 *   and any other root requires METADATA.
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are not shown in this listing.
 */
5995 static void check_extent_type(struct extent_record *rec)
5997 struct btrfs_block_group_cache *bg_cache;
5999 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6003 /* data extent, check chunk directly*/
6004 if (!rec->metadata) {
6005 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6006 rec->wrong_chunk_type = 1;
6010 /* metadata extent, check the obvious case first */
6011 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6012 BTRFS_BLOCK_GROUP_METADATA))) {
6013 rec->wrong_chunk_type = 1;
6018 * Check SYSTEM extent, as it's also marked as metadata, we can only
6019 * make sure it's a SYSTEM extent by its backref
6021 if (!list_empty(&rec->backrefs)) {
6022 struct extent_backref *node;
6023 struct tree_backref *tback;
/* Only the first backref on the list is inspected. */
6026 node = to_extent_backref(rec->backrefs.next);
6027 if (node->is_data) {
6028 /* tree block shouldn't have data backref */
6029 rec->wrong_chunk_type = 1;
6032 tback = container_of(node, struct tree_backref, node);
6034 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6035 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6037 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6038 if (!(bg_cache->flags & bg_type))
6039 rec->wrong_chunk_type = 1;
6044 * Allocate a new extent record, fill default values from @tmpl and insert into
6045 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6046 * the cache, otherwise it fails.
/*
 * Create an extent record from tmpl and insert it into extent_cache without
 * looking for an existing record first.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Fields are either copied from
 * tmpl or reset to a fixed start value, as listed below. After a successful
 * insert the global bytes_used counter grows by rec->nr, the stripe
 * crossing flag is computed and check_extent_type() is run.
 * Note that rec->nr is the larger of tmpl->nr and tmpl->max_size while the
 * cache extent size is tmpl->nr.
 * NOTE(review): the allocation failure check, the handling of a failed
 * insert, the condition guarding the stripe check and the return statement
 * are not shown in this listing.
 */
6048 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6049 struct extent_record *tmpl)
6051 struct extent_record *rec;
6054 BUG_ON(tmpl->max_size == 0);
6055 rec = malloc(sizeof(*rec));
6058 rec->start = tmpl->start;
6059 rec->max_size = tmpl->max_size;
6060 rec->nr = max(tmpl->nr, tmpl->max_size);
6061 rec->found_rec = tmpl->found_rec;
6062 rec->content_checked = tmpl->content_checked;
6063 rec->owner_ref_checked = tmpl->owner_ref_checked;
/* State that always starts out cleared for a new record. */
6064 rec->num_duplicates = 0;
6065 rec->metadata = tmpl->metadata;
6066 rec->flag_block_full_backref = FLAG_UNSET;
6067 rec->bad_full_backref = 0;
6068 rec->crossing_stripes = 0;
6069 rec->wrong_chunk_type = 0;
6070 rec->is_root = tmpl->is_root;
6071 rec->refs = tmpl->refs;
6072 rec->extent_item_refs = tmpl->extent_item_refs;
6073 rec->parent_generation = tmpl->parent_generation;
6074 INIT_LIST_HEAD(&rec->backrefs);
6075 INIT_LIST_HEAD(&rec->dups);
6076 INIT_LIST_HEAD(&rec->list);
6077 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* The cache key range is [tmpl->start, tmpl->start + tmpl->nr). */
6078 rec->cache.start = tmpl->start;
6079 rec->cache.size = tmpl->nr;
6080 ret = insert_cache_extent(extent_cache, &rec->cache);
6085 bytes_used += rec->nr;
/* The stripe check uses the node size of the tree root as the length. */
6088 rec->crossing_stripes = check_crossing_stripes(global_info,
6089 rec->start, global_info->tree_root->nodesize);
6090 check_extent_type(rec);
6095 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6097 * - refs - if found, increase refs
6098 * - is_root - if found, set
6099 * - content_checked - if found, set
6100 * - owner_ref_checked - if found, set
6102 * If not found, create a new one, initialize and insert.
/*
 * Merge tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when the lookup
 * finds none.
 *
 * For an existing record the visible steps are:
 *  - rec->nr may be reset to the larger of tmpl->nr and tmpl->max_size,
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, a duplicate entry is allocated, filled from tmpl and
 *    queued on rec->dups, and rec is put on duplicate_extents if not listed,
 *  - extent_item_refs is taken from tmpl (with a message when the record
 *    already had a value) unless a duplicate was detected,
 *  - content_checked and owner_ref_checked are set when set in tmpl,
 *  - parent_key is copied, parent_generation is taken when non-zero and
 *    max_size is raised when tmpl has a larger one,
 *  - the stripe crossing flag and the chunk type are re-evaluated and
 *    maybe_free_extent_rec() is called.
 * NOTE(review): several conditions, the allocation failure check, the
 * remaining stores into the duplicate entry and the return statements are
 * not shown in this listing.
 */
6104 static int add_extent_rec(struct cache_tree *extent_cache,
6105 struct extent_record *tmpl)
6107 struct extent_record *rec;
6108 struct cache_extent *cache;
6112 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6114 rec = container_of(cache, struct extent_record, cache);
6118 rec->nr = max(tmpl->nr, tmpl->max_size);
6121 * We need to make sure to reset nr to whatever the extent
6122 * record says was the real size, this way we can compare it to
6125 if (tmpl->found_rec) {
6126 if (tmpl->start != rec->start || rec->found_rec) {
6127 struct extent_record *tmp;
/* Queue the record for duplicate handling only once. */
6130 if (list_empty(&rec->list))
6131 list_add_tail(&rec->list,
6132 &duplicate_extents);
6135 * We have to do this song and dance in case we
6136 * find an extent record that falls inside of
6137 * our current extent record but does not have
6138 * the same objectid.
6140 tmp = malloc(sizeof(*tmp));
6143 tmp->start = tmpl->start;
6144 tmp->max_size = tmpl->max_size;
6147 tmp->metadata = tmpl->metadata;
6148 tmp->extent_item_refs = tmpl->extent_item_refs;
6149 INIT_LIST_HEAD(&tmp->list);
6150 list_add_tail(&tmp->list, &rec->dups);
6151 rec->num_duplicates++;
6158 if (tmpl->extent_item_refs && !dup) {
6159 if (rec->extent_item_refs) {
6160 fprintf(stderr, "block %llu rec "
6161 "extent_item_refs %llu, passed %llu\n",
6162 (unsigned long long)tmpl->start,
6163 (unsigned long long)
6164 rec->extent_item_refs,
6165 (unsigned long long)tmpl->extent_item_refs);
6167 rec->extent_item_refs = tmpl->extent_item_refs;
/* The checked flags are only ever set here, never cleared. */
6171 if (tmpl->content_checked)
6172 rec->content_checked = 1;
6173 if (tmpl->owner_ref_checked)
6174 rec->owner_ref_checked = 1;
6175 memcpy(&rec->parent_key, &tmpl->parent_key,
6176 sizeof(tmpl->parent_key));
6177 if (tmpl->parent_generation)
6178 rec->parent_generation = tmpl->parent_generation;
6179 if (rec->max_size < tmpl->max_size)
6180 rec->max_size = tmpl->max_size;
6183 * A metadata extent can't cross stripe_len boundary, otherwise
6184 * kernel scrub won't be able to handle it.
6185 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6189 rec->crossing_stripes = check_crossing_stripes(
6190 global_info, rec->start,
6191 global_info->tree_root->nodesize);
6192 check_extent_type(rec);
6193 maybe_free_extent_rec(extent_cache, rec);
/* No existing record: create one from the template. */
6197 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (parent or root) for the extent at bytenr.
 *
 * The record is looked up with a one byte range; when missing, a template
 * starting at bytenr is inserted with add_extent_rec_nolookup() and the
 * lookup is repeated. The matching backref is searched with
 * find_tree_backref() and allocated with alloc_tree_backref() when absent.
 * Then either found_ref or found_extent_tree is set on the backref, with a
 * message on stderr when the flag was already set. Finally the chunk type
 * is re-checked and maybe_free_extent_rec() is called.
 * NOTE(review): the remaining template fields, the selector between the two
 * flag branches (presumably the found_ref argument), the handling of
 * rec->start != bytenr and of allocation failures, and the return
 * statements are not shown in this listing.
 */
6202 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6203 u64 parent, u64 root, int found_ref)
6205 struct extent_record *rec;
6206 struct tree_backref *back;
6207 struct cache_extent *cache;
6210 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6212 struct extent_record tmpl;
6214 memset(&tmpl, 0, sizeof(tmpl));
6215 tmpl.start = bytenr;
6220 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6224 /* really a bug in cache_extent implement now */
6225 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6230 rec = container_of(cache, struct extent_record, cache);
/* The record found may start somewhere else than the requested bytenr. */
6231 if (rec->start != bytenr) {
6233 * Several cause, from unaligned bytenr to over lapping extents
6238 back = find_tree_backref(rec, parent, root);
6240 back = alloc_tree_backref(rec, parent, root);
/* Branch that marks the backref as found by a reference. */
6246 if (back->node.found_ref) {
6247 fprintf(stderr, "Extent back ref already exists "
6248 "for %llu parent %llu root %llu \n",
6249 (unsigned long long)bytenr,
6250 (unsigned long long)parent,
6251 (unsigned long long)root);
6253 back->node.found_ref = 1;
/* Branch that marks the backref as found in the extent tree. */
6255 if (back->node.found_extent_tree) {
6256 fprintf(stderr, "Extent back ref already exists "
6257 "for %llu parent %llu root %llu \n",
6258 (unsigned long long)bytenr,
6259 (unsigned long long)parent,
6260 (unsigned long long)root);
6262 back->node.found_extent_tree = 1;
6264 check_extent_type(rec);
6265 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at bytenr.
 *
 * The record is looked up with a one byte range; when missing, a template
 * with start bytenr and the given max_size is inserted and the lookup is
 * repeated. rec->max_size is raised to max_size when larger. The matching
 * backref is searched with find_data_backref() and allocated when absent.
 *
 * Reference branch: num_refs must be 1 (BUG_ON); when the backref was
 * already found its bytes must equal max_size (BUG_ON). The backref gets
 * found_ref set, its found_ref counter incremented, and bytes and
 * disk_bytenr stored; the record is marked content and owner checked.
 *
 * Extent tree branch: a message is printed when found_extent_tree was
 * already set; num_refs is stored and found_extent_tree is set.
 *
 * NOTE(review): the selector between the two branches (presumably the
 * found_ref argument), the remaining call arguments, error handling and the
 * return statements are not shown in this listing.
 */
6269 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6270 u64 parent, u64 root, u64 owner, u64 offset,
6271 u32 num_refs, int found_ref, u64 max_size)
6273 struct extent_record *rec;
6274 struct data_backref *back;
6275 struct cache_extent *cache;
6278 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6280 struct extent_record tmpl;
6282 memset(&tmpl, 0, sizeof(tmpl));
6283 tmpl.start = bytenr;
6285 tmpl.max_size = max_size;
6287 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6291 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6296 rec = container_of(cache, struct extent_record, cache);
6297 if (rec->max_size < max_size)
6298 rec->max_size = max_size;
6301 * If found_ref is set then max_size is the real size and must match the
6302 * existing refs. So if we have already found a ref then we need to
6303 * make sure that this ref matches the existing one, otherwise we need
6304 * to add a new backref so we can notice that the backrefs don't match
6305 * and we need to figure out who is telling the truth. This is to
6306 * account for that awful fsync bug I introduced where we'd end up with
6307 * a btrfs_file_extent_item that would have its length include multiple
6308 * prealloc extents or point inside of a prealloc extent.
6310 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6313 back = alloc_data_backref(rec, parent, root, owner, offset,
/* Reference branch: each call accounts for exactly one reference. */
6319 BUG_ON(num_refs != 1);
6320 if (back->node.found_ref)
6321 BUG_ON(back->bytes != max_size);
6322 back->node.found_ref = 1;
6323 back->found_ref += 1;
6324 back->bytes = max_size;
6325 back->disk_bytenr = bytenr;
6327 rec->content_checked = 1;
6328 rec->owner_ref_checked = 1;
/* Extent tree branch: report a repeated entry, then store the ref count. */
6330 if (back->node.found_extent_tree) {
6331 fprintf(stderr, "Extent back ref already exists "
6332 "for %llu parent %llu root %llu "
6333 "owner %llu offset %llu num_refs %lu\n",
6334 (unsigned long long)bytenr,
6335 (unsigned long long)parent,
6336 (unsigned long long)root,
6337 (unsigned long long)owner,
6338 (unsigned long long)offset,
6339 (unsigned long)num_refs);
6341 back->num_refs = num_refs;
6342 back->node.found_extent_tree = 1;
6344 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register [bytenr, bytenr + size) in the seen tree and then in the pending
 * tree.
 * The result of the insert into seen is kept in ret; the result of the
 * insert into pending is not captured.
 * NOTE(review): the test on ret between the two inserts and the return
 * statements are not shown in this listing.
 */
6348 static int add_pending(struct cache_tree *pending,
6349 struct cache_tree *seen, u64 bytenr, u32 size)
6352 ret = add_cache_extent(seen, bytenr, size);
6355 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next blocks to process and store their ranges in bits.
 *
 * Visible order of the lookups:
 *  1. the first extent of the reada tree, copied into bits[0];
 *  2. the nodes tree, searched from a position up to 32768 bytes before
 *     last, then from 0;
 *  3. the pending tree, searched from 0.
 * Consecutive extents are copied into bits until bits_nr entries are filled
 * or the tree runs out. When more than 8 entries remain free, extents from
 * the pending tree that follow bits[0] are appended as long as the gap to
 * the previous range is at most 32768 bytes.
 *
 * NOTE(review): last is a u64 but node_start is an unsigned long, which can
 * be narrower on some targets - confirm whether truncation matters.
 * NOTE(review): the last parameter, the tests between the lookups, the loop
 * heads, the counter updates and the return statements are not shown in
 * this listing.
 */
6359 static int pick_next_pending(struct cache_tree *pending,
6360 struct cache_tree *reada,
6361 struct cache_tree *nodes,
6362 u64 last, struct block_info *bits, int bits_nr,
6365 unsigned long node_start = last;
6366 struct cache_extent *cache;
6369 cache = search_cache_extent(reada, 0);
6371 bits[0].start = cache->start;
6372 bits[0].size = cache->size;
/* Back up so that nodes slightly before last are considered as well. */
6377 if (node_start > 32768)
6378 node_start -= 32768;
6380 cache = search_cache_extent(nodes, node_start);
6382 cache = search_cache_extent(nodes, 0);
6385 cache = search_cache_extent(pending, 0);
6390 bits[ret].start = cache->start;
6391 bits[ret].size = cache->size;
6392 cache = next_cache_extent(cache);
6394 } while (cache && ret < bits_nr);
6400 bits[ret].start = cache->start;
6401 bits[ret].size = cache->size;
6402 cache = next_cache_extent(cache);
6404 } while (cache && ret < bits_nr);
/* Use spare room in bits for pending extents close behind bits[0]. */
6406 if (bits_nr - ret > 8) {
6407 u64 lookup = bits[0].start + bits[0].size;
6408 struct cache_extent *next;
6409 next = search_cache_extent(pending, lookup);
6411 if (next->start - lookup > 32768)
6413 bits[ret].start = next->start;
6414 bits[ret].size = next->size;
6415 lookup = next->start + next->size;
6419 next = next_cache_extent(next);
/*
 * Release callback for one chunk record: recover the record from its
 * embedded cache extent and unlink it from both of its lists.
 * NOTE(review): the release of the record memory is not shown in this
 * listing.
 */
6427 static void free_chunk_record(struct cache_extent *cache)
6429 struct chunk_record *rec;
6431 rec = container_of(cache, struct chunk_record, cache);
6432 list_del_init(&rec->list);
6433 list_del_init(&rec->dextents);
/*
 * Release every extent of chunk_cache, using free_chunk_record() as the
 * per-extent callback.
 */
6437 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6439 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for one device record: recover the record from its
 * embedded rb_node.
 * NOTE(review): the release of the record memory is not shown in this
 * listing.
 */
6442 static void free_device_record(struct rb_node *node)
6444 struct device_record *rec;
6446 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation pairing the name device_cache with free_device_record().
 * NOTE(review): presumably this expands to the function that frees a whole
 * device cache tree - confirm against the definition of FREE_RB_BASED_TREE.
 */
6450 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert bg_rec into the cache tree of a block group tree and append it to
 * the block_groups list of that tree.
 * NOTE(review): the test on ret between the two steps and the return
 * statements are not shown in this listing.
 */
6452 int insert_block_group_record(struct block_group_tree *tree,
6453 struct block_group_record *bg_rec)
6457 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6461 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for one block group record: recover the record from its
 * embedded cache extent and unlink it from its list.
 * NOTE(review): the release of the record memory is not shown in this
 * listing.
 */
6465 static void free_block_group_record(struct cache_extent *cache)
6467 struct block_group_record *rec;
6469 rec = container_of(cache, struct block_group_record, cache);
6470 list_del_init(&rec->list);
/*
 * Release every extent of the cache tree inside tree, using
 * free_block_group_record() as the per-extent callback.
 */
6474 void free_block_group_tree(struct block_group_tree *tree)
6476 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert de_rec into a device extent tree with insert_cache_extent2() and
 * queue it on both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 * NOTE(review): the test on ret after the insert and the return statements
 * are not shown in this listing.
 */
6479 int insert_device_extent_record(struct device_extent_tree *tree,
6480 struct device_extent_record *de_rec)
6485 * Device extent is a bit different from the other extents, because
6486 * the extents which belong to the different devices may have the
6487 * same start and size, so we need use the special extent cache
6488 * search/insert functions.
6490 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6494 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6495 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for one device extent record: recover the record from
 * its embedded cache extent and unlink it from the chunk list and the
 * device list when it is still on them.
 * NOTE(review): the release of the record memory is not shown in this
 * listing.
 */
6499 static void free_device_extent_record(struct cache_extent *cache)
6501 struct device_extent_record *rec;
6503 rec = container_of(cache, struct device_extent_record, cache);
6504 if (!list_empty(&rec->chunk_list))
6505 list_del_init(&rec->chunk_list);
6506 if (!list_empty(&rec->device_list))
6507 list_del_init(&rec->device_list);
/*
 * Release every extent of the cache tree inside tree, using
 * free_device_extent_record() as the per-extent callback.
 */
6511 void free_device_extent_tree(struct device_extent_tree *tree)
6513 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6516 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at slot of leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID becomes a tree
 * backref; any other ref becomes a data backref that carries the v0 ref
 * count. In both cases key.objectid is the extent bytenr and key.offset is
 * passed as the parent.
 * NOTE(review): the remaining arguments of the add_tree_backref() call and
 * the return statement are not shown in this listing.
 */
6517 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6518 struct extent_buffer *leaf, int slot)
6520 struct btrfs_extent_ref_v0 *ref0;
6521 struct btrfs_key key;
6524 btrfs_item_key_to_cpu(leaf, &key, slot);
6525 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6526 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6527 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6530 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6531 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk record from the chunk item at slot of leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes. The cache
 * range is [key->offset, key->offset + chunk length); the key fields, the
 * leaf generation and the chunk attributes are copied, followed by devid,
 * offset and device uuid of every stripe.
 * NOTE(review): the slot parameter declaration, what happens after the
 * allocation failure message, the length argument of the uuid copy and the
 * return statement are not shown in this listing.
 */
6537 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6538 struct btrfs_key *key,
6541 struct btrfs_chunk *ptr;
6542 struct chunk_record *rec;
6545 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6546 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* The record size depends on the number of stripes. */
6548 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6550 fprintf(stderr, "memory allocation failed\n");
6554 INIT_LIST_HEAD(&rec->list);
6555 INIT_LIST_HEAD(&rec->dextents);
6558 rec->cache.start = key->offset;
6559 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6561 rec->generation = btrfs_header_generation(leaf);
6563 rec->objectid = key->objectid;
6564 rec->type = key->type;
6565 rec->offset = key->offset;
6567 rec->length = rec->cache.size;
6568 rec->owner = btrfs_chunk_owner(leaf, ptr);
6569 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6570 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6571 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6572 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6573 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6574 rec->num_stripes = num_stripes;
6575 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy the per-stripe data out of the leaf. */
6577 for (i = 0; i < rec->num_stripes; ++i) {
6578 rec->stripes[i].devid =
6579 btrfs_stripe_devid_nr(leaf, ptr, i);
6580 rec->stripes[i].offset =
6581 btrfs_stripe_offset_nr(leaf, ptr, i);
6582 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6583 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at slot of eb and add a record for it to
 * chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported and ignored. Otherwise a record is built with
 * btrfs_new_chunk_record() and inserted; a failed insert is reported as an
 * already existing chunk.
 * NOTE(review): the slot parameter declaration, the remaining arguments of
 * the validity check, the tests on ret and the return statements are not
 * shown in this listing.
 */
6590 static int process_chunk_item(struct cache_tree *chunk_cache,
6591 struct btrfs_key *key, struct extent_buffer *eb,
6594 struct chunk_record *rec;
6595 struct btrfs_chunk *chunk;
6598 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6600 * Do extra check for this chunk item,
6602 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6603 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6604 * and owner<->key_type check.
6606 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6609 error("chunk(%llu, %llu) is not valid, ignore it",
6610 key->offset, btrfs_chunk_length(eb, chunk));
6613 rec = btrfs_new_chunk_record(eb, key, slot);
6614 ret = insert_cache_extent(chunk_cache, &rec->cache);
6616 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6617 rec->offset, rec->length);
/*
 * Build a device record from the dev item at slot of eb and insert it into
 * the rb-tree dev_cache, ordered by device_record_compare.
 *
 * rec->devid is assigned twice: first from key->offset and later from the
 * device id stored in the item, so the value from the item is the one that
 * remains.
 * A failed insert is reported as an already existing device.
 * NOTE(review): what happens after the allocation failure message, the
 * tests on ret and the return statements are not shown in this listing.
 */
6624 static int process_device_item(struct rb_root *dev_cache,
6625 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6627 struct btrfs_dev_item *ptr;
6628 struct device_record *rec;
6631 ptr = btrfs_item_ptr(eb,
6632 slot, struct btrfs_dev_item);
6634 rec = malloc(sizeof(*rec));
6636 fprintf(stderr, "memory allocation failed\n");
6640 rec->devid = key->offset;
6641 rec->generation = btrfs_header_generation(eb);
6643 rec->objectid = key->objectid;
6644 rec->type = key->type;
6645 rec->offset = key->offset;
/* Overrides the devid taken from the key above. */
6647 rec->devid = btrfs_device_id(eb, ptr);
6648 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6649 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6651 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6653 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block group record from the block group item at slot of leaf.
 *
 * The record is zero-allocated. The cache range is
 * [key->objectid, key->objectid + key->offset); the key fields, the leaf
 * generation and the block group flags are copied and the list node is
 * initialised.
 * NOTE(review): the slot parameter declaration, what happens after the
 * allocation failure message and the return statement are not shown in this
 * listing.
 */
6660 struct block_group_record *
6661 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6664 struct btrfs_block_group_item *ptr;
6665 struct block_group_record *rec;
6667 rec = calloc(1, sizeof(*rec));
6669 fprintf(stderr, "memory allocation failed\n");
6673 rec->cache.start = key->objectid;
6674 rec->cache.size = key->offset;
6676 rec->generation = btrfs_header_generation(leaf);
6678 rec->objectid = key->objectid;
6679 rec->type = key->type;
6680 rec->offset = key->offset;
6682 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6683 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6685 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at slot of eb and insert it into
 * block_group_cache; a failed insert is reported as an already existing
 * block group.
 * NOTE(review): the test on ret, any cleanup of the rejected record and the
 * return statements are not shown in this listing.
 */
6690 static int process_block_group_item(struct block_group_tree *block_group_cache,
6691 struct btrfs_key *key,
6692 struct extent_buffer *eb, int slot)
6694 struct block_group_record *rec;
6697 rec = btrfs_new_block_group_record(eb, key, slot);
6698 ret = insert_block_group_record(block_group_cache, rec);
6700 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6701 rec->objectid, rec->offset);
/*
 * Build a device extent record from the dev extent item at slot of leaf.
 *
 * The record is zero-allocated. The cache entry is keyed by key->objectid
 * and key->offset, and its size is the device extent length. The key
 * fields, the leaf generation and the chunk reference stored in the item
 * are copied and both list nodes are initialised.
 * NOTE(review): what happens after the allocation failure message, the
 * target of the chunk offset store and the return statement are not shown
 * in this listing.
 */
6708 struct device_extent_record *
6709 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6710 struct btrfs_key *key, int slot)
6712 struct device_extent_record *rec;
6713 struct btrfs_dev_extent *ptr;
6715 rec = calloc(1, sizeof(*rec));
6717 fprintf(stderr, "memory allocation failed\n");
6721 rec->cache.objectid = key->objectid;
6722 rec->cache.start = key->offset;
6724 rec->generation = btrfs_header_generation(leaf);
6726 rec->objectid = key->objectid;
6727 rec->type = key->type;
6728 rec->offset = key->offset;
6730 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6731 rec->chunk_objecteid =
6732 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6734 btrfs_dev_extent_chunk_offset(leaf, ptr);
6735 rec->length = btrfs_dev_extent_length(leaf, ptr);
/* The cache size mirrors the extent length read from the item. */
6736 rec->cache.size = rec->length;
6738 INIT_LIST_HEAD(&rec->chunk_list);
6739 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device extent record for the item at slot of eb and insert it
 * into dev_extent_cache; a failed insert is reported as an already existing
 * device extent.
 * NOTE(review): the return type line, the slot parameter declaration, the
 * test on ret, any cleanup of the rejected record and the return statements
 * are not shown in this listing.
 */
6745 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6746 struct btrfs_key *key, struct extent_buffer *eb,
6749 struct device_extent_record *rec;
6752 rec = btrfs_new_device_extent_record(eb, key, slot);
6753 ret = insert_device_extent_record(dev_extent_cache, rec);
6756 "Device extent[%llu, %llu, %llu] existed.\n",
6757 rec->objectid, rec->offset, rec->length);
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM key) at @slot of @eb
 * and record it in @extent_cache.
 *
 * An extent_record template is filled from the key and the item (start,
 * nr, extent_item_refs, metadata, max_size) and handed to add_extent_rec().
 * The inline references stored after the extent item are then walked and
 * converted into tree backrefs (add_tree_backref) or data backrefs
 * (add_data_backref).
 *
 * The following are reported through error() with an "ignore"/"ignoring"
 * message:
 *  - a bytenr (key.objectid) that is not aligned to root->sectorsize
 *  - a metadata extent whose length differs from root->nodesize
 *  - a data extent whose length is not aligned to root->sectorsize
 */
6764 static int process_extent_item(struct btrfs_root *root,
6765 struct cache_tree *extent_cache,
6766 struct extent_buffer *eb, int slot)
6768 struct btrfs_extent_item *ei;
6769 struct btrfs_extent_inline_ref *iref;
6770 struct btrfs_extent_data_ref *dref;
6771 struct btrfs_shared_data_ref *sref;
6772 struct btrfs_key key;
6773 struct extent_record tmpl;
6778 u32 item_size = btrfs_item_size_nr(eb, slot);
6784 btrfs_item_key_to_cpu(eb, &key, slot);
/*
 * Length of the extent: nodesize for a METADATA_ITEM key, otherwise taken
 * from key.offset.
 */
6786 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6788 num_bytes = root->nodesize;
6790 num_bytes = key.offset;
6793 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6794 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6795 key.objectid, root->sectorsize);
/*
 * An item smaller than struct btrfs_extent_item is only understood as a
 * v0 extent item, and only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 */
6798 if (item_size < sizeof(*ei)) {
6799 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6800 struct btrfs_extent_item_v0 *ei0;
6801 BUG_ON(item_size != sizeof(*ei0));
6802 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6803 refs = btrfs_extent_refs_v0(eb, ei0);
6807 memset(&tmpl, 0, sizeof(tmpl));
6808 tmpl.start = key.objectid;
6809 tmpl.nr = num_bytes;
6810 tmpl.extent_item_refs = refs;
6811 tmpl.metadata = metadata;
6813 tmpl.max_size = num_bytes;
6815 return add_extent_rec(extent_cache, &tmpl);
6818 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6819 refs = btrfs_extent_refs(eb, ei);
6820 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6824 if (metadata && num_bytes != root->nodesize) {
6825 error("ignore invalid metadata extent, length %llu does not equal to %u",
6826 num_bytes, root->nodesize);
6829 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6830 error("ignore invalid data extent, length %llu is not aligned to %u",
6831 num_bytes, root->sectorsize);
6835 memset(&tmpl, 0, sizeof(tmpl));
6836 tmpl.start = key.objectid;
6837 tmpl.nr = num_bytes;
6838 tmpl.extent_item_refs = refs;
6839 tmpl.metadata = metadata;
6841 tmpl.max_size = num_bytes;
6842 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  For a tree block stored
 * under an EXTENT_ITEM key a btrfs_tree_block_info sits in between and is
 * skipped.
 */
6844 ptr = (unsigned long)(ei + 1);
6845 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6846 key.type == BTRFS_EXTENT_ITEM_KEY)
6847 ptr += sizeof(struct btrfs_tree_block_info);
6849 end = (unsigned long)ei + item_size;
6851 iref = (struct btrfs_extent_inline_ref *)ptr;
6852 type = btrfs_extent_inline_ref_type(eb, iref);
6853 offset = btrfs_extent_inline_ref_offset(eb, iref);
6855 case BTRFS_TREE_BLOCK_REF_KEY:
6856 ret = add_tree_backref(extent_cache, key.objectid,
6860 "add_tree_backref failed (extent items tree block): %s",
6863 case BTRFS_SHARED_BLOCK_REF_KEY:
6864 ret = add_tree_backref(extent_cache, key.objectid,
6868 "add_tree_backref failed (extent items shared block): %s",
6871 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref's offset field. */
6872 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6873 add_data_backref(extent_cache, key.objectid, 0,
6874 btrfs_extent_data_ref_root(eb, dref),
6875 btrfs_extent_data_ref_objectid(eb,
6877 btrfs_extent_data_ref_offset(eb, dref),
6878 btrfs_extent_data_ref_count(eb, dref),
6881 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref (count) follows the inline ref header. */
6882 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6883 add_data_backref(extent_cache, key.objectid, offset,
6885 btrfs_shared_data_ref_count(eb, sref),
6889 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6890 key.objectid, key.type, num_bytes);
/* Step over this inline ref; its size depends on the ref type. */
6893 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache
 * is described by a free space entry with exactly that offset and length.
 *
 * Before the lookup, the range is adjusted for superblock mirrors: for each
 * mirror, btrfs_rmap_block() returns the logical addresses inside this
 * block group that map onto it, and every stripe overlapping the range
 * trims it.  A stripe at the front or back shortens the range; a stripe in
 * the middle makes the function recurse on the left part and continue with
 * the right part.
 *
 * When a matching entry is found it is unlinked from
 * cache->free_space_ctl, so entries still present after all checks are
 * ones that had no counterpart.  Mismatches are reported on stderr.
 */
6900 static int check_cache_range(struct btrfs_root *root,
6901 struct btrfs_block_group_cache *cache,
6902 u64 offset, u64 bytes)
6904 struct btrfs_free_space *entry;
6910 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6911 bytenr = btrfs_sb_offset(i);
6912 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6913 cache->key.objectid, bytenr, 0,
6914 &logical, &nr, &stripe_len);
/* Stripe entirely before the range. */
6919 if (logical[nr] + stripe_len <= offset)
/* Stripe entirely after the range. */
6921 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: cut it off the front. */
6923 if (logical[nr] == offset) {
6924 if (stripe_len >= bytes) {
6928 bytes -= stripe_len;
6929 offset += stripe_len;
/* Stripe starts before the range and overlaps its beginning. */
6930 } else if (logical[nr] < offset) {
6931 if (logical[nr] + stripe_len >=
6936 bytes = (offset + bytes) -
6937 (logical[nr] + stripe_len);
6938 offset = logical[nr] + stripe_len;
6941 * Could be tricky, the super may land in the
6942 * middle of the area we're checking. First
6943 * check the easiest case, it's at the end.
6945 if (logical[nr] + stripe_len >=
6947 bytes = logical[nr] - offset;
6951 /* Check the left side */
6952 ret = check_cache_range(root, cache,
6954 logical[nr] - offset);
6960 /* Now we continue with the right side */
6961 bytes = (offset + bytes) -
6962 (logical[nr] + stripe_len);
6963 offset = logical[nr] + stripe_len;
/* Look up the (possibly trimmed) range in the loaded free space. */
6970 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6972 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6973 offset, offset+bytes);
6977 if (entry->offset != offset) {
6978 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6983 if (entry->bytes != bytes) {
6984 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6985 bytes, entry->bytes, offset);
/* Matched: remove the entry so leftovers can be detected afterwards. */
6989 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Verify the loaded free space of block group @cache against the extent
 * tree.
 *
 * The extent tree is walked from the start of the block group (but not
 * below BTRFS_SUPER_INFO_OFFSET).  `last` tracks the end of the previous
 * allocated extent; each gap between `last` and the next extent, and the
 * tail between `last` and the end of the block group, is passed to
 * check_cache_range().  An EXTENT_ITEM's length is key.offset, a
 * METADATA_ITEM's length is root->nodesize.
 *
 * After the walk, entries still left in
 * free_space_ctl->free_space_offset are reported on stderr.
 */
6994 static int verify_space_cache(struct btrfs_root *root,
6995 struct btrfs_block_group_cache *cache)
6997 struct btrfs_path path;
6998 struct extent_buffer *leaf;
6999 struct btrfs_key key;
/* From here on @root refers to the extent root. */
7003 root = root->fs_info->extent_root;
7005 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7007 btrfs_init_path(&path);
7008 key.objectid = last;
7010 key.type = BTRFS_EXTENT_ITEM_KEY;
7011 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7016 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7017 ret = btrfs_next_leaf(root, &path);
7025 leaf = path.nodes[0];
7026 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key at or beyond the end of this block group. */
7027 if (key.objectid >= cache->key.offset + cache->key.objectid)
7029 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7030 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap. */
7035 if (last == key.objectid) {
7036 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7037 last = key.objectid + key.offset;
7039 last = key.objectid + root->nodesize;
/* Gap [last, key.objectid) must be present as free space. */
7044 ret = check_cache_range(root, cache, last,
7045 key.objectid - last);
7048 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7049 last = key.objectid + key.offset;
7051 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent. */
7055 if (last < cache->key.objectid + cache->key.offset)
7056 ret = check_cache_range(root, cache, last,
7057 cache->key.objectid +
7058 cache->key.offset - last);
7061 btrfs_release_path(&path);
7064 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7065 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group.
 *
 * If the superblock's cache generation is set (not -1ULL) but differs from
 * the superblock generation, a message is printed that the space cache
 * will be invalidated.
 *
 * Otherwise block groups are iterated starting just past the primary
 * superblock.  For each one the free_space_ctl is initialised if absent,
 * and btrfs_remove_free_space_cache() is called (presumably on the
 * already-initialised path -- TODO confirm).  Free space is then loaded
 * from the free space tree when the FREE_SPACE_TREE compat_ro bit is set
 * (with the super stripes excluded around the load), or from the free
 * space cache otherwise, and checked with verify_space_cache().
 *
 * Returns -EINVAL if `error` is non-zero at the end, 0 otherwise.
 */
7073 static int check_space_cache(struct btrfs_root *root)
7075 struct btrfs_block_group_cache *cache;
7076 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7080 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7081 btrfs_super_generation(root->fs_info->super_copy) !=
7082 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7083 printf("cache and super generation don't match, space cache "
7084 "will be invalidated\n");
/* Progress reporting, when enabled. */
7088 if (ctx.progress_enabled) {
7089 ctx.tp = TASK_FREE_SPACE;
7090 task_start(ctx.info);
7094 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts at the end of this block group. */
7098 start = cache->key.objectid + cache->key.offset;
7099 if (!cache->free_space_ctl) {
7100 if (btrfs_init_free_space_ctl(cache,
7101 root->sectorsize)) {
7106 btrfs_remove_free_space_cache(cache);
7109 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7110 ret = exclude_super_stripes(root, cache);
7112 fprintf(stderr, "could not exclude super stripes: %s\n",
7117 ret = load_free_space_tree(root->fs_info, cache);
7118 free_excluded_extents(root, cache);
7120 fprintf(stderr, "could not load free space tree: %s\n",
7127 ret = load_free_space_cache(root->fs_info, cache);
7132 ret = verify_space_cache(root, cache);
7134 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7135 cache->key.objectid);
7140 task_stop(ctx.info);
7142 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every root->sectorsize block with the checksum stored in
 * leaf @eb.
 *
 * @leaf_offset: offset inside @eb of the first stored checksum; the
 *               checksum for the block at byte offset `tmp` is read from
 *               leaf_offset + tmp / sectorsize * csum_size
 *
 * @num_bytes is tested for sectorsize alignment up front.  On a checksum
 * mismatch a message with the mirror number is printed, and the number of
 * copies is queried so that another mirror can be tried (the retry itself
 * is not visible here).
 */
7145 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7146 u64 num_bytes, unsigned long leaf_offset,
7147 struct extent_buffer *eb) {
7150 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7152 unsigned long csum_offset;
7156 u64 data_checked = 0;
7162 if (num_bytes % root->sectorsize)
/* Buffer for the whole range. */
7165 data = malloc(num_bytes);
7169 while (offset < num_bytes) {
7172 read_len = num_bytes - offset;
7173 /* read as much space once a time */
7174 ret = read_extent_data(root, data + offset,
7175 bytenr + offset, &read_len, mirror);
7179 /* verify every 4k data's checksum */
/* NOTE(review): the step below is root->sectorsize, not a literal 4k. */
7180 while (data_checked < read_len) {
7182 tmp = offset + data_checked;
7184 csum = btrfs_csum_data((char *)data + tmp,
7185 csum, root->sectorsize);
7186 btrfs_csum_final(csum, (u8 *)&csum);
7188 csum_offset = leaf_offset +
7189 tmp / root->sectorsize * csum_size;
7190 read_extent_buffer(eb, (char *)&csum_expected,
7191 csum_offset, csum_size);
7192 /* try another mirror */
7193 if (csum != csum_expected) {
7194 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7195 mirror, bytenr + tmp,
7196 csum, csum_expected);
7197 num_copies = btrfs_num_copies(
7198 &root->fs_info->mapping_tree,
7200 if (mirror < num_copies - 1) {
7205 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr (length num_bytes) is
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, (u64)-1) and steps backwards
 * to reach an item at or before @bytenr, skipping back over keys whose
 * type is greater than EXTENT_ITEM.  The tree is then walked forward, and
 * every extent overlapping the range trims it.  An extent in the middle of
 * the range causes a recursive check of the right-hand section before
 * continuing with the left-hand one.
 *
 * If bytes remain uncovered and no error occurred, "There are no extents
 * for csum range" is printed.
 */
7214 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7217 struct btrfs_path path;
7218 struct extent_buffer *leaf;
7219 struct btrfs_key key;
7222 btrfs_init_path(&path);
7223 key.objectid = bytenr;
7224 key.type = BTRFS_EXTENT_ITEM_KEY;
7225 key.offset = (u64)-1;
7228 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7231 fprintf(stderr, "Error looking up extent record %d\n", ret);
7232 btrfs_release_path(&path);
/* Step back one item, moving to the previous leaf when at slot 0. */
7235 if (path.slots[0] > 0) {
7238 ret = btrfs_prev_leaf(root, &path);
7241 } else if (ret > 0) {
7248 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7251 * Block group items come before extent items if they have the same
7252 * bytenr, so walk back one more just in case. Dear future traveller,
7253 * first congrats on mastering time travel. Now if it's not too much
7254 * trouble could you go back to 2006 and tell Chris to make the
7255 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7256 * EXTENT_ITEM_KEY please?
7258 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7259 if (path.slots[0] > 0) {
7262 ret = btrfs_prev_leaf(root, &path);
7265 } else if (ret > 0) {
7270 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk over the items from the position found above. */
7274 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7275 ret = btrfs_next_leaf(root, &path);
7277 fprintf(stderr, "Error going to next leaf "
7279 btrfs_release_path(&path);
7285 leaf = path.nodes[0];
7286 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7287 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7291 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
7295 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: trim the front. */
7298 if (key.objectid == bytenr) {
7299 if (key.offset >= num_bytes) {
7303 num_bytes -= key.offset;
7304 bytenr += key.offset;
/* Extent starts before the range and overlaps its beginning. */
7305 } else if (key.objectid < bytenr) {
7306 if (key.objectid + key.offset >= bytenr + num_bytes) {
7310 num_bytes = (bytenr + num_bytes) -
7311 (key.objectid + key.offset);
7312 bytenr = key.objectid + key.offset;
7314 if (key.objectid + key.offset < bytenr + num_bytes) {
7315 u64 new_start = key.objectid + key.offset;
7316 u64 new_bytes = bytenr + num_bytes - new_start;
7319 * Weird case, the extent is in the middle of
7320 * our range, we'll have to search one side
7321 * and then the other. Not sure if this happens
7322 * in real life, but no harm in coding it up
7323 * anyway just in case.
7325 btrfs_release_path(&path);
7326 ret = check_extent_exists(root, new_start,
7329 fprintf(stderr, "Right section didn't "
7333 num_bytes = key.objectid - bytenr;
7336 num_bytes = key.objectid - bytenr;
/* Anything left in num_bytes was not covered by an extent item. */
7343 if (num_bytes && !ret) {
7344 fprintf(stderr, "There are no extents for csum range "
7345 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7349 btrfs_release_path(&path);
/*
 * Walk every EXTENT_CSUM item of the csum tree and check that the
 * checksummed ranges are backed by extent records.
 *
 * Each item covers data_len = (item size / csum_size) * sectorsize bytes
 * starting at key.offset.  Adjacent items are merged into one running
 * range (offset, num_bytes).  When an item does not continue that range,
 * the finished range is checked with check_extent_exists() and a new range
 * starts at the item's key.offset.
 *
 * When the global check_data_csum is set, the data itself is also verified
 * against the stored checksums with check_extent_csums().
 */
7353 static int check_csums(struct btrfs_root *root)
7355 struct btrfs_path path;
7356 struct extent_buffer *leaf;
7357 struct btrfs_key key;
7358 u64 offset = 0, num_bytes = 0;
7359 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7363 unsigned long leaf_offset;
/* From here on @root refers to the csum root. */
7365 root = root->fs_info->csum_root;
7366 if (!extent_buffer_uptodate(root->node)) {
7367 fprintf(stderr, "No valid csum tree found\n");
7371 btrfs_init_path(&path);
7372 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7373 key.type = BTRFS_EXTENT_CSUM_KEY;
7375 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7377 fprintf(stderr, "Error searching csum tree %d\n", ret);
7378 btrfs_release_path(&path);
7382 if (ret > 0 && path.slots[0])
7387 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7388 ret = btrfs_next_leaf(root, &path);
7390 fprintf(stderr, "Error going to next leaf "
7397 leaf = path.nodes[0];
7399 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7400 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by the checksums stored in this item. */
7405 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7406 csum_size) * root->sectorsize;
7407 if (!check_data_csum)
7408 goto skip_csum_check;
7409 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7410 ret = check_extent_csums(root, key.offset, data_len,
7416 offset = key.offset;
/* Item does not continue the running range: check what was gathered. */
7417 } else if (key.offset != offset + num_bytes) {
7418 ret = check_extent_exists(root, offset, num_bytes);
7420 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7421 "there is no extent record\n",
7422 offset, offset+num_bytes);
7425 offset = key.offset;
7428 num_bytes += data_len;
7432 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type,
 * then offset.
 *
 * The return statements are not visible here; presumably the result is
 * non-zero when @key sorts before @drop_key and zero otherwise -- TODO
 * confirm.
 */
7436 static int is_dropped_key(struct btrfs_key *key,
7437 struct btrfs_key *drop_key) {
7438 if (key->objectid < drop_key->objectid)
7440 else if (key->objectid == drop_key->objectid) {
7441 if (key->type < drop_key->type)
7443 else if (key->type == drop_key->type) {
7444 if (key->offset < drop_key->offset)
7452 * Here are the rules for FULL_BACKREF.
7454 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7455 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7457 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7458 * if it happened after the relocation occurred since we'll have dropped the
7459 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7460 * have no real way to know for sure.
7462 * We process the blocks one root at a time, and we start from the lowest root
7463 * objectid and go to the highest. So we can just lookup the owner backref for
7464 * the record and if we don't find it then we know it doesn't exist and we have
7467 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7468 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7469 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and report the result through *flags.
 *
 * The extent record for @buf is looked up in @extent_cache.  The visible
 * tests, in order, are:
 *  - ri->objectid below BTRFS_FIRST_FREE_OBJECTID
 *  - @buf being the root block itself (buf->start == ri->bytenr)
 *  - BTRFS_HEADER_FLAG_RELOC set in the header
 *  - the header owner matching ri->objectid
 *  - a tree backref for (parent 0, owner) existing on the record
 * Which outcome each test selects is not visible here.
 *
 * Both outcomes cross-check rec->flag_block_full_backref: if it was
 * already set (not FLAG_UNSET) to the opposite value,
 * rec->bad_full_backref is raised.
 */
7471 static int calc_extent_flag(struct cache_tree *extent_cache,
7472 struct extent_buffer *buf,
7473 struct root_item_record *ri,
7476 struct extent_record *rec;
7477 struct cache_extent *cache;
7478 struct tree_backref *tback;
7481 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7482 /* we have added this extent before */
7486 rec = container_of(cache, struct extent_record, cache);
7489 * Except file/reloc tree, we can not have
7492 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7497 if (buf->start == ri->bytenr)
7500 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7503 owner = btrfs_header_owner(buf);
7504 if (owner == ri->objectid)
7507 tback = find_tree_backref(rec, 0, owner);
/* Result "not full backref": flag a record that previously said 1. */
7512 if (rec->flag_block_full_backref != FLAG_UNSET &&
7513 rec->flag_block_full_backref != 0)
7514 rec->bad_full_backref = 1;
/* Result "full backref": flag a record that previously said 0. */
7517 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7518 if (rec->flag_block_full_backref != FLAG_UNSET &&
7519 rec->flag_block_full_backref != 1)
7520 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", using
 * print_key_type() for @key_type and print_objectid() for @rootid.
 */
7524 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7526 fprintf(stderr, "Invalid key type(");
7527 print_key_type(stderr, 0, key_type);
7528 fprintf(stderr, ") found in root(");
7529 print_objectid(stderr, rootid, 0);
7530 fprintf(stderr, ")\n");
7534 * Check if the key is valid with its extent buffer.
7536 * This is an early check in case an invalid key exists in an extent buffer
7537 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check whether an item of type @key_type may appear in the tree whose
 * objectid is @rootid.
 *
 * Rules visible below:
 *  - DEV_ITEM and CHUNK_ITEM:                         chunk tree
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM: extent tree
 *  - ROOT_ITEM:                                       root tree
 *  - DEV_EXTENT:                                      dev tree
 *
 * A mismatch is reported through report_mismatch_key_root().
 */
7540 static int check_type_with_root(u64 rootid, u8 key_type)
7543 /* Only valid in chunk tree */
7544 case BTRFS_DEV_ITEM_KEY:
7545 case BTRFS_CHUNK_ITEM_KEY:
7546 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7549 /* valid in csum and log tree */
/*
 * NOTE(review): this case label is a tree objectid constant, not a key
 * type; BTRFS_EXTENT_CSUM_KEY may have been intended.  The condition also
 * tests for the log tree while the comment above mentions the csum tree.
 * Confirm before changing.
 */
7550 case BTRFS_CSUM_TREE_OBJECTID:
7551 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7555 case BTRFS_EXTENT_ITEM_KEY:
7556 case BTRFS_METADATA_ITEM_KEY:
7557 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7558 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7561 case BTRFS_ROOT_ITEM_KEY:
7562 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7565 case BTRFS_DEV_EXTENT_KEY:
7566 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7572 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next tree block of the scan.
 *
 * A batch of candidate blocks is chosen with pick_next_pending() and
 * readahead is issued for them; only the first block of the batch
 * (bits[0]) is read and processed in this call.  The block is removed from
 * the @pending, @reada and @nodes trees, read with read_tree_block(), and
 * its FULL_BACKREF state is determined and stored in the extent record.
 * After check_block():
 *
 *  - for a leaf, every item is dispatched on its key type to record
 *    extent items, backrefs, chunks, devices, block groups, device
 *    extents, orphan items to delete, and file extents (with data byte
 *    accounting);
 *  - for a node, each child pointer gets an extent record, a tree backref
 *    and an entry in @nodes or @pending.
 *
 * Finally the global byte counters are updated and the buffer is released.
 */
7576 static int run_next_block(struct btrfs_root *root,
7577 struct block_info *bits,
7580 struct cache_tree *pending,
7581 struct cache_tree *seen,
7582 struct cache_tree *reada,
7583 struct cache_tree *nodes,
7584 struct cache_tree *extent_cache,
7585 struct cache_tree *chunk_cache,
7586 struct rb_root *dev_cache,
7587 struct block_group_tree *block_group_cache,
7588 struct device_extent_tree *dev_extent_cache,
7589 struct root_item_record *ri)
7591 struct extent_buffer *buf;
7592 struct extent_record *rec = NULL;
7603 struct btrfs_key key;
7604 struct cache_extent *cache;
/* Choose the next batch of blocks. */
7607 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7608 bits_nr, &reada_bits);
/* Track each chosen block in @reada and start readahead for it. */
7613 for(i = 0; i < nritems; i++) {
7614 ret = add_cache_extent(reada, bits[i].start,
7619 /* fixme, get the parent transid */
7620 readahead_tree_block(root, bits[i].start,
/* Only the first block of the batch is handled below. */
7624 *last = bits[0].start;
7625 bytenr = bits[0].start;
7626 size = bits[0].size;
/* Drop the block from the pending, reada and nodes trees. */
7628 cache = lookup_cache_extent(pending, bytenr, size);
7630 remove_cache_extent(pending, cache);
7633 cache = lookup_cache_extent(reada, bytenr, size);
7635 remove_cache_extent(reada, cache);
7638 cache = lookup_cache_extent(nodes, bytenr, size);
7640 remove_cache_extent(nodes, cache);
/* An existing extent record supplies the generation for the read. */
7643 cache = lookup_cache_extent(extent_cache, bytenr, size);
7645 rec = container_of(cache, struct extent_record, cache);
7646 gen = rec->parent_generation;
7649 /* fixme, get the real parent transid */
7650 buf = read_tree_block(root, bytenr, size, gen);
7651 if (!extent_buffer_uptodate(buf)) {
7652 record_bad_block_io(root->fs_info,
7653 extent_cache, bytenr, size);
7657 nritems = btrfs_header_nritems(buf);
/*
 * Flags come from the extent tree unless init_extent_tree is set;
 * calc_extent_flag() is the other source (the exact conditions joining the
 * two calls below are not visible here).  If calc_extent_flag() cannot
 * decide, FULL_BACKREF is assumed.
 */
7660 if (!init_extent_tree) {
7661 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7662 btrfs_header_level(buf), 1, NULL,
7665 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7667 fprintf(stderr, "Couldn't calc extent flags\n");
7668 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7673 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7675 fprintf(stderr, "Couldn't calc extent flags\n");
7676 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7680 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7682 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7683 ri->objectid == btrfs_header_owner(buf)) {
7685 * Ok we got to this block from it's original owner and
7686 * we have FULL_BACKREF set. Relocation can leave
7687 * converted blocks over so this is altogether possible,
7688 * however it's not possible if the generation > the
7689 * last snapshot, so check for this case.
7691 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7692 btrfs_header_generation(buf) > ri->last_snapshot) {
7693 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7694 rec->bad_full_backref = 1;
7699 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7700 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7701 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7702 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
7706 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7707 rec->flag_block_full_backref = 1;
7711 rec->flag_block_full_backref = 0;
7713 owner = btrfs_header_owner(buf);
7716 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account free space, then dispatch each item on its key type. */
7720 if (btrfs_is_leaf(buf)) {
7721 btree_space_waste += btrfs_leaf_free_space(root, buf);
7722 for (i = 0; i < nritems; i++) {
7723 struct btrfs_file_extent_item *fi;
7724 btrfs_item_key_to_cpu(buf, &key, i);
7726 * Check key type against the leaf owner.
7727 * Could filter quite a lot of early error if
7730 if (check_type_with_root(btrfs_header_owner(buf),
7732 fprintf(stderr, "ignoring invalid key\n");
7735 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7736 process_extent_item(root, extent_cache, buf,
7740 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7741 process_extent_item(root, extent_cache, buf,
7745 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7747 btrfs_item_size_nr(buf, i);
7750 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7751 process_chunk_item(chunk_cache, &key, buf, i);
7754 if (key.type == BTRFS_DEV_ITEM_KEY) {
7755 process_device_item(dev_cache, &key, buf, i);
7758 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7759 process_block_group_item(block_group_cache,
7763 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7764 process_device_extent_item(dev_extent_cache,
7769 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7770 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7771 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree block ref: parent 0, root taken from key.offset. */
7778 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7779 ret = add_tree_backref(extent_cache,
7780 key.objectid, 0, key.offset, 0);
7783 "add_tree_backref failed (leaf tree block): %s",
/* Shared tree block ref: parent taken from key.offset, root 0. */
7787 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7788 ret = add_tree_backref(extent_cache,
7789 key.objectid, key.offset, 0, 0);
7792 "add_tree_backref failed (leaf shared block): %s",
7796 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7797 struct btrfs_extent_data_ref *ref;
7798 ref = btrfs_item_ptr(buf, i,
7799 struct btrfs_extent_data_ref);
7800 add_data_backref(extent_cache,
7802 btrfs_extent_data_ref_root(buf, ref),
7803 btrfs_extent_data_ref_objectid(buf,
7805 btrfs_extent_data_ref_offset(buf, ref),
7806 btrfs_extent_data_ref_count(buf, ref),
7807 0, root->sectorsize);
7810 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7811 struct btrfs_shared_data_ref *ref;
7812 ref = btrfs_item_ptr(buf, i,
7813 struct btrfs_shared_data_ref);
7814 add_data_backref(extent_cache,
7815 key.objectid, key.offset, 0, 0, 0,
7816 btrfs_shared_data_ref_count(buf, ref),
7817 0, root->sectorsize);
/*
 * Orphan items: a bad_item carrying the key and the owning root is queued
 * on the global delete_items list (the test on BTRFS_ORPHAN_OBJECTID
 * presumably exempts some items -- its body is not visible here).
 */
7820 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7821 struct bad_item *bad;
7823 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7827 bad = malloc(sizeof(struct bad_item));
7830 INIT_LIST_HEAD(&bad->list);
7831 memcpy(&bad->key, &key,
7832 sizeof(struct btrfs_key));
7833 bad->root_id = owner;
7834 list_add_tail(&bad->list, &delete_items);
/*
 * The remainder of the loop body handles file extent items.  Inline
 * extents and extents with disk_bytenr 0 are tested for first (presumably
 * to skip them); the rest are accounted and recorded as data backrefs
 * found through the file extent.
 */
7837 if (key.type != BTRFS_EXTENT_DATA_KEY)
7839 fi = btrfs_item_ptr(buf, i,
7840 struct btrfs_file_extent_item);
7841 if (btrfs_file_extent_type(buf, fi) ==
7842 BTRFS_FILE_EXTENT_INLINE)
7844 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7847 data_bytes_allocated +=
7848 btrfs_file_extent_disk_num_bytes(buf, fi);
7849 if (data_bytes_allocated < root->sectorsize) {
7852 data_bytes_referenced +=
7853 btrfs_file_extent_num_bytes(buf, fi);
7854 add_data_backref(extent_cache,
7855 btrfs_file_extent_disk_bytenr(buf, fi),
7856 parent, owner, key.objectid, key.offset -
7857 btrfs_file_extent_offset(buf, fi), 1, 1,
7858 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: walk the child pointers. */
7862 struct btrfs_key first_key;
7864 first_key.objectid = 0;
7867 btrfs_item_key_to_cpu(buf, &first_key, 0);
7868 level = btrfs_header_level(buf);
7869 for (i = 0; i < nritems; i++) {
7870 struct extent_record tmpl;
7872 ptr = btrfs_node_blockptr(buf, i);
7873 size = root->nodesize;
7874 btrfs_node_key_to_cpu(buf, &key, i);
/* Children at the root's drop_level are tested against its drop_key. */
7876 if ((level == ri->drop_level)
7877 && is_dropped_key(&key, &ri->drop_key)) {
7882 memset(&tmpl, 0, sizeof(tmpl));
7883 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7884 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7889 tmpl.max_size = size;
7890 ret = add_extent_rec(extent_cache, &tmpl);
7894 ret = add_tree_backref(extent_cache, ptr, parent,
7898 "add_tree_backref failed (non-leaf block): %s",
/* Queue the child in @nodes or @pending (selector not visible here). */
7904 add_pending(nodes, seen, ptr, size);
7906 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted space. */
7909 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7910 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics. */
7912 total_btree_bytes += buf->len;
7913 if (fs_root_objectid(btrfs_header_owner(buf)))
7914 total_fs_tree_bytes += buf->len;
7915 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7916 total_extent_tree_bytes += buf->len;
7917 if (!found_old_backref &&
7918 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7919 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7920 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7921 found_old_backref = 1;
7923 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for scanning and seed its extent
 * record.
 *
 * The block goes into @nodes when its level is above 0, otherwise into
 * @pending.  An extent record starting at buf->start with max_size
 * buf->len is added, followed by one tree backref:
 *  - with parent == buf->start when @objectid is
 *    BTRFS_TREE_RELOC_OBJECTID or the block's backref revision is older
 *    than BTRFS_MIXED_BACKREF_REV;
 *  - otherwise with parent 0 and root == @objectid.
 */
7927 static int add_root_to_pending(struct extent_buffer *buf,
7928 struct cache_tree *extent_cache,
7929 struct cache_tree *pending,
7930 struct cache_tree *seen,
7931 struct cache_tree *nodes,
7934 struct extent_record tmpl;
7937 if (btrfs_header_level(buf) > 0)
7938 add_pending(nodes, seen, buf->start, buf->len);
7940 add_pending(pending, seen, buf->start, buf->len);
7942 memset(&tmpl, 0, sizeof(tmpl));
7943 tmpl.start = buf->start;
7948 tmpl.max_size = buf->len;
7949 add_extent_rec(extent_cache, &tmpl);
7951 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7952 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7953 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7956 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7961 /* as we fix the tree, we might be deleting blocks that
7962 * we're tracking for repair. This hook makes sure we
7963 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent records (fs_info->fsck_extent_cache) in step
 * with extents being freed.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up.
 * An @owner at or above BTRFS_FIRST_FREE_OBJECTID is treated as a data
 * extent, otherwise as a tree block.  The matching backref's counters and
 * found_ref / found_extent_tree flags are reduced, together with the
 * record's refs and extent_item_refs, and the record is then offered to
 * maybe_free_extent_rec().
 */
7965 static int free_extent_hook(struct btrfs_trans_handle *trans,
7966 struct btrfs_root *root,
7967 u64 bytenr, u64 num_bytes, u64 parent,
7968 u64 root_objectid, u64 owner, u64 offset,
7971 struct extent_record *rec;
7972 struct cache_extent *cache;
7974 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7976 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7977 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7981 rec = container_of(cache, struct extent_record, cache);
/* Data extent: counters drop by refs_to_drop. */
7983 struct data_backref *back;
7984 back = find_data_backref(rec, parent, root_objectid, owner,
7985 offset, 1, bytenr, num_bytes);
7988 if (back->node.found_ref) {
7989 back->found_ref -= refs_to_drop;
7991 rec->refs -= refs_to_drop;
7993 if (back->node.found_extent_tree) {
7994 back->num_refs -= refs_to_drop;
7995 if (rec->extent_item_refs)
7996 rec->extent_item_refs -= refs_to_drop;
7998 if (back->found_ref == 0)
7999 back->node.found_ref = 0;
8000 if (back->num_refs == 0)
8001 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked when it is no longer found in the
 * extent tree but still has found_ref set; confirm that "&& found_ref"
 * (rather than "&& !found_ref") is the intended condition.
 */
8003 if (!back->node.found_extent_tree && back->node.found_ref) {
8004 list_del(&back->node.list);
/* Tree block: the found flags are simply cleared. */
8008 struct tree_backref *back;
8009 back = find_tree_backref(rec, parent, root_objectid);
8012 if (back->node.found_ref) {
8015 back->node.found_ref = 0;
8017 if (back->node.found_extent_tree) {
8018 if (rec->extent_item_refs)
8019 rec->extent_item_refs--;
8020 back->node.found_extent_tree = 0;
8022 if (!back->node.found_extent_tree && back->node.found_ref) {
8023 list_del(&back->node.list);
8027 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that describe the extent at bytenr.
 *
 * The search starts from a key with objectid == bytenr and offset
 * (u64)-1 and works backwards.  Items whose objectid matches and whose
 * type is an extent item, metadata item or one of the backref types are
 * removed with btrfs_del_item().  For any other type the search key is
 * moved just below the found key and the search continues.
 *
 * After an EXTENT_ITEM or METADATA_ITEM is deleted,
 * btrfs_update_block_group() is called, with `bytes` set to key.offset for
 * an EXTENT_ITEM and to root->nodesize for a METADATA_ITEM.
 */
8032 static int delete_extent_records(struct btrfs_trans_handle *trans,
8033 struct btrfs_root *root,
8034 struct btrfs_path *path,
8037 struct btrfs_key key;
8038 struct btrfs_key found_key;
8039 struct extent_buffer *leaf;
8044 key.objectid = bytenr;
8046 key.offset = (u64)-1;
8049 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8056 if (path->slots[0] == 0)
8062 leaf = path->nodes[0];
8063 slot = path->slots[0];
8065 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8066 if (found_key.objectid != bytenr)
/* Not an extent/backref item: move the search key just below it. */
8069 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8070 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8071 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8072 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8073 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8074 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8075 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8076 btrfs_release_path(path);
8077 if (found_key.type == 0) {
8078 if (found_key.offset == 0)
8080 key.offset = found_key.offset - 1;
8081 key.type = found_key.type;
8083 key.type = found_key.type - 1;
8084 key.offset = (u64)-1;
8088 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8089 found_key.objectid, found_key.type, found_key.offset);
8091 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8094 btrfs_release_path(path);
/* Removing the extent item itself also updates block group accounting. */
8096 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8097 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8098 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8099 found_key.offset : root->nodesize;
8101 ret = btrfs_update_block_group(trans, root, bytenr,
8108 btrfs_release_path(path);
8113 * for a single backref, this will allocate a new extent
8114 * and add the backref to it.
/*
 * Write the extent item and one backref for @rec into the extent tree.
 *
 * Two phases are visible:
 *  1. An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted with a
 *     ref count of 0 and rec->generation.  Data extents get
 *     BTRFS_EXTENT_FLAG_DATA; tree blocks get a btrfs_tree_block_info
 *     (level and a key built from rec->info_objectid) and
 *     BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.  Block group accounting is
 *     then updated.  This phase presumably runs only when @allocated is
 *     zero -- the guarding condition is not visible here, TODO confirm.
 *  2. The backref @back is added through btrfs_inc_extent_ref(): once per
 *     found reference for a data backref, once for a tree backref.  A
 *     full backref uses the backref's parent.
 *
 * rec->max_size is raised to at least the extent root's nodesize first
 * (the condition around that statement is not visible here).
 */
8116 static int record_extent(struct btrfs_trans_handle *trans,
8117 struct btrfs_fs_info *info,
8118 struct btrfs_path *path,
8119 struct extent_record *rec,
8120 struct extent_backref *back,
8121 int allocated, u64 flags)
8124 struct btrfs_root *extent_root = info->extent_root;
8125 struct extent_buffer *leaf;
8126 struct btrfs_key ins_key;
8127 struct btrfs_extent_item *ei;
8128 struct data_backref *dback;
8129 struct btrfs_tree_block_info *bi;
8132 rec->max_size = max_t(u64, rec->max_size,
8133 info->extent_root->nodesize);
8136 u32 item_size = sizeof(*ei);
/* Extra room for the tree block info (condition not visible here). */
8139 item_size += sizeof(*bi);
8141 ins_key.objectid = rec->start;
8142 ins_key.offset = rec->max_size;
8143 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8145 ret = btrfs_insert_empty_item(trans, extent_root, path,
8146 &ins_key, item_size);
8150 leaf = path->nodes[0];
8151 ei = btrfs_item_ptr(leaf, path->slots[0],
8152 struct btrfs_extent_item);
/* Refs start at 0; they are raised by btrfs_inc_extent_ref() below. */
8154 btrfs_set_extent_refs(leaf, ei, 0);
8155 btrfs_set_extent_generation(leaf, ei, rec->generation);
8157 if (back->is_data) {
8158 btrfs_set_extent_flags(leaf, ei,
8159 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' on the declaration below. */
8161 struct btrfs_disk_key copy_key;;
8163 bi = (struct btrfs_tree_block_info *)(ei + 1);
8164 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): "©_key" in the lines below looks like a mis-encoded
 * "&copy_key" (address of the local declared above) -- confirm and repair.
 */
8167 btrfs_set_disk_key_objectid(©_key,
8168 rec->info_objectid);
8169 btrfs_set_disk_key_type(©_key, 0);
8170 btrfs_set_disk_key_offset(©_key, 0);
8172 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8173 btrfs_set_tree_block_key(leaf, bi, ©_key);
8175 btrfs_set_extent_flags(leaf, ei,
8176 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8179 btrfs_mark_buffer_dirty(leaf);
8180 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8181 rec->max_size, 1, 0);
8184 btrfs_release_path(path);
/* Phase 2: add the backref. */
8187 if (back->is_data) {
8191 dback = to_data_backref(back);
8192 if (back->full_backref)
8193 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference that was found. */
8197 for (i = 0; i < dback->found_ref; i++) {
8198 /* if parent != 0, we're doing a full backref
8199 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8200 * just makes the backref allocator create a data
8203 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8204 rec->start, rec->max_size,
8208 BTRFS_FIRST_FREE_OBJECTID :
8214 fprintf(stderr, "adding new data backref"
8215 " on %llu %s %llu owner %llu"
8216 " offset %llu found %d\n",
8217 (unsigned long long)rec->start,
8218 back->full_backref ?
8220 back->full_backref ?
8221 (unsigned long long)parent :
8222 (unsigned long long)dback->root,
8223 (unsigned long long)dback->owner,
8224 (unsigned long long)dback->offset,
8228 struct tree_backref *tback;
8230 tback = to_tree_backref(back);
8231 if (back->full_backref)
8232 parent = tback->parent;
8236 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8237 rec->start, rec->max_size,
8238 parent, tback->root, 0, 0);
8239 fprintf(stderr, "adding new tree backref on "
8240 "start %llu len %llu parent %llu root %llu\n",
8241 rec->start, rec->max_size, parent, tback->root);
8244 btrfs_release_path(path);
/*
 * Search the list @entries for an extent_entry whose bytenr and bytes both
 * equal the arguments.
 *
 * The return statements are not visible here; presumably the matching
 * entry is returned, and NULL when there is none -- TODO confirm.
 */
8248 static struct extent_entry *find_entry(struct list_head *entries,
8249 u64 bytenr, u64 bytes)
8251 struct extent_entry *entry = NULL;
8253 list_for_each_entry(entry, entries, list) {
8254 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Choose the entry on 'entries' that is most likely correct, judged by
 * entry->count.
 *
 * An entry whose 'broken' counter equals its 'count' is not trusted. While
 * scanning, 'prev' and 'best' are compared with the current entry by count;
 * when two candidates have the same count the result is considered
 * undecided and the scan has to keep going.
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * result is NULL when no clear winner exists -- confirm.
 */
8261 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8263 struct extent_entry *entry, *best = NULL, *prev = NULL;
8265 list_for_each_entry(entry, entries, list) {
8267 * If there are as many broken entries as entries then we know
8268 * not to trust this particular entry.
8270 if (entry->broken == entry->count)
8274 * Special case, when there are only two entries and 'best' is
8284 * If our current entry == best then we can't be sure our best
8285 * is really the best, so we need to keep searching.
8287 if (best && best->count == entry->count) {
8293 /* Prev == entry, not good enough, have to keep searching */
8294 if (!prev->broken && prev->count == entry->count)
8298 best = (prev->count > entry->count) ? prev : entry;
8299 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the extent 'entry'
 * that verify_backrefs() settled on.
 *
 * info:  fs_info used to read the subvolume root named by dback->root
 * path:  caller-supplied path; released before the transaction starts and
 *        again after the commit
 * dback: data backref whose file extent (owner/offset in dback->root) is
 *        being corrected
 * entry: the bytenr/bytes pair the file extent should describe
 *
 * The item is located twice: first read-only, starting at
 * (owner, EXTENT_DATA, offset) and looking for the file extent whose
 * disk_bytenr/disk_num_bytes equal the backref's values, then again inside a
 * transaction with cow enabled so the leaf can be modified. disk_bytenr and
 * the file extent offset are rebased onto entry->bytenr, disk_num_bytes is
 * set to entry->bytes, and ram_bytes is rewritten only for uncompressed
 * extents. A compressed extent with a differing bytenr, and a ref that falls
 * outside the entry, only produce a message asking for a btrfs-image.
 *
 * The visible final return yields ret when it is non-zero and otherwise the
 * result of btrfs_commit_transaction(); PTR_ERR(trans) is returned on the
 * transaction-start error path.
 */
8307 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8308 struct data_backref *dback, struct extent_entry *entry)
8310 struct btrfs_trans_handle *trans;
8311 struct btrfs_root *root;
8312 struct btrfs_file_extent_item *fi;
8313 struct extent_buffer *leaf;
8314 struct btrfs_key key;
8318 key.objectid = dback->root;
8319 key.type = BTRFS_ROOT_ITEM_KEY;
8320 key.offset = (u64)-1;
8321 root = btrfs_read_fs_root(info, &key);
8323 fprintf(stderr, "Couldn't find root for our ref\n");
8328 * The backref points to the original offset of the extent if it was
8329 * split, so we need to search down to the offset we have and then walk
8330 * forward until we find the backref we're looking for.
8332 key.objectid = dback->owner;
8333 key.type = BTRFS_EXTENT_DATA_KEY;
8334 key.offset = dback->offset;
8335 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8337 fprintf(stderr, "Error looking up ref %d\n", ret);
8342 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8343 ret = btrfs_next_leaf(root, path);
8345 fprintf(stderr, "Couldn't find our ref, next\n");
8349 leaf = path->nodes[0];
8350 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8351 if (key.objectid != dback->owner ||
8352 key.type != BTRFS_EXTENT_DATA_KEY) {
8353 fprintf(stderr, "Couldn't find our ref, search\n");
8356 fi = btrfs_item_ptr(leaf, path->slots[0],
8357 struct btrfs_file_extent_item);
8358 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8359 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8361 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8366 btrfs_release_path(path);
8368 trans = btrfs_start_transaction(root, 1);
8370 return PTR_ERR(trans);
8373 * Ok we have the key of the file extent we want to fix, now we can cow
8374 * down to the thing and fix it.
8376 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8378 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8379 key.objectid, key.type, key.offset, ret);
8383 fprintf(stderr, "Well that's odd, we just found this key "
8384 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8389 leaf = path->nodes[0];
8390 fi = btrfs_item_ptr(leaf, path->slots[0],
8391 struct btrfs_file_extent_item);
8393 if (btrfs_file_extent_compression(leaf, fi) &&
8394 dback->disk_bytenr != entry->bytenr) {
8395 fprintf(stderr, "Ref doesn't match the record start and is "
8396 "compressed, please take a btrfs-image of this file "
8397 "system and send it to a btrfs developer so they can "
8398 "complete this functionality for bytenr %Lu\n",
8399 dback->disk_bytenr);
8404 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8405 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8406 } else if (dback->disk_bytenr > entry->bytenr) {
8407 u64 off_diff, offset;
8409 off_diff = dback->disk_bytenr - entry->bytenr;
8410 offset = btrfs_file_extent_offset(leaf, fi);
8411 if (dback->disk_bytenr + offset +
8412 btrfs_file_extent_num_bytes(leaf, fi) >
8413 entry->bytenr + entry->bytes) {
8414 fprintf(stderr, "Ref is past the entry end, please "
8415 "take a btrfs-image of this file system and "
8416 "send it to a btrfs developer, ref %Lu\n",
8417 dback->disk_bytenr);
8422 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8423 btrfs_set_file_extent_offset(leaf, fi, offset);
8424 } else if (dback->disk_bytenr < entry->bytenr) {
8427 offset = btrfs_file_extent_offset(leaf, fi);
8428 if (dback->disk_bytenr + offset < entry->bytenr) {
8429 fprintf(stderr, "Ref is before the entry start, please"
8430 " take a btrfs-image of this file system and "
8431 "send it to a btrfs developer, ref %Lu\n",
8432 dback->disk_bytenr);
8437 offset += dback->disk_bytenr;
8438 offset -= entry->bytenr;
8439 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440 btrfs_set_file_extent_offset(leaf, fi, offset);
8443 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8446 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8447 * only do this if we aren't using compression, otherwise it's a
8450 if (!btrfs_file_extent_compression(leaf, fi))
8451 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8453 printf("ram bytes may be wrong?\n");
8454 btrfs_mark_buffer_dirty(leaf);
8456 err = btrfs_commit_transaction(trans, root);
8457 btrfs_release_path(path);
8458 return ret ? ret : err;
/*
 * Make the data backrefs of 'rec' agree on a single (bytenr, bytes) pair and
 * repair the file extents that disagree.
 *
 * info: filesystem being checked, handed through to repair_ref()
 * path: scratch path handed through to repair_ref()
 * rec:  extent record whose backrefs are examined
 *
 * Non-full data backrefs with found_ref set are collected into a local list
 * of extent_entry records, one per distinct (disk_bytenr, bytes) pair. A
 * backref that differs from rec->start/rec->nr, or is flagged broken, counts
 * as a mismatch. With at most one entry and no mismatch nothing needs
 * fixing. Otherwise find_most_right_entry() picks a winner; when the
 * backrefs cannot decide, the extent record's own (start, nr) is looked up,
 * or added as a new entry, and the vote is repeated. A winner whose bytenr
 * differs from rec->start is only reported. Each data backref that still
 * differs from the winner is passed to repair_ref(), and the local entries
 * are unlinked at the end.
 *
 * NOTE(review): the return statements are not visible here. The comment near
 * the end says the cache must be dropped and rescanned after a repair, and
 * check_extent_refs() special-cases -EAGAIN, so presumably that is what a
 * successful repair returns -- confirm.
 */
8461 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8462 struct extent_record *rec)
8464 struct extent_backref *back;
8465 struct data_backref *dback;
8466 struct extent_entry *entry, *best = NULL;
8469 int broken_entries = 0;
8474 * Metadata is easy and the backrefs should always agree on bytenr and
8475 * size, if not we've got bigger issues.
8480 list_for_each_entry(back, &rec->backrefs, list) {
8481 if (back->full_backref || !back->is_data)
8484 dback = to_data_backref(back);
8487 * We only pay attention to backrefs that we found a real
8490 if (dback->found_ref == 0)
8494 * For now we only catch when the bytes don't match, not the
8495 * bytenr. We can easily do this at the same time, but I want
8496 * to have a fs image to test on before we just add repair
8497 * functionality willy-nilly so we know we won't screw up the
8501 entry = find_entry(&entries, dback->disk_bytenr,
8504 entry = malloc(sizeof(struct extent_entry));
8509 memset(entry, 0, sizeof(*entry));
8510 entry->bytenr = dback->disk_bytenr;
8511 entry->bytes = dback->bytes;
8512 list_add_tail(&entry->list, &entries);
8517 * If we only have on entry we may think the entries agree when
8518 * in reality they don't so we have to do some extra checking.
8520 if (dback->disk_bytenr != rec->start ||
8521 dback->bytes != rec->nr || back->broken)
8532 /* Yay all the backrefs agree, carry on good sir */
8533 if (nr_entries <= 1 && !mismatch)
8536 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8537 "%Lu\n", rec->start);
8540 * First we want to see if the backrefs can agree amongst themselves who
8541 * is right, so figure out which one of the entries has the highest
8544 best = find_most_right_entry(&entries);
8547 * Ok so we may have an even split between what the backrefs think, so
8548 * this is where we use the extent ref to see what it thinks.
8551 entry = find_entry(&entries, rec->start, rec->nr);
8552 if (!entry && (!broken_entries || !rec->found_rec)) {
8553 fprintf(stderr, "Backrefs don't agree with each other "
8554 "and extent record doesn't agree with anybody,"
8555 " so we can't fix bytenr %Lu bytes %Lu\n",
8556 rec->start, rec->nr);
8559 } else if (!entry) {
8561 * Ok our backrefs were broken, we'll assume this is the
8562 * correct value and add an entry for this range.
8564 entry = malloc(sizeof(struct extent_entry));
8569 memset(entry, 0, sizeof(*entry));
8570 entry->bytenr = rec->start;
8571 entry->bytes = rec->nr;
8572 list_add_tail(&entry->list, &entries);
8576 best = find_most_right_entry(&entries);
8578 fprintf(stderr, "Backrefs and extent record evenly "
8579 "split on who is right, this is going to "
8580 "require user input to fix bytenr %Lu bytes "
8581 "%Lu\n", rec->start, rec->nr);
8588 * I don't think this can happen currently as we'll abort() if we catch
8589 * this case higher up, but in case somebody removes that we still can't
8590 * deal with it properly here yet, so just bail out of that's the case.
8592 if (best->bytenr != rec->start) {
8593 fprintf(stderr, "Extent start and backref starts don't match, "
8594 "please use btrfs-image on this file system and send "
8595 "it to a btrfs developer so they can make fsck fix "
8596 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8597 rec->start, rec->nr);
8603 * Ok great we all agreed on an extent record, let's go find the real
8604 * references and fix up the ones that don't match.
8606 list_for_each_entry(back, &rec->backrefs, list) {
8607 if (back->full_backref || !back->is_data)
8610 dback = to_data_backref(back);
8613 * Still ignoring backrefs that don't have a real ref attached
8616 if (dback->found_ref == 0)
8619 if (dback->bytes == best->bytes &&
8620 dback->disk_bytenr == best->bytenr)
8623 ret = repair_ref(info, path, dback, best);
8629 * Ok we messed with the actual refs, which means we need to drop our
8630 * entire cache and go back and rescan. I know this is a huge pain and
8631 * adds a lot of extra work, but it's the only way to be safe. Once all
8632 * the backrefs agree we may not need to do anything to the extent
8637 while (!list_empty(&entries)) {
8638 entry = list_entry(entries.next, struct extent_entry, list);
8639 list_del_init(&entry->list);
/*
 * Rework an extent_record that has exactly one duplicate attached and was
 * not itself found as an extent item.
 *
 * extent_cache: cache holding the extent records
 * rec:          record taken off the duplicate_extents list by the caller
 *
 * The function bails out early when rec->found_rec is set or more than one
 * duplicate is attached. Otherwise 'rec' is removed from 'extent_cache' and
 * its first duplicate ('good') takes over, inheriting rec->refs and
 * rec->backrefs. Further records returned by lookup_cache_extent() are
 * folded in: one that has found_rec set or duplicates of its own becomes a
 * duplicate of 'good' (which is then queued on duplicate_extents); any other
 * just hands over its refs and backrefs. Each folded record is removed from
 * the cache and 'good' is inserted at the end.
 *
 * The visible final return yields 0 when 'good' has duplicates that need
 * deleting and 1 otherwise.
 */
8645 static int process_duplicates(struct cache_tree *extent_cache,
8646 struct extent_record *rec)
8648 struct extent_record *good, *tmp;
8649 struct cache_extent *cache;
8653 * If we found a extent record for this extent then return, or if we
8654 * have more than one duplicate we are likely going to need to delete
8657 if (rec->found_rec || rec->num_duplicates > 1)
8660 /* Shouldn't happen but just in case */
8661 BUG_ON(!rec->num_duplicates);
8664 * So this happens if we end up with a backref that doesn't match the
8665 * actual extent entry. So either the backref is bad or the extent
8666 * entry is bad. Either way we want to have the extent_record actually
8667 * reflect what we found in the extent_tree, so we need to take the
8668 * duplicate out and use that as the extent_record since the only way we
8669 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8671 remove_cache_extent(extent_cache, &rec->cache);
8673 good = to_extent_record(rec->dups.next);
8674 list_del_init(&good->list);
8675 INIT_LIST_HEAD(&good->backrefs);
8676 INIT_LIST_HEAD(&good->dups);
8677 good->cache.start = good->start;
8678 good->cache.size = good->nr;
8679 good->content_checked = 0;
8680 good->owner_ref_checked = 0;
8681 good->num_duplicates = 0;
8682 good->refs = rec->refs;
8683 list_splice_init(&rec->backrefs, &good->backrefs);
8685 cache = lookup_cache_extent(extent_cache, good->start,
8689 tmp = container_of(cache, struct extent_record, cache);
8692 * If we find another overlapping extent and it's found_rec is
8693 * set then it's a duplicate and we need to try and delete
8696 if (tmp->found_rec || tmp->num_duplicates > 0) {
8697 if (list_empty(&good->list))
8698 list_add_tail(&good->list,
8699 &duplicate_extents);
8700 good->num_duplicates += tmp->num_duplicates + 1;
8701 list_splice_init(&tmp->dups, &good->dups);
8702 list_del_init(&tmp->list);
8703 list_add_tail(&tmp->list, &good->dups);
8704 remove_cache_extent(extent_cache, &tmp->cache);
8709 * Ok we have another non extent item backed extent rec, so lets
8710 * just add it to this extent and carry on like we did above.
8712 good->refs += tmp->refs;
8713 list_splice_init(&tmp->backrefs, &good->backrefs);
8714 remove_cache_extent(extent_cache, &tmp->cache);
8717 ret = insert_cache_extent(extent_cache, &good->cache);
8720 return good->num_duplicates ? 0 : 1;
/*
 * Delete extent items belonging to the duplicate records attached to 'rec'.
 *
 * root: any root of the filesystem; the work is done on
 *       root->fs_info->extent_root
 * rec:  record whose 'dups' list holds the duplicates
 *
 * The dups list is first scanned for the record that covers all the others;
 * ranges that overlap without one containing the other are reported as not
 * handled. 'rec' and entries of rec->dups are gathered on a local
 * delete_list. Inside one transaction each listed record is tested for
 * found_rec, its (start, EXTENT_ITEM, nr) key is searched with cow enabled
 * and the item is removed with btrfs_del_item(); a metadata record is not
 * expected here and triggers a message. Afterwards the entries of both lists
 * are unlinked and the path is released.
 *
 * The visible final return yields ret when it is non-zero and nr_del
 * otherwise (the caller describes this as the number of entries deleted).
 * rec->num_duplicates is reset to 0 when nothing failed and nothing was
 * deleted.
 */
8723 static int delete_duplicate_records(struct btrfs_root *root,
8724 struct extent_record *rec)
8726 struct btrfs_trans_handle *trans;
8727 LIST_HEAD(delete_list);
8728 struct btrfs_path path;
8729 struct extent_record *tmp, *good, *n;
8732 struct btrfs_key key;
8734 btrfs_init_path(&path);
8737 /* Find the record that covers all of the duplicates. */
8738 list_for_each_entry(tmp, &rec->dups, list) {
8739 if (good->start < tmp->start)
8741 if (good->nr > tmp->nr)
8744 if (tmp->start + tmp->nr < good->start + good->nr) {
8745 fprintf(stderr, "Ok we have overlapping extents that "
8746 "aren't completely covered by each other, this "
8747 "is going to require more careful thought. "
8748 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8749 tmp->start, tmp->nr, good->start, good->nr);
8756 list_add_tail(&rec->list, &delete_list);
8758 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8761 list_move_tail(&tmp->list, &delete_list);
8764 root = root->fs_info->extent_root;
8765 trans = btrfs_start_transaction(root, 1);
8766 if (IS_ERR(trans)) {
8767 ret = PTR_ERR(trans);
8771 list_for_each_entry(tmp, &delete_list, list) {
8772 if (tmp->found_rec == 0)
8774 key.objectid = tmp->start;
8775 key.type = BTRFS_EXTENT_ITEM_KEY;
8776 key.offset = tmp->nr;
8778 /* Shouldn't happen but just in case */
8779 if (tmp->metadata) {
8780 fprintf(stderr, "Well this shouldn't happen, extent "
8781 "record overlaps but is metadata? "
8782 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8786 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8792 ret = btrfs_del_item(trans, root, &path);
8795 btrfs_release_path(&path);
8798 err = btrfs_commit_transaction(trans, root);
8802 while (!list_empty(&delete_list)) {
8803 tmp = to_extent_record(delete_list.next);
8804 list_del_init(&tmp->list);
8810 while (!list_empty(&rec->dups)) {
8811 tmp = to_extent_record(rec->dups.next);
8812 list_del_init(&tmp->list);
8816 btrfs_release_path(&path);
8818 if (!ret && !nr_del)
8819 rec->num_duplicates = 0;
8821 return ret ? ret : nr_del;
/*
 * Try to find file extents for the data backrefs of 'rec' that have not been
 * matched to a real reference yet.
 *
 * info:         filesystem being checked
 * path:         scratch path; released after each lookup
 * extent_cache: cache of extent records, used to see whether the file
 *               extent's bytenr already has a record of its own
 * rec:          extent record whose backrefs are examined
 *
 * For each non-full data backref whose found_ref is still 0, the subvolume
 * root dback->root is read and (owner, EXTENT_DATA, offset) is searched
 * read-only. A missing root (-ENOENT) marks the ref as bad; another root
 * error is returned via PTR_ERR(). When a file extent is present its
 * disk_bytenr and disk_num_bytes are read and the bytenr is looked up in
 * 'extent_cache'; per the comment below, the backref is only adopted when
 * that bytenr has no extent item of its own. An adopted backref gets
 * found_ref incremented and its disk_bytenr/bytes overwritten with the
 * values read from the file extent.
 *
 * NOTE(review): the statement that flags an adopted backref as not to be
 * trusted, and the final return, are not visible here.
 */
8824 static int find_possible_backrefs(struct btrfs_fs_info *info,
8825 struct btrfs_path *path,
8826 struct cache_tree *extent_cache,
8827 struct extent_record *rec)
8829 struct btrfs_root *root;
8830 struct extent_backref *back;
8831 struct data_backref *dback;
8832 struct cache_extent *cache;
8833 struct btrfs_file_extent_item *fi;
8834 struct btrfs_key key;
8838 list_for_each_entry(back, &rec->backrefs, list) {
8839 /* Don't care about full backrefs (poor unloved backrefs) */
8840 if (back->full_backref || !back->is_data)
8843 dback = to_data_backref(back);
8845 /* We found this one, we don't need to do a lookup */
8846 if (dback->found_ref)
8849 key.objectid = dback->root;
8850 key.type = BTRFS_ROOT_ITEM_KEY;
8851 key.offset = (u64)-1;
8853 root = btrfs_read_fs_root(info, &key);
8855 /* No root, definitely a bad ref, skip */
8856 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8858 /* Other err, exit */
8860 return PTR_ERR(root);
8862 key.objectid = dback->owner;
8863 key.type = BTRFS_EXTENT_DATA_KEY;
8864 key.offset = dback->offset;
8865 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8867 btrfs_release_path(path);
8870 /* Didn't find it, we can carry on */
8875 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8876 struct btrfs_file_extent_item);
8877 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8878 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8879 btrfs_release_path(path);
8880 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8882 struct extent_record *tmp;
8883 tmp = container_of(cache, struct extent_record, cache);
8886 * If we found an extent record for the bytenr for this
8887 * particular backref then we can't add it to our
8888 * current extent record. We only want to add backrefs
8889 * that don't have a corresponding extent item in the
8890 * extent tree since they likely belong to this record
8891 * and we need to fix it if it doesn't match bytenrs.
8897 dback->found_ref += 1;
8898 dback->disk_bytenr = bytenr;
8899 dback->bytes = bytes;
8902 * Set this so the verify backref code knows not to trust the
8903 * values in this backref.
8912 * Record orphan data ref into corresponding root.
8914 * Return 0 if the extent item contains data ref and recorded.
8915 * Return 1 if the extent item contains no useful data ref
8916 * In that case, it may contain only shared_dataref or metadata backref
8917 * or the file extent exists(this should be handled by the extent bytenr
8919 * Return <0 if something goes wrong.
/*
 * Walks rec->backrefs and considers only non-full data backrefs that were
 * seen in the extent tree. For such a backref the owning root is read
 * (unreadable roots are skipped) and (owner, EXTENT_DATA, offset) is probed
 * read-only. An orphan_data_extent carrying the backref's root, owner and
 * offset plus the record's cache start and size is then allocated and linked
 * to the destination root, and recorded_data_ref is set.
 *
 * The visible final return is !recorded_data_ref.
 *
 * NOTE(review): list_add() conventionally takes (new, head). The call below
 * passes &dest_root->orphan_data_extents first and &orphan->list second,
 * which looks swapped and would drop earlier orphans from the root's list --
 * confirm against the list helper in use.
 */
8921 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8922 struct extent_record *rec)
8924 struct btrfs_key key;
8925 struct btrfs_root *dest_root;
8926 struct extent_backref *back;
8927 struct data_backref *dback;
8928 struct orphan_data_extent *orphan;
8929 struct btrfs_path path;
8930 int recorded_data_ref = 0;
8935 btrfs_init_path(&path);
8936 list_for_each_entry(back, &rec->backrefs, list) {
8937 if (back->full_backref || !back->is_data ||
8938 !back->found_extent_tree)
8940 dback = to_data_backref(back);
8941 if (dback->found_ref)
8943 key.objectid = dback->root;
8944 key.type = BTRFS_ROOT_ITEM_KEY;
8945 key.offset = (u64)-1;
8947 dest_root = btrfs_read_fs_root(fs_info, &key);
8949 /* For non-exist root we just skip it */
8950 if (IS_ERR(dest_root) || !dest_root)
8953 key.objectid = dback->owner;
8954 key.type = BTRFS_EXTENT_DATA_KEY;
8955 key.offset = dback->offset;
8957 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8958 btrfs_release_path(&path);
8960 * For ret < 0, it's OK since the fs-tree may be corrupted,
8961 * we need to record it for inode/file extent rebuild.
8962 * For ret > 0, we record it only for file extent rebuild.
8963 * For ret == 0, the file extent exists but only bytenr
8964 * mismatch, let the original bytenr fix routine to handle,
8970 orphan = malloc(sizeof(*orphan));
8975 INIT_LIST_HEAD(&orphan->list);
8976 orphan->root = dback->root;
8977 orphan->objectid = dback->owner;
8978 orphan->offset = dback->offset;
8979 orphan->disk_bytenr = rec->cache.start;
8980 orphan->disk_len = rec->cache.size;
8981 list_add(&dest_root->orphan_data_extents, &orphan->list);
8982 recorded_data_ref = 1;
8985 btrfs_release_path(&path);
8987 return !recorded_data_ref;
8993 * when an incorrect extent item is found, this will delete
8994 * all of the existing entries for it and recreate them
8995 * based on what the tree scan found.
/*
 * info:         filesystem being repaired
 * extent_cache: cache of extent records, passed on to
 *               find_possible_backrefs()
 * rec:          extent record whose references are rebuilt
 *
 * Sequence visible below: BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the
 * flags when rec->flag_block_full_backref is set. For a data record whose
 * ref counts disagree, find_possible_backrefs() runs first, followed by
 * verify_backrefs(). A transaction is then started on the extent root,
 * delete_extent_records() removes the existing items, the corrupt-block
 * cache is consulted so that no refs are added to a corrupt block, and
 * record_extent() is called for the backrefs that have found_ref set
 * (clearing rec->bad_full_backref). The transaction is committed and a
 * "Repaired extent references" message is printed; the path is released on
 * the way out.
 */
8997 static int fixup_extent_refs(struct btrfs_fs_info *info,
8998 struct cache_tree *extent_cache,
8999 struct extent_record *rec)
9001 struct btrfs_trans_handle *trans = NULL;
9003 struct btrfs_path path;
9004 struct list_head *cur = rec->backrefs.next;
9005 struct cache_extent *cache;
9006 struct extent_backref *back;
9010 if (rec->flag_block_full_backref)
9011 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9013 btrfs_init_path(&path);
9014 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9016 * Sometimes the backrefs themselves are so broken they don't
9017 * get attached to any meaningful rec, so first go back and
9018 * check any of our backrefs that we couldn't find and throw
9019 * them into the list if we find the backref so that
9020 * verify_backrefs can figure out what to do.
9022 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9027 /* step one, make sure all of the backrefs agree */
9028 ret = verify_backrefs(info, &path, rec);
9032 trans = btrfs_start_transaction(info->extent_root, 1);
9033 if (IS_ERR(trans)) {
9034 ret = PTR_ERR(trans);
9038 /* step two, delete all the existing records */
9039 ret = delete_extent_records(trans, info->extent_root, &path,
9045 /* was this block corrupt? If so, don't add references to it */
9046 cache = lookup_cache_extent(info->corrupt_blocks,
9047 rec->start, rec->max_size);
9053 /* step three, recreate all the refs we did find */
9054 while(cur != &rec->backrefs) {
9055 back = to_extent_backref(cur);
9059 * if we didn't find any references, don't create a
9062 if (!back->found_ref)
9065 rec->bad_full_backref = 0;
9066 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9074 int err = btrfs_commit_transaction(trans, info->extent_root);
9080 fprintf(stderr, "Repaired extent references for %llu\n",
9081 (unsigned long long)rec->start);
9083 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of 'rec' so
 * that it matches rec->flag_block_full_backref.
 *
 * fs_info: filesystem being repaired; the work is done on its extent root
 * rec:     extent record whose on-disk flags are corrected
 *
 * The item is searched as (start, METADATA_ITEM, info_level) for metadata
 * records and as (start, EXTENT_ITEM, max_size) otherwise, with cow enabled
 * inside a transaction of its own. On the lookup-failure paths the path is
 * released and the transaction is still committed. Otherwise the flags are
 * read, adjusted and written back, the leaf is marked dirty, and ret takes
 * the result of btrfs_commit_transaction(). PTR_ERR(trans) is returned on
 * the transaction-start error path.
 */
9087 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9088 struct extent_record *rec)
9090 struct btrfs_trans_handle *trans;
9091 struct btrfs_root *root = fs_info->extent_root;
9092 struct btrfs_path path;
9093 struct btrfs_extent_item *ei;
9094 struct btrfs_key key;
9098 key.objectid = rec->start;
9099 if (rec->metadata) {
9100 key.type = BTRFS_METADATA_ITEM_KEY;
9101 key.offset = rec->info_level;
9103 key.type = BTRFS_EXTENT_ITEM_KEY;
9104 key.offset = rec->max_size;
9107 trans = btrfs_start_transaction(root, 0);
9109 return PTR_ERR(trans);
9111 btrfs_init_path(&path);
9112 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9114 btrfs_release_path(&path);
9115 btrfs_commit_transaction(trans, root);
9118 fprintf(stderr, "Didn't find extent for %llu\n",
9119 (unsigned long long)rec->start);
9120 btrfs_release_path(&path);
9121 btrfs_commit_transaction(trans, root);
9125 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9126 struct btrfs_extent_item);
9127 flags = btrfs_extent_flags(path.nodes[0], ei);
9128 if (rec->flag_block_full_backref) {
9129 fprintf(stderr, "setting full backref on %llu\n",
9130 (unsigned long long)key.objectid);
9131 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9133 fprintf(stderr, "clearing full backref on %llu\n",
9134 (unsigned long long)key.objectid);
9135 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9137 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9138 btrfs_mark_buffer_dirty(path.nodes[0]);
9139 btrfs_release_path(&path);
9140 ret = btrfs_commit_transaction(trans, root);
9142 fprintf(stderr, "Repaired extent flags for %llu\n",
9143 (unsigned long long)rec->start);
9148 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent's pointer to one corrupt block from the extent tree.
 *
 * trans:   running transaction used for the cow search
 * info:    filesystem being repaired; only info->extent_root is touched
 * corrupt: describes the bad block (its key, level and cache.start bytenr)
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1). If the slot the search ended on
 * does not point at corrupt->cache.start, every slot of that node is scanned
 * for the bytenr instead. When the pointer is located it is announced and
 * dropped with btrfs_del_ptr(); the path is released afterwards.
 */
9149 static int prune_one_block(struct btrfs_trans_handle *trans,
9150 struct btrfs_fs_info *info,
9151 struct btrfs_corrupt_block *corrupt)
9154 struct btrfs_path path;
9155 struct extent_buffer *eb;
9159 int level = corrupt->level + 1;
9161 btrfs_init_path(&path);
9163 /* we want to stop at the parent to our busted block */
9164 path.lowest_level = level;
9166 ret = btrfs_search_slot(trans, info->extent_root,
9167 &corrupt->key, &path, -1, 1);
9172 eb = path.nodes[level];
9179 * hopefully the search gave us the block we want to prune,
9180 * lets try that first
9182 slot = path.slots[level];
9183 found = btrfs_node_blockptr(eb, slot);
9184 if (found == corrupt->cache.start)
9187 nritems = btrfs_header_nritems(eb);
9189 /* the search failed, lets scan this node and hope we find it */
9190 for (slot = 0; slot < nritems; slot++) {
9191 found = btrfs_node_blockptr(eb, slot);
9192 if (found == corrupt->cache.start)
9196 * we couldn't find the bad block. TODO, search all the nodes for pointers
9199 if (eb == info->extent_root->node) {
9204 btrfs_release_path(&path);
9209 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9210 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9213 btrfs_release_path(&path);
/*
 * Prune what is recorded in info->corrupt_blocks from the extent tree: a
 * cache entry is fetched with search_cache_extent(), handed to
 * prune_one_block() inside a transaction on the extent root, and then
 * removed from the cache.
 *
 * PTR_ERR(trans) is returned on the transaction-start error path; the
 * visible final return is the result of btrfs_commit_transaction().
 *
 * NOTE(review): the return value of prune_one_block() is not used.
 */
9217 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9219 struct btrfs_trans_handle *trans = NULL;
9220 struct cache_extent *cache;
9221 struct btrfs_corrupt_block *corrupt;
9224 cache = search_cache_extent(info->corrupt_blocks, 0);
9228 trans = btrfs_start_transaction(info->extent_root, 1);
9230 return PTR_ERR(trans);
9232 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9233 prune_one_block(trans, info, corrupt);
9234 remove_cache_extent(info->corrupt_blocks, cache);
9237 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear EXTENT_DIRTY ranges found in fs_info->free_space_cache, then step
 * through the block groups with btrfs_lookup_first_block_group(), moving
 * 'start' to the end of each block group found (key.objectid + key.offset).
 *
 * NOTE(review): what is done to each block group between the lookup and the
 * advance is not visible here.
 */
9241 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9243 struct btrfs_block_group_cache *cache;
9248 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9249 &start, &end, EXTENT_DIRTY);
9252 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9257 cache = btrfs_lookup_first_block_group(fs_info, start);
9262 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and in repair mode try to fix, the problems recorded on the extent
 * records in 'extent_cache'.
 *
 * root:         a root of the filesystem; the final block-accounting fixup
 *               switches to root->fs_info->extent_root
 * extent_cache: records to check; each one is removed from the cache and its
 *               backrefs are freed once it has been processed
 *
 * Before the main pass, the ranges of the extent records and of the corrupt
 * blocks are marked in fs_info->excluded_extents (per the comment, so that a
 * repair does not allocate from problem extents), corrupt blocks are pruned
 * and the cached block groups are reset. Records queued on
 * duplicate_extents are then run through process_duplicates() and
 * delete_duplicate_records().
 *
 * The main pass prints a message for: multiple extent items, ref count
 * mismatch (also recording orphan data extents), backpointer mismatch,
 * failed owner-ref check, bad full backref, metadata crossing a stripe
 * boundary, and extent/chunk type mismatch. With repair enabled
 * fixup_extent_refs() and fixup_extent_flags() are called for the
 * corresponding problems; the last two conditions are report-only.
 *
 * A failure other than -EAGAIN ends with "failed to repair damaged
 * filesystem"; otherwise btrfs_fix_block_accounting() runs in a final
 * transaction on the extent root.
 */
9266 static int check_extent_refs(struct btrfs_root *root,
9267 struct cache_tree *extent_cache)
9269 struct extent_record *rec;
9270 struct cache_extent *cache;
9276 * if we're doing a repair, we have to make sure
9277 * we don't allocate from the problem extents.
9278 * In the worst case, this will be all the
9281 cache = search_cache_extent(extent_cache, 0);
9283 rec = container_of(cache, struct extent_record, cache);
9284 set_extent_dirty(root->fs_info->excluded_extents,
9286 rec->start + rec->max_size - 1);
9287 cache = next_cache_extent(cache);
9290 /* pin down all the corrupted blocks too */
9291 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9293 set_extent_dirty(root->fs_info->excluded_extents,
9295 cache->start + cache->size - 1);
9296 cache = next_cache_extent(cache);
9298 prune_corrupt_blocks(root->fs_info);
9299 reset_cached_block_groups(root->fs_info);
9302 reset_cached_block_groups(root->fs_info);
9305 * We need to delete any duplicate entries we find first otherwise we
9306 * could mess up the extent tree when we have backrefs that actually
9307 * belong to a different extent item and not the weird duplicate one.
9309 while (repair && !list_empty(&duplicate_extents)) {
9310 rec = to_extent_record(duplicate_extents.next);
9311 list_del_init(&rec->list);
9313 /* Sometimes we can find a backref before we find an actual
9314 * extent, so we need to process it a little bit to see if there
9315 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9316 * if this is a backref screwup. If we need to delete stuff
9317 * process_duplicates() will return 0, otherwise it will return
9320 if (process_duplicates(extent_cache, rec))
9322 ret = delete_duplicate_records(root, rec);
9326 * delete_duplicate_records will return the number of entries
9327 * deleted, so if it's greater than 0 then we know we actually
9328 * did something and we need to remove.
9341 cache = search_cache_extent(extent_cache, 0);
9344 rec = container_of(cache, struct extent_record, cache);
9345 if (rec->num_duplicates) {
9346 fprintf(stderr, "extent item %llu has multiple extent "
9347 "items\n", (unsigned long long)rec->start);
9351 if (rec->refs != rec->extent_item_refs) {
9352 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9353 (unsigned long long)rec->start,
9354 (unsigned long long)rec->nr);
9355 fprintf(stderr, "extent item %llu, found %llu\n",
9356 (unsigned long long)rec->extent_item_refs,
9357 (unsigned long long)rec->refs);
9358 ret = record_orphan_data_extents(root->fs_info, rec);
9364 if (all_backpointers_checked(rec, 1)) {
9365 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9366 (unsigned long long)rec->start,
9367 (unsigned long long)rec->nr);
9371 if (!rec->owner_ref_checked) {
9372 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9373 (unsigned long long)rec->start,
9374 (unsigned long long)rec->nr);
9379 if (repair && fix) {
9380 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9386 if (rec->bad_full_backref) {
9387 fprintf(stderr, "bad full backref, on [%llu]\n",
9388 (unsigned long long)rec->start);
9390 ret = fixup_extent_flags(root->fs_info, rec);
9398 * Although it's not a extent ref's problem, we reuse this
9399 * routine for error reporting.
9400 * No repair function yet.
9402 if (rec->crossing_stripes) {
9404 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9405 rec->start, rec->start + rec->max_size);
9409 if (rec->wrong_chunk_type) {
9411 "bad extent [%llu, %llu), type mismatch with chunk\n",
9412 rec->start, rec->start + rec->max_size);
9416 remove_cache_extent(extent_cache, cache);
9417 free_all_extent_backrefs(rec);
9418 if (!init_extent_tree && repair && (!cur_err || fix))
9419 clear_extent_dirty(root->fs_info->excluded_extents,
9421 rec->start + rec->max_size - 1);
9426 if (ret && ret != -EAGAIN) {
9427 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9430 struct btrfs_trans_handle *trans;
9432 root = root->fs_info->extent_root;
9433 trans = btrfs_start_transaction(root, 1);
9434 if (IS_ERR(trans)) {
9435 ret = PTR_ERR(trans);
9439 btrfs_fix_block_accounting(trans, root);
9440 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute the stripe length of a chunk from its profile.
 *
 * type:        block group flags; RAID0, RAID10, RAID5 and RAID6 are tested
 *              in that order
 * length:      length of the chunk (check_chunk_refs() passes
 *              chunk_rec->length)
 * num_stripes: number of stripes in the chunk
 *
 * RAID0 divides length by num_stripes, RAID10 divides twice the length by
 * num_stripes, RAID5 divides by num_stripes - 1 and RAID6 by
 * num_stripes - 2. For any other profile stripe_size is length unchanged.
 *
 * NOTE(review): no guard against a zero divisor is visible here, so callers
 * presumably validate num_stripes beforehand -- confirm.
 */
9449 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9453 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9454 stripe_size = length;
9455 stripe_size /= num_stripes;
9456 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9457 stripe_size = length * 2;
9458 stripe_size /= num_stripes;
9459 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9460 stripe_size = length;
9461 stripe_size /= (num_stripes - 1);
9462 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9463 stripe_size = length;
9464 stripe_size /= (num_stripes - 2);
9466 stripe_size = length;
9472 * Check the chunk with its block group/dev list ref:
9473 * Return 0 if all refs seem valid.
9474 * Return 1 if only some refs seem valid and a later check is needed to rebuild
9475 * the rest, e.g. a missing block group that must be rebuilt from the extent tree.
9476 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * chunk_rec:         chunk to verify
 * block_group_cache: block group records searched for the chunk's block
 *                    group
 * dev_extent_cache:  device extent records searched once per stripe
 *
 * A block group found for the chunk must agree with it on length and offset
 * (the type flags are compared as well, apparently under a further
 * condition); a mismatch is printed, while a matching record is unlinked
 * from its list and stored in chunk_rec->bg_rec. A chunk without any block
 * group is reported too.
 *
 * Each stripe is then looked up as a device extent at (devid, offset) with
 * the length given by calc_stripe_length(). The record found must match on
 * devid, offset, chunk offset and length, otherwise a mismatch is printed; a
 * matching record is moved onto chunk_rec->dextents. A stripe without a
 * device extent is reported.
 */
9478 static int check_chunk_refs(struct chunk_record *chunk_rec,
9479 struct block_group_tree *block_group_cache,
9480 struct device_extent_tree *dev_extent_cache,
9483 struct cache_extent *block_group_item;
9484 struct block_group_record *block_group_rec;
9485 struct cache_extent *dev_extent_item;
9486 struct device_extent_record *dev_extent_rec;
9490 int metadump_v2 = 0;
9494 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9497 if (block_group_item) {
9498 block_group_rec = container_of(block_group_item,
9499 struct block_group_record,
9501 if (chunk_rec->length != block_group_rec->offset ||
9502 chunk_rec->offset != block_group_rec->objectid ||
9504 chunk_rec->type_flags != block_group_rec->flags)) {
9507 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9508 chunk_rec->objectid,
9513 chunk_rec->type_flags,
9514 block_group_rec->objectid,
9515 block_group_rec->type,
9516 block_group_rec->offset,
9517 block_group_rec->offset,
9518 block_group_rec->objectid,
9519 block_group_rec->flags);
9522 list_del_init(&block_group_rec->list);
9523 chunk_rec->bg_rec = block_group_rec;
9528 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9529 chunk_rec->objectid,
9534 chunk_rec->type_flags);
9541 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9542 chunk_rec->num_stripes);
9543 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9544 devid = chunk_rec->stripes[i].devid;
9545 offset = chunk_rec->stripes[i].offset;
9546 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9547 devid, offset, length);
9548 if (dev_extent_item) {
9549 dev_extent_rec = container_of(dev_extent_item,
9550 struct device_extent_record,
9552 if (dev_extent_rec->objectid != devid ||
9553 dev_extent_rec->offset != offset ||
9554 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9555 dev_extent_rec->length != length) {
9558 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9559 chunk_rec->objectid,
9562 chunk_rec->stripes[i].devid,
9563 chunk_rec->stripes[i].offset,
9564 dev_extent_rec->objectid,
9565 dev_extent_rec->offset,
9566 dev_extent_rec->length);
9569 list_move(&dev_extent_rec->chunk_list,
9570 &chunk_rec->dextents);
9575 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9576 chunk_rec->objectid,
9579 chunk_rec->stripes[i].devid,
9580 chunk_rec->stripes[i].offset);
9587 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every chunk in 'chunk_cache' and sort the chunk
 * records onto the caller-supplied lists.
 *
 * chunk_cache:       chunk records to verify
 * block_group_cache: block group records; entries still on its block_groups
 *                    list after the pass are reported
 * dev_extent_cache:  device extent records; entries on its no_chunk_orphans
 *                    list are reported
 * good:              receives chunks whose check returned 0 (may be NULL)
 * bad:               receives chunks in the remaining failure case
 * rebuild:           receives chunks whose check returned a positive value
 *                    (may be NULL)
 * silent:            passed through to check_chunk_refs()
 *
 * Block groups and device extents reported after the chunk pass are
 * described in the messages as not having found their chunk.
 */
9588 int check_chunks(struct cache_tree *chunk_cache,
9589 struct block_group_tree *block_group_cache,
9590 struct device_extent_tree *dev_extent_cache,
9591 struct list_head *good, struct list_head *bad,
9592 struct list_head *rebuild, int silent)
9594 struct cache_extent *chunk_item;
9595 struct chunk_record *chunk_rec;
9596 struct block_group_record *bg_rec;
9597 struct device_extent_record *dext_rec;
9601 chunk_item = first_cache_extent(chunk_cache);
9602 while (chunk_item) {
9603 chunk_rec = container_of(chunk_item, struct chunk_record,
9605 err = check_chunk_refs(chunk_rec, block_group_cache,
9606 dev_extent_cache, silent);
9609 if (err == 0 && good)
9610 list_add_tail(&chunk_rec->list, good);
9611 if (err > 0 && rebuild)
9612 list_add_tail(&chunk_rec->list, rebuild);
9614 list_add_tail(&chunk_rec->list, bad);
9615 chunk_item = next_cache_extent(chunk_item);
9618 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9621 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9629 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9633 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the length of the device extents recorded in @dext_cache for
 * dev_rec->devid and compare the sum with dev_rec->byte_used.
 */
9644 static int check_device_used(struct device_record *dev_rec,
9645 struct device_extent_tree *dext_cache)
9647 struct cache_extent *cache;
9648 struct device_extent_record *dev_extent_rec;
/* Search starts at (devid, 0) - presumably the first extent of the device. */
9651 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9653 dev_extent_rec = container_of(cache,
9654 struct device_extent_record,
/* A different objectid means the entry belongs to another device. */
9656 if (dev_extent_rec->objectid != dev_rec->devid)
/* Accounted for: unlink it from the list it is queued on via device_list. */
9659 list_del_init(&dev_extent_rec->device_list);
9660 total_byte += dev_extent_rec->length;
9661 cache = next_cache_extent(cache);
/* The summed extent length must match the byte_used of the device record. */
9664 if (total_byte != dev_rec->byte_used) {
9666 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9667 total_byte, dev_rec->byte_used, dev_rec->objectid,
9668 dev_rec->type, dev_rec->offset);
9675 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rb-tree,
 * then report the device extents found on
 * dev_extent_cache->no_device_orphans.
 */
9676 static int check_devices(struct rb_root *dev_cache,
9677 struct device_extent_tree *dev_extent_cache)
9679 struct rb_node *dev_node;
9680 struct device_record *dev_rec;
9681 struct device_extent_record *dext_rec;
/* Iterate the device records in rb-tree order. */
9685 dev_node = rb_first(dev_cache);
9687 dev_rec = container_of(dev_node, struct device_record, node);
9688 err = check_device_used(dev_rec, dev_extent_cache);
9692 dev_node = rb_next(dev_node);
/* Entries on this list are reported as device extents without a device. */
9694 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9697 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9698 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * NOTE(review): callers in this file pass NULL for @drop_key when queueing
 * non-dropping trees - confirm the copy of *drop_key below is guarded.
 */
9705 static int add_root_item_to_list(struct list_head *head,
9706 u64 objectid, u64 bytenr, u64 last_snapshot,
9707 u8 level, u8 drop_level,
9708 int level_size, struct btrfs_key *drop_key)
9711 struct root_item_record *ri_rec;
9712 ri_rec = malloc(sizeof(*ri_rec));
/* Plain field-by-field copy of the arguments into the new record. */
9715 ri_rec->bytenr = bytenr;
9716 ri_rec->objectid = objectid;
9717 ri_rec->level = level;
9718 ri_rec->level_size = level_size;
9719 ri_rec->drop_level = drop_level;
9720 ri_rec->last_snapshot = last_snapshot;
9722 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9723 list_add_tail(&ri_rec->list, head);
/* Drain @list: unlink root_item_record entries from the front until empty. */
9728 static void free_root_item_list(struct list_head *list)
9730 struct root_item_record *ri_rec;
9732 while (!list_empty(list)) {
9733 ri_rec = list_first_entry(list, struct root_item_record,
9735 list_del_init(&ri_rec->list);
/*
 * Process the root_item_record entries queued on @list: read the tree block
 * of each root, register it with add_root_to_pending() and drive
 * run_next_block() with the caches supplied by the caller.
 */
9740 static int deal_root_from_list(struct list_head *list,
9741 struct btrfs_root *root,
9742 struct block_info *bits,
9744 struct cache_tree *pending,
9745 struct cache_tree *seen,
9746 struct cache_tree *reada,
9747 struct cache_tree *nodes,
9748 struct cache_tree *extent_cache,
9749 struct cache_tree *chunk_cache,
9750 struct rb_root *dev_cache,
9751 struct block_group_tree *block_group_cache,
9752 struct device_extent_tree *dev_extent_cache)
9757 while (!list_empty(list)) {
9758 struct root_item_record *rec;
9759 struct extent_buffer *buf;
/* Always take the list head; the entry is unlinked further down. */
9760 rec = list_entry(list->next,
9761 struct root_item_record, list);
/* Read the block at rec->bytenr, i.e. the root node queued for this tree. */
9763 buf = read_tree_block(root->fs_info->tree_root,
9764 rec->bytenr, rec->level_size, 0);
9765 if (!extent_buffer_uptodate(buf)) {
9766 free_extent_buffer(buf);
9770 ret = add_root_to_pending(buf, extent_cache, pending,
9771 seen, nodes, rec->objectid);
9775 * To rebuild extent tree, we need deal with snapshot
9776 * one by one, otherwise we deal with node firstly which
9777 * can maximize readahead.
9780 ret = run_next_block(root, bits, bits_nr, &last,
9781 pending, seen, reada, nodes,
9782 extent_cache, chunk_cache,
9783 dev_cache, block_group_cache,
9784 dev_extent_cache, rec);
9788 free_extent_buffer(buf);
9789 list_del(&rec->list);
/* Follow-up run_next_block() call with no root_item_record attached. */
9795 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9796 reada, nodes, extent_cache, chunk_cache,
9797 dev_cache, block_group_cache,
9798 dev_extent_cache, NULL);
/*
 * Top-level chunk/extent check: build the in-memory caches, queue every tree
 * found in the tree root, process the queued trees through
 * deal_root_from_list(), then cross-check the collected records with
 * check_chunks(), check_extent_refs() and check_devices().
 */
9808 static int check_chunks_and_extents(struct btrfs_root *root)
9810 struct rb_root dev_cache;
9811 struct cache_tree chunk_cache;
9812 struct block_group_tree block_group_cache;
9813 struct device_extent_tree dev_extent_cache;
9814 struct cache_tree extent_cache;
9815 struct cache_tree seen;
9816 struct cache_tree pending;
9817 struct cache_tree reada;
9818 struct cache_tree nodes;
9819 struct extent_io_tree excluded_extents;
9820 struct cache_tree corrupt_blocks;
9821 struct btrfs_path path;
9822 struct btrfs_key key;
9823 struct btrfs_key found_key;
9825 struct block_info *bits;
9827 struct extent_buffer *leaf;
9829 struct btrfs_root_item ri;
9830 struct list_head dropping_trees;
9831 struct list_head normal_trees;
9832 struct btrfs_root *root1;
/* Initialise every cache, tree and list used during the check. */
9837 dev_cache = RB_ROOT;
9838 cache_tree_init(&chunk_cache);
9839 block_group_tree_init(&block_group_cache);
9840 device_extent_tree_init(&dev_extent_cache);
9842 cache_tree_init(&extent_cache);
9843 cache_tree_init(&seen);
9844 cache_tree_init(&pending);
9845 cache_tree_init(&nodes);
9846 cache_tree_init(&reada);
9847 cache_tree_init(&corrupt_blocks);
9848 extent_io_tree_init(&excluded_extents);
9849 INIT_LIST_HEAD(&dropping_trees);
9850 INIT_LIST_HEAD(&normal_trees);
/* Publish the local state through fs_info; reset to NULL further down. */
9853 root->fs_info->excluded_extents = &excluded_extents;
9854 root->fs_info->fsck_extent_cache = &extent_cache;
9855 root->fs_info->free_extent_hook = free_extent_hook;
9856 root->fs_info->corrupt_blocks = &corrupt_blocks;
9860 bits = malloc(bits_nr * sizeof(struct block_info));
9866 if (ctx.progress_enabled) {
9867 ctx.tp = TASK_EXTENTS;
9868 task_start(ctx.info);
/* Queue the tree root first, then the chunk root, on normal_trees. */
9872 root1 = root->fs_info->tree_root;
9873 level = btrfs_header_level(root1->node);
9874 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9875 root1->node->start, 0, level, 0,
9876 root1->nodesize, NULL);
9879 root1 = root->fs_info->chunk_root;
9880 level = btrfs_header_level(root1->node);
9881 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9882 root1->node->start, 0, level, 0,
9883 root1->nodesize, NULL);
/*
 * Walk the ROOT_ITEMs stored in the tree root. A root item whose
 * drop_progress objectid is 0 goes to normal_trees, any other one goes to
 * dropping_trees together with its drop key.
 */
9886 btrfs_init_path(&path);
9889 key.type = BTRFS_ROOT_ITEM_KEY;
9890 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9895 leaf = path.nodes[0];
9896 slot = path.slots[0];
9897 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9898 ret = btrfs_next_leaf(root, &path);
9901 leaf = path.nodes[0];
9902 slot = path.slots[0];
9904 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9905 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9906 unsigned long offset;
9909 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9910 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9911 last_snapshot = btrfs_root_last_snapshot(&ri);
9912 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9913 level = btrfs_root_level(&ri);
9914 level_size = root->nodesize;
9915 ret = add_root_item_to_list(&normal_trees,
9917 btrfs_root_bytenr(&ri),
9918 last_snapshot, level,
9919 0, level_size, NULL);
9923 level = btrfs_root_level(&ri);
9924 level_size = root->nodesize;
9925 objectid = found_key.objectid;
9926 btrfs_disk_key_to_cpu(&found_key,
9928 ret = add_root_item_to_list(&dropping_trees,
9930 btrfs_root_bytenr(&ri),
9931 last_snapshot, level,
9933 level_size, &found_key);
9940 btrfs_release_path(&path);
9943 * check_block can return -EAGAIN if it fixes something, please keep
9944 * this in mind when dealing with return values from these functions, if
9945 * we get -EAGAIN we want to fall through and restart the loop.
9947 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9948 &seen, &reada, &nodes, &extent_cache,
9949 &chunk_cache, &dev_cache, &block_group_cache,
9956 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9957 &pending, &seen, &reada, &nodes,
9958 &extent_cache, &chunk_cache, &dev_cache,
9959 &block_group_cache, &dev_extent_cache);
/* Cross-check the chunk, extent and device records collected above. */
9966 ret = check_chunks(&chunk_cache, &block_group_cache,
9967 &dev_extent_cache, NULL, NULL, NULL, 0);
9974 ret = check_extent_refs(root, &extent_cache);
9981 ret = check_devices(&dev_cache, &dev_extent_cache);
9986 task_stop(ctx.info);
/* First teardown sequence: detach the local state from fs_info. */
9988 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9989 extent_io_tree_cleanup(&excluded_extents);
9990 root->fs_info->fsck_extent_cache = NULL;
9991 root->fs_info->free_extent_hook = NULL;
9992 root->fs_info->corrupt_blocks = NULL;
9993 root->fs_info->excluded_extents = NULL;
9996 free_chunk_cache_tree(&chunk_cache);
9997 free_device_cache_tree(&dev_cache);
9998 free_block_group_tree(&block_group_cache);
9999 free_device_extent_tree(&dev_extent_cache);
10000 free_extent_cache_tree(&seen);
10001 free_extent_cache_tree(&pending);
10002 free_extent_cache_tree(&reada);
10003 free_extent_cache_tree(&nodes);
10004 free_root_item_list(&normal_trees);
10005 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence; unlike the one above it also releases the
 * extent record cache.
 */
10008 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10009 free_extent_cache_tree(&seen);
10010 free_extent_cache_tree(&pending);
10011 free_extent_cache_tree(&reada);
10012 free_extent_cache_tree(&nodes);
10013 free_chunk_cache_tree(&chunk_cache);
10014 free_block_group_tree(&block_group_cache);
10015 free_device_cache_tree(&dev_cache);
10016 free_device_extent_tree(&dev_extent_cache);
10017 free_extent_record_cache(&extent_cache);
10018 free_root_item_list(&normal_trees);
10019 free_root_item_list(&dropping_trees);
10020 extent_io_tree_cleanup(&excluded_extents);
10025 * Check backrefs of a tree block given by @bytenr or @eb.
10027 * @root: the root containing the @bytenr or @eb
10028 * @eb: tree block extent buffer, can be NULL
10029 * @bytenr: bytenr of the tree block to search
10030 * @level: tree level of the tree block
10031 * @owner: owner of the tree block
10033 * Return >0 for any error found and output error message
10034 * Return 0 for no error found
10036 static int check_tree_block_ref(struct btrfs_root *root,
10037 struct extent_buffer *eb, u64 bytenr,
10038 int level, u64 owner)
10040 struct btrfs_key key;
10041 struct btrfs_root *extent_root = root->fs_info->extent_root;
10042 struct btrfs_path path;
10043 struct btrfs_extent_item *ei;
10044 struct btrfs_extent_inline_ref *iref;
10045 struct extent_buffer *leaf;
10051 u32 nodesize = root->nodesize;
10054 int tree_reloc_root = 0;
/* Flag the case where @bytenr is the root node of a tree reloc root. */
10059 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10060 btrfs_header_bytenr(root->node) == bytenr)
10061 tree_reloc_root = 1;
/*
 * The key type follows the SKINNY_METADATA incompat flag. The offset is
 * (u64)-1 and the matching item is then located with
 * btrfs_previous_extent_item().
 */
10063 btrfs_init_path(&path);
10064 key.objectid = bytenr;
10065 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10066 key.type = BTRFS_METADATA_ITEM_KEY;
10068 key.type = BTRFS_EXTENT_ITEM_KEY;
10069 key.offset = (u64)-1;
10071 /* Search for the backref in extent tree */
10072 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10074 err |= BACKREF_MISSING;
10077 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10079 err |= BACKREF_MISSING;
10083 leaf = path.nodes[0];
10084 slot = path.slots[0];
10085 btrfs_item_key_to_cpu(leaf, &key, slot);
10087 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * A METADATA_ITEM carries the level in key.offset and its inline refs
 * follow the extent item directly; otherwise a btrfs_tree_block_info
 * holding the level sits in between.
 */
10089 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10090 skinny_level = (int)key.offset;
10091 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10093 struct btrfs_tree_block_info *info;
10095 info = (struct btrfs_tree_block_info *)(ei + 1);
10096 skinny_level = btrfs_tree_block_level(leaf, info);
10097 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks of the extent item: flags, generation, level, refs. */
10104 if (!(btrfs_extent_flags(leaf, ei) &
10105 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10107 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10108 key.objectid, nodesize,
10109 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10110 err = BACKREF_MISMATCH;
10112 header_gen = btrfs_header_generation(eb);
10113 extent_gen = btrfs_extent_generation(leaf, ei);
10114 if (header_gen != extent_gen) {
10116 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10117 key.objectid, nodesize, header_gen,
10119 err = BACKREF_MISMATCH;
10121 if (level != skinny_level) {
10123 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10124 key.objectid, nodesize, level, skinny_level);
10125 err = BACKREF_MISMATCH;
10127 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10129 "extent[%llu %u] is referred by other roots than %llu",
10130 key.objectid, nodesize, root->objectid);
10131 err = BACKREF_MISMATCH;
10136 * Iterate the extent/metadata item to find the exact backref
10138 item_size = btrfs_item_size_nr(leaf, slot);
10139 ptr = (unsigned long)iref;
10140 end = (unsigned long)ei + item_size;
10141 while (ptr < end) {
10142 iref = (struct btrfs_extent_inline_ref *)ptr;
10143 type = btrfs_extent_inline_ref_type(leaf, iref);
10144 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10146 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10147 (offset == root->objectid || offset == owner)) {
10149 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10151 * Backref of tree reloc root points to itself, no need
10152 * to check backref any more.
10154 if (tree_reloc_root)
10157 /* Check if the backref points to valid referencer */
/* Recursive call on the parent block (ref offset), one level up, eb == NULL. */
10158 found_ref = !check_tree_block_ref(root, NULL,
10159 offset, level + 1, owner);
10164 ptr += btrfs_extent_inline_ref_size(type);
10168 * Inlined extent item doesn't have what we need, check
10169 * TREE_BLOCK_REF_KEY
10172 btrfs_release_path(&path);
10173 key.objectid = bytenr;
10174 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10175 key.offset = root->objectid;
10177 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10182 err |= BACKREF_MISSING;
10184 btrfs_release_path(&path);
/* Reported only with a real @eb; the recursive call above passes NULL. */
10185 if (eb && (err & BACKREF_MISSING))
10186 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10187 bytenr, nodesize, owner, level);
10192 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10194 * Return >0 for any error found and output error message
10195 * Return 0 for no error found
10197 static int check_extent_data_item(struct btrfs_root *root,
10198 struct extent_buffer *eb, int slot)
10200 struct btrfs_file_extent_item *fi;
10201 struct btrfs_path path;
10202 struct btrfs_root *extent_root = root->fs_info->extent_root;
10203 struct btrfs_key fi_key;
10204 struct btrfs_key dbref_key;
10205 struct extent_buffer *leaf;
10206 struct btrfs_extent_item *ei;
10207 struct btrfs_extent_inline_ref *iref;
10208 struct btrfs_extent_data_ref *dref;
10211 u64 disk_num_bytes;
10212 u64 extent_num_bytes;
10219 int found_dbackref = 0;
10223 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10224 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10226 /* Nothing to check for hole and inline data extents */
10227 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10228 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10231 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10232 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10233 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10235 /* Check unaligned disk_num_bytes and num_bytes */
10236 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
10238 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10239 fi_key.objectid, fi_key.offset, disk_num_bytes,
10241 err |= BYTES_UNALIGNED;
/* File-scope statistics counter, updated as a side effect. */
10243 data_bytes_allocated += disk_num_bytes;
10245 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
10247 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10248 fi_key.objectid, fi_key.offset, extent_num_bytes,
10250 err |= BYTES_UNALIGNED;
/* File-scope statistics counter, updated as a side effect. */
10252 data_bytes_referenced += extent_num_bytes;
10254 owner = btrfs_header_owner(eb);
10256 /* Check the extent item of the file extent in extent tree */
/* The lookup key is (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
10257 btrfs_init_path(&path);
10258 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10259 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10260 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10262 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10266 leaf = path.nodes[0];
10267 slot = path.slots[0];
10268 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10270 extent_flags = btrfs_extent_flags(leaf, ei);
10272 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10274 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10275 disk_bytenr, disk_num_bytes,
10276 BTRFS_EXTENT_FLAG_DATA);
10277 err |= BACKREF_MISMATCH;
10280 /* Check data backref inside that extent item */
10281 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10282 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10283 ptr = (unsigned long)iref;
10284 end = (unsigned long)ei + item_size;
10285 while (ptr < end) {
10286 iref = (struct btrfs_extent_inline_ref *)ptr;
10287 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref is read from the address of the inline ref's offset field. */
10288 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10290 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10291 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10292 if (ref_root == owner || ref_root == root->objectid)
10293 found_dbackref = 1;
10294 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10295 found_dbackref = !check_tree_block_ref(root, NULL,
10296 btrfs_extent_inline_ref_offset(leaf, iref),
10300 if (found_dbackref)
10302 ptr += btrfs_extent_inline_ref_size(type);
10305 if (!found_dbackref) {
10306 btrfs_release_path(&path);
10308 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10309 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10310 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10311 dbref_key.offset = hash_extent_data_ref(root->objectid,
10312 fi_key.objectid, fi_key.offset);
10314 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10315 &dbref_key, &path, 0, 0);
10317 found_dbackref = 1;
10321 btrfs_release_path(&path);
10324 * Neither inlined nor EXTENT_DATA_REF found, try
10325 * SHARED_DATA_REF as last chance.
10327 dbref_key.objectid = disk_bytenr;
10328 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10329 dbref_key.offset = eb->start;
10331 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10332 &dbref_key, &path, 0, 0);
10334 found_dbackref = 1;
/* No backref found by any of the lookups above: flag it as missing. */
10340 if (!found_dbackref)
10341 err |= BACKREF_MISSING;
10342 btrfs_release_path(&path);
10343 if (err & BACKREF_MISSING) {
10344 error("data extent[%llu %llu] backref lost",
10345 disk_bytenr, disk_num_bytes);
10351 * Get the real tree block level, for cases such as a shared block
10352 * Return >= 0 as tree level
10353 * Return <0 for error
10355 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10357 struct extent_buffer *eb;
10358 struct btrfs_path path;
10359 struct btrfs_key key;
10360 struct btrfs_extent_item *ei;
10363 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10368 /* Search extent tree for extent generation and level */
10369 key.objectid = bytenr;
10370 key.type = BTRFS_METADATA_ITEM_KEY;
10371 key.offset = (u64)-1;
10373 btrfs_init_path(&path);
10374 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10377 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10385 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10386 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10387 struct btrfs_extent_item);
/* The extent item found must carry the TREE_BLOCK flag. */
10388 flags = btrfs_extent_flags(path.nodes[0], ei);
10389 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10394 /* Get transid for later read_tree_block() check */
10395 transid = btrfs_extent_generation(path.nodes[0], ei);
10397 /* Get backref level as one source */
10398 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10399 backref_level = key.offset;
10401 struct btrfs_tree_block_info *info;
10403 info = (struct btrfs_tree_block_info *)(ei + 1);
10404 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10406 btrfs_release_path(&path);
10408 /* Get level from tree block as an alternative source */
10409 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10410 if (!extent_buffer_uptodate(eb)) {
10411 free_extent_buffer(eb);
10414 header_level = btrfs_header_level(eb);
10415 free_extent_buffer(eb);
/* Cross-check the header level against the backref level. */
10417 if (header_level != backref_level)
10419 return header_level;
10422 btrfs_release_path(&path);
10427 * Check if a tree block backref is valid (points to a valid tree block)
10428 * if level == -1, level will be resolved
10429 * Return >0 for any error found and print error message
10431 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10432 u64 bytenr, int level)
10434 struct btrfs_root *root;
10435 struct btrfs_key key;
10436 struct btrfs_path path;
10437 struct extent_buffer *eb;
10438 struct extent_buffer *node;
10439 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10443 /* Query level for level == -1 special case */
10445 level = query_tree_block_level(fs_info, bytenr);
10447 err |= REFERENCER_MISSING;
/* Load the referencing root by its id. */
10451 key.objectid = root_id;
10452 key.type = BTRFS_ROOT_ITEM_KEY;
10453 key.offset = (u64)-1;
10455 root = btrfs_read_fs_root(fs_info, &key);
10456 if (IS_ERR(root)) {
10457 err |= REFERENCER_MISSING;
10461 /* Read out the tree block to get item/node key */
10462 eb = read_tree_block(root, bytenr, root->nodesize, 0);
10463 if (!extent_buffer_uptodate(eb)) {
10464 err |= REFERENCER_MISSING;
10465 free_extent_buffer(eb);
10469 /* Empty tree, no need to check key */
10470 if (!btrfs_header_nritems(eb) && !level) {
10471 free_extent_buffer(eb);
/* First key of the block, read as a node key or as an item key. */
10476 btrfs_node_key_to_cpu(eb, &key, 0);
10478 btrfs_item_key_to_cpu(eb, &key, 0);
10480 free_extent_buffer(eb);
/* lowest_level = level, so path.nodes[level] is the block reached in @root. */
10482 btrfs_init_path(&path);
10483 path.lowest_level = level;
10484 /* Search with the first key, to ensure we can reach it */
10485 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10487 err |= REFERENCER_MISSING;
/* The block reached must have the expected bytenr and level. */
10491 node = path.nodes[level];
10492 if (btrfs_header_bytenr(node) != bytenr) {
10494 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10495 bytenr, nodesize, bytenr,
10496 btrfs_header_bytenr(node));
10497 err |= REFERENCER_MISMATCH;
10499 if (btrfs_header_level(node) != level) {
10501 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10502 bytenr, nodesize, level,
10503 btrfs_header_level(node));
10504 err |= REFERENCER_MISMATCH;
10508 btrfs_release_path(&path);
/* Two message variants follow: one without the level, one with it. */
10510 if (err & REFERENCER_MISSING) {
10512 error("extent [%llu %d] lost referencer (owner: %llu)",
10513 bytenr, nodesize, root_id);
10516 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10517 bytenr, nodesize, root_id, level);
10524 * Check if tree block @eb is tree reloc root.
10525 * Return 0 if it's not or any problem happens
10526 * Return 1 if it's a tree reloc root
10528 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10529 struct extent_buffer *eb)
10531 struct btrfs_root *tree_reloc_root;
10532 struct btrfs_key key;
10533 u64 bytenr = btrfs_header_bytenr(eb);
10534 u64 owner = btrfs_header_owner(eb);
/* Key used for the lookup: (TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb). */
10537 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10538 key.offset = owner;
10539 key.type = BTRFS_ROOT_ITEM_KEY;
/* Read through the no_cache variant; the root is released again below. */
10541 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10542 if (IS_ERR(tree_reloc_root))
/* @eb is compared against the root node of the root just read. */
10545 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10547 btrfs_free_fs_root(tree_reloc_root);
10552 * Check referencer for shared block backref
10553 * If level == -1, this function will resolve the level.
10555 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10556 u64 parent, u64 bytenr, int level)
10558 struct extent_buffer *eb;
10559 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10561 int found_parent = 0;
/* Read the tree block at @parent. */
10564 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10565 if (!extent_buffer_uptodate(eb))
10569 level = query_tree_block_level(fs_info, bytenr);
10573 /* It's possible it's a tree reloc root */
10574 if (parent == bytenr) {
10575 if (is_tree_reloc_root(fs_info, eb))
/* The parent's header level is compared against level + 1. */
10580 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for @bytenr. */
10583 nr = btrfs_header_nritems(eb);
10584 for (i = 0; i < nr; i++) {
10585 if (bytenr == btrfs_node_blockptr(eb, i)) {
10591 free_extent_buffer(eb);
10592 if (!found_parent) {
10594 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10595 bytenr, nodesize, parent, level);
10596 return REFERENCER_MISSING;
10602 * Check referencer for normal (inlined) data ref
10603 * If len == 0, it will be resolved by searching in extent tree
10605 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10606 u64 root_id, u64 objectid, u64 offset,
10607 u64 bytenr, u64 len, u32 count)
10609 struct btrfs_root *root;
10610 struct btrfs_root *extent_root = fs_info->extent_root;
10611 struct btrfs_key key;
10612 struct btrfs_path path;
10613 struct extent_buffer *leaf;
10614 struct btrfs_file_extent_item *fi;
10615 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree. */
10620 key.objectid = bytenr;
10621 key.type = BTRFS_EXTENT_ITEM_KEY;
10622 key.offset = (u64)-1;
10624 btrfs_init_path(&path);
10625 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10628 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10631 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10632 if (key.objectid != bytenr ||
10633 key.type != BTRFS_EXTENT_ITEM_KEY)
10636 btrfs_release_path(&path);
/* Load the referencing root by its id. */
10638 key.objectid = root_id;
10639 key.type = BTRFS_ROOT_ITEM_KEY;
10640 key.offset = (u64)-1;
10641 btrfs_init_path(&path);
10643 root = btrfs_read_fs_root(fs_info, &key);
10647 key.objectid = objectid;
10648 key.type = BTRFS_EXTENT_DATA_KEY;
10650 * It can be nasty as data backref offset is
10651 * file offset - file extent offset, which is smaller or
10652 * equal to original backref offset. The only special case is
10653 * overflow. So we need to special check and do further search.
10655 key.offset = offset & (1ULL << 63) ? 0 : offset;
10657 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10662 * Search afterwards to get correct one
10663 * NOTE: As we must do a comprehensive check on the data backref to
10664 * make sure the dref count also matches, we must iterate all file
10665 * extents for that inode.
10668 leaf = path.nodes[0];
10669 slot = path.slots[0];
10671 if (slot >= btrfs_header_nritems(leaf))
10673 btrfs_item_key_to_cpu(leaf, &key, slot);
10674 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10676 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10678 * Except normal disk bytenr and disk num bytes, we still
10679 * need to do extra check on dbackref offset as
10680 * dbackref offset = file_offset - file_extent_offset
10682 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10683 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10684 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10689 ret = btrfs_next_item(root, &path);
10694 btrfs_release_path(&path);
/* The matches counted in found_count must equal the expected @count. */
10695 if (found_count != count) {
10697 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10698 bytenr, len, root_id, objectid, offset, count, found_count);
10699 return REFERENCER_MISSING;
10705 * Check if the referencer of a shared data backref exists
10707 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10708 u64 parent, u64 bytenr)
10710 struct extent_buffer *eb;
10711 struct btrfs_key key;
10712 struct btrfs_file_extent_item *fi;
10713 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10715 int found_parent = 0;
/* Read the tree block at @parent. */
10718 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10719 if (!extent_buffer_uptodate(eb))
/* Scan its items for a file extent whose disk bytenr equals @bytenr. */
10722 nr = btrfs_header_nritems(eb);
10723 for (i = 0; i < nr; i++) {
10724 btrfs_item_key_to_cpu(eb, &key, i);
10725 if (key.type != BTRFS_EXTENT_DATA_KEY)
10728 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10729 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10732 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10739 free_extent_buffer(eb);
10740 if (!found_parent) {
10741 error("shared extent %llu referencer lost (parent: %llu)",
10743 return REFERENCER_MISSING;
10749 * This function will check a given extent item, including its backref and
10750 * itself (like crossing stripe boundary and type)
10752 * Since we don't use extent_record anymore, introduce new error bit
10754 static int check_extent_item(struct btrfs_fs_info *fs_info,
10755 struct extent_buffer *eb, int slot)
10757 struct btrfs_extent_item *ei;
10758 struct btrfs_extent_inline_ref *iref;
10759 struct btrfs_extent_data_ref *dref;
10763 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10764 u32 item_size = btrfs_item_size_nr(eb, slot);
10769 struct btrfs_key key;
/*
 * Account the extent in the file-scope bytes_used counter: key.offset for
 * an EXTENT_ITEM, nodesize in the other accounting line.
 */
10773 btrfs_item_key_to_cpu(eb, &key, slot);
10774 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10775 bytes_used += key.offset;
10777 bytes_used += nodesize;
10779 if (item_size < sizeof(*ei)) {
10781 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10782 * old thing when on disk format is still un-determined.
10783 * No need to care about it anymore
10785 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10789 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10790 flags = btrfs_extent_flags(eb, ei);
10792 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10794 if (metadata && check_crossing_stripes(global_info, key.objectid,
10796 error("bad metadata [%llu, %llu) crossing stripe boundary",
10797 key.objectid, key.objectid + nodesize);
10798 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right behind the extent item, at the inline ref area. */
10801 ptr = (unsigned long)(ei + 1);
10803 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10804 /* Old EXTENT_ITEM metadata */
10805 struct btrfs_tree_block_info *info;
10807 info = (struct btrfs_tree_block_info *)ptr;
10808 level = btrfs_tree_block_level(eb, info);
10809 ptr += sizeof(struct btrfs_tree_block_info);
10811 /* New METADATA_ITEM */
10812 level = key.offset;
10814 end = (unsigned long)ei + item_size;
10817 /* Reached extent item end normally */
10821 /* Beyond extent item end, wrong item size */
10823 err |= ITEM_SIZE_MISMATCH;
10824 error("extent item at bytenr %llu slot %d has wrong size",
10829 /* Now check every backref in this extent item */
10830 iref = (struct btrfs_extent_inline_ref *)ptr;
10831 type = btrfs_extent_inline_ref_type(eb, iref);
10832 offset = btrfs_extent_inline_ref_offset(eb, iref);
10834 case BTRFS_TREE_BLOCK_REF_KEY:
10835 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10839 case BTRFS_SHARED_BLOCK_REF_KEY:
10840 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10844 case BTRFS_EXTENT_DATA_REF_KEY:
10845 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10846 ret = check_extent_data_backref(fs_info,
10847 btrfs_extent_data_ref_root(eb, dref),
10848 btrfs_extent_data_ref_objectid(eb, dref),
10849 btrfs_extent_data_ref_offset(eb, dref),
10850 key.objectid, key.offset,
10851 btrfs_extent_data_ref_count(eb, dref));
10854 case BTRFS_SHARED_DATA_REF_KEY:
10855 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10859 error("extent[%llu %d %llu] has unknown ref type: %d",
10860 key.objectid, key.type, key.offset, type);
10861 err |= UNKNOWN_TYPE;
/* Advance by the size of this ref type. */
10865 ptr += btrfs_extent_inline_ref_size(type);
10873 * Check if a dev extent item is referred correctly by its chunk
10875 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10876 struct extent_buffer *eb, int slot)
10878 struct btrfs_root *chunk_root = fs_info->chunk_root;
10879 struct btrfs_dev_extent *ptr;
10880 struct btrfs_path path;
10881 struct btrfs_key chunk_key;
10882 struct btrfs_key devext_key;
10883 struct btrfs_chunk *chunk;
10884 struct extent_buffer *l;
10888 int found_chunk = 0;
10891 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10892 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10893 length = btrfs_dev_extent_length(eb, ptr);
/* Chunk key taken from the dev extent: (chunk_objectid, CHUNK_ITEM, chunk_offset). */
10895 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10896 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10897 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10899 btrfs_init_path(&path);
10900 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
/* The chunk length is compared with the dev extent length. */
10905 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10906 if (btrfs_chunk_length(l, chunk) != length)
/* Look for a stripe whose (devid, offset) equals the dev extent key. */
10909 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10910 for (i = 0; i < num_stripes; i++) {
10911 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10912 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10914 if (devid == devext_key.objectid &&
10915 offset == devext_key.offset) {
10921 btrfs_release_path(&path);
10922 if (!found_chunk) {
10924 "device extent[%llu, %llu, %llu] did not find the related chunk",
10925 devext_key.objectid, devext_key.offset, length);
10926 return REFERENCER_MISSING;
10932 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of the dev item at @slot of leaf @eb with the
 * summed length of that device's dev extents. The device id and bytes_used
 * are read from the dev item, fs_info->dev_root is searched for
 * BTRFS_DEV_EXTENT_KEY items with that device id as objectid, and each
 * matching dev extent's length is added to total.
 * Returns REFERENCER_MISSING on the failure path that follows the search and
 * ACCOUNTING_MISMATCH when used != total.
 */
10934 static int check_dev_item(struct btrfs_fs_info *fs_info,
10935 struct extent_buffer *eb, int slot)
10937 struct btrfs_root *dev_root = fs_info->dev_root;
10938 struct btrfs_dev_item *dev_item;
10939 struct btrfs_path path;
10940 struct btrfs_key key;
10941 struct btrfs_dev_extent *ptr;
10947 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10948 dev_id = btrfs_device_id(eb, dev_item);
10949 used = btrfs_device_bytes_used(eb, dev_item);
10951 key.objectid = dev_id;
10952 key.type = BTRFS_DEV_EXTENT_KEY;
10955 btrfs_init_path(&path);
10956 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Reload the dev item's own key so the message identifies the dev item */
10958 btrfs_item_key_to_cpu(eb, &key, slot);
10959 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10960 key.objectid, key.type, key.offset);
10961 btrfs_release_path(&path);
10962 return REFERENCER_MISSING;
10965 /* Iterate dev_extents to calculate the used space of a device */
10967 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10970 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10971 if (key.objectid > dev_id)
10973 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10976 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10977 struct btrfs_dev_extent);
10978 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10980 ret = btrfs_next_item(dev_root, &path);
10984 btrfs_release_path(&path);
10986 if (used != total) {
/*
 * NOTE(review): the key loaded on the next line is not what the message
 * prints; the arguments are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY
 * and dev_id. Confirm whether key.objectid/key.type/key.offset was intended.
 */
10987 btrfs_item_key_to_cpu(eb, &key, slot);
10989 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10990 total, used, BTRFS_ROOT_TREE_OBJECTID,
10991 BTRFS_DEV_EXTENT_KEY, dev_id);
10992 return ACCOUNTING_MISMATCH;
10998 * Check a block group item with its referencer (chunk) and its used space
10999 * with extent/metadata item
/*
 * Check the block group item at @slot of leaf @eb in two steps:
 *  1) look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *     BTRFS_CHUNK_ITEM_KEY, block group start) in fs_info->chunk_root and
 *     compare its length (REFERENCER_MISSING / REFERENCER_MISMATCH);
 *  2) walk the extent tree from the block group start, summing the space of
 *     EXTENT_ITEM/METADATA_ITEM entries into total and checking that each
 *     extent's DATA/TREE_BLOCK flag agrees with the block group flags
 *     (CHUNK_TYPE_MISMATCH).
 * ACCOUNTING_MISMATCH is set when total differs from the item's used value.
 * Error bits are accumulated in err.
 */
11001 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11002 struct extent_buffer *eb, int slot)
11004 struct btrfs_root *extent_root = fs_info->extent_root;
11005 struct btrfs_root *chunk_root = fs_info->chunk_root;
11006 struct btrfs_block_group_item *bi;
11007 struct btrfs_block_group_item bg_item;
11008 struct btrfs_path path;
11009 struct btrfs_key bg_key;
11010 struct btrfs_key chunk_key;
11011 struct btrfs_key extent_key;
11012 struct btrfs_chunk *chunk;
11013 struct extent_buffer *leaf;
11014 struct btrfs_extent_item *ei;
11015 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11023 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11024 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer into a local struct */
11025 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11026 used = btrfs_block_group_used(&bg_item);
11027 bg_flags = btrfs_block_group_flags(&bg_item);
11029 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11030 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11031 chunk_key.offset = bg_key.objectid;
11033 btrfs_init_path(&path);
11034 /* Search for the referencer chunk */
11035 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11038 "block group[%llu %llu] did not find the related chunk item",
11039 bg_key.objectid, bg_key.offset);
11040 err |= REFERENCER_MISSING;
11042 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11043 struct btrfs_chunk);
11044 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11047 "block group[%llu %llu] related chunk item length does not match",
11048 bg_key.objectid, bg_key.offset);
11049 err |= REFERENCER_MISMATCH;
11052 btrfs_release_path(&path);
11054 /* Search from the block group bytenr */
11055 extent_key.objectid = bg_key.objectid;
11056 extent_key.type = 0;
11057 extent_key.offset = 0;
11059 btrfs_init_path(&path);
11060 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11064 /* Iterate extent tree to account used space */
11066 leaf = path.nodes[0];
11068 /* Search slot can point to the last item beyond leaf nritems */
11069 if (path.slots[0] >= btrfs_header_nritems(leaf))
11072 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past this block group */
11073 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11076 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11077 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11079 if (extent_key.objectid < bg_key.objectid)
/*
 * NOTE(review): the statement taken for METADATA_ITEM keys is not visible in
 * this listing; only the extent_key.offset accumulation is shown.
 */
11082 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11085 total += extent_key.offset;
11087 ei = btrfs_item_ptr(leaf, path.slots[0],
11088 struct btrfs_extent_item);
11089 flags = btrfs_extent_flags(leaf, ei);
/* Data extents must live in a DATA block group */
11090 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11091 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11093 "bad extent[%llu, %llu) type mismatch with chunk",
11094 extent_key.objectid,
11095 extent_key.objectid + extent_key.offset);
11096 err |= CHUNK_TYPE_MISMATCH;
11098 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11099 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11100 BTRFS_BLOCK_GROUP_METADATA))) {
11102 "bad extent[%llu, %llu) type mismatch with chunk",
11103 extent_key.objectid,
11104 extent_key.objectid + nodesize);
11105 err |= CHUNK_TYPE_MISMATCH;
11109 ret = btrfs_next_item(extent_root, &path);
11115 btrfs_release_path(&path);
11117 if (total != used) {
11119 "block group[%llu %llu] used %llu but extent items used %llu",
11120 bg_key.objectid, bg_key.offset, used, total);
11121 err |= ACCOUNTING_MISMATCH;
11127 * Check a chunk item.
11128 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of leaf @eb:
 *  - its length must be aligned to the super block sectorsize
 *    (BYTES_UNALIGNED);
 *  - its type must contain a bit from BTRFS_BLOCK_GROUP_TYPE_MASK and at most
 *    one bit from BTRFS_BLOCK_GROUP_PROFILE_MASK (UNKNOWN_TYPE);
 *  - a block group item keyed (chunk offset, BTRFS_BLOCK_GROUP_ITEM_KEY,
 *    length) must exist in the extent root with the same flags
 *    (REFERENCER_MISSING);
 *  - every stripe must have a dev extent keyed (stripe devid,
 *    BTRFS_DEV_EXTENT_KEY, stripe offset) in the dev root whose
 *    chunk_objectid, chunk_offset and length match this chunk
 *    (BACKREF_MISSING).
 * Error bits are accumulated in err.
 */
11130 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11131 struct extent_buffer *eb, int slot)
11133 struct btrfs_root *extent_root = fs_info->extent_root;
11134 struct btrfs_root *dev_root = fs_info->dev_root;
11135 struct btrfs_path path;
11136 struct btrfs_key chunk_key;
11137 struct btrfs_key bg_key;
11138 struct btrfs_key devext_key;
11139 struct btrfs_chunk *chunk;
11140 struct extent_buffer *leaf;
11141 struct btrfs_block_group_item *bi;
11142 struct btrfs_block_group_item bg_item;
11143 struct btrfs_dev_extent *ptr;
11144 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11156 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11157 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11158 length = btrfs_chunk_length(eb, chunk);
11159 chunk_end = chunk_key.offset + length;
11160 if (!IS_ALIGNED(length, sectorsize)) {
11161 error("chunk[%llu %llu) not aligned to %u",
11162 chunk_key.offset, chunk_end, sectorsize);
11163 err |= BYTES_UNALIGNED;
11167 type = btrfs_chunk_type(eb, chunk);
11168 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11169 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11170 error("chunk[%llu %llu) has no chunk type",
11171 chunk_key.offset, chunk_end);
11172 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero only when more than one bit is set */
11174 if (profile && (profile & (profile - 1))) {
11175 error("chunk[%llu %llu) multiple profiles detected: %llx",
11176 chunk_key.offset, chunk_end, profile);
11177 err |= UNKNOWN_TYPE;
/* Key of the block group item that should mirror this chunk */
11180 bg_key.objectid = chunk_key.offset;
11181 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11182 bg_key.offset = length;
11184 btrfs_init_path(&path);
11185 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11188 "chunk[%llu %llu) did not find the related block group item",
11189 chunk_key.offset, chunk_end);
11190 err |= REFERENCER_MISSING;
11192 leaf = path.nodes[0];
11193 bi = btrfs_item_ptr(leaf, path.slots[0],
11194 struct btrfs_block_group_item);
11195 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11197 if (btrfs_block_group_flags(&bg_item) != type) {
11199 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11200 chunk_key.offset, chunk_end, type,
11201 btrfs_block_group_flags(&bg_item));
11202 err |= REFERENCER_MISSING;
11206 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* One dev extent lookup per stripe; the path is reset on every iteration */
11207 for (i = 0; i < num_stripes; i++) {
11208 btrfs_release_path(&path);
11209 btrfs_init_path(&path);
11210 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11211 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11212 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11214 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11217 goto not_match_dev;
11219 leaf = path.nodes[0];
11220 ptr = btrfs_item_ptr(leaf, path.slots[0],
11221 struct btrfs_dev_extent);
11222 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11223 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11224 if (objectid != chunk_key.objectid ||
11225 offset != chunk_key.offset ||
11226 btrfs_dev_extent_length(leaf, ptr) != length)
11227 goto not_match_dev;
/*
 * NOTE(review): the message below passes chunk_key.objectid as the chunk
 * start, while every other message in this function passes chunk_key.offset.
 * Confirm which one is intended.
 */
11230 err |= BACKREF_MISSING;
11232 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11233 chunk_key.objectid, chunk_end, i);
11236 btrfs_release_path(&path);
11242 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and hand each one to the checker that matches
 * its key type (extent data, block group, dev item, chunk, dev extent,
 * extent/metadata item, and the keyed backref types).
 * BTRFS_EXTENT_CSUM_KEY items are not checked here; their item size is only
 * added to total_csum_bytes. The walk continues while ++slot is below the
 * leaf's item count.
 */
11244 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11246 struct btrfs_fs_info *fs_info = root->fs_info;
11247 struct btrfs_key key;
11250 struct btrfs_extent_data_ref *dref;
11255 btrfs_item_key_to_cpu(eb, &key, slot);
11259 case BTRFS_EXTENT_DATA_KEY:
11260 ret = check_extent_data_item(root, eb, slot);
11263 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11264 ret = check_block_group_item(fs_info, eb, slot);
11267 case BTRFS_DEV_ITEM_KEY:
11268 ret = check_dev_item(fs_info, eb, slot);
11271 case BTRFS_CHUNK_ITEM_KEY:
11272 ret = check_chunk_item(fs_info, eb, slot);
11275 case BTRFS_DEV_EXTENT_KEY:
11276 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11279 case BTRFS_EXTENT_ITEM_KEY:
11280 case BTRFS_METADATA_ITEM_KEY:
11281 ret = check_extent_item(fs_info, eb, slot);
11284 case BTRFS_EXTENT_CSUM_KEY:
11285 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items follow */
11287 case BTRFS_TREE_BLOCK_REF_KEY:
11288 ret = check_tree_block_backref(fs_info, key.offset,
11292 case BTRFS_EXTENT_DATA_REF_KEY:
11293 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11294 ret = check_extent_data_backref(fs_info,
11295 btrfs_extent_data_ref_root(eb, dref),
11296 btrfs_extent_data_ref_objectid(eb, dref),
11297 btrfs_extent_data_ref_offset(eb, dref),
11299 btrfs_extent_data_ref_count(eb, dref));
11302 case BTRFS_SHARED_BLOCK_REF_KEY:
11303 ret = check_shared_block_backref(fs_info, key.offset,
11307 case BTRFS_SHARED_DATA_REF_KEY:
11308 ret = check_shared_data_backref(fs_info, key.offset,
11316 if (++slot < btrfs_header_nritems(eb))
11323 * Helper function for later fs/subvol tree check. To determine if a tree
11324 * block should be checked.
11325 * This function ensures that only the direct referencer with the lowest
11326 * rootid checks a fs/subvolume tree block.
11328 * Backref check at extent tree would detect errors like missing subvolume
11329 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb. The extent item for the
 * block's bytenr is located in the extent root (search with a
 * BTRFS_METADATA_ITEM_KEY key at offset (u64)-1, then
 * btrfs_previous_extent_item), and its inline refs are walked. When a
 * BTRFS_TREE_BLOCK_REF_KEY inline ref with an offset below root->objectid is
 * found, the path is released early.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller in traverse_tree_block skips the block when the result is zero.
 */
11331 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11333 struct btrfs_root *extent_root = root->fs_info->extent_root;
11334 struct btrfs_key key;
11335 struct btrfs_path path;
11336 struct extent_buffer *leaf;
11338 struct btrfs_extent_item *ei;
11344 struct btrfs_extent_inline_ref *iref;
11347 btrfs_init_path(&path);
11348 key.objectid = btrfs_header_bytenr(eb);
11349 key.type = BTRFS_METADATA_ITEM_KEY;
11350 key.offset = (u64)-1;
11353 * Any failure in backref resolving means we can't determine
11354 * whom the tree block belongs to.
11355 * So in that case, we need to check that tree block
11357 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11361 ret = btrfs_previous_extent_item(extent_root, &path,
11362 btrfs_header_bytenr(eb));
11366 leaf = path.nodes[0];
11367 slot = path.slots[0];
11368 btrfs_item_key_to_cpu(leaf, &key, slot);
11369 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For METADATA_ITEM keys the inline refs start right after the extent item;
 * in the other branch a btrfs_tree_block_info sits in between.
 */
11371 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11372 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11374 struct btrfs_tree_block_info *info;
11376 info = (struct btrfs_tree_block_info *)(ei + 1);
11377 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11380 item_size = btrfs_item_size_nr(leaf, slot);
11381 ptr = (unsigned long)iref;
11382 end = (unsigned long)ei + item_size;
/* Step through the inline refs until the end of the item */
11383 while (ptr < end) {
11384 iref = (struct btrfs_extent_inline_ref *)ptr;
11385 type = btrfs_extent_inline_ref_type(leaf, iref);
11386 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11389 * We only check the tree block if current root is
11390 * the lowest referencer of it.
11392 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11393 offset < root->objectid) {
11394 btrfs_release_path(&path);
11398 ptr += btrfs_extent_inline_ref_size(type);
11401 * Normally we should also check keyed tree block ref, but that may be
11402 * very time consuming. Inlined ref should already make us skip a lot
11403 * of refs now. So skip search keyed tree block ref.
11407 btrfs_release_path(&path);
11412 * Traversal function for tree block. We will do:
11413 * 1) Skip shared fs/subvolume tree blocks
11414 * 2) Update related bytes accounting
11415 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root in pre-order.
 * For fs trees, should_check() is consulted first. The global byte counters
 * (total_btree_bytes, total_fs_tree_bytes, total_extent_tree_bytes,
 * btree_space_waste) are then updated, found_old_backref may be set, the
 * block's own backref is checked with check_tree_block_ref(), and finally
 * either the leaf items are checked or each child is read and traversed.
 * Children whose key is covered by the root item's drop progress at
 * drop_level are filtered via is_dropped_key().
 */
11417 static int traverse_tree_block(struct btrfs_root *root,
11418 struct extent_buffer *node)
11420 struct extent_buffer *eb;
11421 struct btrfs_key key;
11422 struct btrfs_key drop_key;
11430 * Skip shared fs/subvolume tree block, in that case they will
11431 * be checked by referencer with lowest rootid
11433 if (is_fstree(root->objectid) && !should_check(root, node))
11436 /* Update bytes accounting */
11437 total_btree_bytes += node->len;
11438 if (fs_root_objectid(btrfs_header_owner(node)))
11439 total_fs_tree_bytes += node->len;
11440 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11441 total_extent_tree_bytes += node->len;
/*
 * Flag a reloc-tree-owned block that uses the mixed backref revision but
 * lacks the RELOC header flag; only the first occurrence matters.
 */
11442 if (!found_old_backref &&
11443 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11444 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11445 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11446 found_old_backref = 1;
11448 /* pre-order tranversal, check itself first */
11449 level = btrfs_header_level(node);
11450 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11451 btrfs_header_level(node),
11452 btrfs_header_owner(node));
11456 "check %s failed root %llu bytenr %llu level %d, force continue check",
11457 level ? "node":"leaf", root->objectid,
11458 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf path: account the free space and check every item */
11461 btree_space_waste += btrfs_leaf_free_space(root, node);
11462 ret = check_leaf_items(root, node);
/* Node path: account the unused key pointer slots */
11467 nr = btrfs_header_nritems(node);
11468 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11469 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11470 sizeof(struct btrfs_key_ptr);
11472 /* Then check all its children */
11473 for (i = 0; i < nr; i++) {
11474 u64 blocknr = btrfs_node_blockptr(node, i);
11476 btrfs_node_key_to_cpu(node, &key, i);
11477 if (level == root->root_item.drop_level &&
11478 is_dropped_key(&key, &drop_key))
11482 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11483 * to call the function itself.
11485 eb = read_tree_block(root, blocknr, root->nodesize, 0);
/* Only recurse into blocks that were read successfully */
11486 if (extent_buffer_uptodate(eb)) {
11487 ret = traverse_tree_block(root, eb);
11490 free_extent_buffer(eb);
11497 * Low memory usage version check_chunks_and_extents.
/*
 * Low-memory variant of the chunk/extent check. The chunk root and the tree
 * root are traversed first with traverse_tree_block(). The tree root is then
 * searched starting at BTRFS_EXTENT_TREE_OBJECTID and every
 * BTRFS_ROOT_ITEM_KEY item found is opened as a root and traversed in turn.
 */
11499 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11501 struct btrfs_path path;
11502 struct btrfs_key key;
11503 struct btrfs_root *root1;
11504 struct btrfs_root *cur_root;
11508 root1 = root->fs_info->chunk_root;
11509 ret = traverse_tree_block(root1, root1->node);
11512 root1 = root->fs_info->tree_root;
11513 ret = traverse_tree_block(root1, root1->node);
11516 btrfs_init_path(&path);
11517 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11519 key.type = BTRFS_ROOT_ITEM_KEY;
11521 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11523 error("cannot find extent treet in tree_root");
11528 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11529 if (key.type != BTRFS_ROOT_ITEM_KEY)
11531 key.offset = (u64)-1;
/*
 * Reloc tree roots are read with the no-cache variant and therefore freed
 * explicitly after the traversal below.
 */
11533 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11534 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11537 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11538 if (IS_ERR(cur_root) || !cur_root) {
11539 error("failed to read tree: %lld", key.objectid);
11543 ret = traverse_tree_block(cur_root, cur_root->node);
11546 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11547 btrfs_free_fs_root(cur_root);
11549 ret = btrfs_next_item(root1, &path);
11555 btrfs_release_path(&path);
/*
 * Re-initialise the root node of @root inside transaction @trans.
 * A buffer c is obtained (the visible allocation goes through
 * btrfs_alloc_free_block), its header is zeroed and refilled with level,
 * bytenr, generation, backref revision and owner, the fsid and chunk tree
 * uuid are written, and the buffer is marked dirty. When the new buffer
 * starts at the same bytenr as the old root node, the root item's generation
 * and level are refreshed and written with btrfs_update_root(). The root is
 * finally put on the dirty list.
 * NOTE(review): how @overwrite selects between the two extent_buffer_get(c)
 * paths is not visible in this listing.
 */
11559 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11560 struct btrfs_root *root, int overwrite)
11562 struct extent_buffer *c;
11563 struct extent_buffer *old = root->node;
11566 struct btrfs_disk_key disk_key = {0,0,0};
11572 extent_buffer_get(c);
11575 c = btrfs_alloc_free_block(trans, root,
11577 root->root_key.objectid,
11578 &disk_key, level, 0, 0);
11581 extent_buffer_get(c);
/* Start from an all-zero header, then fill in the fields one by one */
11585 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11586 btrfs_set_header_level(c, level);
11587 btrfs_set_header_bytenr(c, c->start);
11588 btrfs_set_header_generation(c, trans->transid);
11589 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11590 btrfs_set_header_owner(c, root->root_key.objectid);
11592 write_extent_buffer(c, root->fs_info->fsid,
11593 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11595 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11596 btrfs_header_chunk_tree_uuid(c),
11599 btrfs_mark_buffer_dirty(c);
11601 * this case can happen in the following case:
11603 * 1.overwrite previous root.
11605 * 2.reinit reloc data root, this is because we skip pin
11606 * down reloc data tree before which means we can allocate
11607 * same block bytenr here.
11609 if (old->start == c->start) {
11610 btrfs_set_root_generation(&root->root_item,
11612 root->root_item.level = btrfs_header_level(root->node);
11613 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11614 &root->root_key, &root->root_item);
11616 free_extent_buffer(c);
11620 free_extent_buffer(old);
11622 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent of tree block @eb and of the blocks reachable
 * from it. A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not processed again.
 * Item path: BTRFS_ROOT_ITEM_KEY items (except the extent root and both
 * reloc roots) have their root block read and pinned with tree_root = 0.
 * Node path: each child pointer is followed with the caller's @tree_root;
 * when level == 1 and @tree_root is 0 the child is pinned by bytenr and
 * nodesize without being read.
 */
11626 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11627 struct extent_buffer *eb, int tree_root)
11629 struct extent_buffer *tmp;
11630 struct btrfs_root_item *ri;
11631 struct btrfs_key key;
11634 int level = btrfs_header_level(eb);
11640 * If we have pinned this block before, don't pin it again.
11641 * This can not only avoid forever loop with broken filesystem
11642 * but also give us some speedups.
11644 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11645 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11648 btrfs_pin_extent(fs_info, eb->start, eb->len);
11650 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11651 nritems = btrfs_header_nritems(eb);
11652 for (i = 0; i < nritems; i++) {
11654 btrfs_item_key_to_cpu(eb, &key, i);
11655 if (key.type != BTRFS_ROOT_ITEM_KEY)
11657 /* Skip the extent root and reloc roots */
11658 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11659 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11660 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11662 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11663 bytenr = btrfs_disk_root_bytenr(eb, ri);
11666 * If at any point we start needing the real root we
11667 * will have to build a stump root for the root we are
11668 * in, but for now this doesn't actually use the root so
11669 * just pass in extent_root.
11671 tmp = read_tree_block(fs_info->extent_root, bytenr,
11673 if (!extent_buffer_uptodate(tmp)) {
11674 fprintf(stderr, "Error reading root block\n");
/* Roots referenced from the tree root are walked as ordinary trees */
11677 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11678 free_extent_buffer(tmp);
11682 bytenr = btrfs_node_blockptr(eb, i);
11684 /* If we aren't the tree root don't read the block */
11685 if (level == 1 && !tree_root) {
11686 btrfs_pin_extent(fs_info, bytenr, nodesize);
11690 tmp = read_tree_block(fs_info->extent_root, bytenr,
11692 if (!extent_buffer_uptodate(tmp)) {
11693 fprintf(stderr, "Error reading tree block\n");
11696 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11697 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (tree_root argument 0)
 * and then from the tree root (tree_root argument 1). The result of the
 * second walk is returned directly.
 */
11706 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11710 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11714 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree. The avail_*
 * allocation bits in @fs_info are cleared, then every BTRFS_CHUNK_ITEM_KEY
 * item found in the chunk root is turned into a block group with
 * btrfs_add_block_group() and its range is marked dirty in
 * fs_info->free_space_cache. A second pass steps through the block groups
 * with btrfs_lookup_first_block_group(), advancing start past each one.
 */
11717 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11719 struct btrfs_block_group_cache *cache;
11720 struct btrfs_path path;
11721 struct extent_buffer *leaf;
11722 struct btrfs_chunk *chunk;
11723 struct btrfs_key key;
11727 btrfs_init_path(&path);
11729 key.type = BTRFS_CHUNK_ITEM_KEY;
11731 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11733 btrfs_release_path(&path);
11738 * We do this in case the block groups were screwed up and had alloc
11739 * bits that aren't actually set on the chunks. This happens with
11740 * restored images every time and could happen in real life I guess.
11742 fs_info->avail_data_alloc_bits = 0;
11743 fs_info->avail_metadata_alloc_bits = 0;
11744 fs_info->avail_system_alloc_bits = 0;
11746 /* First we need to create the in-memory block groups */
/* Move to the next leaf once the current one is exhausted */
11748 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11749 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11751 btrfs_release_path(&path);
11759 leaf = path.nodes[0];
11760 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11761 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11766 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11767 btrfs_add_block_group(fs_info, 0,
11768 btrfs_chunk_type(leaf, chunk),
11769 key.objectid, key.offset,
11770 btrfs_chunk_length(leaf, chunk));
11771 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11772 key.offset + btrfs_chunk_length(leaf, chunk));
11777 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the block group just found */
11781 start = cache->key.objectid + cache->key.offset;
11784 btrfs_release_path(&path);
/*
 * Remove pending balance state from the tree root inside @trans:
 *  1) search for the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item
 *     and delete it with btrfs_del_item();
 *  2) delete the root items whose objectid is BTRFS_TREE_RELOC_OBJECTID,
 *     batching consecutive slots through del_slot/del_nr and
 *     btrfs_del_items();
 *  3) read the data reloc tree root, record it in the transaction,
 *     re-initialise it with btrfs_fsck_reinit_root() and recreate its root
 *     directory with btrfs_make_root_dir().
 */
11788 static int reset_balance(struct btrfs_trans_handle *trans,
11789 struct btrfs_fs_info *fs_info)
11791 struct btrfs_root *root = fs_info->tree_root;
11792 struct btrfs_path path;
11793 struct extent_buffer *leaf;
11794 struct btrfs_key key;
11795 int del_slot, del_nr = 0;
11799 btrfs_init_path(&path);
11800 key.objectid = BTRFS_BALANCE_OBJECTID;
11801 key.type = BTRFS_BALANCE_ITEM_KEY;
11803 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11808 goto reinit_data_reloc;
11813 ret = btrfs_del_item(trans, root, &path);
11816 btrfs_release_path(&path);
11818 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11819 key.type = BTRFS_ROOT_ITEM_KEY;
11821 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
/* At the end of a leaf: flush the pending batch and search again */
11825 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11830 ret = btrfs_del_items(trans, root, &path,
11837 btrfs_release_path(&path);
11840 ret = btrfs_search_slot(trans, root, &key, &path,
11847 leaf = path.nodes[0];
11848 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11849 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11851 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11856 del_slot = path.slots[0];
11865 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11869 btrfs_release_path(&path);
/* From here on, root refers to the data reloc tree, not the tree root */
11872 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11873 key.type = BTRFS_ROOT_ITEM_KEY;
11874 key.offset = (u64)-1;
11875 root = btrfs_read_fs_root(fs_info, &key);
11876 if (IS_ERR(root)) {
11877 fprintf(stderr, "Error reading data reloc tree\n");
11878 ret = PTR_ERR(root);
11881 record_root_in_trans(trans, root);
11882 ret = btrfs_fsck_reinit_root(trans, root, 0);
11885 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11887 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans. Filesystems with the
 * MIXED_GROUPS incompat flag are rejected with a message. Otherwise the
 * steps are: pin all metadata blocks, free and recreate the in-memory block
 * groups, re-initialise the extent root, insert one block group item per
 * in-memory block group into the extent root, and finally reset any pending
 * balance.
 */
11891 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11892 struct btrfs_fs_info *fs_info)
11898 * The only reason we don't do this is because right now we're just
11899 * walking the trees we find and pinning down their bytes, we don't look
11900 * at any of the leaves. In order to do mixed groups we'd have to check
11901 * the leaves of any fs roots and pin down the bytes for any file
11902 * extents we find. Not hard but why do it if we don't have to?
11904 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11905 fprintf(stderr, "We don't support re-initing the extent tree "
11906 "for mixed block groups yet, please notify a btrfs "
11907 "developer you want to do this so they can add this "
11908 "functionality.\n");
11913 * first we need to walk all of the trees except the extent tree and pin
11914 * down the bytes that are in use so we don't overwrite any existing
11917 ret = pin_metadata_blocks(fs_info);
11919 fprintf(stderr, "error pinning down used bytes\n");
11924 * Need to drop all the block groups since we're going to recreate all
11927 btrfs_free_block_groups(fs_info);
11928 ret = reset_block_groups(fs_info);
11930 fprintf(stderr, "error resetting the block groups\n");
11934 /* Ok we can allocate now, reinit the extent root */
11935 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11937 fprintf(stderr, "extent root initialization failed\n");
11939 * When the transaction code is updated we should end the
11940 * transaction, but for now progs only knows about commit so
11941 * just return an error.
11947 * Now we have all the in-memory block groups setup so we can make
11948 * allocations properly, and the metadata we care about is safe since we
11949 * pinned all of it above.
11952 struct btrfs_block_group_cache *cache;
11954 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group, then write its item into the new tree */
11957 start = cache->key.objectid + cache->key.offset;
11958 ret = btrfs_insert_item(trans, fs_info->extent_root,
11959 &cache->key, &cache->item,
11960 sizeof(cache->item));
11962 fprintf(stderr, "Error adding block group\n");
11965 btrfs_extent_post_op(trans, fs_info->extent_root);
11968 ret = reset_balance(trans, fs_info);
11970 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW metadata block @eb. The owning root is looked up from the block's
 * header owner (the @root argument is only used to reach fs_info and is then
 * overwritten), a transaction is started, and the tree is searched for the
 * block's first key with path.lowest_level set to the block's level and the
 * last btrfs_search_slot() argument set to 1. The transaction is then
 * committed.
 * Returns PTR_ERR() of the root or transaction handle when either cannot be
 * obtained.
 */
11975 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11977 struct btrfs_path path;
11978 struct btrfs_trans_handle *trans;
11979 struct btrfs_key key;
11982 printf("Recowing metadata block %llu\n", eb->start);
11983 key.objectid = btrfs_header_owner(eb);
11984 key.type = BTRFS_ROOT_ITEM_KEY;
11985 key.offset = (u64)-1;
11987 root = btrfs_read_fs_root(root->fs_info, &key);
11988 if (IS_ERR(root)) {
11989 fprintf(stderr, "Couldn't find owner root %llu\n",
11991 return PTR_ERR(root);
11994 trans = btrfs_start_transaction(root, 1);
11996 return PTR_ERR(trans);
11998 btrfs_init_path(&path);
/* Stop the search at the level of eb; nodes and leaves store keys differently */
11999 path.lowest_level = btrfs_header_level(eb);
12000 if (path.lowest_level)
12001 btrfs_node_key_to_cpu(eb, &key, 0);
12003 btrfs_item_key_to_cpu(eb, &key, 0);
12005 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the commit's return value is not stored on this line */
12006 btrfs_commit_transaction(trans, root);
12007 btrfs_release_path(&path);
/*
 * Delete the item described by @bad. The root identified by bad->root_id is
 * read (the @root argument is only used to reach fs_info and is then
 * overwritten), a transaction is started, bad->key is searched with
 * ins_len -1 and the last btrfs_search_slot() argument set to 1, the item is
 * removed with btrfs_del_item(), and the transaction is committed.
 * Returns PTR_ERR() of the root or transaction handle when either cannot be
 * obtained.
 */
12011 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12013 struct btrfs_path path;
12014 struct btrfs_trans_handle *trans;
12015 struct btrfs_key key;
12018 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12019 bad->key.type, bad->key.offset);
12020 key.objectid = bad->root_id;
12021 key.type = BTRFS_ROOT_ITEM_KEY;
12022 key.offset = (u64)-1;
12024 root = btrfs_read_fs_root(root->fs_info, &key);
12025 if (IS_ERR(root)) {
12026 fprintf(stderr, "Couldn't find owner root %llu\n",
12028 return PTR_ERR(root);
12031 trans = btrfs_start_transaction(root, 1);
12033 return PTR_ERR(trans);
12035 btrfs_init_path(&path);
12036 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12042 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the commit's return value is not stored on this line */
12044 btrfs_commit_transaction(trans, root);
12045 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block: inside a new transaction
 * on @root, the super copy's log_root and log_root_level are both set to 0
 * and the transaction is committed. ret takes PTR_ERR(trans) when the
 * transaction cannot be started, otherwise the commit result.
 */
12049 static int zero_log_tree(struct btrfs_root *root)
12051 struct btrfs_trans_handle *trans;
12054 trans = btrfs_start_transaction(root, 1);
12055 if (IS_ERR(trans)) {
12056 ret = PTR_ERR(trans);
12059 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12060 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12061 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for a data range starting at @start. The
 * range is processed in steps of csum_root->sectorsize while offset < len:
 * each step reads the data at start + offset into @buf with
 * read_extent_data() and passes it to btrfs_csum_file_block().
 */
12065 static int populate_csum(struct btrfs_trans_handle *trans,
12066 struct btrfs_root *csum_root, char *buf, u64 start,
12073 while (offset < len) {
12074 sectorsize = csum_root->sectorsize;
12075 ret = read_extent_data(csum_root, buf, start + offset,
/*
 * NOTE(review): the third argument is start + len (the end of the whole
 * range) on every iteration -- confirm this matches what
 * btrfs_csum_file_block() expects.
 */
12079 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12080 start + offset, buf, sectorsize);
12083 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the file extents of one fs/subvolume
 * tree @cur_root. A buffer of one csum-root sectorsize is allocated, the
 * tree is walked item by item, and for every BTRFS_EXTENT_DATA_KEY item of
 * type BTRFS_FILE_EXTENT_REG the on-disk range (disk_bytenr,
 * disk_num_bytes) is passed to populate_csum().
 * NOTE(review): -EEXIST from populate_csum() is tested explicitly; what that
 * branch does is not visible in this listing.
 */
12088 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12089 struct btrfs_root *csum_root,
12090 struct btrfs_root *cur_root)
12092 struct btrfs_path path;
12093 struct btrfs_key key;
12094 struct extent_buffer *node;
12095 struct btrfs_file_extent_item *fi;
12102 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
12106 btrfs_init_path(&path);
12110 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12113 /* Iterate all regular file extents and fill its csum */
12115 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12117 if (key.type != BTRFS_EXTENT_DATA_KEY)
12119 node = path.nodes[0];
12120 slot = path.slots[0];
12121 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/* Only regular extents are checksummed */
12122 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12124 start = btrfs_file_extent_disk_bytenr(node, fi);
12125 len = btrfs_file_extent_disk_num_bytes(node, fi);
12127 ret = populate_csum(trans, csum_root, buf, start, len);
12128 if (ret == -EEXIST)
12134 * TODO: if next leaf is corrupted, jump to nearest next valid
12137 ret = btrfs_next_item(cur_root, &path);
12147 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking every fs/subvolume tree. The tree root is
 * searched starting at BTRFS_FS_TREE_OBJECTID; each item is tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(), and every
 * root that passes is read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root().
 */
12152 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12153 struct btrfs_root *csum_root)
12155 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12156 struct btrfs_path path;
12157 struct btrfs_root *tree_root = fs_info->tree_root;
12158 struct btrfs_root *cur_root;
12159 struct extent_buffer *node;
12160 struct btrfs_key key;
12164 btrfs_init_path(&path);
12165 key.objectid = BTRFS_FS_TREE_OBJECTID;
12167 key.type = BTRFS_ROOT_ITEM_KEY;
12168 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12177 node = path.nodes[0];
12178 slot = path.slots[0];
12179 btrfs_item_key_to_cpu(node, &key, slot);
12180 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12182 if (key.type != BTRFS_ROOT_ITEM_KEY)
12184 if (!is_fstree(key.objectid))
/* The root is read with offset (u64)-1 rather than the item's own offset */
12186 key.offset = (u64)-1;
12188 cur_root = btrfs_read_fs_root(fs_info, &key);
12189 if (IS_ERR(cur_root) || !cur_root) {
12190 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12194 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12199 ret = btrfs_next_item(tree_root, &path);
12209 btrfs_release_path(&path);
/*
 * Fill @csum_root by walking the extent tree. Each item is tested for the
 * BTRFS_EXTENT_ITEM_KEY type and for the BTRFS_EXTENT_FLAG_DATA flag;
 * populate_csum() is called with key.objectid as the start of the range.
 * A buffer of csum_root->sectorsize bytes is allocated for the data reads.
 */
12213 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12214 struct btrfs_root *csum_root)
12216 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12217 struct btrfs_path path;
12218 struct btrfs_extent_item *ei;
12219 struct extent_buffer *leaf;
12221 struct btrfs_key key;
12224 btrfs_init_path(&path);
12226 key.type = BTRFS_EXTENT_ITEM_KEY;
12228 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12230 btrfs_release_path(&path);
12234 buf = malloc(csum_root->sectorsize);
12236 btrfs_release_path(&path);
/* Move to the next leaf once the current one is exhausted */
12241 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12242 ret = btrfs_next_leaf(extent_root, &path);
12250 leaf = path.nodes[0];
12252 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12253 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12258 ei = btrfs_item_ptr(leaf, path.slots[0],
12259 struct btrfs_extent_item);
12260 if (!(btrfs_extent_flags(leaf, ei) &
12261 BTRFS_EXTENT_FLAG_DATA)) {
12266 ret = populate_csum(trans, csum_root, buf, key.objectid,
12273 btrfs_release_path(&path);
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * When the extent tree has just been re-initialized it no longer holds any
 * extent info, so it cannot drive the scan. In that case the caller sets
 * search_fs_tree and the fs/subvol trees are used as the source instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
/*
 * Tear down the global roots_info_cache: remove every entry from the cache
 * tree, then free the tree itself and reset the global pointer to NULL.
 * Does nothing visible when the cache was never allocated.
 */
12295 static void free_roots_info_cache(void)
12297 if (!roots_info_cache)
12300 while (!cache_tree_empty(roots_info_cache)) {
12301 struct cache_extent *entry;
12302 struct root_item_info *rii;
12304 entry = first_cache_extent(roots_info_cache);
12307 remove_cache_extent(roots_info_cache, entry);
/*
 * Recover the enclosing root_item_info from its embedded cache_extent.
 * NOTE(review): presumably so it can be freed - confirm.
 */
12308 rii = container_of(entry, struct root_item_info, cache_extent);
12312 free(roots_info_cache);
12313 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache from the extent tree.
 *
 * @info: filesystem whose extent tree is scanned
 *
 * For each tree block extent whose first inline ref is a TREE_BLOCK_REF, the
 * owning root id is looked up in the cache (an entry is created on first
 * sight) and the entry is updated so that it describes the highest level
 * block seen for that root: bytenr, generation and level.
 */
12316 static int build_roots_info_cache(struct btrfs_fs_info *info)
12319 struct btrfs_key key;
12320 struct extent_buffer *leaf;
12321 struct btrfs_path path;
/* Allocate and initialize the cache tree on first use. */
12323 if (!roots_info_cache) {
12324 roots_info_cache = malloc(sizeof(*roots_info_cache));
12325 if (!roots_info_cache)
12327 cache_tree_init(roots_info_cache);
12330 btrfs_init_path(&path);
12332 key.type = BTRFS_EXTENT_ITEM_KEY;
12334 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12337 leaf = path.nodes[0];
12340 struct btrfs_key found_key;
12341 struct btrfs_extent_item *ei;
12342 struct btrfs_extent_inline_ref *iref;
12343 int slot = path.slots[0];
12348 struct cache_extent *entry;
12349 struct root_item_info *rii;
/* Move on to the next leaf once the current one is exhausted. */
12351 if (slot >= btrfs_header_nritems(leaf)) {
12352 ret = btrfs_next_leaf(info->extent_root, &path);
12359 leaf = path.nodes[0];
12360 slot = path.slots[0];
12363 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys can describe tree blocks. */
12365 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12366 found_key.type != BTRFS_METADATA_ITEM_KEY)
12369 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12370 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block. */
12372 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12373 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the block level. For a METADATA_ITEM the
 * ref follows the extent item directly and the level is the key offset;
 * otherwise a btrfs_tree_block_info sits in between and carries the level.
 */
12376 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12377 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12378 level = found_key.offset;
12380 struct btrfs_tree_block_info *binfo;
12382 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12383 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12384 level = btrfs_tree_block_level(leaf, binfo);
12388 * For a root extent, it must be of the following type and the
12389 * first (and only one) iref in the item.
12391 type = btrfs_extent_inline_ref_type(leaf, iref);
12392 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The inline ref offset holds the id of the owning root. */
12395 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12396 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entries start with level (u8)-1, meaning no block recorded yet. */
12398 rii = malloc(sizeof(struct root_item_info));
12403 rii->cache_extent.start = root_id;
12404 rii->cache_extent.size = 1;
12405 rii->level = (u8)-1;
12406 entry = &rii->cache_extent;
12407 ret = insert_cache_extent(roots_info_cache, entry);
12410 rii = container_of(entry, struct root_item_info,
12414 ASSERT(rii->cache_extent.start == root_id);
12415 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level block (or the first block seen) replaces the recorded one
 * and resets node_count to 1. NOTE(review): the equal level branch
 * presumably counts additional candidates - confirm.
 */
12417 if (level > rii->level || rii->level == (u8)-1) {
12418 rii->level = level;
12419 rii->bytenr = found_key.objectid;
12420 rii->gen = btrfs_extent_generation(leaf, ei);
12421 rii->node_count = 1;
12422 } else if (level == rii->level) {
12430 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the data gathered in roots_info_cache
 * and, outside read only mode, rewrite it to match.
 *
 * @path:           path whose leaf and slot point at the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only report a mismatch without writing
 *
 * The root item is considered outdated when its bytenr, level or generation
 * differs from the cached values. The return value convention should be
 * confirmed against the return statements of this function.
 */
12435 static int maybe_repair_root_item(struct btrfs_path *path,
12436 const struct btrfs_key *root_key,
12437 const int read_only_mode)
12439 const u64 root_id = root_key->objectid;
12440 struct cache_extent *entry;
12441 struct root_item_info *rii;
12442 struct btrfs_root_item ri;
12443 unsigned long offset;
/* Find what the extent tree scan recorded for this root. */
12445 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12448 "Error: could not find extent items for root %llu\n",
12449 root_key->objectid);
12453 rii = container_of(entry, struct root_item_info, cache_extent);
12454 ASSERT(rii->cache_extent.start == root_id);
12455 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root node is required to proceed. */
12457 if (rii->node_count != 1) {
12459 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf. */
12464 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12465 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Compare the stored bytenr, level and generation with the cached values. */
12467 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12468 btrfs_root_level(&ri) != rii->level ||
12469 btrfs_root_generation(&ri) != rii->gen) {
12472 * If we're in repair mode but our caller told us to not update
12473 * the root item, i.e. just check if it needs to be updated, don't
12474 * print this message, since the caller will call us again shortly
12475 * for the same root item without read only mode (the caller will
12476 * open a transaction first).
12478 if (!(read_only_mode && repair))
12480 "%sroot item for root %llu,"
12481 " current bytenr %llu, current gen %llu, current level %u,"
12482 " new bytenr %llu, new gen %llu, new level %u\n",
12483 (read_only_mode ? "" : "fixing "),
12485 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12486 btrfs_root_level(&ri),
12487 rii->bytenr, rii->gen, rii->level);
/* Detect a root item newer than the root node found in the extent tree. */
12489 if (btrfs_root_generation(&ri) > rii->gen) {
12491 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12492 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back into the leaf. */
12496 if (!read_only_mode) {
12497 btrfs_set_root_bytenr(&ri, rii->bytenr);
12498 btrfs_set_root_level(&ri, rii->level);
12499 btrfs_set_root_generation(&ri, rii->gen);
12500 write_extent_buffer(path->nodes[0], &ri,
12501 offset, sizeof(ri));
12511 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12512 * caused read-only snapshots to be corrupted if they were created at a moment
12513 * when the source subvolume/snapshot had orphan items. The issue was that the
12514 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12515 * node instead of the post orphan cleanup root node.
12516 * So this function, and its callees, just detects and fixes those cases. Even
12517 * though the regression was for read-only snapshots, this function applies to
12518 * any snapshot/subvolume root.
12519 * This must be run before any other repair code - not doing so makes other
12520 * repair code delete or modify backrefs in the extent tree for example, which
12521 * will result in an inconsistent fs after repairing the root items.
/*
 * @info: filesystem whose root tree is scanned for outdated root items
 *
 * Builds roots_info_cache, then visits the ROOT_ITEM keys of the root tree
 * starting at BTRFS_FIRST_FREE_OBJECTID and passes each one (except the tree
 * reloc root) to maybe_repair_root_item(). The cache is freed before leaving.
 */
12523 static int repair_root_items(struct btrfs_fs_info *info)
12525 struct btrfs_path path;
12526 struct btrfs_key key;
12527 struct extent_buffer *leaf;
12528 struct btrfs_trans_handle *trans = NULL;
12531 int need_trans = 0;
12533 btrfs_init_path(&path);
/* Collect the real root node of every tree from the extent tree first. */
12535 ret = build_roots_info_cache(info);
12539 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12540 key.type = BTRFS_ROOT_ITEM_KEY;
12545 * Avoid opening and committing transactions if a leaf doesn't have
12546 * any root items that need to be fixed, so that we avoid rotating
12547 * backup roots unnecessarily.
12550 trans = btrfs_start_transaction(info->tree_root, 1);
12551 if (IS_ERR(trans)) {
12552 ret = PTR_ERR(trans);
12557 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12561 leaf = path.nodes[0];
12564 struct btrfs_key found_key;
/* End of leaf: remember the next key to resume from, then drop the path. */
12566 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12567 int no_more_keys = find_next_key(&path, &key);
12569 btrfs_release_path(&path);
12571 ret = btrfs_commit_transaction(trans,
12583 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12585 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12587 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction the helper runs in read only mode. */
12590 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12594 if (!trans && repair) {
12597 btrfs_release_path(&path);
12607 free_roots_info_cache();
12608 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not checked. */
12610 btrfs_commit_transaction(trans, info->tree_root);
12617 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12619 struct btrfs_trans_handle *trans;
12620 struct btrfs_block_group_cache *bg_cache;
12624 /* Clear all free space cache inodes and its extent data */
12626 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12629 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12632 current = bg_cache->key.objectid + bg_cache->key.offset;
12635 /* Don't forget to set cache_generation to -1 */
12636 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12637 if (IS_ERR(trans)) {
12638 error("failed to update super block cache generation");
12639 return PTR_ERR(trans);
12641 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12642 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage and help text for the check command, one string per entry: synopsis,
 * short and long description, then the option list. cmd_check() passes this
 * array to usage() when the command line is invalid.
 */
12647 const char * const cmd_check_usage[] = {
12648 "btrfs check [options] <device>",
12649 "Check structural integrity of a filesystem (unmounted).",
12650 "Check structural integrity of an unmounted filesystem. Verify internal",
12651 "trees' consistency and item connectivity. In the repair mode try to",
12652 "fix the problems found. ",
12653 "WARNING: the repair mode is considered dangerous",
12655 "-s|--super <superblock> use this superblock copy",
12656 "-b|--backup use the first valid backup root copy",
12657 "--repair try to repair the filesystem",
12658 "--readonly run in read-only mode (default)",
12659 "--init-csum-tree create a new CRC tree",
12660 "--init-extent-tree create a new extent tree",
12661 "--mode <MODE> allows choice of memory/IO trade-offs",
12662 " where MODE is one of:",
12663 " original - read inodes and extents to memory (requires",
12664 " more memory, does less IO)",
12665 " lowmem - try to use less memory but read blocks again",
12667 "--check-data-csum verify checksums of data blocks",
12668 "-Q|--qgroup-report print a report on qgroup consistency",
12669 "-E|--subvol-extents <subvolid>",
12670 " print subvolume extents and sharing state",
12671 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12672 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12673 "-p|--progress indicate progress",
12674 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * @argc: number of command line arguments
 * @argv: command line arguments; after the options exactly one positional
 *        argument (the device) is expected
 *
 * Parses the options, refuses mounted filesystems, opens the filesystem and
 * then runs one of: space cache clearing, the qgroup report, the subvolume
 * extent report, or the regular check sequence (extents, root items, free
 * space cache or tree, fs roots, csums, root refs, quota groups) followed by
 * a summary of the collected statistics.
 *
 * The exit status convention is not spelled out here - TODO confirm against
 * the return paths.
 */
12678 int cmd_check(int argc, char **argv)
12680 struct cache_tree root_cache;
12681 struct btrfs_root *root;
12682 struct btrfs_fs_info *info;
12685 u64 tree_root_bytenr = 0;
12686 u64 chunk_root_bytenr = 0;
12687 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12691 int init_csum_tree = 0;
12693 int clear_space_cache = 0;
12694 int qgroup_report = 0;
12695 int qgroups_repaired = 0;
12696 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Identifiers for the long-only options, numbered from 257 upwards. */
12700 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12701 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12702 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12703 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12704 static const struct option long_options[] = {
12705 { "super", required_argument, NULL, 's' },
12706 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12707 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12708 { "init-csum-tree", no_argument, NULL,
12709 GETOPT_VAL_INIT_CSUM },
12710 { "init-extent-tree", no_argument, NULL,
12711 GETOPT_VAL_INIT_EXTENT },
12712 { "check-data-csum", no_argument, NULL,
12713 GETOPT_VAL_CHECK_CSUM },
12714 { "backup", no_argument, NULL, 'b' },
12715 { "subvol-extents", required_argument, NULL, 'E' },
12716 { "qgroup-report", no_argument, NULL, 'Q' },
12717 { "tree-root", required_argument, NULL, 'r' },
12718 { "chunk-root", required_argument, NULL,
12719 GETOPT_VAL_CHUNK_TREE },
12720 { "progress", no_argument, NULL, 'p' },
12721 { "mode", required_argument, NULL,
12723 { "clear-space-cache", required_argument, NULL,
12724 GETOPT_VAL_CLEAR_SPACE_CACHE},
12725 { NULL, 0, NULL, 0}
/* Parse the command line; the short options are a, s:, b, r: and p. */
12728 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12732 case 'a': /* ignored */ break;
12734 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12737 num = arg_strtou64(optarg);
12738 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12740 "super mirror should be less than %d",
12741 BTRFS_SUPER_MIRROR_MAX);
12744 bytenr = btrfs_sb_offset(((int)num));
12745 printf("using SB copy %llu, bytenr %llu\n", num,
12746 (unsigned long long)bytenr);
12752 subvolid = arg_strtou64(optarg);
12755 tree_root_bytenr = arg_strtou64(optarg);
12757 case GETOPT_VAL_CHUNK_TREE:
12758 chunk_root_bytenr = arg_strtou64(optarg);
12761 ctx.progress_enabled = true;
12765 usage(cmd_check_usage);
/* Every option that modifies the filesystem also requests write access. */
12766 case GETOPT_VAL_REPAIR:
12767 printf("enabling repair mode\n");
12769 ctree_flags |= OPEN_CTREE_WRITES;
12771 case GETOPT_VAL_READONLY:
12774 case GETOPT_VAL_INIT_CSUM:
12775 printf("Creating a new CRC tree\n");
12776 init_csum_tree = 1;
12778 ctree_flags |= OPEN_CTREE_WRITES;
12780 case GETOPT_VAL_INIT_EXTENT:
12781 init_extent_tree = 1;
12782 ctree_flags |= (OPEN_CTREE_WRITES |
12783 OPEN_CTREE_NO_BLOCK_GROUPS);
12786 case GETOPT_VAL_CHECK_CSUM:
12787 check_data_csum = 1;
12789 case GETOPT_VAL_MODE:
12790 check_mode = parse_check_mode(optarg);
12791 if (check_mode == CHECK_MODE_UNKNOWN) {
12792 error("unknown mode: %s", optarg);
12796 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12797 if (strcmp(optarg, "v1") == 0) {
12798 clear_space_cache = 1;
12799 } else if (strcmp(optarg, "v2") == 0) {
12800 clear_space_cache = 2;
12801 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12804 "invalid argument to --clear-space-cache, must be v1 or v2");
12807 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument, the device, must remain. */
12812 if (check_argc_exact(argc - optind, 1))
12813 usage(cmd_check_usage);
/* Set up the progress reporting task when -p was given. */
12815 if (ctx.progress_enabled) {
12816 ctx.tp = TASK_NOTHING;
12817 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12820 /* This check is the only reason for --readonly to exist */
12821 if (readonly && repair) {
12822 error("repair options are not compatible with --readonly");
12827 * Not supported yet
12829 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12830 error("low memory mode doesn't support repair yet");
12835 cache_tree_init(&root_cache);
/* Refuse to work on a mounted filesystem. */
12837 if((ret = check_mounted(argv[optind])) < 0) {
12838 error("could not check mount status: %s", strerror(-ret));
12842 error("%s is currently mounted, aborting", argv[optind]);
12848 /* only allow partial opening under repair mode */
12850 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem with the flags and root overrides collected above. */
12852 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12853 chunk_root_bytenr, ctree_flags);
12855 error("cannot open file system");
12861 global_info = info;
12862 root = info->fs_root;
/* Handle --clear-space-cache: 1 selects the v1 cache, 2 the v2 tree. */
12863 if (clear_space_cache == 1) {
12864 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12866 "free space cache v2 detected, use --clear-space-cache v2");
12870 printf("Clearing free space cache\n");
12871 ret = clear_free_space_cache(info);
12873 error("failed to clear free space cache");
12876 printf("Free space cache cleared\n");
12879 } else if (clear_space_cache == 2) {
12880 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12881 printf("no free space cache v2 to clear\n");
12885 printf("Clear free space cache v2\n");
12886 ret = btrfs_clear_free_space_tree(info);
12888 error("failed to clear free space cache v2: %d", ret);
12891 printf("free space cache v2 cleared\n");
12897 * repair mode will force us to commit transaction which
12898 * will make us fail to load log tree when mounting.
12900 if (repair && btrfs_super_log_root(info->super_copy)) {
12901 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12907 ret = zero_log_tree(root);
12910 error("failed to zero log tree: %d", ret);
/* Report-only modes: qgroup report and subvolume extent state. */
12915 uuid_unparse(info->super_copy->fsid, uuidbuf);
12916 if (qgroup_report) {
12917 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12919 ret = qgroup_verify_all(info);
12926 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12927 subvolid, argv[optind], uuidbuf);
12928 ret = print_extent_state(info, subvolid);
12932 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree root, device tree and chunk tree must be readable to go on. */
12934 if (!extent_buffer_uptodate(info->tree_root->node) ||
12935 !extent_buffer_uptodate(info->dev_root->node) ||
12936 !extent_buffer_uptodate(info->chunk_root->node)) {
12937 error("critical roots corrupted, unable to check the filesystem");
/* Optional re-initialization of the extent and checksum trees. */
12943 if (init_extent_tree || init_csum_tree) {
12944 struct btrfs_trans_handle *trans;
12946 trans = btrfs_start_transaction(info->extent_root, 0);
12947 if (IS_ERR(trans)) {
12948 error("error starting transaction");
12949 ret = PTR_ERR(trans);
12954 if (init_extent_tree) {
12955 printf("Creating a new extent tree\n");
12956 ret = reinit_extent_tree(trans, info);
12962 if (init_csum_tree) {
12963 printf("Reinitialize checksum tree\n");
12964 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12966 error("checksum tree initialization failed: %d",
12973 ret = fill_csum_tree(trans, info->csum_root,
12977 error("checksum tree refilling failed: %d", ret);
12982 * Ok now we commit and run the normal fsck, which will add
12983 * extent entries for all of the items it finds.
12985 ret = btrfs_commit_transaction(trans, info->extent_root);
12990 if (!extent_buffer_uptodate(info->extent_root->node)) {
12991 error("critical: extent_root, unable to check the filesystem");
12996 if (!extent_buffer_uptodate(info->csum_root->node)) {
12997 error("critical: csum_root, unable to check the filesystem");
/* Regular check sequence starts here; the mode selects the implementation. */
13003 if (!ctx.progress_enabled)
13004 fprintf(stderr, "checking extents\n");
13005 if (check_mode == CHECK_MODE_LOWMEM)
13006 ret = check_chunks_and_extents_v2(root);
13008 ret = check_chunks_and_extents(root);
13012 "errors found in extent allocation tree or chunk allocation");
13014 ret = repair_root_items(info);
13017 error("failed to repair root items: %s", strerror(-ret));
13021 fprintf(stderr, "Fixed %d roots.\n", ret);
13023 } else if (ret > 0) {
13025 "Found %d roots with an outdated root item.\n",
13028 "Please run a filesystem check with the option --repair to fix them.\n");
13034 if (!ctx.progress_enabled) {
13035 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13036 fprintf(stderr, "checking free space tree\n");
13038 fprintf(stderr, "checking free space cache\n");
13040 ret = check_space_cache(root);
13043 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13044 error("errors found in free space tree");
13046 error("errors found in free space cache");
13051 * We used to have to have these hole extents in between our real
13052 * extents so if we don't have this flag set we need to make sure there
13053 * are no gaps in the file extents for inodes, otherwise we can just
13054 * ignore it when this happens.
13056 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13057 if (!ctx.progress_enabled)
13058 fprintf(stderr, "checking fs roots\n");
13059 if (check_mode == CHECK_MODE_LOWMEM)
13060 ret = check_fs_roots_v2(root->fs_info);
13062 ret = check_fs_roots(root, &root_cache);
13065 error("errors found in fs roots");
13069 fprintf(stderr, "checking csums\n");
13070 ret = check_csums(root);
13073 error("errors found in csum tree");
13077 fprintf(stderr, "checking root refs\n");
13078 /* For low memory mode, check_fs_roots_v2 handles root refs */
13079 if (check_mode != CHECK_MODE_LOWMEM) {
13080 ret = check_root_refs(root, &root_cache);
13083 error("errors found in root refs");
/* In repair mode, rewrite every extent buffer queued on recow_ebs. */
13088 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13089 struct extent_buffer *eb;
13091 eb = list_first_entry(&root->fs_info->recow_ebs,
13092 struct extent_buffer, recow);
13093 list_del_init(&eb->recow);
13094 ret = recow_extent_buffer(root, eb);
13097 error("fails to fix transid errors");
/* Drain the list of bad items collected for deletion. */
13102 while (!list_empty(&delete_items)) {
13103 struct bad_item *bad;
13105 bad = list_first_entry(&delete_items, struct bad_item, list);
13106 list_del_init(&bad->list);
13108 ret = delete_bad_item(root, bad);
/* Quota groups are verified only when quotas are enabled. */
13114 if (info->quota_enabled) {
13115 fprintf(stderr, "checking quota groups\n");
13116 ret = qgroup_verify_all(info);
13119 error("failed to check quota groups");
13123 ret = repair_qgroups(info, &qgroups_repaired);
13126 error("failed to repair quota groups");
13132 if (!list_empty(&root->fs_info->recow_ebs)) {
13133 error("transid errors in file system");
13138 if (found_old_backref) { /*
13139 * there was a disk format change when mixed
13140 * backref was in testing tree. The old format
13141 * existed about one week.
13143 printf("\n * Found old mixed backref format. "
13144 "The old format is not supported! *"
13145 "\n * Please mount the FS in readonly mode, "
13146 "backup data and re-format the FS. *\n\n");
13149 printf("found %llu bytes used, ",
13150 (unsigned long long)bytes_used);
13152 printf("error(s) found\n");
13154 printf("no error found\n");
13155 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13156 printf("total tree bytes: %llu\n",
13157 (unsigned long long)total_btree_bytes);
13158 printf("total fs tree bytes: %llu\n",
13159 (unsigned long long)total_fs_tree_bytes);
13160 printf("total extent tree bytes: %llu\n",
13161 (unsigned long long)total_extent_tree_bytes);
13162 printf("btree space waste bytes: %llu\n",
13163 (unsigned long long)btree_space_waste);
13164 printf("file data blocks allocated: %llu\n referenced %llu\n",
13165 (unsigned long long)data_bytes_allocated,
13166 (unsigned long long)data_bytes_referenced);
13168 free_qgroup_counts();
13169 free_root_recs_tree(&root_cache);
13173 if (ctx.progress_enabled)
13174 task_deinit(ctx.info);