2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static int low_memory = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 struct extent_backref {
80 unsigned int is_data:1;
81 unsigned int found_extent_tree:1;
82 unsigned int full_backref:1;
83 unsigned int found_ref:1;
84 unsigned int broken:1;
87 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
89 return rb_entry(node, struct extent_backref, node);
93 struct extent_backref node;
107 static inline struct data_backref* to_data_backref(struct extent_backref *back)
109 return container_of(back, struct data_backref, node);
112 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
114 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
115 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
116 struct data_backref *back1 = to_data_backref(ext1);
117 struct data_backref *back2 = to_data_backref(ext2);
119 WARN_ON(!ext1->is_data);
120 WARN_ON(!ext2->is_data);
122 /* parent and root are a union, so this covers both */
123 if (back1->parent > back2->parent)
125 if (back1->parent < back2->parent)
128 /* This is a full backref and the parents match. */
129 if (back1->node.full_backref)
132 if (back1->owner > back2->owner)
134 if (back1->owner < back2->owner)
137 if (back1->offset > back2->offset)
139 if (back1->offset < back2->offset)
142 if (back1->bytes > back2->bytes)
144 if (back1->bytes < back2->bytes)
147 if (back1->found_ref && back2->found_ref) {
148 if (back1->disk_bytenr > back2->disk_bytenr)
150 if (back1->disk_bytenr < back2->disk_bytenr)
153 if (back1->found_ref > back2->found_ref)
155 if (back1->found_ref < back2->found_ref)
163 * Much like data_backref, but with the undetermined members removed
164 * and changed to use a list_head.
165 * During extent scan, it is stored in root->orphan_data_extent.
166 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
168 struct orphan_data_extent {
169 struct list_head list;
177 struct tree_backref {
178 struct extent_backref node;
185 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
187 return container_of(back, struct tree_backref, node);
190 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
192 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
193 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
194 struct tree_backref *back1 = to_tree_backref(ext1);
195 struct tree_backref *back2 = to_tree_backref(ext2);
197 WARN_ON(ext1->is_data);
198 WARN_ON(ext2->is_data);
200 /* parent and root are a union, so this covers both */
201 if (back1->parent > back2->parent)
203 if (back1->parent < back2->parent)
209 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
211 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
212 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
214 if (ext1->is_data > ext2->is_data)
217 if (ext1->is_data < ext2->is_data)
220 if (ext1->full_backref > ext2->full_backref)
222 if (ext1->full_backref < ext2->full_backref)
226 return compare_data_backref(node1, node2);
228 return compare_tree_backref(node1, node2);
/*
 * Value used to explicitly initialize extent_record::flag_block_full_backref.
 * That member is a 2-bit bitfield, so 2 fits and stays distinct from 0 and 1.
 */
enum { FLAG_UNSET = 2 };
234 struct extent_record {
235 struct list_head backrefs;
236 struct list_head dups;
237 struct rb_root backref_tree;
238 struct list_head list;
239 struct cache_extent cache;
240 struct btrfs_disk_key parent_key;
245 u64 extent_item_refs;
247 u64 parent_generation;
251 unsigned int flag_block_full_backref:2;
252 unsigned int found_rec:1;
253 unsigned int content_checked:1;
254 unsigned int owner_ref_checked:1;
255 unsigned int is_root:1;
256 unsigned int metadata:1;
257 unsigned int bad_full_backref:1;
258 unsigned int crossing_stripes:1;
259 unsigned int wrong_chunk_type:1;
262 static inline struct extent_record* to_extent_record(struct list_head *entry)
264 return container_of(entry, struct extent_record, list);
267 struct inode_backref {
268 struct list_head list;
269 unsigned int found_dir_item:1;
270 unsigned int found_dir_index:1;
271 unsigned int found_inode_ref:1;
272 unsigned int filetype:8;
274 unsigned int ref_type;
281 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
283 return list_entry(entry, struct inode_backref, list);
286 struct root_item_record {
287 struct list_head list;
294 struct btrfs_key drop_key;
/*
 * Per-backref error bits.  print_ref_error() reports each set bit using the
 * text quoted next to it.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)  /* 0x0001 "no dir item" */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)  /* 0x0002 "no dir index" */
#define REF_ERR_NO_INODE_REF		(1 << 2)  /* 0x0004 "no inode ref" */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)  /* 0x0008 "dup dir item" */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)  /* 0x0010 "dup dir index" */
#define REF_ERR_DUP_INODE_REF		(1 << 5)  /* 0x0020 "dup inode ref" */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)  /* 0x0040 "index mismatch" */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)  /* 0x0080 "filetype mismatch" */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)  /* 0x0100 "name too long" */
#define REF_ERR_NO_ROOT_REF		(1 << 9)  /* 0x0200 "no root ref" */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10) /* 0x0400 "no root backref" */
#define REF_ERR_DUP_ROOT_REF		(1 << 11) /* 0x0800 "dup root ref" */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12) /* 0x1000 "dup root backref" */
311 struct file_extent_hole {
317 struct inode_record {
318 struct list_head backrefs;
319 unsigned int checked:1;
320 unsigned int merging:1;
321 unsigned int found_inode_item:1;
322 unsigned int found_dir_item:1;
323 unsigned int found_file_extent:1;
324 unsigned int found_csum_item:1;
325 unsigned int some_csum_missing:1;
326 unsigned int nodatasum:1;
339 struct rb_root holes;
340 struct list_head orphan_extents;
/*
 * Per-inode error bits kept in inode_record::errors.  print_inode_error()
 * reports each set bit using the text quoted next to it.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)  /* 0x0001 "no inode item" */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)  /* 0x0002 "no orphan item" */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)  /* 0x0004 "dup inode item" */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)  /* 0x0008 "dup dir index" */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)  /* 0x0010 "odd dir item" */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)  /* 0x0020 "odd file extent" */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)  /* 0x0040 "bad file extent" */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)  /* 0x0080 "file extent overlap" */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)  /* 0x0100 "file extent discount" */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)  /* 0x0200 "dir isize wrong" */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x0400 "nbytes wrong" */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11) /* 0x0800 "odd csum item" */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12) /* 0x1000 "some csum missing" */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13) /* 0x2000 "link count wrong" */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14) /* 0x4000 "orphan file extent" */
361 struct root_backref {
362 struct list_head list;
363 unsigned int found_dir_item:1;
364 unsigned int found_dir_index:1;
365 unsigned int found_back_ref:1;
366 unsigned int found_forward_ref:1;
367 unsigned int reachable:1;
376 static inline struct root_backref* to_root_backref(struct list_head *entry)
378 return list_entry(entry, struct root_backref, list);
382 struct list_head backrefs;
383 struct cache_extent cache;
384 unsigned int found_root_item:1;
390 struct cache_extent cache;
395 struct cache_extent cache;
396 struct cache_tree root_cache;
397 struct cache_tree inode_cache;
398 struct inode_record *current;
407 struct walk_control {
408 struct cache_tree shared;
409 struct shared_node *nodes[BTRFS_MAX_LEVEL];
415 struct btrfs_key key;
417 struct list_head list;
420 struct extent_entry {
425 struct list_head list;
428 struct root_item_info {
429 /* level of the root */
431 /* number of nodes at this level, must be 1 for a root */
435 struct cache_extent cache_extent;
439 * Error bit for low memory mode check.
441 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low memory mode check.  Each flag is a single bit and
 * every flag must own a different bit, otherwise two distinct conditions
 * become indistinguishable once they are stored in the same error word.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * This flag used to be (1 << 4), the same bit as REFERENCER_MISMATCH, so the
 * two errors could not be told apart.  It now uses the first bit not taken by
 * the flags above; all other values are unchanged.
 * NOTE(review): confirm no flag defined elsewhere in this error namespace
 * already uses bit 9.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
455 static void *print_status_check(void *p)
457 struct task_ctx *priv = p;
458 const char work_indicator[] = { '.', 'o', 'O', 'o' };
460 static char *task_position_string[] = {
462 "checking free space cache",
466 task_period_start(priv->info, 1000 /* 1s */);
468 if (priv->tp == TASK_NOTHING)
472 printf("%s [%c]\r", task_position_string[priv->tp],
473 work_indicator[count % 4]);
476 task_period_wait(priv->info);
481 static int print_status_return(void *p)
489 /* Compatibility function to allow reuse of old code */
490 static u64 first_extent_gap(struct rb_root *holes)
492 struct file_extent_hole *hole;
494 if (RB_EMPTY_ROOT(holes))
497 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
501 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
503 struct file_extent_hole *hole1;
504 struct file_extent_hole *hole2;
506 hole1 = rb_entry(node1, struct file_extent_hole, node);
507 hole2 = rb_entry(node2, struct file_extent_hole, node);
509 if (hole1->start > hole2->start)
511 if (hole1->start < hole2->start)
513 /* Now hole1->start == hole2->start */
514 if (hole1->len >= hole2->len)
516 * Hole 1 will be merge center
517 * Same hole will be merged later
520 /* Hole 2 will be merge center */
525 * Add a hole to the record
527 * This also merges holes on behalf of copy_file_extent_holes(),
528 * which ensures there won't be adjacent or overlapping holes.
530 static int add_file_extent_hole(struct rb_root *holes,
533 struct file_extent_hole *hole;
534 struct file_extent_hole *prev = NULL;
535 struct file_extent_hole *next = NULL;
537 hole = malloc(sizeof(*hole));
542 /* Since compare will not return 0, no -EEXIST will happen */
543 rb_insert(holes, &hole->node, compare_hole);
545 /* simple merge with previous hole */
546 if (rb_prev(&hole->node))
547 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
549 if (prev && prev->start + prev->len >= hole->start) {
550 hole->len = hole->start + hole->len - prev->start;
551 hole->start = prev->start;
552 rb_erase(&prev->node, holes);
557 /* iterate merge with next holes */
559 if (!rb_next(&hole->node))
561 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
563 if (hole->start + hole->len >= next->start) {
564 if (hole->start + hole->len <= next->start + next->len)
565 hole->len = next->start + next->len -
567 rb_erase(&next->node, holes);
576 static int compare_hole_range(struct rb_node *node, void *data)
578 struct file_extent_hole *hole;
581 hole = (struct file_extent_hole *)data;
584 hole = rb_entry(node, struct file_extent_hole, node);
585 if (start < hole->start)
587 if (start >= hole->start && start < hole->start + hole->len)
593 * Delete a hole in the record
595 * This will split the hole if needed and is much stricter than add.
597 static int del_file_extent_hole(struct rb_root *holes,
600 struct file_extent_hole *hole;
601 struct file_extent_hole tmp;
606 struct rb_node *node;
613 node = rb_search(holes, &tmp, compare_hole_range, NULL);
616 hole = rb_entry(node, struct file_extent_hole, node);
617 if (start + len > hole->start + hole->len)
621 * Now there will be no overlap, delete the hole and re-add the
622 * split(s) if they exists.
624 if (start > hole->start) {
625 prev_start = hole->start;
626 prev_len = start - hole->start;
629 if (hole->start + hole->len > start + len) {
630 next_start = start + len;
631 next_len = hole->start + hole->len - start - len;
634 rb_erase(node, holes);
637 ret = add_file_extent_hole(holes, prev_start, prev_len);
642 ret = add_file_extent_hole(holes, next_start, next_len);
649 static int copy_file_extent_holes(struct rb_root *dst,
652 struct file_extent_hole *hole;
653 struct rb_node *node;
656 node = rb_first(src);
658 hole = rb_entry(node, struct file_extent_hole, node);
659 ret = add_file_extent_hole(dst, hole->start, hole->len);
662 node = rb_next(node);
667 static void free_file_extent_holes(struct rb_root *holes)
669 struct rb_node *node;
670 struct file_extent_hole *hole;
672 node = rb_first(holes);
674 hole = rb_entry(node, struct file_extent_hole, node);
675 rb_erase(node, holes);
677 node = rb_first(holes);
681 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
683 static void record_root_in_trans(struct btrfs_trans_handle *trans,
684 struct btrfs_root *root)
686 if (root->last_trans != trans->transid) {
687 root->track_dirty = 1;
688 root->last_trans = trans->transid;
689 root->commit_root = root->node;
690 extent_buffer_get(root->node);
694 static u8 imode_to_type(u32 imode)
697 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
698 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
699 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
700 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
701 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
702 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
703 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
704 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
707 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
711 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
713 struct device_record *rec1;
714 struct device_record *rec2;
716 rec1 = rb_entry(node1, struct device_record, node);
717 rec2 = rb_entry(node2, struct device_record, node);
718 if (rec1->devid > rec2->devid)
720 else if (rec1->devid < rec2->devid)
726 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
728 struct inode_record *rec;
729 struct inode_backref *backref;
730 struct inode_backref *orig;
731 struct inode_backref *tmp;
732 struct orphan_data_extent *src_orphan;
733 struct orphan_data_extent *dst_orphan;
737 rec = malloc(sizeof(*rec));
739 return ERR_PTR(-ENOMEM);
740 memcpy(rec, orig_rec, sizeof(*rec));
742 INIT_LIST_HEAD(&rec->backrefs);
743 INIT_LIST_HEAD(&rec->orphan_extents);
744 rec->holes = RB_ROOT;
746 list_for_each_entry(orig, &orig_rec->backrefs, list) {
747 size = sizeof(*orig) + orig->namelen + 1;
748 backref = malloc(size);
753 memcpy(backref, orig, size);
754 list_add_tail(&backref->list, &rec->backrefs);
756 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
757 dst_orphan = malloc(sizeof(*dst_orphan));
762 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
763 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
765 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
771 if (!list_empty(&rec->backrefs))
772 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
773 list_del(&orig->list);
777 if (!list_empty(&rec->orphan_extents))
778 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
779 list_del(&orig->list);
788 static void print_orphan_data_extents(struct list_head *orphan_extents,
791 struct orphan_data_extent *orphan;
793 if (list_empty(orphan_extents))
795 printf("The following data extent is lost in tree %llu:\n",
797 list_for_each_entry(orphan, orphan_extents, list) {
798 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
799 orphan->objectid, orphan->offset, orphan->disk_bytenr,
804 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
806 u64 root_objectid = root->root_key.objectid;
807 int errors = rec->errors;
811 /* reloc root errors, we print its corresponding fs root objectid*/
812 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
813 root_objectid = root->root_key.offset;
814 fprintf(stderr, "reloc");
816 fprintf(stderr, "root %llu inode %llu errors %x",
817 (unsigned long long) root_objectid,
818 (unsigned long long) rec->ino, rec->errors);
820 if (errors & I_ERR_NO_INODE_ITEM)
821 fprintf(stderr, ", no inode item");
822 if (errors & I_ERR_NO_ORPHAN_ITEM)
823 fprintf(stderr, ", no orphan item");
824 if (errors & I_ERR_DUP_INODE_ITEM)
825 fprintf(stderr, ", dup inode item");
826 if (errors & I_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & I_ERR_ODD_DIR_ITEM)
829 fprintf(stderr, ", odd dir item");
830 if (errors & I_ERR_ODD_FILE_EXTENT)
831 fprintf(stderr, ", odd file extent");
832 if (errors & I_ERR_BAD_FILE_EXTENT)
833 fprintf(stderr, ", bad file extent");
834 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
835 fprintf(stderr, ", file extent overlap");
836 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
837 fprintf(stderr, ", file extent discount");
838 if (errors & I_ERR_DIR_ISIZE_WRONG)
839 fprintf(stderr, ", dir isize wrong");
840 if (errors & I_ERR_FILE_NBYTES_WRONG)
841 fprintf(stderr, ", nbytes wrong");
842 if (errors & I_ERR_ODD_CSUM_ITEM)
843 fprintf(stderr, ", odd csum item");
844 if (errors & I_ERR_SOME_CSUM_MISSING)
845 fprintf(stderr, ", some csum missing");
846 if (errors & I_ERR_LINK_COUNT_WRONG)
847 fprintf(stderr, ", link count wrong");
848 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
849 fprintf(stderr, ", orphan file extent");
850 fprintf(stderr, "\n");
851 /* Print the orphan extents if needed */
852 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
853 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
855 /* Print the holes if needed */
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
857 struct file_extent_hole *hole;
858 struct rb_node *node;
861 node = rb_first(&rec->holes);
862 fprintf(stderr, "Found file extent holes:\n");
865 hole = rb_entry(node, struct file_extent_hole, node);
866 fprintf(stderr, "\tstart: %llu, len: %llu\n",
867 hole->start, hole->len);
868 node = rb_next(node);
871 fprintf(stderr, "\tstart: 0, len: %llu\n",
872 round_up(rec->isize, root->sectorsize));
876 static void print_ref_error(int errors)
878 if (errors & REF_ERR_NO_DIR_ITEM)
879 fprintf(stderr, ", no dir item");
880 if (errors & REF_ERR_NO_DIR_INDEX)
881 fprintf(stderr, ", no dir index");
882 if (errors & REF_ERR_NO_INODE_REF)
883 fprintf(stderr, ", no inode ref");
884 if (errors & REF_ERR_DUP_DIR_ITEM)
885 fprintf(stderr, ", dup dir item");
886 if (errors & REF_ERR_DUP_DIR_INDEX)
887 fprintf(stderr, ", dup dir index");
888 if (errors & REF_ERR_DUP_INODE_REF)
889 fprintf(stderr, ", dup inode ref");
890 if (errors & REF_ERR_INDEX_UNMATCH)
891 fprintf(stderr, ", index mismatch");
892 if (errors & REF_ERR_FILETYPE_UNMATCH)
893 fprintf(stderr, ", filetype mismatch");
894 if (errors & REF_ERR_NAME_TOO_LONG)
895 fprintf(stderr, ", name too long");
896 if (errors & REF_ERR_NO_ROOT_REF)
897 fprintf(stderr, ", no root ref");
898 if (errors & REF_ERR_NO_ROOT_BACKREF)
899 fprintf(stderr, ", no root backref");
900 if (errors & REF_ERR_DUP_ROOT_REF)
901 fprintf(stderr, ", dup root ref");
902 if (errors & REF_ERR_DUP_ROOT_BACKREF)
903 fprintf(stderr, ", dup root backref");
904 fprintf(stderr, "\n");
907 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
910 struct ptr_node *node;
911 struct cache_extent *cache;
912 struct inode_record *rec = NULL;
915 cache = lookup_cache_extent(inode_cache, ino, 1);
917 node = container_of(cache, struct ptr_node, cache);
919 if (mod && rec->refs > 1) {
920 node->data = clone_inode_rec(rec);
921 if (IS_ERR(node->data))
927 rec = calloc(1, sizeof(*rec));
929 return ERR_PTR(-ENOMEM);
931 rec->extent_start = (u64)-1;
933 INIT_LIST_HEAD(&rec->backrefs);
934 INIT_LIST_HEAD(&rec->orphan_extents);
935 rec->holes = RB_ROOT;
937 node = malloc(sizeof(*node));
940 return ERR_PTR(-ENOMEM);
942 node->cache.start = ino;
943 node->cache.size = 1;
946 if (ino == BTRFS_FREE_INO_OBJECTID)
949 ret = insert_cache_extent(inode_cache, &node->cache);
951 return ERR_PTR(-EEXIST);
956 static void free_orphan_data_extents(struct list_head *orphan_extents)
958 struct orphan_data_extent *orphan;
960 while (!list_empty(orphan_extents)) {
961 orphan = list_entry(orphan_extents->next,
962 struct orphan_data_extent, list);
963 list_del(&orphan->list);
968 static void free_inode_rec(struct inode_record *rec)
970 struct inode_backref *backref;
975 while (!list_empty(&rec->backrefs)) {
976 backref = to_inode_backref(rec->backrefs.next);
977 list_del(&backref->list);
980 free_orphan_data_extents(&rec->orphan_extents);
981 free_file_extent_holes(&rec->holes);
985 static int can_free_inode_rec(struct inode_record *rec)
987 if (!rec->errors && rec->checked && rec->found_inode_item &&
988 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
993 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
994 struct inode_record *rec)
996 struct cache_extent *cache;
997 struct inode_backref *tmp, *backref;
998 struct ptr_node *node;
999 unsigned char filetype;
1001 if (!rec->found_inode_item)
1004 filetype = imode_to_type(rec->imode);
1005 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1006 if (backref->found_dir_item && backref->found_dir_index) {
1007 if (backref->filetype != filetype)
1008 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1009 if (!backref->errors && backref->found_inode_ref &&
1010 rec->nlink == rec->found_link) {
1011 list_del(&backref->list);
1017 if (!rec->checked || rec->merging)
1020 if (S_ISDIR(rec->imode)) {
1021 if (rec->found_size != rec->isize)
1022 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1023 if (rec->found_file_extent)
1024 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1025 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1026 if (rec->found_dir_item)
1027 rec->errors |= I_ERR_ODD_DIR_ITEM;
1028 if (rec->found_size != rec->nbytes)
1029 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1030 if (rec->nlink > 0 && !no_holes &&
1031 (rec->extent_end < rec->isize ||
1032 first_extent_gap(&rec->holes) < rec->isize))
1033 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1036 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1037 if (rec->found_csum_item && rec->nodatasum)
1038 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1039 if (rec->some_csum_missing && !rec->nodatasum)
1040 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1043 BUG_ON(rec->refs != 1);
1044 if (can_free_inode_rec(rec)) {
1045 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1046 node = container_of(cache, struct ptr_node, cache);
1047 BUG_ON(node->data != rec);
1048 remove_cache_extent(inode_cache, &node->cache);
1050 free_inode_rec(rec);
1054 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1056 struct btrfs_path path;
1057 struct btrfs_key key;
1060 key.objectid = BTRFS_ORPHAN_OBJECTID;
1061 key.type = BTRFS_ORPHAN_ITEM_KEY;
1064 btrfs_init_path(&path);
1065 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1066 btrfs_release_path(&path);
1072 static int process_inode_item(struct extent_buffer *eb,
1073 int slot, struct btrfs_key *key,
1074 struct shared_node *active_node)
1076 struct inode_record *rec;
1077 struct btrfs_inode_item *item;
1079 rec = active_node->current;
1080 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1081 if (rec->found_inode_item) {
1082 rec->errors |= I_ERR_DUP_INODE_ITEM;
1085 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1086 rec->nlink = btrfs_inode_nlink(eb, item);
1087 rec->isize = btrfs_inode_size(eb, item);
1088 rec->nbytes = btrfs_inode_nbytes(eb, item);
1089 rec->imode = btrfs_inode_mode(eb, item);
1090 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1092 rec->found_inode_item = 1;
1093 if (rec->nlink == 0)
1094 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1095 maybe_free_inode_rec(&active_node->inode_cache, rec);
1099 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1101 int namelen, u64 dir)
1103 struct inode_backref *backref;
1105 list_for_each_entry(backref, &rec->backrefs, list) {
1106 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1108 if (backref->dir != dir || backref->namelen != namelen)
1110 if (memcmp(name, backref->name, namelen))
1115 backref = malloc(sizeof(*backref) + namelen + 1);
1118 memset(backref, 0, sizeof(*backref));
1120 backref->namelen = namelen;
1121 memcpy(backref->name, name, namelen);
1122 backref->name[namelen] = '\0';
1123 list_add_tail(&backref->list, &rec->backrefs);
1127 static int add_inode_backref(struct cache_tree *inode_cache,
1128 u64 ino, u64 dir, u64 index,
1129 const char *name, int namelen,
1130 int filetype, int itemtype, int errors)
1132 struct inode_record *rec;
1133 struct inode_backref *backref;
1135 rec = get_inode_rec(inode_cache, ino, 1);
1136 BUG_ON(IS_ERR(rec));
1137 backref = get_inode_backref(rec, name, namelen, dir);
1140 backref->errors |= errors;
1141 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1142 if (backref->found_dir_index)
1143 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1144 if (backref->found_inode_ref && backref->index != index)
1145 backref->errors |= REF_ERR_INDEX_UNMATCH;
1146 if (backref->found_dir_item && backref->filetype != filetype)
1147 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1149 backref->index = index;
1150 backref->filetype = filetype;
1151 backref->found_dir_index = 1;
1152 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1154 if (backref->found_dir_item)
1155 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1156 if (backref->found_dir_index && backref->filetype != filetype)
1157 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1159 backref->filetype = filetype;
1160 backref->found_dir_item = 1;
1161 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1162 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1163 if (backref->found_inode_ref)
1164 backref->errors |= REF_ERR_DUP_INODE_REF;
1165 if (backref->found_dir_index && backref->index != index)
1166 backref->errors |= REF_ERR_INDEX_UNMATCH;
1168 backref->index = index;
1170 backref->ref_type = itemtype;
1171 backref->found_inode_ref = 1;
1176 maybe_free_inode_rec(inode_cache, rec);
1180 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1181 struct cache_tree *dst_cache)
1183 struct inode_backref *backref;
1188 list_for_each_entry(backref, &src->backrefs, list) {
1189 if (backref->found_dir_index) {
1190 add_inode_backref(dst_cache, dst->ino, backref->dir,
1191 backref->index, backref->name,
1192 backref->namelen, backref->filetype,
1193 BTRFS_DIR_INDEX_KEY, backref->errors);
1195 if (backref->found_dir_item) {
1197 add_inode_backref(dst_cache, dst->ino,
1198 backref->dir, 0, backref->name,
1199 backref->namelen, backref->filetype,
1200 BTRFS_DIR_ITEM_KEY, backref->errors);
1202 if (backref->found_inode_ref) {
1203 add_inode_backref(dst_cache, dst->ino,
1204 backref->dir, backref->index,
1205 backref->name, backref->namelen, 0,
1206 backref->ref_type, backref->errors);
1210 if (src->found_dir_item)
1211 dst->found_dir_item = 1;
1212 if (src->found_file_extent)
1213 dst->found_file_extent = 1;
1214 if (src->found_csum_item)
1215 dst->found_csum_item = 1;
1216 if (src->some_csum_missing)
1217 dst->some_csum_missing = 1;
1218 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1219 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1224 BUG_ON(src->found_link < dir_count);
1225 dst->found_link += src->found_link - dir_count;
1226 dst->found_size += src->found_size;
1227 if (src->extent_start != (u64)-1) {
1228 if (dst->extent_start == (u64)-1) {
1229 dst->extent_start = src->extent_start;
1230 dst->extent_end = src->extent_end;
1232 if (dst->extent_end > src->extent_start)
1233 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1234 else if (dst->extent_end < src->extent_start) {
1235 ret = add_file_extent_hole(&dst->holes,
1237 src->extent_start - dst->extent_end);
1239 if (dst->extent_end < src->extent_end)
1240 dst->extent_end = src->extent_end;
1244 dst->errors |= src->errors;
1245 if (src->found_inode_item) {
1246 if (!dst->found_inode_item) {
1247 dst->nlink = src->nlink;
1248 dst->isize = src->isize;
1249 dst->nbytes = src->nbytes;
1250 dst->imode = src->imode;
1251 dst->nodatasum = src->nodatasum;
1252 dst->found_inode_item = 1;
1254 dst->errors |= I_ERR_DUP_INODE_ITEM;
1262 static int splice_shared_node(struct shared_node *src_node,
1263 struct shared_node *dst_node)
1265 struct cache_extent *cache;
1266 struct ptr_node *node, *ins;
1267 struct cache_tree *src, *dst;
1268 struct inode_record *rec, *conflict;
1269 u64 current_ino = 0;
1273 if (--src_node->refs == 0)
1275 if (src_node->current)
1276 current_ino = src_node->current->ino;
1278 src = &src_node->root_cache;
1279 dst = &dst_node->root_cache;
1281 cache = search_cache_extent(src, 0);
1283 node = container_of(cache, struct ptr_node, cache);
1285 cache = next_cache_extent(cache);
1288 remove_cache_extent(src, &node->cache);
1291 ins = malloc(sizeof(*ins));
1293 ins->cache.start = node->cache.start;
1294 ins->cache.size = node->cache.size;
1298 ret = insert_cache_extent(dst, &ins->cache);
1299 if (ret == -EEXIST) {
1300 conflict = get_inode_rec(dst, rec->ino, 1);
1301 BUG_ON(IS_ERR(conflict));
1302 merge_inode_recs(rec, conflict, dst);
1304 conflict->checked = 1;
1305 if (dst_node->current == conflict)
1306 dst_node->current = NULL;
1308 maybe_free_inode_rec(dst, conflict);
1309 free_inode_rec(rec);
1316 if (src == &src_node->root_cache) {
1317 src = &src_node->inode_cache;
1318 dst = &dst_node->inode_cache;
1322 if (current_ino > 0 && (!dst_node->current ||
1323 current_ino > dst_node->current->ino)) {
1324 if (dst_node->current) {
1325 dst_node->current->checked = 1;
1326 maybe_free_inode_rec(dst, dst_node->current);
1328 dst_node->current = get_inode_rec(dst, current_ino, 1);
1329 BUG_ON(IS_ERR(dst_node->current));
1334 static void free_inode_ptr(struct cache_extent *cache)
1336 struct ptr_node *node;
1337 struct inode_record *rec;
1339 node = container_of(cache, struct ptr_node, cache);
1341 free_inode_rec(rec);
1345 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1347 static struct shared_node *find_shared_node(struct cache_tree *shared,
1350 struct cache_extent *cache;
1351 struct shared_node *node;
1353 cache = lookup_cache_extent(shared, bytenr, 1);
1355 node = container_of(cache, struct shared_node, cache);
1361 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1364 struct shared_node *node;
1366 node = calloc(1, sizeof(*node));
1369 node->cache.start = bytenr;
1370 node->cache.size = 1;
1371 cache_tree_init(&node->root_cache);
1372 cache_tree_init(&node->inode_cache);
1375 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping for the tree walk when it reaches the block at @bytenr on
 * @level.
 *
 * If no shared_node exists for the block yet, one is created with @refs and
 * becomes the active node of the walk (wc->nodes[level], wc->active_node).
 * If one exists, its cached records are either thrown away or spliced into
 * the currently active node, and the node is removed from wc->shared once
 * its reference count reaches zero.
 */
1380 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1381 struct walk_control *wc, int level)
1383 struct shared_node *node;
1384 struct shared_node *dest;
1387 if (level == wc->active_node)
/* A block being entered must sit below the currently active node. */
1390 BUG_ON(wc->active_node <= level);
1391 node = find_shared_node(&wc->shared, bytenr);
1393 ret = add_shared_node(&wc->shared, bytenr, refs);
1395 node = find_shared_node(&wc->shared, bytenr);
1396 wc->nodes[level] = node;
1397 wc->active_node = level;
/*
 * The active node is the root level and the root item has zero refs:
 * drop one reference and discard the cached records when it was the last.
 */
1401 if (wc->root_level == wc->active_node &&
1402 btrfs_root_refs(&root->root_item) == 0) {
1403 if (--node->refs == 0) {
1404 free_inode_recs_tree(&node->root_cache);
1405 free_inode_recs_tree(&node->inode_cache);
1406 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise hand the records collected for this block to the active node. */
1412 dest = wc->nodes[wc->active_node];
1413 splice_shared_node(node, dest);
1414 if (node->refs == 0) {
1415 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(), called by walk_up_tree() when the walk
 * leaves the active node at @level.
 *
 * The active node is detached from wc->nodes[], a level above @level becomes
 * the new active node, and the records gathered so far are spliced into it
 * when the new active node is below the root level or the root item still
 * has references.
 */
1421 static int leave_shared_node(struct btrfs_root *root,
1422 struct walk_control *wc, int level)
1424 struct shared_node *node;
1425 struct shared_node *dest;
1428 if (level == wc->root_level)
/* Search the levels above @level for the next active node. */
1431 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1435 BUG_ON(i >= BTRFS_MAX_LEVEL);
1437 node = wc->nodes[wc->active_node];
1438 wc->nodes[wc->active_node] = NULL;
1439 wc->active_node = i;
1441 dest = wc->nodes[wc->active_node];
1442 if (wc->active_node < wc->root_level ||
1443 btrfs_root_refs(&root->root_item) > 0) {
1444 BUG_ON(node->refs <= 1);
1445 splice_shared_node(node, dest);
1447 BUG_ON(node->refs < 2);
1456 * 1 - if the root with id child_root_id is a child of root parent_root_id
1457 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1458 * has other root(s) as parent(s)
1459 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Two lookups in the tree root decide the relationship:
 *  - an exact search for the (parent_root_id, ROOT_REF, child_root_id) key;
 *  - a scan over the ROOT_BACKREF items of child_root_id, comparing each
 *    key offset with parent_root_id.
 * The final return distinguishes "has some other parent" (0) from
 * "has no parent at all" (2) through has_parent.
 */
1461 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1464 struct btrfs_path path;
1465 struct btrfs_key key;
1466 struct extent_buffer *leaf;
1470 btrfs_init_path(&path);
/* First lookup: forward reference from the parent to the child. */
1472 key.objectid = parent_root_id;
1473 key.type = BTRFS_ROOT_REF_KEY;
1474 key.offset = child_root_id;
1475 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1479 btrfs_release_path(&path);
/* Second lookup: walk the child's back references. */
1483 key.objectid = child_root_id;
1484 key.type = BTRFS_ROOT_BACKREF_KEY;
1486 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1492 leaf = path.nodes[0];
/* Past the last item of this leaf: continue in the next leaf. */
1493 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1494 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1497 leaf = path.nodes[0];
1500 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1501 if (key.objectid != child_root_id ||
1502 key.type != BTRFS_ROOT_BACKREF_KEY)
1507 if (key.offset == parent_root_id) {
1508 btrfs_release_path(&path);
1515 btrfs_release_path(&path);
1518 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * @slot of @eb.
 *
 * The current inode record of @active_node is marked as having a dir item
 * and its found_size grows by each entry's name length. For each entry a
 * backref is added, to the inode cache when the entry points at an inode
 * item and to the root cache when it points at a root item.
 */
1521 static int process_dir_item(struct btrfs_root *root,
1522 struct extent_buffer *eb,
1523 int slot, struct btrfs_key *key,
1524 struct shared_node *active_node)
1534 struct btrfs_dir_item *di;
1535 struct inode_record *rec;
1536 struct cache_tree *root_cache;
1537 struct cache_tree *inode_cache;
1538 struct btrfs_key location;
1539 char namebuf[BTRFS_NAME_LEN];
1541 root_cache = &active_node->root_cache;
1542 inode_cache = &active_node->inode_cache;
1543 rec = active_node->current;
1544 rec->found_dir_item = 1;
1546 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1547 total = btrfs_item_size_nr(eb, slot);
1548 while (cur < total) {
1550 btrfs_dir_item_key_to_cpu(eb, di, &location);
1551 name_len = btrfs_dir_name_len(eb, di);
1552 data_len = btrfs_dir_data_len(eb, di);
1553 filetype = btrfs_dir_type(eb, di);
1555 rec->found_size += name_len;
/*
 * Only BTRFS_NAME_LEN bytes fit in namebuf; a longer name is cut to that
 * length and reported as REF_ERR_NAME_TOO_LONG.
 */
1556 if (name_len <= BTRFS_NAME_LEN) {
1560 len = BTRFS_NAME_LEN;
1561 error = REF_ERR_NAME_TOO_LONG;
1563 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1565 if (location.type == BTRFS_INODE_ITEM_KEY) {
1566 add_inode_backref(inode_cache, location.objectid,
1567 key->objectid, key->offset, namebuf,
1568 len, filetype, key->type, error);
1569 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1570 add_inode_backref(root_cache, location.objectid,
1571 key->objectid, key->offset,
1572 namebuf, len, filetype,
/*
 * Any other location type is reported, and the backref is filed under the
 * BTRFS_MULTIPLE_OBJECTIDS objectid instead of the entry's own target.
 */
1575 fprintf(stderr, "invalid location in dir item %u\n",
1577 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1578 key->objectid, key->offset, namebuf,
1579 len, filetype, key->type, error);
/* Step over the header, the name and the data to reach the next entry. */
1582 len = sizeof(*di) + name_len + data_len;
1583 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * NOTE(review): nritems presumably counts the entries walked above; more
 * than one entry in a DIR_INDEX item is flagged as a duplicate index.
 */
1586 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1587 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into the INODE_REF item at @slot of @eb as a
 * backref in the inode cache of @active_node. The backref belongs to inode
 * key->objectid, with key->offset passed as its directory and the index read
 * from each entry.
 */
1592 static int process_inode_ref(struct extent_buffer *eb,
1593 int slot, struct btrfs_key *key,
1594 struct shared_node *active_node)
1602 struct cache_tree *inode_cache;
1603 struct btrfs_inode_ref *ref;
1604 char namebuf[BTRFS_NAME_LEN];
1606 inode_cache = &active_node->inode_cache;
1608 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1609 total = btrfs_item_size_nr(eb, slot);
1610 while (cur < total) {
1611 name_len = btrfs_inode_ref_name_len(eb, ref);
1612 index = btrfs_inode_ref_index(eb, ref);
/* Names that do not fit in namebuf are cut and flagged as too long. */
1613 if (name_len <= BTRFS_NAME_LEN) {
1617 len = BTRFS_NAME_LEN;
1618 error = REF_ERR_NAME_TOO_LONG;
1620 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1621 add_inode_backref(inode_cache, key->objectid, key->offset,
1622 index, namebuf, len, 0, key->type, error);
/* The next entry starts right after this entry's header and name. */
1624 len = sizeof(*ref) + name_len;
1625 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref(), but for INODE_EXTREF items: the parent
 * directory is read from each entry instead of from the key offset.
 */
1631 static int process_inode_extref(struct extent_buffer *eb,
1632 int slot, struct btrfs_key *key,
1633 struct shared_node *active_node)
1642 struct cache_tree *inode_cache;
1643 struct btrfs_inode_extref *extref;
1644 char namebuf[BTRFS_NAME_LEN];
1646 inode_cache = &active_node->inode_cache;
1648 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1649 total = btrfs_item_size_nr(eb, slot);
1650 while (cur < total) {
1651 name_len = btrfs_inode_extref_name_len(eb, extref);
1652 index = btrfs_inode_extref_index(eb, extref);
1653 parent = btrfs_inode_extref_parent(eb, extref);
/* Names that do not fit in namebuf are cut and flagged as too long. */
1654 if (name_len <= BTRFS_NAME_LEN) {
1658 len = BTRFS_NAME_LEN;
1659 error = REF_ERR_NAME_TOO_LONG;
1661 read_extent_buffer(eb, namebuf,
1662 (unsigned long)(extref + 1), len);
1663 add_inode_backref(inode_cache, key->objectid, parent,
1664 index, namebuf, len, 0, key->type, error);
/* The next entry starts right after this entry's header and name. */
1666 len = sizeof(*extref) + name_len;
1667 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum tree that overlap the byte range
 * [start, start + len) and account the covered bytes through @found.
 *
 * Each csum item covers (item size / csum size) sectors starting at its key
 * offset; the walk stops at the first item that starts at or beyond the end
 * of the range.
 */
1674 static int count_csum_range(struct btrfs_root *root, u64 start,
1675 u64 len, u64 *found)
1677 struct btrfs_key key;
1678 struct btrfs_path path;
1679 struct extent_buffer *leaf;
1684 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1686 btrfs_init_path(&path);
1688 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1690 key.type = BTRFS_EXTENT_CSUM_KEY;
1692 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item is inspected as well, since a csum
 * item that begins before @start may still reach into the range.
 */
1696 if (ret > 0 && path.slots[0] > 0) {
1697 leaf = path.nodes[0];
1698 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1699 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1700 key.type == BTRFS_EXTENT_CSUM_KEY)
1705 leaf = path.nodes[0];
1706 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1707 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1712 leaf = path.nodes[0];
1715 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1716 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1717 key.type != BTRFS_EXTENT_CSUM_KEY)
1720 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1721 if (key.offset >= start + len)
1724 if (key.offset > start)
1727 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* End of the data covered by this item: one sector per stored checksum. */
1728 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1729 if (csum_end > start) {
1730 size = min(csum_end - start, len);
1739 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at @slot of @eb and fold it into the current
 * inode record of @active_node.
 *
 * The record tracks the contiguous range covered so far (extent_start,
 * extent_end): an item starting before extent_end is an overlap, one starting
 * after it leaves a hole that is recorded. Malformed items set
 * I_ERR_BAD_FILE_EXTENT. For extents with a disk location the csum tree is
 * consulted through count_csum_range().
 */
1745 static int process_file_extent(struct btrfs_root *root,
1746 struct extent_buffer *eb,
1747 int slot, struct btrfs_key *key,
1748 struct shared_node *active_node)
1750 struct inode_record *rec;
1751 struct btrfs_file_extent_item *fi;
1753 u64 disk_bytenr = 0;
1754 u64 extent_offset = 0;
1755 u64 mask = root->sectorsize - 1;
1759 rec = active_node->current;
1760 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1761 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet. */
1763 if (rec->extent_start == (u64)-1) {
1764 rec->extent_start = key->offset;
1765 rec->extent_end = key->offset;
1768 if (rec->extent_end > key->offset)
1769 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1770 else if (rec->extent_end < key->offset) {
1771 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1772 key->offset - rec->extent_end);
1777 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1778 extent_type = btrfs_file_extent_type(eb, fi);
1780 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1781 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1783 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784 rec->found_size += num_bytes;
/* Round the inline length up to a whole sector for the range tracking. */
1785 num_bytes = (num_bytes + mask) & ~mask;
1786 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1787 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1788 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1789 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1790 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1791 if (num_bytes == 0 || (num_bytes & mask))
1792 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must lie within the extent's ram_bytes. */
1793 if (num_bytes + extent_offset >
1794 btrfs_file_extent_ram_bytes(eb, fi))
1795 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1796 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1797 (btrfs_file_extent_compression(eb, fi) ||
1798 btrfs_file_extent_encryption(eb, fi) ||
1799 btrfs_file_extent_other_encoding(eb, fi)))
1800 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A zero disk_bytenr adds nothing to found_size. */
1801 if (disk_bytenr > 0)
1802 rec->found_size += num_bytes;
1804 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1806 rec->extent_end = key->offset + num_bytes;
1809 * The data reloc tree will copy full extents into its inode and then
1810 * copy the corresponding csums. Because the extent it copied could be
1811 * a preallocated extent that hasn't been written to yet there may be no
1812 * csums to copy, ergo we won't have csums for our file extent. This is
1813 * ok so just don't bother checking csums if the inode belongs to the
1816 if (disk_bytenr > 0 &&
1817 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the whole on-disk extent is looked up. */
1819 if (btrfs_file_extent_compression(eb, fi))
1820 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1822 disk_bytenr += extent_offset;
1824 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1827 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1829 rec->found_csum_item = 1;
1830 if (found < num_bytes)
1831 rec->some_csum_missing = 1;
1832 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1834 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching process_*() helper, keeping
 * the current inode record of the active shared node in step with the
 * objectid of the item being processed.
 */
1840 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1841 struct walk_control *wc)
1843 struct btrfs_key key;
1847 struct cache_tree *inode_cache;
1848 struct shared_node *active_node;
1850 if (wc->root_level == wc->active_node &&
1851 btrfs_root_refs(&root->root_item) == 0)
1854 active_node = wc->nodes[wc->active_node];
1855 inode_cache = &active_node->inode_cache;
1856 nritems = btrfs_header_nritems(eb);
1857 for (i = 0; i < nritems; i++) {
1858 btrfs_item_key_to_cpu(eb, &key, i);
1860 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1862 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * The item belongs to a higher inode number than the current record: mark
 * the old record checked, pass it to maybe_free_inode_rec(), and fetch the
 * record for the new inode.
 */
1865 if (active_node->current == NULL ||
1866 active_node->current->ino < key.objectid) {
1867 if (active_node->current) {
1868 active_node->current->checked = 1;
1869 maybe_free_inode_rec(inode_cache,
1870 active_node->current);
1872 active_node->current = get_inode_rec(inode_cache,
1874 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1877 case BTRFS_DIR_ITEM_KEY:
1878 case BTRFS_DIR_INDEX_KEY:
1879 ret = process_dir_item(root, eb, i, &key, active_node);
1881 case BTRFS_INODE_REF_KEY:
1882 ret = process_inode_ref(eb, i, &key, active_node);
1884 case BTRFS_INODE_EXTREF_KEY:
1885 ret = process_inode_extref(eb, i, &key, active_node);
1887 case BTRFS_INODE_ITEM_KEY:
1888 ret = process_inode_item(eb, i, &key, active_node);
1890 case BTRFS_EXTENT_DATA_KEY:
1891 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, from pointer @slot through the
 * last pointer, using the block address and generation stored in each slot.
 */
1901 static void reada_walk_down(struct btrfs_root *root,
1902 struct extent_buffer *node, int slot)
1911 level = btrfs_header_level(node);
1915 nritems = btrfs_header_nritems(node);
1916 blocksize = root->nodesize;
1917 for (i = slot; i < nritems; i++) {
1918 bytenr = btrfs_node_blockptr(node, i);
1919 ptr_gen = btrfs_node_ptr_generation(node, i);
1920 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1925 * Check the child node/leaf by the following condition:
1926 * 1. the first item key of the node/leaf should be the same with the one
1928 * 2. block in parent node should match the child node/leaf.
1929 * 3. generation of parent node and child's header should be consistent.
1931 * Or the child node/leaf pointed by the key in parent is not valid.
1933 * We hope to check leaf owner too, but since subvol may share leaves,
1934 * which makes leaf owner check not so strong, key check should be
1935 * sufficient enough for that case.
1937 static int check_child_node(struct btrfs_root *root,
1938 struct extent_buffer *parent, int slot,
1939 struct extent_buffer *child)
1941 struct btrfs_key parent_key;
1942 struct btrfs_key child_key;
1945 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1946 if (btrfs_header_level(child) == 0)
1947 btrfs_item_key_to_cpu(child, &child_key, 0);
1949 btrfs_node_key_to_cpu(child, &child_key, 0);
1951 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1954 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1955 parent_key.objectid, parent_key.type, parent_key.offset,
1956 child_key.objectid, child_key.type, child_key.offset);
1958 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1960 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1961 btrfs_node_blockptr(parent, slot),
1962 btrfs_header_bytenr(child));
1964 if (btrfs_node_ptr_generation(parent, slot) !=
1965 btrfs_header_generation(child)) {
1967 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1968 btrfs_header_generation(child),
1969 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache used by walk_down_tree(): bytenr[level] is the block whose
 * reference count was looked up last on that level, refs[level] the count.
 */
1975 u64 bytenr[BTRFS_MAX_LEVEL];
1976 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, one child pointer at
 * a time, processing every leaf reached with process_one_leaf().
 *
 * Reference counts of visited blocks are taken from @nrefs when the block
 * address matches the cached one, and looked up in the extent tree (and
 * cached) otherwise. Each child is read, cross-checked against its parent
 * pointer with check_child_node() and validated with btrfs_check_leaf() or
 * btrfs_check_node() before the walk steps into it.
 */
1979 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1980 struct walk_control *wc, int *level,
1981 struct node_refs *nrefs)
1983 enum btrfs_tree_block_status status;
1986 struct extent_buffer *next;
1987 struct extent_buffer *cur;
1992 WARN_ON(*level < 0);
1993 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the starting block: cached value or fresh lookup. */
1995 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1996 refs = nrefs->refs[*level];
1999 ret = btrfs_lookup_extent_info(NULL, root,
2000 path->nodes[*level]->start,
2001 *level, 1, &refs, NULL);
2006 nrefs->bytenr[*level] = path->nodes[*level]->start;
2007 nrefs->refs[*level] = refs;
2011 ret = enter_shared_node(root, path->nodes[*level]->start,
2019 while (*level >= 0) {
2020 WARN_ON(*level < 0);
2021 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2022 cur = path->nodes[*level];
2024 if (btrfs_header_level(cur) != *level)
2027 if (path->slots[*level] >= btrfs_header_nritems(cur))
2030 ret = process_one_leaf(root, cur, wc);
/* Interior node: pick up the child pointer in the current slot. */
2035 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2036 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2037 blocksize = root->nodesize;
2039 if (bytenr == nrefs->bytenr[*level - 1]) {
2040 refs = nrefs->refs[*level - 1];
2042 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2043 *level - 1, 1, &refs, NULL);
2047 nrefs->bytenr[*level - 1] = bytenr;
2048 nrefs->refs[*level - 1] = refs;
2053 ret = enter_shared_node(root, bytenr, refs,
2056 path->slots[*level]++;
/* Use the cached block if it is current, otherwise read it from disk. */
2061 next = btrfs_find_tree_block(root, bytenr, blocksize);
2062 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2063 free_extent_buffer(next);
2064 reada_walk_down(root, cur, path->slots[*level]);
2065 next = read_tree_block(root, bytenr, blocksize,
/* An unreadable child is recorded as a corrupt extent. */
2067 if (!extent_buffer_uptodate(next)) {
2068 struct btrfs_key node_key;
2070 btrfs_node_key_to_cpu(path->nodes[*level],
2072 path->slots[*level]);
2073 btrfs_add_corrupt_extent_record(root->fs_info,
2075 path->nodes[*level]->start,
2076 root->nodesize, *level);
2082 ret = check_child_node(root, cur, path->slots[*level], next);
2088 if (btrfs_is_leaf(next))
2089 status = btrfs_check_leaf(root, NULL, next);
2091 status = btrfs_check_node(root, NULL, next);
2092 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2093 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below. */
2098 *level = *level - 1;
2099 free_extent_buffer(path->nodes[*level]);
2100 path->nodes[*level] = next;
2101 path->slots[*level] = 0;
/* Mark the current level as fully consumed. */
2104 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root, looking for a node that still has an
 * unvisited slot. Exhausted levels are released from @path on the way up,
 * and leave_shared_node() is called when the level being released is the
 * active shared node.
 */
2108 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2109 struct walk_control *wc, int *level)
2112 struct extent_buffer *leaf;
2114 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2115 leaf = path->nodes[i];
/* Despite its name, leaf may be a node of any level here. */
2116 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2121 free_extent_buffer(path->nodes[*level]);
2122 path->nodes[*level] = NULL;
2123 BUG_ON(*level > wc->active_node);
2124 if (*level == wc->active_node)
2125 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a tree's root directory.
 *
 * Conditions tested, in order: an inode item was found and no errors are
 * recorded; nlink is 1 and found_link is 0; there is at least one backref;
 * the first backref has an inode ref, index 0 and the name ".."; and that
 * backref has neither a dir index nor a dir item.
 */
2132 static int check_root_dir(struct inode_record *rec)
2134 struct inode_backref *backref;
2137 if (!rec->found_inode_item || rec->errors)
2139 if (rec->nlink != 1 || rec->found_link != 0)
2141 if (list_empty(&rec->backrefs))
2143 backref = to_inode_backref(rec->backrefs.next);
2144 if (!backref->found_inode_ref)
2146 if (backref->index != 0 || backref->namelen != 2 ||
2147 memcmp(backref->name, "..", 2))
2149 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with rec->found_size
 * and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search key uses offset (u64)-1, so it cannot match exactly; the slot
 * checks that follow locate the inode item relative to the returned
 * position.
 */
2156 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2157 struct btrfs_root *root, struct btrfs_path *path,
2158 struct inode_record *rec)
2160 struct btrfs_inode_item *ei;
2161 struct btrfs_key key;
2164 key.objectid = rec->ino;
2165 key.type = BTRFS_INODE_ITEM_KEY;
2166 key.offset = (u64)-1;
2168 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 if (!path->slots[0]) {
2179 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2180 if (key.objectid != rec->ino) {
2185 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2186 struct btrfs_inode_item);
2187 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2188 btrfs_mark_buffer_dirty(path->nodes[0]);
2189 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2190 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2191 root->root_key.objectid);
2193 btrfs_release_path(path);
/*
 * Insert the missing orphan item for rec->ino with btrfs_add_orphan_item()
 * and clear I_ERR_NO_ORPHAN_ITEM on the record. @path is released before
 * returning.
 */
2197 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2198 struct btrfs_root *root,
2199 struct btrfs_path *path,
2200 struct inode_record *rec)
2204 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2205 btrfs_release_path(path);
2207 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 * @path is released before returning.
 */
2211 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2212 struct btrfs_root *root,
2213 struct btrfs_path *path,
2214 struct inode_record *rec)
2216 struct btrfs_inode_item *ei;
2217 struct btrfs_key key;
2220 key.objectid = rec->ino;
2221 key.type = BTRFS_INODE_ITEM_KEY;
2224 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2231 /* Since ret == 0, no need to check anything */
2232 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2233 struct btrfs_inode_item);
2234 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2235 btrfs_mark_buffer_dirty(path->nodes[0]);
2236 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2237 printf("reset nbytes for ino %llu root %llu\n",
2238 rec->ino, root->root_key.objectid);
2240 btrfs_release_path(path);
/*
 * Re-create the DIR_INDEX item described by @backref, pointing at inode
 * rec->ino, inside a transaction of its own.
 *
 * After the insert the backref is marked as having a dir index, the name
 * length is added to the parent directory record's found_size, and
 * I_ERR_DIR_ISIZE_WRONG on that record is recomputed by comparing
 * found_size with isize.
 */
2244 static int add_missing_dir_index(struct btrfs_root *root,
2245 struct cache_tree *inode_cache,
2246 struct inode_record *rec,
2247 struct inode_backref *backref)
2249 struct btrfs_path *path;
2250 struct btrfs_trans_handle *trans;
2251 struct btrfs_dir_item *dir_item;
2252 struct extent_buffer *leaf;
2253 struct btrfs_key key;
2254 struct btrfs_disk_key disk_key;
2255 struct inode_record *dir_rec;
2256 unsigned long name_ptr;
2257 u32 data_size = sizeof(*dir_item) + backref->namelen;
2260 path = btrfs_alloc_path();
2264 trans = btrfs_start_transaction(root, 1);
2265 if (IS_ERR(trans)) {
2266 btrfs_free_path(path);
2267 return PTR_ERR(trans);
2270 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2271 (unsigned long long)rec->ino);
/* The new item is keyed (parent dir, DIR_INDEX, index). */
2272 key.objectid = backref->dir;
2273 key.type = BTRFS_DIR_INDEX_KEY;
2274 key.offset = backref->index;
2276 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2279 leaf = path->nodes[0];
2280 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* Location of the entry: the inode item of rec->ino, in on-disk byte order. */
2282 disk_key.objectid = cpu_to_le64(rec->ino);
2283 disk_key.type = BTRFS_INODE_ITEM_KEY;
2284 disk_key.offset = 0;
2286 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2287 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2288 btrfs_set_dir_data_len(leaf, dir_item, 0);
2289 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly behind the dir item header. */
2290 name_ptr = (unsigned long)(dir_item + 1);
2291 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2292 btrfs_mark_buffer_dirty(leaf);
2293 btrfs_free_path(path);
/* NOTE(review): the result of btrfs_commit_transaction() is not checked. */
2294 btrfs_commit_transaction(trans, root);
2296 backref->found_dir_index = 1;
2297 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2298 BUG_ON(IS_ERR(dir_rec));
2301 dir_rec->found_size += backref->namelen;
2302 if (dir_rec->found_size == dir_rec->isize &&
2303 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2304 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2305 if (dir_rec->found_size != dir_rec->isize)
2306 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref inside a transaction of
 * its own. The entry is looked up by directory, name and index; depending on
 * a condition not visible here, either the whole item is deleted
 * (btrfs_del_item) or only this name is removed from it
 * (btrfs_delete_one_dir_name).
 */
2311 static int delete_dir_index(struct btrfs_root *root,
2312 struct cache_tree *inode_cache,
2313 struct inode_record *rec,
2314 struct inode_backref *backref)
2316 struct btrfs_trans_handle *trans;
2317 struct btrfs_dir_item *di;
2318 struct btrfs_path *path;
2321 path = btrfs_alloc_path();
2325 trans = btrfs_start_transaction(root, 1);
2326 if (IS_ERR(trans)) {
2327 btrfs_free_path(path);
2328 return PTR_ERR(trans);
2332 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2333 (unsigned long long)backref->dir,
2334 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2335 (unsigned long long)root->objectid);
2337 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2338 backref->name, backref->namelen,
2339 backref->index, -1);
/* Early-exit path: release the path and close the transaction. */
2342 btrfs_free_path(path);
2343 btrfs_commit_transaction(trans, root);
2350 ret = btrfs_del_item(trans, root, path);
2352 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2354 btrfs_free_path(path);
2355 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing inode item for rec->ino from what the scan found,
 * inside a transaction of its own.
 *
 * The item gets the transaction id as generation, rec->found_size as
 * nbytes, the current time as atime/ctime/mtime and a zero otime. A record
 * with a dir item becomes a directory (S_IFDIR | 0755, size = found_size);
 * any other record becomes a regular file (S_IFREG | 0755,
 * size = extent_end).
 */
2359 static int create_inode_item(struct btrfs_root *root,
2360 struct inode_record *rec,
2361 struct inode_backref *backref, int root_dir)
2363 struct btrfs_trans_handle *trans;
2364 struct btrfs_inode_item inode_item;
2365 time_t now = time(NULL);
2368 trans = btrfs_start_transaction(root, 1);
2369 if (IS_ERR(trans)) {
2370 ret = PTR_ERR(trans);
2374 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2375 "be incomplete, please check permissions and content after "
2376 "the fsck completes.\n", (unsigned long long)root->objectid,
2377 (unsigned long long)rec->ino);
2379 memset(&inode_item, 0, sizeof(inode_item));
2380 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink is either 1 or the number of links found during the scan. */
2382 btrfs_set_stack_inode_nlink(&inode_item, 1);
2384 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2385 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2386 if (rec->found_dir_item) {
2387 if (rec->found_file_extent)
2388 fprintf(stderr, "root %llu inode %llu has both a dir "
2389 "item and extents, unsure if it is a dir or a "
2390 "regular file so setting it as a directory\n",
2391 (unsigned long long)root->objectid,
2392 (unsigned long long)rec->ino);
2393 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2394 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * NOTE(review): the condition on the else branch below is always true at
 * this point, so a plain else would be equivalent.
 */
2395 } else if (!rec->found_dir_item) {
2396 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2397 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2399 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2400 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2401 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2402 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2403 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2404 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2405 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2406 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2408 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2410 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair the inconsistent ones.
 *
 * Repairs visible here: re-create the inode item of the root directory,
 * delete dir index entries that have no matching inode ref or whose index
 * does not match, add a dir index that is missing next to an existing dir
 * item and inode ref, add a dir index/item pair for a lone inode ref, and
 * re-create the inode item when all three backref parts agree but no inode
 * item was found. Backrefs that end up complete and error free are dropped
 * from the list.
 *
 * Returns the error of a failed repair step, otherwise repaired.
 */
2414 static int repair_inode_backrefs(struct btrfs_root *root,
2415 struct inode_record *rec,
2416 struct cache_tree *inode_cache,
2419 struct inode_backref *tmp, *backref;
2420 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2424 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2425 if (!delete && rec->ino == root_dirid) {
2426 if (!rec->found_inode_item) {
2427 ret = create_inode_item(root, rec, backref, 1);
2434 /* Index 0 for root dir's are special, don't mess with it */
2435 if (rec->ino == root_dirid && backref->index == 0)
2439 ((backref->found_dir_index && !backref->found_inode_ref) ||
2440 (backref->found_dir_index && backref->found_inode_ref &&
2441 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2442 ret = delete_dir_index(root, inode_cache, rec, backref);
2446 list_del(&backref->list);
2450 if (!delete && !backref->found_dir_index &&
2451 backref->found_dir_item && backref->found_inode_ref) {
2452 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice in the condition below; the
 * repeated operand is redundant.
 */
2457 if (backref->found_dir_item &&
2458 backref->found_dir_index &&
2459 backref->found_dir_index) {
2460 if (!backref->errors &&
2461 backref->found_inode_ref) {
2462 list_del(&backref->list);
2468 if (!delete && (!backref->found_dir_index &&
2469 !backref->found_dir_item &&
2470 backref->found_inode_ref)) {
2471 struct btrfs_trans_handle *trans;
2472 struct btrfs_key location;
2474 ret = check_dir_conflict(root, backref->name,
2480 * let nlink fixing routine to handle it,
2481 * which can do it better.
2486 location.objectid = rec->ino;
2487 location.type = BTRFS_INODE_ITEM_KEY;
2488 location.offset = 0;
2490 trans = btrfs_start_transaction(root, 1);
2491 if (IS_ERR(trans)) {
2492 ret = PTR_ERR(trans);
2495 fprintf(stderr, "adding missing dir index/item pair "
2497 (unsigned long long)rec->ino);
2498 ret = btrfs_insert_dir_item(trans, root, backref->name,
2500 backref->dir, &location,
2501 imode_to_type(rec->imode),
2504 btrfs_commit_transaction(trans, root);
2508 if (!delete && (backref->found_inode_ref &&
2509 backref->found_dir_index &&
2510 backref->found_dir_item &&
2511 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2512 !rec->found_inode_item)) {
2513 ret = create_inode_item(root, rec, backref, 0);
2520 return ret ? ret : repaired;
2524 * To determine the file type for nlink/inode_item repair
2526 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2527 * Return -ENOENT if file type is not found.
/*
 * Sources are tried in order: the mode of the inode item, when one was
 * found, and otherwise the filetype of the first backref that has a dir
 * index or a dir item.
 */
2529 static int find_file_type(struct inode_record *rec, u8 *type)
2531 struct inode_backref *backref;
2533 /* For inode item recovered case */
2534 if (rec->found_inode_item) {
2535 *type = imode_to_type(rec->imode);
2539 list_for_each_entry(backref, &rec->backrefs, list) {
2540 if (backref->found_dir_index || backref->found_dir_item) {
2541 *type = backref->filetype;
2549 * To determine the file name for nlink repair
2551 * Return 0 if file name is found, set name and namelen.
2552 * Return -ENOENT if file name is not found.
/*
 * The name is taken from the first backref that has a dir index, a dir item
 * or an inode ref. It is copied without a terminating NUL.
 *
 * NOTE(review): the copy is bounded only by backref->namelen, so @name must
 * have room for that many bytes; repair_inode_nlinks() passes a
 * BTRFS_NAME_LEN buffer.
 */
2554 static int find_file_name(struct inode_record *rec,
2555 char *name, int *namelen)
2557 struct inode_backref *backref;
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item ||
2561 backref->found_inode_ref) {
2562 memcpy(name, backref->name, backref->namelen);
2563 *namelen = backref->namelen;
2570 /* Reset the nlink of the inode to the correct one */
/*
 * Three passes:
 *  1. unlink every backref of @rec, and drop from the list those that do not
 *     have all of dir index, dir item and inode ref;
 *  2. set nlink in the inode item to 0;
 *  3. link the remaining backrefs again with btrfs_add_link().
 * rec->found_link is reset to 0 up front.
 */
2571 static int reset_nlink(struct btrfs_trans_handle *trans,
2572 struct btrfs_root *root,
2573 struct btrfs_path *path,
2574 struct inode_record *rec)
2576 struct inode_backref *backref;
2577 struct inode_backref *tmp;
2578 struct btrfs_key key;
2579 struct btrfs_inode_item *inode_item;
2582 /* We don't believe this either, reset it and iterate backref */
2583 rec->found_link = 0;
2585 /* Remove all backref including the valid ones */
2586 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2587 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2588 backref->index, backref->name,
2589 backref->namelen, 0);
2593 /* remove invalid backref, so it won't be added back */
2594 if (!(backref->found_dir_index &&
2595 backref->found_dir_item &&
2596 backref->found_inode_ref)) {
2597 list_del(&backref->list);
2604 /* Set nlink to 0 */
2605 key.objectid = rec->ino;
2606 key.type = BTRFS_INODE_ITEM_KEY;
2608 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2615 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2616 struct btrfs_inode_item);
2617 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2618 btrfs_mark_buffer_dirty(path->nodes[0]);
2619 btrfs_release_path(path);
2622 * Add back valid inode_ref/dir_item/dir_index,
2623 * add_link() will handle the nlink inc, so new nlink must be correct
2625 list_for_each_entry(backref, &rec->backrefs, list) {
2626 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2627 backref->name, backref->namelen,
2628 backref->filetype, &backref->index, 1);
2633 btrfs_release_path(path);
/*
 * Repair the link count of rec->ino.
 *
 * A name and a file type are salvaged from the backrefs first, with the
 * inode number as fallback name and BTRFS_FT_REG_FILE as fallback type.
 * reset_nlink() then rebuilds the links. If no link is left afterwards
 * (found_link == 0), a "lost+found" directory is created and the inode is
 * linked into it; on a name clash, ".INO" suffixes are appended until the
 * link succeeds or the name would no longer fit.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared on the record in any case.
 */
2637 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2638 struct btrfs_root *root,
2639 struct btrfs_path *path,
2640 struct inode_record *rec)
2642 char *dir_name = "lost+found";
2643 char namebuf[BTRFS_NAME_LEN] = {0};
2648 int name_recovered = 0;
2649 int type_recovered = 0;
2653 * Get file name and type first before these invalid inode ref
2654 * are deleted by remove_all_invalid_backref()
2656 name_recovered = !find_file_name(rec, namebuf, &namelen);
2657 type_recovered = !find_file_type(rec, &type);
2659 if (!name_recovered) {
2660 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2661 rec->ino, rec->ino);
2662 namelen = count_digits(rec->ino);
2663 sprintf(namebuf, "%llu", rec->ino);
2666 if (!type_recovered) {
2667 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2669 type = BTRFS_FT_REG_FILE;
2673 ret = reset_nlink(trans, root, path, rec);
2676 "Failed to reset nlink for inode %llu: %s\n",
2677 rec->ino, strerror(-ret));
2681 if (rec->found_link == 0) {
2682 lost_found_ino = root->highest_inode;
2683 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2688 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2689 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2692 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2693 dir_name, strerror(-ret));
2696 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2697 namebuf, namelen, type, NULL, 1);
2699 * Add ".INO" suffix several times to handle case where
2700 * "FILENAME.INO" is already taken by another file.
2702 while (ret == -EEXIST) {
2704 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2706 if (namelen + count_digits(rec->ino) + 1 >
2711 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2713 namelen += count_digits(rec->ino) + 1;
2714 ret = btrfs_add_link(trans, root, rec->ino,
2715 lost_found_ino, namebuf,
2716 namelen, type, NULL, 1);
2720 "Failed to link the inode %llu to %s dir: %s\n",
2721 rec->ino, dir_name, strerror(-ret));
2725 * Just increase the found_link, don't actually add the
2726 * backref. This will make things easier and this inode
2727 * record will be freed after the repair is done.
2728 * So fsck will not report problem about this inode.
2731 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2732 namelen, namebuf, dir_name);
2734 printf("Fixed the nlink of inode %llu\n", rec->ino);
2737 * Clear the flag anyway, or we will loop forever for the same inode
2738 * as it will not be removed from the bad inode list and the dead loop
2741 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2742 btrfs_release_path(path);
2747 * Check if there is any normal(reg or prealloc) file extent for given
2749 * This is used to determine the file type when neither its dir_index/item nor
2750 * inode_item exists.
2752 * This will *NOT* report error, if any error happens, just consider it does
2753 * not have any normal file extent.
/*
 * Look in @root for an EXTENT_DATA item of inode @ino whose file extent type
 * is something other than BTRFS_FILE_EXTENT_INLINE.
 *
 * @root: tree that is searched.
 * @ino:  inode number the found key's objectid is compared with.
 *
 * NOTE(review): this excerpt does not show every statement of the function
 * (the result assignments, the loop construct and the return are missing), so
 * the exact return values cannot be confirmed from here.
 */
2755 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2757 struct btrfs_path *path;
2758 struct btrfs_key key;
2759 struct btrfs_key found_key;
2760 struct btrfs_file_extent_item *fi;
2764 path = btrfs_alloc_path();
2768 key.type = BTRFS_EXTENT_DATA_KEY;
/*
 * Search with a NULL transaction handle and zeros for the two trailing
 * arguments -- presumably a plain read-only lookup; confirm against
 * btrfs_search_slot().
 */
2771 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/*
 * Nonzero search result with the slot index at or past the number of items
 * in the leaf: step to the next leaf.
 */
2776 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2777 ret = btrfs_next_leaf(root, path);
2784 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Only EXTENT_DATA items whose objectid is @ino are of interest. */
2786 if (found_key.objectid != ino ||
2787 found_key.type != BTRFS_EXTENT_DATA_KEY)
2789 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2790 struct btrfs_file_extent_item);
2791 type = btrfs_file_extent_type(path->nodes[0], fi);
/*
 * NOTE(review): no statement that advances path->slots[0] is visible in
 * this excerpt; confirm the scan cannot keep re-reading the same item
 * when that item is an inline extent.
 */
2792 if (type != BTRFS_FILE_EXTENT_INLINE) {
2798 btrfs_free_path(path);
2802 static u32 btrfs_type_to_imode(u8 type)
2804 static u32 imode_by_btrfs_type[] = {
2805 [BTRFS_FT_REG_FILE] = S_IFREG,
2806 [BTRFS_FT_DIR] = S_IFDIR,
2807 [BTRFS_FT_CHRDEV] = S_IFCHR,
2808 [BTRFS_FT_BLKDEV] = S_IFBLK,
2809 [BTRFS_FT_FIFO] = S_IFIFO,
2810 [BTRFS_FT_SOCK] = S_IFSOCK,
2811 [BTRFS_FT_SYMLINK] = S_IFLNK,
2814 return imode_by_btrfs_type[(type)];
/*
 * Recreate the inode item for @rec in @root.
 *
 * The file type comes from find_file_type() when that call returns 0.
 * Otherwise it is guessed from what the record has seen: a file extent
 * confirmed by find_normal_file_extent() or a non-empty orphan_extents list
 * gives BTRFS_FT_REG_FILE, a dir item gives BTRFS_FT_DIR, and a regular file
 * is assumed after printing a message when nothing matches.  The inode is
 * created with btrfs_new_inode() and the record is then updated in memory.
 *
 * @trans: transaction passed to btrfs_new_inode().
 * @root:  tree the inode is created in.
 * @path:  not referenced by the lines visible in this excerpt.
 * @rec:   inode record being repaired; its ino, imode and errors are used.
 *
 * NOTE(review): the declarations of ret, mode and filetype, the check of the
 * btrfs_new_inode() result and the return statement are not visible here.
 */
2817 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2818 struct btrfs_root *root,
2819 struct btrfs_path *path,
2820 struct inode_record *rec)
2824 int type_recovered = 0;
2827 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* type_recovered becomes 1 only when find_file_type() returns 0. */
2829 type_recovered = !find_file_type(rec, &filetype);
2832 * Try to determine inode type if type not found.
2834 * For found regular file extent, it must be FILE.
2835 * For found dir_item/index, it must be DIR.
2837 * For undetermined one, use FILE as fallback.
2840 * 1. If found backref(inode_index/item is already handled) to it,
2842 * Need new inode-inode ref structure to allow search for that.
2844 if (!type_recovered) {
2845 if (rec->found_file_extent &&
2846 find_normal_file_extent(root, rec->ino)) {
2848 filetype = BTRFS_FT_REG_FILE;
2849 } else if (rec->found_dir_item) {
2851 filetype = BTRFS_FT_DIR;
2852 } else if (!list_empty(&rec->orphan_extents)) {
2854 filetype = BTRFS_FT_REG_FILE;
2856 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2859 filetype = BTRFS_FT_REG_FILE;
2863 ret = btrfs_new_inode(trans, root, rec->ino,
2864 mode | btrfs_type_to_imode(filetype));
2869 * Here inode rebuild is done, we only rebuild the inode item,
2870 * don't repair the nlink(like move to lost+found).
2871 * That is the job of nlink repair.
2873 * We just fill the record and return
2875 rec->found_dir_item = 1;
2876 rec->imode = mode | btrfs_type_to_imode(filetype);
2878 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2879 /* Ensure the inode_nlinks repair function will be called */
2880 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every entry of rec->orphan_extents.
 *
 * For each orphan the range (offset, disk_len) of the owning inode is probed
 * with btrfs_get_extent().  The visible lines show two outcomes: the orphan
 * is reported as conflicting and its extent is released with
 * btrfs_free_extent(), or a file extent is inserted with
 * btrfs_insert_file_extent() and the record's found_size and hole tree are
 * updated.  The entry is unlinked from the list in either case, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * @trans: transaction used for the extent free and the file extent insert.
 * @root:  tree that owns the inode.
 * @path:  scratch path, released right after btrfs_get_extent().
 * @rec:   inode record whose orphan_extents list is consumed.
 *
 * NOTE(review): the conditions selecting between the two outcomes, the error
 * checks and the return statement are not visible in this excerpt.
 */
2885 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2886 struct btrfs_root *root,
2887 struct btrfs_path *path,
2888 struct inode_record *rec)
2890 struct orphan_data_extent *orphan;
2891 struct orphan_data_extent *tmp;
/* The _safe iterator is needed: entries are list_del()ed inside the loop. */
2894 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2896 * Check for conflicting file extents
2898 * Here we don't know whether the extents is compressed or not,
2899 * so we can only assume it not compressed nor data offset,
2900 * and use its disk_len as extent length.
2902 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2903 orphan->offset, orphan->disk_len, 0);
2904 btrfs_release_path(path);
2909 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2910 orphan->disk_bytenr, orphan->disk_len);
2911 ret = btrfs_free_extent(trans,
2912 root->fs_info->extent_root,
2913 orphan->disk_bytenr, orphan->disk_len,
2914 0, root->objectid, orphan->objectid,
2919 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2920 orphan->offset, orphan->disk_bytenr,
2921 orphan->disk_len, orphan->disk_len);
2925 /* Update file size info */
2926 rec->found_size += orphan->disk_len;
2927 if (rec->found_size == rec->nbytes)
2928 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2930 /* Update the file extent hole info too */
2931 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2935 if (RB_EMPTY_ROOT(&rec->holes))
2936 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2938 list_del(&orphan->list);
2941 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes.
 *
 * Starting from rb_first(&rec->holes), each hole is passed to
 * btrfs_punch_hole() with its start and len and then removed from the tree
 * with del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty.  A separate call punches one hole from offset 0 up to
 * rec->isize rounded up to root->sectorsize, for a file that lost all of its
 * file extents.
 *
 * @trans: transaction passed to btrfs_punch_hole().
 * @root:  tree that owns the inode; also supplies sectorsize.
 * @path:  not referenced by the lines visible in this excerpt.
 * @rec:   inode record whose holes tree is consumed.
 *
 * NOTE(review): the loop construct, the error checks, the condition guarding
 * the special case and the return statement are not visible here.
 */
2946 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2947 struct btrfs_root *root,
2948 struct btrfs_path *path,
2949 struct inode_record *rec)
2951 struct rb_node *node;
2952 struct file_extent_hole *hole;
2956 node = rb_first(&rec->holes);
2960 hole = rb_entry(node, struct file_extent_hole, node);
2961 ret = btrfs_punch_hole(trans, root, rec->ino,
2962 hole->start, hole->len);
2965 ret = del_file_extent_hole(&rec->holes, hole->start,
2969 if (RB_EMPTY_ROOT(&rec->holes))
2970 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first node: the current hole was just removed. */
2971 node = rb_first(&rec->holes);
2973 /* special case for a file losing all its file extent */
2975 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2976 round_up(rec->isize, root->sectorsize));
2980 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2981 rec->ino, root->objectid);
/*
 * Run the repair helpers that match the error bits set in rec->errors.
 *
 * One transaction is started on @root and the helpers are tried in a fixed
 * order: missing inode item, orphan extents, discount (hole) extents, dir
 * isize, orphan item, link count and nbytes.  Every helper after the first is
 * attempted only while ret is still 0.  The transaction is committed and the
 * path freed afterwards.
 *
 * @root: tree that owns the inode.
 * @rec:  inode record to repair.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 *
 * NOTE(review): the value returned by btrfs_commit_transaction() is not used.
 * The statement taken when none of the handled bits is set, the check of the
 * path allocation and the final return are not visible in this excerpt.
 */
2986 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2988 struct btrfs_trans_handle *trans;
2989 struct btrfs_path *path;
/* Only the error kinds listed in this mask are handled by this function. */
2992 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2993 I_ERR_NO_ORPHAN_ITEM |
2994 I_ERR_LINK_COUNT_WRONG |
2995 I_ERR_NO_INODE_ITEM |
2996 I_ERR_FILE_EXTENT_ORPHAN |
2997 I_ERR_FILE_EXTENT_DISCOUNT|
2998 I_ERR_FILE_NBYTES_WRONG)))
3001 path = btrfs_alloc_path();
3006 * For nlink repair, it may create a dir and add link, so
3007 * 2 for parent(256)'s dir_index and dir_item
3008 * 2 for lost+found dir's inode_item and inode_ref
3009 * 1 for the new inode_ref of the file
3010 * 2 for lost+found dir's dir_index and dir_item for the file
3012 trans = btrfs_start_transaction(root, 7);
3013 if (IS_ERR(trans)) {
3014 btrfs_free_path(path);
3015 return PTR_ERR(trans);
3018 if (rec->errors & I_ERR_NO_INODE_ITEM)
3019 ret = repair_inode_no_item(trans, root, path, rec);
3020 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3021 ret = repair_inode_orphan_extent(trans, root, path, rec);
3022 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3023 ret = repair_inode_discount_extent(trans, root, path, rec);
3024 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3025 ret = repair_inode_isize(trans, root, path, rec);
3026 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3027 ret = repair_inode_orphan_item(trans, root, path, rec);
3028 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3029 ret = repair_inode_nlinks(trans, root, path, rec);
3030 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3031 ret = repair_inode_nbytes(trans, root, path, rec);
3032 btrfs_commit_transaction(trans, root);
3033 btrfs_free_path(path);
/*
 * Check, and when repair is enabled try to fix, the inode records collected
 * for @root in @inode_cache.
 *
 * The visible steps are, in order:
 *  - for a root whose ref count is 0, warn if @inode_cache is not empty;
 *  - raise root->highest_inode to the ino of the last record in the cache;
 *  - while repair is enabled, walk the cache and call
 *    repair_inode_backrefs() on records that have backrefs;
 *  - look up the record of the root directory (root_dirid), check it with
 *    check_root_dir(), and recreate it with btrfs_make_root_dir() or report
 *    it as not found;
 *  - remove every record from the cache, drop the root dir and
 *    BTRFS_ORPHAN_OBJECTID records, update the error bits, call
 *    try_repair_inode(), print the remaining errors together with each
 *    unresolved backref, and free the record.
 *
 * @root:        tree the records belong to.
 * @inode_cache: cache of inode records; it is drained by this function.
 *
 * Returns -1 when `error` is above zero and 0 otherwise.
 *
 * NOTE(review): several declarations (stage, err, ret, error), loop
 * constructs and conditions are not visible in this excerpt.  The value
 * returned by btrfs_commit_transaction() after recreating the root dir is not
 * used.
 */
3037 static int check_inode_recs(struct btrfs_root *root,
3038 struct cache_tree *inode_cache)
3040 struct cache_extent *cache;
3041 struct ptr_node *node;
3042 struct inode_record *rec;
3043 struct inode_backref *backref;
3048 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3050 if (btrfs_root_refs(&root->root_item) == 0) {
3051 if (!cache_tree_empty(inode_cache))
3052 fprintf(stderr, "warning line %d\n", __LINE__);
3057 * We need to record the highest inode number for later 'lost+found'
3059 * We must select an ino not used/referred by any existing inode, or
3060 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3061 * this may cause 'lost+found' dir has wrong nlinks.
3063 cache = last_cache_extent(inode_cache);
3065 node = container_of(cache, struct ptr_node, cache);
3067 if (rec->ino > root->highest_inode)
3068 root->highest_inode = rec->ino;
3072 * We need to repair backrefs first because we could change some of the
3073 * errors in the inode recs.
3075 * We also need to go through and delete invalid backrefs first and then
3076 * add the correct ones second. We do this because we may get EEXIST
3077 * when adding back the correct index because we hadn't yet deleted the
3080 * For example, if we were missing a dir index then the directories
3081 * isize would be wrong, so if we fixed the isize to what we thought it
3082 * would be and then fixed the backref we'd still have a invalid fs, so
3083 * we need to add back the dir index and then check to see if the isize
3088 if (stage == 3 && !err)
3091 cache = search_cache_extent(inode_cache, 0);
3092 while (repair && cache) {
3093 node = container_of(cache, struct ptr_node, cache);
3095 cache = next_cache_extent(cache);
3097 /* Need to free everything up and rescan */
3099 remove_cache_extent(inode_cache, &node->cache);
3101 free_inode_rec(rec);
3105 if (list_empty(&rec->backrefs))
3108 ret = repair_inode_backrefs(root, rec, inode_cache,
3122 rec = get_inode_rec(inode_cache, root_dirid, 0);
3123 BUG_ON(IS_ERR(rec));
3125 ret = check_root_dir(rec);
3127 fprintf(stderr, "root %llu root dir %llu error\n",
3128 (unsigned long long)root->root_key.objectid,
3129 (unsigned long long)root_dirid);
3130 print_inode_error(root, rec);
3135 struct btrfs_trans_handle *trans;
3137 trans = btrfs_start_transaction(root, 1);
3138 if (IS_ERR(trans)) {
3139 err = PTR_ERR(trans);
3144 "root %llu missing its root dir, recreating\n",
3145 (unsigned long long)root->objectid);
3147 ret = btrfs_make_root_dir(trans, root, root_dirid);
3150 btrfs_commit_transaction(trans, root);
3154 fprintf(stderr, "root %llu root dir %llu not found\n",
3155 (unsigned long long)root->root_key.objectid,
3156 (unsigned long long)root_dirid);
3160 cache = search_cache_extent(inode_cache, 0);
3163 node = container_of(cache, struct ptr_node, cache);
3165 remove_cache_extent(inode_cache, &node->cache);
3167 if (rec->ino == root_dirid ||
3168 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3169 free_inode_rec(rec);
3173 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3174 ret = check_orphan_item(root, rec->ino);
3176 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3177 if (can_free_inode_rec(rec)) {
3178 free_inode_rec(rec);
3183 if (!rec->found_inode_item)
3184 rec->errors |= I_ERR_NO_INODE_ITEM;
3185 if (rec->found_link != rec->nlink)
3186 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3188 ret = try_repair_inode(root, rec);
3189 if (ret == 0 && can_free_inode_rec(rec)) {
3190 free_inode_rec(rec);
3196 if (!(repair && ret == 0))
3198 print_inode_error(root, rec);
3199 list_for_each_entry(backref, &rec->backrefs, list) {
3200 if (!backref->found_dir_item)
3201 backref->errors |= REF_ERR_NO_DIR_ITEM;
3202 if (!backref->found_dir_index)
3203 backref->errors |= REF_ERR_NO_DIR_INDEX;
3204 if (!backref->found_inode_ref)
3205 backref->errors |= REF_ERR_NO_INODE_REF;
3206 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3207 " namelen %u name %s filetype %d errors %x",
3208 (unsigned long long)backref->dir,
3209 (unsigned long long)backref->index,
3210 backref->namelen, backref->name,
3211 backref->filetype, backref->errors);
3212 print_ref_error(backref->errors);
3214 free_inode_rec(rec);
3216 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid kept in @root_cache.
 *
 * The cache is queried with lookup_cache_extent(@objectid, 1).  A record
 * taken from the cache is used as is; otherwise a zeroed record is allocated
 * with calloc(), given @objectid, an empty backrefs list and a cache extent
 * covering [objectid, objectid + 1), and inserted into @root_cache.
 *
 * The function can return ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST), presumably
 * for an allocation failure and a failed insert respectively; the guarding
 * conditions are not visible in this excerpt.
 */
3219 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3222 struct cache_extent *cache;
3223 struct root_record *rec = NULL;
3226 cache = lookup_cache_extent(root_cache, objectid, 1);
3228 rec = container_of(cache, struct root_record, cache);
3230 rec = calloc(1, sizeof(*rec));
3232 return ERR_PTR(-ENOMEM);
3233 rec->objectid = objectid;
3234 INIT_LIST_HEAD(&rec->backrefs);
3235 rec->cache.start = objectid;
3236 rec->cache.size = 1;
3238 ret = insert_cache_extent(root_cache, &rec->cache);
3240 return ERR_PTR(-EEXIST);
/*
 * Find or create the backref of @rec identified by (@ref_root, @dir, @name).
 *
 * Existing entries of rec->backrefs are compared on ref_root, dir, namelen
 * and the name bytes; @index takes no part in the comparison.  A new entry is
 * allocated zeroed with room for the name plus a terminating NUL, filled in
 * (including @index) and appended to the tail of rec->backrefs.
 *
 * @name need not be NUL-terminated: exactly @namelen bytes are compared and
 * copied.
 *
 * NOTE(review): the statements executed on a match or mismatch, the check of
 * the calloc() result and the return statements are not visible in this
 * excerpt.
 */
3245 static struct root_backref *get_root_backref(struct root_record *rec,
3246 u64 ref_root, u64 dir, u64 index,
3247 const char *name, int namelen)
3249 struct root_backref *backref;
3251 list_for_each_entry(backref, &rec->backrefs, list) {
3252 if (backref->ref_root != ref_root || backref->dir != dir ||
3253 backref->namelen != namelen)
3255 if (memcmp(name, backref->name, namelen))
/* The extra "namelen + 1" bytes hold the NUL-terminated copy of @name. */
3260 backref = calloc(1, sizeof(*backref) + namelen + 1);
3263 backref->ref_root = ref_root;
3265 backref->index = index;
3266 backref->namelen = namelen;
3267 memcpy(backref->name, name, namelen);
3268 backref->name[namelen] = '\0';
3269 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Tear down the root_record that embeds @cache.
 *
 * The backrefs list is emptied one entry at a time: the first entry is
 * converted with to_root_backref() and unlinked with list_del().
 *
 * NOTE(review): the statements that release the backrefs and the record
 * itself are not visible in this excerpt.
 */
3273 static void free_root_record(struct cache_extent *cache)
3275 struct root_record *rec;
3276 struct root_backref *backref;
3278 rec = container_of(cache, struct root_record, cache);
3279 while (!list_empty(&rec->backrefs)) {
3280 backref = to_root_backref(rec->backrefs.next);
3281 list_del(&backref->list);
/*
 * Instantiate the cache-tree teardown helper for root records, with
 * free_root_record() as the per-entry callback.  Presumably this is what
 * defines the free_root_recs_tree() called elsewhere in this file -- confirm
 * against the macro definition.
 */
3288 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was seen.
 *
 * The root record for @root_id and its backref for (@ref_root, @dir, @name)
 * are looked up or created, @errors is OR-ed into the backref, and the
 * matching found_* flag is set:
 *  - BTRFS_DIR_ITEM_KEY      -> found_dir_item
 *  - BTRFS_DIR_INDEX_KEY     -> found_dir_index
 *  - BTRFS_ROOT_REF_KEY      -> found_forward_ref (REF_ERR_DUP_ROOT_REF if it
 *                               was already set)
 *  - BTRFS_ROOT_BACKREF_KEY  -> found_back_ref (REF_ERR_DUP_ROOT_BACKREF if
 *                               it was already set)
 * For every type other than BTRFS_DIR_ITEM_KEY the index is cross-checked:
 * if an index-carrying item was seen before and its index differs,
 * REF_ERR_INDEX_UNMATCH is flagged; backref->index is also assigned @index
 * (the exact branch is not visible here).  A backref that has both a forward
 * ref and a dir item is marked reachable.
 *
 * A failure of get_root_rec() trips BUG_ON().
 *
 * NOTE(review): the handling of a NULL backref, a few statements inside the
 * branches and the return statement are not visible in this excerpt.
 */
3290 static int add_root_backref(struct cache_tree *root_cache,
3291 u64 root_id, u64 ref_root, u64 dir, u64 index,
3292 const char *name, int namelen,
3293 int item_type, int errors)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = get_root_rec(root_cache, root_id);
3299 BUG_ON(IS_ERR(rec));
3300 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3303 backref->errors |= errors;
3305 if (item_type != BTRFS_DIR_ITEM_KEY) {
3306 if (backref->found_dir_index || backref->found_back_ref ||
3307 backref->found_forward_ref) {
3308 if (backref->index != index)
3309 backref->errors |= REF_ERR_INDEX_UNMATCH;
3311 backref->index = index;
3315 if (item_type == BTRFS_DIR_ITEM_KEY) {
3316 if (backref->found_forward_ref)
3318 backref->found_dir_item = 1;
3319 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3320 backref->found_dir_index = 1;
3321 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3322 if (backref->found_forward_ref)
3323 backref->errors |= REF_ERR_DUP_ROOT_REF;
3324 else if (backref->found_dir_item)
3326 backref->found_forward_ref = 1;
3327 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3328 if (backref->found_back_ref)
3329 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3330 backref->found_back_ref = 1;
3335 if (backref->found_forward_ref && backref->found_dir_item)
3336 backref->reachable = 1;
/*
 * Move the records gathered in @src_cache while walking @root into the
 * root-level cache @dst_cache.
 *
 * For the tree reloc root the source records are simply freed with
 * free_inode_recs_tree().  Otherwise each entry is removed from @src_cache,
 * its ino is tested with is_child_root(), and each of its backrefs is
 * forwarded to add_root_backref() once per item kind that was seen (dir item
 * and/or dir index), with rec->ino as the root id and root->root_key.objectid
 * as the referencing root.  A backref with found_inode_ref set trips
 * BUG_ON().  The source record is freed afterwards.
 *
 * NOTE(review): the loop construct, the handling of the is_child_root()
 * result, the last argument of both add_root_backref() calls and the return
 * statement are not visible in this excerpt.
 */
3340 static int merge_root_recs(struct btrfs_root *root,
3341 struct cache_tree *src_cache,
3342 struct cache_tree *dst_cache)
3344 struct cache_extent *cache;
3345 struct ptr_node *node;
3346 struct inode_record *rec;
3347 struct inode_backref *backref;
3350 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3351 free_inode_recs_tree(src_cache);
3356 cache = search_cache_extent(src_cache, 0);
3359 node = container_of(cache, struct ptr_node, cache);
3361 remove_cache_extent(src_cache, &node->cache);
3364 ret = is_child_root(root, root->objectid, rec->ino);
3370 list_for_each_entry(backref, &rec->backrefs, list) {
3371 BUG_ON(backref->found_inode_ref);
3372 if (backref->found_dir_item)
3373 add_root_backref(dst_cache, rec->ino,
3374 root->root_key.objectid, backref->dir,
3375 backref->index, backref->name,
3376 backref->namelen, BTRFS_DIR_ITEM_KEY,
3378 if (backref->found_dir_index)
3379 add_root_backref(dst_cache, rec->ino,
3380 root->root_key.objectid, backref->dir,
3381 backref->index, backref->name,
3382 backref->namelen, BTRFS_DIR_INDEX_KEY,
3386 free_inode_rec(rec);
/*
 * Verify the references between the fs trees recorded in @root_cache.
 *
 * The record of BTRFS_FS_TREE_OBJECTID is fetched first.  One pass over the
 * cache inspects the reachable backrefs of records with a nonzero found_ref,
 * looks up the referencing root's record, and can clear backref->reachable.
 * A second pass reports:
 *  - records with found_ref == 0 whose objectid lies between
 *    BTRFS_FIRST_FREE_OBJECTID and BTRFS_LAST_FREE_OBJECTID ("fs tree ... not
 *    referenced"), after consulting check_orphan_item() on the tree root and
 *    skipping records without a root item;
 *  - records whose backrefs miss a dir item, dir index, root backref or root
 *    ref, printing each reachable backref that has errors or belongs to a
 *    record without a root item.
 *
 * @root:       used to reach root->fs_info->tree_root.
 * @root_cache: cache of root_record entries.
 *
 * Returns 1 when `errors` is above zero and 0 otherwise.
 *
 * NOTE(review): the loop constructs, most branch bodies (continue/break,
 * counter updates) and several declarations are not visible in this excerpt.
 */
3393 static int check_root_refs(struct btrfs_root *root,
3394 struct cache_tree *root_cache)
3396 struct root_record *rec;
3397 struct root_record *ref_root;
3398 struct root_backref *backref;
3399 struct cache_extent *cache;
3405 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3406 BUG_ON(IS_ERR(rec));
3409 /* fixme: this can not detect circular references */
3412 cache = search_cache_extent(root_cache, 0);
3416 rec = container_of(cache, struct root_record, cache);
3417 cache = next_cache_extent(cache);
3419 if (rec->found_ref == 0)
3422 list_for_each_entry(backref, &rec->backrefs, list) {
3423 if (!backref->reachable)
3426 ref_root = get_root_rec(root_cache,
3428 BUG_ON(IS_ERR(ref_root));
3429 if (ref_root->found_ref > 0)
3432 backref->reachable = 0;
3434 if (rec->found_ref == 0)
3440 cache = search_cache_extent(root_cache, 0);
3444 rec = container_of(cache, struct root_record, cache);
3445 cache = next_cache_extent(cache);
3447 if (rec->found_ref == 0 &&
3448 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3449 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3450 ret = check_orphan_item(root->fs_info->tree_root,
3456 * If we don't have a root item then we likely just have
3457 * a dir item in a snapshot for this root but no actual
3458 * ref key or anything so it's meaningless.
3460 if (!rec->found_root_item)
3463 fprintf(stderr, "fs tree %llu not referenced\n",
3464 (unsigned long long)rec->objectid);
3468 if (rec->found_ref > 0 && !rec->found_root_item)
3470 list_for_each_entry(backref, &rec->backrefs, list) {
3471 if (!backref->found_dir_item)
3472 backref->errors |= REF_ERR_NO_DIR_ITEM;
3473 if (!backref->found_dir_index)
3474 backref->errors |= REF_ERR_NO_DIR_INDEX;
3475 if (!backref->found_back_ref)
3476 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3477 if (!backref->found_forward_ref)
3478 backref->errors |= REF_ERR_NO_ROOT_REF;
3479 if (backref->reachable && backref->errors)
3486 fprintf(stderr, "fs tree %llu refs %u %s\n",
3487 (unsigned long long)rec->objectid, rec->found_ref,
3488 rec->found_root_item ? "" : "not found");
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->reachable)
3493 if (!backref->errors && rec->found_root_item)
3495 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3496 " index %llu namelen %u name %s errors %x\n",
3497 (unsigned long long)backref->ref_root,
3498 (unsigned long long)backref->dir,
3499 (unsigned long long)backref->index,
3500 backref->namelen, backref->name,
3502 print_ref_error(backref->errors);
3505 return errors > 0 ? 1 : 0;
/*
 * Feed one root ref item into @root_cache.
 *
 * The btrfs_root_ref at @slot of @eb supplies the directory id, the sequence
 * (used as index) and the name length; the name bytes that follow the
 * structure are copied into a BTRFS_NAME_LEN-sized stack buffer.  The visible
 * lines cap the copied length at BTRFS_NAME_LEN and set
 * REF_ERR_NAME_TOO_LONG, presumably when name_len exceeds that limit.
 *
 * For a BTRFS_ROOT_REF_KEY the backref is recorded on root key->offset with
 * key->objectid as the referencing root; the other visible call swaps the
 * two.
 *
 * NOTE(review): the declarations of dirid, index, name_len, len and error,
 * the in-range branch of the length check and the return statement are not
 * visible in this excerpt.  The results of add_root_backref() are not used.
 */
3508 static int process_root_ref(struct extent_buffer *eb, int slot,
3509 struct btrfs_key *key,
3510 struct cache_tree *root_cache)
3516 struct btrfs_root_ref *ref;
3517 char namebuf[BTRFS_NAME_LEN];
3520 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3522 dirid = btrfs_root_ref_dirid(eb, ref);
3523 index = btrfs_root_ref_sequence(eb, ref);
3524 name_len = btrfs_root_ref_name_len(eb, ref);
3526 if (name_len <= BTRFS_NAME_LEN) {
3530 len = BTRFS_NAME_LEN;
3531 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the btrfs_root_ref structure. */
3533 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3535 if (key->type == BTRFS_ROOT_REF_KEY) {
3536 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3537 index, namebuf, len, key->type, error);
3539 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3540 index, namebuf, len, key->type, error);
/*
 * Per-entry teardown callback for the corrupt_blocks cache tree: recovers the
 * btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): the statement that releases the block is not visible in this
 * excerpt.
 */
3545 static void free_corrupt_block(struct cache_extent *cache)
3547 struct btrfs_corrupt_block *corrupt;
3549 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Instantiate the cache-tree teardown helper for corrupt blocks, with
 * free_corrupt_block() as the per-entry callback.  Presumably this is what
 * defines the free_corrupt_blocks_tree() called elsewhere in this file --
 * confirm against the macro definition.
 */
3553 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3556 * Repair the btree of the given root.
3558 * The fix is to remove the node key in corrupt_blocks cache_tree.
3559 * and rebalance the tree.
3560 * After the fix, the btree should be writeable.
/*
 * Drop the tree blocks listed in @corrupt_blocks from @root.
 *
 * An empty @corrupt_blocks is tested for up front.  Otherwise one transaction
 * is started and the cache is walked twice:
 *  1. for each entry, search down to the entry's level using its key
 *     (path->lowest_level = level, ins_len 0), read the block pointer at
 *     that slot, delete the pointer with btrfs_del_ptr() and release the
 *     extent with btrfs_free_extent();
 *  2. for each entry, search again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path freed at the end.
 *
 * @root:           tree to repair.
 * @corrupt_blocks: cache of btrfs_corrupt_block entries (level and key).
 *
 * NOTE(review): the loop constructs, the error checks with their goto
 * targets and the return statements are not visible in this excerpt.  The
 * value returned by btrfs_commit_transaction() is not used.
 */
3562 static int repair_btree(struct btrfs_root *root,
3563 struct cache_tree *corrupt_blocks)
3565 struct btrfs_trans_handle *trans;
3566 struct btrfs_path *path;
3567 struct btrfs_corrupt_block *corrupt;
3568 struct cache_extent *cache;
3569 struct btrfs_key key;
3574 if (cache_tree_empty(corrupt_blocks))
3577 path = btrfs_alloc_path();
3581 trans = btrfs_start_transaction(root, 1);
3582 if (IS_ERR(trans)) {
3583 ret = PTR_ERR(trans);
3584 fprintf(stderr, "Error starting transaction: %s\n",
3588 cache = first_cache_extent(corrupt_blocks);
3590 corrupt = container_of(cache, struct btrfs_corrupt_block,
3592 level = corrupt->level;
3593 path->lowest_level = level;
3594 key.objectid = corrupt->key.objectid;
3595 key.type = corrupt->key.type;
3596 key.offset = corrupt->key.offset;
3599 * Here we don't want to do any tree balance, since it may
3600 * cause a balance with corrupted brother leaf/node,
3601 * so ins_len set to 0 here.
3602 * Balance will be done after all corrupt node/leaf is deleted.
3604 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3607 offset = btrfs_node_blockptr(path->nodes[level],
3608 path->slots[level]);
3610 /* Remove the ptr */
3611 ret = btrfs_del_ptr(trans, root, path, level,
3612 path->slots[level]);
3616 * Remove the corresponding extent
3617 * return value is not concerned.
3619 btrfs_release_path(path);
3620 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3621 0, root->root_key.objectid,
3623 cache = next_cache_extent(cache);
3626 /* Balance the btree using btrfs_search_slot() */
3627 cache = first_cache_extent(corrupt_blocks);
3629 corrupt = container_of(cache, struct btrfs_corrupt_block,
3631 memcpy(&key, &corrupt->key, sizeof(key));
3632 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3635 /* return will always >0 since it won't find the item */
3637 btrfs_release_path(path);
3638 cache = next_cache_extent(cache);
3641 btrfs_commit_transaction(trans, root);
3643 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree.
 *
 * Visible steps, in order:
 *  - install a local corrupt_blocks cache in root->fs_info for the duration
 *    of the check (reset to NULL before returning);
 *  - unless this is the tree reloc root, fetch the root's record from
 *    @root_cache and set found_root_item when the root item's ref count is
 *    above zero;
 *  - move every entry of root->orphan_data_extents onto the inode record of
 *    its objectid and flag that record with I_ERR_FILE_EXTENT_ORPHAN;
 *  - validate the root block with btrfs_check_leaf() or btrfs_check_node();
 *  - position the path either at the root node (ref count above zero, or
 *    drop_progress objectid equal to 0) or at the drop_progress key at
 *    drop_level, then walk with walk_down_tree() and walk_up_tree();
 *  - list the corrupted blocks that were collected and call repair_btree()
 *    on them (the condition guarding the repair is not visible here);
 *  - merge the per-root records into @root_cache with merge_root_recs(),
 *    mark the current inode record as checked, and run check_inode_recs().
 *
 * @root:       tree to check.
 * @root_cache: cache of root records shared across all roots.
 * @wc:         walk state; nodes, active_node and root_level are reset here.
 *
 * NOTE(review): the walk loop, most error handling, several declarations
 * (ret, err, wret, level) and the return statement are not visible in this
 * excerpt.
 */
3647 static int check_fs_root(struct btrfs_root *root,
3648 struct cache_tree *root_cache,
3649 struct walk_control *wc)
3655 struct btrfs_path path;
3656 struct shared_node root_node;
3657 struct root_record *rec;
3658 struct btrfs_root_item *root_item = &root->root_item;
3659 struct cache_tree corrupt_blocks;
3660 struct orphan_data_extent *orphan;
3661 struct orphan_data_extent *tmp;
3662 enum btrfs_tree_block_status status;
3663 struct node_refs nrefs;
3666 * Reuse the corrupt_block cache tree to record corrupted tree block
3668 * Unlike the usage in extent tree check, here we do it in a per
3669 * fs/subvol tree base.
3671 cache_tree_init(&corrupt_blocks);
3672 root->fs_info->corrupt_blocks = &corrupt_blocks;
3674 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3675 rec = get_root_rec(root_cache, root->root_key.objectid);
3676 BUG_ON(IS_ERR(rec));
3677 if (btrfs_root_refs(root_item) > 0)
3678 rec->found_root_item = 1;
3681 btrfs_init_path(&path);
3682 memset(&root_node, 0, sizeof(root_node));
3683 cache_tree_init(&root_node.root_cache);
3684 cache_tree_init(&root_node.inode_cache);
3685 memset(&nrefs, 0, sizeof(nrefs));
3687 /* Move the orphan extent record to corresponding inode_record */
3688 list_for_each_entry_safe(orphan, tmp,
3689 &root->orphan_data_extents, list) {
3690 struct inode_record *inode;
3692 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3694 BUG_ON(IS_ERR(inode));
3695 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3696 list_move(&orphan->list, &inode->orphan_extents);
3699 level = btrfs_header_level(root->node);
3700 memset(wc->nodes, 0, sizeof(wc->nodes));
3701 wc->nodes[level] = &root_node;
3702 wc->active_node = level;
3703 wc->root_level = level;
3705 /* We may not have checked the root block, lets do that now */
3706 if (btrfs_is_leaf(root->node))
3707 status = btrfs_check_leaf(root, NULL, root->node);
3709 status = btrfs_check_node(root, NULL, root->node);
3710 if (status != BTRFS_TREE_BLOCK_CLEAN)
3713 if (btrfs_root_refs(root_item) > 0 ||
3714 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3715 path.nodes[level] = root->node;
3716 extent_buffer_get(root->node);
3717 path.slots[level] = 0;
3719 struct btrfs_key key;
3720 struct btrfs_disk_key found_key;
3722 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3723 level = root_item->drop_level;
3724 path.lowest_level = level;
3725 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3728 btrfs_node_key(path.nodes[level], &found_key,
3730 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3731 sizeof(found_key)));
3735 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3741 wret = walk_up_tree(root, &path, wc, &level);
3748 btrfs_release_path(&path);
3750 if (!cache_tree_empty(&corrupt_blocks)) {
3751 struct cache_extent *cache;
3752 struct btrfs_corrupt_block *corrupt;
3754 printf("The following tree block(s) is corrupted in tree %llu:\n",
3755 root->root_key.objectid);
3756 cache = first_cache_extent(&corrupt_blocks);
3758 corrupt = container_of(cache,
3759 struct btrfs_corrupt_block,
3761 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3762 cache->start, corrupt->level,
3763 corrupt->key.objectid, corrupt->key.type,
3764 corrupt->key.offset);
3765 cache = next_cache_extent(cache);
3768 printf("Try to repair the btree for root %llu\n",
3769 root->root_key.objectid);
3770 ret = repair_btree(root, &corrupt_blocks);
3772 fprintf(stderr, "Failed to repair btree: %s\n",
3775 printf("Btree for root %llu is fixed\n",
3776 root->root_key.objectid);
3780 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3784 if (root_node.current) {
3785 root_node.current->checked = 1;
3786 maybe_free_inode_rec(&root_node.inode_cache,
3790 err = check_inode_recs(root, &root_node.inode_cache);
3794 free_corrupt_blocks_tree(&corrupt_blocks);
3795 root->fs_info->corrupt_blocks = NULL;
3796 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk.
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are special
 * cased; every other value is decided by is_fstree().
 *
 * NOTE(review): the value returned for the two special-cased ids is not
 * visible in this excerpt.  check_fs_roots() handles the tree reloc id inside
 * the branch guarded by this function, so it is presumably nonzero -- confirm.
 */
3800 static int fs_root_objectid(u64 objectid)
3802 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3803 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3805 return is_fstree(objectid);
/*
 * Check every fs tree referenced from the tree root.
 *
 * The items of root->fs_info->tree_root are scanned:
 *  - a ROOT_ITEM key accepted by fs_root_objectid() is read and handed to
 *    check_fs_root().  The tree reloc root is read with
 *    btrfs_read_fs_root_no_cache() and freed again afterwards; other roots
 *    are read with btrfs_read_fs_root() after setting key.offset to (u64)-1;
 *  - ROOT_REF and ROOT_BACKREF items are passed to process_root_ref().
 * When check_fs_root() returns -EAGAIN, or tree_root->node no longer equals
 * the node remembered at the start, the root records in @root_cache are
 * dropped and the path is released (the restart itself is not visible in
 * this excerpt).
 *
 * Progress reporting is started when ctx.progress_enabled is set and stopped
 * with task_stop() at the end.  A warning is printed if wc.shared is not
 * empty after free_extent_cache_tree().
 *
 * @root:       any root of the filesystem; only root->fs_info is used.
 * @root_cache: cache of root records filled while the roots are checked.
 *
 * NOTE(review): the loop construct, the error accounting, the remaining key
 * setup and the return statement are not visible in this excerpt.
 */
3808 static int check_fs_roots(struct btrfs_root *root,
3809 struct cache_tree *root_cache)
3811 struct btrfs_path path;
3812 struct btrfs_key key;
3813 struct walk_control wc;
3814 struct extent_buffer *leaf, *tree_node;
3815 struct btrfs_root *tmp_root;
3816 struct btrfs_root *tree_root = root->fs_info->tree_root;
3820 if (ctx.progress_enabled) {
3821 ctx.tp = TASK_FS_ROOTS;
3822 task_start(ctx.info);
3826 * Just in case we made any changes to the extent tree that weren't
3827 * reflected into the free space cache yet.
3830 reset_cached_block_groups(root->fs_info);
3831 memset(&wc, 0, sizeof(wc));
3832 cache_tree_init(&wc.shared);
3833 btrfs_init_path(&path);
3838 key.type = BTRFS_ROOT_ITEM_KEY;
3839 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3844 tree_node = tree_root->node;
3846 if (tree_node != tree_root->node) {
3847 free_root_recs_tree(root_cache);
3848 btrfs_release_path(&path);
3851 leaf = path.nodes[0];
3852 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3853 ret = btrfs_next_leaf(tree_root, &path);
3859 leaf = path.nodes[0];
3861 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3862 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3863 fs_root_objectid(key.objectid)) {
3864 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3865 tmp_root = btrfs_read_fs_root_no_cache(
3866 root->fs_info, &key);
3868 key.offset = (u64)-1;
3869 tmp_root = btrfs_read_fs_root(
3870 root->fs_info, &key);
3872 if (IS_ERR(tmp_root)) {
3876 ret = check_fs_root(tmp_root, root_cache, &wc);
3877 if (ret == -EAGAIN) {
3878 free_root_recs_tree(root_cache);
3879 btrfs_release_path(&path);
3884 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3885 btrfs_free_fs_root(tmp_root);
3886 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3887 key.type == BTRFS_ROOT_BACKREF_KEY) {
3888 process_root_ref(leaf, path.slots[0], &key,
3895 btrfs_release_path(&path);
3897 free_extent_cache_tree(&wc.shared);
3898 if (!cache_tree_empty(&wc.shared))
3899 fprintf(stderr, "warning line %d\n", __LINE__);
3901 task_stop(ctx.info);
/*
 * Cross-check the backrefs recorded for extent @rec.
 *
 * Every node of rec->backref_tree is examined for:
 *  - a backref that was not found in the extent tree (!found_extent_tree);
 *  - a tree backref that was never referenced back (!found_ref);
 *  - for data backrefs: found_ref different from num_refs, disk_bytenr
 *    different from rec->start, and bytes different from rec->nr.
 * Data backrefs add their found_ref to a running total which is compared
 * with rec->refs at the end.  Each problem has a message that is written to
 * stderr.
 *
 * @rec:        extent record to verify.
 * @print_errs: presumably selects whether the messages are printed; the
 *              gating statements are not visible in this excerpt.
 *
 * NOTE(review): the error bookkeeping, the tree-backref branch of the
 * running total and the return statement are not visible here.
 * maybe_free_extent_rec() treats a zero result as "nothing wrong".
 */
3906 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3909 struct extent_backref *back;
3910 struct tree_backref *tback;
3911 struct data_backref *dback;
3915 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3916 back = rb_node_to_extent_backref(n);
3917 if (!back->found_extent_tree) {
3921 if (back->is_data) {
3922 dback = to_data_backref(back);
3923 fprintf(stderr, "Backref %llu %s %llu"
3924 " owner %llu offset %llu num_refs %lu"
3925 " not found in extent tree\n",
3926 (unsigned long long)rec->start,
3927 back->full_backref ?
3929 back->full_backref ?
3930 (unsigned long long)dback->parent:
3931 (unsigned long long)dback->root,
3932 (unsigned long long)dback->owner,
3933 (unsigned long long)dback->offset,
3934 (unsigned long)dback->num_refs);
3936 tback = to_tree_backref(back);
3937 fprintf(stderr, "Backref %llu parent %llu"
3938 " root %llu not found in extent tree\n",
3939 (unsigned long long)rec->start,
3940 (unsigned long long)tback->parent,
3941 (unsigned long long)tback->root);
3944 if (!back->is_data && !back->found_ref) {
3948 tback = to_tree_backref(back);
3949 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3950 (unsigned long long)rec->start,
3951 back->full_backref ? "parent" : "root",
3952 back->full_backref ?
3953 (unsigned long long)tback->parent :
3954 (unsigned long long)tback->root, back);
3956 if (back->is_data) {
3957 dback = to_data_backref(back);
3958 if (dback->found_ref != dback->num_refs) {
3962 fprintf(stderr, "Incorrect local backref count"
3963 " on %llu %s %llu owner %llu"
3964 " offset %llu found %u wanted %u back %p\n",
3965 (unsigned long long)rec->start,
3966 back->full_backref ?
3968 back->full_backref ?
3969 (unsigned long long)dback->parent:
3970 (unsigned long long)dback->root,
3971 (unsigned long long)dback->owner,
3972 (unsigned long long)dback->offset,
3973 dback->found_ref, dback->num_refs, back);
3975 if (dback->disk_bytenr != rec->start) {
3979 fprintf(stderr, "Backref disk bytenr does not"
3980 " match extent record, bytenr=%llu, "
3981 "ref bytenr=%llu\n",
3982 (unsigned long long)rec->start,
3983 (unsigned long long)dback->disk_bytenr);
3986 if (dback->bytes != rec->nr) {
3990 fprintf(stderr, "Backref bytes do not match "
3991 "extent backref, bytenr=%llu, ref "
3992 "bytes=%llu, backref bytes=%llu\n",
3993 (unsigned long long)rec->start,
3994 (unsigned long long)rec->nr,
3995 (unsigned long long)dback->bytes);
3998 if (!back->is_data) {
4001 dback = to_data_backref(back);
4002 found += dback->found_ref;
/* The accumulated count must equal the ref count stored in the record. */
4005 if (found != rec->refs) {
4009 fprintf(stderr, "Incorrect global backref count "
4010 "on %llu found %llu wanted %llu\n",
4011 (unsigned long long)rec->start,
4012 (unsigned long long)found,
4013 (unsigned long long)rec->refs);
/*
 * Per-node callback handed to rb_free_nodes() by free_all_extent_backrefs():
 * converts @node back to the extent_backref that embeds it.
 *
 * NOTE(review): the statement that releases the backref is not visible in
 * this excerpt.
 */
4019 static void __free_one_backref(struct rb_node *node)
4021 struct extent_backref *back = rb_node_to_extent_backref(node);
/*
 * Release every backref attached to @rec by passing rec->backref_tree to
 * rb_free_nodes() with __free_one_backref() as the per-node callback.
 */
4026 static void free_all_extent_backrefs(struct extent_record *rec)
4028 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Empty @extent_cache: take the first cache extent, remove it from the cache
 * and release the backrefs of the extent_record that embeds it.
 *
 * @fs_info is not referenced by the lines visible in this excerpt.
 *
 * NOTE(review): the loop construct and the statement that releases the
 * record itself are not visible here.
 */
4031 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4032 struct cache_tree *extent_cache)
4034 struct cache_extent *cache;
4035 struct extent_record *rec;
4038 cache = first_cache_extent(extent_cache);
4041 rec = container_of(cache, struct extent_record, cache);
4042 remove_cache_extent(extent_cache, cache);
4043 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify.
 *
 * The record is removed only when all of the following hold: its content and
 * owner ref were checked, extent_item_refs equals refs and refs is above
 * zero, it has no duplicates, all_backpointers_checked() returns 0, and none
 * of bad_full_backref, crossing_stripes or wrong_chunk_type is set.  Removal
 * takes it out of the cache, releases its backrefs and unlinks it from its
 * list.
 *
 * NOTE(review): the statement that releases the record and the return
 * statement are not visible in this excerpt.
 */
4048 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4049 struct extent_record *rec)
4051 if (rec->content_checked && rec->owner_ref_checked &&
4052 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4053 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4054 !rec->bad_full_backref && !rec->crossing_stripes &&
4055 !rec->wrong_chunk_type) {
4056 remove_cache_extent(extent_cache, &rec->cache);
4057 free_all_extent_backrefs(rec);
4058 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is referenced from the tree named in its
 * header owner field.
 *
 * First the backrefs of @rec are scanned for a found, non-full tree backref
 * whose root equals btrfs_header_owner(buf).  Failing that, the owner's fs
 * root is read with btrfs_read_fs_root() and searched for the first key of
 * @buf with lowest_level set one level above @buf; the block pointer found
 * in that parent node is compared with buf->start.
 *
 * @root: used to reach root->fs_info.
 * @rec:  extent record of @buf; rec->is_root set at the second stage trips
 *        BUG_ON().
 * @buf:  tree block being checked.
 *
 * Returns 0 when `found` is set and 1 otherwise.
 *
 * NOTE(review): the statements taken inside the backref scan, the handling of
 * a failed root read or search, and the declarations of ret, level and found
 * are not visible in this excerpt.
 */
4064 static int check_owner_ref(struct btrfs_root *root,
4065 struct extent_record *rec,
4066 struct extent_buffer *buf)
4068 struct extent_backref *node, *tmp;
4069 struct tree_backref *back;
4070 struct btrfs_root *ref_root;
4071 struct btrfs_key key;
4072 struct btrfs_path path;
4073 struct extent_buffer *parent;
4078 rbtree_postorder_for_each_entry_safe(node, tmp,
4079 &rec->backref_tree, node) {
4082 if (!node->found_ref)
4084 if (node->full_backref)
4086 back = to_tree_backref(node);
4087 if (btrfs_header_owner(buf) == back->root)
4090 BUG_ON(rec->is_root);
4092 /* try to find the block by search corresponding fs tree */
4093 key.objectid = btrfs_header_owner(buf);
4094 key.type = BTRFS_ROOT_ITEM_KEY;
4095 key.offset = (u64)-1;
4097 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4098 if (IS_ERR(ref_root))
4101 level = btrfs_header_level(buf);
4103 btrfs_item_key_to_cpu(buf, &key, 0);
4105 btrfs_node_key_to_cpu(buf, &key, 0);
4107 btrfs_init_path(&path);
/* Stop the search one level above @buf so the parent node's slot is available. */
4108 path.lowest_level = level + 1;
4109 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4113 parent = path.nodes[level + 1];
4114 if (parent && buf->start == btrfs_node_blockptr(parent,
4115 path.slots[level + 1]))
4118 btrfs_release_path(&path);
4119 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec and test whether one of them names the extent
 * tree: each entry is viewed as a tree backref, its full_backref flag is
 * tested, and back->root is compared with BTRFS_EXTENT_TREE_OBJECTID.
 */
4122 static int is_extent_tree_record(struct extent_record *rec)
4124 struct extent_backref *ref, *tmp;
4125 struct tree_backref *back;
4128 rbtree_postorder_for_each_entry_safe(ref, tmp,
4129 &rec->backref_tree, node) {
4132 back = to_tree_backref(ref);
4133 if (ref->full_backref)
4135 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * The extent record covering [start, start + len) is looked up in
 * @extent_cache and tested with is_extent_tree_record().  The parent key
 * stored in the record is converted to CPU order and passed, together with
 * the range, to btrfs_add_corrupt_extent_record(); that call's result is
 * returned.
 */
4142 static int record_bad_block_io(struct btrfs_fs_info *info,
4143 struct cache_tree *extent_cache,
4146 struct extent_record *rec;
4147 struct cache_extent *cache;
4148 struct btrfs_key key;
4150 cache = lookup_cache_extent(extent_cache, start, len);
4154 rec = container_of(cache, struct extent_record, cache);
4155 if (!is_extent_tree_record(rec))
4158 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4159 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node (non-zero header level): both btrfs_key_ptr entries are copied out
 * and written back crosswise; btrfs_fixup_low_keys() is also called with key
 * 0 of @buf for the level above.
 *
 * Leaf: both item payloads are copied into temporary buffers and written
 * back crosswise, then the offsets, sizes and keys of the two item headers
 * are exchanged through path->slots[0].
 *
 * NOTE(review): the leaf branch writes item1_data (allocated with item1_size
 * bytes) using item2_size, and item2_data using item1_size.  If the two
 * items differ in size this reads past the smaller allocation - confirm.
 */
4162 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4163 struct extent_buffer *buf, int slot)
4165 if (btrfs_header_level(buf)) {
4166 struct btrfs_key_ptr ptr1, ptr2;
4168 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4169 sizeof(struct btrfs_key_ptr));
4170 read_extent_buffer(buf, &ptr2,
4171 btrfs_node_key_ptr_offset(slot + 1),
4172 sizeof(struct btrfs_key_ptr));
4173 write_extent_buffer(buf, &ptr1,
4174 btrfs_node_key_ptr_offset(slot + 1),
4175 sizeof(struct btrfs_key_ptr));
4176 write_extent_buffer(buf, &ptr2,
4177 btrfs_node_key_ptr_offset(slot),
4178 sizeof(struct btrfs_key_ptr));
4180 struct btrfs_disk_key key;
4181 btrfs_node_key(buf, &key, 0);
4182 btrfs_fixup_low_keys(root, path, &key,
4183 btrfs_header_level(buf) + 1);
4186 struct btrfs_item *item1, *item2;
4187 struct btrfs_key k1, k2;
4188 char *item1_data, *item2_data;
4189 u32 item1_offset, item2_offset, item1_size, item2_size;
4191 item1 = btrfs_item_nr(slot);
4192 item2 = btrfs_item_nr(slot + 1);
4193 btrfs_item_key_to_cpu(buf, &k1, slot);
4194 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4195 item1_offset = btrfs_item_offset(buf, item1);
4196 item2_offset = btrfs_item_offset(buf, item2);
4197 item1_size = btrfs_item_size(buf, item1);
4198 item2_size = btrfs_item_size(buf, item2);
4200 item1_data = malloc(item1_size);
4203 item2_data = malloc(item2_size);
4209 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4210 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Payloads cross over: item1 data lands at item2 offset and vice versa. */
4212 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4213 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4217 btrfs_set_item_offset(buf, item1, item2_offset);
4218 btrfs_set_item_offset(buf, item2, item1_offset);
4219 btrfs_set_item_size(buf, item1, item2_size);
4220 btrfs_set_item_size(buf, item2, item1_size);
/* Keys are rewritten through the path, so slots[0] is repointed per item. */
4222 path->slots[0] = slot;
4223 btrfs_set_item_key_unsafe(root, path, &k2);
4224 path->slots[0] = slot + 1;
4225 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair the key ordering of the block at path->lowest_level.
 *
 * Neighbouring keys (node keys or item keys) are read pairwise and compared
 * with btrfs_comp_cpu_keys(); swap_values() is invoked on a pair and the
 * buffer is marked dirty afterwards.
 *
 * NOTE(review): the loop bound is btrfs_header_nritems(buf) - 1 compared
 * against `i`; if nritems is unsigned and may be 0 here the subtraction
 * wraps around - confirm.
 */
4230 static int fix_key_order(struct btrfs_trans_handle *trans,
4231 struct btrfs_root *root,
4232 struct btrfs_path *path)
4234 struct extent_buffer *buf;
4235 struct btrfs_key k1, k2;
4237 int level = path->lowest_level;
4240 buf = path->nodes[level];
4241 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4243 btrfs_node_key_to_cpu(buf, &k1, i);
4244 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4246 btrfs_item_key_to_cpu(buf, &k1, i);
4247 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4249 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4251 ret = swap_values(root, path, buf, i);
4254 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is tested against DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF, the types this routine can deal with
 * missing.  The deletion is announced on stdout, the following item headers
 * are moved down by one, nritems is decremented and the buffer is marked
 * dirty.  btrfs_fixup_low_keys() is also called with the key of item 0.
 *
 * NOTE(review): buf->start is passed to "%llu" without the
 * (unsigned long long) cast that the other arguments use - confirm the
 * width of its type.
 */
4260 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4261 struct btrfs_root *root,
4262 struct btrfs_path *path,
4263 struct extent_buffer *buf, int slot)
4265 struct btrfs_key key;
4266 int nritems = btrfs_header_nritems(buf);
4268 btrfs_item_key_to_cpu(buf, &key, slot);
4270 /* These are all the keys we can deal with missing. */
4271 if (key.type != BTRFS_DIR_INDEX_KEY &&
4272 key.type != BTRFS_EXTENT_ITEM_KEY &&
4273 key.type != BTRFS_METADATA_ITEM_KEY &&
4274 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4275 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4278 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4279 (unsigned long long)key.objectid, key.type,
4280 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; item payloads are not moved. */
4281 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4282 btrfs_item_nr_offset(slot + 1),
4283 sizeof(struct btrfs_item) *
4284 (nritems - slot - 1));
4285 btrfs_set_header_nritems(buf, nritems - 1);
4287 struct btrfs_disk_key disk_key;
4289 btrfs_item_key(buf, &disk_key, 0);
4290 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4292 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item is expected to end where the previous item begins.  When an item ends
 * beyond that boundary, delete_bogus_item() is tried and an error is printed
 * on stderr ("item is off the end of the leaf" / "items overlap").  When it
 * ends short of the boundary, the gap becomes `shift`: the item data is
 * moved up by that many bytes, the item offset is rewritten and the buffer
 * is marked dirty.
 */
4296 static int fix_item_offset(struct btrfs_trans_handle *trans,
4297 struct btrfs_root *root,
4298 struct btrfs_path *path)
4300 struct extent_buffer *buf;
4304 /* We should only get this for leaves */
4305 BUG_ON(path->lowest_level);
4306 buf = path->nodes[0];
4308 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4309 unsigned int shift = 0, offset;
4311 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4312 BTRFS_LEAF_DATA_SIZE(root)) {
4313 if (btrfs_item_end_nr(buf, i) >
4314 BTRFS_LEAF_DATA_SIZE(root)) {
4315 ret = delete_bogus_item(trans, root, path,
4319 fprintf(stderr, "item is off the end of the "
4320 "leaf, can't fix\n");
4324 shift = BTRFS_LEAF_DATA_SIZE(root) -
4325 btrfs_item_end_nr(buf, i);
4326 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4327 btrfs_item_offset_nr(buf, i - 1)) {
4328 if (btrfs_item_end_nr(buf, i) >
4329 btrfs_item_offset_nr(buf, i - 1)) {
4330 ret = delete_bogus_item(trans, root, path,
4334 fprintf(stderr, "items overlap, can't fix\n");
4338 shift = btrfs_item_offset_nr(buf, i - 1) -
4339 btrfs_item_end_nr(buf, i);
4344 printf("Shifting item nr %d by %u bytes in block %llu\n",
4345 i, shift, (unsigned long long)buf->start);
4346 offset = btrfs_item_offset_nr(buf, i);
4347 memmove_extent_buffer(buf,
4348 btrfs_leaf_data(buf) + offset + shift,
4349 btrfs_leaf_data(buf) + offset,
4350 btrfs_item_size_nr(buf, i));
4351 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4353 btrfs_mark_buffer_dirty(buf);
4357 * We may have moved things, in which case we want to exit so we don't
4358 * write those changes out. Once we have proper abort functionality in
4359 * progs this can be changed to something nicer.
4366 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4367 * then just return -EIO.
/*
 * Try to repair @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  The referencing roots are collected with
 * btrfs_find_all_roots(); for each of them the root is read, a transaction
 * is started and the tree is searched for the first key of @buf with
 * lowest_level set to the level of @buf and block checking disabled
 * (skip_check_block).  fix_key_order() or fix_item_offset() is then run on
 * the resulting path and the transaction is committed.
 */
4369 static int try_to_fix_bad_block(struct btrfs_root *root,
4370 struct extent_buffer *buf,
4371 enum btrfs_tree_block_status status)
4373 struct btrfs_trans_handle *trans;
4374 struct ulist *roots;
4375 struct ulist_node *node;
4376 struct btrfs_root *search_root;
4377 struct btrfs_path *path;
4378 struct ulist_iterator iter;
4379 struct btrfs_key root_key, key;
4382 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4383 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4386 path = btrfs_alloc_path();
4390 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4393 btrfs_free_path(path);
4397 ULIST_ITER_INIT(&iter);
4398 while ((node = ulist_next(roots, &iter))) {
4399 root_key.objectid = node->val;
4400 root_key.type = BTRFS_ROOT_ITEM_KEY;
4401 root_key.offset = (u64)-1;
4403 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4410 trans = btrfs_start_transaction(search_root, 0);
4411 if (IS_ERR(trans)) {
4412 ret = PTR_ERR(trans);
/* Search down to the level of @buf without validating the damaged block. */
4416 path->lowest_level = btrfs_header_level(buf);
4417 path->skip_check_block = 1;
4418 if (path->lowest_level)
4419 btrfs_node_key_to_cpu(buf, &key, 0);
4421 btrfs_item_key_to_cpu(buf, &key, 0);
4422 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4425 btrfs_commit_transaction(trans, search_root);
4428 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4429 ret = fix_key_order(trans, search_root, path);
4430 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4431 ret = fix_item_offset(trans, search_root, path);
4433 btrfs_commit_transaction(trans, search_root);
4436 btrfs_release_path(path);
4437 btrfs_commit_transaction(trans, search_root);
4440 btrfs_free_path(path);
/*
 * Validate one tree block and update the extent record that covers it.
 *
 * The record found for [buf->start, buf->len) receives the header
 * generation, the objectid of the first key (when the block has items) and
 * the level.  The block is then run through btrfs_check_leaf() or
 * btrfs_check_node().  On a non-clean status try_to_fix_bad_block() is
 * attempted and "bad block" is reported when the status is still not clean.
 * Otherwise the record is flagged content_checked, owner_ref_checked is set
 * for BTRFS_BLOCK_FLAG_FULL_BACKREF blocks or in connection with
 * check_owner_ref(), and the record is offered to maybe_free_extent_rec().
 *
 * NOTE(review): `status` is an enum btrfs_tree_block_status but is assigned
 * the result of try_to_fix_bad_block(), which is declared to return int.
 * The later comparison presumably relies on BTRFS_TREE_BLOCK_CLEAN being 0 -
 * confirm.
 */
4444 static int check_block(struct btrfs_root *root,
4445 struct cache_tree *extent_cache,
4446 struct extent_buffer *buf, u64 flags)
4448 struct extent_record *rec;
4449 struct cache_extent *cache;
4450 struct btrfs_key key;
4451 enum btrfs_tree_block_status status;
4455 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4458 rec = container_of(cache, struct extent_record, cache);
4459 rec->generation = btrfs_header_generation(buf);
4461 level = btrfs_header_level(buf);
4462 if (btrfs_header_nritems(buf) > 0) {
4465 btrfs_item_key_to_cpu(buf, &key, 0);
4467 btrfs_node_key_to_cpu(buf, &key, 0);
4469 rec->info_objectid = key.objectid;
4471 rec->info_level = level;
4473 if (btrfs_is_leaf(buf))
4474 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4476 status = btrfs_check_node(root, &rec->parent_key, buf);
4478 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4480 status = try_to_fix_bad_block(root, buf, status);
4481 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4483 fprintf(stderr, "bad block %llu\n",
4484 (unsigned long long)buf->start);
4487 * Signal to callers we need to start the scan over
4488 * again since we'll have cowed blocks.
4493 rec->content_checked = 1;
4494 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4495 rec->owner_ref_checked = 1;
4497 ret = check_owner_ref(root, rec, buf);
4499 rec->owner_ref_checked = 1;
4503 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up the tree backref of @rec that matches @parent / @root.
 *
 * A stack-allocated tree_backref serves as the search key (the visible
 * branch stores @parent and sets full_backref) and is handed to rb_search()
 * with compare_extent_backref() as comparator.  A hit is converted back with
 * to_tree_backref(); `back` starts out as NULL.
 */
4508 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4509 u64 parent, u64 root)
4511 struct rb_node *node;
4512 struct tree_backref *back = NULL;
4513 struct tree_backref match = {
4520 match.parent = parent;
4521 match.node.full_backref = 1;
4526 node = rb_search(&rec->backref_tree, &match.node.node,
4527 (rb_compare_keys)compare_extent_backref, NULL);
4529 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a tree backref for @rec and insert it into rec->backref_tree.
 *
 * The embedded extent_backref node is zeroed first.  One branch records
 * @parent and marks the entry as a full backref, the other clears
 * full_backref.  Insertion uses compare_extent_backref() as comparator.
 */
4534 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4535 u64 parent, u64 root)
4537 struct tree_backref *ref = malloc(sizeof(*ref));
4541 memset(&ref->node, 0, sizeof(ref->node));
4543 ref->parent = parent;
4544 ref->node.full_backref = 1;
4547 ref->node.full_backref = 0;
4549 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up the data backref of @rec that matches the given identity.
 *
 * A stack-allocated data_backref serves as the search key; among other
 * fields it carries found_ref and disk_bytenr, and the visible branch stores
 * @parent and sets full_backref.  The key is handed to rb_search() with
 * compare_extent_backref() as comparator and a hit is converted back with
 * to_data_backref(); `back` starts out as NULL.
 */
4554 static struct data_backref *find_data_backref(struct extent_record *rec,
4555 u64 parent, u64 root,
4556 u64 owner, u64 offset,
4558 u64 disk_bytenr, u64 bytes)
4560 struct rb_node *node;
4561 struct data_backref *back = NULL;
4562 struct data_backref match = {
4569 .found_ref = found_ref,
4570 .disk_bytenr = disk_bytenr,
4574 match.parent = parent;
4575 match.node.full_backref = 1;
4580 node = rb_search(&rec->backref_tree, &match.node.node,
4581 (rb_compare_keys)compare_extent_backref, NULL);
4583 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a data backref for @rec and insert it into rec->backref_tree.
 *
 * The embedded node is zeroed and flagged is_data.  One branch records
 * @parent and marks the entry as a full backref, the other stores @offset
 * and clears full_backref.  The entry starts with bytes = max_size, and
 * rec->max_size is raised when max_size exceeds it.
 */
4588 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4589 u64 parent, u64 root,
4590 u64 owner, u64 offset,
4593 struct data_backref *ref = malloc(sizeof(*ref));
4597 memset(&ref->node, 0, sizeof(ref->node));
4598 ref->node.is_data = 1;
4601 ref->parent = parent;
4604 ref->node.full_backref = 1;
4608 ref->offset = offset;
4609 ref->node.full_backref = 0;
4611 ref->bytes = max_size;
4614 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4615 if (max_size > rec->max_size)
4616 rec->max_size = max_size;
4620 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the block group found for rec->start does
 * not fit the kind of extent described by @rec.
 *
 * Data extents require BTRFS_BLOCK_GROUP_DATA.  Metadata extents require
 * SYSTEM or METADATA; if @rec has backrefs, the first one selects which of
 * the two is expected (root == BTRFS_CHUNK_TREE_OBJECTID selects SYSTEM),
 * and a data backref on a tree block is itself treated as a mismatch.
 */
4621 static void check_extent_type(struct extent_record *rec)
4623 struct btrfs_block_group_cache *bg_cache;
4625 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4629 /* data extent, check chunk directly*/
4630 if (!rec->metadata) {
4631 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4632 rec->wrong_chunk_type = 1;
4636 /* metadata extent, check the obvious case first */
4637 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4638 BTRFS_BLOCK_GROUP_METADATA))) {
4639 rec->wrong_chunk_type = 1;
4644 * Check SYSTEM extent, as it's also marked as metadata, we can only
4645 * make sure it's a SYSTEM extent by its backref
4647 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4648 struct extent_backref *node;
4649 struct tree_backref *tback;
4652 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4653 if (node->is_data) {
4654 /* tree block shouldn't have data backref */
4655 rec->wrong_chunk_type = 1;
4658 tback = container_of(node, struct tree_backref, node);
4660 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4661 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4663 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4664 if (!(bg_cache->flags & bg_type))
4665 rec->wrong_chunk_type = 1;
4670 * Allocate a new extent record, fill default values from @tmpl and insert into
4671 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4672 * the cache, otherwise it fails.
/*
 * Build a new extent_record from @tmpl and insert it into @extent_cache.
 *
 * Fields are copied from the template, with nr = max(tmpl->nr,
 * tmpl->max_size).  The duplicate counter and the bad_full_backref,
 * crossing_stripes and wrong_chunk_type flags start at zero, and
 * flag_block_full_backref starts as FLAG_UNSET.  After insertion rec->nr is
 * added to the global bytes_used, crossing_stripes is computed with
 * check_crossing_stripes() and the record is classified by
 * check_extent_type().
 *
 * NOTE(review): rec->nr can be larger than rec->cache.size, which is taken
 * from tmpl->nr alone - confirm this is intended.
 * NOTE(review): the record comes from malloc() and only the members assigned
 * below are initialised - confirm no other member is read before being set.
 */
4674 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4675 struct extent_record *tmpl)
4677 struct extent_record *rec;
4680 rec = malloc(sizeof(*rec));
4683 rec->start = tmpl->start;
4684 rec->max_size = tmpl->max_size;
4685 rec->nr = max(tmpl->nr, tmpl->max_size);
4686 rec->found_rec = tmpl->found_rec;
4687 rec->content_checked = tmpl->content_checked;
4688 rec->owner_ref_checked = tmpl->owner_ref_checked;
4689 rec->num_duplicates = 0;
4690 rec->metadata = tmpl->metadata;
4691 rec->flag_block_full_backref = FLAG_UNSET;
4692 rec->bad_full_backref = 0;
4693 rec->crossing_stripes = 0;
4694 rec->wrong_chunk_type = 0;
4695 rec->is_root = tmpl->is_root;
4696 rec->refs = tmpl->refs;
4697 rec->extent_item_refs = tmpl->extent_item_refs;
4698 rec->parent_generation = tmpl->parent_generation;
4699 INIT_LIST_HEAD(&rec->backrefs);
4700 INIT_LIST_HEAD(&rec->dups);
4701 INIT_LIST_HEAD(&rec->list);
4702 rec->backref_tree = RB_ROOT;
4703 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4704 rec->cache.start = tmpl->start;
4705 rec->cache.size = tmpl->nr;
4706 ret = insert_cache_extent(extent_cache, &rec->cache);
4708 bytes_used += rec->nr;
4711 rec->crossing_stripes = check_crossing_stripes(rec->start,
4712 global_info->tree_root->nodesize);
4713 check_extent_type(rec);
4718 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4720 * - refs - if found, increase refs
4721 * - is_root - if found, set
4722 * - content_checked - if found, set
4723 * - owner_ref_checked - if found, set
4725 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup().
 *
 * For an existing record:
 *  - when the template is a found extent item (found_rec) whose start
 *    differs from the record, or the record was already found, the record
 *    is queued on duplicate_extents and a separately allocated
 *    extent_record describing the template is appended to rec->dups;
 *  - a non-zero tmpl->extent_item_refs is taken over unless this was a
 *    duplicate, with a message on stderr when the record already had one;
 *  - content_checked / owner_ref_checked are only ever set, parent_key is
 *    copied, parent_generation is taken when non-zero and max_size only
 *    grows;
 *  - crossing_stripes and the chunk type are re-evaluated and the record is
 *    offered to maybe_free_extent_rec().
 */
4727 static int add_extent_rec(struct cache_tree *extent_cache,
4728 struct extent_record *tmpl)
4730 struct extent_record *rec;
4731 struct cache_extent *cache;
4735 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4737 rec = container_of(cache, struct extent_record, cache);
4741 rec->nr = max(tmpl->nr, tmpl->max_size);
4744 * We need to make sure to reset nr to whatever the extent
4745 * record says was the real size, this way we can compare it to
4748 if (tmpl->found_rec) {
4749 if (tmpl->start != rec->start || rec->found_rec) {
4750 struct extent_record *tmp;
4753 if (list_empty(&rec->list))
4754 list_add_tail(&rec->list,
4755 &duplicate_extents);
4758 * We have to do this song and dance in case we
4759 * find an extent record that falls inside of
4760 * our current extent record but does not have
4761 * the same objectid.
4763 tmp = malloc(sizeof(*tmp));
4766 tmp->start = tmpl->start;
4767 tmp->max_size = tmpl->max_size;
4770 tmp->metadata = tmpl->metadata;
4771 tmp->extent_item_refs = tmpl->extent_item_refs;
4772 INIT_LIST_HEAD(&tmp->list);
4773 list_add_tail(&tmp->list, &rec->dups);
4774 rec->num_duplicates++;
4781 if (tmpl->extent_item_refs && !dup) {
4782 if (rec->extent_item_refs) {
4783 fprintf(stderr, "block %llu rec "
4784 "extent_item_refs %llu, passed %llu\n",
4785 (unsigned long long)tmpl->start,
4786 (unsigned long long)
4787 rec->extent_item_refs,
4788 (unsigned long long)tmpl->extent_item_refs);
4790 rec->extent_item_refs = tmpl->extent_item_refs;
4794 if (tmpl->content_checked)
4795 rec->content_checked = 1;
4796 if (tmpl->owner_ref_checked)
4797 rec->owner_ref_checked = 1;
4798 memcpy(&rec->parent_key, &tmpl->parent_key,
4799 sizeof(tmpl->parent_key));
4800 if (tmpl->parent_generation)
4801 rec->parent_generation = tmpl->parent_generation;
4802 if (rec->max_size < tmpl->max_size)
4803 rec->max_size = tmpl->max_size;
4806 * A metadata extent can't cross stripe_len boundary, otherwise
4807 * kernel scrub won't be able to handle it.
4808 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4812 rec->crossing_stripes = check_crossing_stripes(
4813 rec->start, global_info->tree_root->nodesize);
4814 check_extent_type(rec);
4815 maybe_free_extent_rec(extent_cache, rec);
4819 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent / @root) for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a zeroed template with
 * start = bytenr is inserted through add_extent_rec_nolookup() and the
 * lookup is repeated.  The matching backref is found with
 * find_tree_backref() or created with alloc_tree_backref().  Two parallel
 * paths then mark the backref: one sets node.found_ref, the other sets
 * node.found_extent_tree; each prints "Extent back ref already exists" when
 * its flag was already set.  Finally the chunk type is re-checked and the
 * record is offered to maybe_free_extent_rec().
 *
 * NOTE(review): the return value of add_extent_rec_nolookup() is not
 * examined here.
 */
4824 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4825 u64 parent, u64 root, int found_ref)
4827 struct extent_record *rec;
4828 struct tree_backref *back;
4829 struct cache_extent *cache;
4831 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4833 struct extent_record tmpl;
4835 memset(&tmpl, 0, sizeof(tmpl));
4836 tmpl.start = bytenr;
4840 add_extent_rec_nolookup(extent_cache, &tmpl);
4842 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4847 rec = container_of(cache, struct extent_record, cache);
4848 if (rec->start != bytenr) {
4852 back = find_tree_backref(rec, parent, root);
4854 back = alloc_tree_backref(rec, parent, root);
4859 if (back->node.found_ref) {
4860 fprintf(stderr, "Extent back ref already exists "
4861 "for %llu parent %llu root %llu \n",
4862 (unsigned long long)bytenr,
4863 (unsigned long long)parent,
4864 (unsigned long long)root);
4866 back->node.found_ref = 1;
4868 if (back->node.found_extent_tree) {
4869 fprintf(stderr, "Extent back ref already exists "
4870 "for %llu parent %llu root %llu \n",
4871 (unsigned long long)bytenr,
4872 (unsigned long long)parent,
4873 (unsigned long long)root);
4875 back->node.found_extent_tree = 1;
4877 check_extent_type(rec);
4878 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up in @extent_cache; a zeroed template with
 * start = bytenr and the given max_size is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated.  rec->max_size only
 * grows.  The matching backref is found with find_data_backref() or created
 * with alloc_data_backref().
 *
 * On one path num_refs must be 1 (BUG_ON), a previously found ref must have
 * the same byte count (BUG_ON), and the backref gets found_ref, bytes and
 * disk_bytenr updated while the record is flagged content_checked and
 * owner_ref_checked.  On the other path num_refs is stored and
 * node.found_extent_tree is set, with "Extent back ref already exists"
 * printed when that flag was already set.  The record is finally offered to
 * maybe_free_extent_rec().
 *
 * NOTE(review): the return value of add_extent_rec_nolookup() is not
 * examined here.
 */
4882 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4883 u64 parent, u64 root, u64 owner, u64 offset,
4884 u32 num_refs, int found_ref, u64 max_size)
4886 struct extent_record *rec;
4887 struct data_backref *back;
4888 struct cache_extent *cache;
4890 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4892 struct extent_record tmpl;
4894 memset(&tmpl, 0, sizeof(tmpl));
4895 tmpl.start = bytenr;
4897 tmpl.max_size = max_size;
4899 add_extent_rec_nolookup(extent_cache, &tmpl);
4901 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4906 rec = container_of(cache, struct extent_record, cache);
4907 if (rec->max_size < max_size)
4908 rec->max_size = max_size;
4911 * If found_ref is set then max_size is the real size and must match the
4912 * existing refs. So if we have already found a ref then we need to
4913 * make sure that this ref matches the existing one, otherwise we need
4914 * to add a new backref so we can notice that the backrefs don't match
4915 * and we need to figure out who is telling the truth. This is to
4916 * account for that awful fsync bug I introduced where we'd end up with
4917 * a btrfs_file_extent_item that would have its length include multiple
4918 * prealloc extents or point inside of a prealloc extent.
4920 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4923 back = alloc_data_backref(rec, parent, root, owner, offset,
4929 BUG_ON(num_refs != 1);
4930 if (back->node.found_ref)
4931 BUG_ON(back->bytes != max_size);
4932 back->node.found_ref = 1;
4933 back->found_ref += 1;
4934 back->bytes = max_size;
4935 back->disk_bytenr = bytenr;
4937 rec->content_checked = 1;
4938 rec->owner_ref_checked = 1;
4940 if (back->node.found_extent_tree) {
4941 fprintf(stderr, "Extent back ref already exists "
4942 "for %llu parent %llu root %llu "
4943 "owner %llu offset %llu num_refs %lu\n",
4944 (unsigned long long)bytenr,
4945 (unsigned long long)parent,
4946 (unsigned long long)root,
4947 (unsigned long long)owner,
4948 (unsigned long long)offset,
4949 (unsigned long)num_refs);
4951 back->num_refs = num_refs;
4952 back->node.found_extent_tree = 1;
4954 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is added to
 * @seen first (result kept in `ret`) and then to @pending.
 *
 * NOTE(review): the result of the insertion into @pending is not examined.
 */
4958 static int add_pending(struct cache_tree *pending,
4959 struct cache_tree *seen, u64 bytenr, u32 size)
4962 ret = add_cache_extent(seen, bytenr, size);
4965 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with block ranges to process next, at most @bits_nr of them.
 *
 * @reada is consulted first and its first extent goes into bits[0].  @nodes
 * is searched starting 32768 bytes before @last (when @last is larger than
 * that), with a fallback search from offset 0, and @pending is searched from
 * offset 0.  Consecutive extents are copied into @bits while slots remain.
 * When more than 8 slots are still free, @pending is searched again from the
 * end of bits[0] and the gap to each following extent is compared against
 * 32768 bytes before it is appended.
 *
 * NOTE(review): node_start is an unsigned long initialised from the u64
 * @last; on targets where unsigned long is 32 bits this truncates - confirm.
 */
4969 static int pick_next_pending(struct cache_tree *pending,
4970 struct cache_tree *reada,
4971 struct cache_tree *nodes,
4972 u64 last, struct block_info *bits, int bits_nr,
4975 unsigned long node_start = last;
4976 struct cache_extent *cache;
4979 cache = search_cache_extent(reada, 0);
4981 bits[0].start = cache->start;
4982 bits[0].size = cache->size;
4987 if (node_start > 32768)
4988 node_start -= 32768;
4990 cache = search_cache_extent(nodes, node_start);
4992 cache = search_cache_extent(nodes, 0);
4995 cache = search_cache_extent(pending, 0);
5000 bits[ret].start = cache->start;
5001 bits[ret].size = cache->size;
5002 cache = next_cache_extent(cache);
5004 } while (cache && ret < bits_nr);
5010 bits[ret].start = cache->start;
5011 bits[ret].size = cache->size;
5012 cache = next_cache_extent(cache);
5014 } while (cache && ret < bits_nr);
5016 if (bits_nr - ret > 8) {
5017 u64 lookup = bits[0].start + bits[0].size;
5018 struct cache_extent *next;
5019 next = search_cache_extent(pending, lookup);
5021 if (next->start - lookup > 32768)
5023 bits[ret].start = next->start;
5024 bits[ret].size = next->size;
5025 lookup = next->start + next->size;
5029 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback for chunk records: maps @cache back to
 * its chunk_record and unlinks the record from its `list` and `dextents`
 * lists.
 */
5037 static void free_chunk_record(struct cache_extent *cache)
5039 struct chunk_record *rec;
5041 rec = container_of(cache, struct chunk_record, cache);
5042 list_del_init(&rec->list);
5043 list_del_init(&rec->dextents);
/* Release every chunk record held in @chunk_cache via free_chunk_record(). */
5047 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5049 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device cache: maps the rb_node back to the
 * device_record that embeds it.
 */
5052 static void free_device_record(struct rb_node *node)
5054 struct device_record *rec;
5056 rec = container_of(node, struct device_record, node);
/*
 * Instantiates FREE_RB_BASED_TREE for the device cache with
 * free_device_record() as the per-node callback.
 * NOTE(review): presumably this expands to the routine that frees the whole
 * device cache tree - confirm against the macro definition.
 */
5060 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree: the record is inserted into the cache tree keyed by
 * its embedded cache extent and appended to the tree->block_groups list.
 */
5062 int insert_block_group_record(struct block_group_tree *tree,
5063 struct block_group_record *bg_rec)
5067 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5071 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback for block group records: maps @cache
 * back to its block_group_record and unlinks the record from its list.
 */
5075 static void free_block_group_record(struct cache_extent *cache)
5077 struct block_group_record *rec;
5079 rec = container_of(cache, struct block_group_record, cache);
5080 list_del_init(&rec->list);
/* Release every record held in @tree via free_block_group_record(). */
5084 void free_block_group_tree(struct block_group_tree *tree)
5086 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree.  The record is inserted with insert_cache_extent2()
 * and queued on both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 */
5089 int insert_device_extent_record(struct device_extent_tree *tree,
5090 struct device_extent_record *de_rec)
5095 * Device extent is a bit different from the other extents, because
5096 * the extents which belong to the different devices may have the
5097 * same start and size, so we need use the special extent cache
5098 * search/insert functions.
5100 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5104 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5105 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback for device extent records: maps @cache
 * back to its device_extent_record and unlinks the record from chunk_list
 * and device_list when it is still queued on them.
 */
5109 static void free_device_extent_record(struct cache_extent *cache)
5111 struct device_extent_record *rec;
5113 rec = container_of(cache, struct device_extent_record, cache);
5114 if (!list_empty(&rec->chunk_list))
5115 list_del_init(&rec->chunk_list);
5116 if (!list_empty(&rec->device_list))
5117 list_del_init(&rec->device_list);
/* Release every record held in @tree via free_device_extent_record(). */
5121 void free_device_extent_tree(struct device_extent_tree *tree)
5123 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5126 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * key.objectid is used as the extent bytenr and key.offset as the parent.
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded through
 * add_tree_backref(); the other visible call records a data backref whose
 * ref count comes from btrfs_ref_count_v0().
 */
5127 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5128 struct extent_buffer *leaf, int slot)
5130 struct btrfs_extent_ref_v0 *ref0;
5131 struct btrfs_key key;
5133 btrfs_item_key_to_cpu(leaf, &key, slot);
5134 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5135 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5136 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5138 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5139 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Allocate a chunk_record describing the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for num_stripes stripes.  Its cache
 * extent covers [key->offset, key->offset + chunk length).  The key, the
 * header generation and the chunk fields (owner, stripe_len, type, io_width,
 * io_align, sector_size, stripe counts) are copied in, followed by devid,
 * offset and device UUID of every stripe.  An allocation failure is reported
 * on stderr.
 */
5145 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5146 struct btrfs_key *key,
5149 struct btrfs_chunk *ptr;
5150 struct chunk_record *rec;
5153 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5154 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5156 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5158 fprintf(stderr, "memory allocation failed\n");
5162 INIT_LIST_HEAD(&rec->list);
5163 INIT_LIST_HEAD(&rec->dextents);
5166 rec->cache.start = key->offset;
5167 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5169 rec->generation = btrfs_header_generation(leaf);
5171 rec->objectid = key->objectid;
5172 rec->type = key->type;
5173 rec->offset = key->offset;
5175 rec->length = rec->cache.size;
5176 rec->owner = btrfs_chunk_owner(leaf, ptr);
5177 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5178 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5179 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5180 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5181 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5182 rec->num_stripes = num_stripes;
5183 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5185 for (i = 0; i < rec->num_stripes; ++i) {
5186 rec->stripes[i].devid =
5187 btrfs_stripe_devid_nr(leaf, ptr, i);
5188 rec->stripes[i].offset =
5189 btrfs_stripe_offset_nr(leaf, ptr, i);
5190 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5191 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Build a chunk record for the item at @slot of @eb and insert it into
 * @chunk_cache; "Chunk[...] existed." is printed on stderr on the failure
 * path of the insertion.
 */
5198 static int process_chunk_item(struct cache_tree *chunk_cache,
5199 struct btrfs_key *key, struct extent_buffer *eb,
5202 struct chunk_record *rec;
5205 rec = btrfs_new_chunk_record(eb, key, slot);
5206 ret = insert_cache_extent(chunk_cache, &rec->cache);
5208 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5209 rec->offset, rec->length);
/*
 * Build a device_record for the dev item at @slot of @eb and insert it into
 * the @dev_cache rb-tree using device_record_compare().
 *
 * The record stores the key, the header generation and the device id,
 * total bytes and bytes used read from the item.  "Device[...] existed." is
 * printed on stderr on the failure path of the insertion.
 *
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from btrfs_device_id(); only the second value survives.
 */
5216 static int process_device_item(struct rb_root *dev_cache,
5217 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5219 struct btrfs_dev_item *ptr;
5220 struct device_record *rec;
5223 ptr = btrfs_item_ptr(eb,
5224 slot, struct btrfs_dev_item);
5226 rec = malloc(sizeof(*rec));
5228 fprintf(stderr, "memory allocation failed\n");
5232 rec->devid = key->offset;
5233 rec->generation = btrfs_header_generation(eb);
5235 rec->objectid = key->objectid;
5236 rec->type = key->type;
5237 rec->offset = key->offset;
5239 rec->devid = btrfs_device_id(eb, ptr);
5240 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5241 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5243 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5245 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Allocate a zeroed block_group_record for the block group item at @slot of
 * @leaf.  The cache extent covers [key->objectid, key->objectid +
 * key->offset); the key, the header generation and the on-disk block group
 * flags are stored and the list head is initialised.  An allocation failure
 * is reported on stderr.
 */
5252 struct block_group_record *
5253 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5256 struct btrfs_block_group_item *ptr;
5257 struct block_group_record *rec;
5259 rec = calloc(1, sizeof(*rec));
5261 fprintf(stderr, "memory allocation failed\n");
5265 rec->cache.start = key->objectid;
5266 rec->cache.size = key->offset;
5268 rec->generation = btrfs_header_generation(leaf);
5270 rec->objectid = key->objectid;
5271 rec->type = key->type;
5272 rec->offset = key->offset;
5274 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5275 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5277 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at @slot of @eb and insert it into
 * @block_group_cache; "Block Group[...] existed." is printed on stderr on
 * the failure path of the insertion.
 */
5282 static int process_block_group_item(struct block_group_tree *block_group_cache,
5283 struct btrfs_key *key,
5284 struct extent_buffer *eb, int slot)
5286 struct block_group_record *rec;
5289 rec = btrfs_new_block_group_record(eb, key, slot);
5290 ret = insert_block_group_record(block_group_cache, rec);
5292 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5293 rec->objectid, rec->offset);
/*
 * Allocate a zeroed device_extent_record for the dev extent item at @slot of
 * @leaf.  The cache extent is keyed by key->objectid and key->offset and
 * sized by the dev extent length; the key, the header generation and the
 * chunk objectid / offset / length read from the item are stored and both
 * list heads are initialised.  An allocation failure is reported on stderr.
 */
5300 struct device_extent_record *
5301 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5302 struct btrfs_key *key, int slot)
5304 struct device_extent_record *rec;
5305 struct btrfs_dev_extent *ptr;
5307 rec = calloc(1, sizeof(*rec));
5309 fprintf(stderr, "memory allocation failed\n");
5313 rec->cache.objectid = key->objectid;
5314 rec->cache.start = key->offset;
5316 rec->generation = btrfs_header_generation(leaf);
5318 rec->objectid = key->objectid;
5319 rec->type = key->type;
5320 rec->offset = key->offset;
5322 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5323 rec->chunk_objecteid =
5324 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5326 btrfs_dev_extent_chunk_offset(leaf, ptr);
5327 rec->length = btrfs_dev_extent_length(leaf, ptr);
5328 rec->cache.size = rec->length;
5330 INIT_LIST_HEAD(&rec->chunk_list);
5331 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device extent record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; "Device extent[...] existed." is printed on the
 * failure path of the insertion.
 */
5337 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5338 struct btrfs_key *key, struct extent_buffer *eb,
5341 struct device_extent_record *rec;
5344 rec = btrfs_new_device_extent_record(eb, key, slot);
5345 ret = insert_device_extent_record(dev_extent_cache, rec);
5348 "Device extent[%llu, %llu, %llu] existed.\n",
5349 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM at @slot of @eb into @extent_cache.
 *
 * The extent length is root->nodesize for METADATA_ITEM keys and key.offset
 * otherwise.  Items smaller than struct btrfs_extent_item are handled as v0
 * extent items (when BTRFS_COMPAT_EXTENT_TREE_V0 is defined) and only
 * produce an extent record.  For regular items an extent record is added
 * with the ref count from the item, then the inline refs that follow the
 * extent item are walked.  A btrfs_tree_block_info is skipped first for
 * tree blocks keyed as EXTENT_ITEM.  Each inline ref is dispatched on its
 * type to add_tree_backref() or add_data_backref().  Unknown types are
 * reported as "corrupt extent record".
 *
 * NOTE(review): the return value of the second add_extent_rec() call and of
 * the add_*_backref() calls is not examined.
 * NOTE(review): the corrupt-record message uses the "%Lu" conversion, which
 * the C standard defines only for long double; "%llu" with a cast is the
 * portable form - confirm and align with the rest of the file.
 */
5356 static int process_extent_item(struct btrfs_root *root,
5357 struct cache_tree *extent_cache,
5358 struct extent_buffer *eb, int slot)
5360 struct btrfs_extent_item *ei;
5361 struct btrfs_extent_inline_ref *iref;
5362 struct btrfs_extent_data_ref *dref;
5363 struct btrfs_shared_data_ref *sref;
5364 struct btrfs_key key;
5365 struct extent_record tmpl;
5369 u32 item_size = btrfs_item_size_nr(eb, slot);
5375 btrfs_item_key_to_cpu(eb, &key, slot);
5377 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5379 num_bytes = root->nodesize;
5381 num_bytes = key.offset;
5384 if (item_size < sizeof(*ei)) {
5385 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5386 struct btrfs_extent_item_v0 *ei0;
5387 BUG_ON(item_size != sizeof(*ei0));
5388 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5389 refs = btrfs_extent_refs_v0(eb, ei0);
5393 memset(&tmpl, 0, sizeof(tmpl));
5394 tmpl.start = key.objectid;
5395 tmpl.nr = num_bytes;
5396 tmpl.extent_item_refs = refs;
5397 tmpl.metadata = metadata;
5399 tmpl.max_size = num_bytes;
5401 return add_extent_rec(extent_cache, &tmpl);
5404 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5405 refs = btrfs_extent_refs(eb, ei);
5406 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5411 memset(&tmpl, 0, sizeof(tmpl));
5412 tmpl.start = key.objectid;
5413 tmpl.nr = num_bytes;
5414 tmpl.extent_item_refs = refs;
5415 tmpl.metadata = metadata;
5417 tmpl.max_size = num_bytes;
5418 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right behind the extent item. */
5420 ptr = (unsigned long)(ei + 1);
5421 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5422 key.type == BTRFS_EXTENT_ITEM_KEY)
5423 ptr += sizeof(struct btrfs_tree_block_info);
5425 end = (unsigned long)ei + item_size;
5427 iref = (struct btrfs_extent_inline_ref *)ptr;
5428 type = btrfs_extent_inline_ref_type(eb, iref);
5429 offset = btrfs_extent_inline_ref_offset(eb, iref);
5431 case BTRFS_TREE_BLOCK_REF_KEY:
5432 add_tree_backref(extent_cache, key.objectid,
5435 case BTRFS_SHARED_BLOCK_REF_KEY:
5436 add_tree_backref(extent_cache, key.objectid,
5439 case BTRFS_EXTENT_DATA_REF_KEY:
5440 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5441 add_data_backref(extent_cache, key.objectid, 0,
5442 btrfs_extent_data_ref_root(eb, dref),
5443 btrfs_extent_data_ref_objectid(eb,
5445 btrfs_extent_data_ref_offset(eb, dref),
5446 btrfs_extent_data_ref_count(eb, dref),
5449 case BTRFS_SHARED_DATA_REF_KEY:
5450 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5451 add_data_backref(extent_cache, key.objectid, offset,
5453 btrfs_shared_data_ref_count(eb, sref),
5457 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5458 key.objectid, key.type, num_bytes);
5461 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [offset, offset + bytes) inside block group @cache is
 * described by exactly one free space entry in cache->free_space_ctl.
 *
 * Super block mirrors are not free space, so for every mirror index the
 * mirror offset is mapped back to logical addresses inside the block group
 * with btrfs_rmap_block() and the overlapping stripe is cut out of the range:
 *  - a stripe starting exactly at @offset moves the start forward,
 *  - a stripe starting before @offset trims the front,
 *  - a stripe reaching the end of the range trims the tail,
 *  - a stripe in the middle makes the function recurse for the left side and
 *    continue with the right side.
 *
 * An entry that matches both offset and length is unlinked from the free
 * space ctl after it has been verified.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers treat a non-zero result as failure.
 */
5468 static int check_cache_range(struct btrfs_root *root,
5469 struct btrfs_block_group_cache *cache,
5470 u64 offset, u64 bytes)
5472 struct btrfs_free_space *entry;
5478 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5479 bytenr = btrfs_sb_offset(i);
5480 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5481 cache->key.objectid, bytenr, 0,
5482 &logical, &nr, &stripe_len);
/* Stripe ends at or before the start of the range. */
5487 if (logical[nr] + stripe_len <= offset)
/* Stripe starts at or after the end of the range. */
5489 if (offset + bytes <= logical[nr])
5491 if (logical[nr] == offset) {
5492 if (stripe_len >= bytes) {
5496 bytes -= stripe_len;
5497 offset += stripe_len;
5498 } else if (logical[nr] < offset) {
5499 if (logical[nr] + stripe_len >=
5504 bytes = (offset + bytes) -
5505 (logical[nr] + stripe_len);
5506 offset = logical[nr] + stripe_len;
5509 * Could be tricky, the super may land in the
5510 * middle of the area we're checking. First
5511 * check the easiest case, it's at the end.
5513 if (logical[nr] + stripe_len >=
5515 bytes = logical[nr] - offset;
5519 /* Check the left side */
5520 ret = check_cache_range(root, cache,
5522 logical[nr] - offset);
5528 /* Now we continue with the right side */
5529 bytes = (offset + bytes) -
5530 (logical[nr] + stripe_len);
5531 offset = logical[nr] + stripe_len;
/* What is left of the range must match one free space entry exactly. */
5538 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5540 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5541 offset, offset+bytes);
5545 if (entry->offset != offset) {
5546 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5551 if (entry->bytes != bytes) {
5552 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5553 bytes, entry->bytes, offset);
5557 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the extent
 * tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET).  Only EXTENT_ITEM and METADATA_ITEM keys inside
 * the block group are considered.  "last" tracks the end of the most recent
 * extent: key.offset bytes for an EXTENT_ITEM, root->nodesize bytes otherwise.
 * Every gap between "last" and the next extent start, and the gap between
 * the final extent and the end of the block group, is handed to
 * check_cache_range().
 *
 * After the walk a message is printed if the free_space_offset tree of the
 * ctl still holds entries.
 *
 * NOTE(review): the loop construct and return statements are not visible in
 * this excerpt.
 */
5562 static int verify_space_cache(struct btrfs_root *root,
5563 struct btrfs_block_group_cache *cache)
5565 struct btrfs_path *path;
5566 struct extent_buffer *leaf;
5567 struct btrfs_key key;
5571 path = btrfs_alloc_path();
/* From here on "root" refers to the extent tree, not the caller's root. */
5575 root = root->fs_info->extent_root;
5577 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5579 key.objectid = last;
5581 key.type = BTRFS_EXTENT_ITEM_KEY;
5583 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5588 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5589 ret = btrfs_next_leaf(root, path);
5597 leaf = path->nodes[0];
5598 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Key lies at or beyond the end of this block group. */
5599 if (key.objectid >= cache->key.offset + cache->key.objectid)
5601 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5602 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts right at "last": no gap to check, just advance. */
5607 if (last == key.objectid) {
5608 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5609 last = key.objectid + key.offset;
5611 last = key.objectid + root->nodesize;
/* Gap [last, key.objectid) has to be covered by the free space data. */
5616 ret = check_cache_range(root, cache, last,
5617 key.objectid - last);
5620 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5621 last = key.objectid + key.offset;
5623 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent. */
5627 if (last < cache->key.objectid + cache->key.offset)
5628 ret = check_cache_range(root, cache, last,
5629 cache->key.objectid +
5630 cache->key.offset - last);
5633 btrfs_free_path(path);
5636 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5637 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space information of every block group, beginning with
 * the first block group found at or after the end of the primary super block.
 *
 * If the super block records a cache generation other than -1ULL that differs
 * from the super generation, a notice is printed that the space cache will be
 * invalidated (presumably followed by an early return - not visible here).
 *
 * For each block group a free_space_ctl is set up with root->sectorsize when
 * missing.  The free space is loaded from the free space tree when the
 * FREE_SPACE_TREE compat_ro feature is set (with super stripes excluded
 * around the load), or from the free space cache otherwise.  The result is
 * then checked with verify_space_cache().
 *
 * Progress reporting is driven through the global ctx when enabled.
 *
 * Returns -EINVAL if an error was recorded, 0 otherwise.
 */
5645 static int check_space_cache(struct btrfs_root *root)
5647 struct btrfs_block_group_cache *cache;
5648 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5652 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5653 btrfs_super_generation(root->fs_info->super_copy) !=
5654 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5655 printf("cache and super generation don't match, space cache "
5656 "will be invalidated\n");
5660 if (ctx.progress_enabled) {
5661 ctx.tp = TASK_FREE_SPACE;
5662 task_start(ctx.info);
5666 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after this block group. */
5670 start = cache->key.objectid + cache->key.offset;
5671 if (!cache->free_space_ctl) {
5672 if (btrfs_init_free_space_ctl(cache,
5673 root->sectorsize)) {
5678 btrfs_remove_free_space_cache(cache);
5681 if (btrfs_fs_compat_ro(root->fs_info,
5682 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5683 ret = exclude_super_stripes(root, cache);
5685 fprintf(stderr, "could not exclude super stripes: %s\n",
5690 ret = load_free_space_tree(root->fs_info, cache);
5691 free_excluded_extents(root, cache);
5693 fprintf(stderr, "could not load free space tree: %s\n",
5700 ret = load_free_space_cache(root->fs_info, cache);
5705 ret = verify_space_cache(root, cache);
5707 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5708 cache->key.objectid);
5713 task_stop(ctx.info);
5715 return error ? -EINVAL : 0;
/*
 * Read the data range [bytenr, bytenr + num_bytes) and verify it against the
 * checksums stored in @eb starting at @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of root->sectorsize.  The data is
 * read into a malloc()ed buffer in as large pieces as read_extent_data()
 * delivers, and each sectorsize block of a piece is checksummed with
 * btrfs_csum_data()/btrfs_csum_final() and compared with the csum_size bytes
 * read from the leaf.  A mismatch is reported on stderr together with the
 * mirror number, and the number of copies is queried to decide whether
 * another mirror is available.
 *
 * NOTE(review): the retry path, the buffer release and the return statements
 * are not visible in this excerpt.
 */
5718 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5719 u64 num_bytes, unsigned long leaf_offset,
5720 struct extent_buffer *eb) {
5723 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5725 unsigned long csum_offset;
5729 u64 data_checked = 0;
5735 if (num_bytes % root->sectorsize)
5738 data = malloc(num_bytes);
5742 while (offset < num_bytes) {
5745 read_len = num_bytes - offset;
5746 /* read as much space once a time */
5747 ret = read_extent_data(root, data + offset,
5748 bytenr + offset, &read_len, mirror);
5752 /* verify every 4k data's checksum */
5753 while (data_checked < read_len) {
/* tmp is the byte position of this block relative to bytenr. */
5755 tmp = offset + data_checked;
5757 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5758 csum, root->sectorsize);
5759 btrfs_csum_final(csum, (char *)&csum);
/* One csum_size slot per sectorsize block, counted from leaf_offset. */
5761 csum_offset = leaf_offset +
5762 tmp / root->sectorsize * csum_size;
5763 read_extent_buffer(eb, (char *)&csum_expected,
5764 csum_offset, csum_size);
5765 /* try another mirror */
5766 if (csum != csum_expected) {
5767 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5768 mirror, bytenr + tmp,
5769 csum, csum_expected);
5770 num_copies = btrfs_num_copies(
5771 &root->fs_info->mapping_tree,
5773 if (mirror < num_copies - 1) {
5778 data_checked += root->sectorsize;
/*
 * Check that the byte range [bytenr, bytenr + num_bytes) is covered by
 * EXTENT_ITEM records in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps backwards, first
 * once and then for as long as the key type sorts after EXTENT_ITEM, so that
 * an extent starting before @bytenr is not missed.  Walking forward from
 * there, every extent item that overlaps the range shrinks it: from the
 * front, from the back, or - when the extent sits in the middle - by
 * recursing for the right part and continuing with the left part.
 *
 * If bytes remain uncovered and no other error occurred, the missing range is
 * reported on stderr.
 *
 * NOTE(review): loop constructs and return statements are not visible in this
 * excerpt.
 */
5787 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5790 struct btrfs_path *path;
5791 struct extent_buffer *leaf;
5792 struct btrfs_key key;
5795 path = btrfs_alloc_path();
5797 fprintf(stderr, "Error allocating path\n");
5801 key.objectid = bytenr;
5802 key.type = BTRFS_EXTENT_ITEM_KEY;
5803 key.offset = (u64)-1;
5806 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5809 fprintf(stderr, "Error looking up extent record %d\n", ret);
5810 btrfs_free_path(path);
5813 if (path->slots[0] > 0) {
5816 ret = btrfs_prev_leaf(root, path);
5819 } else if (ret > 0) {
5826 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5829 * Block group items come before extent items if they have the same
5830 * bytenr, so walk back one more just in case. Dear future traveller,
5831 * first congrats on mastering time travel. Now if it's not too much
5832 * trouble could you go back to 2006 and tell Chris to make the
5833 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5834 * EXTENT_ITEM_KEY please?
5836 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5837 if (path->slots[0] > 0) {
5840 ret = btrfs_prev_leaf(root, path);
5843 } else if (ret > 0) {
5848 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5852 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5853 ret = btrfs_next_leaf(root, path);
5855 fprintf(stderr, "Error going to next leaf "
5857 btrfs_free_path(path);
5863 leaf = path->nodes[0];
5864 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5865 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
5869 if (key.objectid + key.offset < bytenr) {
/* Extent starts after the range ends. */
5873 if (key.objectid > bytenr + num_bytes)
5876 if (key.objectid == bytenr) {
5877 if (key.offset >= num_bytes) {
5881 num_bytes -= key.offset;
5882 bytenr += key.offset;
5883 } else if (key.objectid < bytenr) {
5884 if (key.objectid + key.offset >= bytenr + num_bytes) {
5888 num_bytes = (bytenr + num_bytes) -
5889 (key.objectid + key.offset);
5890 bytenr = key.objectid + key.offset;
5892 if (key.objectid + key.offset < bytenr + num_bytes) {
5893 u64 new_start = key.objectid + key.offset;
5894 u64 new_bytes = bytenr + num_bytes - new_start;
5897 * Weird case, the extent is in the middle of
5898 * our range, we'll have to search one side
5899 * and then the other. Not sure if this happens
5900 * in real life, but no harm in coding it up
5901 * anyway just in case.
5903 btrfs_release_path(path);
5904 ret = check_extent_exists(root, new_start,
5907 fprintf(stderr, "Right section didn't "
5911 num_bytes = key.objectid - bytenr;
5914 num_bytes = key.objectid - bytenr;
5921 if (num_bytes && !ret) {
5922 fprintf(stderr, "There are no extents for csum range "
5923 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5927 btrfs_free_path(path);
/*
 * Walk all EXTENT_CSUM items of the csum tree and make sure every checksummed
 * range is backed by an extent record.
 *
 * Each csum item covers (item size / csum_size) * sectorsize bytes starting at
 * key.offset.  Adjacent items are merged into one running range described by
 * "offset" and "num_bytes"; when the next item does not start at
 * offset + num_bytes, the accumulated range is passed to
 * check_extent_exists() and a new range begins.
 *
 * When the global check_data_csum is set, the data itself is additionally
 * verified with check_extent_csums(); otherwise that step is skipped.
 *
 * NOTE(review): loop constructs, error accounting and return statements are
 * not visible in this excerpt.
 */
5931 static int check_csums(struct btrfs_root *root)
5933 struct btrfs_path *path;
5934 struct extent_buffer *leaf;
5935 struct btrfs_key key;
5936 u64 offset = 0, num_bytes = 0;
5937 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5941 unsigned long leaf_offset;
/* From here on "root" refers to the csum tree. */
5943 root = root->fs_info->csum_root;
5944 if (!extent_buffer_uptodate(root->node)) {
5945 fprintf(stderr, "No valid csum tree found\n");
5949 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5950 key.type = BTRFS_EXTENT_CSUM_KEY;
5953 path = btrfs_alloc_path();
5957 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5959 fprintf(stderr, "Error searching csum tree %d\n", ret);
5960 btrfs_free_path(path);
5964 if (ret > 0 && path->slots[0])
5969 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5970 ret = btrfs_next_leaf(root, path);
5972 fprintf(stderr, "Error going to next leaf "
5979 leaf = path->nodes[0];
5981 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5982 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes this csum item describes. */
5987 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5988 csum_size) * root->sectorsize;
5989 if (!check_data_csum)
5990 goto skip_csum_check;
5991 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5992 ret = check_extent_csums(root, key.offset, data_len,
5998 offset = key.offset;
/* Item is not contiguous with the running range: check it and restart. */
5999 } else if (key.offset != offset + num_bytes) {
6000 ret = check_extent_exists(root, offset, num_bytes);
6002 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6003 "there is no extent record\n",
6004 offset, offset+num_bytes);
6007 offset = key.offset;
6010 num_bytes += data_len;
6014 btrfs_free_path(path);
6018 static int is_dropped_key(struct btrfs_key *key,
6019 struct btrfs_key *drop_key) {
6020 if (key->objectid < drop_key->objectid)
6022 else if (key->objectid == drop_key->objectid) {
6023 if (key->type < drop_key->type)
6025 else if (key->type == drop_key->type) {
6026 if (key->offset < drop_key->offset)
6034 * Here are the rules for FULL_BACKREF.
6036 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6037 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6039 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6040 * if it happened after the relocation occurred since we'll have dropped the
6041 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6042 * have no real way to know for sure.
6044 * We process the blocks one root at a time, and we start from the lowest root
6045 * objectid and go to the highest. So we can just lookup the owner backref for
6046 * the record and if we don't find it then we know it doesn't exist and we have
6049 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6050 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6051 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether the tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, without consulting the extent tree, and
 * report the result through @flags.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * decision then looks at, in order: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root block recorded in @ri,
 * whether the RELOC header flag is set, whether the header owner equals
 * ri->objectid, and finally whether a tree backref for the header owner
 * exists on the record.
 *
 * Whichever way the decision falls, a previously recorded
 * rec->flag_block_full_backref that disagrees with it sets
 * rec->bad_full_backref.
 *
 * NOTE(review): the jumps connecting the tests to the two outcomes are not
 * visible in this excerpt.
 */
6053 static int calc_extent_flag(struct btrfs_root *root,
6054 struct cache_tree *extent_cache,
6055 struct extent_buffer *buf,
6056 struct root_item_record *ri,
6059 struct extent_record *rec;
6060 struct cache_extent *cache;
6061 struct tree_backref *tback;
6064 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6065 /* we have added this extent before */
6067 rec = container_of(cache, struct extent_record, cache);
6070 * Except file/reloc tree, we can not have
6073 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6078 if (buf->start == ri->bytenr)
6081 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6084 owner = btrfs_header_owner(buf);
6085 if (owner == ri->objectid)
6088 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: an earlier non-zero decision conflicts. */
6093 if (rec->flag_block_full_backref != FLAG_UNSET &&
6094 rec->flag_block_full_backref != 0)
6095 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: an earlier decision other than 1 conflicts. */
6098 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6099 if (rec->flag_block_full_backref != FLAG_UNSET &&
6100 rec->flag_block_full_backref != 1)
6101 rec->bad_full_backref = 1;
/*
 * Process the next pending tree block of the scan.
 *
 * Steps, in the order visible below:
 *  1. pick_next_pending() selects up to @bits_nr candidate blocks; blocks not
 *     yet in the readahead cache are added to it and readahead is issued.
 *  2. The first candidate is removed from the pending, reada and nodes caches
 *     and read with the parent generation stored in its extent record.
 *     An unreadable block is recorded through record_bad_block_io().
 *  3. The block flags are taken from the extent tree via
 *     btrfs_lookup_extent_info() unless init_extent_tree is set, and from
 *     calc_extent_flag() otherwise or as a fallback.  The FULL_BACKREF bit is
 *     then sanity-checked against the owner, the RELOC header flag and
 *     ri->last_snapshot; inconsistencies set rec->bad_full_backref.
 *  4. check_block() validates the block.
 *  5. For a leaf every item is dispatched on its key type: extent and
 *     metadata items, csum items, chunk, device, block group and device
 *     extent items, the various backref items, orphan items and file extent
 *     items.  For a node every child pointer gets an extent record and a
 *     tree backref, and is queued in @nodes or @pending.
 *  6. The global byte counters and found_old_backref are updated and the
 *     buffer is released.
 *
 * NOTE(review): many short lines (conditions, else branches, continue/return
 * statements) are not visible in this excerpt, so the exact pairing of some
 * branches cannot be confirmed from here.
 */
6105 static int run_next_block(struct btrfs_root *root,
6106 struct block_info *bits,
6109 struct cache_tree *pending,
6110 struct cache_tree *seen,
6111 struct cache_tree *reada,
6112 struct cache_tree *nodes,
6113 struct cache_tree *extent_cache,
6114 struct cache_tree *chunk_cache,
6115 struct rb_root *dev_cache,
6116 struct block_group_tree *block_group_cache,
6117 struct device_extent_tree *dev_extent_cache,
6118 struct root_item_record *ri)
6120 struct extent_buffer *buf;
6121 struct extent_record *rec = NULL;
6132 struct btrfs_key key;
6133 struct cache_extent *cache;
6136 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6137 bits_nr, &reada_bits);
6142 for(i = 0; i < nritems; i++) {
6143 ret = add_cache_extent(reada, bits[i].start,
6148 /* fixme, get the parent transid */
6149 readahead_tree_block(root, bits[i].start,
/* The first candidate is the block handled in this call. */
6153 *last = bits[0].start;
6154 bytenr = bits[0].start;
6155 size = bits[0].size;
6157 cache = lookup_cache_extent(pending, bytenr, size);
6159 remove_cache_extent(pending, cache);
6162 cache = lookup_cache_extent(reada, bytenr, size);
6164 remove_cache_extent(reada, cache);
6167 cache = lookup_cache_extent(nodes, bytenr, size);
6169 remove_cache_extent(nodes, cache);
6172 cache = lookup_cache_extent(extent_cache, bytenr, size);
6174 rec = container_of(cache, struct extent_record, cache);
6175 gen = rec->parent_generation;
6178 /* fixme, get the real parent transid */
6179 buf = read_tree_block(root, bytenr, size, gen);
6180 if (!extent_buffer_uptodate(buf)) {
6181 record_bad_block_io(root->fs_info,
6182 extent_cache, bytenr, size);
6186 nritems = btrfs_header_nritems(buf);
6189 if (!init_extent_tree) {
6190 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6191 btrfs_header_level(buf), 1, NULL,
6194 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6196 fprintf(stderr, "Couldn't calc extent flags\n");
6197 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6202 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6204 fprintf(stderr, "Couldn't calc extent flags\n");
6205 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6209 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6211 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6212 ri->objectid == btrfs_header_owner(buf)) {
6214 * Ok we got to this block from it's original owner and
6215 * we have FULL_BACKREF set. Relocation can leave
6216 * converted blocks over so this is altogether possible,
6217 * however it's not possible if the generation > the
6218 * last snapshot, so check for this case.
6220 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6221 btrfs_header_generation(buf) > ri->last_snapshot) {
6222 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6223 rec->bad_full_backref = 1;
6228 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6229 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6230 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6231 rec->bad_full_backref = 1;
6235 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6236 rec->flag_block_full_backref = 1;
6240 rec->flag_block_full_backref = 0;
6242 owner = btrfs_header_owner(buf);
6245 ret = check_block(root, extent_cache, buf, flags);
6249 if (btrfs_is_leaf(buf)) {
6250 btree_space_waste += btrfs_leaf_free_space(root, buf);
6251 for (i = 0; i < nritems; i++) {
6252 struct btrfs_file_extent_item *fi;
6253 btrfs_item_key_to_cpu(buf, &key, i);
6254 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6255 process_extent_item(root, extent_cache, buf,
6259 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6260 process_extent_item(root, extent_cache, buf,
6264 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6266 btrfs_item_size_nr(buf, i);
6269 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6270 process_chunk_item(chunk_cache, &key, buf, i);
6273 if (key.type == BTRFS_DEV_ITEM_KEY) {
6274 process_device_item(dev_cache, &key, buf, i);
6277 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6278 process_block_group_item(block_group_cache,
6282 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6283 process_device_extent_item(dev_extent_cache,
6288 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6289 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6290 process_extent_ref_v0(extent_cache, buf, i);
6297 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6298 add_tree_backref(extent_cache, key.objectid, 0,
6302 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6303 add_tree_backref(extent_cache, key.objectid,
6307 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6308 struct btrfs_extent_data_ref *ref;
6309 ref = btrfs_item_ptr(buf, i,
6310 struct btrfs_extent_data_ref);
6311 add_data_backref(extent_cache,
6313 btrfs_extent_data_ref_root(buf, ref),
6314 btrfs_extent_data_ref_objectid(buf,
6316 btrfs_extent_data_ref_offset(buf, ref),
6317 btrfs_extent_data_ref_count(buf, ref),
6318 0, root->sectorsize);
6321 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6322 struct btrfs_shared_data_ref *ref;
6323 ref = btrfs_item_ptr(buf, i,
6324 struct btrfs_shared_data_ref);
6325 add_data_backref(extent_cache,
6326 key.objectid, key.offset, 0, 0, 0,
6327 btrfs_shared_data_ref_count(buf, ref),
6328 0, root->sectorsize);
6331 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6332 struct bad_item *bad;
6334 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
/* Other orphan items are queued on the global delete_items list. */
6338 bad = malloc(sizeof(struct bad_item));
6341 INIT_LIST_HEAD(&bad->list);
6342 memcpy(&bad->key, &key,
6343 sizeof(struct btrfs_key));
6344 bad->root_id = owner;
6345 list_add_tail(&bad->list, &delete_items);
6348 if (key.type != BTRFS_EXTENT_DATA_KEY)
6350 fi = btrfs_item_ptr(buf, i,
6351 struct btrfs_file_extent_item);
6352 if (btrfs_file_extent_type(buf, fi) ==
6353 BTRFS_FILE_EXTENT_INLINE)
6355 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6358 data_bytes_allocated +=
6359 btrfs_file_extent_disk_num_bytes(buf, fi);
6360 if (data_bytes_allocated < root->sectorsize) {
6363 data_bytes_referenced +=
6364 btrfs_file_extent_num_bytes(buf, fi);
6365 add_data_backref(extent_cache,
6366 btrfs_file_extent_disk_bytenr(buf, fi),
6367 parent, owner, key.objectid, key.offset -
6368 btrfs_file_extent_offset(buf, fi), 1, 1,
6369 btrfs_file_extent_disk_num_bytes(buf, fi));
6373 struct btrfs_key first_key;
6375 first_key.objectid = 0;
6378 btrfs_item_key_to_cpu(buf, &first_key, 0);
6379 level = btrfs_header_level(buf);
6380 for (i = 0; i < nritems; i++) {
6381 struct extent_record tmpl;
6383 ptr = btrfs_node_blockptr(buf, i);
6384 size = root->nodesize;
6385 btrfs_node_key_to_cpu(buf, &key, i);
/* Children below the drop key at the drop level are tested here. */
6387 if ((level == ri->drop_level)
6388 && is_dropped_key(&key, &ri->drop_key)) {
6393 memset(&tmpl, 0, sizeof(tmpl));
6394 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6395 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6400 tmpl.max_size = size;
6401 ret = add_extent_rec(extent_cache, &tmpl);
6404 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6407 add_pending(nodes, seen, ptr, size);
6409 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted btree space. */
6412 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6413 nritems) * sizeof(struct btrfs_key_ptr);
6415 total_btree_bytes += buf->len;
6416 if (fs_root_objectid(btrfs_header_owner(buf)))
6417 total_fs_tree_bytes += buf->len;
6418 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6419 total_extent_tree_bytes += buf->len;
6420 if (!found_old_backref &&
6421 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6422 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6423 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6424 found_old_backref = 1;
6426 free_extent_buffer(buf);
/*
 * Seed the scan with the root block @buf of a tree.
 *
 * The block is queued in @nodes when its header level is above 0 and in
 * @pending otherwise, an extent record starting at buf->start with max_size
 * buf->len is added to @extent_cache, and a tree backref is recorded for it.
 * For the reloc tree, or a block whose backref revision predates
 * BTRFS_MIXED_BACKREF_REV, the backref uses buf->start as parent; otherwise
 * it is keyed by the root @objectid.
 *
 * NOTE(review): some tmpl field assignments and the return statement are not
 * visible in this excerpt.
 */
6430 static int add_root_to_pending(struct extent_buffer *buf,
6431 struct cache_tree *extent_cache,
6432 struct cache_tree *pending,
6433 struct cache_tree *seen,
6434 struct cache_tree *nodes,
6437 struct extent_record tmpl;
6439 if (btrfs_header_level(buf) > 0)
6440 add_pending(nodes, seen, buf->start, buf->len);
6442 add_pending(pending, seen, buf->start, buf->len);
6444 memset(&tmpl, 0, sizeof(tmpl));
6445 tmpl.start = buf->start;
6450 tmpl.max_size = buf->len;
6451 add_extent_rec(extent_cache, &tmpl);
6453 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6454 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6455 add_tree_backref(extent_cache, buf->start, buf->start,
6458 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6462 /* as we fix the tree, we might be deleting blocks that
6463 * we're tracking for repair. This hook makes sure we
6464 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped during repair; it keeps the
 * in-memory extent records in fs_info->fsck_extent_cache in step.
 *
 * The reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID, and as a tree block reference otherwise.  The
 * matching backref on the extent record covering @bytenr is located and its
 * counters are reduced:
 *  - data: found_ref / rec->refs and num_refs / rec->extent_item_refs drop by
 *    refs_to_drop, and the found_ref / found_extent_tree bits are cleared
 *    once the respective counter reaches zero;
 *  - tree: the found_ref and found_extent_tree bits are cleared and
 *    rec->extent_item_refs is decremented when non-zero.
 *
 * Finally maybe_free_extent_rec() gets a chance to release the record.
 *
 * NOTE(review): the erase condition reads
 * "!found_extent_tree && found_ref" in both branches; confirm that this is
 * the intended test for removing a backref.
 */
6466 static int free_extent_hook(struct btrfs_trans_handle *trans,
6467 struct btrfs_root *root,
6468 u64 bytenr, u64 num_bytes, u64 parent,
6469 u64 root_objectid, u64 owner, u64 offset,
6472 struct extent_record *rec;
6473 struct cache_extent *cache;
6475 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6477 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6478 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6482 rec = container_of(cache, struct extent_record, cache);
6484 struct data_backref *back;
6485 back = find_data_backref(rec, parent, root_objectid, owner,
6486 offset, 1, bytenr, num_bytes);
6489 if (back->node.found_ref) {
6490 back->found_ref -= refs_to_drop;
6492 rec->refs -= refs_to_drop;
6494 if (back->node.found_extent_tree) {
6495 back->num_refs -= refs_to_drop;
6496 if (rec->extent_item_refs)
6497 rec->extent_item_refs -= refs_to_drop;
6499 if (back->found_ref == 0)
6500 back->node.found_ref = 0;
6501 if (back->num_refs == 0)
6502 back->node.found_extent_tree = 0;
6504 if (!back->node.found_extent_tree && back->node.found_ref) {
6505 rb_erase(&back->node.node, &rec->backref_tree);
6509 struct tree_backref *back;
6510 back = find_tree_backref(rec, parent, root_objectid);
6513 if (back->node.found_ref) {
6516 back->node.found_ref = 0;
6518 if (back->node.found_extent_tree) {
6519 if (rec->extent_item_refs)
6520 rec->extent_item_refs--;
6521 back->node.found_extent_tree = 0;
6523 if (!back->node.found_extent_tree && back->node.found_ref) {
6524 rb_erase(&back->node.node, &rec->backref_tree);
6528 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove the extent tree items that belong to @bytenr.
 *
 * Searching starts at (bytenr, <type>, -1) and proceeds towards lower keys.
 * Items whose type is not one of the extent, metadata or backref key types
 * are stepped over by lowering the search key: the offset is decremented for
 * type 0, otherwise the type is decremented and the offset reset to -1.
 * Every matching item is reported on stderr and deleted with
 * btrfs_del_item().  For EXTENT_ITEM and METADATA_ITEM keys the block group
 * accounting is updated as well, using key.offset bytes for an extent item
 * and root->nodesize for a metadata item.
 *
 * NOTE(review): loop constructs, the use of @new_len and the return
 * statements are not visible in this excerpt.
 */
6533 static int delete_extent_records(struct btrfs_trans_handle *trans,
6534 struct btrfs_root *root,
6535 struct btrfs_path *path,
6536 u64 bytenr, u64 new_len)
6538 struct btrfs_key key;
6539 struct btrfs_key found_key;
6540 struct extent_buffer *leaf;
6545 key.objectid = bytenr;
6547 key.offset = (u64)-1;
6550 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6557 if (path->slots[0] == 0)
6563 leaf = path->nodes[0];
6564 slot = path->slots[0];
6566 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6567 if (found_key.objectid != bytenr)
6570 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6571 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6572 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6573 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6574 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6575 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6576 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6577 btrfs_release_path(path);
6578 if (found_key.type == 0) {
6579 if (found_key.offset == 0)
6581 key.offset = found_key.offset - 1;
6582 key.type = found_key.type;
6584 key.type = found_key.type - 1;
6585 key.offset = (u64)-1;
6589 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6590 found_key.objectid, found_key.type, found_key.offset);
6592 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6595 btrfs_release_path(path);
6597 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6598 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6599 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6600 found_key.offset : root->nodesize;
6602 ret = btrfs_update_block_group(trans, root, bytenr,
6609 btrfs_release_path(path);
6614 * for a single backref, this will allocate a new extent
6615 * and add the backref to it.
6617 static int record_extent(struct btrfs_trans_handle *trans,
6618 struct btrfs_fs_info *info,
6619 struct btrfs_path *path,
6620 struct extent_record *rec,
6621 struct extent_backref *back,
6622 int allocated, u64 flags)
6625 struct btrfs_root *extent_root = info->extent_root;
6626 struct extent_buffer *leaf;
6627 struct btrfs_key ins_key;
6628 struct btrfs_extent_item *ei;
6629 struct tree_backref *tback;
6630 struct data_backref *dback;
6631 struct btrfs_tree_block_info *bi;
6634 rec->max_size = max_t(u64, rec->max_size,
6635 info->extent_root->nodesize);
6638 u32 item_size = sizeof(*ei);
6641 item_size += sizeof(*bi);
6643 ins_key.objectid = rec->start;
6644 ins_key.offset = rec->max_size;
6645 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6647 ret = btrfs_insert_empty_item(trans, extent_root, path,
6648 &ins_key, item_size);
6652 leaf = path->nodes[0];
6653 ei = btrfs_item_ptr(leaf, path->slots[0],
6654 struct btrfs_extent_item);
6656 btrfs_set_extent_refs(leaf, ei, 0);
6657 btrfs_set_extent_generation(leaf, ei, rec->generation);
6659 if (back->is_data) {
6660 btrfs_set_extent_flags(leaf, ei,
6661 BTRFS_EXTENT_FLAG_DATA);
6663 struct btrfs_disk_key copy_key;;
6665 tback = to_tree_backref(back);
6666 bi = (struct btrfs_tree_block_info *)(ei + 1);
6667 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6670 btrfs_set_disk_key_objectid(©_key,
6671 rec->info_objectid);
6672 btrfs_set_disk_key_type(©_key, 0);
6673 btrfs_set_disk_key_offset(©_key, 0);
6675 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6676 btrfs_set_tree_block_key(leaf, bi, ©_key);
6678 btrfs_set_extent_flags(leaf, ei,
6679 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6682 btrfs_mark_buffer_dirty(leaf);
6683 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6684 rec->max_size, 1, 0);
6687 btrfs_release_path(path);
6690 if (back->is_data) {
6694 dback = to_data_backref(back);
6695 if (back->full_backref)
6696 parent = dback->parent;
6700 for (i = 0; i < dback->found_ref; i++) {
6701 /* if parent != 0, we're doing a full backref
6702 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6703 * just makes the backref allocator create a data
6706 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6707 rec->start, rec->max_size,
6711 BTRFS_FIRST_FREE_OBJECTID :
6717 fprintf(stderr, "adding new data backref"
6718 " on %llu %s %llu owner %llu"
6719 " offset %llu found %d\n",
6720 (unsigned long long)rec->start,
6721 back->full_backref ?
6723 back->full_backref ?
6724 (unsigned long long)parent :
6725 (unsigned long long)dback->root,
6726 (unsigned long long)dback->owner,
6727 (unsigned long long)dback->offset,
6732 tback = to_tree_backref(back);
6733 if (back->full_backref)
6734 parent = tback->parent;
6738 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6739 rec->start, rec->max_size,
6740 parent, tback->root, 0, 0);
6741 fprintf(stderr, "adding new tree backref on "
6742 "start %llu len %llu parent %llu root %llu\n",
6743 rec->start, rec->max_size, parent, tback->root);
6746 btrfs_release_path(path);
6750 static struct extent_entry *find_entry(struct list_head *entries,
6751 u64 bytenr, u64 bytes)
6753 struct extent_entry *entry = NULL;
6755 list_for_each_entry(entry, entries, list) {
6756 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy entry from @entries by comparing the entries'
 * count fields.
 *
 * Entries whose broken count equals their count are tested for and not
 * trusted.  While walking the list "prev" and "best" are compared with the
 * current entry; a tie in count between the current entry and the best one,
 * or between the current entry and a non-broken previous one, means the
 * choice is not settled yet.
 *
 * NOTE(review): the branch bodies and the return statement are not visible
 * in this excerpt, so the exact tie handling cannot be confirmed from here.
 */
6763 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6765 struct extent_entry *entry, *best = NULL, *prev = NULL;
6767 list_for_each_entry(entry, entries, list) {
6774 * If there are as many broken entries as entries then we know
6775 * not to trust this particular entry.
6777 if (entry->broken == entry->count)
6781 * If our current entry == best then we can't be sure our best
6782 * is really the best, so we need to keep searching.
6784 if (best && best->count == entry->count) {
6790 /* Prev == entry, not good enough, have to keep searching */
6791 if (!prev->broken && prev->count == entry->count)
6795 best = (prev->count > entry->count) ? prev : entry;
6796 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref @dback so that it agrees
 * with the chosen extent @entry.
 *
 * The fs root named by dback->root is read, and its EXTENT_DATA items for
 * dback->owner are scanned from dback->offset onwards until one is found
 * whose disk bytenr and disk num bytes equal those recorded in @dback.  The
 * item is then looked up again inside a transaction with COW enabled and
 * updated:
 *  - a broken backref with a different bytenr just gets the new disk bytenr,
 *  - a ref starting after the entry start has its disk bytenr moved to the
 *    entry start, provided the ref does not run past the entry end,
 *  - a ref starting before the entry start has its disk bytenr and extent
 *    offset adjusted, provided the referenced data does not begin before the
 *    entry.
 * disk_num_bytes is set to entry->bytes, and ram_bytes too when the extent is
 * not compressed.  Compressed extents whose bytenr differs from the entry
 * are refused with a request for a btrfs-image.
 *
 * The transaction is committed at the end; the function returns ret if it is
 * non-zero and the commit result otherwise.
 *
 * NOTE(review): loop constructs, labels and several short statements are not
 * visible in this excerpt.
 */
6804 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6805 struct data_backref *dback, struct extent_entry *entry)
6807 struct btrfs_trans_handle *trans;
6808 struct btrfs_root *root;
6809 struct btrfs_file_extent_item *fi;
6810 struct extent_buffer *leaf;
6811 struct btrfs_key key;
6815 key.objectid = dback->root;
6816 key.type = BTRFS_ROOT_ITEM_KEY;
6817 key.offset = (u64)-1;
6818 root = btrfs_read_fs_root(info, &key);
6820 fprintf(stderr, "Couldn't find root for our ref\n");
6825 * The backref points to the original offset of the extent if it was
6826 * split, so we need to search down to the offset we have and then walk
6827 * forward until we find the backref we're looking for.
6829 key.objectid = dback->owner;
6830 key.type = BTRFS_EXTENT_DATA_KEY;
6831 key.offset = dback->offset;
6832 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6834 fprintf(stderr, "Error looking up ref %d\n", ret);
6839 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6840 ret = btrfs_next_leaf(root, path);
6842 fprintf(stderr, "Couldn't find our ref, next\n");
6846 leaf = path->nodes[0];
6847 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6848 if (key.objectid != dback->owner ||
6849 key.type != BTRFS_EXTENT_DATA_KEY) {
6850 fprintf(stderr, "Couldn't find our ref, search\n");
6853 fi = btrfs_item_ptr(leaf, path->slots[0],
6854 struct btrfs_file_extent_item);
6855 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6856 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6858 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6863 btrfs_release_path(path);
6865 trans = btrfs_start_transaction(root, 1);
6867 return PTR_ERR(trans);
6870 * Ok we have the key of the file extent we want to fix, now we can cow
6871 * down to the thing and fix it.
6873 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6875 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6876 key.objectid, key.type, key.offset, ret);
6880 fprintf(stderr, "Well that's odd, we just found this key "
6881 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6886 leaf = path->nodes[0];
6887 fi = btrfs_item_ptr(leaf, path->slots[0],
6888 struct btrfs_file_extent_item);
6890 if (btrfs_file_extent_compression(leaf, fi) &&
6891 dback->disk_bytenr != entry->bytenr) {
6892 fprintf(stderr, "Ref doesn't match the record start and is "
6893 "compressed, please take a btrfs-image of this file "
6894 "system and send it to a btrfs developer so they can "
6895 "complete this functionality for bytenr %Lu\n",
6896 dback->disk_bytenr);
6901 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6902 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6903 } else if (dback->disk_bytenr > entry->bytenr) {
6904 u64 off_diff, offset;
6906 off_diff = dback->disk_bytenr - entry->bytenr;
6907 offset = btrfs_file_extent_offset(leaf, fi);
6908 if (dback->disk_bytenr + offset +
6909 btrfs_file_extent_num_bytes(leaf, fi) >
6910 entry->bytenr + entry->bytes) {
6911 fprintf(stderr, "Ref is past the entry end, please "
6912 "take a btrfs-image of this file system and "
6913 "send it to a btrfs developer, ref %Lu\n",
6914 dback->disk_bytenr);
6919 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6920 btrfs_set_file_extent_offset(leaf, fi, offset);
6921 } else if (dback->disk_bytenr < entry->bytenr) {
6924 offset = btrfs_file_extent_offset(leaf, fi);
6925 if (dback->disk_bytenr + offset < entry->bytenr) {
6926 fprintf(stderr, "Ref is before the entry start, please"
6927 " take a btrfs-image of this file system and "
6928 "send it to a btrfs developer, ref %Lu\n",
6929 dback->disk_bytenr);
6934 offset += dback->disk_bytenr;
6935 offset -= entry->bytenr;
6936 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6937 btrfs_set_file_extent_offset(leaf, fi, offset);
6940 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6943 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6944 * only do this if we aren't using compression, otherwise it's a
6947 if (!btrfs_file_extent_compression(leaf, fi))
6948 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6950 printf("ram bytes may be wrong?\n");
6951 btrfs_mark_buffer_dirty(leaf);
6953 err = btrfs_commit_transaction(trans, root);
6954 btrfs_release_path(path);
6955 return ret ? ret : err;
/*
 * verify_backrefs - reconcile the data backrefs attached to one extent record.
 *
 * First pass: walks rec->backref_tree, skips full backrefs and non-data
 * backrefs, and for the data backrefs it keeps it looks up the pair
 * (disk_bytenr, bytes) in a local "entries" list, allocating a new
 * extent_entry when find_entry() has none.  A backref whose disk_bytenr or
 * bytes differ from rec->start / rec->nr, or which is flagged broken, is
 * treated as a mismatch.
 *
 * When there is at most one entry and no mismatch, the backrefs agree.
 * Otherwise find_most_right_entry() is asked for a winner; if it has none,
 * an entry matching (rec->start, rec->nr) is looked up or, when the backrefs
 * were broken and the record was found, created, and the vote is repeated.
 * A winner whose bytenr differs from rec->start is reported as unsupported.
 *
 * Second pass: every kept data backref that does not match the winner is
 * handed to repair_ref().  The entries list is drained before leaving.
 *
 * NOTE(review): this listing is missing lines (declarations, braces,
 * continue/goto/return statements), so return values and the exact error
 * paths cannot be confirmed from what is shown.
 */
6958 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6959 struct extent_record *rec)
6961 struct extent_backref *back, *tmp;
6962 struct data_backref *dback;
6963 struct extent_entry *entry, *best = NULL;
6966 int broken_entries = 0;
6971 * Metadata is easy and the backrefs should always agree on bytenr and
6972 * size, if not we've got bigger issues.
6977 rbtree_postorder_for_each_entry_safe(back, tmp,
6978 &rec->backref_tree, node) {
6979 if (back->full_backref || !back->is_data)
6982 dback = to_data_backref(back);
6985 * We only pay attention to backrefs that we found a real
6988 if (dback->found_ref == 0)
6992 * For now we only catch when the bytes don't match, not the
6993 * bytenr. We can easily do this at the same time, but I want
6994 * to have a fs image to test on before we just add repair
6995 * functionality willy-nilly so we know we won't screw up the
6999 entry = find_entry(&entries, dback->disk_bytenr,
7002 entry = malloc(sizeof(struct extent_entry));
7007 memset(entry, 0, sizeof(*entry));
7008 entry->bytenr = dback->disk_bytenr;
7009 entry->bytes = dback->bytes;
7010 list_add_tail(&entry->list, &entries);
7015 * If we only have on entry we may think the entries agree when
7016 * in reality they don't so we have to do some extra checking.
7018 if (dback->disk_bytenr != rec->start ||
7019 dback->bytes != rec->nr || back->broken)
7030 /* Yay all the backrefs agree, carry on good sir */
7031 if (nr_entries <= 1 && !mismatch)
7034 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7035 "%Lu\n", rec->start);
7038 * First we want to see if the backrefs can agree amongst themselves who
7039 * is right, so figure out which one of the entries has the highest
7042 best = find_most_right_entry(&entries);
7045 * Ok so we may have an even split between what the backrefs think, so
7046 * this is where we use the extent ref to see what it thinks.
7049 entry = find_entry(&entries, rec->start, rec->nr);
7050 if (!entry && (!broken_entries || !rec->found_rec)) {
7051 fprintf(stderr, "Backrefs don't agree with each other "
7052 "and extent record doesn't agree with anybody,"
7053 " so we can't fix bytenr %Lu bytes %Lu\n",
7054 rec->start, rec->nr);
7057 } else if (!entry) {
7059 * Ok our backrefs were broken, we'll assume this is the
7060 * correct value and add an entry for this range.
7062 entry = malloc(sizeof(struct extent_entry));
7067 memset(entry, 0, sizeof(*entry));
7068 entry->bytenr = rec->start;
7069 entry->bytes = rec->nr;
7070 list_add_tail(&entry->list, &entries);
7074 best = find_most_right_entry(&entries);
7076 fprintf(stderr, "Backrefs and extent record evenly "
7077 "split on who is right, this is going to "
7078 "require user input to fix bytenr %Lu bytes "
7079 "%Lu\n", rec->start, rec->nr);
7086 * I don't think this can happen currently as we'll abort() if we catch
7087 * this case higher up, but in case somebody removes that we still can't
7088 * deal with it properly here yet, so just bail out of that's the case.
7090 if (best->bytenr != rec->start) {
7091 fprintf(stderr, "Extent start and backref starts don't match, "
7092 "please use btrfs-image on this file system and send "
7093 "it to a btrfs developer so they can make fsck fix "
7094 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7095 rec->start, rec->nr);
7101 * Ok great we all agreed on an extent record, let's go find the real
7102 * references and fix up the ones that don't match.
7104 rbtree_postorder_for_each_entry_safe(back, tmp,
7105 &rec->backref_tree, node) {
7106 if (back->full_backref || !back->is_data)
7109 dback = to_data_backref(back);
7112 * Still ignoring backrefs that don't have a real ref attached
7115 if (dback->found_ref == 0)
7118 if (dback->bytes == best->bytes &&
7119 dback->disk_bytenr == best->bytenr)
7122 ret = repair_ref(info, path, dback, best);
7128 * Ok we messed with the actual refs, which means we need to drop our
7129 * entire cache and go back and rescan. I know this is a huge pain and
7130 * adds a lot of extra work, but it's the only way to be safe. Once all
7131 * the backrefs agree we may not need to do anything to the extent
7136 while (!list_empty(&entries)) {
7137 entry = list_entry(entries.next, struct extent_entry, list);
7138 list_del_init(&entry->list);
/*
 * process_duplicates - let a duplicate take over as the real extent record.
 *
 * Nothing is promoted when rec->found_rec is set or rec has more than one
 * duplicate.  Otherwise rec is removed from extent_cache and the first entry
 * on rec->dups ("good") replaces it: its list heads are re-initialised, its
 * cache range is reset from good->start / good->nr, content_checked,
 * owner_ref_checked and num_duplicates are cleared, and it inherits
 * rec->refs and rec->backrefs.
 *
 * Records returned by lookup_cache_extent() for good's range are then folded
 * in.  One with found_rec set or with duplicates of its own is moved onto
 * good->dups (putting good on the global duplicate_extents list if it is not
 * queued yet); any other one only contributes its refs and backrefs.  Each
 * folded record is removed from extent_cache, and good is inserted at the
 * end.
 *
 * The last visible return yields 0 when good still has duplicates and 1
 * otherwise.
 *
 * NOTE(review): backrefs are moved through the rec->backrefs list here while
 * other functions in this file walk rec->backref_tree -- confirm the two
 * containers are kept consistent.
 * NOTE(review): this listing is missing lines (declarations, braces, the
 * loop header and early returns).
 */
7144 static int process_duplicates(struct btrfs_root *root,
7145 struct cache_tree *extent_cache,
7146 struct extent_record *rec)
7148 struct extent_record *good, *tmp;
7149 struct cache_extent *cache;
7153 * If we found a extent record for this extent then return, or if we
7154 * have more than one duplicate we are likely going to need to delete
7157 if (rec->found_rec || rec->num_duplicates > 1)
7160 /* Shouldn't happen but just in case */
7161 BUG_ON(!rec->num_duplicates);
7164 * So this happens if we end up with a backref that doesn't match the
7165 * actual extent entry. So either the backref is bad or the extent
7166 * entry is bad. Either way we want to have the extent_record actually
7167 * reflect what we found in the extent_tree, so we need to take the
7168 * duplicate out and use that as the extent_record since the only way we
7169 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7171 remove_cache_extent(extent_cache, &rec->cache);
7173 good = to_extent_record(rec->dups.next);
7174 list_del_init(&good->list);
7175 INIT_LIST_HEAD(&good->backrefs);
7176 INIT_LIST_HEAD(&good->dups);
7177 good->cache.start = good->start;
7178 good->cache.size = good->nr;
7179 good->content_checked = 0;
7180 good->owner_ref_checked = 0;
7181 good->num_duplicates = 0;
7182 good->refs = rec->refs;
7183 list_splice_init(&rec->backrefs, &good->backrefs);
7185 cache = lookup_cache_extent(extent_cache, good->start,
7189 tmp = container_of(cache, struct extent_record, cache);
7192 * If we find another overlapping extent and it's found_rec is
7193 * set then it's a duplicate and we need to try and delete
7196 if (tmp->found_rec || tmp->num_duplicates > 0) {
7197 if (list_empty(&good->list))
7198 list_add_tail(&good->list,
7199 &duplicate_extents);
7200 good->num_duplicates += tmp->num_duplicates + 1;
7201 list_splice_init(&tmp->dups, &good->dups);
7202 list_del_init(&tmp->list);
7203 list_add_tail(&tmp->list, &good->dups);
7204 remove_cache_extent(extent_cache, &tmp->cache);
7209 * Ok we have another non extent item backed extent rec, so lets
7210 * just add it to this extent and carry on like we did above.
7212 good->refs += tmp->refs;
7213 list_splice_init(&tmp->backrefs, &good->backrefs);
7214 remove_cache_extent(extent_cache, &tmp->cache);
7217 ret = insert_cache_extent(extent_cache, &good->cache);
7220 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - drop redundant extent items for one record.
 *
 * Scans rec->dups for the record that covers all the others ("good"); a pair
 * of extents that only partially overlap is reported and not handled.  rec
 * and its duplicates are then collected on a local delete_list.  Inside a
 * transaction on the extent root, each listed record with found_rec set is
 * looked up by the key (start, BTRFS_EXTENT_ITEM_KEY, nr) and removed with
 * btrfs_del_item(); metadata records are reported as unexpected.  After the
 * commit both delete_list and rec->dups are drained and the path is freed.
 *
 * Returns ret when it is non-zero, otherwise nr_del; rec->num_duplicates is
 * reset to 0 only when both ret and nr_del are zero.
 *
 * NOTE(review): the "overlapping extents" message is formatted as
 * "[%Lu-%Lu]" but is given start and nr (a length, not an end offset).
 * NOTE(review): this listing is missing lines (declarations, braces, several
 * branches and gotos), so the precise control flow cannot be confirmed.
 */
7223 static int delete_duplicate_records(struct btrfs_root *root,
7224 struct extent_record *rec)
7226 struct btrfs_trans_handle *trans;
7227 LIST_HEAD(delete_list);
7228 struct btrfs_path *path;
7229 struct extent_record *tmp, *good, *n;
7232 struct btrfs_key key;
7234 path = btrfs_alloc_path();
7241 /* Find the record that covers all of the duplicates. */
7242 list_for_each_entry(tmp, &rec->dups, list) {
7243 if (good->start < tmp->start)
7245 if (good->nr > tmp->nr)
7248 if (tmp->start + tmp->nr < good->start + good->nr) {
7249 fprintf(stderr, "Ok we have overlapping extents that "
7250 "aren't completely covered by each other, this "
7251 "is going to require more careful thought. "
7252 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7253 tmp->start, tmp->nr, good->start, good->nr);
7260 list_add_tail(&rec->list, &delete_list);
7262 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7265 list_move_tail(&tmp->list, &delete_list);
7268 root = root->fs_info->extent_root;
7269 trans = btrfs_start_transaction(root, 1);
7270 if (IS_ERR(trans)) {
7271 ret = PTR_ERR(trans);
7275 list_for_each_entry(tmp, &delete_list, list) {
7276 if (tmp->found_rec == 0)
7278 key.objectid = tmp->start;
7279 key.type = BTRFS_EXTENT_ITEM_KEY;
7280 key.offset = tmp->nr;
7282 /* Shouldn't happen but just in case */
7283 if (tmp->metadata) {
7284 fprintf(stderr, "Well this shouldn't happen, extent "
7285 "record overlaps but is metadata? "
7286 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7290 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7296 ret = btrfs_del_item(trans, root, path);
7299 btrfs_release_path(path);
7302 err = btrfs_commit_transaction(trans, root);
7306 while (!list_empty(&delete_list)) {
7307 tmp = to_extent_record(delete_list.next);
7308 list_del_init(&tmp->list);
7314 while (!list_empty(&rec->dups)) {
7315 tmp = to_extent_record(rec->dups.next);
7316 list_del_init(&tmp->list);
7320 btrfs_free_path(path);
7322 if (!ret && !nr_del)
7323 rec->num_duplicates = 0;
7325 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to resolve data backrefs that had no ref.
 *
 * For every non-full data backref in rec->backref_tree that has no found_ref
 * yet, reads the owning fs root (dback->root) and searches it for the file
 * extent item (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).  A root
 * that does not exist (-ENOENT) is skipped; any other root error is returned
 * to the caller.  When the file extent exists, its disk_bytenr and
 * disk_num_bytes are read and extent_cache is consulted for that bytenr.
 * For backrefs that are kept, found_ref is incremented and disk_bytenr and
 * bytes are overwritten with the values read from the file extent.
 *
 * NOTE(review): the inner "struct extent_record *tmp" shadows the outer
 * "tmp" used by the rbtree iteration macro -- harmless only as long as the
 * macro does not touch tmp inside that scope; worth renaming.
 * NOTE(review): this listing is missing lines (declarations, braces,
 * continue statements, the final return and the statement that marks the
 * backref as untrusted).
 */
7328 static int find_possible_backrefs(struct btrfs_fs_info *info,
7329 struct btrfs_path *path,
7330 struct cache_tree *extent_cache,
7331 struct extent_record *rec)
7333 struct btrfs_root *root;
7334 struct extent_backref *back, *tmp;
7335 struct data_backref *dback;
7336 struct cache_extent *cache;
7337 struct btrfs_file_extent_item *fi;
7338 struct btrfs_key key;
7342 rbtree_postorder_for_each_entry_safe(back, tmp,
7343 &rec->backref_tree, node) {
7344 /* Don't care about full backrefs (poor unloved backrefs) */
7345 if (back->full_backref || !back->is_data)
7348 dback = to_data_backref(back);
7350 /* We found this one, we don't need to do a lookup */
7351 if (dback->found_ref)
7354 key.objectid = dback->root;
7355 key.type = BTRFS_ROOT_ITEM_KEY;
7356 key.offset = (u64)-1;
7358 root = btrfs_read_fs_root(info, &key);
7360 /* No root, definitely a bad ref, skip */
7361 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7363 /* Other err, exit */
7365 return PTR_ERR(root);
7367 key.objectid = dback->owner;
7368 key.type = BTRFS_EXTENT_DATA_KEY;
7369 key.offset = dback->offset;
7370 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7372 btrfs_release_path(path);
7375 /* Didn't find it, we can carry on */
7380 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7381 struct btrfs_file_extent_item);
7382 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7383 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7384 btrfs_release_path(path);
7385 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7387 struct extent_record *tmp;
7388 tmp = container_of(cache, struct extent_record, cache);
7391 * If we found an extent record for the bytenr for this
7392 * particular backref then we can't add it to our
7393 * current extent record. We only want to add backrefs
7394 * that don't have a corresponding extent item in the
7395 * extent tree since they likely belong to this record
7396 * and we need to fix it if it doesn't match bytenrs.
7402 dback->found_ref += 1;
7403 dback->disk_bytenr = bytenr;
7404 dback->bytes = bytes;
7407 * Set this so the verify backref code knows not to trust the
7408 * values in this backref.
7417 * Record orphan data ref into corresponding root.
7419 * Return 0 if the extent item contains data ref and recorded.
7420 * Return 1 if the extent item contains no useful data ref
7421 * In that case, it may contain only shared_dataref or metadata backref
7422 * or the file extent exists(this should be handled by the extent bytenr
7424 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data refs that have no file extent.
 *
 * For each non-full data backref in rec->backref_tree that was found in the
 * extent tree but has no found_ref, reads the destination root
 * (dback->root); roots that cannot be read are skipped.  The root is then
 * searched for (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset) and,
 * for the backrefs that get recorded, an orphan_data_extent is allocated and
 * filled with the backref's root/owner/offset and with rec->cache.start /
 * rec->cache.size as disk_bytenr / disk_len.
 *
 * The last visible return yields 0 when at least one data ref was recorded
 * and 1 when none was.
 *
 * NOTE(review): list_add() is called with the root's orphan_data_extents
 * head as the first argument and the new orphan's node as the second.  The
 * list_add_tail() calls elsewhere in this file pass the new node first and
 * the list head second, so the arguments here look reversed -- if
 * list_add() follows the same (new, head) order, a second orphan recorded
 * for the same root would drop the earlier ones.  Verify against list.h.
 * NOTE(review): this listing is missing lines (declarations, braces, error
 * checks, continue statements).
 */
7426 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7427 struct extent_record *rec)
7429 struct btrfs_key key;
7430 struct btrfs_root *dest_root;
7431 struct extent_backref *back, *tmp;
7432 struct data_backref *dback;
7433 struct orphan_data_extent *orphan;
7434 struct btrfs_path *path;
7435 int recorded_data_ref = 0;
7440 path = btrfs_alloc_path();
7443 rbtree_postorder_for_each_entry_safe(back, tmp,
7444 &rec->backref_tree, node) {
7445 if (back->full_backref || !back->is_data ||
7446 !back->found_extent_tree)
7448 dback = to_data_backref(back);
7449 if (dback->found_ref)
7451 key.objectid = dback->root;
7452 key.type = BTRFS_ROOT_ITEM_KEY;
7453 key.offset = (u64)-1;
7455 dest_root = btrfs_read_fs_root(fs_info, &key);
7457 /* For non-exist root we just skip it */
7458 if (IS_ERR(dest_root) || !dest_root)
7461 key.objectid = dback->owner;
7462 key.type = BTRFS_EXTENT_DATA_KEY;
7463 key.offset = dback->offset;
7465 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7467 * For ret < 0, it's OK since the fs-tree may be corrupted,
7468 * we need to record it for inode/file extent rebuild.
7469 * For ret > 0, we record it only for file extent rebuild.
7470 * For ret == 0, the file extent exists but only bytenr
7471 * mismatch, let the original bytenr fix routine to handle,
7477 orphan = malloc(sizeof(*orphan));
7482 INIT_LIST_HEAD(&orphan->list);
7483 orphan->root = dback->root;
7484 orphan->objectid = dback->owner;
7485 orphan->offset = dback->offset;
7486 orphan->disk_bytenr = rec->cache.start;
7487 orphan->disk_len = rec->cache.size;
7488 list_add(&dest_root->orphan_data_extents, &orphan->list);
7489 recorded_data_ref = 1;
7492 btrfs_free_path(path);
7494 return !recorded_data_ref;
7500 * when an incorrect extent item is found, this will delete
7501 * all of the existing entries for it and recreate them
7502 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of one extent record.
 *
 * Visible steps:
 *  - BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags when
 *    rec->flag_block_full_backref is set.
 *  - For a data record whose refs differ from extent_item_refs,
 *    find_possible_backrefs() is run first, then verify_backrefs() makes the
 *    backrefs agree.
 *  - Inside a transaction on info->extent_root, delete_extent_records()
 *    removes the existing items for [rec->start, rec->start + max_size).
 *  - info->corrupt_blocks is checked for that range.
 *  - Every backref with found_ref set is re-created through record_extent(),
 *    clearing rec->bad_full_backref on the way.
 *  - The transaction is committed and the path freed.
 *
 * NOTE(review): this listing is missing lines (declarations, braces, error
 * checks, gotos and the return), so the error paths cannot be confirmed.
 */
7504 static int fixup_extent_refs(struct btrfs_fs_info *info,
7505 struct cache_tree *extent_cache,
7506 struct extent_record *rec)
7508 struct btrfs_trans_handle *trans = NULL;
7510 struct btrfs_path *path;
7511 struct cache_extent *cache;
7512 struct extent_backref *back, *tmp;
7516 if (rec->flag_block_full_backref)
7517 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7519 path = btrfs_alloc_path();
7523 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7525 * Sometimes the backrefs themselves are so broken they don't
7526 * get attached to any meaningful rec, so first go back and
7527 * check any of our backrefs that we couldn't find and throw
7528 * them into the list if we find the backref so that
7529 * verify_backrefs can figure out what to do.
7531 ret = find_possible_backrefs(info, path, extent_cache, rec);
7536 /* step one, make sure all of the backrefs agree */
7537 ret = verify_backrefs(info, path, rec);
7541 trans = btrfs_start_transaction(info->extent_root, 1);
7542 if (IS_ERR(trans)) {
7543 ret = PTR_ERR(trans);
7547 /* step two, delete all the existing records */
7548 ret = delete_extent_records(trans, info->extent_root, path,
7549 rec->start, rec->max_size);
7554 /* was this block corrupt? If so, don't add references to it */
7555 cache = lookup_cache_extent(info->corrupt_blocks,
7556 rec->start, rec->max_size);
7562 /* step three, recreate all the refs we did find */
7563 rbtree_postorder_for_each_entry_safe(back, tmp,
7564 &rec->backref_tree, node) {
7566 * if we didn't find any references, don't create a
7569 if (!back->found_ref)
7572 rec->bad_full_backref = 0;
7573 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7581 int err = btrfs_commit_transaction(trans, info->extent_root);
7586 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear FULL_BACKREF on an extent item.
 *
 * Builds the lookup key from rec->start: metadata records use
 * BTRFS_METADATA_ITEM_KEY with rec->info_level as offset, the others
 * BTRFS_EXTENT_ITEM_KEY with rec->max_size.  The item is searched in the
 * extent root with COW enabled inside a transaction; its flags are read,
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is set or cleared according to
 * rec->flag_block_full_backref, the leaf is marked dirty, the path is freed
 * and the result of the commit is returned.
 *
 * A failure to start the transaction frees the path and returns
 * PTR_ERR(trans).
 *
 * NOTE(review): on the two failure branches after the search the
 * transaction is committed and the commit result is not looked at.
 * NOTE(review): this listing is missing lines (declarations, braces, the
 * conditions and returns of the failure branches).
 */
7590 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7591 struct extent_record *rec)
7593 struct btrfs_trans_handle *trans;
7594 struct btrfs_root *root = fs_info->extent_root;
7595 struct btrfs_path *path;
7596 struct btrfs_extent_item *ei;
7597 struct btrfs_key key;
7601 key.objectid = rec->start;
7602 if (rec->metadata) {
7603 key.type = BTRFS_METADATA_ITEM_KEY;
7604 key.offset = rec->info_level;
7606 key.type = BTRFS_EXTENT_ITEM_KEY;
7607 key.offset = rec->max_size;
7610 path = btrfs_alloc_path();
7614 trans = btrfs_start_transaction(root, 0);
7615 if (IS_ERR(trans)) {
7616 btrfs_free_path(path);
7617 return PTR_ERR(trans);
7620 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7622 btrfs_free_path(path);
7623 btrfs_commit_transaction(trans, root);
7626 fprintf(stderr, "Didn't find extent for %llu\n",
7627 (unsigned long long)rec->start);
7628 btrfs_free_path(path);
7629 btrfs_commit_transaction(trans, root);
7633 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7634 struct btrfs_extent_item);
7635 flags = btrfs_extent_flags(path->nodes[0], ei);
7636 if (rec->flag_block_full_backref) {
7637 fprintf(stderr, "setting full backref on %llu\n",
7638 (unsigned long long)key.objectid);
7639 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7641 fprintf(stderr, "clearing full backref on %llu\n",
7642 (unsigned long long)key.objectid);
7643 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7645 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7646 btrfs_mark_buffer_dirty(path->nodes[0]);
7647 btrfs_free_path(path);
7648 return btrfs_commit_transaction(trans, root);
7651 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the parent pointer to one corrupt tree block.
 *
 * Searches the extent root for corrupt->key with path.lowest_level set to
 * corrupt->level + 1, so the search stops at the parent of the bad block.
 * The slot returned by the search is tried first; if its block pointer is
 * not corrupt->cache.start, every slot of that parent node is scanned for
 * it.  Once the pointer is located it is removed with btrfs_del_ptr() and
 * the path is released.
 *
 * NOTE(review): this listing is missing lines (declarations, braces, labels,
 * gotos and returns); how the "not found" case and the extent-root-node case
 * are resolved cannot be confirmed from what is shown.
 */
7652 static int prune_one_block(struct btrfs_trans_handle *trans,
7653 struct btrfs_fs_info *info,
7654 struct btrfs_corrupt_block *corrupt)
7657 struct btrfs_path path;
7658 struct extent_buffer *eb;
7662 int level = corrupt->level + 1;
7664 btrfs_init_path(&path);
7666 /* we want to stop at the parent to our busted block */
7667 path.lowest_level = level;
7669 ret = btrfs_search_slot(trans, info->extent_root,
7670 &corrupt->key, &path, -1, 1);
7675 eb = path.nodes[level];
7682 * hopefully the search gave us the block we want to prune,
7683 * lets try that first
7685 slot = path.slots[level];
7686 found = btrfs_node_blockptr(eb, slot);
7687 if (found == corrupt->cache.start)
7690 nritems = btrfs_header_nritems(eb);
7692 /* the search failed, lets scan this node and hope we find it */
7693 for (slot = 0; slot < nritems; slot++) {
7694 found = btrfs_node_blockptr(eb, slot);
7695 if (found == corrupt->cache.start)
7699 * we couldn't find the bad block. TODO, search all the nodes for pointers
7702 if (eb == info->extent_root->node) {
7707 btrfs_release_path(&path);
7712 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7713 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7716 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block recorded in info->corrupt_blocks.
 *
 * Repeatedly takes the first entry of info->corrupt_blocks, starts a
 * transaction on the extent root (returning PTR_ERR(trans) on failure),
 * passes the entry to prune_one_block() and removes it from the cache.  The
 * visible return commits the transaction and hands back the commit result.
 *
 * NOTE(review): the return value of prune_one_block() is discarded, so a
 * failed prune is not reported to the caller.
 * NOTE(review): this listing is missing lines (the loop header, the guards
 * around starting and committing the transaction, braces and the final
 * return).
 */
7720 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7722 struct btrfs_trans_handle *trans = NULL;
7723 struct cache_extent *cache;
7724 struct btrfs_corrupt_block *corrupt;
7727 cache = search_cache_extent(info->corrupt_blocks, 0);
7731 trans = btrfs_start_transaction(info->extent_root, 1);
7733 return PTR_ERR(trans);
7735 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7736 prune_one_block(trans, info, corrupt);
7737 remove_cache_extent(info->corrupt_blocks, cache);
7740 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear cached free-space state.
 *
 * Looks up EXTENT_DIRTY ranges in fs_info->free_space_cache with
 * find_first_extent_bit() and clears them with clear_extent_dirty().  It
 * also walks block groups via btrfs_lookup_first_block_group(), advancing
 * "start" to the end of each group (key.objectid + key.offset).
 *
 * NOTE(review): this listing is missing lines (declarations, loop headers,
 * braces and whatever is done to each block group), so the per-group action
 * cannot be confirmed from what is shown.
 */
7744 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7746 struct btrfs_block_group_cache *cache;
7751 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7752 &start, &end, EXTENT_DIRTY);
7755 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7761 cache = btrfs_lookup_first_block_group(fs_info, start);
7766 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, bad extent records.
 *
 * Visible steps:
 *  - Every record in extent_cache and every entry of
 *    fs_info->corrupt_blocks is marked dirty in fs_info->excluded_extents,
 *    then prune_corrupt_blocks() and reset_cached_block_groups() are called.
 *  - While repairing, records queued on the global duplicate_extents list
 *    go through process_duplicates() and delete_duplicate_records().
 *  - Each record in extent_cache is then checked and a message is printed
 *    for: multiple extent items, a ref count mismatch, a backpointer
 *    mismatch, a failed owner ref check, a bad full backref, metadata
 *    crossing a stripe boundary and a chunk type mismatch.  Ref count
 *    mismatches are first offered to record_orphan_data_extents();
 *    fixup_extent_refs() / fixup_extent_flags() are the repair routines
 *    called from these branches.
 *  - The record is removed from extent_cache and its backrefs are freed;
 *    its range is cleared from excluded_extents when repairing without
 *    init_extent_tree and the record had no error or was fixed.
 *  - On a repair failure other than -EAGAIN an abort message is printed;
 *    otherwise block accounting is fixed with btrfs_fix_block_accounting()
 *    inside a transaction on the extent root.
 *
 * NOTE(review): the return value of prune_corrupt_blocks() is not used.
 * NOTE(review): this listing is missing many lines (declarations, loop
 * headers, braces, gotos, returns); the comment above only describes what
 * the remaining lines show.
 */
7770 static int check_extent_refs(struct btrfs_root *root,
7771 struct cache_tree *extent_cache)
7773 struct extent_record *rec;
7774 struct cache_extent *cache;
7783 * if we're doing a repair, we have to make sure
7784 * we don't allocate from the problem extents.
7785 * In the worst case, this will be all the
7788 cache = search_cache_extent(extent_cache, 0);
7790 rec = container_of(cache, struct extent_record, cache);
7791 set_extent_dirty(root->fs_info->excluded_extents,
7793 rec->start + rec->max_size - 1,
7795 cache = next_cache_extent(cache);
7798 /* pin down all the corrupted blocks too */
7799 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7801 set_extent_dirty(root->fs_info->excluded_extents,
7803 cache->start + cache->size - 1,
7805 cache = next_cache_extent(cache);
7807 prune_corrupt_blocks(root->fs_info);
7808 reset_cached_block_groups(root->fs_info);
7811 reset_cached_block_groups(root->fs_info);
7814 * We need to delete any duplicate entries we find first otherwise we
7815 * could mess up the extent tree when we have backrefs that actually
7816 * belong to a different extent item and not the weird duplicate one.
7818 while (repair && !list_empty(&duplicate_extents)) {
7819 rec = to_extent_record(duplicate_extents.next);
7820 list_del_init(&rec->list);
7822 /* Sometimes we can find a backref before we find an actual
7823 * extent, so we need to process it a little bit to see if there
7824 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7825 * if this is a backref screwup. If we need to delete stuff
7826 * process_duplicates() will return 0, otherwise it will return
7829 if (process_duplicates(root, extent_cache, rec))
7831 ret = delete_duplicate_records(root, rec);
7835 * delete_duplicate_records will return the number of entries
7836 * deleted, so if it's greater than 0 then we know we actually
7837 * did something and we need to remove.
7851 cache = search_cache_extent(extent_cache, 0);
7854 rec = container_of(cache, struct extent_record, cache);
7855 if (rec->num_duplicates) {
7856 fprintf(stderr, "extent item %llu has multiple extent "
7857 "items\n", (unsigned long long)rec->start);
7862 if (rec->refs != rec->extent_item_refs) {
7863 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7864 (unsigned long long)rec->start,
7865 (unsigned long long)rec->nr);
7866 fprintf(stderr, "extent item %llu, found %llu\n",
7867 (unsigned long long)rec->extent_item_refs,
7868 (unsigned long long)rec->refs);
7869 ret = record_orphan_data_extents(root->fs_info, rec);
7876 * we can't use the extent to repair file
7877 * extent, let the fallback method handle it.
7879 if (!fixed && repair) {
7880 ret = fixup_extent_refs(
7891 if (all_backpointers_checked(rec, 1)) {
7892 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7893 (unsigned long long)rec->start,
7894 (unsigned long long)rec->nr);
7896 if (!fixed && !recorded && repair) {
7897 ret = fixup_extent_refs(root->fs_info,
7906 if (!rec->owner_ref_checked) {
7907 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7908 (unsigned long long)rec->start,
7909 (unsigned long long)rec->nr);
7910 if (!fixed && !recorded && repair) {
7911 ret = fixup_extent_refs(root->fs_info,
7920 if (rec->bad_full_backref) {
7921 fprintf(stderr, "bad full backref, on [%llu]\n",
7922 (unsigned long long)rec->start);
7924 ret = fixup_extent_flags(root->fs_info, rec);
7933 * Although it's not a extent ref's problem, we reuse this
7934 * routine for error reporting.
7935 * No repair function yet.
7937 if (rec->crossing_stripes) {
7939 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7940 rec->start, rec->start + rec->max_size);
7945 if (rec->wrong_chunk_type) {
7947 "bad extent [%llu, %llu), type mismatch with chunk\n",
7948 rec->start, rec->start + rec->max_size);
7953 remove_cache_extent(extent_cache, cache);
7954 free_all_extent_backrefs(rec);
7955 if (!init_extent_tree && repair && (!cur_err || fixed))
7956 clear_extent_dirty(root->fs_info->excluded_extents,
7958 rec->start + rec->max_size - 1,
7964 if (ret && ret != -EAGAIN) {
7965 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7968 struct btrfs_trans_handle *trans;
7970 root = root->fs_info->extent_root;
7971 trans = btrfs_start_transaction(root, 1);
7972 if (IS_ERR(trans)) {
7973 ret = PTR_ERR(trans);
7977 btrfs_fix_block_accounting(trans, root);
7978 ret = btrfs_commit_transaction(trans, root);
7983 fprintf(stderr, "repaired damaged extent references\n");
7989 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7993 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7994 stripe_size = length;
7995 stripe_size /= num_stripes;
7996 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7997 stripe_size = length * 2;
7998 stripe_size /= num_stripes;
7999 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8000 stripe_size = length;
8001 stripe_size /= (num_stripes - 1);
8002 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8003 stripe_size = length;
8004 stripe_size /= (num_stripes - 2);
8006 stripe_size = length;
8012 * Check the chunk with its block group/dev list ref:
8013 * Return 0 if all refs seems valid.
8014 * Return 1 if part of refs seems valid, need later check for rebuild ref
8015 * like missing block group and needs to search extent tree to rebuild them.
8016 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk against its block group and its
 * device extents.
 *
 * Block group: the chunk is looked up in block_group_cache->tree.  When a
 * record is found, its offset / objectid / flags are compared with the
 * chunk's length / offset / type_flags; on a match the record is unlinked
 * from its list and stored in chunk_rec->bg_rec, otherwise a mismatch
 * message is printed.  A chunk with no block group record is reported too.
 *
 * Device extents: the expected per-stripe length comes from
 * calc_stripe_length().  For each stripe, (devid, offset, length) is looked
 * up in dev_extent_cache->tree; a record whose objectid, offset,
 * chunk_offset or length disagrees is reported, a matching one is moved to
 * chunk_rec->dextents, and a missing one is reported.
 *
 * NOTE(review): this listing is missing lines (the last parameter,
 * declarations, braces, the "silent" guards, the return-code updates and
 * the return).
 */
8018 static int check_chunk_refs(struct chunk_record *chunk_rec,
8019 struct block_group_tree *block_group_cache,
8020 struct device_extent_tree *dev_extent_cache,
8023 struct cache_extent *block_group_item;
8024 struct block_group_record *block_group_rec;
8025 struct cache_extent *dev_extent_item;
8026 struct device_extent_record *dev_extent_rec;
8030 int metadump_v2 = 0;
8034 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8037 if (block_group_item) {
8038 block_group_rec = container_of(block_group_item,
8039 struct block_group_record,
8041 if (chunk_rec->length != block_group_rec->offset ||
8042 chunk_rec->offset != block_group_rec->objectid ||
8044 chunk_rec->type_flags != block_group_rec->flags)) {
8047 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8048 chunk_rec->objectid,
8053 chunk_rec->type_flags,
8054 block_group_rec->objectid,
8055 block_group_rec->type,
8056 block_group_rec->offset,
8057 block_group_rec->offset,
8058 block_group_rec->objectid,
8059 block_group_rec->flags);
8062 list_del_init(&block_group_rec->list);
8063 chunk_rec->bg_rec = block_group_rec;
8068 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8069 chunk_rec->objectid,
8074 chunk_rec->type_flags);
8081 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8082 chunk_rec->num_stripes);
8083 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8084 devid = chunk_rec->stripes[i].devid;
8085 offset = chunk_rec->stripes[i].offset;
8086 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8087 devid, offset, length);
8088 if (dev_extent_item) {
8089 dev_extent_rec = container_of(dev_extent_item,
8090 struct device_extent_record,
8092 if (dev_extent_rec->objectid != devid ||
8093 dev_extent_rec->offset != offset ||
8094 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8095 dev_extent_rec->length != length) {
8098 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8099 chunk_rec->objectid,
8102 chunk_rec->stripes[i].devid,
8103 chunk_rec->stripes[i].offset,
8104 dev_extent_rec->objectid,
8105 dev_extent_rec->offset,
8106 dev_extent_rec->length);
8109 list_move(&dev_extent_rec->chunk_list,
8110 &chunk_rec->dextents);
8115 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8116 chunk_rec->objectid,
8119 chunk_rec->stripes[i].devid,
8120 chunk_rec->stripes[i].offset);
8127 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - run check_chunk_refs() on every chunk and sort the results.
 *
 * Iterates chunk_cache from first_cache_extent() and calls
 * check_chunk_refs() on each chunk record.  A chunk with result 0 is
 * appended to @good and one with a positive result to @rebuild, when those
 * lists were supplied; the remaining visible append targets @bad.  Block
 * group records still on block_group_cache->block_groups and device extents
 * still on dev_extent_cache->no_chunk_orphans are then reported as having no
 * matching chunk.
 *
 * NOTE(review): this listing is missing lines (declarations, braces, the
 * condition guarding the append to @bad, the "silent" guards, the message
 * arguments and the return).
 */
8128 int check_chunks(struct cache_tree *chunk_cache,
8129 struct block_group_tree *block_group_cache,
8130 struct device_extent_tree *dev_extent_cache,
8131 struct list_head *good, struct list_head *bad,
8132 struct list_head *rebuild, int silent)
8134 struct cache_extent *chunk_item;
8135 struct chunk_record *chunk_rec;
8136 struct block_group_record *bg_rec;
8137 struct device_extent_record *dext_rec;
8141 chunk_item = first_cache_extent(chunk_cache);
8142 while (chunk_item) {
8143 chunk_rec = container_of(chunk_item, struct chunk_record,
8145 err = check_chunk_refs(chunk_rec, block_group_cache,
8146 dev_extent_cache, silent);
8149 if (err == 0 && good)
8150 list_add_tail(&chunk_rec->list, good);
8151 if (err > 0 && rebuild)
8152 list_add_tail(&chunk_rec->list, rebuild);
8154 list_add_tail(&chunk_rec->list, bad);
8155 chunk_item = next_cache_extent(chunk_item);
8158 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8161 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8169 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8173 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - compare a device's byte_used with its device extents.
 *
 * Starting from the first entry for dev_rec->devid in dext_cache->tree,
 * walks the device extent records, unlinks each one from its device_list
 * and adds its length to total_byte.  The walk is bounded by a check of the
 * record's objectid against dev_rec->devid.  A total that differs from
 * dev_rec->byte_used is reported.
 *
 * NOTE(review): this listing is missing lines (declarations, the loop
 * header, braces, the action taken when the objectid differs and the
 * returns).
 */
8184 static int check_device_used(struct device_record *dev_rec,
8185 struct device_extent_tree *dext_cache)
8187 struct cache_extent *cache;
8188 struct device_extent_record *dev_extent_rec;
8191 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8193 dev_extent_rec = container_of(cache,
8194 struct device_extent_record,
8196 if (dev_extent_rec->objectid != dev_rec->devid)
8199 list_del_init(&dev_extent_rec->device_list);
8200 total_byte += dev_extent_rec->length;
8201 cache = next_cache_extent(cache);
8204 if (total_byte != dev_rec->byte_used) {
8206 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8207 total_byte, dev_rec->byte_used, dev_rec->objectid,
8208 dev_rec->type, dev_rec->offset);
8215 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - verify every device record against its device extents.
 *
 * Walks dev_cache with rb_first()/rb_next() and calls check_device_used()
 * on each device record.  Device extents still linked on
 * dev_extent_cache->no_device_orphans afterwards are reported as having no
 * device.
 *
 * NOTE(review): this listing is missing lines (declarations, the loop
 * header, braces, how err is folded into the result and the return).
 */
8216 static int check_devices(struct rb_root *dev_cache,
8217 struct device_extent_tree *dev_extent_cache)
8219 struct rb_node *dev_node;
8220 struct device_record *dev_rec;
8221 struct device_extent_record *dext_rec;
8225 dev_node = rb_first(dev_cache);
8227 dev_rec = container_of(dev_node, struct device_record, node);
8228 err = check_device_used(dev_rec, dev_extent_cache);
8232 dev_node = rb_next(dev_node);
8234 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8237 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8238 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - queue a root item description on a list.
 *
 * Allocates a root_item_record, copies bytenr, objectid, level, level_size,
 * drop_level and last_snapshot into it, copies *drop_key into its drop_key
 * field and appends the record to the tail of @head.
 *
 * NOTE(review): this listing is missing lines (braces, the allocation
 * failure check, whatever guards the memcpy of drop_key, and the returns);
 * whether a NULL drop_key is tolerated cannot be confirmed from what is
 * shown.
 */
8245 static int add_root_item_to_list(struct list_head *head,
8246 u64 objectid, u64 bytenr, u64 last_snapshot,
8247 u8 level, u8 drop_level,
8248 int level_size, struct btrfs_key *drop_key)
8251 struct root_item_record *ri_rec;
8252 ri_rec = malloc(sizeof(*ri_rec));
8255 ri_rec->bytenr = bytenr;
8256 ri_rec->objectid = objectid;
8257 ri_rec->level = level;
8258 ri_rec->level_size = level_size;
8259 ri_rec->drop_level = drop_level;
8260 ri_rec->last_snapshot = last_snapshot;
8262 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8263 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - empty a list of root_item_record entries.
 *
 * Pops the first record off @list and unlinks it, until the list is empty.
 *
 * NOTE(review): this listing is missing lines (braces, the tail of the
 * list_first_entry() call and, presumably, the statement releasing the
 * record).
 */
8268 static void free_root_item_list(struct list_head *list)
8270 struct root_item_record *ri_rec;
8272 while (!list_empty(list)) {
8273 ri_rec = list_first_entry(list, struct root_item_record,
8275 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan the trees whose roots are queued on @list.
 *
 * For each root_item_record on @list the root block is read with
 * read_tree_block() (rec->bytenr, rec->level_size); a buffer that is not
 * up to date is released.  Otherwise the block is queued through
 * add_root_to_pending() under rec->objectid, run_next_block() is called
 * with that record, the buffer is released and the record is unlinked from
 * the list.  After the list has been consumed run_next_block() is called
 * again with a NULL record.
 *
 * NOTE(review): this listing is missing lines (one parameter, declarations,
 * braces, the conditions and loops around both run_next_block() calls,
 * error handling and the return).
 */
8280 static int deal_root_from_list(struct list_head *list,
8281 struct btrfs_root *root,
8282 struct block_info *bits,
8284 struct cache_tree *pending,
8285 struct cache_tree *seen,
8286 struct cache_tree *reada,
8287 struct cache_tree *nodes,
8288 struct cache_tree *extent_cache,
8289 struct cache_tree *chunk_cache,
8290 struct rb_root *dev_cache,
8291 struct block_group_tree *block_group_cache,
8292 struct device_extent_tree *dev_extent_cache)
8297 while (!list_empty(list)) {
8298 struct root_item_record *rec;
8299 struct extent_buffer *buf;
8300 rec = list_entry(list->next,
8301 struct root_item_record, list);
8303 buf = read_tree_block(root->fs_info->tree_root,
8304 rec->bytenr, rec->level_size, 0);
8305 if (!extent_buffer_uptodate(buf)) {
8306 free_extent_buffer(buf);
8310 add_root_to_pending(buf, extent_cache, pending,
8311 seen, nodes, rec->objectid);
8313 * To rebuild extent tree, we need deal with snapshot
8314 * one by one, otherwise we deal with node firstly which
8315 * can maximize readahead.
8318 ret = run_next_block(root, bits, bits_nr, &last,
8319 pending, seen, reada, nodes,
8320 extent_cache, chunk_cache,
8321 dev_cache, block_group_cache,
8322 dev_extent_cache, rec);
8326 free_extent_buffer(buf);
8327 list_del(&rec->list);
8333 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8334 reada, nodes, extent_cache, chunk_cache,
8335 dev_cache, block_group_cache,
8336 dev_extent_cache, NULL);
/*
 * Top-level pass over the chunk, device and extent metadata of the
 * filesystem that @root belongs to.
 *
 * Visible steps: initialise the local caches and lists, publish some of them
 * through root->fs_info, queue the tree root, the chunk root and every
 * ROOT_ITEM found in the tree root, process the queued roots with
 * deal_root_from_list(), then run check_chunks(), check_extent_refs() and
 * check_devices(), and finally tear the caches down and clear the fs_info
 * pointers again.
 */
8346 static int check_chunks_and_extents(struct btrfs_root *root)
8348 struct rb_root dev_cache;
8349 struct cache_tree chunk_cache;
8350 struct block_group_tree block_group_cache;
8351 struct device_extent_tree dev_extent_cache;
8352 struct cache_tree extent_cache;
8353 struct cache_tree seen;
8354 struct cache_tree pending;
8355 struct cache_tree reada;
8356 struct cache_tree nodes;
8357 struct extent_io_tree excluded_extents;
8358 struct cache_tree corrupt_blocks;
8359 struct btrfs_path path;
8360 struct btrfs_key key;
8361 struct btrfs_key found_key;
8363 struct block_info *bits;
8365 struct extent_buffer *leaf;
8367 struct btrfs_root_item ri;
8368 struct list_head dropping_trees;
8369 struct list_head normal_trees;
8370 struct btrfs_root *root1;
/* All bookkeeping structures live on the stack of this function. */
8375 dev_cache = RB_ROOT;
8376 cache_tree_init(&chunk_cache);
8377 block_group_tree_init(&block_group_cache);
8378 device_extent_tree_init(&dev_extent_cache);
8380 cache_tree_init(&extent_cache);
8381 cache_tree_init(&seen);
8382 cache_tree_init(&pending);
8383 cache_tree_init(&nodes);
8384 cache_tree_init(&reada);
8385 cache_tree_init(&corrupt_blocks);
8386 extent_io_tree_init(&excluded_extents);
8387 INIT_LIST_HEAD(&dropping_trees);
8388 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the local trees and the free-extent hook through fs_info.  These
 * pointers refer to stack objects, so they are reset to NULL again in the
 * teardown code further down.
 */
8391 root->fs_info->excluded_extents = &excluded_extents;
8392 root->fs_info->fsck_extent_cache = &extent_cache;
8393 root->fs_info->free_extent_hook = free_extent_hook;
8394 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* NOTE(review): the malloc() result needs a NULL check - confirm it exists. */
8398 bits = malloc(bits_nr * sizeof(struct block_info));
8404 if (ctx.progress_enabled) {
8405 ctx.tp = TASK_EXTENTS;
8406 task_start(ctx.info);
/* Queue the tree root first ... */
8410 root1 = root->fs_info->tree_root;
8411 level = btrfs_header_level(root1->node);
8412 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8413 root1->node->start, 0, level, 0,
8414 root1->nodesize, NULL);
/* ... then the chunk root. */
8417 root1 = root->fs_info->chunk_root;
8418 level = btrfs_header_level(root1->node);
8419 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8420 root1->node->start, 0, level, 0,
8421 root1->nodesize, NULL);
/* Walk the tree root looking for ROOT_ITEM entries to queue. */
8424 btrfs_init_path(&path);
8427 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8428 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8433 leaf = path.nodes[0];
8434 slot = path.slots[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
8435 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8436 ret = btrfs_next_leaf(root, &path);
8439 leaf = path.nodes[0];
8440 slot = path.slots[0];
8442 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8443 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8444 unsigned long offset;
/* Copy the on-disk root item out of the leaf into the local ri. */
8447 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8448 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8449 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * Roots whose drop_progress objectid is zero are queued on normal_trees.
 * The second add call below queues on dropping_trees and passes a drop key.
 */
8450 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8451 level = btrfs_root_level(&ri);
8452 level_size = root->nodesize;
8453 ret = add_root_item_to_list(&normal_trees,
8455 btrfs_root_bytenr(&ri),
8456 last_snapshot, level,
8457 0, level_size, NULL);
8461 level = btrfs_root_level(&ri);
8462 level_size = root->nodesize;
8463 objectid = found_key.objectid;
/*
 * found_key is reused to hold the drop key in CPU byte order.
 * NOTE(review): presumably converted from ri.drop_progress - confirm.
 */
8464 btrfs_disk_key_to_cpu(&found_key,
8466 ret = add_root_item_to_list(&dropping_trees,
8468 btrfs_root_bytenr(&ri),
8469 last_snapshot, level,
8471 level_size, &found_key);
8478 btrfs_release_path(&path);
8481 * check_block can return -EAGAIN if it fixes something, please keep
8482 * this in mind when dealing with return values from these functions, if
8483 * we get -EAGAIN we want to fall through and restart the loop.
8485 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8486 &seen, &reada, &nodes, &extent_cache,
8487 &chunk_cache, &dev_cache, &block_group_cache,
/* Roots queued on dropping_trees are processed after the normal ones. */
8494 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8495 &pending, &seen, &reada, &nodes,
8496 &extent_cache, &chunk_cache, &dev_cache,
8497 &block_group_cache, &dev_extent_cache);
/* Cross-check the records collected while walking the trees. */
8504 ret = check_chunks(&chunk_cache, &block_group_cache,
8505 &dev_extent_cache, NULL, NULL, NULL, 0);
8512 ret = check_extent_refs(root, &extent_cache);
8519 ret = check_devices(&dev_cache, &dev_extent_cache);
/*
 * Teardown: stop the progress task, free the caches and clear the fs_info
 * pointers that were set to stack objects above.
 * NOTE(review): two separate teardown sequences follow; the second one also
 * frees the extent record cache and both root item lists.  Presumably they
 * belong to different exit paths (see the -EAGAIN remark above) - confirm.
 */
8524 task_stop(ctx.info);
8526 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8527 extent_io_tree_cleanup(&excluded_extents);
8528 root->fs_info->fsck_extent_cache = NULL;
8529 root->fs_info->free_extent_hook = NULL;
8530 root->fs_info->corrupt_blocks = NULL;
8531 root->fs_info->excluded_extents = NULL;
8534 free_chunk_cache_tree(&chunk_cache);
8535 free_device_cache_tree(&dev_cache);
8536 free_block_group_tree(&block_group_cache);
8537 free_device_extent_tree(&dev_extent_cache);
8538 free_extent_cache_tree(&seen);
8539 free_extent_cache_tree(&pending);
8540 free_extent_cache_tree(&reada);
8541 free_extent_cache_tree(&nodes);
8544 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8545 free_extent_cache_tree(&seen);
8546 free_extent_cache_tree(&pending);
8547 free_extent_cache_tree(&reada);
8548 free_extent_cache_tree(&nodes);
8549 free_chunk_cache_tree(&chunk_cache);
8550 free_block_group_tree(&block_group_cache);
8551 free_device_cache_tree(&dev_cache);
8552 free_device_extent_tree(&dev_extent_cache);
8553 free_extent_record_cache(root->fs_info, &extent_cache);
8554 free_root_item_list(&normal_trees);
8555 free_root_item_list(&dropping_trees);
8556 extent_io_tree_cleanup(&excluded_extents);
8561 * Check backrefs of a tree block given by @bytenr or @eb.
8563 * @root: the root containing the @bytenr or @eb
8564 * @eb: tree block extent buffer, can be NULL
8565 * @bytenr: bytenr of the tree block to search
8566 * @level: tree level of the tree block
8567 * @owner: owner of the tree block
8569 * Return >0 for any error found and output error message
8570 * Return 0 for no error found
/*
 * Verify that the tree block at @bytenr has a matching backref in the
 * extent tree.
 *
 * The extent or metadata item for @bytenr is looked up first.  Its flags,
 * generation, level and reference count are compared with the expected
 * values, then the inline refs are scanned for one that names this root or
 * @owner.  When no inline ref matches, a keyed TREE_BLOCK_REF item is
 * searched for.  Problems are accumulated as BACKREF_MISSING and
 * BACKREF_MISMATCH bits in err.
 */
8572 static int check_tree_block_ref(struct btrfs_root *root,
8573 struct extent_buffer *eb, u64 bytenr,
8574 int level, u64 owner)
8576 struct btrfs_key key;
8577 struct btrfs_root *extent_root = root->fs_info->extent_root;
8578 struct btrfs_path path;
8579 struct btrfs_extent_item *ei;
8580 struct btrfs_extent_inline_ref *iref;
8581 struct extent_buffer *leaf;
8587 u32 nodesize = root->nodesize;
8594 btrfs_init_path(&path);
8595 key.objectid = bytenr;
/* The key type searched for depends on the SKINNY_METADATA feature bit. */
8596 if (btrfs_fs_incompat(root->fs_info,
8597 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8598 key.type = BTRFS_METADATA_ITEM_KEY;
8600 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Largest possible offset; btrfs_previous_extent_item() is called after the search. */
8601 key.offset = (u64)-1;
8603 /* Search for the backref in extent tree */
8604 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8606 err |= BACKREF_MISSING;
8609 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8611 err |= BACKREF_MISSING;
8615 leaf = path.nodes[0];
8616 slot = path.slots[0];
8617 btrfs_item_key_to_cpu(leaf, &key, slot);
8619 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: the level is the key offset and the inline refs start
 * right after the extent item.  Otherwise a btrfs_tree_block_info sits in
 * between and carries the level.
 */
8621 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8622 skinny_level = (int)key.offset;
8623 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8625 struct btrfs_tree_block_info *info;
8627 info = (struct btrfs_tree_block_info *)(ei + 1);
8628 skinny_level = btrfs_tree_block_level(leaf, info);
8629 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The item must be flagged as a tree block. */
8636 if (!(btrfs_extent_flags(leaf, ei) &
8637 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8639 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8640 key.objectid, nodesize,
8641 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8642 err = BACKREF_MISMATCH;
/* Generation in the block header must equal the one in the extent item. */
8644 header_gen = btrfs_header_generation(eb);
8645 extent_gen = btrfs_extent_generation(leaf, ei);
8646 if (header_gen != extent_gen) {
8648 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8649 key.objectid, nodesize, header_gen,
8651 err = BACKREF_MISMATCH;
8653 if (level != skinny_level) {
8655 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8656 key.objectid, nodesize, level, skinny_level);
8657 err = BACKREF_MISMATCH;
/* Blocks owned by a non-fs tree are expected to have exactly one reference. */
8659 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8661 "extent[%llu %u] is referred by other roots than %llu",
8662 key.objectid, nodesize, root->objectid);
8663 err = BACKREF_MISMATCH;
8668 * Iterate the extent/metadata item to find the exact backref
/* ptr walks the inline refs; end marks the end of the item. */
8670 item_size = btrfs_item_size_nr(leaf, slot);
8671 ptr = (unsigned long)iref;
8672 end = (unsigned long)ei + item_size;
8674 iref = (struct btrfs_extent_inline_ref *)ptr;
8675 type = btrfs_extent_inline_ref_type(leaf, iref);
8676 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF matches when its offset is this root id or @owner. */
8678 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8679 (offset == root->objectid || offset == owner)) {
8681 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8682 /* Check if the backref points to valid referencer */
8683 found_ref = !check_tree_block_ref(root, NULL, offset,
/* Step to the next inline ref; the step size depends on the ref type. */
8689 ptr += btrfs_extent_inline_ref_size(type);
8693 * Inlined extent item doesn't have what we need, check
8694 * TREE_BLOCK_REF_KEY
8697 btrfs_release_path(&path);
8698 key.objectid = bytenr;
8699 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8700 key.offset = root->objectid;
8702 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8707 err |= BACKREF_MISSING;
8709 btrfs_release_path(&path);
8710 if (eb && (err & BACKREF_MISSING))
8711 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8712 bytenr, nodesize, owner, level);
8717 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8719 * Return >0 for any error found and output error message
8720 * Return 0 for no error found
/*
 * Check one file extent item (the item at @slot of leaf @eb in @root)
 * against the extent tree.
 *
 * Alignment of the on-disk and logical lengths is verified, the matching
 * EXTENT_ITEM is looked up by disk bytenr and disk length, its flags and
 * generation are compared, and a data backref naming this root or the leaf
 * owner is searched for.  The inline refs are scanned first, a keyed
 * EXTENT_DATA_REF item is tried second.  Problems are accumulated as
 * BYTES_UNALIGNED, BACKREF_MISMATCH and BACKREF_MISSING bits in err.
 */
8722 static int check_extent_data_item(struct btrfs_root *root,
8723 struct extent_buffer *eb, int slot)
8725 struct btrfs_file_extent_item *fi;
8726 struct btrfs_path path;
8727 struct btrfs_root *extent_root = root->fs_info->extent_root;
8728 struct btrfs_key fi_key;
8729 struct btrfs_key dbref_key;
8730 struct extent_buffer *leaf;
8731 struct btrfs_extent_item *ei;
8732 struct btrfs_extent_inline_ref *iref;
8733 struct btrfs_extent_data_ref *dref;
8735 u64 file_extent_gen;
8738 u64 extent_num_bytes;
8746 int found_dbackref = 0;
8750 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8751 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8752 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8754 /* Nothing to check for hole and inline data extents */
/* A hole is recognised by a disk bytenr of zero. */
8755 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8756 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8759 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8760 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8761 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8763 /* Check unaligned disk_num_bytes and num_bytes */
8764 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8766 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8767 fi_key.objectid, fi_key.offset, disk_num_bytes,
8769 err |= BYTES_UNALIGNED;
/* File-scope statistic: total on-disk bytes of the data extents seen. */
8771 data_bytes_allocated += disk_num_bytes;
8773 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8775 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8776 fi_key.objectid, fi_key.offset, extent_num_bytes,
8778 err |= BYTES_UNALIGNED;
/* File-scope statistic: total bytes referenced by file extent items. */
8780 data_bytes_referenced += extent_num_bytes;
8782 owner = btrfs_header_owner(eb);
8784 /* Check the extent item of the file extent in extent tree */
8785 btrfs_init_path(&path);
8786 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8787 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8788 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8790 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8792 err |= BACKREF_MISSING;
/* From here on slot refers to the extent tree leaf, not to @eb. */
8796 leaf = path.nodes[0];
8797 slot = path.slots[0];
8798 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8800 extent_flags = btrfs_extent_flags(leaf, ei);
8801 extent_gen = btrfs_extent_generation(leaf, ei);
8803 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8805 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8806 disk_bytenr, disk_num_bytes,
8807 BTRFS_EXTENT_FLAG_DATA);
8808 err |= BACKREF_MISMATCH;
/* The file extent generation must not be older than the extent item generation. */
8811 if (file_extent_gen < extent_gen) {
8813 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8814 disk_bytenr, disk_num_bytes, file_extent_gen,
8816 err |= BACKREF_MISMATCH;
8819 /* Check data backref inside that extent item */
/* Inline refs start right after the extent item and run to the item end. */
8820 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8821 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8822 ptr = (unsigned long)iref;
8823 end = (unsigned long)ei + item_size;
8825 iref = (struct btrfs_extent_inline_ref *)ptr;
8826 type = btrfs_extent_inline_ref_type(leaf, iref);
/* For a data ref the payload begins at the offset field of the inline ref. */
8827 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8829 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8830 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8831 if (ref_root == owner || ref_root == root->objectid)
8833 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: validate the tree block named by the inline ref offset. */
8834 found_dbackref = !check_tree_block_ref(root, NULL,
8835 btrfs_extent_inline_ref_offset(leaf, iref),
8841 ptr += btrfs_extent_inline_ref_size(type);
8844 /* Did not find an inlined data backref, try EXTENT_DATA_REF_KEY */
8845 if (!found_dbackref) {
8846 btrfs_release_path(&path);
8848 btrfs_init_path(&path);
/* The keyed data ref is addressed by a hash of (root, inode, file offset). */
8849 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8850 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8851 dbref_key.offset = hash_extent_data_ref(root->objectid,
8852 fi_key.objectid, fi_key.offset);
8854 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8855 &dbref_key, &path, 0, 0);
8860 if (!found_dbackref)
8861 err |= BACKREF_MISSING;
8863 btrfs_release_path(&path);
8864 if (err & BACKREF_MISSING) {
8865 error("data extent[%llu %llu] backref lost",
8866 disk_bytenr, disk_num_bytes);
8872 * Get real tree block level for the case like shared block
8873 * Return >= 0 as tree level
8874 * Return <0 for error
/*
 * Determine the level of the tree block at @bytenr from two sources and
 * cross-check them: the backref item in the extent tree and the header of
 * the block itself.  header_level is returned once both have been read and
 * compared.
 */
8876 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8878 struct extent_buffer *eb;
8879 struct btrfs_path path;
8880 struct btrfs_key key;
8881 struct btrfs_extent_item *ei;
8884 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8889 /* Search extent tree for extent generation and level */
8890 key.objectid = bytenr;
8891 key.type = BTRFS_METADATA_ITEM_KEY;
/* Largest possible offset; btrfs_previous_extent_item() is called after the search. */
8892 key.offset = (u64)-1;
8894 btrfs_init_path(&path);
8895 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8898 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8906 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8907 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8908 struct btrfs_extent_item);
8909 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Items without the TREE_BLOCK flag get separate handling here. */
8910 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8915 /* Get transid for later read_tree_block() check */
8916 transid = btrfs_extent_generation(path.nodes[0], ei);
8918 /* Get backref level as one source */
/* METADATA_ITEM keeps the level in the key offset, otherwise it is in the block info. */
8919 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8920 backref_level = key.offset;
8922 struct btrfs_tree_block_info *info;
8924 info = (struct btrfs_tree_block_info *)(ei + 1);
8925 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8927 btrfs_release_path(&path);
8929 /* Get level from tree block as an alternative source */
8930 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8931 if (!extent_buffer_uptodate(eb)) {
8932 free_extent_buffer(eb);
8935 header_level = btrfs_header_level(eb);
8936 free_extent_buffer(eb);
/* NOTE(review): a disagreement between the two levels is presumably an error return - confirm. */
8938 if (header_level != backref_level)
8940 return header_level;
8943 btrfs_release_path(&path);
8948 * Check if a tree block backref is valid (points to a valid tree block)
8949 * if level == -1, level will be resolved
8950 * Return >0 for any error found and print error message
/*
 * Check that the tree @root_id really contains the tree block at @bytenr.
 *
 * The root is loaded, the block is read to obtain its first key, and that
 * key is searched for in the root.  The node found at @level on the search
 * path must have the expected bytenr and level.  Problems are accumulated
 * as REFERENCER_MISSING and REFERENCER_MISMATCH bits in err.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the tree expected to reference the block
 * @bytenr:  logical address of the tree block
 * @level:   expected level; query_tree_block_level() is used to resolve it
 */
8952 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8953 u64 bytenr, int level)
8955 struct btrfs_root *root;
8956 struct btrfs_key key;
8957 struct btrfs_path path;
8958 struct extent_buffer *eb;
8959 struct extent_buffer *node;
8960 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8964 /* Query level for level == -1 special case */
8966 level = query_tree_block_level(fs_info, bytenr);
8968 err |= REFERENCER_MISSING;
/* Load the referencing tree by its ROOT_ITEM key. */
8972 key.objectid = root_id;
8973 key.type = BTRFS_ROOT_ITEM_KEY;
8974 key.offset = (u64)-1;
8976 root = btrfs_read_fs_root(fs_info, &key);
8978 err |= REFERENCER_MISSING;
8982 /* Read out the tree block to get item/node key */
8983 eb = read_tree_block(root, bytenr, root->nodesize, 0);
8984 if (!extent_buffer_uptodate(eb)) {
8985 err |= REFERENCER_MISSING;
8986 free_extent_buffer(eb);
8990 /* Empty tree, no need to check key */
8991 if (!btrfs_header_nritems(eb) && !level) {
8992 free_extent_buffer(eb);
/* key is reused for the first key of the block: a node key or an item key. */
8997 btrfs_node_key_to_cpu(eb, &key, 0);
8999 btrfs_item_key_to_cpu(eb, &key, 0);
9001 free_extent_buffer(eb);
9003 btrfs_init_path(&path);
9004 /* Search with the first key, to ensure we can reach it */
9005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9007 err |= REFERENCER_MISSING;
/* The search path entry at @level should be the block under test. */
9011 node = path.nodes[level];
9012 if (btrfs_header_bytenr(node) != bytenr) {
9014 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9015 bytenr, nodesize, bytenr,
9016 btrfs_header_bytenr(node));
9017 err |= REFERENCER_MISMATCH;
9019 if (btrfs_header_level(node) != level) {
9021 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9022 bytenr, nodesize, level,
9023 btrfs_header_level(node));
9024 err |= REFERENCER_MISMATCH;
9028 btrfs_release_path(&path);
/* Two message variants exist: one without and one with the level. */
9030 if (err & REFERENCER_MISSING) {
9032 error("extent [%llu %d] lost referencer (owner: %llu)",
9033 bytenr, nodesize, root_id);
9036 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9037 bytenr, nodesize, root_id, level);
9044 * Check referencer for shared block backref
9045 * If level == -1, this function will resolve the level.
/*
 * Check that the block at @parent really points to the tree block at
 * @bytenr.
 *
 * The parent block is read, its level is compared with @level + 1, and its
 * block pointers are scanned for @bytenr.  REFERENCER_MISSING is returned,
 * after an error message, when found_parent is still zero at the end.
 */
9047 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9048 u64 parent, u64 bytenr, int level)
9050 struct extent_buffer *eb;
9051 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9053 int found_parent = 0;
9056 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9057 if (!extent_buffer_uptodate(eb))
9061 level = query_tree_block_level(fs_info, bytenr);
/* A parent must sit exactly one level above its child. */
9065 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that equals @bytenr. */
9068 nr = btrfs_header_nritems(eb);
9069 for (i = 0; i < nr; i++) {
9070 if (bytenr == btrfs_node_blockptr(eb, i)) {
9076 free_extent_buffer(eb);
9077 if (!found_parent) {
9079 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9080 bytenr, nodesize, parent, level);
9081 return REFERENCER_MISSING;
9087 * Check referencer for normal (inlined) data ref
9088 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check a data backref: count the file extent items of inode @objectid in
 * tree @root_id that refer to the data extent at @bytenr and compare the
 * result with @count.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  objectid of the tree named by the backref
 * @objectid: inode number named by the backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   start of the data extent
 * @len:      length of the data extent
 * @count:    reference count recorded in the backref
 *
 * REFERENCER_MISSING is returned, after an error message, when the number
 * of matching file extents differs from @count.
 */
9090 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9091 u64 root_id, u64 objectid, u64 offset,
9092 u64 bytenr, u64 len, u32 count)
9094 struct btrfs_root *root;
9095 struct btrfs_root *extent_root = fs_info->extent_root;
9096 struct btrfs_key key;
9097 struct btrfs_path path;
9098 struct extent_buffer *leaf;
9099 struct btrfs_file_extent_item *fi;
9100 u32 found_count = 0;
/*
 * Look the extent item up in the extent tree.
 * NOTE(review): presumably this lookup only runs to resolve a zero @len -
 * confirm.
 */
9105 key.objectid = bytenr;
9106 key.type = BTRFS_EXTENT_ITEM_KEY;
9107 key.offset = (u64)-1;
9109 btrfs_init_path(&path);
9110 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9113 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9116 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9117 if (key.objectid != bytenr ||
9118 key.type != BTRFS_EXTENT_ITEM_KEY)
9121 btrfs_release_path(&path);
/* Load the tree that the backref names. */
9123 key.objectid = root_id;
9124 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9125 key.offset = (u64)-1;
9126 btrfs_init_path(&path);
9128 root = btrfs_read_fs_root(fs_info, &key);
9132 key.objectid = objectid;
9133 key.type = BTRFS_EXTENT_DATA_KEY;
9135 * It can be nasty as data backref offset is
9136 * file offset - file extent offset, which is smaller or
9137 * equal to original backref offset. The only special case is
9138 * overflow. So we need to special check and do further search.
9140 key.offset = offset & (1ULL << 63) ? 0 : offset;
9142 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9147 * Search afterwards to get correct one
9148 * NOTE: As we must do a comprehensive check on the data backref to
9149 * make sure the dref count also matches, we must iterate all file
9150 * extents for that inode.
9153 leaf = path.nodes[0];
9154 slot = path.slots[0];
9156 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Only EXTENT_DATA items of the requested inode are of interest. */
9157 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9159 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9161 * Except normal disk bytenr and disk num bytes, we still
9162 * need to do extra check on dbackref offset as
9163 * dbackref offset = file_offset - file_extent_offset
9165 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9166 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9167 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9171 ret = btrfs_next_item(root, &path);
9176 btrfs_release_path(&path);
9177 if (found_count != count) {
9179 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9180 bytenr, len, root_id, objectid, offset, count, found_count);
9181 return REFERENCER_MISSING;
9187 * Check if the referencer of a shared data backref exists
/*
 * Check that the leaf at @parent holds a file extent item that refers to
 * the data extent at @bytenr.
 *
 * Every item of the leaf is visited.  Items that are not EXTENT_DATA and
 * inline file extents are tested for separately; the disk bytenr of the
 * others is compared with @bytenr.  REFERENCER_MISSING is returned, after
 * an error message, when found_parent is still zero at the end.
 */
9189 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9190 u64 parent, u64 bytenr)
9192 struct extent_buffer *eb;
9193 struct btrfs_key key;
9194 struct btrfs_file_extent_item *fi;
9195 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9197 int found_parent = 0;
9200 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9201 if (!extent_buffer_uptodate(eb))
9204 nr = btrfs_header_nritems(eb);
9205 for (i = 0; i < nr; i++) {
9206 btrfs_item_key_to_cpu(eb, &key, i);
9207 if (key.type != BTRFS_EXTENT_DATA_KEY)
9210 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9211 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9214 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9221 free_extent_buffer(eb);
9222 if (!found_parent) {
9223 error("shared extent %llu referencer lost (parent: %llu)",
9225 return REFERENCER_MISSING;
9231 * This function will check a given extent item, including its backref and
9232 * itself (like crossing stripe boundary and type)
9234 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Check one EXTENT_ITEM or METADATA_ITEM (the item at @slot of leaf @eb).
 *
 * The extent is added to the file-scope bytes_used counter, a metadata
 * extent is tested for crossing a stripe boundary, and every inline backref
 * of the item is dispatched to the checker for its type.  Problems are
 * accumulated as bits in err.
 */
9236 static int check_extent_item(struct btrfs_fs_info *fs_info,
9237 struct extent_buffer *eb, int slot)
9239 struct btrfs_extent_item *ei;
9240 struct btrfs_extent_inline_ref *iref;
9241 struct btrfs_extent_data_ref *dref;
9245 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9246 u32 item_size = btrfs_item_size_nr(eb, slot);
9251 struct btrfs_key key;
9255 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in the offset; otherwise nodesize is added. */
9256 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9257 bytes_used += key.offset;
9259 bytes_used += nodesize;
9261 if (item_size < sizeof(*ei)) {
9263 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9264 * old thing when on disk format is still un-determined.
9265 * No need to care about it anymore
9267 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9271 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9272 flags = btrfs_extent_flags(eb, ei);
9274 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/*
 * NOTE(review): the stripe check is given eb->len while the message uses
 * nodesize; confirm which length is intended for the extent at key.objectid.
 */
9276 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9277 error("bad metadata [%llu, %llu) crossing stripe boundary",
9278 key.objectid, key.objectid + nodesize);
9279 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the fixed-size extent item. */
9282 ptr = (unsigned long)(ei + 1);
9284 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9285 /* Old EXTENT_ITEM metadata */
9286 struct btrfs_tree_block_info *info;
9288 info = (struct btrfs_tree_block_info *)ptr;
9289 level = btrfs_tree_block_level(eb, info);
9290 ptr += sizeof(struct btrfs_tree_block_info);
9292 /* New METADATA_ITEM */
9295 end = (unsigned long)ei + item_size;
9298 err |= ITEM_SIZE_MISMATCH;
9302 /* Now check every backref in this extent item */
9304 iref = (struct btrfs_extent_inline_ref *)ptr;
9305 type = btrfs_extent_inline_ref_type(eb, iref);
9306 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Each ref type has its own checker; unknown types are reported. */
9308 case BTRFS_TREE_BLOCK_REF_KEY:
9309 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9313 case BTRFS_SHARED_BLOCK_REF_KEY:
9314 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9318 case BTRFS_EXTENT_DATA_REF_KEY:
/* For a data ref the payload begins at the offset field of the inline ref. */
9319 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9320 ret = check_extent_data_backref(fs_info,
9321 btrfs_extent_data_ref_root(eb, dref),
9322 btrfs_extent_data_ref_objectid(eb, dref),
9323 btrfs_extent_data_ref_offset(eb, dref),
9324 key.objectid, key.offset,
9325 btrfs_extent_data_ref_count(eb, dref));
9328 case BTRFS_SHARED_DATA_REF_KEY:
9329 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9333 error("extent[%llu %d %llu] has unknown ref type: %d",
9334 key.objectid, key.type, key.offset, type);
9335 err |= UNKNOWN_TYPE;
/* Step to the next inline ref; the step size depends on the ref type. */
9339 ptr += btrfs_extent_inline_ref_size(type);
9348 * Check if a dev extent item is referred correctly by its chunk
/*
 * Check that the chunk named by a dev extent item (the item at @slot of
 * leaf @eb) exists and refers back to it.
 *
 * The chunk is looked up in the chunk tree using the chunk objectid and
 * chunk offset stored in the dev extent.  Its length is compared with the
 * dev extent length, and its stripes are scanned for one whose devid and
 * offset equal the objectid and offset of the dev extent key.
 */
9350 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9351 struct extent_buffer *eb, int slot)
9353 struct btrfs_root *chunk_root = fs_info->chunk_root;
9354 struct btrfs_dev_extent *ptr;
9355 struct btrfs_path path;
9356 struct btrfs_key chunk_key;
9357 struct btrfs_key devext_key;
9358 struct btrfs_chunk *chunk;
9359 struct extent_buffer *l;
9363 int found_chunk = 0;
9366 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9367 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9368 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the fields stored in the dev extent. */
9370 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9371 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9372 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9374 btrfs_init_path(&path);
9375 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9380 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9381 if (btrfs_chunk_length(l, chunk) != length)
/* Look for the stripe that lives on this device at this offset. */
9384 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9385 for (i = 0; i < num_stripes; i++) {
9386 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9387 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9389 if (devid == devext_key.objectid &&
9390 offset == devext_key.offset) {
9396 btrfs_release_path(&path);
9399 "device extent[%llu, %llu, %llu] did not find the related chunk",
9400 devext_key.objectid, devext_key.offset, length);
9401 return REFERENCER_MISSING;
9407 * Check if the used space is correct with the dev item
/*
 * Check the bytes-used value of a dev item (the item at @slot of leaf @eb)
 * against the device tree.
 *
 * The lengths of the DEV_EXTENT items of the device are summed and compared
 * with the bytes_used field of the dev item.  ACCOUNTING_MISMATCH is
 * returned when they differ, REFERENCER_MISSING when the initial search
 * fails.
 */
9409 static int check_dev_item(struct btrfs_fs_info *fs_info,
9410 struct extent_buffer *eb, int slot)
9412 struct btrfs_root *dev_root = fs_info->dev_root;
9413 struct btrfs_dev_item *dev_item;
9414 struct btrfs_path path;
9415 struct btrfs_key key;
9416 struct btrfs_dev_extent *ptr;
9422 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9423 dev_id = btrfs_device_id(eb, dev_item);
9424 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id in the device tree. */
9426 key.objectid = dev_id;
9427 key.type = BTRFS_DEV_EXTENT_KEY;
9430 btrfs_init_path(&path);
9431 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the item key. */
9433 btrfs_item_key_to_cpu(eb, &key, slot);
9434 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9435 key.objectid, key.type, key.offset);
9436 btrfs_release_path(&path);
9437 return REFERENCER_MISSING;
9440 /* Iterate dev_extents to calculate the used space of a device */
9442 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9444 if (key.objectid > dev_id)
9446 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9449 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9450 struct btrfs_dev_extent);
9451 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9453 ret = btrfs_next_item(dev_root, &path);
9457 btrfs_release_path(&path);
9459 if (used != total) {
/*
 * NOTE(review): key is reloaded from the dev item here, but the message
 * arguments below are BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and
 * dev_id rather than the fields of key.  Check whether key.objectid,
 * key.type and key.offset were intended, as in the earlier message.
 */
9460 btrfs_item_key_to_cpu(eb, &key, slot);
9462 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9463 total, used, BTRFS_ROOT_TREE_OBJECTID,
9464 BTRFS_DEV_EXTENT_KEY, dev_id);
9465 return ACCOUNTING_MISMATCH;
9471 * Check a block group item with its referencer (chunk) and its used space
9472 * with extent/metadata item
/*
 * Check one block group item (the item at @slot of leaf @eb).
 *
 * Two things are verified.  First, a chunk item exists at the block group
 * start and its length is compared.  Second, the extent and metadata items
 * inside the block group range are summed, the sum is compared with the
 * used field of the block group, and each extent is checked to be of a type
 * compatible with the block group flags.  Problems are accumulated as
 * REFERENCER_MISSING, REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH and
 * ACCOUNTING_MISMATCH bits in err.
 */
9474 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9475 struct extent_buffer *eb, int slot)
9477 struct btrfs_root *extent_root = fs_info->extent_root;
9478 struct btrfs_root *chunk_root = fs_info->chunk_root;
9479 struct btrfs_block_group_item *bi;
9480 struct btrfs_block_group_item bg_item;
9481 struct btrfs_path path;
9482 struct btrfs_key bg_key;
9483 struct btrfs_key chunk_key;
9484 struct btrfs_key extent_key;
9485 struct btrfs_chunk *chunk;
9486 struct extent_buffer *leaf;
9487 struct btrfs_extent_item *ei;
9488 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9496 btrfs_item_key_to_cpu(eb, &bg_key, slot);
/* Copy the item out of the leaf so its fields can be read from bg_item. */
9497 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9498 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9499 used = btrfs_block_group_used(&bg_item);
9500 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the start address of the block group. */
9502 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9503 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9504 chunk_key.offset = bg_key.objectid;
9506 btrfs_init_path(&path);
9507 /* Search for the referencer chunk */
9508 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9511 "block group[%llu %llu] did not find the related chunk item",
9512 bg_key.objectid, bg_key.offset);
9513 err |= REFERENCER_MISSING;
9515 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9516 struct btrfs_chunk);
9517 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9520 "block group[%llu %llu] related chunk item length does not match",
9521 bg_key.objectid, bg_key.offset);
9522 err |= REFERENCER_MISMATCH;
9525 btrfs_release_path(&path);
9527 /* Search from the block group bytenr */
9528 extent_key.objectid = bg_key.objectid;
9529 extent_key.type = 0;
9530 extent_key.offset = 0;
9532 btrfs_init_path(&path);
9533 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9537 /* Iterate extent tree to account used space */
9539 leaf = path.nodes[0];
9540 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.offset is used as the block group length: test for the range end. */
9541 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only METADATA_ITEM and EXTENT_ITEM keys take part in the accounting. */
9544 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9545 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9547 if (extent_key.objectid < bg_key.objectid)
9550 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9553 total += extent_key.offset;
9555 ei = btrfs_item_ptr(leaf, path.slots[0],
9556 struct btrfs_extent_item);
9557 flags = btrfs_extent_flags(leaf, ei);
/* Data extents belong in DATA block groups, tree blocks in SYSTEM or METADATA ones. */
9558 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9559 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9561 "bad extent[%llu, %llu) type mismatch with chunk",
9562 extent_key.objectid,
9563 extent_key.objectid + extent_key.offset);
9564 err |= CHUNK_TYPE_MISMATCH;
9566 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9567 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9568 BTRFS_BLOCK_GROUP_METADATA))) {
9570 "bad extent[%llu, %llu) type mismatch with chunk",
9571 extent_key.objectid,
9572 extent_key.objectid + nodesize);
9573 err |= CHUNK_TYPE_MISMATCH;
9577 ret = btrfs_next_item(extent_root, &path);
9583 btrfs_release_path(&path);
9585 if (total != used) {
9587 "block group[%llu %llu] used %llu but extent items used %llu",
9588 bg_key.objectid, bg_key.offset, used, total);
9589 err |= ACCOUNTING_MISMATCH;
9595 * Check a chunk item.
9596 * Including checking all referred dev_extents and block group
9598 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9599 struct extent_buffer *eb, int slot)
/*
 * Validate the chunk item stored at @slot of leaf @eb.
 *
 * Visible checks: the chunk length against the super block sectorsize, the
 * chunk type and profile bits, the block group item keyed by the chunk's
 * start and length in the extent tree, and one dev extent per stripe in the
 * device tree.  Every problem found is OR-ed into err as a flag bit.
 *
 * NOTE(review): the per-stripe message prints chunk_key.objectid as the range
 * start, while every other message in this function prints chunk_key.offset;
 * confirm which one is intended.
 */
9601 struct btrfs_root *extent_root = fs_info->extent_root;
9602 struct btrfs_root *dev_root = fs_info->dev_root;
9603 struct btrfs_path path;
9604 struct btrfs_key chunk_key;
9605 struct btrfs_key bg_key;
9606 struct btrfs_key devext_key;
9607 struct btrfs_chunk *chunk;
9608 struct extent_buffer *leaf;
9609 struct btrfs_block_group_item *bi;
9610 struct btrfs_block_group_item bg_item;
9611 struct btrfs_dev_extent *ptr;
9612 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9624 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9625 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9626 length = btrfs_chunk_length(eb, chunk);
9627 chunk_end = chunk_key.offset + length;
9628 if (!IS_ALIGNED(length, sectorsize)) {
9629 error("chunk[%llu %llu) not aligned to %u",
9630 chunk_key.offset, chunk_end, sectorsize);
9631 err |= BYTES_UNALIGNED;
/* Split the chunk flags: profile holds only the profile bits of type */
9635 type = btrfs_chunk_type(eb, chunk);
9636 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9637 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9638 error("chunk[%llu %llu) has no chunk type",
9639 chunk_key.offset, chunk_end);
9640 err |= UNKNOWN_TYPE;
/* x & (x - 1) is non-zero only when x has more than one bit set */
9642 if (profile && (profile & (profile - 1))) {
9643 error("chunk[%llu %llu) multiple profiles detected: %llx",
9644 chunk_key.offset, chunk_end, profile);
9645 err |= UNKNOWN_TYPE;
/* The related block group item is keyed by the chunk's start and length */
9648 bg_key.objectid = chunk_key.offset;
9649 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9650 bg_key.offset = length;
9652 btrfs_init_path(&path);
9653 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9656 "chunk[%llu %llu) did not find the related block group item",
9657 chunk_key.offset, chunk_end);
9658 err |= REFERENCER_MISSING;
9660 leaf = path.nodes[0];
9661 bi = btrfs_item_ptr(leaf, path.slots[0],
9662 struct btrfs_block_group_item);
9663 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9665 if (btrfs_block_group_flags(&bg_item) != type) {
9667 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9668 chunk_key.offset, chunk_end, type,
9669 btrfs_block_group_flags(&bg_item));
9670 err |= REFERENCER_MISSING;
/* Each stripe is looked up as (devid, DEV_EXTENT, stripe offset) in dev_root */
9674 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9675 for (i = 0; i < num_stripes; i++) {
9676 btrfs_release_path(&path);
9677 btrfs_init_path(&path);
9678 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9679 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9680 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9682 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9687 leaf = path.nodes[0];
9688 ptr = btrfs_item_ptr(leaf, path.slots[0],
9689 struct btrfs_dev_extent);
9690 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9691 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9692 if (objectid != chunk_key.objectid ||
9693 offset != chunk_key.offset ||
9694 btrfs_dev_extent_length(leaf, ptr) != length)
9698 err |= BACKREF_MISSING;
9700 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9701 chunk_key.objectid, chunk_end, i);
9704 btrfs_release_path(&path);
9710 * Main entry function to check known items and update related accounting info
9712 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
/*
 * Look at the key type of the item at the current slot of @eb and hand the
 * item to the checker matching that type.  BTRFS_EXTENT_CSUM_KEY items are
 * not passed to a checker here; their item size is only added to
 * total_csum_bytes.
 */
9714 struct btrfs_fs_info *fs_info = root->fs_info;
9715 struct btrfs_key key;
9718 struct btrfs_extent_data_ref *dref;
9723 btrfs_item_key_to_cpu(eb, &key, slot);
9724 type = btrfs_key_type(&key);
9727 case BTRFS_EXTENT_DATA_KEY:
9728 ret = check_extent_data_item(root, eb, slot);
9731 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9732 ret = check_block_group_item(fs_info, eb, slot);
9735 case BTRFS_DEV_ITEM_KEY:
9736 ret = check_dev_item(fs_info, eb, slot);
9739 case BTRFS_CHUNK_ITEM_KEY:
9740 ret = check_chunk_item(fs_info, eb, slot);
9743 case BTRFS_DEV_EXTENT_KEY:
9744 ret = check_dev_extent_item(fs_info, eb, slot);
9747 case BTRFS_EXTENT_ITEM_KEY:
9748 case BTRFS_METADATA_ITEM_KEY:
9749 ret = check_extent_item(fs_info, eb, slot);
9752 case BTRFS_EXTENT_CSUM_KEY:
9753 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9755 case BTRFS_TREE_BLOCK_REF_KEY:
9756 ret = check_tree_block_backref(fs_info, key.offset,
9760 case BTRFS_EXTENT_DATA_REF_KEY:
9761 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9762 ret = check_extent_data_backref(fs_info,
9763 btrfs_extent_data_ref_root(eb, dref),
9764 btrfs_extent_data_ref_objectid(eb, dref),
9765 btrfs_extent_data_ref_offset(eb, dref),
9767 btrfs_extent_data_ref_count(eb, dref));
9770 case BTRFS_SHARED_BLOCK_REF_KEY:
9771 ret = check_shared_block_backref(fs_info, key.offset,
9775 case BTRFS_SHARED_DATA_REF_KEY:
9776 ret = check_shared_data_backref(fs_info, key.offset,
/* Pre-increment slot and compare it with the number of items in the leaf */
9784 if (++slot < btrfs_header_nritems(eb))
9791 * Helper function for later fs/subvol tree check. To determine if a tree
9792 * block should be checked.
9793 * This function ensures that only the direct referencer with the lowest rootid
9794 * checks a fs/subvolume tree block.
9796 * Backref check at extent tree would detect errors like missing subvolume
9797 * tree, so we can do aggressive check to reduce duplicated checks.
9799 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
/*
 * Find the extent item describing @eb in the extent tree and walk its inline
 * references, comparing the offset of every BTRFS_TREE_BLOCK_REF_KEY entry
 * with root->objectid.
 */
9801 struct btrfs_root *extent_root = root->fs_info->extent_root;
9802 struct btrfs_key key;
9803 struct btrfs_path path;
9804 struct extent_buffer *leaf;
9806 struct btrfs_extent_item *ei;
9812 struct btrfs_extent_inline_ref *iref;
9815 btrfs_init_path(&path);
/*
 * Search with the largest possible offset for this bytenr; the extent item
 * itself is then reached through btrfs_previous_extent_item() below.
 */
9816 key.objectid = btrfs_header_bytenr(eb);
9817 key.type = BTRFS_METADATA_ITEM_KEY;
9818 key.offset = (u64)-1;
9821 * Any failure in backref resolving means we can't determine
9822 * whom the tree block belongs to.
9823 * So in that case, we need to check that tree block
9825 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9829 ret = btrfs_previous_extent_item(extent_root, &path,
9830 btrfs_header_bytenr(eb));
9834 leaf = path.nodes[0];
9835 slot = path.slots[0];
9836 btrfs_item_key_to_cpu(leaf, &key, slot);
9837 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM key the first inline ref follows the extent item
 * directly; otherwise a btrfs_tree_block_info sits in between.
 */
9839 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9840 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9842 struct btrfs_tree_block_info *info;
9844 info = (struct btrfs_tree_block_info *)(ei + 1);
9845 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* ptr starts at the first inline ref, end is the end of the whole item */
9848 item_size = btrfs_item_size_nr(leaf, slot);
9849 ptr = (unsigned long)iref;
9850 end = (unsigned long)ei + item_size;
9852 iref = (struct btrfs_extent_inline_ref *)ptr;
9853 type = btrfs_extent_inline_ref_type(leaf, iref);
9854 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9857 * We only check the tree block if current root is
9858 * the lowest referencer of it.
9860 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9861 offset < root->objectid) {
9862 btrfs_release_path(&path);
9866 ptr += btrfs_extent_inline_ref_size(type);
9869 * Normally we should also check keyed tree block ref, but that may be
9870 * very time consuming. Inlined ref should already make us skip a lot
9871 * of refs now. So skip search keyed tree block ref.
9875 btrfs_release_path(&path);
9880 * Traversal function for tree block. We will do:
9881 * 1) Skip shared fs/subvolume tree blocks
9882 * 2) Update related bytes accounting
9883 * 3) Pre-order traversal
9885 static int traverse_tree_block(struct btrfs_root *root,
9886 struct extent_buffer *node)
/*
 * Check tree block @node of @root and the blocks below it.
 *
 * Visible steps: consult should_check() when root is a fs tree, add
 * node->len to the global byte counters, run check_tree_block_ref() on the
 * block itself, check leaf items with check_leaf_items(), and read child
 * blocks with read_tree_block() to traverse them recursively.
 */
9888 struct extent_buffer *eb;
9896 * Skip shared fs/subvolume tree block, in that case they will
9897 * be checked by referencer with lowest rootid
9899 if (is_fstree(root->objectid) && !should_check(root, node))
9902 /* Update bytes accounting */
9903 total_btree_bytes += node->len;
9904 if (fs_root_objectid(btrfs_header_owner(node)))
9905 total_fs_tree_bytes += node->len;
9906 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9907 total_extent_tree_bytes += node->len;
/*
 * Set found_old_backref once, for a block owned by the reloc tree that has
 * the mixed backref revision but not BTRFS_HEADER_FLAG_RELOC.
 */
9908 if (!found_old_backref &&
9909 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9910 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9911 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9912 found_old_backref = 1;
9914 /* pre-order traversal, check itself first */
9915 level = btrfs_header_level(node);
9916 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9917 btrfs_header_level(node),
9918 btrfs_header_owner(node));
9922 "check %s failed root %llu bytenr %llu level %d, force continue check",
9923 level ? "node":"leaf", root->objectid,
9924 btrfs_header_bytenr(node), btrfs_header_level(node));
9927 btree_space_waste += btrfs_leaf_free_space(root, node);
9928 ret = check_leaf_items(root, node);
/* Unused key pointer slots are counted as wasted btree space */
9933 nr = btrfs_header_nritems(node);
9934 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
9935 sizeof(struct btrfs_key_ptr);
9937 /* Then check all its children */
9938 for (i = 0; i < nr; i++) {
9939 u64 blocknr = btrfs_node_blockptr(node, i);
9942 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
9943 * to call the function itself.
9945 eb = read_tree_block(root, blocknr, root->nodesize, 0);
9946 if (extent_buffer_uptodate(eb)) {
9947 ret = traverse_tree_block(root, eb);
9950 free_extent_buffer(eb);
9957 * Low memory usage version of check_chunks_and_extents.
9959 static int check_chunks_and_extents_v2(struct btrfs_root *root)
/*
 * Traverse the chunk tree and the tree root first, then iterate the root
 * items of the tree root starting at BTRFS_EXTENT_TREE_OBJECTID, reading each
 * referenced tree with btrfs_read_fs_root() and traversing it as well.
 *
 * NOTE(review): the "cannot find extent treet" message below looks like a
 * typo for "tree".
 */
9961 struct btrfs_path path;
9962 struct btrfs_key key;
9963 struct btrfs_root *root1;
9964 struct btrfs_root *cur_root;
9968 root1 = root->fs_info->chunk_root;
9969 ret = traverse_tree_block(root1, root1->node);
9972 root1 = root->fs_info->tree_root;
9973 ret = traverse_tree_block(root1, root1->node);
/* root1 is still the tree root here; scan its root items */
9976 btrfs_init_path(&path);
9977 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
9979 key.type = BTRFS_ROOT_ITEM_KEY;
9981 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
9983 error("cannot find extent treet in tree_root");
9988 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9989 if (key.type != BTRFS_ROOT_ITEM_KEY)
9991 key.offset = (u64)-1;
9993 cur_root = btrfs_read_fs_root(root->fs_info, &key);
9994 if (IS_ERR(cur_root) || !cur_root) {
9995 error("failed to read tree: %lld", key.objectid);
9999 ret = traverse_tree_block(cur_root, cur_root->node);
10003 ret = btrfs_next_item(root1, &path);
10009 btrfs_release_path(&path);
/*
 * Give @root a root block with a freshly initialized header.
 *
 * Block c is either an existing buffer or one allocated with
 * btrfs_alloc_free_block() (the choice is presumably driven by @overwrite;
 * confirm).  Its header is zeroed and refilled with level, bytenr,
 * generation, backref revision, owner, fsid and chunk tree uuid, and the
 * buffer is marked dirty.  When c has the same start as the old root node,
 * the root item is updated in the tree root with btrfs_update_root().
 */
10013 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10014 struct btrfs_root *root, int overwrite)
10016 struct extent_buffer *c;
10017 struct extent_buffer *old = root->node;
10020 struct btrfs_disk_key disk_key = {0,0,0};
10026 extent_buffer_get(c);
10029 c = btrfs_alloc_free_block(trans, root,
10031 root->root_key.objectid,
10032 &disk_key, level, 0, 0);
10035 extent_buffer_get(c);
/* Wipe the whole header, then set each field explicitly */
10039 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10040 btrfs_set_header_level(c, level);
10041 btrfs_set_header_bytenr(c, c->start);
10042 btrfs_set_header_generation(c, trans->transid);
10043 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10044 btrfs_set_header_owner(c, root->root_key.objectid);
10046 write_extent_buffer(c, root->fs_info->fsid,
10047 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10049 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10050 btrfs_header_chunk_tree_uuid(c),
10053 btrfs_mark_buffer_dirty(c);
10055 * this case can happen in the following case:
10057 * 1.overwrite previous root.
10059 * 2.reinit reloc data root, this is because we skip pin
10060 * down reloc data tree before which means we can allocate
10061 * same block bytenr here.
10063 if (old->start == c->start) {
10064 btrfs_set_root_generation(&root->root_item,
10066 root->root_item.level = btrfs_header_level(root->node);
10067 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10068 &root->root_key, &root->root_item);
10070 free_extent_buffer(c);
10074 free_extent_buffer(old);
10076 add_root_to_dirty_list(root);
/*
 * Pin the extent of tree block @eb and walk the blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is detected with test_range_bit() before pinning.
 * Root items found in @eb (other than the extent root and the two reloc
 * roots) have their root block read and walked with tree_root == 0.  Node
 * pointers are followed with the caller's @tree_root value, except that
 * children of a level-1 block are pinned without being read when @tree_root
 * is 0.
 */
10080 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10081 struct extent_buffer *eb, int tree_root)
10083 struct extent_buffer *tmp;
10084 struct btrfs_root_item *ri;
10085 struct btrfs_key key;
10088 int level = btrfs_header_level(eb);
10094 * If we have pinned this block before, don't pin it again.
10095 * This can not only avoid forever loop with broken filesystem
10096 * but also give us some speedups.
10098 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10099 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10102 btrfs_pin_extent(fs_info, eb->start, eb->len);
10104 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10105 nritems = btrfs_header_nritems(eb);
10106 for (i = 0; i < nritems; i++) {
10108 btrfs_item_key_to_cpu(eb, &key, i);
10109 if (key.type != BTRFS_ROOT_ITEM_KEY)
10111 /* Skip the extent root and reloc roots */
10112 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10113 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10114 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
/* bytenr is the root block location stored in the root item */
10116 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10117 bytenr = btrfs_disk_root_bytenr(eb, ri);
10120 * If at any point we start needing the real root we
10121 * will have to build a stump root for the root we are
10122 * in, but for now this doesn't actually use the root so
10123 * just pass in extent_root.
10125 tmp = read_tree_block(fs_info->extent_root, bytenr,
10127 if (!extent_buffer_uptodate(tmp)) {
10128 fprintf(stderr, "Error reading root block\n");
10131 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10132 free_extent_buffer(tmp);
10136 bytenr = btrfs_node_blockptr(eb, i);
10138 /* If we aren't the tree root don't read the block */
10139 if (level == 1 && !tree_root) {
10140 btrfs_pin_extent(fs_info, bytenr, nodesize);
10144 tmp = read_tree_block(fs_info->extent_root, bytenr,
10146 if (!extent_buffer_uptodate(tmp)) {
10147 fprintf(stderr, "Error reading tree block\n");
10150 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10151 free_extent_buffer(tmp);
/*
 * Pin the blocks reachable from the chunk root (tree_root argument 0), then
 * those reachable from the tree root (tree_root argument 1).
 */
10160 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10164 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10168 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits fields of @fs_info are cleared.  Every
 * BTRFS_CHUNK_ITEM_KEY item found in the chunk root becomes a block group via
 * btrfs_add_block_group(), and its range is marked dirty in
 * fs_info->free_space_cache.  Finally the block groups are walked with
 * btrfs_lookup_first_block_group().
 */
10171 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10173 struct btrfs_block_group_cache *cache;
10174 struct btrfs_path *path;
10175 struct extent_buffer *leaf;
10176 struct btrfs_chunk *chunk;
10177 struct btrfs_key key;
10181 path = btrfs_alloc_path();
10186 key.type = BTRFS_CHUNK_ITEM_KEY;
10189 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10191 btrfs_free_path(path);
10196 * We do this in case the block groups were screwed up and had alloc
10197 * bits that aren't actually set on the chunks. This happens with
10198 * restored images every time and could happen in real life I guess.
10200 fs_info->avail_data_alloc_bits = 0;
10201 fs_info->avail_metadata_alloc_bits = 0;
10202 fs_info->avail_system_alloc_bits = 0;
10204 /* First we need to create the in-memory block groups */
10206 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10207 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10209 btrfs_free_path(path);
10217 leaf = path->nodes[0];
10218 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10219 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* The chunk key's offset and the chunk length give the block group range */
10224 chunk = btrfs_item_ptr(leaf, path->slots[0],
10225 struct btrfs_chunk);
10226 btrfs_add_block_group(fs_info, 0,
10227 btrfs_chunk_type(leaf, chunk),
10228 key.objectid, key.offset,
10229 btrfs_chunk_length(leaf, chunk));
10230 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10231 key.offset + btrfs_chunk_length(leaf, chunk),
10237 cache = btrfs_lookup_first_block_group(fs_info, start);
10241 start = cache->key.objectid + cache->key.offset;
10244 btrfs_free_path(path);
/*
 * Remove pending balance state from the tree root and re-initialize the data
 * relocation tree.
 *
 * Visible steps: search for the (BTRFS_BALANCE_OBJECTID,
 * BTRFS_BALANCE_ITEM_KEY) item and delete it, delete the items whose objectid
 * is BTRFS_TREE_RELOC_OBJECTID in batches through btrfs_del_items(), then
 * read the data reloc root, re-initialize it with btrfs_fsck_reinit_root()
 * and create its root directory with btrfs_make_root_dir().
 */
10248 static int reset_balance(struct btrfs_trans_handle *trans,
10249 struct btrfs_fs_info *fs_info)
10251 struct btrfs_root *root = fs_info->tree_root;
10252 struct btrfs_path *path;
10253 struct extent_buffer *leaf;
10254 struct btrfs_key key;
10255 int del_slot, del_nr = 0;
10259 path = btrfs_alloc_path();
10263 key.objectid = BTRFS_BALANCE_OBJECTID;
10264 key.type = BTRFS_BALANCE_ITEM_KEY;
10267 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10272 goto reinit_data_reloc;
10277 ret = btrfs_del_item(trans, root, path);
10280 btrfs_release_path(path);
/* Next, look for the root items of the tree reloc roots */
10282 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10283 key.type = BTRFS_ROOT_ITEM_KEY;
10286 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10290 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10295 ret = btrfs_del_items(trans, root, path,
10302 btrfs_release_path(path);
10305 ret = btrfs_search_slot(trans, root, &key, path,
10312 leaf = path->nodes[0];
10313 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10314 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10316 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10321 del_slot = path->slots[0];
10330 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10334 btrfs_release_path(path);
/* From here on, root refers to the data reloc tree instead of the tree root */
10337 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10338 key.type = BTRFS_ROOT_ITEM_KEY;
10339 key.offset = (u64)-1;
10340 root = btrfs_read_fs_root(fs_info, &key);
10341 if (IS_ERR(root)) {
10342 fprintf(stderr, "Error reading data reloc tree\n");
10343 ret = PTR_ERR(root);
10346 record_root_in_trans(trans, root);
10347 ret = btrfs_fsck_reinit_root(trans, root, 0);
10350 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10352 btrfs_free_path(path);
/*
 * Rebuild the extent tree of @fs_info.
 *
 * Filesystems with BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS are reported as
 * unsupported.  Otherwise the in-use metadata is pinned with
 * pin_metadata_blocks(), the block groups are freed and recreated with
 * reset_block_groups(), the extent root is re-initialized, one block group
 * item per in-memory block group is inserted into the extent root, and the
 * pending balance state is reset with reset_balance().
 */
10356 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10357 struct btrfs_fs_info *fs_info)
10363 * The only reason we don't do this is because right now we're just
10364 * walking the trees we find and pinning down their bytes, we don't look
10365 * at any of the leaves. In order to do mixed groups we'd have to check
10366 * the leaves of any fs roots and pin down the bytes for any file
10367 * extents we find. Not hard but why do it if we don't have to?
10369 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10370 fprintf(stderr, "We don't support re-initing the extent tree "
10371 "for mixed block groups yet, please notify a btrfs "
10372 "developer you want to do this so they can add this "
10373 "functionality.\n");
10378 * first we need to walk all of the trees except the extent tree and pin
10379 * down the bytes that are in use so we don't overwrite any existing
10382 ret = pin_metadata_blocks(fs_info);
10384 fprintf(stderr, "error pinning down used bytes\n");
10389 * Need to drop all the block groups since we're going to recreate all
10392 btrfs_free_block_groups(fs_info);
10393 ret = reset_block_groups(fs_info);
10395 fprintf(stderr, "error resetting the block groups\n");
10399 /* Ok we can allocate now, reinit the extent root */
10400 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10402 fprintf(stderr, "extent root initialization failed\n");
10404 * When the transaction code is updated we should end the
10405 * transaction, but for now progs only knows about commit so
10406 * just return an error.
10412 * Now we have all the in-memory block groups setup so we can make
10413 * allocations properly, and the metadata we care about is safe since we
10414 * pinned all of it above.
10417 struct btrfs_block_group_cache *cache;
10419 cache = btrfs_lookup_first_block_group(fs_info, start);
10422 start = cache->key.objectid + cache->key.offset;
10423 ret = btrfs_insert_item(trans, fs_info->extent_root,
10424 &cache->key, &cache->item,
10425 sizeof(cache->item));
10427 fprintf(stderr, "Error adding block group\n");
10430 btrfs_extent_post_op(trans, fs_info->extent_root);
10433 ret = reset_balance(trans, fs_info);
10435 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Re-COW metadata block @eb through its owner root.
 *
 * The owner recorded in the block header is read as a root, a transaction is
 * started, and btrfs_search_slot() is called for the first key of @eb with
 * path->lowest_level set to the block's level and a final argument of 1
 * (presumably the cow flag; confirm against the prototype).
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not used.
 */
10440 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10442 struct btrfs_path *path;
10443 struct btrfs_trans_handle *trans;
10444 struct btrfs_key key;
10447 printf("Recowing metadata block %llu\n", eb->start);
10448 key.objectid = btrfs_header_owner(eb);
10449 key.type = BTRFS_ROOT_ITEM_KEY;
10450 key.offset = (u64)-1;
/* The root parameter is replaced by the owner root of eb from here on */
10452 root = btrfs_read_fs_root(root->fs_info, &key);
10453 if (IS_ERR(root)) {
10454 fprintf(stderr, "Couldn't find owner root %llu\n",
10456 return PTR_ERR(root);
10459 path = btrfs_alloc_path();
10463 trans = btrfs_start_transaction(root, 1);
10464 if (IS_ERR(trans)) {
10465 btrfs_free_path(path);
10466 return PTR_ERR(trans);
/* A non-zero level means eb is a node, so its first key is a node key */
10469 path->lowest_level = btrfs_header_level(eb);
10470 if (path->lowest_level)
10471 btrfs_node_key_to_cpu(eb, &key, 0);
10473 btrfs_item_key_to_cpu(eb, &key, 0);
10475 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10476 btrfs_commit_transaction(trans, root);
10477 btrfs_free_path(path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The root is read with btrfs_read_fs_root(), a transaction is started, the
 * key bad->key is searched with btrfs_search_slot() and the item is removed
 * with btrfs_del_item() before the transaction is committed.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not used.
 */
10481 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10483 struct btrfs_path *path;
10484 struct btrfs_trans_handle *trans;
10485 struct btrfs_key key;
10488 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10489 bad->key.type, bad->key.offset);
10490 key.objectid = bad->root_id;
10491 key.type = BTRFS_ROOT_ITEM_KEY;
10492 key.offset = (u64)-1;
/* The root parameter is replaced by the root that holds the bad item */
10494 root = btrfs_read_fs_root(root->fs_info, &key);
10495 if (IS_ERR(root)) {
10496 fprintf(stderr, "Couldn't find owner root %llu\n",
10498 return PTR_ERR(root);
10501 path = btrfs_alloc_path();
10505 trans = btrfs_start_transaction(root, 1);
10506 if (IS_ERR(trans)) {
10507 btrfs_free_path(path);
10508 return PTR_ERR(trans);
10511 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10517 ret = btrfs_del_item(trans, root, path);
10519 btrfs_commit_transaction(trans, root);
10520 btrfs_free_path(path);
/*
 * Set log_root and log_root_level to 0 in the super block copy of @root's
 * filesystem, inside a transaction that is then committed.
 */
10524 static int zero_log_tree(struct btrfs_root *root)
10526 struct btrfs_trans_handle *trans;
10529 trans = btrfs_start_transaction(root, 1);
10530 if (IS_ERR(trans)) {
10531 ret = PTR_ERR(trans);
10534 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10535 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10536 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum the range that begins at @start, one sector at a time.
 *
 * While offset is below len, a sector of csum_root->sectorsize bytes is read
 * into @buf with read_extent_data() and handed to btrfs_csum_file_block();
 * offset then advances by one sector.
 */
10540 static int populate_csum(struct btrfs_trans_handle *trans,
10541 struct btrfs_root *csum_root, char *buf, u64 start,
10548 while (offset < len) {
10549 sectorsize = csum_root->sectorsize;
10550 ret = read_extent_data(csum_root, buf, start + offset,
10554 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10555 start + offset, buf, sectorsize);
10558 offset += sectorsize;
/*
 * Walk @cur_root and checksum the data of its regular file extents.
 *
 * For BTRFS_EXTENT_DATA_KEY items whose file extent type is
 * BTRFS_FILE_EXTENT_REG, the on-disk range (disk_bytenr, disk_num_bytes) is
 * passed to populate_csum().  A -EEXIST result from populate_csum() is
 * special-cased.
 */
10563 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10564 struct btrfs_root *csum_root,
10565 struct btrfs_root *cur_root)
10567 struct btrfs_path *path;
10568 struct btrfs_key key;
10569 struct extent_buffer *node;
10570 struct btrfs_file_extent_item *fi;
10577 path = btrfs_alloc_path();
/* Scratch buffer of one sector, used by populate_csum() */
10580 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10590 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10593 /* Iterate all regular file extents and fill its csum */
10595 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10597 if (key.type != BTRFS_EXTENT_DATA_KEY)
10599 node = path->nodes[0];
10600 slot = path->slots[0];
10601 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10602 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10604 start = btrfs_file_extent_disk_bytenr(node, fi);
10605 len = btrfs_file_extent_disk_num_bytes(node, fi);
10607 ret = populate_csum(trans, csum_root, buf, start, len);
10608 if (ret == -EEXIST)
10614 * TODO: if next leaf is corrupted, jump to nearest next valid
10617 ret = btrfs_next_item(cur_root, path);
10627 btrfs_free_path(path);
/*
 * Iterate the root items of the tree root, starting at
 * BTRFS_FS_TREE_OBJECTID, and fill the csum tree from each fs/subvolume tree
 * through fill_csum_tree_from_one_fs_root().
 */
10632 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10633 struct btrfs_root *csum_root)
10635 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10636 struct btrfs_path *path;
10637 struct btrfs_root *tree_root = fs_info->tree_root;
10638 struct btrfs_root *cur_root;
10639 struct extent_buffer *node;
10640 struct btrfs_key key;
10644 path = btrfs_alloc_path();
10648 key.objectid = BTRFS_FS_TREE_OBJECTID;
10650 key.type = BTRFS_ROOT_ITEM_KEY;
10652 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10661 node = path->nodes[0];
10662 slot = path->slots[0];
10663 btrfs_item_key_to_cpu(node, &key, slot);
/* Filter the item: objectid range, root item type, fs tree objectid */
10664 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10666 if (key.type != BTRFS_ROOT_ITEM_KEY)
10668 if (!is_fstree(key.objectid))
10670 key.offset = (u64)-1;
10672 cur_root = btrfs_read_fs_root(fs_info, &key);
10673 if (IS_ERR(cur_root) || !cur_root) {
10674 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10678 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10683 ret = btrfs_next_item(tree_root, path);
10693 btrfs_free_path(path);
/*
 * Iterate the extent tree and call populate_csum() for
 * BTRFS_EXTENT_ITEM_KEY items whose flags include BTRFS_EXTENT_FLAG_DATA.
 */
10697 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10698 struct btrfs_root *csum_root)
10700 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10701 struct btrfs_path *path;
10702 struct btrfs_extent_item *ei;
10703 struct extent_buffer *leaf;
10705 struct btrfs_key key;
10708 path = btrfs_alloc_path();
10713 key.type = BTRFS_EXTENT_ITEM_KEY;
10716 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10718 btrfs_free_path(path);
/* Scratch buffer of one sector, used by populate_csum() */
10722 buf = malloc(csum_root->sectorsize);
10724 btrfs_free_path(path);
/* Move to the next leaf once the current one has no more slots */
10729 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10730 ret = btrfs_next_leaf(extent_root, path);
10738 leaf = path->nodes[0];
10740 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10741 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10746 ei = btrfs_item_ptr(leaf, path->slots[0],
10747 struct btrfs_extent_item);
10748 if (!(btrfs_extent_flags(leaf, ei) &
10749 BTRFS_EXTENT_FLAG_DATA)) {
10754 ret = populate_csum(trans, csum_root, buf, key.objectid,
10761 btrfs_free_path(path);
10767 * Recalculate the csum and put it into the csum tree.
10769 * Extent tree init will wipe out all the extent info, so in that case, we
10770 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10771 * will use fs/subvol trees to init the csum tree.
10773 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10774 struct btrfs_root *csum_root,
10775 int search_fs_tree)
/* Choose the source used to rebuild checksums according to @search_fs_tree */
10777 if (search_fs_tree)
10778 return fill_csum_tree_from_fs(trans, csum_root);
10780 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Drain roots_info_cache entry by entry, then free the cache tree itself and
 * reset the global pointer to NULL.  Does nothing beyond the initial check
 * when roots_info_cache is NULL.
 */
10783 static void free_roots_info_cache(void)
10785 if (!roots_info_cache)
10788 while (!cache_tree_empty(roots_info_cache)) {
10789 struct cache_extent *entry;
10790 struct root_item_info *rii;
10792 entry = first_cache_extent(roots_info_cache);
10795 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent */
10796 rii = container_of(entry, struct root_item_info, cache_extent);
10800 free(roots_info_cache);
10801 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest-level tree block
 * whose first inline ref is a BTRFS_TREE_BLOCK_REF_KEY for that root.
 *
 * Each root id gets one root_item_info in roots_info_cache (cache_extent
 * start == root id, size 1) holding the block's level, bytenr and generation.
 * node_count is set to 1 whenever a higher level is seen; blocks at the same
 * level as the recorded one are handled in a separate branch.
 */
10804 static int build_roots_info_cache(struct btrfs_fs_info *info)
10807 struct btrfs_key key;
10808 struct extent_buffer *leaf;
10809 struct btrfs_path *path;
/* Allocate and initialize the global cache tree on first use */
10811 if (!roots_info_cache) {
10812 roots_info_cache = malloc(sizeof(*roots_info_cache));
10813 if (!roots_info_cache)
10815 cache_tree_init(roots_info_cache);
10818 path = btrfs_alloc_path();
10823 key.type = BTRFS_EXTENT_ITEM_KEY;
10826 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10829 leaf = path->nodes[0];
10832 struct btrfs_key found_key;
10833 struct btrfs_extent_item *ei;
10834 struct btrfs_extent_inline_ref *iref;
10835 int slot = path->slots[0];
10840 struct cache_extent *entry;
10841 struct root_item_info *rii;
10843 if (slot >= btrfs_header_nritems(leaf)) {
10844 ret = btrfs_next_leaf(info->extent_root, path);
10851 leaf = path->nodes[0];
10852 slot = path->slots[0];
10855 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10857 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10858 found_key.type != BTRFS_METADATA_ITEM_KEY)
10861 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10862 flags = btrfs_extent_flags(leaf, ei);
10864 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10865 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM keys carry the level in the key offset; otherwise the level
 * comes from the btrfs_tree_block_info that precedes the inline refs.
 */
10868 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10869 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10870 level = found_key.offset;
10872 struct btrfs_tree_block_info *binfo;
10874 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10875 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10876 level = btrfs_tree_block_level(leaf, binfo);
10880 * For a root extent, it must be of the following type and the
10881 * first (and only one) iref in the item.
10883 type = btrfs_extent_inline_ref_type(leaf, iref);
10884 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10887 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10888 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10890 rii = malloc(sizeof(struct root_item_info));
10895 rii->cache_extent.start = root_id;
10896 rii->cache_extent.size = 1;
10897 rii->level = (u8)-1;
10898 entry = &rii->cache_extent;
10899 ret = insert_cache_extent(roots_info_cache, entry);
10902 rii = container_of(entry, struct root_item_info,
10906 ASSERT(rii->cache_extent.start == root_id);
10907 ASSERT(rii->cache_extent.size == 1);
/* (u8)-1 is the level given to a freshly created entry above */
10909 if (level > rii->level || rii->level == (u8)-1) {
10910 rii->level = level;
10911 rii->bytenr = found_key.objectid;
10912 rii->gen = btrfs_extent_generation(leaf, ei);
10913 rii->node_count = 1;
10914 } else if (level == rii->level) {
10922 btrfs_free_path(path);
/*
 * Compare the root item at path->nodes[0] / path->slots[0] with the root
 * node recorded for root_key->objectid in roots_info_cache.
 *
 * When bytenr, level or generation differ, a message is printed (suppressed
 * when both @read_only_mode and repair are set) and, unless @read_only_mode
 * is set, the root item is rewritten in place in the leaf with the cached
 * values.  A root item generation newer than the cached one is reported
 * separately.
 */
10927 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10928 struct btrfs_path *path,
10929 const struct btrfs_key *root_key,
10930 const int read_only_mode)
10932 const u64 root_id = root_key->objectid;
10933 struct cache_extent *entry;
10934 struct root_item_info *rii;
10935 struct btrfs_root_item ri;
10936 unsigned long offset;
10938 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10941 "Error: could not find extent items for root %llu\n",
10942 root_key->objectid);
10946 rii = container_of(entry, struct root_item_info, cache_extent);
10947 ASSERT(rii->cache_extent.start == root_id);
10948 ASSERT(rii->cache_extent.size == 1);
10950 if (rii->node_count != 1) {
10952 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item into the local ri so it can be inspected */
10957 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10958 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10960 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10961 btrfs_root_level(&ri) != rii->level ||
10962 btrfs_root_generation(&ri) != rii->gen) {
10965 * If we're in repair mode but our caller told us to not update
10966 * the root item, i.e. just check if it needs to be updated, don't
10967 * print this message, since the caller will call us again shortly
10968 * for the same root item without read only mode (the caller will
10969 * open a transaction first).
10971 if (!(read_only_mode && repair))
10973 "%sroot item for root %llu,"
10974 " current bytenr %llu, current gen %llu, current level %u,"
10975 " new bytenr %llu, new gen %llu, new level %u\n",
10976 (read_only_mode ? "" : "fixing "),
10978 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10979 btrfs_root_level(&ri),
10980 rii->bytenr, rii->gen, rii->level);
10982 if (btrfs_root_generation(&ri) > rii->gen) {
10984 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10985 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy, then write it back over the item in the leaf */
10989 if (!read_only_mode) {
10990 btrfs_set_root_bytenr(&ri, rii->bytenr);
10991 btrfs_set_root_level(&ri, rii->level);
10992 btrfs_set_root_generation(&ri, rii->gen);
10993 write_extent_buffer(path->nodes[0], &ri,
10994 offset, sizeof(ri));
11004 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11005 * caused read-only snapshots to be corrupted if they were created at a moment
11006 * when the source subvolume/snapshot had orphan items. The issue was that the
11007 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11008 * node instead of the post orphan cleanup root node.
11009 * So this function, and its callees, just detects and fixes those cases. Even
11010 * though the regression was for read-only snapshots, this function applies to
11011 * any snapshot/subvolume root.
11012 * This must be run before any other repair code - not doing so makes other
11013 * repair code delete or modify backrefs in the extent tree, for example, which
11014 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items() - scan the ROOT_ITEM keys of info->tree_root and pass
 * each one to maybe_repair_root_item().
 *
 * @info: filesystem whose tree root is scanned.
 *
 * Return: cmd_check() prints a positive result as a number of roots (fixed
 * in repair mode, outdated otherwise). NOTE(review): a negative result is
 * presumably an error code -- the return statements are not visible here.
 *
 * NOTE(review): several original lines of this function (local declarations,
 * braces, gotos, the return statements) are absent from this listing, so the
 * notes below describe only the calls that are visible. Confirm the exact
 * control flow against the complete source.
 */
11016 static int repair_root_items(struct btrfs_fs_info *info)
11018 struct btrfs_path *path = NULL;
11019 struct btrfs_key key;
11020 struct extent_buffer *leaf;
11021 struct btrfs_trans_handle *trans = NULL;
11024 int need_trans = 0;
/*
 * Build the roots info cache; it is released again by
 * free_roots_info_cache() in the cleanup at the end of the function.
 * NOTE(review): presumably consulted by maybe_repair_root_item() -- confirm.
 */
11026 ret = build_roots_info_cache(info);
11030 path = btrfs_alloc_path();
/* The scan starts at ROOT_ITEM keys from BTRFS_FIRST_FREE_OBJECTID on. */
11036 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11037 key.type = BTRFS_ROOT_ITEM_KEY;
11042 * Avoid opening and committing transactions if a leaf doesn't have
11043 * any root items that need to be fixed, so that we avoid rotating
11044 * backup roots unnecessarily.
/*
 * Open a transaction on the tree root; on failure the error is taken from
 * PTR_ERR(). NOTE(review): presumably guarded by need_trans -- the guarding
 * condition is not visible in this listing.
 */
11047 trans = btrfs_start_transaction(info->tree_root, 1);
11048 if (IS_ERR(trans)) {
11049 ret = PTR_ERR(trans);
/* Position path at key inside the tree root. */
11054 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11058 leaf = path->nodes[0];
11061 struct btrfs_key found_key;
/*
 * Past the last item of the current leaf: look up the next key, drop the
 * path and commit the transaction. NOTE(review): the commit is presumably
 * conditional on a transaction being open -- confirm.
 */
11063 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11064 int no_more_keys = find_next_key(path, &key);
11066 btrfs_release_path(path);
11068 ret = btrfs_commit_transaction(trans,
11080 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/*
 * Keys that are not ROOT_ITEM keys, and the relocation tree root item, are
 * singled out here. NOTE(review): presumably both are skipped -- the
 * branch bodies are not visible.
 */
11082 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11084 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Hand the root item at the current slot to maybe_repair_root_item(). */
11087 ret = maybe_repair_root_item(info, path, &found_key,
/*
 * Repair mode with no transaction open yet: the path is dropped.
 * NOTE(review): presumably need_trans is set and the search restarted
 * under a transaction -- confirm against the full source.
 */
11092 if (!trans && repair) {
11095 btrfs_release_path(path);
/*
 * Cleanup: release the roots info cache and the path, then commit the
 * transaction. The return value of this final commit is discarded.
 */
11105 free_roots_info_cache();
11106 btrfs_free_path(path);
11108 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Help text for the btrfs check command, passed to usage() by cmd_check().
 * The entries are, in order: the synopsis, a one-line summary, a longer
 * description, and then one entry per command line option.
 *
 * NOTE(review): the short forms -Q and -E documented below map to the
 * values Q and E in long_options of cmd_check(), but neither letter
 * appears in the short-option string as:br:p given to getopt_long() there,
 * so presumably only the long spellings are accepted -- confirm and fix
 * either the help text or the option string.
 */
11115 const char * const cmd_check_usage[] = {
/* Synopsis and one-line summary. */
11116 "btrfs check [options] <device>",
11117 "Check structural integrity of a filesystem (unmounted).",
/* Long description. */
11118 "Check structural integrity of an unmounted filesystem. Verify internal",
11119 "trees' consistency and item connectivity. In the repair mode try to",
11120 "fix the problems found.",
11121 "WARNING: the repair mode is considered dangerous",
/* Per-option help lines. */
11123 "-s|--super <superblock> use this superblock copy",
11124 "-b|--backup use the first valid backup root copy",
11125 "--repair try to repair the filesystem",
11126 "--readonly run in read-only mode (default)",
11127 "--init-csum-tree create a new CRC tree",
11128 "--init-extent-tree create a new extent tree",
11129 "--low-memory check in low memory usage mode(experimental)",
11130 "--check-data-csum verify checksums of data blocks",
11131 "-Q|--qgroup-report print a report on qgroup consistency",
11132 "-E|--subvol-extents <subvolid>",
11133 " print subvolume extents and sharing state",
11134 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11135 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11136 "-p|--progress indicate progress",
11140 int cmd_check(int argc, char **argv)
11142 struct cache_tree root_cache;
11143 struct btrfs_root *root;
11144 struct btrfs_fs_info *info;
11147 u64 tree_root_bytenr = 0;
11148 u64 chunk_root_bytenr = 0;
11149 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11152 int init_csum_tree = 0;
11154 int qgroup_report = 0;
11155 int qgroups_repaired = 0;
11156 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
11160 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11161 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11162 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11163 GETOPT_VAL_LOW_MEMORY };
11164 static const struct option long_options[] = {
11165 { "super", required_argument, NULL, 's' },
11166 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11167 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11168 { "init-csum-tree", no_argument, NULL,
11169 GETOPT_VAL_INIT_CSUM },
11170 { "init-extent-tree", no_argument, NULL,
11171 GETOPT_VAL_INIT_EXTENT },
11172 { "check-data-csum", no_argument, NULL,
11173 GETOPT_VAL_CHECK_CSUM },
11174 { "backup", no_argument, NULL, 'b' },
11175 { "subvol-extents", required_argument, NULL, 'E' },
11176 { "qgroup-report", no_argument, NULL, 'Q' },
11177 { "tree-root", required_argument, NULL, 'r' },
11178 { "chunk-root", required_argument, NULL,
11179 GETOPT_VAL_CHUNK_TREE },
11180 { "progress", no_argument, NULL, 'p' },
11181 { "low-memory", no_argument, NULL,
11182 GETOPT_VAL_LOW_MEMORY },
11183 { NULL, 0, NULL, 0}
11186 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11190 case 'a': /* ignored */ break;
11192 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11195 num = arg_strtou64(optarg);
11196 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11198 "ERROR: super mirror should be less than: %d\n",
11199 BTRFS_SUPER_MIRROR_MAX);
11202 bytenr = btrfs_sb_offset(((int)num));
11203 printf("using SB copy %llu, bytenr %llu\n", num,
11204 (unsigned long long)bytenr);
11210 subvolid = arg_strtou64(optarg);
11213 tree_root_bytenr = arg_strtou64(optarg);
11215 case GETOPT_VAL_CHUNK_TREE:
11216 chunk_root_bytenr = arg_strtou64(optarg);
11219 ctx.progress_enabled = true;
11223 usage(cmd_check_usage);
11224 case GETOPT_VAL_REPAIR:
11225 printf("enabling repair mode\n");
11227 ctree_flags |= OPEN_CTREE_WRITES;
11229 case GETOPT_VAL_READONLY:
11232 case GETOPT_VAL_INIT_CSUM:
11233 printf("Creating a new CRC tree\n");
11234 init_csum_tree = 1;
11236 ctree_flags |= OPEN_CTREE_WRITES;
11238 case GETOPT_VAL_INIT_EXTENT:
11239 init_extent_tree = 1;
11240 ctree_flags |= (OPEN_CTREE_WRITES |
11241 OPEN_CTREE_NO_BLOCK_GROUPS);
11244 case GETOPT_VAL_CHECK_CSUM:
11245 check_data_csum = 1;
11247 case GETOPT_VAL_LOW_MEMORY:
11253 if (check_argc_exact(argc - optind, 1))
11254 usage(cmd_check_usage);
11256 if (ctx.progress_enabled) {
11257 ctx.tp = TASK_NOTHING;
11258 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11261 /* This check is the only reason for --readonly to exist */
11262 if (readonly && repair) {
11263 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11268 * Not supported yet
11270 if (repair && low_memory) {
11271 error("Low memory mode doesn't support repair yet");
11276 cache_tree_init(&root_cache);
11278 if((ret = check_mounted(argv[optind])) < 0) {
11279 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11282 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11287 /* only allow partial opening under repair mode */
11289 ctree_flags |= OPEN_CTREE_PARTIAL;
11291 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11292 chunk_root_bytenr, ctree_flags);
11294 fprintf(stderr, "Couldn't open file system\n");
11299 global_info = info;
11300 root = info->fs_root;
11303 * Repair mode will force us to commit a transaction, which
11304 * would make the log tree fail to load when mounting.
11306 if (repair && btrfs_super_log_root(info->super_copy)) {
11307 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11312 ret = zero_log_tree(root);
11314 fprintf(stderr, "fail to zero log tree\n");
11319 uuid_unparse(info->super_copy->fsid, uuidbuf);
11320 if (qgroup_report) {
11321 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11323 ret = qgroup_verify_all(info);
11329 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11330 subvolid, argv[optind], uuidbuf);
11331 ret = print_extent_state(info, subvolid);
11334 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11336 if (!extent_buffer_uptodate(info->tree_root->node) ||
11337 !extent_buffer_uptodate(info->dev_root->node) ||
11338 !extent_buffer_uptodate(info->chunk_root->node)) {
11339 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11344 if (init_extent_tree || init_csum_tree) {
11345 struct btrfs_trans_handle *trans;
11347 trans = btrfs_start_transaction(info->extent_root, 0);
11348 if (IS_ERR(trans)) {
11349 fprintf(stderr, "Error starting transaction\n");
11350 ret = PTR_ERR(trans);
11354 if (init_extent_tree) {
11355 printf("Creating a new extent tree\n");
11356 ret = reinit_extent_tree(trans, info);
11361 if (init_csum_tree) {
11362 fprintf(stderr, "Reinit crc root\n");
11363 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11365 fprintf(stderr, "crc root initialization failed\n");
11370 ret = fill_csum_tree(trans, info->csum_root,
11373 fprintf(stderr, "crc refilling failed\n");
11378 * Ok now we commit and run the normal fsck, which will add
11379 * extent entries for all of the items it finds.
11381 ret = btrfs_commit_transaction(trans, info->extent_root);
11385 if (!extent_buffer_uptodate(info->extent_root->node)) {
11386 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11390 if (!extent_buffer_uptodate(info->csum_root->node)) {
11391 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11396 if (!ctx.progress_enabled)
11397 fprintf(stderr, "checking extents\n");
11399 ret = check_chunks_and_extents_v2(root);
11401 ret = check_chunks_and_extents(root);
11403 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11405 ret = repair_root_items(info);
11409 fprintf(stderr, "Fixed %d roots.\n", ret);
11411 } else if (ret > 0) {
11413 "Found %d roots with an outdated root item.\n",
11416 "Please run a filesystem check with the option --repair to fix them.\n");
11421 if (!ctx.progress_enabled) {
11422 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11423 fprintf(stderr, "checking free space tree\n");
11425 fprintf(stderr, "checking free space cache\n");
11427 ret = check_space_cache(root);
11432 * We used to require hole extents in between our real extents, so if
11433 * the NO_HOLES flag is not set we need to make sure there are no gaps
11434 * in the file extents for inodes; otherwise we can just ignore such
11435 * gaps when they happen.
11437 no_holes = btrfs_fs_incompat(root->fs_info,
11438 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11439 if (!ctx.progress_enabled)
11440 fprintf(stderr, "checking fs roots\n");
11441 ret = check_fs_roots(root, &root_cache);
11445 fprintf(stderr, "checking csums\n");
11446 ret = check_csums(root);
11450 fprintf(stderr, "checking root refs\n");
11451 ret = check_root_refs(root, &root_cache);
11455 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11456 struct extent_buffer *eb;
11458 eb = list_first_entry(&root->fs_info->recow_ebs,
11459 struct extent_buffer, recow);
11460 list_del_init(&eb->recow);
11461 ret = recow_extent_buffer(root, eb);
11466 while (!list_empty(&delete_items)) {
11467 struct bad_item *bad;
11469 bad = list_first_entry(&delete_items, struct bad_item, list);
11470 list_del_init(&bad->list);
11472 ret = delete_bad_item(root, bad);
11476 if (info->quota_enabled) {
11478 fprintf(stderr, "checking quota groups\n");
11479 err = qgroup_verify_all(info);
11483 err = repair_qgroups(info, &qgroups_repaired);
11488 if (!list_empty(&root->fs_info->recow_ebs)) {
11489 fprintf(stderr, "Transid errors in file system\n");
11493 /* Don't override original ret */
11494 if (!ret && qgroups_repaired)
11495 ret = qgroups_repaired;
11497 if (found_old_backref) { /*
11498 * there was a disk format change when mixed
11499 * backref was in testing tree. The old format
11500 * existed about one week.
11502 printf("\n * Found old mixed backref format. "
11503 "The old format is not supported! *"
11504 "\n * Please mount the FS in readonly mode, "
11505 "backup data and re-format the FS. *\n\n");
11508 printf("found %llu bytes used err is %d\n",
11509 (unsigned long long)bytes_used, ret);
11510 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11511 printf("total tree bytes: %llu\n",
11512 (unsigned long long)total_btree_bytes);
11513 printf("total fs tree bytes: %llu\n",
11514 (unsigned long long)total_fs_tree_bytes);
11515 printf("total extent tree bytes: %llu\n",
11516 (unsigned long long)total_extent_tree_bytes);
11517 printf("btree space waste bytes: %llu\n",
11518 (unsigned long long)btree_space_waste);
11519 printf("file data blocks allocated: %llu\n referenced %llu\n",
11520 (unsigned long long)data_bytes_allocated,
11521 (unsigned long long)data_bytes_referenced);
11523 free_qgroup_counts();
11524 free_root_recs_tree(&root_cache);
11528 if (ctx.progress_enabled)
11529 task_deinit(ctx.info);