2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/* NOTE(review): this is a partial listing — the enclosing enum/struct for the
 * three fragments below (task position enum, task context) is not fully
 * visible here; confirm against the full file. */
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/* File-scope accounting accumulated during the check run. */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
/* Behavior flags, presumably set from command-line options — verify callers. */
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/* Common header embedded in both data and tree backref records; lives in an
 * rb-tree keyed by compare_extent_backref().  (Listing is partial: the
 * rb_node member and closing brace are not visible here.) */
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
/* Map an rb_node back to its containing extent_backref. */
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
/* Downcast: extent_backref embedded in a data_backref. */
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
/* rb-tree comparator for data backrefs: orders by parent/root, then owner,
 * offset, bytes, and finally disk_bytenr when both refs were found.
 * (Partial listing: the return statements between the comparisons are not
 * visible here.) */
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
/* Backref record for a metadata (tree) block. */
176 struct tree_backref {
177 struct extent_backref node;
/* Downcast: extent_backref embedded in a tree_backref. */
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
/* rb-tree comparator for tree backrefs: only parent/root distinguishes them.
 * (Partial listing: return statements are not visible here.) */
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
/* Top-level comparator: separates data vs tree and full vs normal backrefs,
 * then dispatches to the type-specific comparator. */
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
/* Everything known about one extent while cross-checking the extent tree:
 * the backrefs seen, duplicates, and a set of status/error bit-fields.
 * (Partial listing: several scalar members are not visible here.) */
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
/* 2 bits so FLAG_UNSET (2) fits alongside 0/1. */
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
/* One name (dir item / dir index / inode ref) pointing at an inode. */
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
/* Per-backref error bits reported by print_ref_error(). */
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* One gap in a file's extent coverage; kept in inode_record::holes. */
310 struct file_extent_hole {
/* Everything known about one inode while walking an fs tree.
 * (Partial listing: scalar members such as ino/nlink/isize are not
 * visible here.) */
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
/* Per-inode error bits reported by print_inode_error(). */
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* Cross-root reference (root ref / root backref / dir entry) state. */
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
/* Fragments of further cache-keyed records (root/device/shared-node);
 * their struct headers are not visible in this partial listing. */
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
/* State carried while walking a tree; one shared_node per level. */
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
438 * Error bit for low memory mode check.
440 * Currently no caller cares about it yet. Just internal use for error
443 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
444 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
445 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
446 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
447 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
/* Progress-spinner thread body: periodically prints the current task name
 * with a rotating indicator until the task position becomes TASK_NOTHING.
 * (Partial listing: loop structure and returns are not visible here.) */
449 static void *print_status_check(void *p)
451 struct task_ctx *priv = p;
452 const char work_indicator[] = { '.', 'o', 'O', 'o' };
454 static char *task_position_string[] = {
456 "checking free space cache",
460 task_period_start(priv->info, 1000 /* 1s */);
462 if (priv->tp == TASK_NOTHING)
466 printf("%s [%c]\r", task_position_string[priv->tp],
467 work_indicator[count % 4]);
470 task_period_wait(priv->info);
475 static int print_status_return(void *p)
483 /* Compatible function to allow reuse of old codes */
/* Return the start of the lowest hole in the tree; the rb-tree is ordered
 * by start, so rb_first() is the first gap. */
484 static u64 first_extent_gap(struct rb_root *holes)
486 struct file_extent_hole *hole;
488 if (RB_EMPTY_ROOT(holes))
491 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/* rb-tree comparator for holes, ordered by start offset.  Deliberately never
 * returns 0 for equal starts so insertion cannot fail with -EEXIST; the
 * longer hole becomes the merge center (see comments below).
 * (Partial listing: return statements are not visible here.) */
495 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
497 struct file_extent_hole *hole1;
498 struct file_extent_hole *hole2;
500 hole1 = rb_entry(node1, struct file_extent_hole, node);
501 hole2 = rb_entry(node2, struct file_extent_hole, node);
503 if (hole1->start > hole2->start)
505 if (hole1->start < hole2->start)
507 /* Now hole1->start == hole2->start */
508 if (hole1->len >= hole2->len)
510 * Hole 1 will be merge center
511 * Same hole will be merged later
514 /* Hole 2 will be merge center */
519 * Add a hole to the record
521 * This will do hole merge for copy_file_extent_holes(),
522 * which will ensure there won't be continuous holes.
/* Insert [start,len) (parameters not visible in this partial listing) and
 * coalesce with the previous and any following overlapping holes. */
524 static int add_file_extent_hole(struct rb_root *holes,
527 struct file_extent_hole *hole;
528 struct file_extent_hole *prev = NULL;
529 struct file_extent_hole *next = NULL;
531 hole = malloc(sizeof(*hole));
536 /* Since compare will not return 0, no -EEXIST will happen */
537 rb_insert(holes, &hole->node, compare_hole);
539 /* simple merge with previous hole */
540 if (rb_prev(&hole->node))
541 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
543 if (prev && prev->start + prev->len >= hole->start) {
544 hole->len = hole->start + hole->len - prev->start;
545 hole->start = prev->start;
546 rb_erase(&prev->node, holes);
551 /* iterate merge with next holes */
553 if (!rb_next(&hole->node))
555 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
557 if (hole->start + hole->len >= next->start) {
558 if (hole->start + hole->len <= next->start + next->len)
559 hole->len = next->start + next->len -
561 rb_erase(&next->node, holes);
/* rb_search() callback: locate the hole containing the start offset passed
 * in via *data.  (Partial listing: return statements are not visible.) */
570 static int compare_hole_range(struct rb_node *node, void *data)
572 struct file_extent_hole *hole;
575 hole = (struct file_extent_hole *)data;
578 hole = rb_entry(node, struct file_extent_hole, node);
579 if (start < hole->start)
581 if (start >= hole->start && start < hole->start + hole->len)
587 * Delete a hole in the record
589 * This will do the hole split and is much restrict than add.
/* Remove [start,len) from the hole set: the range must lie inside a single
 * existing hole, which is erased and its remaining head/tail re-added. */
591 static int del_file_extent_hole(struct rb_root *holes,
594 struct file_extent_hole *hole;
595 struct file_extent_hole tmp;
600 struct rb_node *node;
607 node = rb_search(holes, &tmp, compare_hole_range, NULL);
610 hole = rb_entry(node, struct file_extent_hole, node);
/* The deleted range must not extend past the found hole. */
611 if (start + len > hole->start + hole->len)
615 * Now there will be no overlap, delete the hole and re-add the
616 * split(s) if they exists.
618 if (start > hole->start) {
619 prev_start = hole->start;
620 prev_len = start - hole->start;
623 if (hole->start + hole->len > start + len) {
624 next_start = start + len;
625 next_len = hole->start + hole->len - start - len;
628 rb_erase(node, holes);
631 ret = add_file_extent_hole(holes, prev_start, prev_len);
636 ret = add_file_extent_hole(holes, next_start, next_len);
/* Duplicate every hole of src into dst (merging happens on insert). */
643 static int copy_file_extent_holes(struct rb_root *dst,
646 struct file_extent_hole *hole;
647 struct rb_node *node;
650 node = rb_first(src);
652 hole = rb_entry(node, struct file_extent_hole, node);
653 ret = add_file_extent_hole(dst, hole->start, hole->len);
656 node = rb_next(node);
/* Erase and free every hole in the tree. */
661 static void free_file_extent_holes(struct rb_root *holes)
663 struct rb_node *node;
664 struct file_extent_hole *hole;
666 node = rb_first(holes);
668 hole = rb_entry(node, struct file_extent_hole, node);
669 rb_erase(node, holes);
671 node = rb_first(holes);
675 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/* Mark a root dirty for this transaction, pinning the current node as the
 * commit root (extent_buffer_get takes an extra reference). */
677 static void record_root_in_trans(struct btrfs_trans_handle *trans,
678 struct btrfs_root *root)
680 if (root->last_trans != trans->transid) {
681 root->track_dirty = 1;
682 root->last_trans = trans->transid;
683 root->commit_root = root->node;
684 extent_buffer_get(root->node);
/* Map an inode mode (S_IF*) to the BTRFS_FT_* dir-entry filetype. */
688 static u8 imode_to_type(u32 imode)
691 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
692 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
693 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
694 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
695 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
696 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
697 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
698 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
701 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device records, ordered by devid.
 * (Partial listing: return statements are not visible here.) */
705 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
707 struct device_record *rec1;
708 struct device_record *rec2;
710 rec1 = rb_entry(node1, struct device_record, node);
711 rec2 = rb_entry(node2, struct device_record, node);
712 if (rec1->devid > rec2->devid)
714 else if (rec1->devid < rec2->devid)
/* Deep-copy an inode record: the struct itself plus its backref list,
 * orphan-extent list and hole tree.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure; the error path below unwinds the partially built copy.
 * (Partial listing: some allocation-failure branches are not visible.) */
720 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
722 struct inode_record *rec;
723 struct inode_backref *backref;
724 struct inode_backref *orig;
725 struct inode_backref *tmp;
726 struct orphan_data_extent *src_orphan;
727 struct orphan_data_extent *dst_orphan;
731 rec = malloc(sizeof(*rec));
733 return ERR_PTR(-ENOMEM);
734 memcpy(rec, orig_rec, sizeof(*rec));
/* The copied list heads/tree root would alias the original; reset them. */
736 INIT_LIST_HEAD(&rec->backrefs);
737 INIT_LIST_HEAD(&rec->orphan_extents);
738 rec->holes = RB_ROOT;
740 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* Backrefs carry a trailing name; +1 for the NUL terminator. */
741 size = sizeof(*orig) + orig->namelen + 1;
742 backref = malloc(size);
747 memcpy(backref, orig, size);
748 list_add_tail(&backref->list, &rec->backrefs);
750 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
751 dst_orphan = malloc(sizeof(*dst_orphan));
756 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
757 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
759 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error unwind: free whatever was cloned so far. */
765 if (!list_empty(&rec->backrefs))
766 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
767 list_del(&orig->list);
771 if (!list_empty(&rec->orphan_extents))
772 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
773 list_del(&orig->list);
/* Report data extents whose owning file extent items were lost. */
782 static void print_orphan_data_extents(struct list_head *orphan_extents,
785 struct orphan_data_extent *orphan;
787 if (list_empty(orphan_extents))
789 printf("The following data extent is lost in tree %llu:\n",
791 list_for_each_entry(orphan, orphan_extents, list) {
792 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
793 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/* Translate an inode record's error bitmask into a human-readable line on
 * stderr, followed by orphan-extent and hole details where applicable. */
798 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
800 u64 root_objectid = root->root_key.objectid;
801 int errors = rec->errors;
805 /* reloc root errors: print the corresponding fs root objectid instead */
806 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
807 root_objectid = root->root_key.offset;
808 fprintf(stderr, "reloc");
810 fprintf(stderr, "root %llu inode %llu errors %x",
811 (unsigned long long) root_objectid,
812 (unsigned long long) rec->ino, rec->errors);
/* One clause per I_ERR_* bit, appended to the same line. */
814 if (errors & I_ERR_NO_INODE_ITEM)
815 fprintf(stderr, ", no inode item");
816 if (errors & I_ERR_NO_ORPHAN_ITEM)
817 fprintf(stderr, ", no orphan item");
818 if (errors & I_ERR_DUP_INODE_ITEM)
819 fprintf(stderr, ", dup inode item");
820 if (errors & I_ERR_DUP_DIR_INDEX)
821 fprintf(stderr, ", dup dir index");
822 if (errors & I_ERR_ODD_DIR_ITEM)
823 fprintf(stderr, ", odd dir item");
824 if (errors & I_ERR_ODD_FILE_EXTENT)
825 fprintf(stderr, ", odd file extent");
826 if (errors & I_ERR_BAD_FILE_EXTENT)
827 fprintf(stderr, ", bad file extent");
828 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
829 fprintf(stderr, ", file extent overlap");
830 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
831 fprintf(stderr, ", file extent discount");
832 if (errors & I_ERR_DIR_ISIZE_WRONG)
833 fprintf(stderr, ", dir isize wrong");
834 if (errors & I_ERR_FILE_NBYTES_WRONG)
835 fprintf(stderr, ", nbytes wrong");
836 if (errors & I_ERR_ODD_CSUM_ITEM)
837 fprintf(stderr, ", odd csum item");
838 if (errors & I_ERR_SOME_CSUM_MISSING)
839 fprintf(stderr, ", some csum missing");
840 if (errors & I_ERR_LINK_COUNT_WRONG)
841 fprintf(stderr, ", link count wrong");
842 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
843 fprintf(stderr, ", orphan file extent");
844 fprintf(stderr, "\n");
845 /* Print the orphan extents if needed */
846 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
847 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
849 /* Print the holes if needed */
850 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
851 struct file_extent_hole *hole;
852 struct rb_node *node;
855 node = rb_first(&rec->holes);
856 fprintf(stderr, "Found file extent holes:\n");
859 hole = rb_entry(node, struct file_extent_hole, node);
860 fprintf(stderr, "\tstart: %llu, len: %llu\n",
861 hole->start, hole->len);
862 node = rb_next(node);
/* No hole records: the entire (sector-rounded) size is missing. */
865 fprintf(stderr, "\tstart: 0, len: %llu\n",
866 round_up(rec->isize, root->sectorsize));
/* Same idea for backref errors: one clause per REF_ERR_* bit. */
870 static void print_ref_error(int errors)
872 if (errors & REF_ERR_NO_DIR_ITEM)
873 fprintf(stderr, ", no dir item");
874 if (errors & REF_ERR_NO_DIR_INDEX)
875 fprintf(stderr, ", no dir index");
876 if (errors & REF_ERR_NO_INODE_REF)
877 fprintf(stderr, ", no inode ref");
878 if (errors & REF_ERR_DUP_DIR_ITEM)
879 fprintf(stderr, ", dup dir item");
880 if (errors & REF_ERR_DUP_DIR_INDEX)
881 fprintf(stderr, ", dup dir index");
882 if (errors & REF_ERR_DUP_INODE_REF)
883 fprintf(stderr, ", dup inode ref");
884 if (errors & REF_ERR_INDEX_UNMATCH)
885 fprintf(stderr, ", index mismatch");
886 if (errors & REF_ERR_FILETYPE_UNMATCH)
887 fprintf(stderr, ", filetype mismatch");
888 if (errors & REF_ERR_NAME_TOO_LONG)
889 fprintf(stderr, ", name too long");
890 if (errors & REF_ERR_NO_ROOT_REF)
891 fprintf(stderr, ", no root ref");
892 if (errors & REF_ERR_NO_ROOT_BACKREF)
893 fprintf(stderr, ", no root backref");
894 if (errors & REF_ERR_DUP_ROOT_REF)
895 fprintf(stderr, ", dup root ref");
896 if (errors & REF_ERR_DUP_ROOT_BACKREF)
897 fprintf(stderr, ", dup root backref");
898 fprintf(stderr, "\n");
/* Look up (or create) the inode record for `ino` in the cache.  With `mod`
 * set, a record shared by multiple references is cloned first (copy-on-
 * write).  Returns ERR_PTR on allocation/insert failure.
 * (Partial listing: some branches/returns are not visible here.) */
901 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
904 struct ptr_node *node;
905 struct cache_extent *cache;
906 struct inode_record *rec = NULL;
909 cache = lookup_cache_extent(inode_cache, ino, 1);
911 node = container_of(cache, struct ptr_node, cache);
913 if (mod && rec->refs > 1) {
914 node->data = clone_inode_rec(rec);
915 if (IS_ERR(node->data))
921 rec = calloc(1, sizeof(*rec));
923 return ERR_PTR(-ENOMEM);
925 rec->extent_start = (u64)-1;
927 INIT_LIST_HEAD(&rec->backrefs);
928 INIT_LIST_HEAD(&rec->orphan_extents);
929 rec->holes = RB_ROOT;
931 node = malloc(sizeof(*node));
934 return ERR_PTR(-ENOMEM);
936 node->cache.start = ino;
937 node->cache.size = 1;
/* The free-space cache inode is special-cased here. */
940 if (ino == BTRFS_FREE_INO_OBJECTID)
943 ret = insert_cache_extent(inode_cache, &node->cache);
945 return ERR_PTR(-EEXIST);
/* Drain and free every orphan_data_extent on the list. */
950 static void free_orphan_data_extents(struct list_head *orphan_extents)
952 struct orphan_data_extent *orphan;
954 while (!list_empty(orphan_extents)) {
955 orphan = list_entry(orphan_extents->next,
956 struct orphan_data_extent, list);
957 list_del(&orphan->list);
/* Release an inode record and everything it owns. */
962 static void free_inode_rec(struct inode_record *rec)
964 struct inode_backref *backref;
969 while (!list_empty(&rec->backrefs)) {
970 backref = to_inode_backref(rec->backrefs.next);
971 list_del(&backref->list);
974 free_orphan_data_extents(&rec->orphan_extents);
975 free_file_extent_holes(&rec->holes);
/* A record is freeable once fully checked, error-free, and all links
 * are accounted for. */
979 static int can_free_inode_rec(struct inode_record *rec)
981 if (!rec->errors && rec->checked && rec->found_inode_item &&
982 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/* Finalize per-inode checks (sizes, filetypes, csum expectations) and drop
 * the record from the cache if nothing remains outstanding. */
987 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
988 struct inode_record *rec)
990 struct cache_extent *cache;
991 struct inode_backref *tmp, *backref;
992 struct ptr_node *node;
993 unsigned char filetype;
995 if (!rec->found_inode_item)
998 filetype = imode_to_type(rec->imode);
999 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1000 if (backref->found_dir_item && backref->found_dir_index) {
1001 if (backref->filetype != filetype)
1002 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* A fully matched, clean backref can be retired early. */
1003 if (!backref->errors && backref->found_inode_ref &&
1004 rec->nlink == rec->found_link) {
1005 list_del(&backref->list);
1011 if (!rec->checked || rec->merging)
/* Directory vs regular/symlink specific consistency checks. */
1014 if (S_ISDIR(rec->imode)) {
1015 if (rec->found_size != rec->isize)
1016 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1017 if (rec->found_file_extent)
1018 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1019 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1020 if (rec->found_dir_item)
1021 rec->errors |= I_ERR_ODD_DIR_ITEM;
1022 if (rec->found_size != rec->nbytes)
1023 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1024 if (rec->nlink > 0 && !no_holes &&
1025 (rec->extent_end < rec->isize ||
1026 first_extent_gap(&rec->holes) < rec->isize))
1027 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1030 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1031 if (rec->found_csum_item && rec->nodatasum)
1032 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1033 if (rec->some_csum_missing && !rec->nodatasum)
1034 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1037 BUG_ON(rec->refs != 1);
1038 if (can_free_inode_rec(rec)) {
1039 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1040 node = container_of(cache, struct ptr_node, cache);
1041 BUG_ON(node->data != rec);
1042 remove_cache_extent(inode_cache, &node->cache);
1044 free_inode_rec(rec);
/* Look up the orphan item for `ino` in the given root; the search result
 * (return value handling not visible in this partial listing) indicates
 * whether the orphan item exists. */
1048 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1050 struct btrfs_path path;
1051 struct btrfs_key key;
1054 key.objectid = BTRFS_ORPHAN_OBJECTID;
1055 key.type = BTRFS_ORPHAN_ITEM_KEY;
1058 btrfs_init_path(&path);
1059 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1060 btrfs_release_path(&path);
/* Record the contents of an INODE_ITEM into the active inode record;
 * duplicate items and zero-nlink inodes are flagged. */
1066 static int process_inode_item(struct extent_buffer *eb,
1067 int slot, struct btrfs_key *key,
1068 struct shared_node *active_node)
1070 struct inode_record *rec;
1071 struct btrfs_inode_item *item;
1073 rec = active_node->current;
1074 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1075 if (rec->found_inode_item) {
1076 rec->errors |= I_ERR_DUP_INODE_ITEM;
1079 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1080 rec->nlink = btrfs_inode_nlink(eb, item);
1081 rec->isize = btrfs_inode_size(eb, item);
1082 rec->nbytes = btrfs_inode_nbytes(eb, item);
1083 rec->imode = btrfs_inode_mode(eb, item);
1084 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1086 rec->found_inode_item = 1;
/* nlink == 0 means the inode should have an orphan item. */
1087 if (rec->nlink == 0)
1088 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1089 maybe_free_inode_rec(&active_node->inode_cache, rec);
/* Find the backref matching (dir, name) on this record, or allocate a new
 * zeroed one with the name copied in.  (Partial listing: some early
 * returns are not visible here.) */
1093 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1095 int namelen, u64 dir)
1097 struct inode_backref *backref;
1099 list_for_each_entry(backref, &rec->backrefs, list) {
1100 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1102 if (backref->dir != dir || backref->namelen != namelen)
1104 if (memcmp(name, backref->name, namelen))
1109 backref = malloc(sizeof(*backref) + namelen + 1);
1112 memset(backref, 0, sizeof(*backref));
1114 backref->namelen = namelen;
1115 memcpy(backref->name, name, namelen);
1116 backref->name[namelen] = '\0';
1117 list_add_tail(&backref->list, &rec->backrefs);
/* Record that one directory entry / inode ref of a given item type points
 * at `ino`, cross-checking index and filetype against what was already
 * seen and accumulating REF_ERR_* bits. */
1121 static int add_inode_backref(struct cache_tree *inode_cache,
1122 u64 ino, u64 dir, u64 index,
1123 const char *name, int namelen,
1124 int filetype, int itemtype, int errors)
1126 struct inode_record *rec;
1127 struct inode_backref *backref;
1129 rec = get_inode_rec(inode_cache, ino, 1);
1130 BUG_ON(IS_ERR(rec));
1131 backref = get_inode_backref(rec, name, namelen, dir);
1134 backref->errors |= errors;
1135 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1136 if (backref->found_dir_index)
1137 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1138 if (backref->found_inode_ref && backref->index != index)
1139 backref->errors |= REF_ERR_INDEX_UNMATCH;
1140 if (backref->found_dir_item && backref->filetype != filetype)
1141 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1143 backref->index = index;
1144 backref->filetype = filetype;
1145 backref->found_dir_index = 1;
1146 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1148 if (backref->found_dir_item)
1149 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1150 if (backref->found_dir_index && backref->filetype != filetype)
1151 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1153 backref->filetype = filetype;
1154 backref->found_dir_item = 1;
1155 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1156 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1157 if (backref->found_inode_ref)
1158 backref->errors |= REF_ERR_DUP_INODE_REF;
1159 if (backref->found_dir_index && backref->index != index)
1160 backref->errors |= REF_ERR_INDEX_UNMATCH;
1162 backref->index = index;
1164 backref->ref_type = itemtype;
1165 backref->found_inode_ref = 1;
1170 maybe_free_inode_rec(inode_cache, rec);
/* Fold everything recorded in `src` into `dst`: backrefs are re-added
 * through add_inode_backref() (re-running the consistency checks), then the
 * found_* flags, hole set, link/size counts, extent range and inode-item
 * fields are merged.  (Partial listing: some lines are not visible.) */
1174 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1175 struct cache_tree *dst_cache)
1177 struct inode_backref *backref;
1182 list_for_each_entry(backref, &src->backrefs, list) {
1183 if (backref->found_dir_index) {
1184 add_inode_backref(dst_cache, dst->ino, backref->dir,
1185 backref->index, backref->name,
1186 backref->namelen, backref->filetype,
1187 BTRFS_DIR_INDEX_KEY, backref->errors);
1189 if (backref->found_dir_item) {
1191 add_inode_backref(dst_cache, dst->ino,
1192 backref->dir, 0, backref->name,
1193 backref->namelen, backref->filetype,
1194 BTRFS_DIR_ITEM_KEY, backref->errors);
1196 if (backref->found_inode_ref) {
1197 add_inode_backref(dst_cache, dst->ino,
1198 backref->dir, backref->index,
1199 backref->name, backref->namelen, 0,
1200 backref->ref_type, backref->errors);
1204 if (src->found_dir_item)
1205 dst->found_dir_item = 1;
1206 if (src->found_file_extent)
1207 dst->found_file_extent = 1;
1208 if (src->found_csum_item)
1209 dst->found_csum_item = 1;
1210 if (src->some_csum_missing)
1211 dst->some_csum_missing = 1;
/* Only copy holes when src's first gap is lower, i.e. src knows more. */
1212 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1213 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1218 BUG_ON(src->found_link < dir_count);
1219 dst->found_link += src->found_link - dir_count;
1220 dst->found_size += src->found_size;
1221 if (src->extent_start != (u64)-1) {
1222 if (dst->extent_start == (u64)-1) {
1223 dst->extent_start = src->extent_start;
1224 dst->extent_end = src->extent_end;
/* Overlapping ranges are an error; a gap becomes a recorded hole. */
1226 if (dst->extent_end > src->extent_start)
1227 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1228 else if (dst->extent_end < src->extent_start) {
1229 ret = add_file_extent_hole(&dst->holes,
1231 src->extent_start - dst->extent_end);
1233 if (dst->extent_end < src->extent_end)
1234 dst->extent_end = src->extent_end;
1238 dst->errors |= src->errors;
1239 if (src->found_inode_item) {
1240 if (!dst->found_inode_item) {
1241 dst->nlink = src->nlink;
1242 dst->isize = src->isize;
1243 dst->nbytes = src->nbytes;
1244 dst->imode = src->imode;
1245 dst->nodatasum = src->nodatasum;
1246 dst->found_inode_item = 1;
1248 dst->errors |= I_ERR_DUP_INODE_ITEM;
/* Move every inode record from src_node's caches (root cache first, then
 * inode cache) into dst_node, merging records that already exist in dst.
 * Also re-resolves dst_node->current to follow the highest current ino.
 * (Partial listing: loop structure and some branches are not visible.) */
1256 static int splice_shared_node(struct shared_node *src_node,
1257 struct shared_node *dst_node)
1259 struct cache_extent *cache;
1260 struct ptr_node *node, *ins;
1261 struct cache_tree *src, *dst;
1262 struct inode_record *rec, *conflict;
1263 u64 current_ino = 0;
1267 if (--src_node->refs == 0)
1269 if (src_node->current)
1270 current_ino = src_node->current->ino;
1272 src = &src_node->root_cache;
1273 dst = &dst_node->root_cache;
1275 cache = search_cache_extent(src, 0);
1277 node = container_of(cache, struct ptr_node, cache);
/* Advance before removing the current entry from src. */
1279 cache = next_cache_extent(cache);
1282 remove_cache_extent(src, &node->cache);
1285 ins = malloc(sizeof(*ins));
1287 ins->cache.start = node->cache.start;
1288 ins->cache.size = node->cache.size;
1292 ret = insert_cache_extent(dst, &ins->cache);
/* Already present in dst: merge the two records instead. */
1293 if (ret == -EEXIST) {
1294 conflict = get_inode_rec(dst, rec->ino, 1);
1295 BUG_ON(IS_ERR(conflict));
1296 merge_inode_recs(rec, conflict, dst);
1298 conflict->checked = 1;
1299 if (dst_node->current == conflict)
1300 dst_node->current = NULL;
1302 maybe_free_inode_rec(dst, conflict);
1303 free_inode_rec(rec);
/* After the root cache, repeat the same splice for the inode cache. */
1310 if (src == &src_node->root_cache) {
1311 src = &src_node->inode_cache;
1312 dst = &dst_node->inode_cache;
1316 if (current_ino > 0 && (!dst_node->current ||
1317 current_ino > dst_node->current->ino)) {
1318 if (dst_node->current) {
1319 dst_node->current->checked = 1;
1320 maybe_free_inode_rec(dst, dst_node->current);
1322 dst_node->current = get_inode_rec(dst, current_ino, 1);
1323 BUG_ON(IS_ERR(dst_node->current));
/* cache_tree teardown callback: free the ptr_node and its inode record. */
1328 static void free_inode_ptr(struct cache_extent *cache)
1330 struct ptr_node *node;
1331 struct inode_record *rec;
1333 node = container_of(cache, struct ptr_node, cache);
1335 free_inode_rec(rec);
1339 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/* Look up the shared_node covering `bytenr`, or NULL-equivalent miss. */
1341 static struct shared_node *find_shared_node(struct cache_tree *shared,
1344 struct cache_extent *cache;
1345 struct shared_node *node;
1347 cache = lookup_cache_extent(shared, bytenr, 1);
1349 node = container_of(cache, struct shared_node, cache);
/* Allocate and insert a fresh shared_node for `bytenr` with `refs` refs. */
1355 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1358 struct shared_node *node;
1360 node = calloc(1, sizeof(*node));
1363 node->cache.start = bytenr;
1364 node->cache.size = 1;
1365 cache_tree_init(&node->root_cache);
1366 cache_tree_init(&node->inode_cache);
1369 ret = insert_cache_extent(shared, &node->cache);
/* Descend into a shared tree block at `level`: create or reuse its
 * shared_node, or splice an already-walked node into the active one. */
1374 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1375 struct walk_control *wc, int level)
1377 struct shared_node *node;
1378 struct shared_node *dest;
1381 if (level == wc->active_node)
1384 BUG_ON(wc->active_node <= level);
1385 node = find_shared_node(&wc->shared, bytenr);
1387 ret = add_shared_node(&wc->shared, bytenr, refs);
1389 node = find_shared_node(&wc->shared, bytenr);
1390 wc->nodes[level] = node;
1391 wc->active_node = level;
/* A dead root (zero refs) just drops its share of the node. */
1395 if (wc->root_level == wc->active_node &&
1396 btrfs_root_refs(&root->root_item) == 0) {
1397 if (--node->refs == 0) {
1398 free_inode_recs_tree(&node->root_cache);
1399 free_inode_recs_tree(&node->inode_cache);
1400 remove_cache_extent(&wc->shared, &node->cache);
1406 dest = wc->nodes[wc->active_node];
1407 splice_shared_node(node, dest);
1408 if (node->refs == 0) {
1409 remove_cache_extent(&wc->shared, &node->cache);
/* Ascend out of a shared node: find the next tracked level above and
 * splice this node's records into it (unless the root is going away). */
1415 static int leave_shared_node(struct btrfs_root *root,
1416 struct walk_control *wc, int level)
1418 struct shared_node *node;
1419 struct shared_node *dest;
1422 if (level == wc->root_level)
1425 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1429 BUG_ON(i >= BTRFS_MAX_LEVEL);
1431 node = wc->nodes[wc->active_node];
1432 wc->nodes[wc->active_node] = NULL;
1433 wc->active_node = i;
1435 dest = wc->nodes[wc->active_node];
1436 if (wc->active_node < wc->root_level ||
1437 btrfs_root_refs(&root->root_item) > 0) {
1438 BUG_ON(node->refs <= 1);
1439 splice_shared_node(node, dest);
1441 BUG_ON(node->refs < 2);
1450 * 1 - if the root with id child_root_id is a child of root parent_root_id
1451 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1452 * has other root(s) as parent(s)
1453 * 2 - if the root child_root_id doesn't have any parent roots
1455 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
/* Probes a direct ROOT_REF item first, then scans the child's ROOT_BACKREF items. */
1458 struct btrfs_path path;
1459 struct btrfs_key key;
1460 struct extent_buffer *leaf;
1464 btrfs_init_path(&path);
/* Fast path: a (parent, ROOT_REF, child) item in the tree root proves the relation. */
1466 key.objectid = parent_root_id;
1467 key.type = BTRFS_ROOT_REF_KEY;
1468 key.offset = child_root_id;
1469 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1473 btrfs_release_path(&path);
/* Slow path: iterate every ROOT_BACKREF item of the child looking for any parent. */
1477 key.objectid = child_root_id;
1478 key.type = BTRFS_ROOT_BACKREF_KEY;
1480 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1486 leaf = path.nodes[0];
/* Ran off the current leaf: advance to the next one. */
1487 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1488 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1491 leaf = path.nodes[0];
1494 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once past the child's backref items. */
1495 if (key.objectid != child_root_id ||
1496 key.type != BTRFS_ROOT_BACKREF_KEY)
1501 if (key.offset == parent_root_id) {
1502 btrfs_release_path(&path);
1509 btrfs_release_path(&path);
/* has_parent presumably set while scanning backrefs above (elided lines). */
1512 return has_parent ? 0 : 2;
1515 static int process_dir_item(struct btrfs_root *root,
1516 struct extent_buffer *eb,
1517 int slot, struct btrfs_key *key,
1518 struct shared_node *active_node)
/*
 * Walk every btrfs_dir_item packed into this leaf slot and record a name
 * backref for each: INODE_ITEM locations go into the inode cache,
 * ROOT_ITEM locations (subvolume entries) into the root cache; any other
 * location type is reported on stderr and filed under
 * BTRFS_MULTIPLE_OBJECTIDS.  Marks the current inode as having dir items.
 */
1528 struct btrfs_dir_item *di;
1529 struct inode_record *rec;
1530 struct cache_tree *root_cache;
1531 struct cache_tree *inode_cache;
1532 struct btrfs_key location;
1533 char namebuf[BTRFS_NAME_LEN];
1535 root_cache = &active_node->root_cache;
1536 inode_cache = &active_node->inode_cache;
1537 rec = active_node->current;
1538 rec->found_dir_item = 1;
1540 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1541 total = btrfs_item_size_nr(eb, slot);
1542 while (cur < total) {
1544 btrfs_dir_item_key_to_cpu(eb, di, &location);
1545 name_len = btrfs_dir_name_len(eb, di);
1546 data_len = btrfs_dir_data_len(eb, di);
1547 filetype = btrfs_dir_type(eb, di);
/* Directory i_size accounts the sum of its entries' name lengths. */
1549 rec->found_size += name_len;
1550 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1554 len = BTRFS_NAME_LEN;
1555 error = REF_ERR_NAME_TOO_LONG;
1557 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1559 if (location.type == BTRFS_INODE_ITEM_KEY) {
1560 add_inode_backref(inode_cache, location.objectid,
1561 key->objectid, key->offset, namebuf,
1562 len, filetype, key->type, error);
1563 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1564 add_inode_backref(root_cache, location.objectid,
1565 key->objectid, key->offset,
1566 namebuf, len, filetype,
1569 fprintf(stderr, "invalid location in dir item %u\n",
1571 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1572 key->objectid, key->offset, namebuf,
1573 len, filetype, key->type, error);
/* Advance to the next packed dir item: header + name + data. */
1576 len = sizeof(*di) + name_len + data_len;
1577 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item may carry only one entry. */
1580 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1581 rec->errors |= I_ERR_DUP_DIR_INDEX;
1586 static int process_inode_ref(struct extent_buffer *eb,
1587 int slot, struct btrfs_key *key,
1588 struct shared_node *active_node)
/*
 * Walk every btrfs_inode_ref packed into this leaf slot and record an
 * inode backref (child = key->objectid, parent dir = key->offset) for
 * each name.  Names longer than BTRFS_NAME_LEN are clamped and flagged
 * with REF_ERR_NAME_TOO_LONG.
 */
1596 struct cache_tree *inode_cache;
1597 struct btrfs_inode_ref *ref;
1598 char namebuf[BTRFS_NAME_LEN];
1600 inode_cache = &active_node->inode_cache;
1602 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1603 total = btrfs_item_size_nr(eb, slot);
1604 while (cur < total) {
1605 name_len = btrfs_inode_ref_name_len(eb, ref);
1606 index = btrfs_inode_ref_index(eb, ref);
1607 if (name_len <= BTRFS_NAME_LEN) {
1611 len = BTRFS_NAME_LEN;
1612 error = REF_ERR_NAME_TOO_LONG;
1614 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1615 add_inode_backref(inode_cache, key->objectid, key->offset,
1616 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref: header + name. */
1618 len = sizeof(*ref) + name_len;
1619 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1625 static int process_inode_extref(struct extent_buffer *eb,
1626 int slot, struct btrfs_key *key,
1627 struct shared_node *active_node)
/*
 * Like process_inode_ref() but for extended refs: the parent directory
 * inode is stored inside the item (btrfs_inode_extref_parent) rather
 * than in the key offset.  Records one backref per packed extref.
 */
1636 struct cache_tree *inode_cache;
1637 struct btrfs_inode_extref *extref;
1638 char namebuf[BTRFS_NAME_LEN];
1640 inode_cache = &active_node->inode_cache;
1642 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1643 total = btrfs_item_size_nr(eb, slot);
1644 while (cur < total) {
1645 name_len = btrfs_inode_extref_name_len(eb, extref);
1646 index = btrfs_inode_extref_index(eb, extref);
1647 parent = btrfs_inode_extref_parent(eb, extref);
1648 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp and flag. */
1652 len = BTRFS_NAME_LEN;
1653 error = REF_ERR_NAME_TOO_LONG;
1655 read_extent_buffer(eb, namebuf,
1656 (unsigned long)(extref + 1), len);
1657 add_inode_backref(inode_cache, key->objectid, parent,
1658 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref: header + name. */
1660 len = sizeof(*extref) + name_len;
1661 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1668 static int count_csum_range(struct btrfs_root *root, u64 start,
1669 u64 len, u64 *found)
/*
 * Count how many bytes of the logical range [@start, @start + @len) are
 * covered by checksum items in the csum tree, accumulating into *@found.
 * Each csum item covers (item_size / csum_size) * sectorsize bytes.
 */
1671 struct btrfs_key key;
1672 struct btrfs_path path;
1673 struct extent_buffer *leaf;
1678 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1680 btrfs_init_path(&path);
1682 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1684 key.type = BTRFS_EXTENT_CSUM_KEY;
1686 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* Step back one slot: the previous csum item may still overlap @start. */
1690 if (ret > 0 && path.slots[0] > 0) {
1691 leaf = path.nodes[0];
1692 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1693 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1694 key.type == BTRFS_EXTENT_CSUM_KEY)
1699 leaf = path.nodes[0];
1700 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1701 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1706 leaf = path.nodes[0];
1709 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1710 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1711 key.type != BTRFS_EXTENT_CSUM_KEY)
1714 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts past the end of the queried range: done. */
1715 if (key.offset >= start + len)
1718 if (key.offset > start)
/* Bytes covered by this item = (number of csums) * sectorsize. */
1721 size = btrfs_item_size_nr(leaf, path.slots[0]);
1722 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1723 if (csum_end > start) {
1724 size = min(csum_end - start, len);
1733 btrfs_release_path(&path);
1739 static int process_file_extent(struct btrfs_root *root,
1740 struct extent_buffer *eb,
1741 int slot, struct btrfs_key *key,
1742 struct shared_node *active_node)
/*
 * Validate one EXTENT_DATA item of the current inode: track extent
 * coverage (overlaps and holes), sanity-check inline/regular/prealloc
 * extents, accumulate found_size, and cross-check the csum tree for
 * regular vs preallocated extents.
 */
1744 struct inode_record *rec;
1745 struct btrfs_file_extent_item *fi;
1747 u64 disk_bytenr = 0;
1748 u64 extent_offset = 0;
1749 u64 mask = root->sectorsize - 1;
1753 rec = active_node->current;
1754 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1755 rec->found_file_extent = 1;
/* First extent seen for this inode: start coverage tracking at its offset. */
1757 if (rec->extent_start == (u64)-1) {
1758 rec->extent_start = key->offset;
1759 rec->extent_end = key->offset;
/* Overlap with previous extent is an error; a gap is recorded as a hole. */
1762 if (rec->extent_end > key->offset)
1763 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1764 else if (rec->extent_end < key->offset) {
1765 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1766 key->offset - rec->extent_end);
1771 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1772 extent_type = btrfs_file_extent_type(eb, fi);
1774 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1775 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1778 rec->found_size += num_bytes;
/* Round inline length up to sector size for coverage accounting. */
1779 num_bytes = (num_bytes + mask) & ~mask;
1780 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1781 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1782 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1783 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1784 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector-aligned. */
1785 if (num_bytes == 0 || (num_bytes & mask))
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1787 if (num_bytes + extent_offset >
1788 btrfs_file_extent_ram_bytes(eb, fi))
1789 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents may not be compressed/encrypted/encoded. */
1790 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1791 (btrfs_file_extent_compression(eb, fi) ||
1792 btrfs_file_extent_encryption(eb, fi) ||
1793 btrfs_file_extent_other_encoding(eb, fi)))
1794 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent; it contributes no size. */
1795 if (disk_bytenr > 0)
1796 rec->found_size += num_bytes;
1798 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1800 rec->extent_end = key->offset + num_bytes;
1803 * The data reloc tree will copy full extents into its inode and then
1804 * copy the corresponding csums. Because the extent it copied could be
1805 * a preallocated extent that hasn't been written to yet there may be no
1806 * csums to copy, ergo we won't have csums for our file extent. This is
1807 * ok so just don't bother checking csums if the inode belongs to the
1810 if (disk_bytenr > 0 &&
1811 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
1813 if (btrfs_file_extent_compression(eb, fi))
1814 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1816 disk_bytenr += extent_offset;
1818 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Regular extents should have csums; prealloc extents should not. */
1821 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1823 rec->found_csum_item = 1;
1824 if (found < num_bytes)
1825 rec->some_csum_missing = 1;
1826 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1828 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1834 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1835 struct walk_control *wc)
/*
 * Process every item in a fs-tree leaf: maintain the "current" inode
 * record in the active shared node and dispatch each item type to its
 * handler (dir item/index, inode ref/extref, inode item, file extent).
 * Dead roots (root item refs == 0) are skipped.
 */
1837 struct btrfs_key key;
1841 struct cache_tree *inode_cache;
1842 struct shared_node *active_node;
1844 if (wc->root_level == wc->active_node &&
1845 btrfs_root_refs(&root->root_item) == 0)
1848 active_node = wc->nodes[wc->active_node];
1849 inode_cache = &active_node->inode_cache;
1850 nritems = btrfs_header_nritems(eb);
1851 for (i = 0; i < nritems; i++) {
1852 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan items are not inode metadata; skip. */
1854 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1856 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Item belongs to a new inode: close out the previous record and open a new one. */
1859 if (active_node->current == NULL ||
1860 active_node->current->ino < key.objectid) {
1861 if (active_node->current) {
1862 active_node->current->checked = 1;
1863 maybe_free_inode_rec(inode_cache,
1864 active_node->current);
1866 active_node->current = get_inode_rec(inode_cache,
1868 BUG_ON(IS_ERR(active_node->current));
1871 case BTRFS_DIR_ITEM_KEY:
1872 case BTRFS_DIR_INDEX_KEY:
1873 ret = process_dir_item(root, eb, i, &key, active_node);
1875 case BTRFS_INODE_REF_KEY:
1876 ret = process_inode_ref(eb, i, &key, active_node);
1878 case BTRFS_INODE_EXTREF_KEY:
1879 ret = process_inode_extref(eb, i, &key, active_node);
1881 case BTRFS_INODE_ITEM_KEY:
1882 ret = process_inode_item(eb, i, &key, active_node);
1884 case BTRFS_EXTENT_DATA_KEY:
1885 ret = process_file_extent(root, eb, i, &key,
1895 static void reada_walk_down(struct btrfs_root *root,
1896 struct extent_buffer *node, int slot)
/*
 * Issue readahead for every child block of @node from @slot to the end,
 * so the subsequent synchronous read_tree_block() calls hit warm cache.
 */
1905 level = btrfs_header_level(node);
1909 nritems = btrfs_header_nritems(node);
1910 blocksize = root->nodesize;
1911 for (i = slot; i < nritems; i++) {
1912 bytenr = btrfs_node_blockptr(node, i);
1913 ptr_gen = btrfs_node_ptr_generation(node, i);
1914 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1919 * Check the child node/leaf by the following condition:
1920 * 1. the first item key of the node/leaf should be the same with the one
1922 * 2. block in parent node should match the child node/leaf.
1923 * 3. generation of parent node and child's header should be consistent.
1925 * Or the child node/leaf pointed by the key in parent is not valid.
1927 * We hope to check leaf owner too, but since subvol may share leaves,
1928 * which makes leaf owner check not so strong, key check should be
1929 * sufficient enough for that case.
1931 static int check_child_node(struct btrfs_root *root,
1932 struct extent_buffer *parent, int slot,
1933 struct extent_buffer *child)
1935 struct btrfs_key parent_key;
1936 struct btrfs_key child_key;
1939 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf's first key is an item key; an internal node's is a node key. */
1940 if (btrfs_header_level(child) == 0)
1941 btrfs_item_key_to_cpu(child, &child_key, 0);
1943 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's pointer key must equal the child's first key. */
1945 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1948 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1949 parent_key.objectid, parent_key.type, parent_key.offset,
1950 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: child's header bytenr must match the parent's block pointer. */
1952 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1954 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1955 btrfs_node_blockptr(parent, slot),
1956 btrfs_header_bytenr(child));
/* Condition 3: generations must agree. */
1958 if (btrfs_node_ptr_generation(parent, slot) !=
1959 btrfs_header_generation(child)) {
1961 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1962 btrfs_header_generation(child),
1963 btrfs_node_ptr_generation(parent, slot));
/* Per-level cache of the most recently looked-up block and its extent
 * refcount, used by walk_down_tree() to avoid redundant
 * btrfs_lookup_extent_info() calls when revisiting the same block. */
1969 u64 bytenr[BTRFS_MAX_LEVEL];
1970 u64 refs[BTRFS_MAX_LEVEL];
1973 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1974 struct walk_control *wc, int *level,
1975 struct node_refs *nrefs)
/*
 * Descend the tree from path->nodes[*level], processing each leaf via
 * process_one_leaf() and validating/reading each child block on the way
 * down.  Blocks with refs > 1 are tracked through enter_shared_node();
 * @nrefs caches (bytenr, refs) per level to skip repeated extent lookups.
 */
1977 enum btrfs_tree_block_status status;
1980 struct extent_buffer *next;
1981 struct extent_buffer *cur;
1986 WARN_ON(*level < 0);
1987 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refcount of the starting block: reuse the cached value if it matches. */
1989 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1990 refs = nrefs->refs[*level];
1993 ret = btrfs_lookup_extent_info(NULL, root,
1994 path->nodes[*level]->start,
1995 *level, 1, &refs, NULL);
2000 nrefs->bytenr[*level] = path->nodes[*level]->start;
2001 nrefs->refs[*level] = refs;
2005 ret = enter_shared_node(root, path->nodes[*level]->start,
2013 while (*level >= 0) {
2014 WARN_ON(*level < 0);
2015 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2016 cur = path->nodes[*level];
2018 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots: let walk_up_tree take over. */
2021 if (path->slots[*level] >= btrfs_header_nritems(cur))
2024 ret = process_one_leaf(root, cur, wc);
2029 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2030 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2031 blocksize = root->nodesize;
/* Refcount of the child block, via the same per-level cache. */
2033 if (bytenr == nrefs->bytenr[*level - 1]) {
2034 refs = nrefs->refs[*level - 1];
2036 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2037 *level - 1, 1, &refs, NULL);
2041 nrefs->bytenr[*level - 1] = bytenr;
2042 nrefs->refs[*level - 1] = refs;
2047 ret = enter_shared_node(root, bytenr, refs,
/* Shared child already processed elsewhere: skip over it. */
2050 path->slots[*level]++;
2055 next = btrfs_find_tree_block(root, bytenr, blocksize);
/* Not cached or stale: read ahead the siblings and read the block itself. */
2056 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2057 free_extent_buffer(next);
2058 reada_walk_down(root, cur, path->slots[*level]);
2059 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent as a corrupt extent. */
2061 if (!extent_buffer_uptodate(next)) {
2062 struct btrfs_key node_key;
2064 btrfs_node_key_to_cpu(path->nodes[*level],
2066 path->slots[*level]);
2067 btrfs_add_corrupt_extent_record(root->fs_info,
2069 path->nodes[*level]->start,
2070 root->nodesize, *level);
2076 ret = check_child_node(root, cur, path->slots[*level], next);
2082 if (btrfs_is_leaf(next))
2083 status = btrfs_check_leaf(root, NULL, next);
2085 status = btrfs_check_node(root, NULL, next);
2086 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2087 free_extent_buffer(next);
/* Step down one level into the validated child. */
2092 *level = *level - 1;
2093 free_extent_buffer(path->nodes[*level]);
2094 path->nodes[*level] = next;
2095 path->slots[*level] = 0;
2098 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2102 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2103 struct walk_control *wc, int *level)
/*
 * Climb back up from *level until a node with remaining slots is found,
 * releasing each fully-consumed node and leaving its shared-node state
 * via leave_shared_node() where applicable.
 */
2106 struct extent_buffer *leaf;
2108 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2109 leaf = path->nodes[i];
/* This node still has unvisited slots: resume the walk here. */
2110 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2115 free_extent_buffer(path->nodes[*level]);
2116 path->nodes[*level] = NULL;
2117 BUG_ON(*level > wc->active_node);
2118 if (*level == wc->active_node)
2119 leave_shared_node(root, wc, *level);
2126 static int check_root_dir(struct inode_record *rec)
/*
 * Validate the root directory inode of a tree: it must have an inode
 * item with no other errors, nlink 1, no counted links, and exactly the
 * ".." inode-ref backref at index 0 with no dir item/index entries.
 */
2128 struct inode_backref *backref;
2131 if (!rec->found_inode_item || rec->errors)
2133 if (rec->nlink != 1 || rec->found_link != 0)
2135 if (list_empty(&rec->backrefs))
2137 backref = to_inode_backref(rec->backrefs.next);
2138 if (!backref->found_inode_ref)
2140 if (backref->index != 0 || backref->namelen != 2 ||
2141 memcmp(backref->name, "..", 2))
2143 if (backref->found_dir_index || backref->found_dir_item)
2150 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2151 struct btrfs_root *root, struct btrfs_path *path,
2152 struct inode_record *rec)
/*
 * Rewrite a directory's i_size to the value accumulated while scanning
 * its entries (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 * Searches with offset (u64)-1 so the match is the slot just before the
 * returned position.
 */
2154 struct btrfs_inode_item *ei;
2155 struct btrfs_key key;
2158 key.objectid = rec->ino;
2159 key.type = BTRFS_INODE_ITEM_KEY;
2160 key.offset = (u64)-1;
2162 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no previous item to step back onto. */
2166 if (!path->slots[0]) {
/* Slot decrement to the actual inode item is elided in this chunk. */
2173 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2174 if (key.objectid != rec->ino) {
2179 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2180 struct btrfs_inode_item);
2181 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2182 btrfs_mark_buffer_dirty(path->nodes[0]);
2183 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2184 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2185 root->root_key.objectid);
2187 btrfs_release_path(path);
2191 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2192 struct btrfs_root *root,
2193 struct btrfs_path *path,
2194 struct inode_record *rec)
/*
 * Insert the missing orphan item for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on success.
 */
2198 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2199 btrfs_release_path(path);
2201 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2205 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2206 struct btrfs_root *root,
2207 struct btrfs_path *path,
2208 struct inode_record *rec)
/*
 * Rewrite the inode's nbytes field to the byte count accumulated from
 * its file extents (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 */
2210 struct btrfs_inode_item *ei;
2211 struct btrfs_key key;
2214 key.objectid = rec->ino;
2215 key.type = BTRFS_INODE_ITEM_KEY;
2218 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2225 /* Since ret == 0, no need to check anything */
2226 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2227 struct btrfs_inode_item);
2228 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2229 btrfs_mark_buffer_dirty(path->nodes[0]);
2230 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2231 printf("reset nbytes for ino %llu root %llu\n",
2232 rec->ino, root->root_key.objectid);
2234 btrfs_release_path(path);
2238 static int add_missing_dir_index(struct btrfs_root *root,
2239 struct cache_tree *inode_cache,
2240 struct inode_record *rec,
2241 struct inode_backref *backref)
/*
 * Recreate a missing DIR_INDEX item for @backref (the dir item and inode
 * ref exist but the index does not).  Inserts the item in its own
 * transaction, marks the backref as found, and re-evaluates the parent
 * directory's i_size error state since its size just grew.
 */
2243 struct btrfs_path *path;
2244 struct btrfs_trans_handle *trans;
2245 struct btrfs_dir_item *dir_item;
2246 struct extent_buffer *leaf;
2247 struct btrfs_key key;
2248 struct btrfs_disk_key disk_key;
2249 struct inode_record *dir_rec;
2250 unsigned long name_ptr;
2251 u32 data_size = sizeof(*dir_item) + backref->namelen;
2254 path = btrfs_alloc_path();
2258 trans = btrfs_start_transaction(root, 1);
2259 if (IS_ERR(trans)) {
2260 btrfs_free_path(path);
2261 return PTR_ERR(trans);
2264 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2265 (unsigned long long)rec->ino);
2266 key.objectid = backref->dir;
2267 key.type = BTRFS_DIR_INDEX_KEY;
2268 key.offset = backref->index;
2270 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2273 leaf = path->nodes[0];
2274 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The dir item points at the child's INODE_ITEM. */
2276 disk_key.objectid = cpu_to_le64(rec->ino);
2277 disk_key.type = BTRFS_INODE_ITEM_KEY;
2278 disk_key.offset = 0;
2280 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2281 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2282 btrfs_set_dir_data_len(leaf, dir_item, 0);
2283 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2284 name_ptr = (unsigned long)(dir_item + 1);
2285 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2286 btrfs_mark_buffer_dirty(leaf);
2287 btrfs_free_path(path);
2288 btrfs_commit_transaction(trans, root);
2290 backref->found_dir_index = 1;
2291 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2292 BUG_ON(IS_ERR(dir_rec));
/* The parent dir grew by namelen; recompute its isize error flag. */
2295 dir_rec->found_size += backref->namelen;
2296 if (dir_rec->found_size == dir_rec->isize &&
2297 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2298 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2299 if (dir_rec->found_size != dir_rec->isize)
2300 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2305 static int delete_dir_index(struct btrfs_root *root,
2306 struct cache_tree *inode_cache,
2307 struct inode_record *rec,
2308 struct inode_backref *backref)
/*
 * Remove a bad DIR_INDEX item described by @backref in its own
 * transaction.  If the located item holds a single name the whole item
 * is deleted; otherwise only the matching name is removed from it.
 */
2310 struct btrfs_trans_handle *trans;
2311 struct btrfs_dir_item *di;
2312 struct btrfs_path *path;
2315 path = btrfs_alloc_path();
2319 trans = btrfs_start_transaction(root, 1);
2320 if (IS_ERR(trans)) {
2321 btrfs_free_path(path);
2322 return PTR_ERR(trans);
2326 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2327 (unsigned long long)backref->dir,
2328 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2329 (unsigned long long)root->objectid);
/* mod -1 asks the lookup to prepare the path for deletion. */
2331 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2332 backref->name, backref->namelen,
2333 backref->index, -1);
2336 btrfs_free_path(path);
2337 btrfs_commit_transaction(trans, root);
/* di == NULL (elided check) presumably means delete the whole item. */
2344 ret = btrfs_del_item(trans, root, path);
2346 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2348 btrfs_free_path(path);
2349 btrfs_commit_transaction(trans, root);
2353 static int create_inode_item(struct btrfs_root *root,
2354 struct inode_record *rec,
2355 struct inode_backref *backref, int root_dir)
/*
 * Recreate a missing INODE_ITEM for rec->ino from what the scan
 * discovered: link count from found_link (or 1 for the root dir),
 * nbytes from found_size, mode guessed as dir 0755 when dir items were
 * seen, otherwise regular file 0755.  Timestamps are set to "now".
 */
2357 struct btrfs_trans_handle *trans;
2358 struct btrfs_inode_item inode_item;
2359 time_t now = time(NULL);
2362 trans = btrfs_start_transaction(root, 1);
2363 if (IS_ERR(trans)) {
2364 ret = PTR_ERR(trans);
2368 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2369 "be incomplete, please check permissions and content after "
2370 "the fsck completes.\n", (unsigned long long)root->objectid,
2371 (unsigned long long)rec->ino);
2373 memset(&inode_item, 0, sizeof(inode_item));
2374 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2376 btrfs_set_stack_inode_nlink(&inode_item, 1);
2378 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2379 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2380 if (rec->found_dir_item) {
/* Conflicting evidence (dir items AND extents): prefer directory, warn. */
2381 if (rec->found_file_extent)
2382 fprintf(stderr, "root %llu inode %llu has both a dir "
2383 "item and extents, unsure if it is a dir or a "
2384 "regular file so setting it as a directory\n",
2385 (unsigned long long)root->objectid,
2386 (unsigned long long)rec->ino);
2387 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2388 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): `else if (!rec->found_dir_item)` is redundant -- a plain
 * `else` is equivalent here; harmless but worth simplifying upstream. */
2389 } else if (!rec->found_dir_item) {
2390 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2391 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2393 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2394 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2395 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2396 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2397 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2398 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2399 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2400 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2402 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2404 btrfs_commit_transaction(trans, root);
2408 static int repair_inode_backrefs(struct btrfs_root *root,
2409 struct inode_record *rec,
2410 struct cache_tree *inode_cache,
/*
 * Walk the inode's backref list and repair each inconsistency found:
 * recreate a missing root-dir inode item, delete dangling/unmatched
 * dir indexes, add missing dir indexes, add missing dir index/item
 * pairs from an inode ref, and recreate the inode item when a fully
 * consistent backref exists without one.  Returns the repair count
 * (positive) or the first error.
 */
2413 struct inode_backref *tmp, *backref;
2414 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2418 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root dir without an inode item: recreate it in root_dir mode. */
2419 if (!delete && rec->ino == root_dirid) {
2420 if (!rec->found_inode_item) {
2421 ret = create_inode_item(root, rec, backref, 1);
2428 /* Index 0 for root dir's are special, don't mess with it */
2429 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index with no inode ref, or one whose index disagrees: delete it. */
2433 ((backref->found_dir_index && !backref->found_inode_ref) ||
2434 (backref->found_dir_index && backref->found_inode_ref &&
2435 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2436 ret = delete_dir_index(root, inode_cache, rec, backref);
2440 list_del(&backref->list);
/* Dir item + inode ref but no dir index: recreate the index. */
2444 if (!delete && !backref->found_dir_index &&
2445 backref->found_dir_item && backref->found_inode_ref) {
2446 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below; the second test
 * is redundant as written -- confirm intended condition upstream. */
2451 if (backref->found_dir_item &&
2452 backref->found_dir_index &&
2453 backref->found_dir_index) {
2454 if (!backref->errors &&
2455 backref->found_inode_ref) {
2456 list_del(&backref->list);
/* Inode ref only: insert the missing dir index/item pair, unless a
 * name conflict exists (left to the nlink repair code). */
2462 if (!delete && (!backref->found_dir_index &&
2463 !backref->found_dir_item &&
2464 backref->found_inode_ref)) {
2465 struct btrfs_trans_handle *trans;
2466 struct btrfs_key location;
2468 ret = check_dir_conflict(root, backref->name,
2474 * let nlink fixing routine to handle it,
2475 * which can do it better.
2480 location.objectid = rec->ino;
2481 location.type = BTRFS_INODE_ITEM_KEY;
2482 location.offset = 0;
2484 trans = btrfs_start_transaction(root, 1);
2485 if (IS_ERR(trans)) {
2486 ret = PTR_ERR(trans);
2489 fprintf(stderr, "adding missing dir index/item pair "
2491 (unsigned long long)rec->ino);
2492 ret = btrfs_insert_dir_item(trans, root, backref->name,
2494 backref->dir, &location,
2495 imode_to_type(rec->imode),
2498 btrfs_commit_transaction(trans, root);
/* Fully consistent backref but no inode item: recreate the inode item. */
2502 if (!delete && (backref->found_inode_ref &&
2503 backref->found_dir_index &&
2504 backref->found_dir_item &&
2505 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2506 !rec->found_inode_item)) {
2507 ret = create_inode_item(root, rec, backref, 0);
2514 return ret ? ret : repaired;
2518 * To determine the file type for nlink/inode_item repair
2520 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2521 * Return -ENOENT if file type is not found.
2523 static int find_file_type(struct inode_record *rec, u8 *type)
2525 struct inode_backref *backref;
2527 /* For inode item recovered case */
2528 if (rec->found_inode_item) {
/* The inode item itself is authoritative when present. */
2529 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the filetype recorded in any dir entry backref. */
2533 list_for_each_entry(backref, &rec->backrefs, list) {
2534 if (backref->found_dir_index || backref->found_dir_item) {
2535 *type = backref->filetype;
2543 * To determine the file name for nlink repair
2545 * Return 0 if file name is found, set name and namelen.
2546 * Return -ENOENT if file name is not found.
2548 static int find_file_name(struct inode_record *rec,
2549 char *name, int *namelen)
2551 struct inode_backref *backref;
/* Any backref that carries a name (dir index/item or inode ref) will do. */
2553 list_for_each_entry(backref, &rec->backrefs, list) {
2554 if (backref->found_dir_index || backref->found_dir_item ||
2555 backref->found_inode_ref) {
2556 memcpy(name, backref->name, backref->namelen);
2557 *namelen = backref->namelen;
2564 /* Reset the nlink of the inode to the correct one */
2565 static int reset_nlink(struct btrfs_trans_handle *trans,
2566 struct btrfs_root *root,
2567 struct btrfs_path *path,
2568 struct inode_record *rec)
/*
 * Rebuild the inode's link count from scratch: unlink every backref
 * (dropping invalid ones from the list), zero nlink in the inode item,
 * then re-add each remaining valid link via btrfs_add_link(), which
 * increments nlink to the correct final value.
 */
2570 struct inode_backref *backref;
2571 struct inode_backref *tmp;
2572 struct btrfs_key key;
2573 struct btrfs_inode_item *inode_item;
2576 /* We don't believe this either, reset it and iterate backref */
2577 rec->found_link = 0;
2579 /* Remove all backref including the valid ones */
2580 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2581 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2582 backref->index, backref->name,
2583 backref->namelen, 0);
2587 /* remove invalid backref, so it won't be added back */
2588 if (!(backref->found_dir_index &&
2589 backref->found_dir_item &&
2590 backref->found_inode_ref)) {
2591 list_del(&backref->list);
2598 /* Set nlink to 0 */
2599 key.objectid = rec->ino;
2600 key.type = BTRFS_INODE_ITEM_KEY;
2602 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2609 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2610 struct btrfs_inode_item);
2611 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2612 btrfs_mark_buffer_dirty(path->nodes[0]);
2613 btrfs_release_path(path);
2616 * Add back valid inode_ref/dir_item/dir_index,
2617 * add_link() will handle the nlink inc, so new nlink must be correct
2619 list_for_each_entry(backref, &rec->backrefs, list) {
2620 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2621 backref->name, backref->namelen,
2622 backref->filetype, &backref->index, 1);
2627 btrfs_release_path(path);
2631 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2632 struct btrfs_root *root,
2633 struct btrfs_path *path,
2634 struct inode_record *rec)
/*
 * Fix a wrong link count: rebuild nlink via reset_nlink(), and if the
 * inode ends up with zero links, re-link it under "lost+found" using a
 * recovered (or synthesized) name, appending ".INO" suffixes on name
 * collisions.  Clears I_ERR_LINK_COUNT_WRONG unconditionally so the
 * caller cannot loop forever on the same inode.
 */
2636 char *dir_name = "lost+found";
2637 char namebuf[BTRFS_NAME_LEN] = {0};
2642 int name_recovered = 0;
2643 int type_recovered = 0;
2647 * Get file name and type first before these invalid inode ref
2648 * are deleted by remove_all_invalid_backref()
2650 name_recovered = !find_file_name(rec, namebuf, &namelen);
2651 type_recovered = !find_file_type(rec, &type);
/* No name anywhere: fall back to the inode number as the file name. */
2653 if (!name_recovered) {
2654 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2655 rec->ino, rec->ino);
2656 namelen = count_digits(rec->ino);
2657 sprintf(namebuf, "%llu", rec->ino);
2660 if (!type_recovered) {
2661 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2663 type = BTRFS_FT_REG_FILE;
2667 ret = reset_nlink(trans, root, path, rec);
2670 "Failed to reset nlink for inode %llu: %s\n",
2671 rec->ino, strerror(-ret));
/* Orphaned inode (no valid links left): park it in lost+found. */
2675 if (rec->found_link == 0) {
2676 lost_found_ino = root->highest_inode;
2677 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2682 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2683 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2686 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2687 dir_name, strerror(-ret));
2690 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2691 namebuf, namelen, type, NULL, 1);
2693 * Add ".INO" suffix several times to handle case where
2694 * "FILENAME.INO" is already taken by another file.
2696 while (ret == -EEXIST) {
2698 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2700 if (namelen + count_digits(rec->ino) + 1 >
2705 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2707 namelen += count_digits(rec->ino) + 1;
2708 ret = btrfs_add_link(trans, root, rec->ino,
2709 lost_found_ino, namebuf,
2710 namelen, type, NULL, 1);
2714 "Failed to link the inode %llu to %s dir: %s\n",
2715 rec->ino, dir_name, strerror(-ret));
2719 * Just increase the found_link, don't actually add the
2720 * backref. This will make things easier and this inode
2721 * record will be freed after the repair is done.
2722 * So fsck will not report problem about this inode.
2725 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2726 namelen, namebuf, dir_name);
2728 printf("Fixed the nlink of inode %llu\n", rec->ino);
2731 * Clear the flag anyway, or we will loop forever for the same inode
2732 * as it will not be removed from the bad inode list and the dead loop
2735 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2736 btrfs_release_path(path);
2741 * Check if there is any normal(reg or prealloc) file extent for given
2743 * This is used to determine the file type when neither its dir_index/item or
2744 * inode_item exists.
2746 * This will *NOT* report error, if any error happens, just consider it does
2747 * not have any normal file extent.
2749 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2751 struct btrfs_path *path;
2752 struct btrfs_key key;
2753 struct btrfs_key found_key;
2754 struct btrfs_file_extent_item *fi;
2758 path = btrfs_alloc_path();
/* key.objectid = ino presumably set in an elided line -- confirm. */
2762 key.type = BTRFS_EXTENT_DATA_KEY;
2765 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Inexact match landing past the leaf end: step to the next leaf. */
2770 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2771 ret = btrfs_next_leaf(root, path);
2778 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2780 if (found_key.objectid != ino ||
2781 found_key.type != BTRFS_EXTENT_DATA_KEY)
2783 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2784 struct btrfs_file_extent_item);
/* Any non-inline extent (REG or PREALLOC) counts as "normal". */
2785 type = btrfs_file_extent_type(path->nodes[0], fi);
2786 if (type != BTRFS_FILE_EXTENT_INLINE) {
2792 btrfs_free_path(path);
/*
 * Map a btrfs directory-entry file type (BTRFS_FT_*) to the corresponding
 * S_IF* bits of an inode mode.
 */
static u32 btrfs_type_to_imode(u8 type)
	static u32 imode_by_btrfs_type[] = {
		[BTRFS_FT_REG_FILE] = S_IFREG,
		[BTRFS_FT_DIR] = S_IFDIR,
		[BTRFS_FT_CHRDEV] = S_IFCHR,
		[BTRFS_FT_BLKDEV] = S_IFBLK,
		[BTRFS_FT_FIFO] = S_IFIFO,
		[BTRFS_FT_SOCK] = S_IFSOCK,
		[BTRFS_FT_SYMLINK] = S_IFLNK,

	return imode_by_btrfs_type[(type)];
/*
 * Recreate a missing inode item for @rec.
 *
 * Only the inode item itself is rebuilt; link-count repair (e.g. moving the
 * file into lost+found) is explicitly deferred to the nlink repair path by
 * setting I_ERR_LINK_COUNT_WRONG below.
 */
static int repair_inode_no_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct inode_record *rec)
	int type_recovered = 0;

	printf("Trying to rebuild inode:%llu\n", rec->ino);

	type_recovered = !find_file_type(rec, &filetype);

	/*
	 * Try to determine inode type if type not found.
	 *
	 * For found regular file extent, it must be FILE.
	 * For found dir_item/index, it must be DIR.
	 * For undetermined one, use FILE as fallback.
	 *
	 * TODO:
	 * 1. If found backref(inode_index/item is already handled) to it,
	 * Need new inode-inode ref structure to allow search for that.
	 */
	if (!type_recovered) {
		if (rec->found_file_extent &&
		    find_normal_file_extent(root, rec->ino)) {
			filetype = BTRFS_FT_REG_FILE;
		} else if (rec->found_dir_item) {
			filetype = BTRFS_FT_DIR;
		} else if (!list_empty(&rec->orphan_extents)) {
			filetype = BTRFS_FT_REG_FILE;
			printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
			filetype = BTRFS_FT_REG_FILE;
	ret = btrfs_new_inode(trans, root, rec->ino,
			      mode | btrfs_type_to_imode(filetype));
	/*
	 * Here inode rebuild is done, we only rebuild the inode item,
	 * don't repair the nlink(like move to lost+found).
	 * That is the job of nlink repair.
	 *
	 * We just fill the record and return
	 */
	rec->found_dir_item = 1;
	rec->imode = mode | btrfs_type_to_imode(filetype);
	rec->errors &= ~I_ERR_NO_INODE_ITEM;
	/* Ensure the inode_nlinks repair function will be called */
	rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-insert the orphan data extents recorded in @rec back into the fs tree
 * as file extents of the inode, dropping any orphan extent that conflicts
 * with an existing extent.  Also updates the record's size and hole
 * bookkeeping so the related error bits can be cleared.
 */
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct inode_record *rec)
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;

	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
		/*
		 * Check for conflicting file extents
		 *
		 * Here we don't know whether the extents is compressed or not,
		 * so we can only assume it not compressed nor data offset,
		 * and use its disk_len as extent length.
		 */
		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
				       orphan->offset, orphan->disk_len, 0);
		btrfs_release_path(path);
			"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
			orphan->disk_bytenr, orphan->disk_len);
			/* Conflict: drop the orphan's extent allocation */
			ret = btrfs_free_extent(trans,
					root->fs_info->extent_root,
					orphan->disk_bytenr, orphan->disk_len,
					0, root->objectid, orphan->objectid,
		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
					orphan->offset, orphan->disk_bytenr,
					orphan->disk_len, orphan->disk_len);
		/* Update file size info */
		rec->found_size += orphan->disk_len;
		if (rec->found_size == rec->nbytes)
			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;

		/* Update the file extent hole info too */
		ret = del_file_extent_hole(&rec->holes, orphan->offset,
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
		list_del(&orphan->list);
	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fix "discount" (missing) file extents of a NO_HOLES-incompatible inode by
 * punching explicit hole extents over every recorded gap, then clearing the
 * corresponding error bit once the hole tree is empty.
 */
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct inode_record *rec)
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(&rec->holes);
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = btrfs_punch_hole(trans, root, rec->ino,
				       hole->start, hole->len);
		ret = del_file_extent_hole(&rec->holes, hole->start,
		if (RB_EMPTY_ROOT(&rec->holes))
			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
		node = rb_first(&rec->holes);
	/* special case for a file losing all its file extent */
		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
				       round_up(rec->isize, root->sectorsize));
	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
	       rec->ino, root->objectid);
/*
 * Dispatch the individual repair helpers for every repairable error bit set
 * on @rec, in dependency order (inode item first, nlink/nbytes last), inside
 * a single transaction.  Stops at the first helper failure.
 */
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;

	/* Nothing repairable: bail out early */
	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
			     I_ERR_NO_ORPHAN_ITEM |
			     I_ERR_LINK_COUNT_WRONG |
			     I_ERR_NO_INODE_ITEM |
			     I_ERR_FILE_EXTENT_ORPHAN |
			     I_ERR_FILE_EXTENT_DISCOUNT|
			     I_ERR_FILE_NBYTES_WRONG)))
	path = btrfs_alloc_path();

	/*
	 * For nlink repair, it may create a dir and add link, so
	 * 2 for parent(256)'s dir_index and dir_item
	 * 2 for lost+found dir's inode_item and inode_ref
	 * 1 for the new inode_ref of the file
	 * 2 for lost+found dir's dir_index and dir_item for the file
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	if (rec->errors & I_ERR_NO_INODE_ITEM)
		ret = repair_inode_no_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
		ret = repair_inode_orphan_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
		ret = repair_inode_discount_extent(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
		ret = repair_inode_isize(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
		ret = repair_inode_orphan_item(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
		ret = repair_inode_nlinks(trans, root, path, rec);
	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
		ret = repair_inode_nbytes(trans, root, path, rec);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Verify (and optionally repair) every inode record collected for @root's
 * inode cache: the root dir itself, backrefs, orphan items, link counts.
 * Frees each record as it is processed; returns -1 if any unrepaired error
 * remains, 0 otherwise.
 */
static int check_inode_recs(struct btrfs_root *root,
			    struct cache_tree *inode_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;
	u64 root_dirid = btrfs_root_dirid(&root->root_item);

	/* A dead root (refs == 0) should have an empty inode cache */
	if (btrfs_root_refs(&root->root_item) == 0) {
		if (!cache_tree_empty(inode_cache))
			fprintf(stderr, "warning line %d\n", __LINE__);

	/*
	 * We need to record the highest inode number for later 'lost+found'
	 * dir creation.
	 * We must select an ino not used/referred by any existing inode, or
	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
	 * this may cause 'lost+found' dir has wrong nlinks.
	 */
	cache = last_cache_extent(inode_cache);
		node = container_of(cache, struct ptr_node, cache);
		if (rec->ino > root->highest_inode)
			root->highest_inode = rec->ino;

	/*
	 * We need to repair backrefs first because we could change some of the
	 * errors in the inode recs.
	 *
	 * We also need to go through and delete invalid backrefs first and then
	 * add the correct ones second. We do this because we may get EEXIST
	 * when adding back the correct index because we hadn't yet deleted the
	 * invalid index.
	 *
	 * For example, if we were missing a dir index then the directories
	 * isize would be wrong, so if we fixed the isize to what we thought it
	 * would be and then fixed the backref we'd still have a invalid fs, so
	 * we need to add back the dir index and then check to see if the isize
	 * is still wrong.
	 */
	if (stage == 3 && !err)
	cache = search_cache_extent(inode_cache, 0);
	while (repair && cache) {
		node = container_of(cache, struct ptr_node, cache);
		cache = next_cache_extent(cache);
		/* Need to free everything up and rescan */
			remove_cache_extent(inode_cache, &node->cache);
			free_inode_rec(rec);
		if (list_empty(&rec->backrefs))
		ret = repair_inode_backrefs(root, rec, inode_cache,
	/* Check the root directory of this tree */
	rec = get_inode_rec(inode_cache, root_dirid, 0);
	BUG_ON(IS_ERR(rec));
		ret = check_root_dir(rec);
			fprintf(stderr, "root %llu root dir %llu error\n",
				(unsigned long long)root->root_key.objectid,
				(unsigned long long)root_dirid);
			print_inode_error(root, rec);
			struct btrfs_trans_handle *trans;

			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
			/* Root dir missing entirely: recreate it */
				"root %llu missing its root dir, recreating\n",
				(unsigned long long)root->objectid);
			ret = btrfs_make_root_dir(trans, root, root_dirid);
			btrfs_commit_transaction(trans, root);
		fprintf(stderr, "root %llu root dir %llu not found\n",
			(unsigned long long)root->root_key.objectid,
			(unsigned long long)root_dirid);
	/* Walk the remaining inode records */
	cache = search_cache_extent(inode_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(inode_cache, &node->cache);
		if (rec->ino == root_dirid ||
		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
			free_inode_rec(rec);
		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
			ret = check_orphan_item(root, rec->ino);
				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
			if (can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!rec->found_inode_item)
			rec->errors |= I_ERR_NO_INODE_ITEM;
		if (rec->found_link != rec->nlink)
			rec->errors |= I_ERR_LINK_COUNT_WRONG;
			ret = try_repair_inode(root, rec);
			if (ret == 0 && can_free_inode_rec(rec)) {
				free_inode_rec(rec);
		if (!(repair && ret == 0))
		print_inode_error(root, rec);
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_inode_ref)
				backref->errors |= REF_ERR_NO_INODE_REF;
			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
				" namelen %u name %s filetype %d errors %x",
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
				backref->filetype, backref->errors);
			print_ref_error(backref->errors);
		free_inode_rec(rec);
	return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one on a cache miss.  Returns ERR_PTR on allocation or
 * insertion failure.
 */
static struct root_record *get_root_rec(struct cache_tree *root_cache,
	struct cache_extent *cache;
	struct root_record *rec = NULL;

	/* Records are keyed by objectid with size 1 in the cache tree */
	cache = lookup_cache_extent(root_cache, objectid, 1);
		rec = container_of(cache, struct root_record, cache);
		rec = calloc(1, sizeof(*rec));
			return ERR_PTR(-ENOMEM);
		rec->objectid = objectid;
		INIT_LIST_HEAD(&rec->backrefs);
		rec->cache.start = objectid;
		rec->cache.size = 1;
		ret = insert_cache_extent(root_cache, &rec->cache);
			return ERR_PTR(-EEXIST);
/*
 * Find an existing root_backref on @rec matching (ref_root, dir, name), or
 * allocate a new zero-initialized one (name copied, NUL-terminated) and link
 * it onto rec->backrefs.
 */
static struct root_backref *get_root_backref(struct root_record *rec,
					     u64 ref_root, u64 dir, u64 index,
					     const char *name, int namelen)
	struct root_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (backref->ref_root != ref_root || backref->dir != dir ||
		    backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))
	/* Not found: allocate with room for the name plus NUL */
	backref = calloc(1, sizeof(*backref) + namelen + 1);
	backref->ref_root = ref_root;
	backref->index = index;
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record stored in a cache tree: unlink and free all
 * of its backrefs, then the record itself.  Used as the per-node callback
 * of the FREE_EXTENT_CACHE_BASED_TREE helper below.
 */
static void free_root_record(struct cache_extent *cache)
	struct root_record *rec;
	struct root_backref *backref;

	rec = container_of(cache, struct root_record, cache);
	while (!list_empty(&rec->backrefs)) {
		backref = to_root_backref(rec->backrefs.next);
		list_del(&backref->list);

/* Generates free_root_recs_tree() over a cache_tree of root_records */
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root (via a
 * DIR_ITEM/DIR_INDEX in dir @dir, or a ROOT_REF/ROOT_BACKREF item).
 * Accumulates "found" flags and error bits on the shared root_backref;
 * a root becomes reachable once both a forward ref and a dir item exist.
 */
static int add_root_backref(struct cache_tree *root_cache,
			    u64 root_id, u64 ref_root, u64 dir, u64 index,
			    const char *name, int namelen,
			    int item_type, int errors)
	struct root_record *rec;
	struct root_backref *backref;

	rec = get_root_rec(root_cache, root_id);
	BUG_ON(IS_ERR(rec));
	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
	backref->errors |= errors;

	/* DIR_ITEM carries no index, every other item type does */
	if (item_type != BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_index || backref->found_back_ref ||
		    backref->found_forward_ref) {
			if (backref->index != index)
				backref->errors |= REF_ERR_INDEX_UNMATCH;
			backref->index = index;
	if (item_type == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_forward_ref)
		backref->found_dir_item = 1;
	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
		backref->found_dir_index = 1;
	} else if (item_type == BTRFS_ROOT_REF_KEY) {
		if (backref->found_forward_ref)
			backref->errors |= REF_ERR_DUP_ROOT_REF;
		else if (backref->found_dir_item)
		backref->found_forward_ref = 1;
	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
		if (backref->found_back_ref)
			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
		backref->found_back_ref = 1;

	if (backref->found_forward_ref && backref->found_dir_item)
		backref->reachable = 1;
/*
 * Move the per-subvolume root references collected in @src_cache (inode
 * records describing subvolume dir entries) into the global @dst_cache as
 * root backrefs.  Reloc tree records are simply discarded.  Each source
 * record is freed once converted.
 */
static int merge_root_recs(struct btrfs_root *root,
			   struct cache_tree *src_cache,
			   struct cache_tree *dst_cache)
	struct cache_extent *cache;
	struct ptr_node *node;
	struct inode_record *rec;
	struct inode_backref *backref;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		free_inode_recs_tree(src_cache);
	cache = search_cache_extent(src_cache, 0);
		node = container_of(cache, struct ptr_node, cache);
		remove_cache_extent(src_cache, &node->cache);

		ret = is_child_root(root, root->objectid, rec->ino);

		list_for_each_entry(backref, &rec->backrefs, list) {
			BUG_ON(backref->found_inode_ref);
			if (backref->found_dir_item)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_ITEM_KEY,
			if (backref->found_dir_index)
				add_root_backref(dst_cache, rec->ino,
					root->root_key.objectid, backref->dir,
					backref->index, backref->name,
					backref->namelen, BTRFS_DIR_INDEX_KEY,
		free_inode_rec(rec);
/*
 * Verify reachability of every fs/subvolume tree recorded in @root_cache:
 * iteratively drop "reachable" marks whose referencing root is itself
 * unreferenced, check orphan items for unreferenced roots, and report any
 * remaining unresolved refs.  Returns 1 if errors remain, 0 otherwise.
 */
static int check_root_refs(struct btrfs_root *root,
			   struct cache_tree *root_cache)
	struct root_record *rec;
	struct root_record *ref_root;
	struct root_backref *backref;
	struct cache_extent *cache;

	/* The top-level fs tree is always considered referenced */
	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
	BUG_ON(IS_ERR(rec));

	/* fixme: this can not detect circular references */
	cache = search_cache_extent(root_cache, 0);
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);
		if (rec->found_ref == 0)
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
			ref_root = get_root_rec(root_cache,
			BUG_ON(IS_ERR(ref_root));
			if (ref_root->found_ref > 0)
			/* Referencing root is dead: this ref doesn't count */
			backref->reachable = 0;
			if (rec->found_ref == 0)
	/* Second pass: report what is still unreferenced or unresolved */
	cache = search_cache_extent(root_cache, 0);
		rec = container_of(cache, struct root_record, cache);
		cache = next_cache_extent(cache);
		if (rec->found_ref == 0 &&
		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
			ret = check_orphan_item(root->fs_info->tree_root,
			/*
			 * If we don't have a root item then we likely just have
			 * a dir item in a snapshot for this root but no actual
			 * ref key or anything so it's meaningless.
			 */
			if (!rec->found_root_item)
			fprintf(stderr, "fs tree %llu not referenced\n",
				(unsigned long long)rec->objectid);
		if (rec->found_ref > 0 && !rec->found_root_item)
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->found_dir_item)
				backref->errors |= REF_ERR_NO_DIR_ITEM;
			if (!backref->found_dir_index)
				backref->errors |= REF_ERR_NO_DIR_INDEX;
			if (!backref->found_back_ref)
				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
			if (!backref->found_forward_ref)
				backref->errors |= REF_ERR_NO_ROOT_REF;
			if (backref->reachable && backref->errors)
		fprintf(stderr, "fs tree %llu refs %u %s\n",
			(unsigned long long)rec->objectid, rec->found_ref,
			rec->found_root_item ? "" : "not found");
		list_for_each_entry(backref, &rec->backrefs, list) {
			if (!backref->reachable)
			if (!backref->errors && rec->found_root_item)
			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
				" index %llu namelen %u name %s errors %x\n",
				(unsigned long long)backref->ref_root,
				(unsigned long long)backref->dir,
				(unsigned long long)backref->index,
				backref->namelen, backref->name,
			print_ref_error(backref->errors);
	return errors > 0 ? 1 : 0;
/*
 * Parse a ROOT_REF or ROOT_BACKREF item at @slot of @eb and feed it into the
 * root cache as a backref.  For ROOT_REF the referenced root is key->offset;
 * for ROOT_BACKREF the roles of objectid/offset are swapped.  Over-long
 * names are truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
static int process_root_ref(struct extent_buffer *eb, int slot,
			    struct btrfs_key *key,
			    struct cache_tree *root_cache)
	struct btrfs_root_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
	dirid = btrfs_root_ref_dirid(eb, ref);
	index = btrfs_root_ref_sequence(eb, ref);
	name_len = btrfs_root_ref_name_len(eb, ref);
	if (name_len <= BTRFS_NAME_LEN) {
		len = BTRFS_NAME_LEN;
		error = REF_ERR_NAME_TOO_LONG;
	/* The name is stored inline, right after the ref item */
	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
	if (key->type == BTRFS_ROOT_REF_KEY) {
		add_root_backref(root_cache, key->offset, key->objectid, dirid,
				 index, namebuf, len, key->type, error);
		add_root_backref(root_cache, key->objectid, key->offset, dirid,
				 index, namebuf, len, key->type, error);
/*
 * Destructor callback for a btrfs_corrupt_block stored in a cache tree;
 * used by the FREE_EXTENT_CACHE_BASED_TREE helper below.
 */
static void free_corrupt_block(struct cache_extent *cache)
	struct btrfs_corrupt_block *corrupt;

	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);

/* Generates free_corrupt_blocks_tree() over a cache_tree of corrupt blocks */
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * Repair the btree of the given root.
 *
 * The fix is to remove the node key in corrupt_blocks cache_tree.
 * and rebalance the tree.
 * After the fix, the btree should be writeable.
 */
static int repair_btree(struct btrfs_root *root,
			struct cache_tree *corrupt_blocks)
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_corrupt_block *corrupt;
	struct cache_extent *cache;
	struct btrfs_key key;

	if (cache_tree_empty(corrupt_blocks))
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		fprintf(stderr, "Error starting transaction: %s\n",
	/* Pass 1: delete the pointer to each corrupted block */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		level = corrupt->level;
		path->lowest_level = level;
		key.objectid = corrupt->key.objectid;
		key.type = corrupt->key.type;
		key.offset = corrupt->key.offset;

		/*
		 * Here we don't want to do any tree balance, since it may
		 * cause a balance with corrupted brother leaf/node,
		 * so ins_len set to 0 here.
		 * Balance will be done after all corrupt node/leaf is deleted.
		 */
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		offset = btrfs_node_blockptr(path->nodes[level],
					     path->slots[level]);

		/* Remove the ptr */
		ret = btrfs_del_ptr(trans, root, path, level,
				    path->slots[level]);
		/*
		 * Remove the corresponding extent
		 * in case we don't care about the error
		 * return value is not concerned.
		 */
		btrfs_release_path(path);
		ret = btrfs_free_extent(trans, root, offset, root->nodesize,
					0, root->root_key.objectid,
		cache = next_cache_extent(cache);

	/* Balance the btree using btrfs_search_slot() */
	cache = first_cache_extent(corrupt_blocks);
		corrupt = container_of(cache, struct btrfs_corrupt_block,
		memcpy(&key, &corrupt->key, sizeof(key));
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		/* return will always >0 since it won't find the item */
		btrfs_release_path(path);
		cache = next_cache_extent(cache);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk it from the root node (or from
 * drop_progress for a partially dropped root), collect inode records,
 * record any corrupted tree blocks, attempt a btree repair when in repair
 * mode, then merge the results into the shared root cache and validate the
 * inode records.
 */
static int check_fs_root(struct btrfs_root *root,
			 struct cache_tree *root_cache,
			 struct walk_control *wc)
	struct btrfs_path path;
	struct shared_node root_node;
	struct root_record *rec;
	struct btrfs_root_item *root_item = &root->root_item;
	struct cache_tree corrupt_blocks;
	struct orphan_data_extent *orphan;
	struct orphan_data_extent *tmp;
	enum btrfs_tree_block_status status;
	struct node_refs nrefs;

	/*
	 * Reuse the corrupt_block cache tree to record corrupted tree block
	 *
	 * Unlike the usage in extent tree check, here we do it in a per
	 * fs/subvol tree base.
	 */
	cache_tree_init(&corrupt_blocks);
	root->fs_info->corrupt_blocks = &corrupt_blocks;
	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		rec = get_root_rec(root_cache, root->root_key.objectid);
		BUG_ON(IS_ERR(rec));
		if (btrfs_root_refs(root_item) > 0)
			rec->found_root_item = 1;
	btrfs_init_path(&path);
	memset(&root_node, 0, sizeof(root_node));
	cache_tree_init(&root_node.root_cache);
	cache_tree_init(&root_node.inode_cache);
	memset(&nrefs, 0, sizeof(nrefs));

	/* Move the orphan extent record to corresponding inode_record */
	list_for_each_entry_safe(orphan, tmp,
				 &root->orphan_data_extents, list) {
		struct inode_record *inode;

		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
		BUG_ON(IS_ERR(inode));
		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
		list_move(&orphan->list, &inode->orphan_extents);
	level = btrfs_header_level(root->node);
	memset(wc->nodes, 0, sizeof(wc->nodes));
	wc->nodes[level] = &root_node;
	wc->active_node = level;
	wc->root_level = level;

	/* We may not have checked the root block, lets do that now */
	if (btrfs_is_leaf(root->node))
		status = btrfs_check_leaf(root, NULL, root->node);
		status = btrfs_check_node(root, NULL, root->node);
	if (status != BTRFS_TREE_BLOCK_CLEAN)

	/* Live root: start at the tree root; dropped root: resume walk */
	if (btrfs_root_refs(root_item) > 0 ||
	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		path.nodes[level] = root->node;
		extent_buffer_get(root->node);
		path.slots[level] = 0;
		struct btrfs_key key;
		struct btrfs_disk_key found_key;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path.lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
		btrfs_node_key(path.nodes[level], &found_key,
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
		wret = walk_down_tree(root, &path, wc, &level, &nrefs);
		wret = walk_up_tree(root, &path, wc, &level);
	btrfs_release_path(&path);

	if (!cache_tree_empty(&corrupt_blocks)) {
		struct cache_extent *cache;
		struct btrfs_corrupt_block *corrupt;

		printf("The following tree block(s) is corrupted in tree %llu:\n",
		       root->root_key.objectid);
		cache = first_cache_extent(&corrupt_blocks);
			corrupt = container_of(cache,
					       struct btrfs_corrupt_block,
			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
			       cache->start, corrupt->level,
			       corrupt->key.objectid, corrupt->key.type,
			       corrupt->key.offset);
			cache = next_cache_extent(cache);
		printf("Try to repair the btree for root %llu\n",
		       root->root_key.objectid);
		ret = repair_btree(root, &corrupt_blocks);
			fprintf(stderr, "Failed to repair btree: %s\n",
			printf("Btree for root %llu is fixed\n",
			       root->root_key.objectid);
	err = merge_root_recs(root, &root_node.root_cache, root_cache);
	if (root_node.current) {
		root_node.current->checked = 1;
		maybe_free_inode_rec(&root_node.inode_cache,
	err = check_inode_recs(root, &root_node.inode_cache);

	free_corrupt_blocks_tree(&corrupt_blocks);
	root->fs_info->corrupt_blocks = NULL;
	free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid should be checked as an fs/subvolume tree.
 * Relocation tree ids are special-cased before the generic is_fstree()
 * range check.
 */
static int fs_root_objectid(u64 objectid)
	if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
	    objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
	return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run check_fs_root()
 * on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items are fed into the
 * root cache via process_root_ref().  If the tree root changes under us
 * (repairs committed a transaction) the whole scan is restarted.
 */
static int check_fs_roots(struct btrfs_root *root,
			  struct cache_tree *root_cache)
	struct btrfs_path path;
	struct btrfs_key key;
	struct walk_control wc;
	struct extent_buffer *leaf, *tree_node;
	struct btrfs_root *tmp_root;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	if (ctx.progress_enabled) {
		ctx.tp = TASK_FS_ROOTS;
		task_start(ctx.info);

	/*
	 * Just in case we made any changes to the extent tree that weren't
	 * reflected into the free space cache yet.
	 */
	reset_cached_block_groups(root->fs_info);
	memset(&wc, 0, sizeof(wc));
	cache_tree_init(&wc.shared);
	btrfs_init_path(&path);
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	tree_node = tree_root->node;
		/* Tree root was COWed by a repair: restart the scan */
		if (tree_node != tree_root->node) {
			free_root_recs_tree(root_cache);
			btrfs_release_path(&path);
		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(tree_root, &path);
			leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.type == BTRFS_ROOT_ITEM_KEY &&
		    fs_root_objectid(key.objectid)) {
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
				tmp_root = btrfs_read_fs_root_no_cache(
						root->fs_info, &key);
				key.offset = (u64)-1;
				tmp_root = btrfs_read_fs_root(
						root->fs_info, &key);
			if (IS_ERR(tmp_root)) {
			ret = check_fs_root(tmp_root, root_cache, &wc);
			if (ret == -EAGAIN) {
				free_root_recs_tree(root_cache);
				btrfs_release_path(&path);
			/* Reloc roots were read uncached, free explicitly */
			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
				btrfs_free_fs_root(tmp_root);
		} else if (key.type == BTRFS_ROOT_REF_KEY ||
			   key.type == BTRFS_ROOT_BACKREF_KEY) {
			process_root_ref(leaf, path.slots[0], &key,
	btrfs_release_path(&path);
	free_extent_cache_tree(&wc.shared);
	if (!cache_tree_empty(&wc.shared))
		fprintf(stderr, "warning line %d\n", __LINE__);
	task_stop(ctx.info);
/*
 * Cross-check every backref of extent record @rec against what was found in
 * the extent tree and in the referencing trees, printing diagnostics when
 * @print_errs is set.  The global found-ref tally is compared against
 * rec->refs at the end.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
	struct extent_backref *back;
	struct tree_backref *tback;
	struct data_backref *dback;

	for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
		back = rb_node_to_extent_backref(n);
		/* Backref seen in a tree but missing from the extent tree */
		if (!back->found_extent_tree) {
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr, "Backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
				tback = to_tree_backref(back);
				fprintf(stderr, "Backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
		/* Extent-tree backref with no matching tree reference */
		if (!back->is_data && !back->found_ref) {
			tback = to_tree_backref(back);
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		if (back->is_data) {
			dback = to_data_backref(back);
			/* Per-backref count mismatch */
			if (dback->found_ref != dback->num_refs) {
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			if (dback->disk_bytenr != rec->start) {
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			if (dback->bytes != rec->nr) {
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
		if (!back->is_data) {
			dback = to_data_backref(back);
			found += dback->found_ref;
	/* Global tally vs. the refcount stored in the extent item */
	if (found != rec->refs) {
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
/* rb_free_nodes() callback: free one extent_backref given its rb_node. */
static void __free_one_backref(struct rb_node *node)
	struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free the whole backref rbtree attached to extent record @rec. */
static void free_all_extent_backrefs(struct extent_record *rec)
	rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache, freeing every extent record and its backrefs.
 */
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
				     struct cache_tree *extent_cache)
	struct cache_extent *cache;
	struct extent_record *rec;

	cache = first_cache_extent(extent_cache);
		rec = container_of(cache, struct extent_record, cache);
		remove_cache_extent(extent_cache, cache);
		free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache if it has been fully verified: contents
 * and owner checked, refcounts consistent, no duplicates, and none of the
 * bad-backref / crossing-stripe / wrong-chunk-type conditions set.
 */
static int maybe_free_extent_rec(struct cache_tree *extent_cache,
				 struct extent_record *rec)
	if (rec->content_checked && rec->owner_ref_checked &&
	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
	    !rec->bad_full_backref && !rec->crossing_stripes &&
	    !rec->wrong_chunk_type) {
		remove_cache_extent(extent_cache, &rec->cache);
		free_all_extent_backrefs(rec);
		list_del_init(&rec->list);
/*
 * Verify the owner recorded in tree block @buf: first look for a matching
 * non-full backref on @rec, otherwise search the owning fs tree at
 * level + 1 and confirm the parent node really points at buf->start.
 * Returns 0 when the owner checks out, 1 otherwise.
 */
static int check_owner_ref(struct btrfs_root *root,
			   struct extent_record *rec,
			   struct extent_buffer *buf)
	struct extent_backref *node, *tmp;
	struct tree_backref *back;
	struct btrfs_root *ref_root;
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *parent;

	rbtree_postorder_for_each_entry_safe(node, tmp,
					     &rec->backref_tree, node) {
		if (!node->found_ref)
		if (node->full_backref)
		back = to_tree_backref(node);
		if (btrfs_header_owner(buf) == back->root)
	BUG_ON(rec->is_root);

	/* try to find the block by search corresponding fs tree */
	key.objectid = btrfs_header_owner(buf);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	ref_root = btrfs_read_fs_root(root->fs_info, &key);
	if (IS_ERR(ref_root))
	level = btrfs_header_level(buf);
		btrfs_item_key_to_cpu(buf, &key, 0);
		btrfs_node_key_to_cpu(buf, &key, 0);

	btrfs_init_path(&path);
	path.lowest_level = level + 1;
	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	/* The parent at level + 1 must point back at this block */
	parent = path.nodes[level + 1];
	if (parent && buf->start == btrfs_node_blockptr(parent,
					path.slots[level + 1]))
	btrfs_release_path(&path);
	return found ? 0 : 1;
/*
 * Return whether this extent record has a (non-full) tree backref rooted
 * in the extent tree, i.e. the block belongs to the extent tree itself.
 * NOTE(review): return statements are not visible in this extract.
 */
4116 static int is_extent_tree_record(struct extent_record *rec)
4118 struct extent_backref *ref, *tmp;
4119 struct tree_backref *back;
4122 rbtree_postorder_for_each_entry_safe(ref, tmp,
4123 &rec->backref_tree, node) {
4126 back = to_tree_backref(ref);
4127 if (ref->full_backref)
4129 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure on a tree block: if the failed range maps to an
 * extent-tree block, register it as a corrupt extent so repair can deal
 * with it later.  Uses the record's parent_key as the corrupt key.
 */
4136 static int record_bad_block_io(struct btrfs_fs_info *info,
4137 struct cache_tree *extent_cache,
4140 struct extent_record *rec;
4141 struct cache_extent *cache;
4142 struct btrfs_key key;
4144 cache = lookup_cache_extent(extent_cache, start, len);
4148 rec = container_of(cache, struct extent_record, cache);
4149 if (!is_extent_tree_record(rec))
4152 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4153 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap two adjacent entries (slot and slot+1) in @buf to fix key ordering.
 * For interior nodes the key pointers are swapped directly; if slot 0 was
 * touched, the low keys up the path are fixed up.  For leaves both the item
 * headers and the item data are swapped, then the keys are rewritten via
 * btrfs_set_item_key_unsafe().
 * NOTE(review): malloc-failure handling and free() of the temp buffers are
 * not visible in this extract — verify against the full source.
 */
4156 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4157 struct extent_buffer *buf, int slot)
4159 if (btrfs_header_level(buf)) {
4160 struct btrfs_key_ptr ptr1, ptr2;
4162 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4163 sizeof(struct btrfs_key_ptr));
4164 read_extent_buffer(buf, &ptr2,
4165 btrfs_node_key_ptr_offset(slot + 1),
4166 sizeof(struct btrfs_key_ptr));
4167 write_extent_buffer(buf, &ptr1,
4168 btrfs_node_key_ptr_offset(slot + 1),
4169 sizeof(struct btrfs_key_ptr));
4170 write_extent_buffer(buf, &ptr2,
4171 btrfs_node_key_ptr_offset(slot),
4172 sizeof(struct btrfs_key_ptr));
4174 struct btrfs_disk_key key;
4175 btrfs_node_key(buf, &key, 0);
/* slot 0 changed: propagate the new first key to the parent levels */
4176 btrfs_fixup_low_keys(root, path, &key,
4177 btrfs_header_level(buf) + 1);
4180 struct btrfs_item *item1, *item2;
4181 struct btrfs_key k1, k2;
4182 char *item1_data, *item2_data;
4183 u32 item1_offset, item2_offset, item1_size, item2_size;
4185 item1 = btrfs_item_nr(slot);
4186 item2 = btrfs_item_nr(slot + 1);
4187 btrfs_item_key_to_cpu(buf, &k1, slot);
4188 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4189 item1_offset = btrfs_item_offset(buf, item1);
4190 item2_offset = btrfs_item_offset(buf, item2);
4191 item1_size = btrfs_item_size(buf, item1);
4192 item2_size = btrfs_item_size(buf, item2);
4194 item1_data = malloc(item1_size);
4197 item2_data = malloc(item2_size);
/* copy both payloads out, then write each into the other's data area */
4203 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4204 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4206 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4207 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4211 btrfs_set_item_offset(buf, item1, item2_offset);
4212 btrfs_set_item_offset(buf, item2, item1_offset);
4213 btrfs_set_item_size(buf, item1, item2_size);
4214 btrfs_set_item_size(buf, item2, item1_size);
4216 path->slots[0] = slot;
4217 btrfs_set_item_key_unsafe(root, path, &k2);
4218 path->slots[0] = slot + 1;
4219 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair of keys
 * that is out of order, then mark the buffer dirty.  Works on both nodes
 * and leaves (key extraction differs by level).
 */
4224 static int fix_key_order(struct btrfs_trans_handle *trans,
4225 struct btrfs_root *root,
4226 struct btrfs_path *path)
4228 struct extent_buffer *buf;
4229 struct btrfs_key k1, k2;
4231 int level = path->lowest_level;
4234 buf = path->nodes[level];
4235 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4237 btrfs_node_key_to_cpu(buf, &k1, i);
4238 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4240 btrfs_item_key_to_cpu(buf, &k1, i);
4241 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* already ordered: nothing to do for this pair */
4243 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4245 ret = swap_values(root, path, buf, i);
4248 btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item from a leaf, but only for key types whose loss the
 * rest of fsck can recover from (dir index, extent items, backref items).
 * The item headers after @slot are shifted down and nritems decremented;
 * if slot 0 was removed the low keys are fixed up.
 */
4254 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4255 struct btrfs_root *root,
4256 struct btrfs_path *path,
4257 struct extent_buffer *buf, int slot)
4259 struct btrfs_key key;
4260 int nritems = btrfs_header_nritems(buf);
4262 btrfs_item_key_to_cpu(buf, &key, slot);
4264 /* These are all the keys we can deal with missing. */
4265 if (key.type != BTRFS_DIR_INDEX_KEY &&
4266 key.type != BTRFS_EXTENT_ITEM_KEY &&
4267 key.type != BTRFS_METADATA_ITEM_KEY &&
4268 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4269 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4272 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4273 (unsigned long long)key.objectid, key.type,
4274 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array left by the deleted slot */
4275 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4276 btrfs_item_nr_offset(slot + 1),
4277 sizeof(struct btrfs_item) *
4278 (nritems - slot - 1));
4279 btrfs_set_header_nritems(buf, nritems - 1);
4281 struct btrfs_disk_key disk_key;
4283 btrfs_item_key(buf, &disk_key, 0);
4284 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4286 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in a leaf.  Item 0's data must end exactly at
 * BTRFS_LEAF_DATA_SIZE and each following item must end where the previous
 * one starts.  Items that run off the end or overlap are deleted (when
 * possible); gaps are closed by shifting the item data and adjusting its
 * offset.  Leaves only — BUG_ON for non-zero lowest_level.
 */
4290 static int fix_item_offset(struct btrfs_trans_handle *trans,
4291 struct btrfs_root *root,
4292 struct btrfs_path *path)
4294 struct extent_buffer *buf;
4298 /* We should only get this for leaves */
4299 BUG_ON(path->lowest_level);
4300 buf = path->nodes[0];
4302 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4303 unsigned int shift = 0, offset;
4305 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4306 BTRFS_LEAF_DATA_SIZE(root)) {
4307 if (btrfs_item_end_nr(buf, i) >
4308 BTRFS_LEAF_DATA_SIZE(root)) {
4309 ret = delete_bogus_item(trans, root, path,
4313 fprintf(stderr, "item is off the end of the "
4314 "leaf, can't fix\n");
/* item 0 ends short of the leaf end: shift it up by the gap */
4318 shift = BTRFS_LEAF_DATA_SIZE(root) -
4319 btrfs_item_end_nr(buf, i);
4320 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4321 btrfs_item_offset_nr(buf, i - 1)) {
4322 if (btrfs_item_end_nr(buf, i) >
4323 btrfs_item_offset_nr(buf, i - 1)) {
4324 ret = delete_bogus_item(trans, root, path,
4328 fprintf(stderr, "items overlap, can't fix\n");
4332 shift = btrfs_item_offset_nr(buf, i - 1) -
4333 btrfs_item_end_nr(buf, i);
4338 printf("Shifting item nr %d by %u bytes in block %llu\n",
4339 i, shift, (unsigned long long)buf->start);
4340 offset = btrfs_item_offset_nr(buf, i);
4341 memmove_extent_buffer(buf,
4342 btrfs_leaf_data(buf) + offset + shift,
4343 btrfs_leaf_data(buf) + offset,
4344 btrfs_item_size_nr(buf, i));
4345 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4347 btrfs_mark_buffer_dirty(buf);
4351 * We may have moved things, in which case we want to exit so we don't
4352 * write those changes out. Once we have proper abort functionality in
4353 * progs this can be changed to something nicer.
/*
 * Repair a structurally-bad tree block (bad key order or invalid item
 * offsets only).  Finds every root that references the block, then for each
 * root starts a transaction, COWs a path down to the block with checks
 * disabled, and applies fix_key_order()/fix_item_offset() as appropriate.
 * NOTE(review): several error-path lines (ret checks, ulist cleanup) are
 * not visible in this extract — verify against the full source.
 */
4360 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4361 * then just return -EIO.
4363 static int try_to_fix_bad_block(struct btrfs_root *root,
4364 struct extent_buffer *buf,
4365 enum btrfs_tree_block_status status)
4367 struct btrfs_trans_handle *trans;
4368 struct ulist *roots;
4369 struct ulist_node *node;
4370 struct btrfs_root *search_root;
4371 struct btrfs_path *path;
4372 struct ulist_iterator iter;
4373 struct btrfs_key root_key, key;
/* only these two failure classes are fixable here */
4376 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4377 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4380 path = btrfs_alloc_path();
4384 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4387 btrfs_free_path(path);
4391 ULIST_ITER_INIT(&iter);
4392 while ((node = ulist_next(roots, &iter))) {
4393 root_key.objectid = node->val;
4394 root_key.type = BTRFS_ROOT_ITEM_KEY;
4395 root_key.offset = (u64)-1;
4397 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4404 trans = btrfs_start_transaction(search_root, 0);
4405 if (IS_ERR(trans)) {
4406 ret = PTR_ERR(trans);
/* search with COW (ins_len=0, cow=1) but skip block validation */
4410 path->lowest_level = btrfs_header_level(buf);
4411 path->skip_check_block = 1;
4412 if (path->lowest_level)
4413 btrfs_node_key_to_cpu(buf, &key, 0);
4415 btrfs_item_key_to_cpu(buf, &key, 0);
4416 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4419 btrfs_commit_transaction(trans, search_root);
4422 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4423 ret = fix_key_order(trans, search_root, path);
4424 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4425 ret = fix_item_offset(trans, search_root, path);
4427 btrfs_commit_transaction(trans, search_root);
4430 btrfs_release_path(path);
4431 btrfs_commit_transaction(trans, search_root);
4434 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record: record its generation
 * and first-key info, run btrfs_check_leaf()/btrfs_check_node(), and try to
 * repair bad blocks via try_to_fix_bad_block().  On success marks the
 * record content-checked; owner refs are either implied by a full backref
 * flag or verified with check_owner_ref().  Finally the record is freed if
 * fully reconciled.
 */
4438 static int check_block(struct btrfs_root *root,
4439 struct cache_tree *extent_cache,
4440 struct extent_buffer *buf, u64 flags)
4442 struct extent_record *rec;
4443 struct cache_extent *cache;
4444 struct btrfs_key key;
4445 enum btrfs_tree_block_status status;
4449 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4452 rec = container_of(cache, struct extent_record, cache);
4453 rec->generation = btrfs_header_generation(buf);
4455 level = btrfs_header_level(buf);
4456 if (btrfs_header_nritems(buf) > 0) {
4459 btrfs_item_key_to_cpu(buf, &key, 0);
4461 btrfs_node_key_to_cpu(buf, &key, 0);
4463 rec->info_objectid = key.objectid;
4465 rec->info_level = level;
4467 if (btrfs_is_leaf(buf))
4468 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4470 status = btrfs_check_node(root, &rec->parent_key, buf);
4472 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4474 status = try_to_fix_bad_block(root, buf, status);
4475 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4477 fprintf(stderr, "bad block %llu\n",
4478 (unsigned long long)buf->start);
4481 * Signal to callers we need to start the scan over
4482 * again since we'll have cowed blocks.
4487 rec->content_checked = 1;
4488 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4489 rec->owner_ref_checked = 1;
4491 ret = check_owner_ref(root, rec, buf);
4493 rec->owner_ref_checked = 1;
4497 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref in rec->backref_tree.  The search key is
 * built from @parent (full backref) or @root (normal backref); returns the
 * matching tree_backref or NULL.
 * NOTE(review): initializer lines of @match and the root-keyed branch are
 * not visible in this extract.
 */
4502 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4503 u64 parent, u64 root)
4505 struct rb_node *node;
4506 struct tree_backref *back = NULL;
4507 struct tree_backref match = {
4514 match.parent = parent;
4515 match.node.full_backref = 1;
4520 node = rb_search(&rec->backref_tree, &match.node.node,
4521 (rb_compare_keys)compare_extent_backref, NULL);
4523 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new tree backref for @rec, keyed either by @parent
 * (full backref) or by @root.  Inserted into rec->backref_tree.
 * NOTE(review): malloc-failure check and root-assignment lines are not
 * visible in this extract.
 */
4528 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4529 u64 parent, u64 root)
4531 struct tree_backref *ref = malloc(sizeof(*ref));
4535 memset(&ref->node, 0, sizeof(ref->node));
4537 ref->parent = parent;
4538 ref->node.full_backref = 1;
4541 ref->node.full_backref = 0;
4543 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref in rec->backref_tree matching either
 * the (parent) full-backref form or the (root, owner, offset) form, with
 * the given found_ref/disk_bytenr/bytes; returns it or NULL.
 */
4548 static struct data_backref *find_data_backref(struct extent_record *rec,
4549 u64 parent, u64 root,
4550 u64 owner, u64 offset,
4552 u64 disk_bytenr, u64 bytes)
4554 struct rb_node *node;
4555 struct data_backref *back = NULL;
4556 struct data_backref match = {
4563 .found_ref = found_ref,
4564 .disk_bytenr = disk_bytenr,
4568 match.parent = parent;
4569 match.node.full_backref = 1;
4574 node = rb_search(&rec->backref_tree, &match.node.node,
4575 (rb_compare_keys)compare_extent_backref, NULL);
4577 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref for @rec (full-backref form keyed
 * by @parent, otherwise keyed by root/owner/offset).  Also raises the
 * record's max_size when this backref reports a larger extent.
 * NOTE(review): malloc-failure check and some field assignments are not
 * visible in this extract.
 */
4582 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4583 u64 parent, u64 root,
4584 u64 owner, u64 offset,
4587 struct data_backref *ref = malloc(sizeof(*ref));
4591 memset(&ref->node, 0, sizeof(ref->node));
4592 ref->node.is_data = 1;
4595 ref->parent = parent;
4598 ref->node.full_backref = 1;
4602 ref->offset = offset;
4603 ref->node.full_backref = 0;
4605 ref->bytes = max_size;
4608 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4609 if (max_size > rec->max_size)
4610 rec->max_size = max_size;
4614 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent lives in a block group whose
 * flags disagree with the extent's kind: data extents must be in DATA
 * groups; metadata extents in SYSTEM or METADATA groups; and a metadata
 * extent whose first backref roots at the chunk tree must specifically be
 * in a SYSTEM group.
 */
4615 static void check_extent_type(struct extent_record *rec)
4617 struct btrfs_block_group_cache *bg_cache;
4619 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4623 /* data extent, check chunk directly*/
4624 if (!rec->metadata) {
4625 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4626 rec->wrong_chunk_type = 1;
4630 /* metadata extent, check the obvious case first */
4631 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4632 BTRFS_BLOCK_GROUP_METADATA))) {
4633 rec->wrong_chunk_type = 1;
4638 * Check SYSTEM extent, as it's also marked as metadata, we can only
4639 * make sure it's a SYSTEM extent by its backref
4641 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4642 struct extent_backref *node;
4643 struct tree_backref *tback;
4646 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4647 if (node->is_data) {
4648 /* tree block shouldn't have data backref */
4649 rec->wrong_chunk_type = 1;
4652 tback = container_of(node, struct tree_backref, node);
4654 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4655 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4657 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4658 if (!(bg_cache->flags & bg_type))
4659 rec->wrong_chunk_type = 1;
4664 * Allocate a new extent record, fill default values from @tmpl and insert int
4665 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4666 * the cache, otherwise it fails.
/*
 * Copies the template fields, initializes bookkeeping lists and the backref
 * rbtree, inserts the cache_extent, accounts bytes_used, then computes the
 * crossing_stripes flag and validates the chunk type.
 */
4668 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4669 struct extent_record *tmpl)
4671 struct extent_record *rec;
4674 rec = malloc(sizeof(*rec));
4677 rec->start = tmpl->start;
4678 rec->max_size = tmpl->max_size;
/* nr may be smaller than max_size when built from a backref; keep the max */
4679 rec->nr = max(tmpl->nr, tmpl->max_size);
4680 rec->found_rec = tmpl->found_rec;
4681 rec->content_checked = tmpl->content_checked;
4682 rec->owner_ref_checked = tmpl->owner_ref_checked;
4683 rec->num_duplicates = 0;
4684 rec->metadata = tmpl->metadata;
4685 rec->flag_block_full_backref = FLAG_UNSET;
4686 rec->bad_full_backref = 0;
4687 rec->crossing_stripes = 0;
4688 rec->wrong_chunk_type = 0;
4689 rec->is_root = tmpl->is_root;
4690 rec->refs = tmpl->refs;
4691 rec->extent_item_refs = tmpl->extent_item_refs;
4692 rec->parent_generation = tmpl->parent_generation;
4693 INIT_LIST_HEAD(&rec->backrefs);
4694 INIT_LIST_HEAD(&rec->dups);
4695 INIT_LIST_HEAD(&rec->list);
4696 rec->backref_tree = RB_ROOT;
4697 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4698 rec->cache.start = tmpl->start;
4699 rec->cache.size = tmpl->nr;
4700 ret = insert_cache_extent(extent_cache, &rec->cache);
4702 bytes_used += rec->nr;
4705 rec->crossing_stripes = check_crossing_stripes(rec->start,
4706 global_info->tree_root->nodesize);
4707 check_extent_type(rec);
4712 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4714 * - refs - if found, increase refs
4715 * - is_root - if found, set
4716 * - content_checked - if found, set
4717 * - owner_ref_checked - if found, set
4719 * If not found, create a new one, initialize and insert.
4721 static int add_extent_rec(struct cache_tree *extent_cache,
4722 struct extent_record *tmpl)
4724 struct extent_record *rec;
4725 struct cache_extent *cache;
4729 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4731 rec = container_of(cache, struct extent_record, cache);
4735 rec->nr = max(tmpl->nr, tmpl->max_size);
4738 * We need to make sure to reset nr to whatever the extent
4739 * record says was the real size, this way we can compare it to
4742 if (tmpl->found_rec) {
/* a second EXTENT_ITEM covering this range: record it as a duplicate */
4743 if (tmpl->start != rec->start || rec->found_rec) {
4744 struct extent_record *tmp;
4747 if (list_empty(&rec->list))
4748 list_add_tail(&rec->list,
4749 &duplicate_extents);
4752 * We have to do this song and dance in case we
4753 * find an extent record that falls inside of
4754 * our current extent record but does not have
4755 * the same objectid.
4757 tmp = malloc(sizeof(*tmp));
4760 tmp->start = tmpl->start;
4761 tmp->max_size = tmpl->max_size;
4764 tmp->metadata = tmpl->metadata;
4765 tmp->extent_item_refs = tmpl->extent_item_refs;
4766 INIT_LIST_HEAD(&tmp->list);
4767 list_add_tail(&tmp->list, &rec->dups);
4768 rec->num_duplicates++;
4775 if (tmpl->extent_item_refs && !dup) {
/* two extent items claimed refcounts for this block: report it */
4776 if (rec->extent_item_refs) {
4777 fprintf(stderr, "block %llu rec "
4778 "extent_item_refs %llu, passed %llu\n",
4779 (unsigned long long)tmpl->start,
4780 (unsigned long long)
4781 rec->extent_item_refs,
4782 (unsigned long long)tmpl->extent_item_refs);
4784 rec->extent_item_refs = tmpl->extent_item_refs;
4788 if (tmpl->content_checked)
4789 rec->content_checked = 1;
4790 if (tmpl->owner_ref_checked)
4791 rec->owner_ref_checked = 1;
4792 memcpy(&rec->parent_key, &tmpl->parent_key,
4793 sizeof(tmpl->parent_key));
4794 if (tmpl->parent_generation)
4795 rec->parent_generation = tmpl->parent_generation;
4796 if (rec->max_size < tmpl->max_size)
4797 rec->max_size = tmpl->max_size;
4800 * A metadata extent can't cross stripe_len boundary, otherwise
4801 * kernel scrub won't be able to handle it.
4802 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4806 rec->crossing_stripes = check_crossing_stripes(
4807 rec->start, global_info->tree_root->nodesize);
4808 check_extent_type(rec);
4809 maybe_free_extent_rec(extent_cache, rec);
/* not found in the cache: fall through to fresh insertion */
4813 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr.  Creates a minimal extent record
 * first if none covers the byte (then re-looks it up), then finds or
 * allocates the tree backref and marks it found_ref or found_extent_tree
 * depending on @found_ref, warning on duplicates.
 */
4818 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4819 u64 parent, u64 root, int found_ref)
4821 struct extent_record *rec;
4822 struct tree_backref *back;
4823 struct cache_extent *cache;
4825 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4827 struct extent_record tmpl;
4829 memset(&tmpl, 0, sizeof(tmpl));
4830 tmpl.start = bytenr;
4834 add_extent_rec_nolookup(extent_cache, &tmpl);
/* the record must exist now that we inserted a stub */
4836 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4841 rec = container_of(cache, struct extent_record, cache);
4842 if (rec->start != bytenr) {
4846 back = find_tree_backref(rec, parent, root);
4848 back = alloc_tree_backref(rec, parent, root);
4853 if (back->node.found_ref) {
4854 fprintf(stderr, "Extent back ref already exists "
4855 "for %llu parent %llu root %llu \n",
4856 (unsigned long long)bytenr,
4857 (unsigned long long)parent,
4858 (unsigned long long)root);
4860 back->node.found_ref = 1;
4862 if (back->node.found_extent_tree) {
4863 fprintf(stderr, "Extent back ref already exists "
4864 "for %llu parent %llu root %llu \n",
4865 (unsigned long long)bytenr,
4866 (unsigned long long)parent,
4867 (unsigned long long)root);
4869 back->node.found_extent_tree = 1;
4871 check_extent_type(rec);
4872 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr with refcount @num_refs.  Creates a
 * stub extent record if none exists, raises the record's max_size, then
 * finds or allocates the matching data backref.  @found_ref distinguishes
 * a ref discovered from a file extent item (bytes/disk_bytenr are real)
 * from one discovered in the extent tree (num_refs is authoritative).
 */
4876 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4877 u64 parent, u64 root, u64 owner, u64 offset,
4878 u32 num_refs, int found_ref, u64 max_size)
4880 struct extent_record *rec;
4881 struct data_backref *back;
4882 struct cache_extent *cache;
4884 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4886 struct extent_record tmpl;
4888 memset(&tmpl, 0, sizeof(tmpl));
4889 tmpl.start = bytenr;
4891 tmpl.max_size = max_size;
4893 add_extent_rec_nolookup(extent_cache, &tmpl);
4895 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4900 rec = container_of(cache, struct extent_record, cache);
4901 if (rec->max_size < max_size)
4902 rec->max_size = max_size;
4905 * If found_ref is set then max_size is the real size and must match the
4906 * existing refs. So if we have already found a ref then we need to
4907 * make sure that this ref matches the existing one, otherwise we need
4908 * to add a new backref so we can notice that the backrefs don't match
4909 * and we need to figure out who is telling the truth. This is to
4910 * account for that awful fsync bug I introduced where we'd end up with
4911 * a btrfs_file_extent_item that would have its length include multiple
4912 * prealloc extents or point inside of a prealloc extent.
4914 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4917 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found_ref path: a file extent item contributes exactly one ref */
4923 BUG_ON(num_refs != 1);
4924 if (back->node.found_ref)
4925 BUG_ON(back->bytes != max_size);
4926 back->node.found_ref = 1;
4927 back->found_ref += 1;
4928 back->bytes = max_size;
4929 back->disk_bytenr = bytenr;
4931 rec->content_checked = 1;
4932 rec->owner_ref_checked = 1;
4934 if (back->node.found_extent_tree) {
4935 fprintf(stderr, "Extent back ref already exists "
4936 "for %llu parent %llu root %llu "
4937 "owner %llu offset %llu num_refs %lu\n",
4938 (unsigned long long)bytenr,
4939 (unsigned long long)parent,
4940 (unsigned long long)root,
4941 (unsigned long long)owner,
4942 (unsigned long long)offset,
4943 (unsigned long)num_refs);
4945 back->num_refs = num_refs;
4946 back->node.found_extent_tree = 1;
4948 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue a block for later processing: insert it into @seen first (which
 * dedups — a block already seen is not re-queued) and then into @pending.
 */
4952 static int add_pending(struct cache_tree *pending,
4953 struct cache_tree *seen, u64 bytenr, u32 size)
4956 ret = add_cache_extent(seen, bytenr, size);
4959 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of up to @bits_nr blocks to read.  Preference
 * order: a readahead target; otherwise interior nodes near @last (starting
 * 32K earlier to encourage sequential I/O, wrapping to the start if none);
 * otherwise plain pending leaves.  When picking pending blocks and room
 * remains, also greedily pulls in nearby (within 32K) pending extents.
 * Returns the number of bits filled (via the visible do/while loops).
 */
4963 static int pick_next_pending(struct cache_tree *pending,
4964 struct cache_tree *reada,
4965 struct cache_tree *nodes,
4966 u64 last, struct block_info *bits, int bits_nr,
4969 unsigned long node_start = last;
4970 struct cache_extent *cache;
4973 cache = search_cache_extent(reada, 0);
4975 bits[0].start = cache->start;
4976 bits[0].size = cache->size;
/* back up 32K so reads tend to be sequential with earlier ones */
4981 if (node_start > 32768)
4982 node_start -= 32768;
4984 cache = search_cache_extent(nodes, node_start);
4986 cache = search_cache_extent(nodes, 0);
4989 cache = search_cache_extent(pending, 0);
4994 bits[ret].start = cache->start;
4995 bits[ret].size = cache->size;
4996 cache = next_cache_extent(cache);
4998 } while (cache && ret < bits_nr);
5004 bits[ret].start = cache->start;
5005 bits[ret].size = cache->size;
5006 cache = next_cache_extent(cache);
5008 } while (cache && ret < bits_nr);
5010 if (bits_nr - ret > 8) {
5011 u64 lookup = bits[0].start + bits[0].size;
5012 struct cache_extent *next;
5013 next = search_cache_extent(pending, lookup);
5015 if (next->start - lookup > 32768)
5017 bits[ret].start = next->start;
5018 bits[ret].size = next->size;
5019 lookup = next->start + next->size;
5023 next = next_cache_extent(next);
/* Destructor for a chunk_record held in a cache tree: unlink and free. */
5031 static void free_chunk_record(struct cache_extent *cache)
5033 struct chunk_record *rec;
5035 rec = container_of(cache, struct chunk_record, cache);
5036 list_del_init(&rec->list);
5037 list_del_init(&rec->dextents);
/* Free every chunk record in @chunk_cache via free_chunk_record(). */
5041 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5043 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* rb-tree destructor callback for a device_record. */
5046 static void free_device_record(struct rb_node *node)
5048 struct device_record *rec;
5050 rec = container_of(node, struct device_record, node);
/* NOTE(review): the line below belongs to the following function (its
 * header is not visible in this extract) — it drains the whole tree. */
5054 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block group record into the tree, and on success also link it
 * onto the tree's block_groups list.
 */
5056 int insert_block_group_record(struct block_group_tree *tree,
5057 struct block_group_record *bg_rec)
5061 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5065 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor for a block_group_record held in a cache tree. */
5069 static void free_block_group_record(struct cache_extent *cache)
5071 struct block_group_record *rec;
5073 rec = container_of(cache, struct block_group_record, cache);
5074 list_del_init(&rec->list);
/* Free every block group record in @tree via free_block_group_record(). */
5078 void free_block_group_tree(struct block_group_tree *tree)
5080 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device extent record; newly inserted records start on both
 * "orphan" lists until matched to their chunk and device.
 */
5083 int insert_device_extent_record(struct device_extent_tree *tree,
5084 struct device_extent_record *de_rec)
5089 * Device extent is a bit different from the other extents, because
5090 * the extents which belong to the different devices may have the
5091 * same start and size, so we need use the special extent cache
5092 * search/insert functions.
5094 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5098 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5099 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Destructor for a device_extent_record: detach it from the chunk and
 * device lists (if still linked) before it is freed.
 */
5103 static void free_device_extent_record(struct cache_extent *cache)
5105 struct device_extent_record *rec;
5107 rec = container_of(cache, struct device_extent_record, cache);
5108 if (!list_empty(&rec->chunk_list))
5109 list_del_init(&rec->chunk_list);
5110 if (!list_empty(&rec->device_list))
5111 list_del_init(&rec->device_list);
/* Free every device extent record via free_device_extent_record(). */
5115 void free_device_extent_tree(struct device_extent_tree *tree)
5117 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5120 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item (pre-2.6.29 format): objectids below
 * FIRST_FREE_OBJECTID are tree roots (tree backref), anything else is a
 * data backref with the v0 refcount.
 */
5121 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5122 struct extent_buffer *leaf, int slot)
5124 struct btrfs_extent_ref_v0 *ref0;
5125 struct btrfs_key key;
5127 btrfs_item_key_to_cpu(leaf, &key, slot);
5128 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5129 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5130 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5132 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5133 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from a CHUNK_ITEM at @slot.  The record
 * is sized for num_stripes (flexible array) and filled from the on-disk
 * btrfs_chunk, including per-stripe devid/offset/dev_uuid.  Exits the
 * program on allocation failure.
 */
5139 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5140 struct btrfs_key *key,
5143 struct btrfs_chunk *ptr;
5144 struct chunk_record *rec;
5147 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5148 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5150 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5152 fprintf(stderr, "memory allocation failed\n");
5156 INIT_LIST_HEAD(&rec->list);
5157 INIT_LIST_HEAD(&rec->dextents);
/* chunks are keyed by logical start (key offset) in the cache */
5160 rec->cache.start = key->offset;
5161 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5163 rec->generation = btrfs_header_generation(leaf);
5165 rec->objectid = key->objectid;
5166 rec->type = key->type;
5167 rec->offset = key->offset;
5169 rec->length = rec->cache.size;
5170 rec->owner = btrfs_chunk_owner(leaf, ptr);
5171 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5172 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5173 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5174 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5175 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5176 rec->num_stripes = num_stripes;
5177 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5179 for (i = 0; i < rec->num_stripes; ++i) {
5180 rec->stripes[i].devid =
5181 btrfs_stripe_devid_nr(leaf, ptr, i);
5182 rec->stripes[i].offset =
5183 btrfs_stripe_offset_nr(leaf, ptr, i);
5184 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5185 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse a CHUNK_ITEM into a chunk_record and insert it into @chunk_cache,
 * reporting (and presumably freeing — not visible here) duplicates.
 */
5192 static int process_chunk_item(struct cache_tree *chunk_cache,
5193 struct btrfs_key *key, struct extent_buffer *eb,
5196 struct chunk_record *rec;
5199 rec = btrfs_new_chunk_record(eb, key, slot);
5200 ret = insert_cache_extent(chunk_cache, &rec->cache);
5202 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5203 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record and insert it into the rb-tree
 * @dev_cache, reporting duplicates.  Exits on allocation failure.
 */
5210 static int process_device_item(struct rb_root *dev_cache,
5211 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5213 struct btrfs_dev_item *ptr;
5214 struct device_record *rec;
5217 ptr = btrfs_item_ptr(eb,
5218 slot, struct btrfs_dev_item);
5220 rec = malloc(sizeof(*rec));
5222 fprintf(stderr, "memory allocation failed\n");
/* key offset is the devid; overwritten below with the on-disk value */
5226 rec->devid = key->offset;
5227 rec->generation = btrfs_header_generation(eb);
5229 rec->objectid = key->objectid;
5230 rec->type = key->type;
5231 rec->offset = key->offset;
5233 rec->devid = btrfs_device_id(eb, ptr);
5234 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5235 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5237 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5239 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from a BLOCK_GROUP_ITEM at @slot.
 * Cache range is [objectid, objectid+offset); flags come from the on-disk
 * item.  Exits the program on allocation failure.
 */
5246 struct block_group_record *
5247 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5250 struct btrfs_block_group_item *ptr;
5251 struct block_group_record *rec;
5253 rec = calloc(1, sizeof(*rec));
5255 fprintf(stderr, "memory allocation failed\n");
5259 rec->cache.start = key->objectid;
5260 rec->cache.size = key->offset;
5262 rec->generation = btrfs_header_generation(leaf);
5264 rec->objectid = key->objectid;
5265 rec->type = key->type;
5266 rec->offset = key->offset;
5268 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5269 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5271 INIT_LIST_HEAD(&rec->list);
/*
 * Parse a BLOCK_GROUP_ITEM into a record and insert it into the block
 * group tree, reporting duplicates.
 */
5276 static int process_block_group_item(struct block_group_tree *block_group_cache,
5277 struct btrfs_key *key,
5278 struct extent_buffer *eb, int slot)
5280 struct block_group_record *rec;
5283 rec = btrfs_new_block_group_record(eb, key, slot);
5284 ret = insert_block_group_record(block_group_cache, rec);
5286 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5287 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from a DEV_EXTENT item at @slot.
 * Cache key uses (objectid = devid, start = device offset, size = length).
 * Both list heads start detached; exits the program on allocation failure.
 */
5294 struct device_extent_record *
5295 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5296 struct btrfs_key *key, int slot)
5298 struct device_extent_record *rec;
5299 struct btrfs_dev_extent *ptr;
5301 rec = calloc(1, sizeof(*rec));
5303 fprintf(stderr, "memory allocation failed\n");
5307 rec->cache.objectid = key->objectid;
5308 rec->cache.start = key->offset;
5310 rec->generation = btrfs_header_generation(leaf);
5312 rec->objectid = key->objectid;
5313 rec->type = key->type;
5314 rec->offset = key->offset;
5316 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5317 rec->chunk_objecteid =
5318 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5320 btrfs_dev_extent_chunk_offset(leaf, ptr);
5321 rec->length = btrfs_dev_extent_length(leaf, ptr);
5322 rec->cache.size = rec->length;
5324 INIT_LIST_HEAD(&rec->chunk_list);
5325 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse a DEV_EXTENT item into a record and insert it into the device
 * extent tree, reporting duplicates.
 */
5331 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5332 struct btrfs_key *key, struct extent_buffer *eb,
5335 struct device_extent_record *rec;
5338 rec = btrfs_new_device_extent_record(eb, key, slot);
5339 ret = insert_device_extent_record(dev_extent_cache, rec);
5342 "Device extent[%llu, %llu, %llu] existed.\n",
5343 rec->objectid, rec->offset, rec->length);
/*
 * Process an EXTENT_ITEM or METADATA_ITEM: register the extent record
 * (refcount, size, metadata flag) and then walk all inline backrefs,
 * dispatching each to add_tree_backref()/add_data_backref() by type.
 * Handles the v0 (pre-inline-ref) format when compiled with
 * BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5350 static int process_extent_item(struct btrfs_root *root,
5351 struct cache_tree *extent_cache,
5352 struct extent_buffer *eb, int slot)
5354 struct btrfs_extent_item *ei;
5355 struct btrfs_extent_inline_ref *iref;
5356 struct btrfs_extent_data_ref *dref;
5357 struct btrfs_shared_data_ref *sref;
5358 struct btrfs_key key;
5359 struct extent_record tmpl;
5363 u32 item_size = btrfs_item_size_nr(eb, slot);
5369 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM offset is the level, not the byte count */
5371 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5373 num_bytes = root->nodesize;
5375 num_bytes = key.offset;
5378 if (item_size < sizeof(*ei)) {
5379 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5380 struct btrfs_extent_item_v0 *ei0;
5381 BUG_ON(item_size != sizeof(*ei0));
5382 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5383 refs = btrfs_extent_refs_v0(eb, ei0);
5387 memset(&tmpl, 0, sizeof(tmpl));
5388 tmpl.start = key.objectid;
5389 tmpl.nr = num_bytes;
5390 tmpl.extent_item_refs = refs;
5391 tmpl.metadata = metadata;
5393 tmpl.max_size = num_bytes;
5395 return add_extent_rec(extent_cache, &tmpl);
5398 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5399 refs = btrfs_extent_refs(eb, ei);
5400 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5405 memset(&tmpl, 0, sizeof(tmpl));
5406 tmpl.start = key.objectid;
5407 tmpl.nr = num_bytes;
5408 tmpl.extent_item_refs = refs;
5409 tmpl.metadata = metadata;
5411 tmpl.max_size = num_bytes;
5412 add_extent_rec(extent_cache, &tmpl);
5414 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a tree_block_info before the inline refs */
5415 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5416 key.type == BTRFS_EXTENT_ITEM_KEY)
5417 ptr += sizeof(struct btrfs_tree_block_info);
5419 end = (unsigned long)ei + item_size;
5421 iref = (struct btrfs_extent_inline_ref *)ptr;
5422 type = btrfs_extent_inline_ref_type(eb, iref);
5423 offset = btrfs_extent_inline_ref_offset(eb, iref);
5425 case BTRFS_TREE_BLOCK_REF_KEY:
5426 add_tree_backref(extent_cache, key.objectid,
5429 case BTRFS_SHARED_BLOCK_REF_KEY:
5430 add_tree_backref(extent_cache, key.objectid,
5433 case BTRFS_EXTENT_DATA_REF_KEY:
5434 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5435 add_data_backref(extent_cache, key.objectid, 0,
5436 btrfs_extent_data_ref_root(eb, dref),
5437 btrfs_extent_data_ref_objectid(eb,
5439 btrfs_extent_data_ref_offset(eb, dref),
5440 btrfs_extent_data_ref_count(eb, dref),
5443 case BTRFS_SHARED_DATA_REF_KEY:
5444 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5445 add_data_backref(extent_cache, key.objectid, offset,
5447 btrfs_shared_data_ref_count(eb, sref),
5451 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5452 key.objectid, key.type, num_bytes);
5455 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that the block-group range
 * [offset, offset + bytes) is backed by a matching free space entry.
 *
 * Superblock mirror stripes overlapping the range are carved out first:
 * the range is shrunk, or split (recursing on the left half) when a
 * stripe lands in the middle.  The remaining range must then match one
 * free space entry exactly (same offset, same length), which is unlinked
 * from the ctl so that a fully verified cache ends up empty.
 *
 * NOTE(review): several local declarations and return statements are
 * elided from this view; exact error codes cannot be confirmed here.
 */
5462 static int check_cache_range(struct btrfs_root *root,
5463 struct btrfs_block_group_cache *cache,
5464 u64 offset, u64 bytes)
5466 struct btrfs_free_space *entry;
/* Carve every superblock mirror stripe out of the range under test. */
5472 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5473 bytenr = btrfs_sb_offset(i);
5474 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5475 cache->key.objectid, bytenr, 0,
5476 &logical, &nr, &stripe_len);
/* Stripe entirely before or after the range: nothing to carve. */
5481 if (logical[nr] + stripe_len <= offset)
5483 if (offset + bytes <= logical[nr])
5485 if (logical[nr] == offset) {
5486 if (stripe_len >= bytes) {
5490 bytes -= stripe_len;
5491 offset += stripe_len;
5492 } else if (logical[nr] < offset) {
5493 if (logical[nr] + stripe_len >=
5498 bytes = (offset + bytes) -
5499 (logical[nr] + stripe_len);
5500 offset = logical[nr] + stripe_len;
5503 * Could be tricky, the super may land in the
5504 * middle of the area we're checking. First
5505 * check the easiest case, it's at the end.
5507 if (logical[nr] + stripe_len >=
5509 bytes = logical[nr] - offset;
5513 /* Check the left side */
5514 ret = check_cache_range(root, cache,
5516 logical[nr] - offset);
5522 /* Now we continue with the right side */
5523 bytes = (offset + bytes) -
5524 (logical[nr] + stripe_len);
5525 offset = logical[nr] + stripe_len;
/* The carved range must correspond to exactly one free space entry. */
5532 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5534 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5535 offset, offset+bytes);
5539 if (entry->offset != offset) {
5540 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5545 if (entry->bytes != bytes) {
5546 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5547 bytes, entry->bytes, offset);
/* Consume the entry: a fully verified cache should end up empty. */
5551 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check one block group's free space cache
 * against the extent tree.
 *
 * Walks EXTENT_ITEM/METADATA_ITEM records inside the block group.
 * 'last' tracks the end of the previous extent (seeded with the group
 * start, but never below BTRFS_SUPER_INFO_OFFSET); every gap between
 * 'last' and the next extent must be present in the cache, which
 * check_cache_range() verifies and consumes.  Entries still left in the
 * ctl afterwards describe space the cache thinks is free but is not.
 */
5556 static int verify_space_cache(struct btrfs_root *root,
5557 struct btrfs_block_group_cache *cache)
5559 struct btrfs_path *path;
5560 struct extent_buffer *leaf;
5561 struct btrfs_key key;
5565 path = btrfs_alloc_path();
/* Extent records live in the extent tree, not the root passed in. */
5569 root = root->fs_info->extent_root;
5571 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5573 key.objectid = last;
5575 key.type = BTRFS_EXTENT_ITEM_KEY;
5577 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5582 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5583 ret = btrfs_next_leaf(root, path);
5591 leaf = path->nodes[0];
5592 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5593 if (key.objectid >= cache->key.offset + cache->key.objectid)
5595 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5596 key.type != BTRFS_METADATA_ITEM_KEY) {
5601 if (last == key.objectid) {
/* METADATA_ITEM keys put the level in key.offset, so the extent
 * length is nodesize rather than key.offset. */
5602 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5603 last = key.objectid + key.offset;
5605 last = key.objectid + root->nodesize;
/* Hole between the previous extent and this one: must be free space. */
5610 ret = check_cache_range(root, cache, last,
5611 key.objectid - last);
5614 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5615 last = key.objectid + key.offset;
5617 last = key.objectid + root->nodesize;
/* Tail of the block group past the final extent. */
5621 if (last < cache->key.objectid + cache->key.offset)
5622 ret = check_cache_range(root, cache, last,
5623 cache->key.objectid +
5624 cache->key.offset - last);
5627 btrfs_free_path(path);
/* Leftover entries claim space that is actually allocated. */
5630 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5631 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free space cache of every block group.
 *
 * If the super's cache generation is set but stale, the v1 cache will be
 * invalidated anyway, so a message is printed.  For each block group the
 * cache is loaded either from the free space tree (FREE_SPACE_TREE
 * compat_ro feature, with super stripes excluded around the load) or
 * from the v1 cache file, then checked with verify_space_cache().
 *
 * Returns -EINVAL if any cache failed verification, 0 otherwise.
 */
5639 static int check_space_cache(struct btrfs_root *root)
5641 struct btrfs_block_group_cache *cache;
5642 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5646 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5647 btrfs_super_generation(root->fs_info->super_copy) !=
5648 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5649 printf("cache and super generation don't match, space cache "
5650 "will be invalidated\n");
5654 if (ctx.progress_enabled) {
5655 ctx.tp = TASK_FREE_SPACE;
5656 task_start(ctx.info);
/* Iterate block groups in address order. */
5660 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5664 start = cache->key.objectid + cache->key.offset;
5665 if (!cache->free_space_ctl) {
5666 if (btrfs_init_free_space_ctl(cache,
5667 root->sectorsize)) {
5672 btrfs_remove_free_space_cache(cache);
5675 if (btrfs_fs_compat_ro(root->fs_info,
5676 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
/* Free space tree: super stripes must be excluded while loading. */
5677 ret = exclude_super_stripes(root, cache);
5679 fprintf(stderr, "could not exclude super stripes: %s\n",
5684 ret = load_free_space_tree(root->fs_info, cache);
5685 free_excluded_extents(root, cache);
5687 fprintf(stderr, "could not load free space tree: %s\n",
5694 ret = load_free_space_cache(root->fs_info, cache);
5699 ret = verify_space_cache(root, cache);
5701 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5702 cache->key.objectid);
5707 task_stop(ctx.info);
5709 return error ? -EINVAL : 0;
/*
 * check_extent_csums - re-read a data extent and verify its checksums.
 * @bytenr:      logical start of the data extent.
 * @num_bytes:   extent length; must be a multiple of sectorsize.
 * @leaf_offset: byte offset inside @eb of the first stored csum for
 *               @bytenr.
 * @eb:          leaf holding the csum item.
 *
 * Reads the extent (in as few chunks as read_extent_data() allows),
 * computes the checksum of every sector and compares it against the
 * on-disk value.  On a mismatch the error is printed and the next
 * mirror is tried while copies remain.
 */
5712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5713 u64 num_bytes, unsigned long leaf_offset,
5714 struct extent_buffer *eb) {
5717 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5719 unsigned long csum_offset;
5723 u64 data_checked = 0;
5729 if (num_bytes % root->sectorsize)
5732 data = malloc(num_bytes);
5736 while (offset < num_bytes) {
5739 read_len = num_bytes - offset;
5740 /* read as much space once a time */
5741 ret = read_extent_data(root, data + offset,
5742 bytenr + offset, &read_len, mirror);
5746 /* verify every 4k data's checksum */
5747 while (data_checked < read_len) {
5749 tmp = offset + data_checked;
5751 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5752 csum, root->sectorsize);
5753 btrfs_csum_final(csum, (char *)&csum);
/* Stored csums are packed csum_size apart, one per sector. */
5755 csum_offset = leaf_offset +
5756 tmp / root->sectorsize * csum_size;
5757 read_extent_buffer(eb, (char *)&csum_expected,
5758 csum_offset, csum_size);
5759 /* try another mirror */
5760 if (csum != csum_expected) {
5761 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5762 mirror, bytenr + tmp,
5763 csum, csum_expected);
5764 num_copies = btrfs_num_copies(
5765 &root->fs_info->mapping_tree,
5767 if (mirror < num_copies - 1) {
5772 data_checked += root->sectorsize;
/*
 * check_extent_exists - ensure [bytenr, bytenr + num_bytes) is entirely
 * covered by extent records in the extent tree.
 *
 * Searches backwards to the first record that could overlap the start,
 * then walks forward trimming the range as covering records are found.
 * A record that overlaps only the middle of the range splits the check
 * in two (recursing on the right half).  Any bytes left uncovered at the
 * end are reported as a csum range with no extent record.
 */
5781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5784 struct btrfs_path *path;
5785 struct extent_buffer *leaf;
5786 struct btrfs_key key;
5789 path = btrfs_alloc_path();
5791 fprintf(stderr, "Error allocating path\n");
5795 key.objectid = bytenr;
5796 key.type = BTRFS_EXTENT_ITEM_KEY;
5797 key.offset = (u64)-1;
/* offset (u64)-1 lands us just past any record at this bytenr. */
5800 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5803 fprintf(stderr, "Error looking up extent record %d\n", ret);
5804 btrfs_free_path(path);
5807 if (path->slots[0] > 0) {
5810 ret = btrfs_prev_leaf(root, path);
5813 } else if (ret > 0) {
5820 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5823 * Block group items come before extent items if they have the same
5824 * bytenr, so walk back one more just in case. Dear future traveller,
5825 * first congrats on mastering time travel. Now if it's not too much
5826 * trouble could you go back to 2006 and tell Chris to make the
5827 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5828 * EXTENT_ITEM_KEY please?
5830 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5831 if (path->slots[0] > 0) {
5834 ret = btrfs_prev_leaf(root, path);
5837 } else if (ret > 0) {
5842 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward scan: trim the range as covering records are found. */
5846 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5847 ret = btrfs_next_leaf(root, path);
5849 fprintf(stderr, "Error going to next leaf "
5851 btrfs_free_path(path);
5857 leaf = path->nodes[0];
5858 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5859 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5863 if (key.objectid + key.offset < bytenr) {
5867 if (key.objectid > bytenr + num_bytes)
5870 if (key.objectid == bytenr) {
5871 if (key.offset >= num_bytes) {
5875 num_bytes -= key.offset;
5876 bytenr += key.offset;
5877 } else if (key.objectid < bytenr) {
5878 if (key.objectid + key.offset >= bytenr + num_bytes) {
5882 num_bytes = (bytenr + num_bytes) -
5883 (key.objectid + key.offset);
5884 bytenr = key.objectid + key.offset;
5886 if (key.objectid + key.offset < bytenr + num_bytes) {
5887 u64 new_start = key.objectid + key.offset;
5888 u64 new_bytes = bytenr + num_bytes - new_start;
5891 * Weird case, the extent is in the middle of
5892 * our range, we'll have to search one side
5893 * and then the other. Not sure if this happens
5894 * in real life, but no harm in coding it up
5895 * anyway just in case.
5897 btrfs_release_path(path);
5898 ret = check_extent_exists(root, new_start,
5901 fprintf(stderr, "Right section didn't "
5905 num_bytes = key.objectid - bytenr;
5908 num_bytes = key.objectid - bytenr;
/* Whatever is left has no extent record. */
5915 if (num_bytes && !ret) {
5916 fprintf(stderr, "There are no extents for csum range "
5917 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5921 btrfs_free_path(path);
/*
 * check_csums - walk the entire csum tree.
 *
 * For each EXTENT_CSUM item the covered data length is derived from the
 * item size (one csum per sector).  When check_data_csum is set, the data
 * is re-read and verified via check_extent_csums().  Runs of contiguous
 * csum items are accumulated in (offset, num_bytes); when a gap appears,
 * the accumulated range is handed to check_extent_exists() so stale
 * csums without a backing extent are reported.
 */
5925 static int check_csums(struct btrfs_root *root)
5927 struct btrfs_path *path;
5928 struct extent_buffer *leaf;
5929 struct btrfs_key key;
5930 u64 offset = 0, num_bytes = 0;
5931 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5935 unsigned long leaf_offset;
5937 root = root->fs_info->csum_root;
5938 if (!extent_buffer_uptodate(root->node)) {
5939 fprintf(stderr, "No valid csum tree found\n");
/* All csums share one objectid; start from the beginning. */
5943 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5944 key.type = BTRFS_EXTENT_CSUM_KEY;
5947 path = btrfs_alloc_path();
5951 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5953 fprintf(stderr, "Error searching csum tree %d\n", ret);
5954 btrfs_free_path(path);
5958 if (ret > 0 && path->slots[0])
5963 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5964 ret = btrfs_next_leaf(root, path);
5966 fprintf(stderr, "Error going to next leaf "
5973 leaf = path->nodes[0];
5975 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5976 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Item size -> number of csums -> bytes of data covered. */
5981 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5982 csum_size) * root->sectorsize;
5983 if (!check_data_csum)
5984 goto skip_csum_check;
5985 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5986 ret = check_extent_csums(root, key.offset, data_len,
5992 offset = key.offset;
/* Discontinuity: verify the finished run against the extent tree. */
5993 } else if (key.offset != offset + num_bytes) {
5994 ret = check_extent_exists(root, offset, num_bytes);
5996 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5997 "there is no extent record\n",
5998 offset, offset+num_bytes);
6001 offset = key.offset;
6004 num_bytes += data_len;
6008 btrfs_free_path(path);
/*
 * is_dropped_key - compare @key against a snapshot-deletion progress key.
 *
 * Appears to report whether @key orders strictly before @drop_key
 * (objectid, then type, then offset); the return statements are elided
 * from this view — confirm against the original source.  Used by the
 * tree walk to skip node pointers below a root's drop_key/drop_level,
 * i.e. blocks already released by an interrupted snapshot drop.
 */
6012 static int is_dropped_key(struct btrfs_key *key,
6013 struct btrfs_key *drop_key) {
6014 if (key->objectid < drop_key->objectid)
6016 else if (key->objectid == drop_key->objectid) {
6017 if (key->type < drop_key->type)
6019 else if (key->type == drop_key->type) {
6020 if (key->offset < drop_key->offset)
6028 * Here are the rules for FULL_BACKREF.
6030 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6031 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6033 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6034 * if it happened after the relocation occurred since we'll have dropped the
6035 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6036 * have no real way to know for sure.
6038 * We process the blocks one root at a time, and we start from the lowest root
6039 * objectid and go to the highest. So we can just lookup the owner backref for
6040 * the record and if we don't find it then we know it doesn't exist and we have
6043 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6044 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6045 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether @buf's extent record should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF (see the FULL_BACKREF rules documented
 * in the comment preceding this function).
 *
 * Trees below BTRFS_FIRST_FREE_OBJECTID (i.e. not file/reloc trees),
 * root blocks, and blocks still owned by the current root with a
 * matching owner backref keep normal backrefs; a set HEADER_FLAG_RELOC
 * or a missing owner backref means FULL_BACKREF.  If the decision
 * contradicts an already-recorded flag_block_full_backref value, the
 * record is marked bad_full_backref for later repair.
 */
6047 static int calc_extent_flag(struct btrfs_root *root,
6048 struct cache_tree *extent_cache,
6049 struct extent_buffer *buf,
6050 struct root_item_record *ri,
6053 struct extent_record *rec;
6054 struct cache_extent *cache;
6055 struct tree_backref *tback;
6058 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6059 /* we have added this extent before */
6061 rec = container_of(cache, struct extent_record, cache);
6064 * Except file/reloc tree, we can not have
6067 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root block itself never uses FULL_BACKREF. */
6072 if (buf->start == ri->bytenr)
6075 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6078 owner = btrfs_header_owner(buf);
6079 if (owner == ri->objectid)
/* No owner backref recorded -> block must be fully back-referenced. */
6082 tback = find_tree_backref(rec, 0, owner);
6087 if (rec->flag_block_full_backref != FLAG_UNSET &&
6088 rec->flag_block_full_backref != 0)
6089 rec->bad_full_backref = 1;
6092 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6093 if (rec->flag_block_full_backref != FLAG_UNSET &&
6094 rec->flag_block_full_backref != 1)
6095 rec->bad_full_backref = 1;
/*
 * run_next_block - process one tree block from the pending/reada/nodes
 * queues; the core worker of the extent tree scan.
 *
 * Picks the next batch with pick_next_pending() (issuing readahead for
 * the extra entries), reads the first block, determines its
 * FULL_BACKREF state (from the extent tree when !init_extent_tree,
 * falling back to calc_extent_flag()), sanity-checks it via
 * check_block(), then:
 *   - leaves: dispatches each item to the matching in-memory cache
 *     (extent/metadata items, csums, chunks, devices, block groups,
 *     device extents, all backref item types, orphans, file extents);
 *   - nodes: records an extent + tree backref for every child pointer
 *     and queues it on 'nodes' or 'pending', skipping keys already
 *     dropped below ri->drop_key at ri->drop_level.
 * Also accumulates the global btree/fs-tree/extent-tree byte counters
 * and the old-backref detection flag.
 */
6099 static int run_next_block(struct btrfs_root *root,
6100 struct block_info *bits,
6103 struct cache_tree *pending,
6104 struct cache_tree *seen,
6105 struct cache_tree *reada,
6106 struct cache_tree *nodes,
6107 struct cache_tree *extent_cache,
6108 struct cache_tree *chunk_cache,
6109 struct rb_root *dev_cache,
6110 struct block_group_tree *block_group_cache,
6111 struct device_extent_tree *dev_extent_cache,
6112 struct root_item_record *ri)
6114 struct extent_buffer *buf;
6115 struct extent_record *rec = NULL;
6126 struct btrfs_key key;
6127 struct cache_extent *cache;
6130 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6131 bits_nr, &reada_bits);
/* Kick off readahead for every block in the batch. */
6136 for(i = 0; i < nritems; i++) {
6137 ret = add_cache_extent(reada, bits[i].start,
6142 /* fixme, get the parent transid */
6143 readahead_tree_block(root, bits[i].start,
6147 *last = bits[0].start;
6148 bytenr = bits[0].start;
6149 size = bits[0].size;
/* This block is now being processed: drop it from all queues. */
6151 cache = lookup_cache_extent(pending, bytenr, size);
6153 remove_cache_extent(pending, cache);
6156 cache = lookup_cache_extent(reada, bytenr, size);
6158 remove_cache_extent(reada, cache);
6161 cache = lookup_cache_extent(nodes, bytenr, size);
6163 remove_cache_extent(nodes, cache);
6166 cache = lookup_cache_extent(extent_cache, bytenr, size);
6168 rec = container_of(cache, struct extent_record, cache);
6169 gen = rec->parent_generation;
6172 /* fixme, get the real parent transid */
6173 buf = read_tree_block(root, bytenr, size, gen);
6174 if (!extent_buffer_uptodate(buf)) {
6175 record_bad_block_io(root->fs_info,
6176 extent_cache, bytenr, size);
6180 nritems = btrfs_header_nritems(buf);
/* Prefer the extent tree's flags; fall back to our own calculation. */
6183 if (!init_extent_tree) {
6184 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6185 btrfs_header_level(buf), 1, NULL,
6188 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6190 fprintf(stderr, "Couldn't calc extent flags\n");
6191 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6196 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6198 fprintf(stderr, "Couldn't calc extent flags\n");
6199 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6203 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6205 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6206 ri->objectid == btrfs_header_owner(buf)) {
6208 * Ok we got to this block from it's original owner and
6209 * we have FULL_BACKREF set. Relocation can leave
6210 * converted blocks over so this is altogether possible,
6211 * however it's not possible if the generation > the
6212 * last snapshot, so check for this case.
6214 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6215 btrfs_header_generation(buf) > ri->last_snapshot) {
6216 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6217 rec->bad_full_backref = 1;
6222 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6223 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6224 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6225 rec->bad_full_backref = 1;
/* Remember the decision on the record for later verification. */
6229 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6230 rec->flag_block_full_backref = 1;
6234 rec->flag_block_full_backref = 0;
6236 owner = btrfs_header_owner(buf);
6239 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item to the matching recorder. */
6243 if (btrfs_is_leaf(buf)) {
6244 btree_space_waste += btrfs_leaf_free_space(root, buf);
6245 for (i = 0; i < nritems; i++) {
6246 struct btrfs_file_extent_item *fi;
6247 btrfs_item_key_to_cpu(buf, &key, i);
6248 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6249 process_extent_item(root, extent_cache, buf,
6253 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6254 process_extent_item(root, extent_cache, buf,
6258 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6260 btrfs_item_size_nr(buf, i);
6263 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6264 process_chunk_item(chunk_cache, &key, buf, i);
6267 if (key.type == BTRFS_DEV_ITEM_KEY) {
6268 process_device_item(dev_cache, &key, buf, i);
6271 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6272 process_block_group_item(block_group_cache,
6276 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6277 process_device_extent_item(dev_extent_cache,
6282 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6283 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6284 process_extent_ref_v0(extent_cache, buf, i);
6291 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6292 add_tree_backref(extent_cache, key.objectid, 0,
6296 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6297 add_tree_backref(extent_cache, key.objectid,
6301 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6302 struct btrfs_extent_data_ref *ref;
6303 ref = btrfs_item_ptr(buf, i,
6304 struct btrfs_extent_data_ref);
6305 add_data_backref(extent_cache,
6307 btrfs_extent_data_ref_root(buf, ref),
6308 btrfs_extent_data_ref_objectid(buf,
6310 btrfs_extent_data_ref_offset(buf, ref),
6311 btrfs_extent_data_ref_count(buf, ref),
6312 0, root->sectorsize);
6315 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6316 struct btrfs_shared_data_ref *ref;
6317 ref = btrfs_item_ptr(buf, i,
6318 struct btrfs_shared_data_ref);
6319 add_data_backref(extent_cache,
6320 key.objectid, key.offset, 0, 0, 0,
6321 btrfs_shared_data_ref_count(buf, ref),
6322 0, root->sectorsize);
/* Orphans (except the shared ORPHAN objectid) are queued for delete. */
6325 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6326 struct bad_item *bad;
6328 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6332 bad = malloc(sizeof(struct bad_item));
6335 INIT_LIST_HEAD(&bad->list);
6336 memcpy(&bad->key, &key,
6337 sizeof(struct btrfs_key));
6338 bad->root_id = owner;
6339 list_add_tail(&bad->list, &delete_items);
6342 if (key.type != BTRFS_EXTENT_DATA_KEY)
6344 fi = btrfs_item_ptr(buf, i,
6345 struct btrfs_file_extent_item);
6346 if (btrfs_file_extent_type(buf, fi) ==
6347 BTRFS_FILE_EXTENT_INLINE)
/* disk_bytenr == 0 marks a hole: no backing extent. */
6349 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6352 data_bytes_allocated +=
6353 btrfs_file_extent_disk_num_bytes(buf, fi);
6354 if (data_bytes_allocated < root->sectorsize) {
6357 data_bytes_referenced +=
6358 btrfs_file_extent_num_bytes(buf, fi);
6359 add_data_backref(extent_cache,
6360 btrfs_file_extent_disk_bytenr(buf, fi),
6361 parent, owner, key.objectid, key.offset -
6362 btrfs_file_extent_offset(buf, fi), 1, 1,
6363 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child pointer. */
6367 struct btrfs_key first_key;
6369 first_key.objectid = 0;
6372 btrfs_item_key_to_cpu(buf, &first_key, 0);
6373 level = btrfs_header_level(buf);
6374 for (i = 0; i < nritems; i++) {
6375 struct extent_record tmpl;
6377 ptr = btrfs_node_blockptr(buf, i);
6378 size = root->nodesize;
6379 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already released by an interrupted snapshot drop. */
6381 if ((level == ri->drop_level)
6382 && is_dropped_key(&key, &ri->drop_key)) {
6387 memset(&tmpl, 0, sizeof(tmpl));
6388 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6389 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6394 tmpl.max_size = size;
6395 ret = add_extent_rec(extent_cache, &tmpl);
6398 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6401 add_pending(nodes, seen, ptr, size);
6403 add_pending(pending, seen, ptr, size);
6406 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6407 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting and old-backref-format detection. */
6409 total_btree_bytes += buf->len;
6410 if (fs_root_objectid(btrfs_header_owner(buf)))
6411 total_fs_tree_bytes += buf->len;
6412 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6413 total_extent_tree_bytes += buf->len;
6414 if (!found_old_backref &&
6415 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6416 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6417 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6418 found_old_backref = 1;
6420 free_extent_buffer(buf);
/*
 * add_root_to_pending - queue a tree root block for scanning and seed
 * its extent record.
 *
 * The root block goes onto 'nodes' (level > 0) or 'pending' (leaf), an
 * extent record covering it is added to extent_cache, and a tree backref
 * is recorded: a shared ref (parent == its own bytenr) for the reloc
 * tree or pre-mixed-backref filesystems, otherwise a keyed ref by root
 * objectid.
 */
6424 static int add_root_to_pending(struct extent_buffer *buf,
6425 struct cache_tree *extent_cache,
6426 struct cache_tree *pending,
6427 struct cache_tree *seen,
6428 struct cache_tree *nodes,
6431 struct extent_record tmpl;
6433 if (btrfs_header_level(buf) > 0)
6434 add_pending(nodes, seen, buf->start, buf->len);
6436 add_pending(pending, seen, buf->start, buf->len);
6438 memset(&tmpl, 0, sizeof(tmpl));
6439 tmpl.start = buf->start;
6444 tmpl.max_size = buf->len;
6445 add_extent_rec(extent_cache, &tmpl);
6447 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6448 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6449 add_tree_backref(extent_cache, buf->start, buf->start,
6452 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6456 /* as we fix the tree, we might be deleting blocks that
6457 * we're tracking for repair. This hook makes sure we
6458 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep fsck's in-memory accounting in sync while
 * repair code frees extents.
 *
 * Called when refs are dropped during repair: finds the matching
 * extent_record (data vs. tree decided by owner >= FIRST_FREE_OBJECTID),
 * decrements the mirrored backref state (found_ref counts, num_refs,
 * extent_item_refs), and erases the backref node / frees the record when
 * fully resolved.
 */
6460 static int free_extent_hook(struct btrfs_trans_handle *trans,
6461 struct btrfs_root *root,
6462 u64 bytenr, u64 num_bytes, u64 parent,
6463 u64 root_objectid, u64 owner, u64 offset,
6466 struct extent_record *rec;
6467 struct cache_extent *cache;
6469 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6471 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6472 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6476 rec = container_of(cache, struct extent_record, cache);
6478 struct data_backref *back;
6479 back = find_data_backref(rec, parent, root_objectid, owner,
6480 offset, 1, bytenr, num_bytes);
6483 if (back->node.found_ref) {
6484 back->found_ref -= refs_to_drop;
6486 rec->refs -= refs_to_drop;
6488 if (back->node.found_extent_tree) {
6489 back->num_refs -= refs_to_drop;
6490 if (rec->extent_item_refs)
6491 rec->extent_item_refs -= refs_to_drop;
6493 if (back->found_ref == 0)
6494 back->node.found_ref = 0;
6495 if (back->num_refs == 0)
6496 back->node.found_extent_tree = 0;
/* NOTE(review): erasing while found_ref is still set looks suspicious —
 * upstream btrfs-progs tests !found_ref here; verify against the
 * unelided original before relying on this condition. */
6498 if (!back->node.found_extent_tree && back->node.found_ref) {
6499 rb_erase(&back->node.node, &rec->backref_tree);
6503 struct tree_backref *back;
6504 back = find_tree_backref(rec, parent, root_objectid);
6507 if (back->node.found_ref) {
6510 back->node.found_ref = 0;
6512 if (back->node.found_extent_tree) {
6513 if (rec->extent_item_refs)
6514 rec->extent_item_refs--;
6515 back->node.found_extent_tree = 0;
/* NOTE(review): same !found_ref question as the data branch above. */
6517 if (!back->node.found_extent_tree && back->node.found_ref) {
6518 rb_erase(&back->node.node, &rec->backref_tree);
6522 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove every extent-tree item recorded for
 * @bytenr: extent/metadata items and all backref item types, walking
 * keys downward from offset (u64)-1.
 *
 * For EXTENT_ITEM/METADATA_ITEM deletions, block group accounting is
 * updated via btrfs_update_block_group() (metadata length taken as
 * nodesize).  @new_len is not referenced in the lines visible here.
 */
6527 static int delete_extent_records(struct btrfs_trans_handle *trans,
6528 struct btrfs_root *root,
6529 struct btrfs_path *path,
6530 u64 bytenr, u64 new_len)
6532 struct btrfs_key key;
6533 struct btrfs_key found_key;
6534 struct extent_buffer *leaf;
6539 key.objectid = bytenr;
6541 key.offset = (u64)-1;
6544 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6551 if (path->slots[0] == 0)
6557 leaf = path->nodes[0];
6558 slot = path->slots[0];
6560 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6561 if (found_key.objectid != bytenr)
/* Only extent items and backref items are fair game for deletion. */
6564 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6565 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6566 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6567 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6568 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6569 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6570 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not deletable: step the search key just below this item and retry. */
6571 btrfs_release_path(path);
6572 if (found_key.type == 0) {
6573 if (found_key.offset == 0)
6575 key.offset = found_key.offset - 1;
6576 key.type = found_key.type;
6578 key.type = found_key.type - 1;
6579 key.offset = (u64)-1;
6583 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6584 found_key.objectid, found_key.type, found_key.offset);
6586 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6589 btrfs_release_path(path);
6591 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6592 found_key.type == BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is the level, so the size is nodesize. */
6593 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6594 found_key.offset : root->nodesize;
6596 ret = btrfs_update_block_group(trans, root, bytenr,
6603 btrfs_release_path(path);
6608 * for a single backref, this will allocate a new extent
6609 * and add the backref to it.
/*
 * record_extent - repair helper: (re)create the on-disk extent item for
 * @rec when needed (!allocated path, elided here), then add @back as an
 * inc_extent_ref for each missing reference.  Tree blocks get a
 * btrfs_tree_block_info initialized from rec->info_objectid/info_level;
 * data extents get FLAG_DATA and one ref per dback->found_ref.
 *
 * NOTE(review): "©_key" below is almost certainly a mis-rendering of
 * "&copy_key" introduced by extraction, and "copy_key;;" carries a stray
 * (harmless) semicolon — verify both against the unelided original.
 */
6611 static int record_extent(struct btrfs_trans_handle *trans,
6612 struct btrfs_fs_info *info,
6613 struct btrfs_path *path,
6614 struct extent_record *rec,
6615 struct extent_backref *back,
6616 int allocated, u64 flags)
6619 struct btrfs_root *extent_root = info->extent_root;
6620 struct extent_buffer *leaf;
6621 struct btrfs_key ins_key;
6622 struct btrfs_extent_item *ei;
6623 struct tree_backref *tback;
6624 struct data_backref *dback;
6625 struct btrfs_tree_block_info *bi;
6628 rec->max_size = max_t(u64, rec->max_size,
6629 info->extent_root->nodesize);
/* Tree blocks need room for the trailing btrfs_tree_block_info. */
6632 u32 item_size = sizeof(*ei);
6635 item_size += sizeof(*bi);
6637 ins_key.objectid = rec->start;
6638 ins_key.offset = rec->max_size;
6639 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6641 ret = btrfs_insert_empty_item(trans, extent_root, path,
6642 &ins_key, item_size);
6646 leaf = path->nodes[0];
6647 ei = btrfs_item_ptr(leaf, path->slots[0],
6648 struct btrfs_extent_item);
/* refs start at 0; btrfs_inc_extent_ref below bumps them. */
6650 btrfs_set_extent_refs(leaf, ei, 0);
6651 btrfs_set_extent_generation(leaf, ei, rec->generation);
6653 if (back->is_data) {
6654 btrfs_set_extent_flags(leaf, ei,
6655 BTRFS_EXTENT_FLAG_DATA);
6657 struct btrfs_disk_key copy_key;;
6659 tback = to_tree_backref(back);
6660 bi = (struct btrfs_tree_block_info *)(ei + 1);
6661 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6664 btrfs_set_disk_key_objectid(©_key,
6665 rec->info_objectid);
6666 btrfs_set_disk_key_type(©_key, 0);
6667 btrfs_set_disk_key_offset(©_key, 0);
6669 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6670 btrfs_set_tree_block_key(leaf, bi, ©_key);
6672 btrfs_set_extent_flags(leaf, ei,
6673 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6676 btrfs_mark_buffer_dirty(leaf);
6677 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6678 rec->max_size, 1, 0);
6681 btrfs_release_path(path);
6684 if (back->is_data) {
6688 dback = to_data_backref(back);
6689 if (back->full_backref)
6690 parent = dback->parent;
/* One inc_extent_ref per reference we actually found in the trees. */
6694 for (i = 0; i < dback->found_ref; i++) {
6695 /* if parent != 0, we're doing a full backref
6696 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6697 * just makes the backref allocator create a data
6700 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6701 rec->start, rec->max_size,
6705 BTRFS_FIRST_FREE_OBJECTID :
6711 fprintf(stderr, "adding new data backref"
6712 " on %llu %s %llu owner %llu"
6713 " offset %llu found %d\n",
6714 (unsigned long long)rec->start,
6715 back->full_backref ?
6717 back->full_backref ?
6718 (unsigned long long)parent :
6719 (unsigned long long)dback->root,
6720 (unsigned long long)dback->owner,
6721 (unsigned long long)dback->offset,
6726 tback = to_tree_backref(back);
6727 if (back->full_backref)
6728 parent = tback->parent;
6732 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6733 rec->start, rec->max_size,
6734 parent, tback->root, 0, 0);
6735 fprintf(stderr, "adding new tree backref on "
6736 "start %llu len %llu parent %llu root %llu\n",
6737 rec->start, rec->max_size, parent, tback->root);
6740 btrfs_release_path(path);
/*
 * find_entry - linear scan of an extent_entry list for an exact
 * bytenr/bytes match.  The found-entry return path is elided from this
 * view; entries that don't match fall through to a NULL result.
 */
6744 static struct extent_entry *find_entry(struct list_head *entries,
6745 u64 bytenr, u64 bytes)
6747 struct extent_entry *entry = NULL;
6749 list_for_each_entry(entry, entries, list) {
6750 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the extent_entry backed by the most
 * agreeing backrefs ("votes").
 *
 * Entries whose broken count equals their total count are untrusted and
 * skipped.  A count tie with the current best forces continued searching
 * and comparison against the previous candidate; if the tie cannot be
 * broken, the result is unreliable (return paths elided in this view).
 */
6757 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6759 struct extent_entry *entry, *best = NULL, *prev = NULL;
6761 list_for_each_entry(entry, entries, list) {
6768 * If there are as many broken entries as entries then we know
6769 * not to trust this particular entry.
6771 if (entry->broken == entry->count)
6775 * If our current entry == best then we can't be sure our best
6776 * is really the best, so we need to keep searching.
6778 if (best && best->count == entry->count) {
6784 /* Prev == entry, not good enough, have to keep searching */
6785 if (!prev->broken && prev->count == entry->count)
6789 best = (prev->count > entry->count) ? prev : entry;
6790 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so its disk_bytenr and
 * disk_num_bytes agree with the winning extent @entry.
 *
 * Resolves the owning fs root from dback->root, walks forward from
 * (owner, EXTENT_DATA, offset) until the file extent matching dback's
 * disk_bytenr/bytes is found, then re-searches with a transaction (cow)
 * and adjusts disk_bytenr, the file extent offset, disk_num_bytes, and
 * ram_bytes (the latter only when uncompressed).  Compressed refs whose
 * start doesn't match, and refs lying outside the entry's range, are
 * reported for developer attention rather than repaired.
 */
6798 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6799 struct data_backref *dback, struct extent_entry *entry)
6801 struct btrfs_trans_handle *trans;
6802 struct btrfs_root *root;
6803 struct btrfs_file_extent_item *fi;
6804 struct extent_buffer *leaf;
6805 struct btrfs_key key;
6809 key.objectid = dback->root;
6810 key.type = BTRFS_ROOT_ITEM_KEY;
6811 key.offset = (u64)-1;
6812 root = btrfs_read_fs_root(info, &key);
6814 fprintf(stderr, "Couldn't find root for our ref\n");
6819 * The backref points to the original offset of the extent if it was
6820 * split, so we need to search down to the offset we have and then walk
6821 * forward until we find the backref we're looking for.
6823 key.objectid = dback->owner;
6824 key.type = BTRFS_EXTENT_DATA_KEY;
6825 key.offset = dback->offset;
6826 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6828 fprintf(stderr, "Error looking up ref %d\n", ret);
6833 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6834 ret = btrfs_next_leaf(root, path);
6836 fprintf(stderr, "Couldn't find our ref, next\n");
6840 leaf = path->nodes[0];
6841 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6842 if (key.objectid != dback->owner ||
6843 key.type != BTRFS_EXTENT_DATA_KEY) {
6844 fprintf(stderr, "Couldn't find our ref, search\n");
6847 fi = btrfs_item_ptr(leaf, path->slots[0],
6848 struct btrfs_file_extent_item);
6849 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6850 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the exact file extent this backref describes. */
6852 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6857 btrfs_release_path(path);
6859 trans = btrfs_start_transaction(root, 1);
6861 return PTR_ERR(trans);
6864 * Ok we have the key of the file extent we want to fix, now we can cow
6865 * down to the thing and fix it.
6867 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6869 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6870 key.objectid, key.type, key.offset, ret);
6874 fprintf(stderr, "Well that's odd, we just found this key "
6875 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6880 leaf = path->nodes[0];
6881 fi = btrfs_item_ptr(leaf, path->slots[0],
6882 struct btrfs_file_extent_item);
6884 if (btrfs_file_extent_compression(leaf, fi) &&
6885 dback->disk_bytenr != entry->bytenr) {
6886 fprintf(stderr, "Ref doesn't match the record start and is "
6887 "compressed, please take a btrfs-image of this file "
6888 "system and send it to a btrfs developer so they can "
6889 "complete this functionality for bytenr %Lu\n",
6890 dback->disk_bytenr);
/* Broken ref: just point it at the agreed-upon extent start. */
6895 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6896 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6897 } else if (dback->disk_bytenr > entry->bytenr) {
6898 u64 off_diff, offset;
6900 off_diff = dback->disk_bytenr - entry->bytenr;
6901 offset = btrfs_file_extent_offset(leaf, fi);
6902 if (dback->disk_bytenr + offset +
6903 btrfs_file_extent_num_bytes(leaf, fi) >
6904 entry->bytenr + entry->bytes) {
6905 fprintf(stderr, "Ref is past the entry end, please "
6906 "take a btrfs-image of this file system and "
6907 "send it to a btrfs developer, ref %Lu\n",
6908 dback->disk_bytenr);
/* Shift the start back to the entry and fold the gap into offset. */
6913 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6914 btrfs_set_file_extent_offset(leaf, fi, offset);
6915 } else if (dback->disk_bytenr < entry->bytenr) {
6918 offset = btrfs_file_extent_offset(leaf, fi);
6919 if (dback->disk_bytenr + offset < entry->bytenr) {
6920 fprintf(stderr, "Ref is before the entry start, please"
6921 " take a btrfs-image of this file system and "
6922 "send it to a btrfs developer, ref %Lu\n",
6923 dback->disk_bytenr);
/* Move the start forward and shrink offset by the same delta. */
6928 offset += dback->disk_bytenr;
6929 offset -= entry->bytenr;
6930 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6931 btrfs_set_file_extent_offset(leaf, fi, offset);
6934 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6937 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6938 * only do this if we aren't using compression, otherwise it's a
6941 if (!btrfs_file_extent_compression(leaf, fi))
6942 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6944 printf("ram bytes may be wrong?\n");
6945 btrfs_mark_buffer_dirty(leaf);
6947 err = btrfs_commit_transaction(trans, root);
6948 btrfs_release_path(path);
6949 return ret ? ret : err;
/*
 * Try to get all data backrefs of @rec to agree on the extent's
 * (bytenr, bytes) pair, repairing the file extents that disagree.
 *
 * Each distinct (disk_bytenr, bytes) value seen in the backrefs is
 * tallied into an extent_entry list; the most popular value wins
 * (find_most_right_entry), with the extent record itself consulted to
 * break ties.  Disagreeing backrefs are then rewritten via repair_ref().
 * Returns <0 on error; since repair_ref() modifies live file extent
 * items, a successful repair requires the caller to rescan.
 */
6952 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6953 struct extent_record *rec)
6955 struct extent_backref *back, *tmp;
6956 struct data_backref *dback;
6957 struct extent_entry *entry, *best = NULL;
6960 int broken_entries = 0;
6965 * Metadata is easy and the backrefs should always agree on bytenr and
6966 * size, if not we've got bigger issues.
/* Tally each distinct (disk_bytenr, bytes) pair seen in the backrefs. */
6971 rbtree_postorder_for_each_entry_safe(back, tmp,
6972 &rec->backref_tree, node) {
6973 if (back->full_backref || !back->is_data)
6976 dback = to_data_backref(back);
6979 * We only pay attention to backrefs that we found a real
6982 if (dback->found_ref == 0)
6986 * For now we only catch when the bytes don't match, not the
6987 * bytenr. We can easily do this at the same time, but I want
6988 * to have a fs image to test on before we just add repair
6989 * functionality willy-nilly so we know we won't screw up the
6993 entry = find_entry(&entries, dback->disk_bytenr,
6996 entry = malloc(sizeof(struct extent_entry));
7001 memset(entry, 0, sizeof(*entry));
7002 entry->bytenr = dback->disk_bytenr;
7003 entry->bytes = dback->bytes;
7004 list_add_tail(&entry->list, &entries);
7009 * If we only have on entry we may think the entries agree when
7010 * in reality they don't so we have to do some extra checking.
7012 if (dback->disk_bytenr != rec->start ||
7013 dback->bytes != rec->nr || back->broken)
7024 /* Yay all the backrefs agree, carry on good sir */
7025 if (nr_entries <= 1 && !mismatch)
7028 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7029 "%Lu\n", rec->start);
7032 * First we want to see if the backrefs can agree amongst themselves who
7033 * is right, so figure out which one of the entries has the highest
7036 best = find_most_right_entry(&entries);
7039 * Ok so we may have an even split between what the backrefs think, so
7040 * this is where we use the extent ref to see what it thinks.
7043 entry = find_entry(&entries, rec->start, rec->nr);
7044 if (!entry && (!broken_entries || !rec->found_rec)) {
7045 fprintf(stderr, "Backrefs don't agree with each other "
7046 "and extent record doesn't agree with anybody,"
7047 " so we can't fix bytenr %Lu bytes %Lu\n",
7048 rec->start, rec->nr);
7051 } else if (!entry) {
7053 * Ok our backrefs were broken, we'll assume this is the
7054 * correct value and add an entry for this range.
7056 entry = malloc(sizeof(struct extent_entry));
7061 memset(entry, 0, sizeof(*entry));
7062 entry->bytenr = rec->start;
7063 entry->bytes = rec->nr;
7064 list_add_tail(&entry->list, &entries);
/* Re-run the election now that the extent record has a vote. */
7068 best = find_most_right_entry(&entries);
7070 fprintf(stderr, "Backrefs and extent record evenly "
7071 "split on who is right, this is going to "
7072 "require user input to fix bytenr %Lu bytes "
7073 "%Lu\n", rec->start, rec->nr);
7080 * I don't think this can happen currently as we'll abort() if we catch
7081 * this case higher up, but in case somebody removes that we still can't
7082 * deal with it properly here yet, so just bail out of that's the case.
7084 if (best->bytenr != rec->start) {
7085 fprintf(stderr, "Extent start and backref starts don't match, "
7086 "please use btrfs-image on this file system and send "
7087 "it to a btrfs developer so they can make fsck fix "
7088 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7089 rec->start, rec->nr);
7095 * Ok great we all agreed on an extent record, let's go find the real
7096 * references and fix up the ones that don't match.
7098 rbtree_postorder_for_each_entry_safe(back, tmp,
7099 &rec->backref_tree, node) {
7100 if (back->full_backref || !back->is_data)
7103 dback = to_data_backref(back);
7106 * Still ignoring backrefs that don't have a real ref attached
7109 if (dback->found_ref == 0)
/* Matching backrefs need no work; only rewrite the disagreeing ones. */
7112 if (dback->bytes == best->bytes &&
7113 dback->disk_bytenr == best->bytenr)
7116 ret = repair_ref(info, path, dback, best);
7122 * Ok we messed with the actual refs, which means we need to drop our
7123 * entire cache and go back and rescan. I know this is a huge pain and
7124 * adds a lot of extra work, but it's the only way to be safe. Once all
7125 * the backrefs agree we may not need to do anything to the extent
/* Free the scratch entry list before returning. */
7130 while (!list_empty(&entries)) {
7131 entry = list_entry(entries.next, struct extent_entry, list);
7132 list_del_init(&entry->list);
/*
 * Collapse duplicate extent records covering the same range.
 *
 * When @rec has no real extent item of its own and exactly one
 * duplicate, the duplicate ("good") is promoted to be the canonical
 * record: it absorbs @rec's refs/backrefs and any further overlapping
 * records found in the cache, then is re-inserted into @extent_cache.
 * Returns 0 when duplicates remain to be deleted by the caller,
 * non-zero otherwise (final return: good->num_duplicates ? 0 : 1).
 */
7138 static int process_duplicates(struct btrfs_root *root,
7139 struct cache_tree *extent_cache,
7140 struct extent_record *rec)
7142 struct extent_record *good, *tmp;
7143 struct cache_extent *cache;
7147 * If we found a extent record for this extent then return, or if we
7148 * have more than one duplicate we are likely going to need to delete
7151 if (rec->found_rec || rec->num_duplicates > 1)
7154 /* Shouldn't happen but just in case */
7155 BUG_ON(!rec->num_duplicates);
7158 * So this happens if we end up with a backref that doesn't match the
7159 * actual extent entry. So either the backref is bad or the extent
7160 * entry is bad. Either way we want to have the extent_record actually
7161 * reflect what we found in the extent_tree, so we need to take the
7162 * duplicate out and use that as the extent_record since the only way we
7163 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7165 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate to be the canonical record. */
7167 good = to_extent_record(rec->dups.next);
7168 list_del_init(&good->list);
7169 INIT_LIST_HEAD(&good->backrefs);
7170 INIT_LIST_HEAD(&good->dups);
7171 good->cache.start = good->start;
7172 good->cache.size = good->nr;
7173 good->content_checked = 0;
7174 good->owner_ref_checked = 0;
7175 good->num_duplicates = 0;
7176 good->refs = rec->refs;
7177 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any other records overlapping the promoted range. */
7179 cache = lookup_cache_extent(extent_cache, good->start,
7183 tmp = container_of(cache, struct extent_record, cache);
7186 * If we find another overlapping extent and it's found_rec is
7187 * set then it's a duplicate and we need to try and delete
7190 if (tmp->found_rec || tmp->num_duplicates > 0) {
7191 if (list_empty(&good->list))
7192 list_add_tail(&good->list,
7193 &duplicate_extents);
7194 good->num_duplicates += tmp->num_duplicates + 1;
7195 list_splice_init(&tmp->dups, &good->dups);
7196 list_del_init(&tmp->list);
7197 list_add_tail(&tmp->list, &good->dups);
7198 remove_cache_extent(extent_cache, &tmp->cache);
7203 * Ok we have another non extent item backed extent rec, so lets
7204 * just add it to this extent and carry on like we did above.
7206 good->refs += tmp->refs;
7207 list_splice_init(&tmp->backrefs, &good->backrefs);
7208 remove_cache_extent(extent_cache, &tmp->cache);
7211 ret = insert_cache_extent(extent_cache, &good->cache);
7214 return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk EXTENT_ITEMs of duplicate extent records that are
 * fully covered by the widest record ("good") in @rec's dup list.
 *
 * Bails out (goto the out label, not visible here) if any duplicate only
 * partially overlaps the covering record, since that case needs more
 * careful handling.  Returns a negative errno on failure, otherwise the
 * number of records deleted (nr_del); 0 means nothing had to change and
 * rec->num_duplicates is cleared.
 */
7217 static int delete_duplicate_records(struct btrfs_root *root,
7218 struct extent_record *rec)
7220 struct btrfs_trans_handle *trans;
7221 LIST_HEAD(delete_list);
7222 struct btrfs_path *path;
7223 struct extent_record *tmp, *good, *n;
7226 struct btrfs_key key;
7228 path = btrfs_alloc_path();
7235 /* Find the record that covers all of the duplicates. */
7236 list_for_each_entry(tmp, &rec->dups, list) {
7237 if (good->start < tmp->start)
7239 if (good->nr > tmp->nr)
7242 if (tmp->start + tmp->nr < good->start + good->nr) {
7243 fprintf(stderr, "Ok we have overlapping extents that "
7244 "aren't completely covered by each other, this "
7245 "is going to require more careful thought. "
7246 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7247 tmp->start, tmp->nr, good->start, good->nr);
/* Queue every record except the covering one for deletion. */
7254 list_add_tail(&rec->list, &delete_list);
7256 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7259 list_move_tail(&tmp->list, &delete_list);
7262 root = root->fs_info->extent_root;
7263 trans = btrfs_start_transaction(root, 1);
7264 if (IS_ERR(trans)) {
7265 ret = PTR_ERR(trans);
7269 list_for_each_entry(tmp, &delete_list, list) {
7270 if (tmp->found_rec == 0)
7272 key.objectid = tmp->start;
7273 key.type = BTRFS_EXTENT_ITEM_KEY;
7274 key.offset = tmp->nr;
7276 /* Shouldn't happen but just in case */
7277 if (tmp->metadata) {
7278 fprintf(stderr, "Well this shouldn't happen, extent "
7279 "record overlaps but is metadata? "
7280 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7284 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7290 ret = btrfs_del_item(trans, root, path);
7293 btrfs_release_path(path);
7296 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records we deleted and any leftover dups. */
7300 while (!list_empty(&delete_list)) {
7301 tmp = to_extent_record(delete_list.next);
7302 list_del_init(&tmp->list);
7308 while (!list_empty(&rec->dups)) {
7309 tmp = to_extent_record(rec->dups.next);
7310 list_del_init(&tmp->list);
7314 btrfs_free_path(path);
7316 if (!ret && !nr_del)
7317 rec->num_duplicates = 0;
7319 return ret ? ret : nr_del;
/*
 * For data backrefs of @rec that were never matched to a real file
 * extent (found_ref == 0), look the file extent up in its owning
 * fs-tree and recover the on-disk (disk_bytenr, disk_num_bytes).
 *
 * A recovered ref is only attached to this record when no separate
 * extent record already exists for that bytenr in @extent_cache —
 * otherwise the backref belongs to that other record.  Recovered refs
 * are also flagged (comment at the end) so verify_backrefs() knows not
 * to fully trust their values.  Returns 0 or a negative errno.
 */
7322 static int find_possible_backrefs(struct btrfs_fs_info *info,
7323 struct btrfs_path *path,
7324 struct cache_tree *extent_cache,
7325 struct extent_record *rec)
7327 struct btrfs_root *root;
7328 struct extent_backref *back, *tmp;
7329 struct data_backref *dback;
7330 struct cache_extent *cache;
7331 struct btrfs_file_extent_item *fi;
7332 struct btrfs_key key;
7336 rbtree_postorder_for_each_entry_safe(back, tmp,
7337 &rec->backref_tree, node) {
7338 /* Don't care about full backrefs (poor unloved backrefs) */
7339 if (back->full_backref || !back->is_data)
7342 dback = to_data_backref(back);
7344 /* We found this one, we don't need to do a lookup */
7345 if (dback->found_ref)
/* Resolve the subvolume root that owns this backref. */
7348 key.objectid = dback->root;
7349 key.type = BTRFS_ROOT_ITEM_KEY;
7350 key.offset = (u64)-1;
7352 root = btrfs_read_fs_root(info, &key);
7354 /* No root, definitely a bad ref, skip */
7355 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7357 /* Other err, exit */
7359 return PTR_ERR(root);
/* Look up the file extent item this backref claims to point at. */
7361 key.objectid = dback->owner;
7362 key.type = BTRFS_EXTENT_DATA_KEY;
7363 key.offset = dback->offset;
7364 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7366 btrfs_release_path(path);
7369 /* Didn't find it, we can carry on */
7374 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7375 struct btrfs_file_extent_item);
7376 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7377 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7378 btrfs_release_path(path);
7379 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7381 struct extent_record *tmp;
7382 tmp = container_of(cache, struct extent_record, cache);
7385 * If we found an extent record for the bytenr for this
7386 * particular backref then we can't add it to our
7387 * current extent record. We only want to add backrefs
7388 * that don't have a corresponding extent item in the
7389 * extent tree since they likely belong to this record
7390 * and we need to fix it if it doesn't match bytenrs.
7396 dback->found_ref += 1;
7397 dback->disk_bytenr = bytenr;
7398 dback->bytes = bytes;
7401 * Set this so the verify backref code knows not to trust the
7402 * values in this backref.
/*
 * Record orphan data ref into corresponding root.
 *
 * Return 0 if the extent item contains data ref and recorded.
 * Return 1 if the extent item contains no useful data ref
 * On that case, it may contains only shared_dataref or metadata backref
 * or the file extent exists(this should be handled by the extent bytenr
 * Return <0 if something goes wrong.
 */
7420 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7421 struct extent_record *rec)
7423 struct btrfs_key key;
7424 struct btrfs_root *dest_root;
7425 struct extent_backref *back, *tmp;
7426 struct data_backref *dback;
7427 struct orphan_data_extent *orphan;
7428 struct btrfs_path *path;
7429 int recorded_data_ref = 0;
7434 path = btrfs_alloc_path();
/* Walk extent-tree-confirmed data backrefs that lack a real file ref. */
7437 rbtree_postorder_for_each_entry_safe(back, tmp,
7438 &rec->backref_tree, node) {
7439 if (back->full_backref || !back->is_data ||
7440 !back->found_extent_tree)
7442 dback = to_data_backref(back);
7443 if (dback->found_ref)
7445 key.objectid = dback->root;
7446 key.type = BTRFS_ROOT_ITEM_KEY;
7447 key.offset = (u64)-1;
7449 dest_root = btrfs_read_fs_root(fs_info, &key);
7451 /* For non-exist root we just skip it */
7452 if (IS_ERR(dest_root) || !dest_root)
7455 key.objectid = dback->owner;
7456 key.type = BTRFS_EXTENT_DATA_KEY;
7457 key.offset = dback->offset;
7459 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7461 * For ret < 0, it's OK since the fs-tree may be corrupted,
7462 * we need to record it for inode/file extent rebuild.
7463 * For ret > 0, we record it only for file extent rebuild.
7464 * For ret == 0, the file extent exists but only bytenr
7465 * mismatch, let the original bytenr fix routine to handle,
/* Queue the orphan extent on the destination root for later rebuild. */
7471 orphan = malloc(sizeof(*orphan));
7476 INIT_LIST_HEAD(&orphan->list);
7477 orphan->root = dback->root;
7478 orphan->objectid = dback->owner;
7479 orphan->offset = dback->offset;
7480 orphan->disk_bytenr = rec->cache.start;
7481 orphan->disk_len = rec->cache.size;
7482 list_add(&dest_root->orphan_data_extents, &orphan->list);
7483 recorded_data_ref = 1;
7486 btrfs_free_path(path);
7488 return !recorded_data_ref;
/*
 * when an incorrect extent item is found, this will delete
 * all of the existing entries for it and recreate them
 * based on what the tree scan found.
 *
 * Steps: (1) reconcile the backrefs (find_possible_backrefs +
 * verify_backrefs), (2) delete all existing extent records for the
 * range inside a transaction, (3) re-insert a record for every backref
 * that was actually found — unless the block is in the corrupt-blocks
 * cache, in which case no references are recreated.
 */
7498 static int fixup_extent_refs(struct btrfs_fs_info *info,
7499 struct cache_tree *extent_cache,
7500 struct extent_record *rec)
7502 struct btrfs_trans_handle *trans = NULL;
7504 struct btrfs_path *path;
7505 struct cache_extent *cache;
7506 struct extent_backref *back, *tmp;
7510 if (rec->flag_block_full_backref)
7511 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7513 path = btrfs_alloc_path();
7517 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7519 * Sometimes the backrefs themselves are so broken they don't
7520 * get attached to any meaningful rec, so first go back and
7521 * check any of our backrefs that we couldn't find and throw
7522 * them into the list if we find the backref so that
7523 * verify_backrefs can figure out what to do.
7525 ret = find_possible_backrefs(info, path, extent_cache, rec);
7530 /* step one, make sure all of the backrefs agree */
7531 ret = verify_backrefs(info, path, rec);
7535 trans = btrfs_start_transaction(info->extent_root, 1);
7536 if (IS_ERR(trans)) {
7537 ret = PTR_ERR(trans);
7541 /* step two, delete all the existing records */
7542 ret = delete_extent_records(trans, info->extent_root, path,
7543 rec->start, rec->max_size);
7548 /* was this block corrupt? If so, don't add references to it */
7549 cache = lookup_cache_extent(info->corrupt_blocks,
7550 rec->start, rec->max_size);
7556 /* step three, recreate all the refs we did find */
7557 rbtree_postorder_for_each_entry_safe(back, tmp,
7558 &rec->backref_tree, node) {
7560 * if we didn't find any references, don't create a
7563 if (!back->found_ref)
7566 rec->bad_full_backref = 0;
7567 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7575 int err = btrfs_commit_transaction(trans, info->extent_root);
7580 btrfs_free_path(path);
/*
 * Rewrite the extent item's flags so that BTRFS_BLOCK_FLAG_FULL_BACKREF
 * matches rec->flag_block_full_backref (set it when the record says the
 * block uses full backrefs, clear it otherwise).
 *
 * Metadata records are keyed by METADATA_ITEM_KEY/level, data by
 * EXTENT_ITEM_KEY/max_size.  Returns 0 on success or a negative errno;
 * the transaction is always committed on the exit paths shown.
 */
7584 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7585 struct extent_record *rec)
7587 struct btrfs_trans_handle *trans;
7588 struct btrfs_root *root = fs_info->extent_root;
7589 struct btrfs_path *path;
7590 struct btrfs_extent_item *ei;
7591 struct btrfs_key key;
7595 key.objectid = rec->start;
7596 if (rec->metadata) {
7597 key.type = BTRFS_METADATA_ITEM_KEY;
7598 key.offset = rec->info_level;
7600 key.type = BTRFS_EXTENT_ITEM_KEY;
7601 key.offset = rec->max_size;
7604 path = btrfs_alloc_path();
7608 trans = btrfs_start_transaction(root, 0);
7609 if (IS_ERR(trans)) {
7610 btrfs_free_path(path);
7611 return PTR_ERR(trans);
7614 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7616 btrfs_free_path(path);
7617 btrfs_commit_transaction(trans, root);
7620 fprintf(stderr, "Didn't find extent for %llu\n",
7621 (unsigned long long)rec->start);
7622 btrfs_free_path(path);
7623 btrfs_commit_transaction(trans, root);
/* Found the item: toggle the full-backref flag in place. */
7627 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7628 struct btrfs_extent_item);
7629 flags = btrfs_extent_flags(path->nodes[0], ei);
7630 if (rec->flag_block_full_backref) {
7631 fprintf(stderr, "setting full backref on %llu\n",
7632 (unsigned long long)key.objectid);
7633 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7635 fprintf(stderr, "clearing full backref on %llu\n",
7636 (unsigned long long)key.objectid);
7637 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7639 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7640 btrfs_mark_buffer_dirty(path->nodes[0]);
7641 btrfs_free_path(path);
7642 return btrfs_commit_transaction(trans, root);
/* right now we only prune from the extent allocation tree */
/*
 * Remove the parent's pointer to one corrupt block.  We search down to
 * the parent level of the corrupt block and look for the slot whose
 * blockptr matches corrupt->cache.start — first at the slot the search
 * returned, then by scanning the whole node.  If found, btrfs_del_ptr()
 * drops the pointer; the special handling when the parent is the extent
 * root's own node is in lines not visible here.
 */
7646 static int prune_one_block(struct btrfs_trans_handle *trans,
7647 struct btrfs_fs_info *info,
7648 struct btrfs_corrupt_block *corrupt)
7651 struct btrfs_path path;
7652 struct extent_buffer *eb;
7656 int level = corrupt->level + 1;
7658 btrfs_init_path(&path);
7660 /* we want to stop at the parent to our busted block */
7661 path.lowest_level = level;
7663 ret = btrfs_search_slot(trans, info->extent_root,
7664 &corrupt->key, &path, -1, 1);
7669 eb = path.nodes[level];
7676 * hopefully the search gave us the block we want to prune,
7677 * lets try that first
7679 slot = path.slots[level];
7680 found = btrfs_node_blockptr(eb, slot);
7681 if (found == corrupt->cache.start)
7684 nritems = btrfs_header_nritems(eb);
7686 /* the search failed, lets scan this node and hope we find it */
7687 for (slot = 0; slot < nritems; slot++) {
7688 found = btrfs_node_blockptr(eb, slot);
7689 if (found == corrupt->cache.start)
7693 * we couldn't find the bad block. TODO, search all the nodes for pointers
7696 if (eb == info->extent_root->node) {
7701 btrfs_release_path(&path);
7706 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7707 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7710 btrfs_release_path(&path);
/*
 * Walk the fs_info->corrupt_blocks cache and prune the pointer to each
 * corrupt block via prune_one_block(), removing the cache entry as we
 * go.  The transaction is started lazily on the first entry and
 * committed once at the end; note prune_one_block()'s return value is
 * deliberately ignored here (best-effort pruning).
 */
7714 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7716 struct btrfs_trans_handle *trans = NULL;
7717 struct cache_extent *cache;
7718 struct btrfs_corrupt_block *corrupt;
7721 cache = search_cache_extent(info->corrupt_blocks, 0);
7725 trans = btrfs_start_transaction(info->extent_root, 1);
7727 return PTR_ERR(trans);
7729 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7730 prune_one_block(trans, info, corrupt);
7731 remove_cache_extent(info->corrupt_blocks, cache);
7734 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the cached free-space state: clear every EXTENT_DIRTY range in
 * fs_info->free_space_cache and step through the block groups so their
 * cached status can be reset (the per-group reset lines are between the
 * visible ones).  Iteration advances by each group's key range.
 */
7738 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7740 struct btrfs_block_group_cache *cache;
7745 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7746 &start, &end, EXTENT_DIRTY);
7749 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7755 cache = btrfs_lookup_first_block_group(fs_info, start);
7760 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent-tree consistency pass: walk every extent_record in
 * @extent_cache and report/repair the problems found.
 *
 * In repair mode it first excludes all known extents and corrupt blocks
 * from allocation, prunes corrupt blocks, resets cached block groups,
 * and resolves duplicate records.  Then, per record, it checks:
 * duplicates, ref-count mismatches (fixup_extent_refs / orphan data
 * extents), backpointer mismatches, owner refs, bad full-backref flags
 * (fixup_extent_flags), stripe-crossing metadata, and chunk-type
 * mismatches.  After a successful repair run the block accounting is
 * rebuilt in a final transaction.
 */
7764 static int check_extent_refs(struct btrfs_root *root,
7765 struct cache_tree *extent_cache)
7767 struct extent_record *rec;
7768 struct cache_extent *cache;
7777 * if we're doing a repair, we have to make sure
7778 * we don't allocate from the problem extents.
7779 * In the worst case, this will be all the
7782 cache = search_cache_extent(extent_cache, 0);
7784 rec = container_of(cache, struct extent_record, cache);
7785 set_extent_dirty(root->fs_info->excluded_extents,
7787 rec->start + rec->max_size - 1,
7789 cache = next_cache_extent(cache);
7792 /* pin down all the corrupted blocks too */
7793 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7795 set_extent_dirty(root->fs_info->excluded_extents,
7797 cache->start + cache->size - 1,
7799 cache = next_cache_extent(cache);
7801 prune_corrupt_blocks(root->fs_info);
7802 reset_cached_block_groups(root->fs_info);
7805 reset_cached_block_groups(root->fs_info);
7808 * We need to delete any duplicate entries we find first otherwise we
7809 * could mess up the extent tree when we have backrefs that actually
7810 * belong to a different extent item and not the weird duplicate one.
7812 while (repair && !list_empty(&duplicate_extents)) {
7813 rec = to_extent_record(duplicate_extents.next);
7814 list_del_init(&rec->list);
7816 /* Sometimes we can find a backref before we find an actual
7817 * extent, so we need to process it a little bit to see if there
7818 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7819 * if this is a backref screwup. If we need to delete stuff
7820 * process_duplicates() will return 0, otherwise it will return
7823 if (process_duplicates(root, extent_cache, rec))
7825 ret = delete_duplicate_records(root, rec);
7829 * delete_duplicate_records will return the number of entries
7830 * deleted, so if it's greater than 0 then we know we actually
7831 * did something and we need to remove.
/* Per-record consistency checks start here. */
7845 cache = search_cache_extent(extent_cache, 0);
7848 rec = container_of(cache, struct extent_record, cache);
7849 if (rec->num_duplicates) {
7850 fprintf(stderr, "extent item %llu has multiple extent "
7851 "items\n", (unsigned long long)rec->start);
7856 if (rec->refs != rec->extent_item_refs) {
7857 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7858 (unsigned long long)rec->start,
7859 (unsigned long long)rec->nr);
7860 fprintf(stderr, "extent item %llu, found %llu\n",
7861 (unsigned long long)rec->extent_item_refs,
7862 (unsigned long long)rec->refs);
7863 ret = record_orphan_data_extents(root->fs_info, rec);
7870 * we can't use the extent to repair file
7871 * extent, let the fallback method handle it.
7873 if (!fixed && repair) {
7874 ret = fixup_extent_refs(
7885 if (all_backpointers_checked(rec, 1)) {
7886 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7887 (unsigned long long)rec->start,
7888 (unsigned long long)rec->nr);
7890 if (!fixed && !recorded && repair) {
7891 ret = fixup_extent_refs(root->fs_info,
7900 if (!rec->owner_ref_checked) {
7901 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7902 (unsigned long long)rec->start,
7903 (unsigned long long)rec->nr);
7904 if (!fixed && !recorded && repair) {
7905 ret = fixup_extent_refs(root->fs_info,
7914 if (rec->bad_full_backref) {
7915 fprintf(stderr, "bad full backref, on [%llu]\n",
7916 (unsigned long long)rec->start);
7918 ret = fixup_extent_flags(root->fs_info, rec);
7927 * Although it's not a extent ref's problem, we reuse this
7928 * routine for error reporting.
7929 * No repair function yet.
7931 if (rec->crossing_stripes) {
7933 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7934 rec->start, rec->start + rec->max_size);
7939 if (rec->wrong_chunk_type) {
7941 "bad extent [%llu, %llu), type mismatch with chunk\n",
7942 rec->start, rec->start + rec->max_size);
/* Done with this record; release it and un-exclude clean ranges. */
7947 remove_cache_extent(extent_cache, cache);
7948 free_all_extent_backrefs(rec);
7949 if (!init_extent_tree && repair && (!cur_err || fixed))
7950 clear_extent_dirty(root->fs_info->excluded_extents,
7952 rec->start + rec->max_size - 1,
7958 if (ret && ret != -EAGAIN) {
7959 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7962 struct btrfs_trans_handle *trans;
7964 root = root->fs_info->extent_root;
7965 trans = btrfs_start_transaction(root, 1);
7966 if (IS_ERR(trans)) {
7967 ret = PTR_ERR(trans);
/* Recompute block accounting now that the extent tree is repaired. */
7971 btrfs_fix_block_accounting(trans, root);
7972 ret = btrfs_commit_transaction(trans, root);
7977 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length of a chunk from its RAID profile:
 * RAID0 divides the chunk length across all stripes, RAID10 across half
 * of them (hence length * 2 / num_stripes), RAID5/6 across the data
 * stripes only (num_stripes minus 1 or 2 parity stripes), and every
 * other profile (single/DUP/RAID1) uses the full length per stripe.
 */
7983 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7987 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7988 stripe_size = length;
7989 stripe_size /= num_stripes;
7990 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7991 stripe_size = length * 2;
7992 stripe_size /= num_stripes;
7993 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7994 stripe_size = length;
7995 stripe_size /= (num_stripes - 1);
7996 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7997 stripe_size = length;
7998 stripe_size /= (num_stripes - 2);
8000 stripe_size = length;
/*
 * Check the chunk with its block group/dev list ref:
 * Return 0 if all refs seems valid.
 * Return 1 if part of refs seems valid, need later check for rebuild ref
 * like missing block group and needs to search extent tree to rebuild them.
 * Return -1 if essential refs are missing and unable to rebuild.
 */
8012 static int check_chunk_refs(struct chunk_record *chunk_rec,
8013 struct block_group_tree *block_group_cache,
8014 struct device_extent_tree *dev_extent_cache,
8017 struct cache_extent *block_group_item;
8018 struct block_group_record *block_group_rec;
8019 struct cache_extent *dev_extent_item;
8020 struct device_extent_record *dev_extent_rec;
8024 int metadump_v2 = 0;
/* Part 1: the chunk must have a matching block group item. */
8028 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8031 if (block_group_item) {
8032 block_group_rec = container_of(block_group_item,
8033 struct block_group_record,
8035 if (chunk_rec->length != block_group_rec->offset ||
8036 chunk_rec->offset != block_group_rec->objectid ||
8038 chunk_rec->type_flags != block_group_rec->flags)) {
8041 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8042 chunk_rec->objectid,
8047 chunk_rec->type_flags,
8048 block_group_rec->objectid,
8049 block_group_rec->type,
8050 block_group_rec->offset,
8051 block_group_rec->offset,
8052 block_group_rec->objectid,
8053 block_group_rec->flags);
8056 list_del_init(&block_group_rec->list);
8057 chunk_rec->bg_rec = block_group_rec;
8062 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8063 chunk_rec->objectid,
8068 chunk_rec->type_flags);
/* Part 2: every stripe must have a matching device extent. */
8075 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8076 chunk_rec->num_stripes);
8077 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8078 devid = chunk_rec->stripes[i].devid;
8079 offset = chunk_rec->stripes[i].offset;
8080 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8081 devid, offset, length);
8082 if (dev_extent_item) {
8083 dev_extent_rec = container_of(dev_extent_item,
8084 struct device_extent_record,
8086 if (dev_extent_rec->objectid != devid ||
8087 dev_extent_rec->offset != offset ||
8088 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8089 dev_extent_rec->length != length) {
8092 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8093 chunk_rec->objectid,
8096 chunk_rec->stripes[i].devid,
8097 chunk_rec->stripes[i].offset,
8098 dev_extent_rec->objectid,
8099 dev_extent_rec->offset,
8100 dev_extent_rec->length);
8103 list_move(&dev_extent_rec->chunk_list,
8104 &chunk_rec->dextents);
8109 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8110 chunk_rec->objectid,
8113 chunk_rec->stripes[i].devid,
8114 chunk_rec->stripes[i].offset);
/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every chunk record against its block group and device
 * extents (check_chunk_refs) and sort them onto the caller's @good,
 * @rebuild, or @bad lists accordingly.  Afterwards, any block group or
 * device extent left unclaimed by a chunk is reported as an orphan.
 * @silent suppresses the per-item messages.
 */
8122 int check_chunks(struct cache_tree *chunk_cache,
8123 struct block_group_tree *block_group_cache,
8124 struct device_extent_tree *dev_extent_cache,
8125 struct list_head *good, struct list_head *bad,
8126 struct list_head *rebuild, int silent)
8128 struct cache_extent *chunk_item;
8129 struct chunk_record *chunk_rec;
8130 struct block_group_record *bg_rec;
8131 struct device_extent_record *dext_rec;
8135 chunk_item = first_cache_extent(chunk_cache);
8136 while (chunk_item) {
8137 chunk_rec = container_of(chunk_item, struct chunk_record,
8139 err = check_chunk_refs(chunk_rec, block_group_cache,
8140 dev_extent_cache, silent);
8143 if (err == 0 && good)
8144 list_add_tail(&chunk_rec->list, good);
8145 if (err > 0 && rebuild)
8146 list_add_tail(&chunk_rec->list, rebuild);
8148 list_add_tail(&chunk_rec->list, bad);
8149 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists has no owning chunk — report it. */
8152 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8155 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8163 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8167 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all device extents belonging to @dev_rec and
 * compare against the device item's byte_used.  Each matched extent is
 * unlinked from the orphan device_list as it is accounted.  A mismatch
 * is reported; the return value lines are outside this excerpt.
 */
8178 static int check_device_used(struct device_record *dev_rec,
8179 struct device_extent_tree *dext_cache)
8181 struct cache_extent *cache;
8182 struct device_extent_record *dev_extent_rec;
8185 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8187 dev_extent_rec = container_of(cache,
8188 struct device_extent_record,
8190 if (dev_extent_rec->objectid != dev_rec->devid)
8193 list_del_init(&dev_extent_rec->device_list);
8194 total_byte += dev_extent_rec->length;
8195 cache = next_cache_extent(cache);
8198 if (total_byte != dev_rec->byte_used) {
8200 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8201 total_byte, dev_rec->byte_used, dev_rec->objectid,
8202 dev_rec->type, dev_rec->offset);
/* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Verify each device record's used-byte accounting against its device
 * extents (check_device_used), then report any device extents whose
 * device item was never found.
 */
8210 static int check_devices(struct rb_root *dev_cache,
8211 struct device_extent_tree *dev_extent_cache)
8213 struct rb_node *dev_node;
8214 struct device_record *dev_rec;
8215 struct device_extent_record *dext_rec;
8219 dev_node = rb_first(dev_cache);
8221 dev_rec = container_of(dev_node, struct device_record, node);
8222 err = check_device_used(dev_rec, dev_extent_cache);
8226 dev_node = rb_next(dev_node);
8228 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8231 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8232 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (or in-progress
 * snapshot drop: @drop_level/@drop_key) and append it to @head for the
 * later tree-walk in deal_root_from_list().  @drop_key is copied only
 * when provided (the NULL check is in a line not visible here).
 */
8239 static int add_root_item_to_list(struct list_head *head,
8240 u64 objectid, u64 bytenr, u64 last_snapshot,
8241 u8 level, u8 drop_level,
8242 int level_size, struct btrfs_key *drop_key)
8245 struct root_item_record *ri_rec;
8246 ri_rec = malloc(sizeof(*ri_rec));
8249 ri_rec->bytenr = bytenr;
8250 ri_rec->objectid = objectid;
8251 ri_rec->level = level;
8252 ri_rec->level_size = level_size;
8253 ri_rec->drop_level = drop_level;
8254 ri_rec->last_snapshot = last_snapshot;
8256 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8257 list_add_tail(&ri_rec->list, head);
/* Drain @list, unlinking (and presumably freeing) each root_item_record. */
8262 static void free_root_item_list(struct list_head *list)
8264 struct root_item_record *ri_rec;
8266 while (!list_empty(list)) {
8267 ri_rec = list_first_entry(list, struct root_item_record,
8269 list_del_init(&ri_rec->list);
/*
 * Walk every tree root queued on @list: read its root node, seed the
 * pending/seen/nodes caches via add_root_to_pending(), then drive
 * run_next_block() to scan blocks, populating the extent/chunk/device/
 * block-group caches.  Roots are processed one at a time (snapshot
 * ordering matters for extent-tree rebuild — see inline comment), and a
 * final run_next_block() loop drains whatever remains pending.
 */
8274 static int deal_root_from_list(struct list_head *list,
8275 struct btrfs_root *root,
8276 struct block_info *bits,
8278 struct cache_tree *pending,
8279 struct cache_tree *seen,
8280 struct cache_tree *reada,
8281 struct cache_tree *nodes,
8282 struct cache_tree *extent_cache,
8283 struct cache_tree *chunk_cache,
8284 struct rb_root *dev_cache,
8285 struct block_group_tree *block_group_cache,
8286 struct device_extent_tree *dev_extent_cache)
8291 while (!list_empty(list)) {
8292 struct root_item_record *rec;
8293 struct extent_buffer *buf;
8294 rec = list_entry(list->next,
8295 struct root_item_record, list);
8297 buf = read_tree_block(root->fs_info->tree_root,
8298 rec->bytenr, rec->level_size, 0);
8299 if (!extent_buffer_uptodate(buf)) {
8300 free_extent_buffer(buf);
8304 add_root_to_pending(buf, extent_cache, pending,
8305 seen, nodes, rec->objectid);
8307 * To rebuild extent tree, we need deal with snapshot
8308 * one by one, otherwise we deal with node firstly which
8309 * can maximize readahead.
8312 ret = run_next_block(root, bits, bits_nr, &last,
8313 pending, seen, reada, nodes,
8314 extent_cache, chunk_cache,
8315 dev_cache, block_group_cache,
8316 dev_extent_cache, rec);
8320 free_extent_buffer(buf);
8321 list_del(&rec->list);
/* Drain any remaining pending blocks not tied to a specific root. */
8327 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8328 reada, nodes, extent_cache, chunk_cache,
8329 dev_cache, block_group_cache,
8330 dev_extent_cache, NULL);
/*
 * Top-level pass: verify chunks, extents, devices and block groups.
 *
 * Builds in-memory caches by walking every tree root (tree root, chunk
 * root, and each ROOT_ITEM found in the root tree, split into "normal" and
 * "dropping" snapshot lists), then cross-checks the collected chunk /
 * device-extent / block-group / extent-reference information.  Installs
 * fsck hooks on fs_info for the duration of the scan and tears them down
 * before returning.  Both a success path and an error path free all the
 * caches (the duplicated free_* runs near the end).
 */
8340 static int check_chunks_and_extents(struct btrfs_root *root)
8342 struct rb_root dev_cache;
8343 struct cache_tree chunk_cache;
8344 struct block_group_tree block_group_cache;
8345 struct device_extent_tree dev_extent_cache;
8346 struct cache_tree extent_cache;
8347 struct cache_tree seen;
8348 struct cache_tree pending;
8349 struct cache_tree reada;
8350 struct cache_tree nodes;
8351 struct extent_io_tree excluded_extents;
8352 struct cache_tree corrupt_blocks;
8353 struct btrfs_path path;
8354 struct btrfs_key key;
8355 struct btrfs_key found_key;
8357 struct block_info *bits;
8359 struct extent_buffer *leaf;
8361 struct btrfs_root_item ri;
8362 struct list_head dropping_trees;
8363 struct list_head normal_trees;
8364 struct btrfs_root *root1;
/* Initialize all the per-run caches and lists */
8369 dev_cache = RB_ROOT;
8370 cache_tree_init(&chunk_cache);
8371 block_group_tree_init(&block_group_cache);
8372 device_extent_tree_init(&dev_extent_cache);
8374 cache_tree_init(&extent_cache);
8375 cache_tree_init(&seen);
8376 cache_tree_init(&pending);
8377 cache_tree_init(&nodes);
8378 cache_tree_init(&reada);
8379 cache_tree_init(&corrupt_blocks);
8380 extent_io_tree_init(&excluded_extents);
8381 INIT_LIST_HEAD(&dropping_trees);
8382 INIT_LIST_HEAD(&normal_trees);
/* Hook the fsck caches into fs_info so other code can record into them */
8385 root->fs_info->excluded_extents = &excluded_extents;
8386 root->fs_info->fsck_extent_cache = &extent_cache;
8387 root->fs_info->free_extent_hook = free_extent_hook;
8388 root->fs_info->corrupt_blocks = &corrupt_blocks;
8392 bits = malloc(bits_nr * sizeof(struct block_info));
8398 if (ctx.progress_enabled) {
8399 ctx.tp = TASK_EXTENTS;
8400 task_start(ctx.info);
/* Always queue the tree root and chunk root first */
8404 root1 = root->fs_info->tree_root;
8405 level = btrfs_header_level(root1->node);
8406 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8407 root1->node->start, 0, level, 0,
8408 root1->nodesize, NULL);
8411 root1 = root->fs_info->chunk_root;
8412 level = btrfs_header_level(root1->node);
8413 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8414 root1->node->start, 0, level, 0,
8415 root1->nodesize, NULL);
/* Scan the root tree for every ROOT_ITEM and queue each subvolume root */
8418 btrfs_init_path(&path);
8421 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8422 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8427 leaf = path.nodes[0];
8428 slot = path.slots[0];
8429 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8430 ret = btrfs_next_leaf(root, &path);
8433 leaf = path.nodes[0];
8434 slot = path.slots[0];
8436 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8437 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8438 unsigned long offset;
8441 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8442 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8443 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not mid-deletion: "normal" */
8444 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8445 level = btrfs_root_level(&ri);
8446 level_size = root->nodesize;
8447 ret = add_root_item_to_list(&normal_trees,
8449 btrfs_root_bytenr(&ri),
8450 last_snapshot, level,
8451 0, level_size, NULL);
/* Otherwise the snapshot is partially dropped: keep its drop key */
8455 level = btrfs_root_level(&ri);
8456 level_size = root->nodesize;
8457 objectid = found_key.objectid;
8458 btrfs_disk_key_to_cpu(&found_key,
8460 ret = add_root_item_to_list(&dropping_trees,
8462 btrfs_root_bytenr(&ri),
8463 last_snapshot, level,
8465 level_size, &found_key);
8472 btrfs_release_path(&path);
8475 * check_block can return -EAGAIN if it fixes something, please keep
8476 * this in mind when dealing with return values from these functions, if
8477 * we get -EAGAIN we want to fall through and restart the loop.
8479 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8480 &seen, &reada, &nodes, &extent_cache,
8481 &chunk_cache, &dev_cache, &block_group_cache,
8488 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8489 &pending, &seen, &reada, &nodes,
8490 &extent_cache, &chunk_cache, &dev_cache,
8491 &block_group_cache, &dev_extent_cache);
/* Cross-check everything collected during the walk */
8498 ret = check_chunks(&chunk_cache, &block_group_cache,
8499 &dev_extent_cache, NULL, NULL, NULL, 0);
8506 ret = check_extent_refs(root, &extent_cache);
8513 ret = check_devices(&dev_cache, &dev_extent_cache);
8518 task_stop(ctx.info);
/* Detach the fsck hooks from fs_info before freeing the caches */
8520 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8521 extent_io_tree_cleanup(&excluded_extents);
8522 root->fs_info->fsck_extent_cache = NULL;
8523 root->fs_info->free_extent_hook = NULL;
8524 root->fs_info->corrupt_blocks = NULL;
8525 root->fs_info->excluded_extents = NULL;
8528 free_chunk_cache_tree(&chunk_cache);
8529 free_device_cache_tree(&dev_cache);
8530 free_block_group_tree(&block_group_cache);
8531 free_device_extent_tree(&dev_extent_cache);
8532 free_extent_cache_tree(&seen);
8533 free_extent_cache_tree(&pending);
8534 free_extent_cache_tree(&reada);
8535 free_extent_cache_tree(&nodes);
/* Error-path cleanup: same teardown, reached via a label not shown here */
8538 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8539 free_extent_cache_tree(&seen);
8540 free_extent_cache_tree(&pending);
8541 free_extent_cache_tree(&reada);
8542 free_extent_cache_tree(&nodes);
8543 free_chunk_cache_tree(&chunk_cache);
8544 free_block_group_tree(&block_group_cache);
8545 free_device_cache_tree(&dev_cache);
8546 free_device_extent_tree(&dev_extent_cache);
8547 free_extent_record_cache(root->fs_info, &extent_cache);
8548 free_root_item_list(&normal_trees);
8549 free_root_item_list(&dropping_trees);
8550 extent_io_tree_cleanup(&excluded_extents);
8555 * Check backrefs of a tree block given by @bytenr or @eb.
8557 * @root: the root containing the @bytenr or @eb
8558 * @eb: tree block extent buffer, can be NULL
8559 * @bytenr: bytenr of the tree block to search
8560 * @level: tree level of the tree block
8561 * @owner: owner of the tree block
8563 * Return >0 for any error found and output error message
8564 * Return 0 for no error found
8566 static int check_tree_block_ref(struct btrfs_root *root,
8567 struct extent_buffer *eb, u64 bytenr,
8568 int level, u64 owner)
8570 struct btrfs_key key;
8571 struct btrfs_root *extent_root = root->fs_info->extent_root;
8572 struct btrfs_path path;
8573 struct btrfs_extent_item *ei;
8574 struct btrfs_extent_inline_ref *iref;
8575 struct extent_buffer *leaf;
8581 u32 nodesize = root->nodesize;
/* Prefer the compact METADATA_ITEM key on skinny-metadata filesystems */
8588 btrfs_init_path(&path);
8589 key.objectid = bytenr;
8590 if (btrfs_fs_incompat(root->fs_info,
8591 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8592 key.type = BTRFS_METADATA_ITEM_KEY;
8594 key.type = BTRFS_EXTENT_ITEM_KEY;
8595 key.offset = (u64)-1;
8597 /* Search for the backref in extent tree */
8598 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8600 err |= BACKREF_MISSING;
/* Step back to the extent item covering @bytenr, if any */
8603 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8605 err |= BACKREF_MISSING;
8609 leaf = path.nodes[0];
8610 slot = path.slots[0];
8611 btrfs_item_key_to_cpu(leaf, &key, slot);
8613 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM encodes the level in key.offset; EXTENT_ITEM carries a
 * btrfs_tree_block_info before the inline refs */
8615 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8616 skinny_level = (int)key.offset;
8617 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8619 struct btrfs_tree_block_info *info;
8621 info = (struct btrfs_tree_block_info *)(ei + 1);
8622 skinny_level = btrfs_tree_block_level(leaf, info);
8623 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* With @eb available, cross-check flags, generation, level and refcount */
8630 if (!(btrfs_extent_flags(leaf, ei) &
8631 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8633 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8634 key.objectid, nodesize,
8635 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8636 err = BACKREF_MISMATCH;
8638 header_gen = btrfs_header_generation(eb);
8639 extent_gen = btrfs_extent_generation(leaf, ei);
8640 if (header_gen != extent_gen) {
8642 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8643 key.objectid, nodesize, header_gen,
8645 err = BACKREF_MISMATCH;
8647 if (level != skinny_level) {
8649 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8650 key.objectid, nodesize, level, skinny_level);
8651 err = BACKREF_MISMATCH;
8653 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8655 "extent[%llu %u] is referred by other roots than %llu",
8656 key.objectid, nodesize, root->objectid);
8657 err = BACKREF_MISMATCH;
8662 * Iterate the extent/metadata item to find the exact backref
8664 item_size = btrfs_item_size_nr(leaf, slot);
8665 ptr = (unsigned long)iref;
8666 end = (unsigned long)ei + item_size;
8668 iref = (struct btrfs_extent_inline_ref *)ptr;
8669 type = btrfs_extent_inline_ref_type(leaf, iref);
8670 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8672 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8673 (offset == root->objectid || offset == owner)) {
8675 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8676 /* Check if the backref points to valid referencer */
/* Recursive call: a shared ref is valid if its parent block checks out */
8677 found_ref = !check_tree_block_ref(root, NULL, offset,
8683 ptr += btrfs_extent_inline_ref_size(type);
8687 * Inlined extent item doesn't have what we need, check
8688 * TREE_BLOCK_REF_KEY
8691 btrfs_release_path(&path);
8692 key.objectid = bytenr;
8693 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8694 key.offset = root->objectid;
8696 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8701 err |= BACKREF_MISSING;
8703 btrfs_release_path(&path);
8704 if (eb && (err & BACKREF_MISSING))
8705 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8706 bytenr, nodesize, owner, level);
8711 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8713 * Return >0 any error found and output error message
8714 * Return 0 for no error found
8716 static int check_extent_data_item(struct btrfs_root *root,
8717 struct extent_buffer *eb, int slot)
8719 struct btrfs_file_extent_item *fi;
8720 struct btrfs_path path;
8721 struct btrfs_root *extent_root = root->fs_info->extent_root;
8722 struct btrfs_key fi_key;
8723 struct btrfs_key dbref_key;
8724 struct extent_buffer *leaf;
8725 struct btrfs_extent_item *ei;
8726 struct btrfs_extent_inline_ref *iref;
8727 struct btrfs_extent_data_ref *dref;
8729 u64 file_extent_gen;
8732 u64 extent_num_bytes;
8740 int found_dbackref = 0;
8744 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8745 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8746 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8748 /* Nothing to check for hole and inline data extents */
8749 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8750 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8753 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8754 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8755 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8757 /* Check unaligned disk_num_bytes and num_bytes */
8758 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8760 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8761 fi_key.objectid, fi_key.offset, disk_num_bytes,
8763 err |= BYTES_UNALIGNED;
/* Accumulate global allocation/reference accounting for the report */
8765 data_bytes_allocated += disk_num_bytes;
8767 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8769 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8770 fi_key.objectid, fi_key.offset, extent_num_bytes,
8772 err |= BYTES_UNALIGNED;
8774 data_bytes_referenced += extent_num_bytes;
8776 owner = btrfs_header_owner(eb);
8778 /* Check the extent item of the file extent in extent tree */
8779 btrfs_init_path(&path);
8780 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8781 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8782 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8784 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8786 err |= BACKREF_MISSING;
8790 leaf = path.nodes[0];
8791 slot = path.slots[0];
8792 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8794 extent_flags = btrfs_extent_flags(leaf, ei);
8795 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item must be flagged DATA and be at least as old as the
 * file extent that references it */
8797 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8799 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8800 disk_bytenr, disk_num_bytes,
8801 BTRFS_EXTENT_FLAG_DATA);
8802 err |= BACKREF_MISMATCH;
8805 if (file_extent_gen < extent_gen) {
8807 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8808 disk_bytenr, disk_num_bytes, file_extent_gen,
8810 err |= BACKREF_MISMATCH;
8813 /* Check data backref inside that extent item */
8814 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8815 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8816 ptr = (unsigned long)iref;
8817 end = (unsigned long)ei + item_size;
8819 iref = (struct btrfs_extent_inline_ref *)ptr;
8820 type = btrfs_extent_inline_ref_type(leaf, iref);
8821 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8823 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8824 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8825 if (ref_root == owner || ref_root == root->objectid)
8827 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: valid if the parent tree block it names checks out */
8828 found_dbackref = !check_tree_block_ref(root, NULL,
8829 btrfs_extent_inline_ref_offset(leaf, iref),
8835 ptr += btrfs_extent_inline_ref_size(type);
8838 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
8839 if (!found_dbackref) {
8840 btrfs_release_path(&path);
8842 btrfs_init_path(&path);
8843 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8844 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are addressed by a hash of (root, objectid, offset) */
8845 dbref_key.offset = hash_extent_data_ref(root->objectid,
8846 fi_key.objectid, fi_key.offset);
8848 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8849 &dbref_key, &path, 0, 0);
8854 if (!found_dbackref)
8855 err |= BACKREF_MISSING;
8857 btrfs_release_path(&path);
8858 if (err & BACKREF_MISSING) {
8859 error("data extent[%llu %llu] backref lost",
8860 disk_bytenr, disk_num_bytes);
8866 * Get real tree block level for the case like shared block
8867 * Return >= 0 as tree level
8868 * Return <0 for error
8870 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8872 struct extent_buffer *eb;
8873 struct btrfs_path path;
8874 struct btrfs_key key;
8875 struct btrfs_extent_item *ei;
8878 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8883 /* Search extent tree for extent generation and level */
8884 key.objectid = bytenr;
8885 key.type = BTRFS_METADATA_ITEM_KEY;
8886 key.offset = (u64)-1;
8888 btrfs_init_path(&path);
8889 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8892 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8900 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8901 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8902 struct btrfs_extent_item);
/* Only tree blocks carry a level; a data extent here is an error */
8903 flags = btrfs_extent_flags(path.nodes[0], ei);
8904 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8909 /* Get transid for later read_tree_block() check */
8910 transid = btrfs_extent_generation(path.nodes[0], ei);
8912 /* Get backref level as one source */
8913 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8914 backref_level = key.offset;
8916 struct btrfs_tree_block_info *info;
8918 info = (struct btrfs_tree_block_info *)(ei + 1);
8919 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8921 btrfs_release_path(&path);
8923 /* Get level from tree block as an alternative source */
8924 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8925 if (!extent_buffer_uptodate(eb)) {
8926 free_extent_buffer(eb);
8929 header_level = btrfs_header_level(eb);
8930 free_extent_buffer(eb);
/* The two sources must agree, else the level is untrustworthy */
8932 if (header_level != backref_level)
8934 return header_level;
8937 btrfs_release_path(&path);
8942 * Check if a tree block backref is valid (points to a valid tree block)
8943 * if level == -1, level will be resolved
8944 * Return >0 for any error found and print error message
8946 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8947 u64 bytenr, int level)
8949 struct btrfs_root *root;
8950 struct btrfs_key key;
8951 struct btrfs_path path;
8952 struct extent_buffer *eb;
8953 struct extent_buffer *node;
8954 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8958 /* Query level for level == -1 special case */
8960 level = query_tree_block_level(fs_info, bytenr);
8962 err |= REFERENCER_MISSING;
/* Look up the root the backref claims as referencer */
8966 key.objectid = root_id;
8967 key.type = BTRFS_ROOT_ITEM_KEY;
8968 key.offset = (u64)-1;
8970 root = btrfs_read_fs_root(fs_info, &key);
8972 err |= REFERENCER_MISSING;
8976 /* Read out the tree block to get item/node key */
8977 eb = read_tree_block(root, bytenr, root->nodesize, 0);
8978 if (!extent_buffer_uptodate(eb)) {
8979 err |= REFERENCER_MISSING;
8980 free_extent_buffer(eb);
8984 /* Empty tree, no need to check key */
8985 if (!btrfs_header_nritems(eb) && !level) {
8986 free_extent_buffer(eb);
/* Grab the block's first key so we can walk down to it from the root */
8991 btrfs_node_key_to_cpu(eb, &key, 0);
8993 btrfs_item_key_to_cpu(eb, &key, 0);
8995 free_extent_buffer(eb);
8997 btrfs_init_path(&path);
8998 /* Search with the first key, to ensure we can reach it */
8999 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9001 err |= REFERENCER_MISSING;
/* The node reached at @level must be the exact block we started from */
9005 node = path.nodes[level];
9006 if (btrfs_header_bytenr(node) != bytenr) {
9008 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9009 bytenr, nodesize, bytenr,
9010 btrfs_header_bytenr(node));
9011 err |= REFERENCER_MISMATCH;
9013 if (btrfs_header_level(node) != level) {
9015 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9016 bytenr, nodesize, level,
9017 btrfs_header_level(node));
9018 err |= REFERENCER_MISMATCH;
9022 btrfs_release_path(&path);
9024 if (err & REFERENCER_MISSING) {
9026 error("extent [%llu %d] lost referencer (owner: %llu)",
9027 bytenr, nodesize, root_id);
9030 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9031 bytenr, nodesize, root_id, level);
9038 * Check referencer for shared block backref
9039 * If level == -1, this function will resolve the level.
9041 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9042 u64 parent, u64 bytenr, int level)
9044 struct extent_buffer *eb;
9045 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9047 int found_parent = 0;
/* Read the claimed parent node and verify it really points at @bytenr */
9050 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9051 if (!extent_buffer_uptodate(eb))
9055 level = query_tree_block_level(fs_info, bytenr);
/* Parent must sit exactly one level above the child */
9059 if (level + 1 != btrfs_header_level(eb))
9062 nr = btrfs_header_nritems(eb);
9063 for (i = 0; i < nr; i++) {
9064 if (bytenr == btrfs_node_blockptr(eb, i)) {
9070 free_extent_buffer(eb);
9071 if (!found_parent) {
9073 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9074 bytenr, nodesize, parent, level);
9075 return REFERENCER_MISSING;
/*
 * Replace @root's node with a freshly allocated empty one.
 *
 * Used when re-initializing a damaged tree (extent root, data reloc root).
 * Allocates a new block, writes an empty header with the current transid
 * and the filesystem/chunk-tree UUIDs, then swaps it in.  If @overwrite is
 * set the old root block is reused in place (extent_buffer_get on the old
 * node) instead of allocating.
 */
9080 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9081 struct btrfs_root *root, int overwrite)
9083 struct extent_buffer *c;
9084 struct extent_buffer *old = root->node;
9087 struct btrfs_disk_key disk_key = {0,0,0};
9093 extent_buffer_get(c);
9096 c = btrfs_alloc_free_block(trans, root,
9098 root->root_key.objectid,
9099 &disk_key, level, 0, 0);
9102 extent_buffer_get(c);
/* Build an empty header for the new root block */
9106 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9107 btrfs_set_header_level(c, level);
9108 btrfs_set_header_bytenr(c, c->start);
9109 btrfs_set_header_generation(c, trans->transid);
9110 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9111 btrfs_set_header_owner(c, root->root_key.objectid);
9113 write_extent_buffer(c, root->fs_info->fsid,
9114 btrfs_header_fsid(), BTRFS_FSID_SIZE);
9116 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9117 btrfs_header_chunk_tree_uuid(c),
9120 btrfs_mark_buffer_dirty(c);
9122 * this case can happen in the following case:
9124 * 1.overwrite previous root.
9126 * 2.reinit reloc data root, this is because we skip pin
9127 * down reloc data tree before which means we can allocate
9128 * same block bytenr here.
9130 if (old->start == c->start) {
9131 btrfs_set_root_generation(&root->root_item,
9133 root->root_item.level = btrfs_header_level(root->node);
9134 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9135 &root->root_key, &root->root_item);
9137 free_extent_buffer(c);
/* Drop the reference to the old root node and publish the new one */
9141 free_extent_buffer(old);
9143 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb.
 *
 * Marks each visited block in fs_info->pinned_extents so later allocations
 * (during extent-tree rebuild) will not reuse it.  When walking the tree
 * root (@tree_root set) it also descends into every subvolume root found
 * in ROOT_ITEMs, skipping the extent root and reloc roots.  Re-visiting an
 * already-pinned block is short-circuited to avoid loops on a broken fs.
 */
9147 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9148 struct extent_buffer *eb, int tree_root)
9150 struct extent_buffer *tmp;
9151 struct btrfs_root_item *ri;
9152 struct btrfs_key key;
9155 int level = btrfs_header_level(eb);
9161 * If we have pinned this block before, don't pin it again.
9162 * This can not only avoid forever loop with broken filesystem
9163 * but also give us some speedups.
9165 if (test_range_bit(&fs_info->pinned_extents, eb->start,
9166 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9169 btrfs_pin_extent(fs_info, eb->start, eb->len);
9171 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9172 nritems = btrfs_header_nritems(eb);
9173 for (i = 0; i < nritems; i++) {
9175 btrfs_item_key_to_cpu(eb, &key, i);
9176 if (key.type != BTRFS_ROOT_ITEM_KEY)
9178 /* Skip the extent root and reloc roots */
9179 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9180 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9181 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9183 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9184 bytenr = btrfs_disk_root_bytenr(eb, ri);
9187 * If at any point we start needing the real root we
9188 * will have to build a stump root for the root we are
9189 * in, but for now this doesn't actually use the root so
9190 * just pass in extent_root.
9192 tmp = read_tree_block(fs_info->extent_root, bytenr,
9194 if (!extent_buffer_uptodate(tmp)) {
9195 fprintf(stderr, "Error reading root block\n");
9198 ret = pin_down_tree_blocks(fs_info, tmp, 0);
9199 free_extent_buffer(tmp);
/* Interior node: recurse into each child pointer */
9203 bytenr = btrfs_node_blockptr(eb, i);
9205 /* If we aren't the tree root don't read the block */
9206 if (level == 1 && !tree_root) {
9207 btrfs_pin_extent(fs_info, bytenr, nodesize);
9211 tmp = read_tree_block(fs_info->extent_root, bytenr,
9213 if (!extent_buffer_uptodate(tmp)) {
9214 fprintf(stderr, "Error reading tree block\n");
9217 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9218 free_extent_buffer(tmp);
/*
 * Pin all in-use metadata: the entire chunk tree, then the tree root and
 * everything reachable from it (subvolume roots included, via tree_root=1).
 * Called before re-initializing the extent tree so new allocations cannot
 * clobber live metadata.
 */
9227 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9231 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9235 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups directly from the chunk tree.
 *
 * Walks every CHUNK_ITEM, creates a matching in-memory block group with
 * the chunk's type and range, marks the range free in free_space_cache,
 * and finally re-reads all block groups to recompute the avail_*_alloc
 * bits (which are zeroed first, since restored images often have stale
 * alloc bits).
 */
9238 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9240 struct btrfs_block_group_cache *cache;
9241 struct btrfs_path *path;
9242 struct extent_buffer *leaf;
9243 struct btrfs_chunk *chunk;
9244 struct btrfs_key key;
9248 path = btrfs_alloc_path();
9253 key.type = BTRFS_CHUNK_ITEM_KEY;
9256 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9258 btrfs_free_path(path);
9263 * We do this in case the block groups were screwed up and had alloc
9264 * bits that aren't actually set on the chunks. This happens with
9265 * restored images every time and could happen in real life I guess.
9267 fs_info->avail_data_alloc_bits = 0;
9268 fs_info->avail_metadata_alloc_bits = 0;
9269 fs_info->avail_system_alloc_bits = 0;
9271 /* First we need to create the in-memory block groups */
9273 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9274 ret = btrfs_next_leaf(fs_info->chunk_root, path);
9276 btrfs_free_path(path);
9284 leaf = path->nodes[0];
9285 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9286 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9291 chunk = btrfs_item_ptr(leaf, path->slots[0],
9292 struct btrfs_chunk);
/* key.offset is the chunk's logical start, its length comes from the item */
9293 btrfs_add_block_group(fs_info, 0,
9294 btrfs_chunk_type(leaf, chunk),
9295 key.objectid, key.offset,
9296 btrfs_chunk_length(leaf, chunk));
9297 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9298 key.offset + btrfs_chunk_length(leaf, chunk),
/* Second pass: iterate the new block groups to refresh the alloc bits */
9304 cache = btrfs_lookup_first_block_group(fs_info, start);
9308 start = cache->key.objectid + cache->key.offset;
9311 btrfs_free_path(path);
/*
 * Remove any pending balance state and reset relocation trees.
 *
 * Deletes the BALANCE_ITEM from the tree root, batch-deletes every
 * TREE_RELOC root item, then re-reads the data reloc tree, reinitializes
 * it with btrfs_fsck_reinit_root() and recreates its root directory.
 * Called from reinit_extent_tree() so a half-finished balance cannot
 * confuse the rebuilt extent tree.
 */
9315 static int reset_balance(struct btrfs_trans_handle *trans,
9316 struct btrfs_fs_info *fs_info)
9318 struct btrfs_root *root = fs_info->tree_root;
9319 struct btrfs_path *path;
9320 struct extent_buffer *leaf;
9321 struct btrfs_key key;
9322 int del_slot, del_nr = 0;
9326 path = btrfs_alloc_path();
9330 key.objectid = BTRFS_BALANCE_OBJECTID;
9331 key.type = BTRFS_BALANCE_ITEM_KEY;
9334 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item on disk: skip straight to data reloc handling */
9339 goto reinit_data_reloc;
9344 ret = btrfs_del_item(trans, root, path);
9347 btrfs_release_path(path);
/* Delete all TREE_RELOC root items, batching contiguous slots */
9349 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9350 key.type = BTRFS_ROOT_ITEM_KEY;
9353 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9357 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9362 ret = btrfs_del_items(trans, root, path,
9369 btrfs_release_path(path);
9372 ret = btrfs_search_slot(trans, root, &key, path,
9379 leaf = path->nodes[0];
9380 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9381 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9383 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9388 del_slot = path->slots[0];
9397 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9401 btrfs_release_path(path);
/* Re-create an empty data reloc tree with a fresh root directory */
9404 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9405 key.type = BTRFS_ROOT_ITEM_KEY;
9406 key.offset = (u64)-1;
9407 root = btrfs_read_fs_root(fs_info, &key);
9409 fprintf(stderr, "Error reading data reloc tree\n");
9410 ret = PTR_ERR(root);
9413 record_root_in_trans(trans, root);
9414 ret = btrfs_fsck_reinit_root(trans, root, 0);
9417 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9419 btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree from scratch (--init-extent-tree).
 *
 * Steps: refuse mixed block groups; pin every live metadata block so it
 * cannot be overwritten; drop and recreate the in-memory block groups from
 * the chunk tree; reinit the extent root to an empty tree; re-insert one
 * BLOCK_GROUP_ITEM per block group; and finally clear any pending balance
 * state.  The actual extent records are re-created later as the trees are
 * walked.
 */
9423 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9424 struct btrfs_fs_info *fs_info)
9430 * The only reason we don't do this is because right now we're just
9431 * walking the trees we find and pinning down their bytes, we don't look
9432 * at any of the leaves. In order to do mixed groups we'd have to check
9433 * the leaves of any fs roots and pin down the bytes for any file
9434 * extents we find. Not hard but why do it if we don't have to?
9436 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9437 fprintf(stderr, "We don't support re-initing the extent tree "
9438 "for mixed block groups yet, please notify a btrfs "
9439 "developer you want to do this so they can add this "
9440 "functionality.\n");
9445 * first we need to walk all of the trees except the extent tree and pin
9446 * down the bytes that are in use so we don't overwrite any existing
9449 ret = pin_metadata_blocks(fs_info);
9451 fprintf(stderr, "error pinning down used bytes\n");
9456 * Need to drop all the block groups since we're going to recreate all
9459 btrfs_free_block_groups(fs_info);
9460 ret = reset_block_groups(fs_info);
9462 fprintf(stderr, "error resetting the block groups\n");
9466 /* Ok we can allocate now, reinit the extent root */
9467 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9469 fprintf(stderr, "extent root initialization failed\n");
9471 * When the transaction code is updated we should end the
9472 * transaction, but for now progs only knows about commit so
9473 * just return an error.
9479 * Now we have all the in-memory block groups setup so we can make
9480 * allocations properly, and the metadata we care about is safe since we
9481 * pinned all of it above.
9484 struct btrfs_block_group_cache *cache;
9486 cache = btrfs_lookup_first_block_group(fs_info, start);
9489 start = cache->key.objectid + cache->key.offset;
/* Re-insert the BLOCK_GROUP_ITEM for each in-memory block group */
9490 ret = btrfs_insert_item(trans, fs_info->extent_root,
9491 &cache->key, &cache->item,
9492 sizeof(cache->item));
9494 fprintf(stderr, "Error adding block group\n");
9497 btrfs_extent_post_op(trans, fs_info->extent_root);
9500 ret = reset_balance(trans, fs_info);
9502 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of the tree block @eb.
 *
 * Resolves the owning root from the block header, starts a one-item
 * transaction, and does a write search (cow=1) down to the block's own
 * level with its first key — which CoWs every block on the path, including
 * @eb itself.  The transaction is committed unconditionally.
 */
9507 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9509 struct btrfs_path *path;
9510 struct btrfs_trans_handle *trans;
9511 struct btrfs_key key;
9514 printf("Recowing metadata block %llu\n", eb->start);
9515 key.objectid = btrfs_header_owner(eb);
9516 key.type = BTRFS_ROOT_ITEM_KEY;
9517 key.offset = (u64)-1;
9519 root = btrfs_read_fs_root(root->fs_info, &key);
9521 fprintf(stderr, "Couldn't find owner root %llu\n",
9523 return PTR_ERR(root);
9526 path = btrfs_alloc_path();
9530 trans = btrfs_start_transaction(root, 1);
9531 if (IS_ERR(trans)) {
9532 btrfs_free_path(path);
9533 return PTR_ERR(trans);
/* lowest_level = the block's own level, so the search CoWs down to it */
9536 path->lowest_level = btrfs_header_level(eb);
9537 if (path->lowest_level)
9538 btrfs_node_key_to_cpu(eb, &key, 0);
9540 btrfs_item_key_to_cpu(eb, &key, 0);
9542 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9543 btrfs_commit_transaction(trans, root);
9544 btrfs_free_path(path);
/*
 * Delete a single known-bad item recorded earlier by the checker.
 *
 * Resolves the owning root from @bad->root_id, then searches for
 * @bad->key with ins_len=-1/cow=1 and removes the item inside a small
 * transaction that is committed before returning.
 */
9548 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9550 struct btrfs_path *path;
9551 struct btrfs_trans_handle *trans;
9552 struct btrfs_key key;
9555 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9556 bad->key.type, bad->key.offset);
9557 key.objectid = bad->root_id;
9558 key.type = BTRFS_ROOT_ITEM_KEY;
9559 key.offset = (u64)-1;
9561 root = btrfs_read_fs_root(root->fs_info, &key);
9563 fprintf(stderr, "Couldn't find owner root %llu\n",
9565 return PTR_ERR(root);
9568 path = btrfs_alloc_path();
9572 trans = btrfs_start_transaction(root, 1);
9573 if (IS_ERR(trans)) {
9574 btrfs_free_path(path);
9575 return PTR_ERR(trans);
9578 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9584 ret = btrfs_del_item(trans, root, path);
9586 btrfs_commit_transaction(trans, root);
9587 btrfs_free_path(path);
/*
 * Discard the log tree (--clear-log / zero-log): clear the log root
 * pointer and level in the superblock inside a committed transaction, so
 * the log is never replayed on the next mount.
 */
9591 static int zero_log_tree(struct btrfs_root *root)
9593 struct btrfs_trans_handle *trans;
9596 trans = btrfs_start_transaction(root, 1);
9597 if (IS_ERR(trans)) {
9598 ret = PTR_ERR(trans);
9601 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9602 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9603 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range [@start, @start+@len).
 *
 * Reads the range one sector at a time into @buf (caller-provided, at
 * least one sector in size) and writes each sector's csum into the csum
 * tree via btrfs_csum_file_block().  The third argument (start + len) is
 * the logical end of the whole run, used by the csum code for item sizing.
 */
9607 static int populate_csum(struct btrfs_trans_handle *trans,
9608 struct btrfs_root *csum_root, char *buf, u64 start,
9615 while (offset < len) {
9616 sectorsize = csum_root->sectorsize;
9617 ret = read_extent_data(csum_root, buf, start + offset,
9621 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9622 start + offset, buf, sectorsize);
9625 offset += sectorsize;
/*
 * Rebuild csums for every regular file extent in one fs/subvolume tree.
 *
 * Iterates @cur_root item by item, and for each EXTENT_DATA of type REG
 * (inline and prealloc extents are skipped) calls populate_csum() over the
 * extent's on-disk range.  A one-sector scratch buffer is allocated up
 * front and reused for every extent.
 */
9630 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9631 struct btrfs_root *csum_root,
9632 struct btrfs_root *cur_root)
9634 struct btrfs_path *path;
9635 struct btrfs_key key;
9636 struct extent_buffer *node;
9637 struct btrfs_file_extent_item *fi;
9644 path = btrfs_alloc_path();
9647 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9657 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9660 /* Iterate all regular file extents and fill their csums */
9662 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9664 if (key.type != BTRFS_EXTENT_DATA_KEY)
9666 node = path->nodes[0];
9667 slot = path->slots[0];
9668 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9669 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9671 start = btrfs_file_extent_disk_bytenr(node, fi);
9672 len = btrfs_file_extent_disk_num_bytes(node, fi);
9674 ret = populate_csum(trans, csum_root, buf, start, len);
9681 * TODO: if next leaf is corrupted, jump to nearest next valid
9684 ret = btrfs_next_item(cur_root, path);
9694 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning every fs/subvolume tree.
 *
 * Walks the tree root for ROOT_ITEMs whose objectid is an fs-tree id
 * (is_fstree), reads each subvolume root and delegates to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree cannot be
 * trusted (e.g. after --init-extent-tree).
 */
9699 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9700 struct btrfs_root *csum_root)
9702 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9703 struct btrfs_path *path;
9704 struct btrfs_root *tree_root = fs_info->tree_root;
9705 struct btrfs_root *cur_root;
9706 struct extent_buffer *node;
9707 struct btrfs_key key;
9711 path = btrfs_alloc_path();
9715 key.objectid = BTRFS_FS_TREE_OBJECTID;
9717 key.type = BTRFS_ROOT_ITEM_KEY;
9719 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9728 node = path->nodes[0];
9729 slot = path->slots[0];
9730 btrfs_item_key_to_cpu(node, &key, slot);
/* Stop past the last possible subvolume id; skip non-fs-tree roots */
9731 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9733 if (key.type != BTRFS_ROOT_ITEM_KEY)
9735 if (!is_fstree(key.objectid))
9737 key.offset = (u64)-1;
9739 cur_root = btrfs_read_fs_root(fs_info, &key);
9740 if (IS_ERR(cur_root) || !cur_root) {
9741 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9745 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9750 ret = btrfs_next_item(tree_root, path);
9760 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree: for every
 * EXTENT_ITEM flagged BTRFS_EXTENT_FLAG_DATA, recompute and insert its
 * checksums via populate_csum().  Faster than the fs-tree walk, but only
 * usable when the extent tree itself is trustworthy.
 * NOTE(review): lines are elided in this excerpt; the loop construct,
 * most error paths and the final return are not visible.
 */
9764 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9765 struct btrfs_root *csum_root)
9767 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9768 struct btrfs_path *path;
9769 struct btrfs_extent_item *ei;
9770 struct extent_buffer *leaf;
9772 struct btrfs_key key;
9775 path = btrfs_alloc_path();
9780 key.type = BTRFS_EXTENT_ITEM_KEY;
9783 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9785 btrfs_free_path(path);
/* Per-sector scratch buffer handed down to populate_csum(). */
9789 buf = malloc(csum_root->sectorsize);
9791 btrfs_free_path(path);
/* Leaf exhausted — advance to the next one before reading the slot. */
9796 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9797 ret = btrfs_next_leaf(extent_root, path);
9805 leaf = path->nodes[0];
9807 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9808 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9813 ei = btrfs_item_ptr(leaf, path->slots[0],
9814 struct btrfs_extent_item);
/* Metadata extents carry no data csums; skip anything not DATA. */
9815 if (!(btrfs_extent_flags(leaf, ei) &
9816 BTRFS_EXTENT_FLAG_DATA)) {
/* key.objectid is the extent's bytenr; the length argument (key.offset,
 * presumably) is on an elided line. */
9821 ret = populate_csum(trans, csum_root, buf, key.objectid,
9828 btrfs_free_path(path);
9834 * Recalculate the csum and put it into the csum tree.
9836 * Extent tree init will wipe out all the extent info, so in that case, we
9837 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9838 * will use fs/subvol trees to init the csum tree.
/* Dispatcher: choose the data source for rebuilding the csum tree.
 * The third parameter (the search_fs_tree flag described above) and the
 * surrounding if/else are on elided lines in this excerpt. */
9840 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9841 struct btrfs_root *csum_root,
9845 return fill_csum_tree_from_fs(trans, csum_root);
9847 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache():
 * pop every cache_extent, free its containing root_item_info, then free
 * the tree itself and NULL the global so a later rebuild starts fresh.
 * Safe to call when the cache was never built (early return on NULL).
 */
9850 static void free_roots_info_cache(void)
9852 if (!roots_info_cache)
9855 while (!cache_tree_empty(roots_info_cache)) {
9856 struct cache_extent *entry;
9857 struct root_item_info *rii;
9859 entry = first_cache_extent(roots_info_cache);
9862 remove_cache_extent(roots_info_cache, entry);
/* The cache_extent is embedded in root_item_info; free the container. */
9863 rii = container_of(entry, struct root_item_info, cache_extent);
9867 free(roots_info_cache);
9868 roots_info_cache = NULL;
/*
 * Populate the global roots_info_cache with one root_item_info per
 * subvolume/snapshot root id, recording the bytenr, generation and level
 * of the highest tree block found for that root in the extent tree.
 * This is the ground truth later compared against ROOT_ITEMs by
 * maybe_repair_root_item().
 * Only tree-block extents with a first (and only) inline ref of type
 * TREE_BLOCK_REF are considered root candidates.
 * NOTE(review): lines are elided in this excerpt; some error paths, the
 * loop construct and the final return are not visible.
 */
9871 static int build_roots_info_cache(struct btrfs_fs_info *info)
9874 struct btrfs_key key;
9875 struct extent_buffer *leaf;
9876 struct btrfs_path *path;
/* Lazily allocate the cache; reused across calls until freed. */
9878 if (!roots_info_cache) {
9879 roots_info_cache = malloc(sizeof(*roots_info_cache));
9880 if (!roots_info_cache)
9882 cache_tree_init(roots_info_cache);
9885 path = btrfs_alloc_path();
9890 key.type = BTRFS_EXTENT_ITEM_KEY;
9893 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9896 leaf = path->nodes[0];
9899 struct btrfs_key found_key;
9900 struct btrfs_extent_item *ei;
9901 struct btrfs_extent_inline_ref *iref;
9902 int slot = path->slots[0];
9907 struct cache_extent *entry;
9908 struct root_item_info *rii;
9910 if (slot >= btrfs_header_nritems(leaf)) {
9911 ret = btrfs_next_leaf(info->extent_root, path);
9918 leaf = path->nodes[0];
9919 slot = path->slots[0];
/* Only tree-block extents matter: EXTENT_ITEM (with the TREE_BLOCK
 * flag) or the compact METADATA_ITEM form. */
9922 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9924 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9925 found_key.type != BTRFS_METADATA_ITEM_KEY)
9928 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9929 flags = btrfs_extent_flags(leaf, ei);
9931 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9932 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM encodes the level in key.offset and has no
 * btrfs_tree_block_info; EXTENT_ITEM carries one before the irefs. */
9935 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9936 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9937 level = found_key.offset;
9939 struct btrfs_tree_block_info *binfo;
9941 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9942 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9943 level = btrfs_tree_block_level(leaf, binfo);
9947 * For a root extent, it must be of the following type and the
9948 * first (and only one) iref in the item.
9950 type = btrfs_extent_inline_ref_type(leaf, iref);
9951 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For TREE_BLOCK_REF the iref offset is the owning root's id. */
9954 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9955 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create a fresh record with a
 * sentinel level of (u8)-1 so any real level supersedes it. */
9957 rii = malloc(sizeof(struct root_item_info));
9962 rii->cache_extent.start = root_id;
9963 rii->cache_extent.size = 1;
9964 rii->level = (u8)-1;
9965 entry = &rii->cache_extent;
9966 ret = insert_cache_extent(roots_info_cache, entry);
9969 rii = container_of(entry, struct root_item_info,
9973 ASSERT(rii->cache_extent.start == root_id);
9974 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level block for each root: that block is
 * the root node. node_count tracks how many blocks share that
 * level — exactly 1 means an unambiguous root. */
9976 if (level > rii->level || rii->level == (u8)-1) {
9978 rii->bytenr = found_key.objectid;
9979 rii->gen = btrfs_extent_generation(leaf, ei);
9980 rii->node_count = 1;
9981 } else if (level == rii->level) {
9989 btrfs_free_path(path);
/*
 * Compare the on-disk ROOT_ITEM at @path against the root node info
 * recorded in roots_info_cache for @root_key->objectid, and — unless
 * @read_only_mode — rewrite the item's bytenr/level/generation in place
 * when they disagree (the 3.17 read-only snapshot corruption; see the
 * comment above repair_root_items()).
 * Returns an error when the cache has no entry or no unambiguous root
 * node for this root id; the remaining return-value conventions are on
 * elided lines in this excerpt.
 */
9994 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9995 struct btrfs_path *path,
9996 const struct btrfs_key *root_key,
9997 const int read_only_mode)
9999 const u64 root_id = root_key->objectid;
10000 struct cache_extent *entry;
10001 struct root_item_info *rii;
10002 struct btrfs_root_item ri;
10003 unsigned long offset;
10005 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10008 "Error: could not find extent items for root %llu\n",
10009 root_key->objectid);
10013 rii = container_of(entry, struct root_item_info, cache_extent);
10014 ASSERT(rii->cache_extent.start == root_id);
10015 ASSERT(rii->cache_extent.size == 1);
/* More than one candidate block at the top level means the scan could
 * not identify a single root node — refuse to guess. */
10017 if (rii->node_count != 1) {
10019 "Error: could not find btree root extent for root %llu\n",
/* Copy the item out of the leaf so we can inspect/update its fields. */
10024 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10025 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10027 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10028 btrfs_root_level(&ri) != rii->level ||
10029 btrfs_root_generation(&ri) != rii->gen) {
10032 * If we're in repair mode but our caller told us to not update
10033 * the root item, i.e. just check if it needs to be updated, don't
10034 * print this message, since the caller will call us again shortly
10035 * for the same root item without read only mode (the caller will
10036 * open a transaction first).
10038 if (!(read_only_mode && repair))
10040 "%sroot item for root %llu,"
10041 " current bytenr %llu, current gen %llu, current level %u,"
10042 " new bytenr %llu, new gen %llu, new level %u\n",
10043 (read_only_mode ? "" : "fixing "),
10045 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10046 btrfs_root_level(&ri),
10047 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than in the found root node is
 * inconsistent with this repair's premise — flag it, don't "fix". */
10049 if (btrfs_root_generation(&ri) > rii->gen) {
10051 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10052 root_id, btrfs_root_generation(&ri), rii->gen);
10056 if (!read_only_mode) {
10057 btrfs_set_root_bytenr(&ri, rii->bytenr);
10058 btrfs_set_root_level(&ri, rii->level);
10059 btrfs_set_root_generation(&ri, rii->gen);
10060 write_extent_buffer(path->nodes[0], &ri,
10061 offset, sizeof(ri));
10071 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
10072 * caused read-only snapshots to be corrupted if they were created at a moment
10073 * when the source subvolume/snapshot had orphan items. The issue was that the
10074 * on-disk root items became incorrect, referring to the pre orphan cleanup root
10075 * node instead of the post orphan cleanup root node.
10076 * So this function, and its callees, just detects and fixes those cases. Even
10077 * though the regression was for read-only snapshots, this function applies to
10078 * any snapshot/subvolume root.
10079 * This must be run before any other repair code - not doing it so, makes other
10080 * repair code delete or modify backrefs in the extent tree for example, which
10081 * will result in an inconsistent fs after repairing the root items.
/* Driver: build the roots_info_cache, then iterate all ROOT_ITEMs
 * (skipping TREE_RELOC) and run maybe_repair_root_item() on each, only
 * starting a transaction when a fix is actually needed. The positive
 * return value appears to be the count of repaired/outdated roots —
 * see the caller's "Fixed %d roots" message; confirm on elided lines. */
10083 static int repair_root_items(struct btrfs_fs_info *info)
10085 struct btrfs_path *path = NULL;
10086 struct btrfs_key key;
10087 struct extent_buffer *leaf;
10088 struct btrfs_trans_handle *trans = NULL;
10091 int need_trans = 0;
10093 ret = build_roots_info_cache(info);
10097 path = btrfs_alloc_path();
10103 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10104 key.type = BTRFS_ROOT_ITEM_KEY;
10109 * Avoid opening and committing transactions if a leaf doesn't have
10110 * any root items that need to be fixed, so that we avoid rotating
10111 * backup roots unnecessarily.
10114 trans = btrfs_start_transaction(info->tree_root, 1);
10115 if (IS_ERR(trans)) {
10116 ret = PTR_ERR(trans);
/* When trans is non-NULL the search is done writable (cow). */
10121 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10125 leaf = path->nodes[0];
10128 struct btrfs_key found_key;
10130 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10131 int no_more_keys = find_next_key(path, &key);
10133 btrfs_release_path(path);
/* Commit per-leaf so fixes land before moving on. */
10135 ret = btrfs_commit_transaction(trans,
10147 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10149 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Relocation trees are transient; never "repair" them. */
10151 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10154 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed and we're in repair mode but have no transaction
 * yet: restart this leaf writable (the read-only pass above only
 * detected the problem). */
10159 if (!trans && repair) {
10162 btrfs_release_path(path);
10172 free_roots_info_cache();
10173 btrfs_free_path(path);
10175 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage text for "btrfs check", consumed by usage()/getopt error paths
 * in cmd_check(). Keep entries in sync with the long_options table and
 * the getopt switch in cmd_check().
 */
10182 const char * const cmd_check_usage[] = {
10183 "btrfs check [options] <device>",
10184 "Check structural integrity of a filesystem (unmounted).",
10185 "Check structural integrity of an unmounted filesystem. Verify internal",
10186 "trees' consistency and item connectivity. In the repair mode try to",
10187 "fix the problems found.",
10188 "WARNING: the repair mode is considered dangerous",
10190 "-s|--super <superblock> use this superblock copy",
10191 "-b|--backup use the first valid backup root copy",
10192 "--repair try to repair the filesystem",
10193 "--readonly run in read-only mode (default)",
10194 "--init-csum-tree create a new CRC tree",
10195 "--init-extent-tree create a new extent tree",
10196 "--check-data-csum verify checksums of data blocks",
10197 "-Q|--qgroup-report print a report on qgroup consistency",
10198 "-E|--subvol-extents <subvolid>",
10199 " print subvolume extents and sharing state",
10200 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
10201 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
10202 "-p|--progress indicate progress",
10206 int cmd_check(int argc, char **argv)
10208 struct cache_tree root_cache;
10209 struct btrfs_root *root;
10210 struct btrfs_fs_info *info;
10213 u64 tree_root_bytenr = 0;
10214 u64 chunk_root_bytenr = 0;
10215 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10218 int init_csum_tree = 0;
10220 int qgroup_report = 0;
10221 int qgroups_repaired = 0;
10222 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10226 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10227 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10228 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10229 static const struct option long_options[] = {
10230 { "super", required_argument, NULL, 's' },
10231 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10232 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10233 { "init-csum-tree", no_argument, NULL,
10234 GETOPT_VAL_INIT_CSUM },
10235 { "init-extent-tree", no_argument, NULL,
10236 GETOPT_VAL_INIT_EXTENT },
10237 { "check-data-csum", no_argument, NULL,
10238 GETOPT_VAL_CHECK_CSUM },
10239 { "backup", no_argument, NULL, 'b' },
10240 { "subvol-extents", required_argument, NULL, 'E' },
10241 { "qgroup-report", no_argument, NULL, 'Q' },
10242 { "tree-root", required_argument, NULL, 'r' },
10243 { "chunk-root", required_argument, NULL,
10244 GETOPT_VAL_CHUNK_TREE },
10245 { "progress", no_argument, NULL, 'p' },
10246 { NULL, 0, NULL, 0}
10249 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10253 case 'a': /* ignored */ break;
10255 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10258 num = arg_strtou64(optarg);
10259 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10261 "ERROR: super mirror should be less than: %d\n",
10262 BTRFS_SUPER_MIRROR_MAX);
10265 bytenr = btrfs_sb_offset(((int)num));
10266 printf("using SB copy %llu, bytenr %llu\n", num,
10267 (unsigned long long)bytenr);
10273 subvolid = arg_strtou64(optarg);
10276 tree_root_bytenr = arg_strtou64(optarg);
10278 case GETOPT_VAL_CHUNK_TREE:
10279 chunk_root_bytenr = arg_strtou64(optarg);
10282 ctx.progress_enabled = true;
10286 usage(cmd_check_usage);
10287 case GETOPT_VAL_REPAIR:
10288 printf("enabling repair mode\n");
10290 ctree_flags |= OPEN_CTREE_WRITES;
10292 case GETOPT_VAL_READONLY:
10295 case GETOPT_VAL_INIT_CSUM:
10296 printf("Creating a new CRC tree\n");
10297 init_csum_tree = 1;
10299 ctree_flags |= OPEN_CTREE_WRITES;
10301 case GETOPT_VAL_INIT_EXTENT:
10302 init_extent_tree = 1;
10303 ctree_flags |= (OPEN_CTREE_WRITES |
10304 OPEN_CTREE_NO_BLOCK_GROUPS);
10307 case GETOPT_VAL_CHECK_CSUM:
10308 check_data_csum = 1;
10313 if (check_argc_exact(argc - optind, 1))
10314 usage(cmd_check_usage);
10316 if (ctx.progress_enabled) {
10317 ctx.tp = TASK_NOTHING;
10318 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10321 /* This check is the only reason for --readonly to exist */
10322 if (readonly && repair) {
10323 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10328 cache_tree_init(&root_cache);
10330 if((ret = check_mounted(argv[optind])) < 0) {
10331 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10334 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10339 /* only allow partial opening under repair mode */
10341 ctree_flags |= OPEN_CTREE_PARTIAL;
10343 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10344 chunk_root_bytenr, ctree_flags);
10346 fprintf(stderr, "Couldn't open file system\n");
10351 global_info = info;
10352 root = info->fs_root;
10355 * repair mode will force us to commit transaction which
10356 * will make us fail to load log tree when mounting.
10358 if (repair && btrfs_super_log_root(info->super_copy)) {
10359 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
10364 ret = zero_log_tree(root);
10366 fprintf(stderr, "fail to zero log tree\n");
10371 uuid_unparse(info->super_copy->fsid, uuidbuf);
10372 if (qgroup_report) {
10373 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10375 ret = qgroup_verify_all(info);
10381 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10382 subvolid, argv[optind], uuidbuf);
10383 ret = print_extent_state(info, subvolid);
10386 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10388 if (!extent_buffer_uptodate(info->tree_root->node) ||
10389 !extent_buffer_uptodate(info->dev_root->node) ||
10390 !extent_buffer_uptodate(info->chunk_root->node)) {
10391 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10396 if (init_extent_tree || init_csum_tree) {
10397 struct btrfs_trans_handle *trans;
10399 trans = btrfs_start_transaction(info->extent_root, 0);
10400 if (IS_ERR(trans)) {
10401 fprintf(stderr, "Error starting transaction\n");
10402 ret = PTR_ERR(trans);
10406 if (init_extent_tree) {
10407 printf("Creating a new extent tree\n");
10408 ret = reinit_extent_tree(trans, info);
10413 if (init_csum_tree) {
10414 fprintf(stderr, "Reinit crc root\n");
10415 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10417 fprintf(stderr, "crc root initialization failed\n");
10422 ret = fill_csum_tree(trans, info->csum_root,
10425 fprintf(stderr, "crc refilling failed\n");
10430 * Ok now we commit and run the normal fsck, which will add
10431 * extent entries for all of the items it finds.
10433 ret = btrfs_commit_transaction(trans, info->extent_root);
10437 if (!extent_buffer_uptodate(info->extent_root->node)) {
10438 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10442 if (!extent_buffer_uptodate(info->csum_root->node)) {
10443 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10448 if (!ctx.progress_enabled)
10449 fprintf(stderr, "checking extents\n");
10450 ret = check_chunks_and_extents(root);
10452 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10454 ret = repair_root_items(info);
10458 fprintf(stderr, "Fixed %d roots.\n", ret);
10460 } else if (ret > 0) {
10462 "Found %d roots with an outdated root item.\n",
10465 "Please run a filesystem check with the option --repair to fix them.\n");
10470 if (!ctx.progress_enabled) {
10471 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10472 fprintf(stderr, "checking free space tree\n");
10474 fprintf(stderr, "checking free space cache\n");
10476 ret = check_space_cache(root);
10481 * We used to have to have these hole extents in between our real
10482 * extents so if we don't have this flag set we need to make sure there
10483 * are no gaps in the file extents for inodes, otherwise we can just
10484 * ignore it when this happens.
10486 no_holes = btrfs_fs_incompat(root->fs_info,
10487 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
10488 if (!ctx.progress_enabled)
10489 fprintf(stderr, "checking fs roots\n");
10490 ret = check_fs_roots(root, &root_cache);
10494 fprintf(stderr, "checking csums\n");
10495 ret = check_csums(root);
10499 fprintf(stderr, "checking root refs\n");
10500 ret = check_root_refs(root, &root_cache);
10504 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10505 struct extent_buffer *eb;
10507 eb = list_first_entry(&root->fs_info->recow_ebs,
10508 struct extent_buffer, recow);
10509 list_del_init(&eb->recow);
10510 ret = recow_extent_buffer(root, eb);
10515 while (!list_empty(&delete_items)) {
10516 struct bad_item *bad;
10518 bad = list_first_entry(&delete_items, struct bad_item, list);
10519 list_del_init(&bad->list);
10521 ret = delete_bad_item(root, bad);
10525 if (info->quota_enabled) {
10527 fprintf(stderr, "checking quota groups\n");
10528 err = qgroup_verify_all(info);
10532 err = repair_qgroups(info, &qgroups_repaired);
10537 if (!list_empty(&root->fs_info->recow_ebs)) {
10538 fprintf(stderr, "Transid errors in file system\n");
10542 /* Don't override original ret */
10543 if (!ret && qgroups_repaired)
10544 ret = qgroups_repaired;
10546 if (found_old_backref) { /*
10547 * there was a disk format change when mixed
10548 * backref was in testing tree. The old format
10549 * existed about one week.
10551 printf("\n * Found old mixed backref format. "
10552 "The old format is not supported! *"
10553 "\n * Please mount the FS in readonly mode, "
10554 "backup data and re-format the FS. *\n\n");
10557 printf("found %llu bytes used err is %d\n",
10558 (unsigned long long)bytes_used, ret);
10559 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10560 printf("total tree bytes: %llu\n",
10561 (unsigned long long)total_btree_bytes);
10562 printf("total fs tree bytes: %llu\n",
10563 (unsigned long long)total_fs_tree_bytes);
10564 printf("total extent tree bytes: %llu\n",
10565 (unsigned long long)total_extent_tree_bytes);
10566 printf("btree space waste bytes: %llu\n",
10567 (unsigned long long)btree_space_waste);
10568 printf("file data blocks allocated: %llu\n referenced %llu\n",
10569 (unsigned long long)data_bytes_allocated,
10570 (unsigned long long)data_bytes_referenced);
10572 free_qgroup_counts();
10573 free_root_recs_tree(&root_cache);
10577 if (ctx.progress_enabled)
10578 task_deinit(ctx.info);