btrfs-progs: check: reduce size of extent_record
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 struct data_backref {
88         struct extent_backref node;
89         union {
90                 u64 parent;
91                 u64 root;
92         };
93         u64 owner;
94         u64 offset;
95         u64 disk_bytenr;
96         u64 bytes;
97         u64 ram_bytes;
98         u32 num_refs;
99         u32 found_ref;
100 };
101
102 /*
103  * Much like data_backref, just removed the undetermined members
104  * and change it to use list_head.
105  * During extent scan, it is stored in root->orphan_data_extent.
106  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
107  */
108 struct orphan_data_extent {
109         struct list_head list;
110         u64 root;
111         u64 objectid;
112         u64 offset;
113         u64 disk_bytenr;
114         u64 disk_len;
115 };
116
117 struct tree_backref {
118         struct extent_backref node;
119         union {
120                 u64 parent;
121                 u64 root;
122         };
123 };
124
125 /* Explicit initialization for extent_record::flag_block_full_backref */
126 enum { FLAG_UNSET = 2 };
127
128 struct extent_record {
129         struct list_head backrefs;
130         struct list_head dups;
131         struct list_head list;
132         struct cache_extent cache;
133         struct btrfs_disk_key parent_key;
134         u64 start;
135         u64 max_size;
136         u64 nr;
137         u64 refs;
138         u64 extent_item_refs;
139         u64 generation;
140         u64 parent_generation;
141         u64 info_objectid;
142         u32 num_duplicates;
143         u8 info_level;
144         unsigned int flag_block_full_backref:2;
145         unsigned int found_rec:1;
146         unsigned int content_checked:1;
147         unsigned int owner_ref_checked:1;
148         unsigned int is_root:1;
149         unsigned int metadata:1;
150         unsigned int bad_full_backref:1;
151         unsigned int crossing_stripes:1;
152         unsigned int wrong_chunk_type:1;
153 };
154
155 struct inode_backref {
156         struct list_head list;
157         unsigned int found_dir_item:1;
158         unsigned int found_dir_index:1;
159         unsigned int found_inode_ref:1;
160         unsigned int filetype:8;
161         int errors;
162         unsigned int ref_type;
163         u64 dir;
164         u64 index;
165         u16 namelen;
166         char name[0];
167 };
168
169 struct root_item_record {
170         struct list_head list;
171         u64 objectid;
172         u64 bytenr;
173         u64 last_snapshot;
174         u8 level;
175         u8 drop_level;
176         int level_size;
177         struct btrfs_key drop_key;
178 };
179
180 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
181 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
182 #define REF_ERR_NO_INODE_REF            (1 << 2)
183 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
184 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
185 #define REF_ERR_DUP_INODE_REF           (1 << 5)
186 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
187 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
188 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
189 #define REF_ERR_NO_ROOT_REF             (1 << 9)
190 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
191 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
192 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
193
194 struct file_extent_hole {
195         struct rb_node node;
196         u64 start;
197         u64 len;
198 };
199
200 struct inode_record {
201         struct list_head backrefs;
202         unsigned int checked:1;
203         unsigned int merging:1;
204         unsigned int found_inode_item:1;
205         unsigned int found_dir_item:1;
206         unsigned int found_file_extent:1;
207         unsigned int found_csum_item:1;
208         unsigned int some_csum_missing:1;
209         unsigned int nodatasum:1;
210         int errors;
211
212         u64 ino;
213         u32 nlink;
214         u32 imode;
215         u64 isize;
216         u64 nbytes;
217
218         u32 found_link;
219         u64 found_size;
220         u64 extent_start;
221         u64 extent_end;
222         struct rb_root holes;
223         struct list_head orphan_extents;
224
225         u32 refs;
226 };
227
228 #define I_ERR_NO_INODE_ITEM             (1 << 0)
229 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
230 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
231 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
232 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
233 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
234 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
235 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
236 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
237 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
238 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
239 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
240 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
241 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
242 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
243
244 struct root_backref {
245         struct list_head list;
246         unsigned int found_dir_item:1;
247         unsigned int found_dir_index:1;
248         unsigned int found_back_ref:1;
249         unsigned int found_forward_ref:1;
250         unsigned int reachable:1;
251         int errors;
252         u64 ref_root;
253         u64 dir;
254         u64 index;
255         u16 namelen;
256         char name[0];
257 };
258
259 struct root_record {
260         struct list_head backrefs;
261         struct cache_extent cache;
262         unsigned int found_root_item:1;
263         u64 objectid;
264         u32 found_ref;
265 };
266
267 struct ptr_node {
268         struct cache_extent cache;
269         void *data;
270 };
271
272 struct shared_node {
273         struct cache_extent cache;
274         struct cache_tree root_cache;
275         struct cache_tree inode_cache;
276         struct inode_record *current;
277         u32 refs;
278 };
279
280 struct block_info {
281         u64 start;
282         u32 size;
283 };
284
285 struct walk_control {
286         struct cache_tree shared;
287         struct shared_node *nodes[BTRFS_MAX_LEVEL];
288         int active_node;
289         int root_level;
290 };
291
292 struct bad_item {
293         struct btrfs_key key;
294         u64 root_id;
295         struct list_head list;
296 };
297
298 struct extent_entry {
299         u64 bytenr;
300         u64 bytes;
301         int count;
302         int broken;
303         struct list_head list;
304 };
305
306 struct root_item_info {
307         /* level of the root */
308         u8 level;
309         /* number of nodes at this level, must be 1 for a root */
310         int node_count;
311         u64 bytenr;
312         u64 gen;
313         struct cache_extent cache_extent;
314 };
315
316 static void *print_status_check(void *p)
317 {
318         struct task_ctx *priv = p;
319         const char work_indicator[] = { '.', 'o', 'O', 'o' };
320         uint32_t count = 0;
321         static char *task_position_string[] = {
322                 "checking extents",
323                 "checking free space cache",
324                 "checking fs roots",
325         };
326
327         task_period_start(priv->info, 1000 /* 1s */);
328
329         if (priv->tp == TASK_NOTHING)
330                 return NULL;
331
332         while (1) {
333                 printf("%s [%c]\r", task_position_string[priv->tp],
334                                 work_indicator[count % 4]);
335                 count++;
336                 fflush(stdout);
337                 task_period_wait(priv->info);
338         }
339         return NULL;
340 }
341
342 static int print_status_return(void *p)
343 {
344         printf("\n");
345         fflush(stdout);
346
347         return 0;
348 }
349
350 /* Compatible function to allow reuse of old codes */
351 static u64 first_extent_gap(struct rb_root *holes)
352 {
353         struct file_extent_hole *hole;
354
355         if (RB_EMPTY_ROOT(holes))
356                 return (u64)-1;
357
358         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
359         return hole->start;
360 }
361
362 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
363 {
364         struct file_extent_hole *hole1;
365         struct file_extent_hole *hole2;
366
367         hole1 = rb_entry(node1, struct file_extent_hole, node);
368         hole2 = rb_entry(node2, struct file_extent_hole, node);
369
370         if (hole1->start > hole2->start)
371                 return -1;
372         if (hole1->start < hole2->start)
373                 return 1;
374         /* Now hole1->start == hole2->start */
375         if (hole1->len >= hole2->len)
376                 /*
377                  * Hole 1 will be merge center
378                  * Same hole will be merged later
379                  */
380                 return -1;
381         /* Hole 2 will be merge center */
382         return 1;
383 }
384
385 /*
386  * Add a hole to the record
387  *
388  * This will do hole merge for copy_file_extent_holes(),
389  * which will ensure there won't be continuous holes.
390  */
391 static int add_file_extent_hole(struct rb_root *holes,
392                                 u64 start, u64 len)
393 {
394         struct file_extent_hole *hole;
395         struct file_extent_hole *prev = NULL;
396         struct file_extent_hole *next = NULL;
397
398         hole = malloc(sizeof(*hole));
399         if (!hole)
400                 return -ENOMEM;
401         hole->start = start;
402         hole->len = len;
403         /* Since compare will not return 0, no -EEXIST will happen */
404         rb_insert(holes, &hole->node, compare_hole);
405
406         /* simple merge with previous hole */
407         if (rb_prev(&hole->node))
408                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
409                                 node);
410         if (prev && prev->start + prev->len >= hole->start) {
411                 hole->len = hole->start + hole->len - prev->start;
412                 hole->start = prev->start;
413                 rb_erase(&prev->node, holes);
414                 free(prev);
415                 prev = NULL;
416         }
417
418         /* iterate merge with next holes */
419         while (1) {
420                 if (!rb_next(&hole->node))
421                         break;
422                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
423                                         node);
424                 if (hole->start + hole->len >= next->start) {
425                         if (hole->start + hole->len <= next->start + next->len)
426                                 hole->len = next->start + next->len -
427                                             hole->start;
428                         rb_erase(&next->node, holes);
429                         free(next);
430                         next = NULL;
431                 } else
432                         break;
433         }
434         return 0;
435 }
436
437 static int compare_hole_range(struct rb_node *node, void *data)
438 {
439         struct file_extent_hole *hole;
440         u64 start;
441
442         hole = (struct file_extent_hole *)data;
443         start = hole->start;
444
445         hole = rb_entry(node, struct file_extent_hole, node);
446         if (start < hole->start)
447                 return -1;
448         if (start >= hole->start && start < hole->start + hole->len)
449                 return 0;
450         return 1;
451 }
452
453 /*
454  * Delete a hole in the record
455  *
456  * This will do the hole split and is much restrict than add.
457  */
458 static int del_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole tmp;
463         u64 prev_start = 0;
464         u64 prev_len = 0;
465         u64 next_start = 0;
466         u64 next_len = 0;
467         struct rb_node *node;
468         int have_prev = 0;
469         int have_next = 0;
470         int ret = 0;
471
472         tmp.start = start;
473         tmp.len = len;
474         node = rb_search(holes, &tmp, compare_hole_range, NULL);
475         if (!node)
476                 return -EEXIST;
477         hole = rb_entry(node, struct file_extent_hole, node);
478         if (start + len > hole->start + hole->len)
479                 return -EEXIST;
480
481         /*
482          * Now there will be no overflap, delete the hole and re-add the
483          * split(s) if they exists.
484          */
485         if (start > hole->start) {
486                 prev_start = hole->start;
487                 prev_len = start - hole->start;
488                 have_prev = 1;
489         }
490         if (hole->start + hole->len > start + len) {
491                 next_start = start + len;
492                 next_len = hole->start + hole->len - start - len;
493                 have_next = 1;
494         }
495         rb_erase(node, holes);
496         free(hole);
497         if (have_prev) {
498                 ret = add_file_extent_hole(holes, prev_start, prev_len);
499                 if (ret < 0)
500                         return ret;
501         }
502         if (have_next) {
503                 ret = add_file_extent_hole(holes, next_start, next_len);
504                 if (ret < 0)
505                         return ret;
506         }
507         return 0;
508 }
509
510 static int copy_file_extent_holes(struct rb_root *dst,
511                                   struct rb_root *src)
512 {
513         struct file_extent_hole *hole;
514         struct rb_node *node;
515         int ret = 0;
516
517         node = rb_first(src);
518         while (node) {
519                 hole = rb_entry(node, struct file_extent_hole, node);
520                 ret = add_file_extent_hole(dst, hole->start, hole->len);
521                 if (ret)
522                         break;
523                 node = rb_next(node);
524         }
525         return ret;
526 }
527
528 static void free_file_extent_holes(struct rb_root *holes)
529 {
530         struct rb_node *node;
531         struct file_extent_hole *hole;
532
533         node = rb_first(holes);
534         while (node) {
535                 hole = rb_entry(node, struct file_extent_hole, node);
536                 rb_erase(node, holes);
537                 free(hole);
538                 node = rb_first(holes);
539         }
540 }
541
542 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
543
544 static void record_root_in_trans(struct btrfs_trans_handle *trans,
545                                  struct btrfs_root *root)
546 {
547         if (root->last_trans != trans->transid) {
548                 root->track_dirty = 1;
549                 root->last_trans = trans->transid;
550                 root->commit_root = root->node;
551                 extent_buffer_get(root->node);
552         }
553 }
554
555 static u8 imode_to_type(u32 imode)
556 {
557 #define S_SHIFT 12
558         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
559                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
560                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
561                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
562                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
563                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
564                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
565                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
566         };
567
568         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
569 #undef S_SHIFT
570 }
571
572 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
573 {
574         struct device_record *rec1;
575         struct device_record *rec2;
576
577         rec1 = rb_entry(node1, struct device_record, node);
578         rec2 = rb_entry(node2, struct device_record, node);
579         if (rec1->devid > rec2->devid)
580                 return -1;
581         else if (rec1->devid < rec2->devid)
582                 return 1;
583         else
584                 return 0;
585 }
586
587 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
588 {
589         struct inode_record *rec;
590         struct inode_backref *backref;
591         struct inode_backref *orig;
592         struct inode_backref *tmp;
593         struct orphan_data_extent *src_orphan;
594         struct orphan_data_extent *dst_orphan;
595         size_t size;
596         int ret;
597
598         rec = malloc(sizeof(*rec));
599         if (!rec)
600                 return ERR_PTR(-ENOMEM);
601         memcpy(rec, orig_rec, sizeof(*rec));
602         rec->refs = 1;
603         INIT_LIST_HEAD(&rec->backrefs);
604         INIT_LIST_HEAD(&rec->orphan_extents);
605         rec->holes = RB_ROOT;
606
607         list_for_each_entry(orig, &orig_rec->backrefs, list) {
608                 size = sizeof(*orig) + orig->namelen + 1;
609                 backref = malloc(size);
610                 if (!backref) {
611                         ret = -ENOMEM;
612                         goto cleanup;
613                 }
614                 memcpy(backref, orig, size);
615                 list_add_tail(&backref->list, &rec->backrefs);
616         }
617         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
618                 dst_orphan = malloc(sizeof(*dst_orphan));
619                 if (!dst_orphan) {
620                         ret = -ENOMEM;
621                         goto cleanup;
622                 }
623                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
624                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
625         }
626         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
627         BUG_ON(ret < 0);
628
629         return rec;
630
631 cleanup:
632         if (!list_empty(&rec->backrefs))
633                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
634                         list_del(&orig->list);
635                         free(orig);
636                 }
637
638         if (!list_empty(&rec->orphan_extents))
639                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
640                         list_del(&orig->list);
641                         free(orig);
642                 }
643
644         free(rec);
645
646         return ERR_PTR(ret);
647 }
648
649 static void print_orphan_data_extents(struct list_head *orphan_extents,
650                                       u64 objectid)
651 {
652         struct orphan_data_extent *orphan;
653
654         if (list_empty(orphan_extents))
655                 return;
656         printf("The following data extent is lost in tree %llu:\n",
657                objectid);
658         list_for_each_entry(orphan, orphan_extents, list) {
659                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
660                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
661                        orphan->disk_len);
662         }
663 }
664
665 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
666 {
667         u64 root_objectid = root->root_key.objectid;
668         int errors = rec->errors;
669
670         if (!errors)
671                 return;
672         /* reloc root errors, we print its corresponding fs root objectid*/
673         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
674                 root_objectid = root->root_key.offset;
675                 fprintf(stderr, "reloc");
676         }
677         fprintf(stderr, "root %llu inode %llu errors %x",
678                 (unsigned long long) root_objectid,
679                 (unsigned long long) rec->ino, rec->errors);
680
681         if (errors & I_ERR_NO_INODE_ITEM)
682                 fprintf(stderr, ", no inode item");
683         if (errors & I_ERR_NO_ORPHAN_ITEM)
684                 fprintf(stderr, ", no orphan item");
685         if (errors & I_ERR_DUP_INODE_ITEM)
686                 fprintf(stderr, ", dup inode item");
687         if (errors & I_ERR_DUP_DIR_INDEX)
688                 fprintf(stderr, ", dup dir index");
689         if (errors & I_ERR_ODD_DIR_ITEM)
690                 fprintf(stderr, ", odd dir item");
691         if (errors & I_ERR_ODD_FILE_EXTENT)
692                 fprintf(stderr, ", odd file extent");
693         if (errors & I_ERR_BAD_FILE_EXTENT)
694                 fprintf(stderr, ", bad file extent");
695         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
696                 fprintf(stderr, ", file extent overlap");
697         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
698                 fprintf(stderr, ", file extent discount");
699         if (errors & I_ERR_DIR_ISIZE_WRONG)
700                 fprintf(stderr, ", dir isize wrong");
701         if (errors & I_ERR_FILE_NBYTES_WRONG)
702                 fprintf(stderr, ", nbytes wrong");
703         if (errors & I_ERR_ODD_CSUM_ITEM)
704                 fprintf(stderr, ", odd csum item");
705         if (errors & I_ERR_SOME_CSUM_MISSING)
706                 fprintf(stderr, ", some csum missing");
707         if (errors & I_ERR_LINK_COUNT_WRONG)
708                 fprintf(stderr, ", link count wrong");
709         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
710                 fprintf(stderr, ", orphan file extent");
711         fprintf(stderr, "\n");
712         /* Print the orphan extents if needed */
713         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
714                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
715
716         /* Print the holes if needed */
717         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
718                 struct file_extent_hole *hole;
719                 struct rb_node *node;
720                 int found = 0;
721
722                 node = rb_first(&rec->holes);
723                 fprintf(stderr, "Found file extent holes:\n");
724                 while (node) {
725                         found = 1;
726                         hole = rb_entry(node, struct file_extent_hole, node);
727                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
728                                 hole->start, hole->len);
729                         node = rb_next(node);
730                 }
731                 if (!found)
732                         fprintf(stderr, "\tstart: 0, len: %llu\n",
733                                 round_up(rec->isize, root->sectorsize));
734         }
735 }
736
737 static void print_ref_error(int errors)
738 {
739         if (errors & REF_ERR_NO_DIR_ITEM)
740                 fprintf(stderr, ", no dir item");
741         if (errors & REF_ERR_NO_DIR_INDEX)
742                 fprintf(stderr, ", no dir index");
743         if (errors & REF_ERR_NO_INODE_REF)
744                 fprintf(stderr, ", no inode ref");
745         if (errors & REF_ERR_DUP_DIR_ITEM)
746                 fprintf(stderr, ", dup dir item");
747         if (errors & REF_ERR_DUP_DIR_INDEX)
748                 fprintf(stderr, ", dup dir index");
749         if (errors & REF_ERR_DUP_INODE_REF)
750                 fprintf(stderr, ", dup inode ref");
751         if (errors & REF_ERR_INDEX_UNMATCH)
752                 fprintf(stderr, ", index unmatch");
753         if (errors & REF_ERR_FILETYPE_UNMATCH)
754                 fprintf(stderr, ", filetype unmatch");
755         if (errors & REF_ERR_NAME_TOO_LONG)
756                 fprintf(stderr, ", name too long");
757         if (errors & REF_ERR_NO_ROOT_REF)
758                 fprintf(stderr, ", no root ref");
759         if (errors & REF_ERR_NO_ROOT_BACKREF)
760                 fprintf(stderr, ", no root backref");
761         if (errors & REF_ERR_DUP_ROOT_REF)
762                 fprintf(stderr, ", dup root ref");
763         if (errors & REF_ERR_DUP_ROOT_BACKREF)
764                 fprintf(stderr, ", dup root backref");
765         fprintf(stderr, "\n");
766 }
767
768 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
769                                           u64 ino, int mod)
770 {
771         struct ptr_node *node;
772         struct cache_extent *cache;
773         struct inode_record *rec = NULL;
774         int ret;
775
776         cache = lookup_cache_extent(inode_cache, ino, 1);
777         if (cache) {
778                 node = container_of(cache, struct ptr_node, cache);
779                 rec = node->data;
780                 if (mod && rec->refs > 1) {
781                         node->data = clone_inode_rec(rec);
782                         if (IS_ERR(node->data))
783                                 return node->data;
784                         rec->refs--;
785                         rec = node->data;
786                 }
787         } else if (mod) {
788                 rec = calloc(1, sizeof(*rec));
789                 if (!rec)
790                         return ERR_PTR(-ENOMEM);
791                 rec->ino = ino;
792                 rec->extent_start = (u64)-1;
793                 rec->refs = 1;
794                 INIT_LIST_HEAD(&rec->backrefs);
795                 INIT_LIST_HEAD(&rec->orphan_extents);
796                 rec->holes = RB_ROOT;
797
798                 node = malloc(sizeof(*node));
799                 if (!node) {
800                         free(rec);
801                         return ERR_PTR(-ENOMEM);
802                 }
803                 node->cache.start = ino;
804                 node->cache.size = 1;
805                 node->data = rec;
806
807                 if (ino == BTRFS_FREE_INO_OBJECTID)
808                         rec->found_link = 1;
809
810                 ret = insert_cache_extent(inode_cache, &node->cache);
811                 if (ret)
812                         return ERR_PTR(-EEXIST);
813         }
814         return rec;
815 }
816
817 static void free_orphan_data_extents(struct list_head *orphan_extents)
818 {
819         struct orphan_data_extent *orphan;
820
821         while (!list_empty(orphan_extents)) {
822                 orphan = list_entry(orphan_extents->next,
823                                     struct orphan_data_extent, list);
824                 list_del(&orphan->list);
825                 free(orphan);
826         }
827 }
828
829 static void free_inode_rec(struct inode_record *rec)
830 {
831         struct inode_backref *backref;
832
833         if (--rec->refs > 0)
834                 return;
835
836         while (!list_empty(&rec->backrefs)) {
837                 backref = list_entry(rec->backrefs.next,
838                                      struct inode_backref, list);
839                 list_del(&backref->list);
840                 free(backref);
841         }
842         free_orphan_data_extents(&rec->orphan_extents);
843         free_file_extent_holes(&rec->holes);
844         free(rec);
845 }
846
847 static int can_free_inode_rec(struct inode_record *rec)
848 {
849         if (!rec->errors && rec->checked && rec->found_inode_item &&
850             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
851                 return 1;
852         return 0;
853 }
854
855 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
856                                  struct inode_record *rec)
857 {
858         struct cache_extent *cache;
859         struct inode_backref *tmp, *backref;
860         struct ptr_node *node;
861         unsigned char filetype;
862
863         if (!rec->found_inode_item)
864                 return;
865
866         filetype = imode_to_type(rec->imode);
867         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
868                 if (backref->found_dir_item && backref->found_dir_index) {
869                         if (backref->filetype != filetype)
870                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
871                         if (!backref->errors && backref->found_inode_ref &&
872                             rec->nlink == rec->found_link) {
873                                 list_del(&backref->list);
874                                 free(backref);
875                         }
876                 }
877         }
878
879         if (!rec->checked || rec->merging)
880                 return;
881
882         if (S_ISDIR(rec->imode)) {
883                 if (rec->found_size != rec->isize)
884                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
885                 if (rec->found_file_extent)
886                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
887         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
888                 if (rec->found_dir_item)
889                         rec->errors |= I_ERR_ODD_DIR_ITEM;
890                 if (rec->found_size != rec->nbytes)
891                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
892                 if (rec->nlink > 0 && !no_holes &&
893                     (rec->extent_end < rec->isize ||
894                      first_extent_gap(&rec->holes) < rec->isize))
895                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
896         }
897
898         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
899                 if (rec->found_csum_item && rec->nodatasum)
900                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
901                 if (rec->some_csum_missing && !rec->nodatasum)
902                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
903         }
904
905         BUG_ON(rec->refs != 1);
906         if (can_free_inode_rec(rec)) {
907                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
908                 node = container_of(cache, struct ptr_node, cache);
909                 BUG_ON(node->data != rec);
910                 remove_cache_extent(inode_cache, &node->cache);
911                 free(node);
912                 free_inode_rec(rec);
913         }
914 }
915
916 static int check_orphan_item(struct btrfs_root *root, u64 ino)
917 {
918         struct btrfs_path path;
919         struct btrfs_key key;
920         int ret;
921
922         key.objectid = BTRFS_ORPHAN_OBJECTID;
923         key.type = BTRFS_ORPHAN_ITEM_KEY;
924         key.offset = ino;
925
926         btrfs_init_path(&path);
927         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
928         btrfs_release_path(&path);
929         if (ret > 0)
930                 ret = -ENOENT;
931         return ret;
932 }
933
934 static int process_inode_item(struct extent_buffer *eb,
935                               int slot, struct btrfs_key *key,
936                               struct shared_node *active_node)
937 {
938         struct inode_record *rec;
939         struct btrfs_inode_item *item;
940
941         rec = active_node->current;
942         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
943         if (rec->found_inode_item) {
944                 rec->errors |= I_ERR_DUP_INODE_ITEM;
945                 return 1;
946         }
947         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
948         rec->nlink = btrfs_inode_nlink(eb, item);
949         rec->isize = btrfs_inode_size(eb, item);
950         rec->nbytes = btrfs_inode_nbytes(eb, item);
951         rec->imode = btrfs_inode_mode(eb, item);
952         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
953                 rec->nodatasum = 1;
954         rec->found_inode_item = 1;
955         if (rec->nlink == 0)
956                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
957         maybe_free_inode_rec(&active_node->inode_cache, rec);
958         return 0;
959 }
960
961 static struct inode_backref *get_inode_backref(struct inode_record *rec,
962                                                 const char *name,
963                                                 int namelen, u64 dir)
964 {
965         struct inode_backref *backref;
966
967         list_for_each_entry(backref, &rec->backrefs, list) {
968                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
969                         break;
970                 if (backref->dir != dir || backref->namelen != namelen)
971                         continue;
972                 if (memcmp(name, backref->name, namelen))
973                         continue;
974                 return backref;
975         }
976
977         backref = malloc(sizeof(*backref) + namelen + 1);
978         if (!backref)
979                 return NULL;
980         memset(backref, 0, sizeof(*backref));
981         backref->dir = dir;
982         backref->namelen = namelen;
983         memcpy(backref->name, name, namelen);
984         backref->name[namelen] = '\0';
985         list_add_tail(&backref->list, &rec->backrefs);
986         return backref;
987 }
988
989 static int add_inode_backref(struct cache_tree *inode_cache,
990                              u64 ino, u64 dir, u64 index,
991                              const char *name, int namelen,
992                              int filetype, int itemtype, int errors)
993 {
994         struct inode_record *rec;
995         struct inode_backref *backref;
996
997         rec = get_inode_rec(inode_cache, ino, 1);
998         BUG_ON(IS_ERR(rec));
999         backref = get_inode_backref(rec, name, namelen, dir);
1000         BUG_ON(!backref);
1001         if (errors)
1002                 backref->errors |= errors;
1003         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1004                 if (backref->found_dir_index)
1005                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1006                 if (backref->found_inode_ref && backref->index != index)
1007                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1008                 if (backref->found_dir_item && backref->filetype != filetype)
1009                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1010
1011                 backref->index = index;
1012                 backref->filetype = filetype;
1013                 backref->found_dir_index = 1;
1014         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1015                 rec->found_link++;
1016                 if (backref->found_dir_item)
1017                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1018                 if (backref->found_dir_index && backref->filetype != filetype)
1019                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1020
1021                 backref->filetype = filetype;
1022                 backref->found_dir_item = 1;
1023         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1024                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1025                 if (backref->found_inode_ref)
1026                         backref->errors |= REF_ERR_DUP_INODE_REF;
1027                 if (backref->found_dir_index && backref->index != index)
1028                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1029                 else
1030                         backref->index = index;
1031
1032                 backref->ref_type = itemtype;
1033                 backref->found_inode_ref = 1;
1034         } else {
1035                 BUG_ON(1);
1036         }
1037
1038         maybe_free_inode_rec(inode_cache, rec);
1039         return 0;
1040 }
1041
1042 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1043                             struct cache_tree *dst_cache)
1044 {
1045         struct inode_backref *backref;
1046         u32 dir_count = 0;
1047         int ret = 0;
1048
1049         dst->merging = 1;
1050         list_for_each_entry(backref, &src->backrefs, list) {
1051                 if (backref->found_dir_index) {
1052                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1053                                         backref->index, backref->name,
1054                                         backref->namelen, backref->filetype,
1055                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1056                 }
1057                 if (backref->found_dir_item) {
1058                         dir_count++;
1059                         add_inode_backref(dst_cache, dst->ino,
1060                                         backref->dir, 0, backref->name,
1061                                         backref->namelen, backref->filetype,
1062                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1063                 }
1064                 if (backref->found_inode_ref) {
1065                         add_inode_backref(dst_cache, dst->ino,
1066                                         backref->dir, backref->index,
1067                                         backref->name, backref->namelen, 0,
1068                                         backref->ref_type, backref->errors);
1069                 }
1070         }
1071
1072         if (src->found_dir_item)
1073                 dst->found_dir_item = 1;
1074         if (src->found_file_extent)
1075                 dst->found_file_extent = 1;
1076         if (src->found_csum_item)
1077                 dst->found_csum_item = 1;
1078         if (src->some_csum_missing)
1079                 dst->some_csum_missing = 1;
1080         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1081                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1082                 if (ret < 0)
1083                         return ret;
1084         }
1085
1086         BUG_ON(src->found_link < dir_count);
1087         dst->found_link += src->found_link - dir_count;
1088         dst->found_size += src->found_size;
1089         if (src->extent_start != (u64)-1) {
1090                 if (dst->extent_start == (u64)-1) {
1091                         dst->extent_start = src->extent_start;
1092                         dst->extent_end = src->extent_end;
1093                 } else {
1094                         if (dst->extent_end > src->extent_start)
1095                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1096                         else if (dst->extent_end < src->extent_start) {
1097                                 ret = add_file_extent_hole(&dst->holes,
1098                                         dst->extent_end,
1099                                         src->extent_start - dst->extent_end);
1100                         }
1101                         if (dst->extent_end < src->extent_end)
1102                                 dst->extent_end = src->extent_end;
1103                 }
1104         }
1105
1106         dst->errors |= src->errors;
1107         if (src->found_inode_item) {
1108                 if (!dst->found_inode_item) {
1109                         dst->nlink = src->nlink;
1110                         dst->isize = src->isize;
1111                         dst->nbytes = src->nbytes;
1112                         dst->imode = src->imode;
1113                         dst->nodatasum = src->nodatasum;
1114                         dst->found_inode_item = 1;
1115                 } else {
1116                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1117                 }
1118         }
1119         dst->merging = 0;
1120
1121         return 0;
1122 }
1123
1124 static int splice_shared_node(struct shared_node *src_node,
1125                               struct shared_node *dst_node)
1126 {
1127         struct cache_extent *cache;
1128         struct ptr_node *node, *ins;
1129         struct cache_tree *src, *dst;
1130         struct inode_record *rec, *conflict;
1131         u64 current_ino = 0;
1132         int splice = 0;
1133         int ret;
1134
1135         if (--src_node->refs == 0)
1136                 splice = 1;
1137         if (src_node->current)
1138                 current_ino = src_node->current->ino;
1139
1140         src = &src_node->root_cache;
1141         dst = &dst_node->root_cache;
1142 again:
1143         cache = search_cache_extent(src, 0);
1144         while (cache) {
1145                 node = container_of(cache, struct ptr_node, cache);
1146                 rec = node->data;
1147                 cache = next_cache_extent(cache);
1148
1149                 if (splice) {
1150                         remove_cache_extent(src, &node->cache);
1151                         ins = node;
1152                 } else {
1153                         ins = malloc(sizeof(*ins));
1154                         BUG_ON(!ins);
1155                         ins->cache.start = node->cache.start;
1156                         ins->cache.size = node->cache.size;
1157                         ins->data = rec;
1158                         rec->refs++;
1159                 }
1160                 ret = insert_cache_extent(dst, &ins->cache);
1161                 if (ret == -EEXIST) {
1162                         conflict = get_inode_rec(dst, rec->ino, 1);
1163                         BUG_ON(IS_ERR(conflict));
1164                         merge_inode_recs(rec, conflict, dst);
1165                         if (rec->checked) {
1166                                 conflict->checked = 1;
1167                                 if (dst_node->current == conflict)
1168                                         dst_node->current = NULL;
1169                         }
1170                         maybe_free_inode_rec(dst, conflict);
1171                         free_inode_rec(rec);
1172                         free(ins);
1173                 } else {
1174                         BUG_ON(ret);
1175                 }
1176         }
1177
1178         if (src == &src_node->root_cache) {
1179                 src = &src_node->inode_cache;
1180                 dst = &dst_node->inode_cache;
1181                 goto again;
1182         }
1183
1184         if (current_ino > 0 && (!dst_node->current ||
1185             current_ino > dst_node->current->ino)) {
1186                 if (dst_node->current) {
1187                         dst_node->current->checked = 1;
1188                         maybe_free_inode_rec(dst, dst_node->current);
1189                 }
1190                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1191                 BUG_ON(IS_ERR(dst_node->current));
1192         }
1193         return 0;
1194 }
1195
1196 static void free_inode_ptr(struct cache_extent *cache)
1197 {
1198         struct ptr_node *node;
1199         struct inode_record *rec;
1200
1201         node = container_of(cache, struct ptr_node, cache);
1202         rec = node->data;
1203         free_inode_rec(rec);
1204         free(node);
1205 }
1206
1207 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1208
1209 static struct shared_node *find_shared_node(struct cache_tree *shared,
1210                                             u64 bytenr)
1211 {
1212         struct cache_extent *cache;
1213         struct shared_node *node;
1214
1215         cache = lookup_cache_extent(shared, bytenr, 1);
1216         if (cache) {
1217                 node = container_of(cache, struct shared_node, cache);
1218                 return node;
1219         }
1220         return NULL;
1221 }
1222
1223 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1224 {
1225         int ret;
1226         struct shared_node *node;
1227
1228         node = calloc(1, sizeof(*node));
1229         if (!node)
1230                 return -ENOMEM;
1231         node->cache.start = bytenr;
1232         node->cache.size = 1;
1233         cache_tree_init(&node->root_cache);
1234         cache_tree_init(&node->inode_cache);
1235         node->refs = refs;
1236
1237         ret = insert_cache_extent(shared, &node->cache);
1238
1239         return ret;
1240 }
1241
1242 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1243                              struct walk_control *wc, int level)
1244 {
1245         struct shared_node *node;
1246         struct shared_node *dest;
1247         int ret;
1248
1249         if (level == wc->active_node)
1250                 return 0;
1251
1252         BUG_ON(wc->active_node <= level);
1253         node = find_shared_node(&wc->shared, bytenr);
1254         if (!node) {
1255                 ret = add_shared_node(&wc->shared, bytenr, refs);
1256                 BUG_ON(ret);
1257                 node = find_shared_node(&wc->shared, bytenr);
1258                 wc->nodes[level] = node;
1259                 wc->active_node = level;
1260                 return 0;
1261         }
1262
1263         if (wc->root_level == wc->active_node &&
1264             btrfs_root_refs(&root->root_item) == 0) {
1265                 if (--node->refs == 0) {
1266                         free_inode_recs_tree(&node->root_cache);
1267                         free_inode_recs_tree(&node->inode_cache);
1268                         remove_cache_extent(&wc->shared, &node->cache);
1269                         free(node);
1270                 }
1271                 return 1;
1272         }
1273
1274         dest = wc->nodes[wc->active_node];
1275         splice_shared_node(node, dest);
1276         if (node->refs == 0) {
1277                 remove_cache_extent(&wc->shared, &node->cache);
1278                 free(node);
1279         }
1280         return 1;
1281 }
1282
1283 static int leave_shared_node(struct btrfs_root *root,
1284                              struct walk_control *wc, int level)
1285 {
1286         struct shared_node *node;
1287         struct shared_node *dest;
1288         int i;
1289
1290         if (level == wc->root_level)
1291                 return 0;
1292
1293         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1294                 if (wc->nodes[i])
1295                         break;
1296         }
1297         BUG_ON(i >= BTRFS_MAX_LEVEL);
1298
1299         node = wc->nodes[wc->active_node];
1300         wc->nodes[wc->active_node] = NULL;
1301         wc->active_node = i;
1302
1303         dest = wc->nodes[wc->active_node];
1304         if (wc->active_node < wc->root_level ||
1305             btrfs_root_refs(&root->root_item) > 0) {
1306                 BUG_ON(node->refs <= 1);
1307                 splice_shared_node(node, dest);
1308         } else {
1309                 BUG_ON(node->refs < 2);
1310                 node->refs--;
1311         }
1312         return 0;
1313 }
1314
1315 /*
1316  * Returns:
1317  * < 0 - on error
1318  * 1   - if the root with id child_root_id is a child of root parent_root_id
1319  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1320  *       has other root(s) as parent(s)
1321  * 2   - if the root child_root_id doesn't have any parent roots
1322  */
1323 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1324                          u64 child_root_id)
1325 {
1326         struct btrfs_path path;
1327         struct btrfs_key key;
1328         struct extent_buffer *leaf;
1329         int has_parent = 0;
1330         int ret;
1331
1332         btrfs_init_path(&path);
1333
1334         key.objectid = parent_root_id;
1335         key.type = BTRFS_ROOT_REF_KEY;
1336         key.offset = child_root_id;
1337         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1338                                 0, 0);
1339         if (ret < 0)
1340                 return ret;
1341         btrfs_release_path(&path);
1342         if (!ret)
1343                 return 1;
1344
1345         key.objectid = child_root_id;
1346         key.type = BTRFS_ROOT_BACKREF_KEY;
1347         key.offset = 0;
1348         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1349                                 0, 0);
1350         if (ret < 0)
1351                 goto out;
1352
1353         while (1) {
1354                 leaf = path.nodes[0];
1355                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1356                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1357                         if (ret)
1358                                 break;
1359                         leaf = path.nodes[0];
1360                 }
1361
1362                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1363                 if (key.objectid != child_root_id ||
1364                     key.type != BTRFS_ROOT_BACKREF_KEY)
1365                         break;
1366
1367                 has_parent = 1;
1368
1369                 if (key.offset == parent_root_id) {
1370                         btrfs_release_path(&path);
1371                         return 1;
1372                 }
1373
1374                 path.slots[0]++;
1375         }
1376 out:
1377         btrfs_release_path(&path);
1378         if (ret < 0)
1379                 return ret;
1380         return has_parent ? 0 : 2;
1381 }
1382
1383 static int process_dir_item(struct btrfs_root *root,
1384                             struct extent_buffer *eb,
1385                             int slot, struct btrfs_key *key,
1386                             struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u32 data_len;
1393         int error;
1394         int nritems = 0;
1395         int filetype;
1396         struct btrfs_dir_item *di;
1397         struct inode_record *rec;
1398         struct cache_tree *root_cache;
1399         struct cache_tree *inode_cache;
1400         struct btrfs_key location;
1401         char namebuf[BTRFS_NAME_LEN];
1402
1403         root_cache = &active_node->root_cache;
1404         inode_cache = &active_node->inode_cache;
1405         rec = active_node->current;
1406         rec->found_dir_item = 1;
1407
1408         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1409         total = btrfs_item_size_nr(eb, slot);
1410         while (cur < total) {
1411                 nritems++;
1412                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1413                 name_len = btrfs_dir_name_len(eb, di);
1414                 data_len = btrfs_dir_data_len(eb, di);
1415                 filetype = btrfs_dir_type(eb, di);
1416
1417                 rec->found_size += name_len;
1418                 if (name_len <= BTRFS_NAME_LEN) {
1419                         len = name_len;
1420                         error = 0;
1421                 } else {
1422                         len = BTRFS_NAME_LEN;
1423                         error = REF_ERR_NAME_TOO_LONG;
1424                 }
1425                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1426
1427                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1428                         add_inode_backref(inode_cache, location.objectid,
1429                                           key->objectid, key->offset, namebuf,
1430                                           len, filetype, key->type, error);
1431                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1432                         add_inode_backref(root_cache, location.objectid,
1433                                           key->objectid, key->offset,
1434                                           namebuf, len, filetype,
1435                                           key->type, error);
1436                 } else {
1437                         fprintf(stderr, "invalid location in dir item %u\n",
1438                                 location.type);
1439                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1440                                           key->objectid, key->offset, namebuf,
1441                                           len, filetype, key->type, error);
1442                 }
1443
1444                 len = sizeof(*di) + name_len + data_len;
1445                 di = (struct btrfs_dir_item *)((char *)di + len);
1446                 cur += len;
1447         }
1448         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1449                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1450
1451         return 0;
1452 }
1453
1454 static int process_inode_ref(struct extent_buffer *eb,
1455                              int slot, struct btrfs_key *key,
1456                              struct shared_node *active_node)
1457 {
1458         u32 total;
1459         u32 cur = 0;
1460         u32 len;
1461         u32 name_len;
1462         u64 index;
1463         int error;
1464         struct cache_tree *inode_cache;
1465         struct btrfs_inode_ref *ref;
1466         char namebuf[BTRFS_NAME_LEN];
1467
1468         inode_cache = &active_node->inode_cache;
1469
1470         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1471         total = btrfs_item_size_nr(eb, slot);
1472         while (cur < total) {
1473                 name_len = btrfs_inode_ref_name_len(eb, ref);
1474                 index = btrfs_inode_ref_index(eb, ref);
1475                 if (name_len <= BTRFS_NAME_LEN) {
1476                         len = name_len;
1477                         error = 0;
1478                 } else {
1479                         len = BTRFS_NAME_LEN;
1480                         error = REF_ERR_NAME_TOO_LONG;
1481                 }
1482                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1483                 add_inode_backref(inode_cache, key->objectid, key->offset,
1484                                   index, namebuf, len, 0, key->type, error);
1485
1486                 len = sizeof(*ref) + name_len;
1487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1488                 cur += len;
1489         }
1490         return 0;
1491 }
1492
1493 static int process_inode_extref(struct extent_buffer *eb,
1494                                 int slot, struct btrfs_key *key,
1495                                 struct shared_node *active_node)
1496 {
1497         u32 total;
1498         u32 cur = 0;
1499         u32 len;
1500         u32 name_len;
1501         u64 index;
1502         u64 parent;
1503         int error;
1504         struct cache_tree *inode_cache;
1505         struct btrfs_inode_extref *extref;
1506         char namebuf[BTRFS_NAME_LEN];
1507
1508         inode_cache = &active_node->inode_cache;
1509
1510         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1511         total = btrfs_item_size_nr(eb, slot);
1512         while (cur < total) {
1513                 name_len = btrfs_inode_extref_name_len(eb, extref);
1514                 index = btrfs_inode_extref_index(eb, extref);
1515                 parent = btrfs_inode_extref_parent(eb, extref);
1516                 if (name_len <= BTRFS_NAME_LEN) {
1517                         len = name_len;
1518                         error = 0;
1519                 } else {
1520                         len = BTRFS_NAME_LEN;
1521                         error = REF_ERR_NAME_TOO_LONG;
1522                 }
1523                 read_extent_buffer(eb, namebuf,
1524                                    (unsigned long)(extref + 1), len);
1525                 add_inode_backref(inode_cache, key->objectid, parent,
1526                                   index, namebuf, len, 0, key->type, error);
1527
1528                 len = sizeof(*extref) + name_len;
1529                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1530                 cur += len;
1531         }
1532         return 0;
1533
1534 }
1535
1536 static int count_csum_range(struct btrfs_root *root, u64 start,
1537                             u64 len, u64 *found)
1538 {
1539         struct btrfs_key key;
1540         struct btrfs_path path;
1541         struct extent_buffer *leaf;
1542         int ret;
1543         size_t size;
1544         *found = 0;
1545         u64 csum_end;
1546         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1547
1548         btrfs_init_path(&path);
1549
1550         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1551         key.offset = start;
1552         key.type = BTRFS_EXTENT_CSUM_KEY;
1553
1554         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1555                                 &key, &path, 0, 0);
1556         if (ret < 0)
1557                 goto out;
1558         if (ret > 0 && path.slots[0] > 0) {
1559                 leaf = path.nodes[0];
1560                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1561                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1562                     key.type == BTRFS_EXTENT_CSUM_KEY)
1563                         path.slots[0]--;
1564         }
1565
1566         while (len > 0) {
1567                 leaf = path.nodes[0];
1568                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1569                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1570                         if (ret > 0)
1571                                 break;
1572                         else if (ret < 0)
1573                                 goto out;
1574                         leaf = path.nodes[0];
1575                 }
1576
1577                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1578                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1579                     key.type != BTRFS_EXTENT_CSUM_KEY)
1580                         break;
1581
1582                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1583                 if (key.offset >= start + len)
1584                         break;
1585
1586                 if (key.offset > start)
1587                         start = key.offset;
1588
1589                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1590                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1591                 if (csum_end > start) {
1592                         size = min(csum_end - start, len);
1593                         len -= size;
1594                         start += size;
1595                         *found += size;
1596                 }
1597
1598                 path.slots[0]++;
1599         }
1600 out:
1601         btrfs_release_path(&path);
1602         if (ret < 0)
1603                 return ret;
1604         return 0;
1605 }
1606
1607 static int process_file_extent(struct btrfs_root *root,
1608                                 struct extent_buffer *eb,
1609                                 int slot, struct btrfs_key *key,
1610                                 struct shared_node *active_node)
1611 {
1612         struct inode_record *rec;
1613         struct btrfs_file_extent_item *fi;
1614         u64 num_bytes = 0;
1615         u64 disk_bytenr = 0;
1616         u64 extent_offset = 0;
1617         u64 mask = root->sectorsize - 1;
1618         int extent_type;
1619         int ret;
1620
1621         rec = active_node->current;
1622         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1623         rec->found_file_extent = 1;
1624
1625         if (rec->extent_start == (u64)-1) {
1626                 rec->extent_start = key->offset;
1627                 rec->extent_end = key->offset;
1628         }
1629
1630         if (rec->extent_end > key->offset)
1631                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1632         else if (rec->extent_end < key->offset) {
1633                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1634                                            key->offset - rec->extent_end);
1635                 if (ret < 0)
1636                         return ret;
1637         }
1638
1639         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1640         extent_type = btrfs_file_extent_type(eb, fi);
1641
1642         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1643                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1644                 if (num_bytes == 0)
1645                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1646                 rec->found_size += num_bytes;
1647                 num_bytes = (num_bytes + mask) & ~mask;
1648         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1649                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1650                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1651                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1652                 extent_offset = btrfs_file_extent_offset(eb, fi);
1653                 if (num_bytes == 0 || (num_bytes & mask))
1654                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1655                 if (num_bytes + extent_offset >
1656                     btrfs_file_extent_ram_bytes(eb, fi))
1657                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1658                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1659                     (btrfs_file_extent_compression(eb, fi) ||
1660                      btrfs_file_extent_encryption(eb, fi) ||
1661                      btrfs_file_extent_other_encoding(eb, fi)))
1662                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1663                 if (disk_bytenr > 0)
1664                         rec->found_size += num_bytes;
1665         } else {
1666                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1667         }
1668         rec->extent_end = key->offset + num_bytes;
1669
1670         /*
1671          * The data reloc tree will copy full extents into its inode and then
1672          * copy the corresponding csums.  Because the extent it copied could be
1673          * a preallocated extent that hasn't been written to yet there may be no
1674          * csums to copy, ergo we won't have csums for our file extent.  This is
1675          * ok so just don't bother checking csums if the inode belongs to the
1676          * data reloc tree.
1677          */
1678         if (disk_bytenr > 0 &&
1679             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1680                 u64 found;
1681                 if (btrfs_file_extent_compression(eb, fi))
1682                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1683                 else
1684                         disk_bytenr += extent_offset;
1685
1686                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1687                 if (ret < 0)
1688                         return ret;
1689                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1690                         if (found > 0)
1691                                 rec->found_csum_item = 1;
1692                         if (found < num_bytes)
1693                                 rec->some_csum_missing = 1;
1694                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1695                         if (found > 0)
1696                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1697                 }
1698         }
1699         return 0;
1700 }
1701
1702 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1703                             struct walk_control *wc)
1704 {
1705         struct btrfs_key key;
1706         u32 nritems;
1707         int i;
1708         int ret = 0;
1709         struct cache_tree *inode_cache;
1710         struct shared_node *active_node;
1711
1712         if (wc->root_level == wc->active_node &&
1713             btrfs_root_refs(&root->root_item) == 0)
1714                 return 0;
1715
1716         active_node = wc->nodes[wc->active_node];
1717         inode_cache = &active_node->inode_cache;
1718         nritems = btrfs_header_nritems(eb);
1719         for (i = 0; i < nritems; i++) {
1720                 btrfs_item_key_to_cpu(eb, &key, i);
1721
1722                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1723                         continue;
1724                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1725                         continue;
1726
1727                 if (active_node->current == NULL ||
1728                     active_node->current->ino < key.objectid) {
1729                         if (active_node->current) {
1730                                 active_node->current->checked = 1;
1731                                 maybe_free_inode_rec(inode_cache,
1732                                                      active_node->current);
1733                         }
1734                         active_node->current = get_inode_rec(inode_cache,
1735                                                              key.objectid, 1);
1736                         BUG_ON(IS_ERR(active_node->current));
1737                 }
1738                 switch (key.type) {
1739                 case BTRFS_DIR_ITEM_KEY:
1740                 case BTRFS_DIR_INDEX_KEY:
1741                         ret = process_dir_item(root, eb, i, &key, active_node);
1742                         break;
1743                 case BTRFS_INODE_REF_KEY:
1744                         ret = process_inode_ref(eb, i, &key, active_node);
1745                         break;
1746                 case BTRFS_INODE_EXTREF_KEY:
1747                         ret = process_inode_extref(eb, i, &key, active_node);
1748                         break;
1749                 case BTRFS_INODE_ITEM_KEY:
1750                         ret = process_inode_item(eb, i, &key, active_node);
1751                         break;
1752                 case BTRFS_EXTENT_DATA_KEY:
1753                         ret = process_file_extent(root, eb, i, &key,
1754                                                   active_node);
1755                         break;
1756                 default:
1757                         break;
1758                 };
1759         }
1760         return ret;
1761 }
1762
1763 static void reada_walk_down(struct btrfs_root *root,
1764                             struct extent_buffer *node, int slot)
1765 {
1766         u64 bytenr;
1767         u64 ptr_gen;
1768         u32 nritems;
1769         u32 blocksize;
1770         int i;
1771         int level;
1772
1773         level = btrfs_header_level(node);
1774         if (level != 1)
1775                 return;
1776
1777         nritems = btrfs_header_nritems(node);
1778         blocksize = root->nodesize;
1779         for (i = slot; i < nritems; i++) {
1780                 bytenr = btrfs_node_blockptr(node, i);
1781                 ptr_gen = btrfs_node_ptr_generation(node, i);
1782                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1783         }
1784 }
1785
1786 /*
1787  * Check the child node/leaf by the following condition:
1788  * 1. the first item key of the node/leaf should be the same with the one
1789  *    in parent.
1790  * 2. block in parent node should match the child node/leaf.
1791  * 3. generation of parent node and child's header should be consistent.
1792  *
1793  * Or the child node/leaf pointed by the key in parent is not valid.
1794  *
1795  * We hope to check leaf owner too, but since subvol may share leaves,
1796  * which makes leaf owner check not so strong, key check should be
1797  * sufficient enough for that case.
1798  */
1799 static int check_child_node(struct btrfs_root *root,
1800                             struct extent_buffer *parent, int slot,
1801                             struct extent_buffer *child)
1802 {
1803         struct btrfs_key parent_key;
1804         struct btrfs_key child_key;
1805         int ret = 0;
1806
1807         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1808         if (btrfs_header_level(child) == 0)
1809                 btrfs_item_key_to_cpu(child, &child_key, 0);
1810         else
1811                 btrfs_node_key_to_cpu(child, &child_key, 0);
1812
1813         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1814                 ret = -EINVAL;
1815                 fprintf(stderr,
1816                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1817                         parent_key.objectid, parent_key.type, parent_key.offset,
1818                         child_key.objectid, child_key.type, child_key.offset);
1819         }
1820         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1821                 ret = -EINVAL;
1822                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1823                         btrfs_node_blockptr(parent, slot),
1824                         btrfs_header_bytenr(child));
1825         }
1826         if (btrfs_node_ptr_generation(parent, slot) !=
1827             btrfs_header_generation(child)) {
1828                 ret = -EINVAL;
1829                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1830                         btrfs_header_generation(child),
1831                         btrfs_node_ptr_generation(parent, slot));
1832         }
1833         return ret;
1834 }
1835
1836 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1837                           struct walk_control *wc, int *level)
1838 {
1839         enum btrfs_tree_block_status status;
1840         u64 bytenr;
1841         u64 ptr_gen;
1842         struct extent_buffer *next;
1843         struct extent_buffer *cur;
1844         u32 blocksize;
1845         int ret, err = 0;
1846         u64 refs;
1847
1848         WARN_ON(*level < 0);
1849         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1850         ret = btrfs_lookup_extent_info(NULL, root,
1851                                        path->nodes[*level]->start,
1852                                        *level, 1, &refs, NULL);
1853         if (ret < 0) {
1854                 err = ret;
1855                 goto out;
1856         }
1857
1858         if (refs > 1) {
1859                 ret = enter_shared_node(root, path->nodes[*level]->start,
1860                                         refs, wc, *level);
1861                 if (ret > 0) {
1862                         err = ret;
1863                         goto out;
1864                 }
1865         }
1866
1867         while (*level >= 0) {
1868                 WARN_ON(*level < 0);
1869                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1870                 cur = path->nodes[*level];
1871
1872                 if (btrfs_header_level(cur) != *level)
1873                         WARN_ON(1);
1874
1875                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1876                         break;
1877                 if (*level == 0) {
1878                         ret = process_one_leaf(root, cur, wc);
1879                         if (ret < 0)
1880                                 err = ret;
1881                         break;
1882                 }
1883                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1884                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1885                 blocksize = root->nodesize;
1886                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1887                                                1, &refs, NULL);
1888                 if (ret < 0)
1889                         refs = 0;
1890
1891                 if (refs > 1) {
1892                         ret = enter_shared_node(root, bytenr, refs,
1893                                                 wc, *level - 1);
1894                         if (ret > 0) {
1895                                 path->slots[*level]++;
1896                                 continue;
1897                         }
1898                 }
1899
1900                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1901                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1902                         free_extent_buffer(next);
1903                         reada_walk_down(root, cur, path->slots[*level]);
1904                         next = read_tree_block(root, bytenr, blocksize,
1905                                                ptr_gen);
1906                         if (!extent_buffer_uptodate(next)) {
1907                                 struct btrfs_key node_key;
1908
1909                                 btrfs_node_key_to_cpu(path->nodes[*level],
1910                                                       &node_key,
1911                                                       path->slots[*level]);
1912                                 btrfs_add_corrupt_extent_record(root->fs_info,
1913                                                 &node_key,
1914                                                 path->nodes[*level]->start,
1915                                                 root->nodesize, *level);
1916                                 err = -EIO;
1917                                 goto out;
1918                         }
1919                 }
1920
1921                 ret = check_child_node(root, cur, path->slots[*level], next);
1922                 if (ret) {
1923                         err = ret;
1924                         goto out;
1925                 }
1926
1927                 if (btrfs_is_leaf(next))
1928                         status = btrfs_check_leaf(root, NULL, next);
1929                 else
1930                         status = btrfs_check_node(root, NULL, next);
1931                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1932                         free_extent_buffer(next);
1933                         err = -EIO;
1934                         goto out;
1935                 }
1936
1937                 *level = *level - 1;
1938                 free_extent_buffer(path->nodes[*level]);
1939                 path->nodes[*level] = next;
1940                 path->slots[*level] = 0;
1941         }
1942 out:
1943         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1944         return err;
1945 }
1946
1947 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1948                         struct walk_control *wc, int *level)
1949 {
1950         int i;
1951         struct extent_buffer *leaf;
1952
1953         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1954                 leaf = path->nodes[i];
1955                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1956                         path->slots[i]++;
1957                         *level = i;
1958                         return 0;
1959                 } else {
1960                         free_extent_buffer(path->nodes[*level]);
1961                         path->nodes[*level] = NULL;
1962                         BUG_ON(*level > wc->active_node);
1963                         if (*level == wc->active_node)
1964                                 leave_shared_node(root, wc, *level);
1965                         *level = i + 1;
1966                 }
1967         }
1968         return 1;
1969 }
1970
1971 static int check_root_dir(struct inode_record *rec)
1972 {
1973         struct inode_backref *backref;
1974         int ret = -1;
1975
1976         if (!rec->found_inode_item || rec->errors)
1977                 goto out;
1978         if (rec->nlink != 1 || rec->found_link != 0)
1979                 goto out;
1980         if (list_empty(&rec->backrefs))
1981                 goto out;
1982         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1983         if (!backref->found_inode_ref)
1984                 goto out;
1985         if (backref->index != 0 || backref->namelen != 2 ||
1986             memcmp(backref->name, "..", 2))
1987                 goto out;
1988         if (backref->found_dir_index || backref->found_dir_item)
1989                 goto out;
1990         ret = 0;
1991 out:
1992         return ret;
1993 }
1994
1995 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1996                               struct btrfs_root *root, struct btrfs_path *path,
1997                               struct inode_record *rec)
1998 {
1999         struct btrfs_inode_item *ei;
2000         struct btrfs_key key;
2001         int ret;
2002
2003         key.objectid = rec->ino;
2004         key.type = BTRFS_INODE_ITEM_KEY;
2005         key.offset = (u64)-1;
2006
2007         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2008         if (ret < 0)
2009                 goto out;
2010         if (ret) {
2011                 if (!path->slots[0]) {
2012                         ret = -ENOENT;
2013                         goto out;
2014                 }
2015                 path->slots[0]--;
2016                 ret = 0;
2017         }
2018         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2019         if (key.objectid != rec->ino) {
2020                 ret = -ENOENT;
2021                 goto out;
2022         }
2023
2024         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2025                             struct btrfs_inode_item);
2026         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2027         btrfs_mark_buffer_dirty(path->nodes[0]);
2028         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2029         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2030                root->root_key.objectid);
2031 out:
2032         btrfs_release_path(path);
2033         return ret;
2034 }
2035
2036 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2037                                     struct btrfs_root *root,
2038                                     struct btrfs_path *path,
2039                                     struct inode_record *rec)
2040 {
2041         int ret;
2042
2043         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2044         btrfs_release_path(path);
2045         if (!ret)
2046                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2047         return ret;
2048 }
2049
2050 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2051                                struct btrfs_root *root,
2052                                struct btrfs_path *path,
2053                                struct inode_record *rec)
2054 {
2055         struct btrfs_inode_item *ei;
2056         struct btrfs_key key;
2057         int ret = 0;
2058
2059         key.objectid = rec->ino;
2060         key.type = BTRFS_INODE_ITEM_KEY;
2061         key.offset = 0;
2062
2063         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2064         if (ret) {
2065                 if (ret > 0)
2066                         ret = -ENOENT;
2067                 goto out;
2068         }
2069
2070         /* Since ret == 0, no need to check anything */
2071         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2072                             struct btrfs_inode_item);
2073         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2074         btrfs_mark_buffer_dirty(path->nodes[0]);
2075         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2076         printf("reset nbytes for ino %llu root %llu\n",
2077                rec->ino, root->root_key.objectid);
2078 out:
2079         btrfs_release_path(path);
2080         return ret;
2081 }
2082
2083 static int add_missing_dir_index(struct btrfs_root *root,
2084                                  struct cache_tree *inode_cache,
2085                                  struct inode_record *rec,
2086                                  struct inode_backref *backref)
2087 {
2088         struct btrfs_path *path;
2089         struct btrfs_trans_handle *trans;
2090         struct btrfs_dir_item *dir_item;
2091         struct extent_buffer *leaf;
2092         struct btrfs_key key;
2093         struct btrfs_disk_key disk_key;
2094         struct inode_record *dir_rec;
2095         unsigned long name_ptr;
2096         u32 data_size = sizeof(*dir_item) + backref->namelen;
2097         int ret;
2098
2099         path = btrfs_alloc_path();
2100         if (!path)
2101                 return -ENOMEM;
2102
2103         trans = btrfs_start_transaction(root, 1);
2104         if (IS_ERR(trans)) {
2105                 btrfs_free_path(path);
2106                 return PTR_ERR(trans);
2107         }
2108
2109         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2110                 (unsigned long long)rec->ino);
2111         key.objectid = backref->dir;
2112         key.type = BTRFS_DIR_INDEX_KEY;
2113         key.offset = backref->index;
2114
2115         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2116         BUG_ON(ret);
2117
2118         leaf = path->nodes[0];
2119         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2120
2121         disk_key.objectid = cpu_to_le64(rec->ino);
2122         disk_key.type = BTRFS_INODE_ITEM_KEY;
2123         disk_key.offset = 0;
2124
2125         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2126         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2127         btrfs_set_dir_data_len(leaf, dir_item, 0);
2128         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2129         name_ptr = (unsigned long)(dir_item + 1);
2130         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2131         btrfs_mark_buffer_dirty(leaf);
2132         btrfs_free_path(path);
2133         btrfs_commit_transaction(trans, root);
2134
2135         backref->found_dir_index = 1;
2136         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2137         BUG_ON(IS_ERR(dir_rec));
2138         if (!dir_rec)
2139                 return 0;
2140         dir_rec->found_size += backref->namelen;
2141         if (dir_rec->found_size == dir_rec->isize &&
2142             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2143                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2144         if (dir_rec->found_size != dir_rec->isize)
2145                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2146
2147         return 0;
2148 }
2149
2150 static int delete_dir_index(struct btrfs_root *root,
2151                             struct cache_tree *inode_cache,
2152                             struct inode_record *rec,
2153                             struct inode_backref *backref)
2154 {
2155         struct btrfs_trans_handle *trans;
2156         struct btrfs_dir_item *di;
2157         struct btrfs_path *path;
2158         int ret = 0;
2159
2160         path = btrfs_alloc_path();
2161         if (!path)
2162                 return -ENOMEM;
2163
2164         trans = btrfs_start_transaction(root, 1);
2165         if (IS_ERR(trans)) {
2166                 btrfs_free_path(path);
2167                 return PTR_ERR(trans);
2168         }
2169
2170
2171         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2172                 (unsigned long long)backref->dir,
2173                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2174                 (unsigned long long)root->objectid);
2175
2176         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2177                                     backref->name, backref->namelen,
2178                                     backref->index, -1);
2179         if (IS_ERR(di)) {
2180                 ret = PTR_ERR(di);
2181                 btrfs_free_path(path);
2182                 btrfs_commit_transaction(trans, root);
2183                 if (ret == -ENOENT)
2184                         return 0;
2185                 return ret;
2186         }
2187
2188         if (!di)
2189                 ret = btrfs_del_item(trans, root, path);
2190         else
2191                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2192         BUG_ON(ret);
2193         btrfs_free_path(path);
2194         btrfs_commit_transaction(trans, root);
2195         return ret;
2196 }
2197
2198 static int create_inode_item(struct btrfs_root *root,
2199                              struct inode_record *rec,
2200                              struct inode_backref *backref, int root_dir)
2201 {
2202         struct btrfs_trans_handle *trans;
2203         struct btrfs_inode_item inode_item;
2204         time_t now = time(NULL);
2205         int ret;
2206
2207         trans = btrfs_start_transaction(root, 1);
2208         if (IS_ERR(trans)) {
2209                 ret = PTR_ERR(trans);
2210                 return ret;
2211         }
2212
2213         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2214                 "be incomplete, please check permissions and content after "
2215                 "the fsck completes.\n", (unsigned long long)root->objectid,
2216                 (unsigned long long)rec->ino);
2217
2218         memset(&inode_item, 0, sizeof(inode_item));
2219         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2220         if (root_dir)
2221                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2222         else
2223                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2224         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2225         if (rec->found_dir_item) {
2226                 if (rec->found_file_extent)
2227                         fprintf(stderr, "root %llu inode %llu has both a dir "
2228                                 "item and extents, unsure if it is a dir or a "
2229                                 "regular file so setting it as a directory\n",
2230                                 (unsigned long long)root->objectid,
2231                                 (unsigned long long)rec->ino);
2232                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2233                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2234         } else if (!rec->found_dir_item) {
2235                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2236                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2237         }
2238         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2239         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2240         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2241         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2242         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2243         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2244         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2245         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2246
2247         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2248         BUG_ON(ret);
2249         btrfs_commit_transaction(trans, root);
2250         return 0;
2251 }
2252
2253 static int repair_inode_backrefs(struct btrfs_root *root,
2254                                  struct inode_record *rec,
2255                                  struct cache_tree *inode_cache,
2256                                  int delete)
2257 {
2258         struct inode_backref *tmp, *backref;
2259         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2260         int ret = 0;
2261         int repaired = 0;
2262
2263         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2264                 if (!delete && rec->ino == root_dirid) {
2265                         if (!rec->found_inode_item) {
2266                                 ret = create_inode_item(root, rec, backref, 1);
2267                                 if (ret)
2268                                         break;
2269                                 repaired++;
2270                         }
2271                 }
2272
2273                 /* Index 0 for root dir's are special, don't mess with it */
2274                 if (rec->ino == root_dirid && backref->index == 0)
2275                         continue;
2276
2277                 if (delete &&
2278                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2279                      (backref->found_dir_index && backref->found_inode_ref &&
2280                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2281                         ret = delete_dir_index(root, inode_cache, rec, backref);
2282                         if (ret)
2283                                 break;
2284                         repaired++;
2285                         list_del(&backref->list);
2286                         free(backref);
2287                 }
2288
2289                 if (!delete && !backref->found_dir_index &&
2290                     backref->found_dir_item && backref->found_inode_ref) {
2291                         ret = add_missing_dir_index(root, inode_cache, rec,
2292                                                     backref);
2293                         if (ret)
2294                                 break;
2295                         repaired++;
2296                         if (backref->found_dir_item &&
2297                             backref->found_dir_index &&
2298                             backref->found_dir_index) {
2299                                 if (!backref->errors &&
2300                                     backref->found_inode_ref) {
2301                                         list_del(&backref->list);
2302                                         free(backref);
2303                                 }
2304                         }
2305                 }
2306
2307                 if (!delete && (!backref->found_dir_index &&
2308                                 !backref->found_dir_item &&
2309                                 backref->found_inode_ref)) {
2310                         struct btrfs_trans_handle *trans;
2311                         struct btrfs_key location;
2312
2313                         ret = check_dir_conflict(root, backref->name,
2314                                                  backref->namelen,
2315                                                  backref->dir,
2316                                                  backref->index);
2317                         if (ret) {
2318                                 /*
2319                                  * let nlink fixing routine to handle it,
2320                                  * which can do it better.
2321                                  */
2322                                 ret = 0;
2323                                 break;
2324                         }
2325                         location.objectid = rec->ino;
2326                         location.type = BTRFS_INODE_ITEM_KEY;
2327                         location.offset = 0;
2328
2329                         trans = btrfs_start_transaction(root, 1);
2330                         if (IS_ERR(trans)) {
2331                                 ret = PTR_ERR(trans);
2332                                 break;
2333                         }
2334                         fprintf(stderr, "adding missing dir index/item pair "
2335                                 "for inode %llu\n",
2336                                 (unsigned long long)rec->ino);
2337                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2338                                                     backref->namelen,
2339                                                     backref->dir, &location,
2340                                                     imode_to_type(rec->imode),
2341                                                     backref->index);
2342                         BUG_ON(ret);
2343                         btrfs_commit_transaction(trans, root);
2344                         repaired++;
2345                 }
2346
2347                 if (!delete && (backref->found_inode_ref &&
2348                                 backref->found_dir_index &&
2349                                 backref->found_dir_item &&
2350                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2351                                 !rec->found_inode_item)) {
2352                         ret = create_inode_item(root, rec, backref, 0);
2353                         if (ret)
2354                                 break;
2355                         repaired++;
2356                 }
2357
2358         }
2359         return ret ? ret : repaired;
2360 }
2361
2362 /*
2363  * To determine the file type for nlink/inode_item repair
2364  *
2365  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2366  * Return -ENOENT if file type is not found.
2367  */
2368 static int find_file_type(struct inode_record *rec, u8 *type)
2369 {
2370         struct inode_backref *backref;
2371
2372         /* For inode item recovered case */
2373         if (rec->found_inode_item) {
2374                 *type = imode_to_type(rec->imode);
2375                 return 0;
2376         }
2377
2378         list_for_each_entry(backref, &rec->backrefs, list) {
2379                 if (backref->found_dir_index || backref->found_dir_item) {
2380                         *type = backref->filetype;
2381                         return 0;
2382                 }
2383         }
2384         return -ENOENT;
2385 }
2386
2387 /*
2388  * To determine the file name for nlink repair
2389  *
2390  * Return 0 if file name is found, set name and namelen.
2391  * Return -ENOENT if file name is not found.
2392  */
2393 static int find_file_name(struct inode_record *rec,
2394                           char *name, int *namelen)
2395 {
2396         struct inode_backref *backref;
2397
2398         list_for_each_entry(backref, &rec->backrefs, list) {
2399                 if (backref->found_dir_index || backref->found_dir_item ||
2400                     backref->found_inode_ref) {
2401                         memcpy(name, backref->name, backref->namelen);
2402                         *namelen = backref->namelen;
2403                         return 0;
2404                 }
2405         }
2406         return -ENOENT;
2407 }
2408
2409 /* Reset the nlink of the inode to the correct one */
2410 static int reset_nlink(struct btrfs_trans_handle *trans,
2411                        struct btrfs_root *root,
2412                        struct btrfs_path *path,
2413                        struct inode_record *rec)
2414 {
2415         struct inode_backref *backref;
2416         struct inode_backref *tmp;
2417         struct btrfs_key key;
2418         struct btrfs_inode_item *inode_item;
2419         int ret = 0;
2420
2421         /* We don't believe this either, reset it and iterate backref */
2422         rec->found_link = 0;
2423
2424         /* Remove all backref including the valid ones */
2425         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2426                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2427                                    backref->index, backref->name,
2428                                    backref->namelen, 0);
2429                 if (ret < 0)
2430                         goto out;
2431
2432                 /* remove invalid backref, so it won't be added back */
2433                 if (!(backref->found_dir_index &&
2434                       backref->found_dir_item &&
2435                       backref->found_inode_ref)) {
2436                         list_del(&backref->list);
2437                         free(backref);
2438                 } else {
2439                         rec->found_link++;
2440                 }
2441         }
2442
2443         /* Set nlink to 0 */
2444         key.objectid = rec->ino;
2445         key.type = BTRFS_INODE_ITEM_KEY;
2446         key.offset = 0;
2447         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2448         if (ret < 0)
2449                 goto out;
2450         if (ret > 0) {
2451                 ret = -ENOENT;
2452                 goto out;
2453         }
2454         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2455                                     struct btrfs_inode_item);
2456         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2457         btrfs_mark_buffer_dirty(path->nodes[0]);
2458         btrfs_release_path(path);
2459
2460         /*
2461          * Add back valid inode_ref/dir_item/dir_index,
2462          * add_link() will handle the nlink inc, so new nlink must be correct
2463          */
2464         list_for_each_entry(backref, &rec->backrefs, list) {
2465                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2466                                      backref->name, backref->namelen,
2467                                      backref->filetype, &backref->index, 1);
2468                 if (ret < 0)
2469                         goto out;
2470         }
2471 out:
2472         btrfs_release_path(path);
2473         return ret;
2474 }
2475
2476 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2477                                struct btrfs_root *root,
2478                                struct btrfs_path *path,
2479                                struct inode_record *rec)
2480 {
2481         char *dir_name = "lost+found";
2482         char namebuf[BTRFS_NAME_LEN] = {0};
2483         u64 lost_found_ino;
2484         u32 mode = 0700;
2485         u8 type = 0;
2486         int namelen = 0;
2487         int name_recovered = 0;
2488         int type_recovered = 0;
2489         int ret = 0;
2490
2491         /*
2492          * Get file name and type first before these invalid inode ref
2493          * are deleted by remove_all_invalid_backref()
2494          */
2495         name_recovered = !find_file_name(rec, namebuf, &namelen);
2496         type_recovered = !find_file_type(rec, &type);
2497
2498         if (!name_recovered) {
2499                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2500                        rec->ino, rec->ino);
2501                 namelen = count_digits(rec->ino);
2502                 sprintf(namebuf, "%llu", rec->ino);
2503                 name_recovered = 1;
2504         }
2505         if (!type_recovered) {
2506                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2507                        rec->ino);
2508                 type = BTRFS_FT_REG_FILE;
2509                 type_recovered = 1;
2510         }
2511
2512         ret = reset_nlink(trans, root, path, rec);
2513         if (ret < 0) {
2514                 fprintf(stderr,
2515                         "Failed to reset nlink for inode %llu: %s\n",
2516                         rec->ino, strerror(-ret));
2517                 goto out;
2518         }
2519
2520         if (rec->found_link == 0) {
2521                 lost_found_ino = root->highest_inode;
2522                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2523                         ret = -EOVERFLOW;
2524                         goto out;
2525                 }
2526                 lost_found_ino++;
2527                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2528                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2529                                   mode);
2530                 if (ret < 0) {
2531                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2532                                 dir_name, strerror(-ret));
2533                         goto out;
2534                 }
2535                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2536                                      namebuf, namelen, type, NULL, 1);
2537                 /*
2538                  * Add ".INO" suffix several times to handle case where
2539                  * "FILENAME.INO" is already taken by another file.
2540                  */
2541                 while (ret == -EEXIST) {
2542                         /*
2543                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2544                          */
2545                         if (namelen + count_digits(rec->ino) + 1 >
2546                             BTRFS_NAME_LEN) {
2547                                 ret = -EFBIG;
2548                                 goto out;
2549                         }
2550                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2551                                  ".%llu", rec->ino);
2552                         namelen += count_digits(rec->ino) + 1;
2553                         ret = btrfs_add_link(trans, root, rec->ino,
2554                                              lost_found_ino, namebuf,
2555                                              namelen, type, NULL, 1);
2556                 }
2557                 if (ret < 0) {
2558                         fprintf(stderr,
2559                                 "Failed to link the inode %llu to %s dir: %s\n",
2560                                 rec->ino, dir_name, strerror(-ret));
2561                         goto out;
2562                 }
2563                 /*
2564                  * Just increase the found_link, don't actually add the
2565                  * backref. This will make things easier and this inode
2566                  * record will be freed after the repair is done.
2567                  * So fsck will not report problem about this inode.
2568                  */
2569                 rec->found_link++;
2570                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2571                        namelen, namebuf, dir_name);
2572         }
2573         printf("Fixed the nlink of inode %llu\n", rec->ino);
2574 out:
2575         /*
2576          * Clear the flag anyway, or we will loop forever for the same inode
2577          * as it will not be removed from the bad inode list and the dead loop
2578          * happens.
2579          */
2580         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2581         btrfs_release_path(path);
2582         return ret;
2583 }
2584
2585 /*
2586  * Check if there is any normal(reg or prealloc) file extent for given
2587  * ino.
2588  * This is used to determine the file type when neither its dir_index/item or
2589  * inode_item exists.
2590  *
2591  * This will *NOT* report error, if any error happens, just consider it does
2592  * not have any normal file extent.
2593  */
2594 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2595 {
2596         struct btrfs_path *path;
2597         struct btrfs_key key;
2598         struct btrfs_key found_key;
2599         struct btrfs_file_extent_item *fi;
2600         u8 type;
2601         int ret = 0;
2602
2603         path = btrfs_alloc_path();
2604         if (!path)
2605                 goto out;
2606         key.objectid = ino;
2607         key.type = BTRFS_EXTENT_DATA_KEY;
2608         key.offset = 0;
2609
2610         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2611         if (ret < 0) {
2612                 ret = 0;
2613                 goto out;
2614         }
2615         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2616                 ret = btrfs_next_leaf(root, path);
2617                 if (ret) {
2618                         ret = 0;
2619                         goto out;
2620                 }
2621         }
2622         while (1) {
2623                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2624                                       path->slots[0]);
2625                 if (found_key.objectid != ino ||
2626                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2627                         break;
2628                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2629                                     struct btrfs_file_extent_item);
2630                 type = btrfs_file_extent_type(path->nodes[0], fi);
2631                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2632                         ret = 1;
2633                         goto out;
2634                 }
2635         }
2636 out:
2637         btrfs_free_path(path);
2638         return ret;
2639 }
2640
2641 static u32 btrfs_type_to_imode(u8 type)
2642 {
2643         static u32 imode_by_btrfs_type[] = {
2644                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2645                 [BTRFS_FT_DIR]          = S_IFDIR,
2646                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2647                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2648                 [BTRFS_FT_FIFO]         = S_IFIFO,
2649                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2650                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2651         };
2652
2653         return imode_by_btrfs_type[(type)];
2654 }
2655
2656 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2657                                 struct btrfs_root *root,
2658                                 struct btrfs_path *path,
2659                                 struct inode_record *rec)
2660 {
2661         u8 filetype;
2662         u32 mode = 0700;
2663         int type_recovered = 0;
2664         int ret = 0;
2665
2666         printf("Trying to rebuild inode:%llu\n", rec->ino);
2667
2668         type_recovered = !find_file_type(rec, &filetype);
2669
2670         /*
2671          * Try to determine inode type if type not found.
2672          *
2673          * For found regular file extent, it must be FILE.
2674          * For found dir_item/index, it must be DIR.
2675          *
2676          * For undetermined one, use FILE as fallback.
2677          *
2678          * TODO:
2679          * 1. If found backref(inode_index/item is already handled) to it,
2680          *    it must be DIR.
2681          *    Need new inode-inode ref structure to allow search for that.
2682          */
2683         if (!type_recovered) {
2684                 if (rec->found_file_extent &&
2685                     find_normal_file_extent(root, rec->ino)) {
2686                         type_recovered = 1;
2687                         filetype = BTRFS_FT_REG_FILE;
2688                 } else if (rec->found_dir_item) {
2689                         type_recovered = 1;
2690                         filetype = BTRFS_FT_DIR;
2691                 } else if (!list_empty(&rec->orphan_extents)) {
2692                         type_recovered = 1;
2693                         filetype = BTRFS_FT_REG_FILE;
2694                 } else{
2695                         printf("Can't determint the filetype for inode %llu, assume it is a normal file\n",
2696                                rec->ino);
2697                         type_recovered = 1;
2698                         filetype = BTRFS_FT_REG_FILE;
2699                 }
2700         }
2701
2702         ret = btrfs_new_inode(trans, root, rec->ino,
2703                               mode | btrfs_type_to_imode(filetype));
2704         if (ret < 0)
2705                 goto out;
2706
2707         /*
2708          * Here inode rebuild is done, we only rebuild the inode item,
2709          * don't repair the nlink(like move to lost+found).
2710          * That is the job of nlink repair.
2711          *
2712          * We just fill the record and return
2713          */
2714         rec->found_dir_item = 1;
2715         rec->imode = mode | btrfs_type_to_imode(filetype);
2716         rec->nlink = 0;
2717         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2718         /* Ensure the inode_nlinks repair function will be called */
2719         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2720 out:
2721         return ret;
2722 }
2723
2724 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2725                                       struct btrfs_root *root,
2726                                       struct btrfs_path *path,
2727                                       struct inode_record *rec)
2728 {
2729         struct orphan_data_extent *orphan;
2730         struct orphan_data_extent *tmp;
2731         int ret = 0;
2732
2733         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2734                 /*
2735                  * Check for conflicting file extents
2736                  *
2737                  * Here we don't know whether the extents is compressed or not,
2738                  * so we can only assume it not compressed nor data offset,
2739                  * and use its disk_len as extent length.
2740                  */
2741                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2742                                        orphan->offset, orphan->disk_len, 0);
2743                 btrfs_release_path(path);
2744                 if (ret < 0)
2745                         goto out;
2746                 if (!ret) {
2747                         fprintf(stderr,
2748                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2749                                 orphan->disk_bytenr, orphan->disk_len);
2750                         ret = btrfs_free_extent(trans,
2751                                         root->fs_info->extent_root,
2752                                         orphan->disk_bytenr, orphan->disk_len,
2753                                         0, root->objectid, orphan->objectid,
2754                                         orphan->offset);
2755                         if (ret < 0)
2756                                 goto out;
2757                 }
2758                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2759                                 orphan->offset, orphan->disk_bytenr,
2760                                 orphan->disk_len, orphan->disk_len);
2761                 if (ret < 0)
2762                         goto out;
2763
2764                 /* Update file size info */
2765                 rec->found_size += orphan->disk_len;
2766                 if (rec->found_size == rec->nbytes)
2767                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2768
2769                 /* Update the file extent hole info too */
2770                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2771                                            orphan->disk_len);
2772                 if (ret < 0)
2773                         goto out;
2774                 if (RB_EMPTY_ROOT(&rec->holes))
2775                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2776
2777                 list_del(&orphan->list);
2778                 free(orphan);
2779         }
2780         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2781 out:
2782         return ret;
2783 }
2784
2785 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2786                                         struct btrfs_root *root,
2787                                         struct btrfs_path *path,
2788                                         struct inode_record *rec)
2789 {
2790         struct rb_node *node;
2791         struct file_extent_hole *hole;
2792         int found = 0;
2793         int ret = 0;
2794
2795         node = rb_first(&rec->holes);
2796
2797         while (node) {
2798                 found = 1;
2799                 hole = rb_entry(node, struct file_extent_hole, node);
2800                 ret = btrfs_punch_hole(trans, root, rec->ino,
2801                                        hole->start, hole->len);
2802                 if (ret < 0)
2803                         goto out;
2804                 ret = del_file_extent_hole(&rec->holes, hole->start,
2805                                            hole->len);
2806                 if (ret < 0)
2807                         goto out;
2808                 if (RB_EMPTY_ROOT(&rec->holes))
2809                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2810                 node = rb_first(&rec->holes);
2811         }
2812         /* special case for a file losing all its file extent */
2813         if (!found) {
2814                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2815                                        round_up(rec->isize, root->sectorsize));
2816                 if (ret < 0)
2817                         goto out;
2818         }
2819         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2820                rec->ino, root->objectid);
2821 out:
2822         return ret;
2823 }
2824
2825 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2826 {
2827         struct btrfs_trans_handle *trans;
2828         struct btrfs_path *path;
2829         int ret = 0;
2830
2831         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2832                              I_ERR_NO_ORPHAN_ITEM |
2833                              I_ERR_LINK_COUNT_WRONG |
2834                              I_ERR_NO_INODE_ITEM |
2835                              I_ERR_FILE_EXTENT_ORPHAN |
2836                              I_ERR_FILE_EXTENT_DISCOUNT|
2837                              I_ERR_FILE_NBYTES_WRONG)))
2838                 return rec->errors;
2839
2840         path = btrfs_alloc_path();
2841         if (!path)
2842                 return -ENOMEM;
2843
2844         /*
2845          * For nlink repair, it may create a dir and add link, so
2846          * 2 for parent(256)'s dir_index and dir_item
2847          * 2 for lost+found dir's inode_item and inode_ref
2848          * 1 for the new inode_ref of the file
2849          * 2 for lost+found dir's dir_index and dir_item for the file
2850          */
2851         trans = btrfs_start_transaction(root, 7);
2852         if (IS_ERR(trans)) {
2853                 btrfs_free_path(path);
2854                 return PTR_ERR(trans);
2855         }
2856
2857         if (rec->errors & I_ERR_NO_INODE_ITEM)
2858                 ret = repair_inode_no_item(trans, root, path, rec);
2859         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2860                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2861         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2862                 ret = repair_inode_discount_extent(trans, root, path, rec);
2863         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2864                 ret = repair_inode_isize(trans, root, path, rec);
2865         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2866                 ret = repair_inode_orphan_item(trans, root, path, rec);
2867         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2868                 ret = repair_inode_nlinks(trans, root, path, rec);
2869         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2870                 ret = repair_inode_nbytes(trans, root, path, rec);
2871         btrfs_commit_transaction(trans, root);
2872         btrfs_free_path(path);
2873         return ret;
2874 }
2875
2876 static int check_inode_recs(struct btrfs_root *root,
2877                             struct cache_tree *inode_cache)
2878 {
2879         struct cache_extent *cache;
2880         struct ptr_node *node;
2881         struct inode_record *rec;
2882         struct inode_backref *backref;
2883         int stage = 0;
2884         int ret = 0;
2885         int err = 0;
2886         u64 error = 0;
2887         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2888
2889         if (btrfs_root_refs(&root->root_item) == 0) {
2890                 if (!cache_tree_empty(inode_cache))
2891                         fprintf(stderr, "warning line %d\n", __LINE__);
2892                 return 0;
2893         }
2894
2895         /*
2896          * We need to record the highest inode number for later 'lost+found'
2897          * dir creation.
2898          * We must select a ino not used/refered by any existing inode, or
2899          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2900          * this may cause 'lost+found' dir has wrong nlinks.
2901          */
2902         cache = last_cache_extent(inode_cache);
2903         if (cache) {
2904                 node = container_of(cache, struct ptr_node, cache);
2905                 rec = node->data;
2906                 if (rec->ino > root->highest_inode)
2907                         root->highest_inode = rec->ino;
2908         }
2909
2910         /*
2911          * We need to repair backrefs first because we could change some of the
2912          * errors in the inode recs.
2913          *
2914          * We also need to go through and delete invalid backrefs first and then
2915          * add the correct ones second.  We do this because we may get EEXIST
2916          * when adding back the correct index because we hadn't yet deleted the
2917          * invalid index.
2918          *
2919          * For example, if we were missing a dir index then the directories
2920          * isize would be wrong, so if we fixed the isize to what we thought it
2921          * would be and then fixed the backref we'd still have a invalid fs, so
2922          * we need to add back the dir index and then check to see if the isize
2923          * is still wrong.
2924          */
2925         while (stage < 3) {
2926                 stage++;
2927                 if (stage == 3 && !err)
2928                         break;
2929
2930                 cache = search_cache_extent(inode_cache, 0);
2931                 while (repair && cache) {
2932                         node = container_of(cache, struct ptr_node, cache);
2933                         rec = node->data;
2934                         cache = next_cache_extent(cache);
2935
2936                         /* Need to free everything up and rescan */
2937                         if (stage == 3) {
2938                                 remove_cache_extent(inode_cache, &node->cache);
2939                                 free(node);
2940                                 free_inode_rec(rec);
2941                                 continue;
2942                         }
2943
2944                         if (list_empty(&rec->backrefs))
2945                                 continue;
2946
2947                         ret = repair_inode_backrefs(root, rec, inode_cache,
2948                                                     stage == 1);
2949                         if (ret < 0) {
2950                                 err = ret;
2951                                 stage = 2;
2952                                 break;
2953                         } if (ret > 0) {
2954                                 err = -EAGAIN;
2955                         }
2956                 }
2957         }
2958         if (err)
2959                 return err;
2960
2961         rec = get_inode_rec(inode_cache, root_dirid, 0);
2962         BUG_ON(IS_ERR(rec));
2963         if (rec) {
2964                 ret = check_root_dir(rec);
2965                 if (ret) {
2966                         fprintf(stderr, "root %llu root dir %llu error\n",
2967                                 (unsigned long long)root->root_key.objectid,
2968                                 (unsigned long long)root_dirid);
2969                         print_inode_error(root, rec);
2970                         error++;
2971                 }
2972         } else {
2973                 if (repair) {
2974                         struct btrfs_trans_handle *trans;
2975
2976                         trans = btrfs_start_transaction(root, 1);
2977                         if (IS_ERR(trans)) {
2978                                 err = PTR_ERR(trans);
2979                                 return err;
2980                         }
2981
2982                         fprintf(stderr,
2983                                 "root %llu missing its root dir, recreating\n",
2984                                 (unsigned long long)root->objectid);
2985
2986                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2987                         BUG_ON(ret);
2988
2989                         btrfs_commit_transaction(trans, root);
2990                         return -EAGAIN;
2991                 }
2992
2993                 fprintf(stderr, "root %llu root dir %llu not found\n",
2994                         (unsigned long long)root->root_key.objectid,
2995                         (unsigned long long)root_dirid);
2996         }
2997
2998         while (1) {
2999                 cache = search_cache_extent(inode_cache, 0);
3000                 if (!cache)
3001                         break;
3002                 node = container_of(cache, struct ptr_node, cache);
3003                 rec = node->data;
3004                 remove_cache_extent(inode_cache, &node->cache);
3005                 free(node);
3006                 if (rec->ino == root_dirid ||
3007                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3008                         free_inode_rec(rec);
3009                         continue;
3010                 }
3011
3012                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3013                         ret = check_orphan_item(root, rec->ino);
3014                         if (ret == 0)
3015                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3016                         if (can_free_inode_rec(rec)) {
3017                                 free_inode_rec(rec);
3018                                 continue;
3019                         }
3020                 }
3021
3022                 if (!rec->found_inode_item)
3023                         rec->errors |= I_ERR_NO_INODE_ITEM;
3024                 if (rec->found_link != rec->nlink)
3025                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3026                 if (repair) {
3027                         ret = try_repair_inode(root, rec);
3028                         if (ret == 0 && can_free_inode_rec(rec)) {
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032                         ret = 0;
3033                 }
3034
3035                 if (!(repair && ret == 0))
3036                         error++;
3037                 print_inode_error(root, rec);
3038                 list_for_each_entry(backref, &rec->backrefs, list) {
3039                         if (!backref->found_dir_item)
3040                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3041                         if (!backref->found_dir_index)
3042                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3043                         if (!backref->found_inode_ref)
3044                                 backref->errors |= REF_ERR_NO_INODE_REF;
3045                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3046                                 " namelen %u name %s filetype %d errors %x",
3047                                 (unsigned long long)backref->dir,
3048                                 (unsigned long long)backref->index,
3049                                 backref->namelen, backref->name,
3050                                 backref->filetype, backref->errors);
3051                         print_ref_error(backref->errors);
3052                 }
3053                 free_inode_rec(rec);
3054         }
3055         return (error > 0) ? -1 : 0;
3056 }
3057
3058 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3059                                         u64 objectid)
3060 {
3061         struct cache_extent *cache;
3062         struct root_record *rec = NULL;
3063         int ret;
3064
3065         cache = lookup_cache_extent(root_cache, objectid, 1);
3066         if (cache) {
3067                 rec = container_of(cache, struct root_record, cache);
3068         } else {
3069                 rec = calloc(1, sizeof(*rec));
3070                 if (!rec)
3071                         return ERR_PTR(-ENOMEM);
3072                 rec->objectid = objectid;
3073                 INIT_LIST_HEAD(&rec->backrefs);
3074                 rec->cache.start = objectid;
3075                 rec->cache.size = 1;
3076
3077                 ret = insert_cache_extent(root_cache, &rec->cache);
3078                 if (ret)
3079                         return ERR_PTR(-EEXIST);
3080         }
3081         return rec;
3082 }
3083
3084 static struct root_backref *get_root_backref(struct root_record *rec,
3085                                              u64 ref_root, u64 dir, u64 index,
3086                                              const char *name, int namelen)
3087 {
3088         struct root_backref *backref;
3089
3090         list_for_each_entry(backref, &rec->backrefs, list) {
3091                 if (backref->ref_root != ref_root || backref->dir != dir ||
3092                     backref->namelen != namelen)
3093                         continue;
3094                 if (memcmp(name, backref->name, namelen))
3095                         continue;
3096                 return backref;
3097         }
3098
3099         backref = calloc(1, sizeof(*backref) + namelen + 1);
3100         if (!backref)
3101                 return NULL;
3102         backref->ref_root = ref_root;
3103         backref->dir = dir;
3104         backref->index = index;
3105         backref->namelen = namelen;
3106         memcpy(backref->name, name, namelen);
3107         backref->name[namelen] = '\0';
3108         list_add_tail(&backref->list, &rec->backrefs);
3109         return backref;
3110 }
3111
3112 static void free_root_record(struct cache_extent *cache)
3113 {
3114         struct root_record *rec;
3115         struct root_backref *backref;
3116
3117         rec = container_of(cache, struct root_record, cache);
3118         while (!list_empty(&rec->backrefs)) {
3119                 backref = list_entry(rec->backrefs.next,
3120                                      struct root_backref, list);
3121                 list_del(&backref->list);
3122                 free(backref);
3123         }
3124
3125         kfree(rec);
3126 }
3127
3128 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3129
3130 static int add_root_backref(struct cache_tree *root_cache,
3131                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3132                             const char *name, int namelen,
3133                             int item_type, int errors)
3134 {
3135         struct root_record *rec;
3136         struct root_backref *backref;
3137
3138         rec = get_root_rec(root_cache, root_id);
3139         BUG_ON(IS_ERR(rec));
3140         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3141         BUG_ON(!backref);
3142
3143         backref->errors |= errors;
3144
3145         if (item_type != BTRFS_DIR_ITEM_KEY) {
3146                 if (backref->found_dir_index || backref->found_back_ref ||
3147                     backref->found_forward_ref) {
3148                         if (backref->index != index)
3149                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3150                 } else {
3151                         backref->index = index;
3152                 }
3153         }
3154
3155         if (item_type == BTRFS_DIR_ITEM_KEY) {
3156                 if (backref->found_forward_ref)
3157                         rec->found_ref++;
3158                 backref->found_dir_item = 1;
3159         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3160                 backref->found_dir_index = 1;
3161         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3162                 if (backref->found_forward_ref)
3163                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3164                 else if (backref->found_dir_item)
3165                         rec->found_ref++;
3166                 backref->found_forward_ref = 1;
3167         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3168                 if (backref->found_back_ref)
3169                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3170                 backref->found_back_ref = 1;
3171         } else {
3172                 BUG_ON(1);
3173         }
3174
3175         if (backref->found_forward_ref && backref->found_dir_item)
3176                 backref->reachable = 1;
3177         return 0;
3178 }
3179
3180 static int merge_root_recs(struct btrfs_root *root,
3181                            struct cache_tree *src_cache,
3182                            struct cache_tree *dst_cache)
3183 {
3184         struct cache_extent *cache;
3185         struct ptr_node *node;
3186         struct inode_record *rec;
3187         struct inode_backref *backref;
3188         int ret = 0;
3189
3190         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3191                 free_inode_recs_tree(src_cache);
3192                 return 0;
3193         }
3194
3195         while (1) {
3196                 cache = search_cache_extent(src_cache, 0);
3197                 if (!cache)
3198                         break;
3199                 node = container_of(cache, struct ptr_node, cache);
3200                 rec = node->data;
3201                 remove_cache_extent(src_cache, &node->cache);
3202                 free(node);
3203
3204                 ret = is_child_root(root, root->objectid, rec->ino);
3205                 if (ret < 0)
3206                         break;
3207                 else if (ret == 0)
3208                         goto skip;
3209
3210                 list_for_each_entry(backref, &rec->backrefs, list) {
3211                         BUG_ON(backref->found_inode_ref);
3212                         if (backref->found_dir_item)
3213                                 add_root_backref(dst_cache, rec->ino,
3214                                         root->root_key.objectid, backref->dir,
3215                                         backref->index, backref->name,
3216                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3217                                         backref->errors);
3218                         if (backref->found_dir_index)
3219                                 add_root_backref(dst_cache, rec->ino,
3220                                         root->root_key.objectid, backref->dir,
3221                                         backref->index, backref->name,
3222                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3223                                         backref->errors);
3224                 }
3225 skip:
3226                 free_inode_rec(rec);
3227         }
3228         if (ret < 0)
3229                 return ret;
3230         return 0;
3231 }
3232
3233 static int check_root_refs(struct btrfs_root *root,
3234                            struct cache_tree *root_cache)
3235 {
3236         struct root_record *rec;
3237         struct root_record *ref_root;
3238         struct root_backref *backref;
3239         struct cache_extent *cache;
3240         int loop = 1;
3241         int ret;
3242         int error;
3243         int errors = 0;
3244
3245         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3246         BUG_ON(IS_ERR(rec));
3247         rec->found_ref = 1;
3248
3249         /* fixme: this can not detect circular references */
3250         while (loop) {
3251                 loop = 0;
3252                 cache = search_cache_extent(root_cache, 0);
3253                 while (1) {
3254                         if (!cache)
3255                                 break;
3256                         rec = container_of(cache, struct root_record, cache);
3257                         cache = next_cache_extent(cache);
3258
3259                         if (rec->found_ref == 0)
3260                                 continue;
3261
3262                         list_for_each_entry(backref, &rec->backrefs, list) {
3263                                 if (!backref->reachable)
3264                                         continue;
3265
3266                                 ref_root = get_root_rec(root_cache,
3267                                                         backref->ref_root);
3268                                 BUG_ON(IS_ERR(ref_root));
3269                                 if (ref_root->found_ref > 0)
3270                                         continue;
3271
3272                                 backref->reachable = 0;
3273                                 rec->found_ref--;
3274                                 if (rec->found_ref == 0)
3275                                         loop = 1;
3276                         }
3277                 }
3278         }
3279
3280         cache = search_cache_extent(root_cache, 0);
3281         while (1) {
3282                 if (!cache)
3283                         break;
3284                 rec = container_of(cache, struct root_record, cache);
3285                 cache = next_cache_extent(cache);
3286
3287                 if (rec->found_ref == 0 &&
3288                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3289                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3290                         ret = check_orphan_item(root->fs_info->tree_root,
3291                                                 rec->objectid);
3292                         if (ret == 0)
3293                                 continue;
3294
3295                         /*
3296                          * If we don't have a root item then we likely just have
3297                          * a dir item in a snapshot for this root but no actual
3298                          * ref key or anything so it's meaningless.
3299                          */
3300                         if (!rec->found_root_item)
3301                                 continue;
3302                         errors++;
3303                         fprintf(stderr, "fs tree %llu not referenced\n",
3304                                 (unsigned long long)rec->objectid);
3305                 }
3306
3307                 error = 0;
3308                 if (rec->found_ref > 0 && !rec->found_root_item)
3309                         error = 1;
3310                 list_for_each_entry(backref, &rec->backrefs, list) {
3311                         if (!backref->found_dir_item)
3312                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3313                         if (!backref->found_dir_index)
3314                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3315                         if (!backref->found_back_ref)
3316                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3317                         if (!backref->found_forward_ref)
3318                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3319                         if (backref->reachable && backref->errors)
3320                                 error = 1;
3321                 }
3322                 if (!error)
3323                         continue;
3324
3325                 errors++;
3326                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3327                         (unsigned long long)rec->objectid, rec->found_ref,
3328                          rec->found_root_item ? "" : "not found");
3329
3330                 list_for_each_entry(backref, &rec->backrefs, list) {
3331                         if (!backref->reachable)
3332                                 continue;
3333                         if (!backref->errors && rec->found_root_item)
3334                                 continue;
3335                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3336                                 " index %llu namelen %u name %s errors %x\n",
3337                                 (unsigned long long)backref->ref_root,
3338                                 (unsigned long long)backref->dir,
3339                                 (unsigned long long)backref->index,
3340                                 backref->namelen, backref->name,
3341                                 backref->errors);
3342                         print_ref_error(backref->errors);
3343                 }
3344         }
3345         return errors > 0 ? 1 : 0;
3346 }
3347
3348 static int process_root_ref(struct extent_buffer *eb, int slot,
3349                             struct btrfs_key *key,
3350                             struct cache_tree *root_cache)
3351 {
3352         u64 dirid;
3353         u64 index;
3354         u32 len;
3355         u32 name_len;
3356         struct btrfs_root_ref *ref;
3357         char namebuf[BTRFS_NAME_LEN];
3358         int error;
3359
3360         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3361
3362         dirid = btrfs_root_ref_dirid(eb, ref);
3363         index = btrfs_root_ref_sequence(eb, ref);
3364         name_len = btrfs_root_ref_name_len(eb, ref);
3365
3366         if (name_len <= BTRFS_NAME_LEN) {
3367                 len = name_len;
3368                 error = 0;
3369         } else {
3370                 len = BTRFS_NAME_LEN;
3371                 error = REF_ERR_NAME_TOO_LONG;
3372         }
3373         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3374
3375         if (key->type == BTRFS_ROOT_REF_KEY) {
3376                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3377                                  index, namebuf, len, key->type, error);
3378         } else {
3379                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3380                                  index, namebuf, len, key->type, error);
3381         }
3382         return 0;
3383 }
3384
3385 static void free_corrupt_block(struct cache_extent *cache)
3386 {
3387         struct btrfs_corrupt_block *corrupt;
3388
3389         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3390         free(corrupt);
3391 }
3392
3393 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3394
3395 /*
3396  * Repair the btree of the given root.
3397  *
3398  * The fix is to remove the node key in corrupt_blocks cache_tree.
3399  * and rebalance the tree.
3400  * After the fix, the btree should be writeable.
3401  */
3402 static int repair_btree(struct btrfs_root *root,
3403                         struct cache_tree *corrupt_blocks)
3404 {
3405         struct btrfs_trans_handle *trans;
3406         struct btrfs_path *path;
3407         struct btrfs_corrupt_block *corrupt;
3408         struct cache_extent *cache;
3409         struct btrfs_key key;
3410         u64 offset;
3411         int level;
3412         int ret = 0;
3413
3414         if (cache_tree_empty(corrupt_blocks))
3415                 return 0;
3416
3417         path = btrfs_alloc_path();
3418         if (!path)
3419                 return -ENOMEM;
3420
3421         trans = btrfs_start_transaction(root, 1);
3422         if (IS_ERR(trans)) {
3423                 ret = PTR_ERR(trans);
3424                 fprintf(stderr, "Error starting transaction: %s\n",
3425                         strerror(-ret));
3426                 goto out_free_path;
3427         }
3428         cache = first_cache_extent(corrupt_blocks);
3429         while (cache) {
3430                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3431                                        cache);
3432                 level = corrupt->level;
3433                 path->lowest_level = level;
3434                 key.objectid = corrupt->key.objectid;
3435                 key.type = corrupt->key.type;
3436                 key.offset = corrupt->key.offset;
3437
3438                 /*
3439                  * Here we don't want to do any tree balance, since it may
3440                  * cause a balance with corrupted brother leaf/node,
3441                  * so ins_len set to 0 here.
3442                  * Balance will be done after all corrupt node/leaf is deleted.
3443                  */
3444                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3445                 if (ret < 0)
3446                         goto out;
3447                 offset = btrfs_node_blockptr(path->nodes[level],
3448                                              path->slots[level]);
3449
3450                 /* Remove the ptr */
3451                 ret = btrfs_del_ptr(trans, root, path, level,
3452                                     path->slots[level]);
3453                 if (ret < 0)
3454                         goto out;
3455                 /*
3456                  * Remove the corresponding extent
3457                  * return value is not concerned.
3458                  */
3459                 btrfs_release_path(path);
3460                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3461                                         0, root->root_key.objectid,
3462                                         level - 1, 0);
3463                 cache = next_cache_extent(cache);
3464         }
3465
3466         /* Balance the btree using btrfs_search_slot() */
3467         cache = first_cache_extent(corrupt_blocks);
3468         while (cache) {
3469                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3470                                        cache);
3471                 memcpy(&key, &corrupt->key, sizeof(key));
3472                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3473                 if (ret < 0)
3474                         goto out;
3475                 /* return will always >0 since it won't find the item */
3476                 ret = 0;
3477                 btrfs_release_path(path);
3478                 cache = next_cache_extent(cache);
3479         }
3480 out:
3481         btrfs_commit_transaction(trans, root);
3482 out_free_path:
3483         btrfs_free_path(path);
3484         return ret;
3485 }
3486
3487 static int check_fs_root(struct btrfs_root *root,
3488                          struct cache_tree *root_cache,
3489                          struct walk_control *wc)
3490 {
3491         int ret = 0;
3492         int err = 0;
3493         int wret;
3494         int level;
3495         struct btrfs_path path;
3496         struct shared_node root_node;
3497         struct root_record *rec;
3498         struct btrfs_root_item *root_item = &root->root_item;
3499         struct cache_tree corrupt_blocks;
3500         struct orphan_data_extent *orphan;
3501         struct orphan_data_extent *tmp;
3502         enum btrfs_tree_block_status status;
3503
3504         /*
3505          * Reuse the corrupt_block cache tree to record corrupted tree block
3506          *
3507          * Unlike the usage in extent tree check, here we do it in a per
3508          * fs/subvol tree base.
3509          */
3510         cache_tree_init(&corrupt_blocks);
3511         root->fs_info->corrupt_blocks = &corrupt_blocks;
3512
3513         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3514                 rec = get_root_rec(root_cache, root->root_key.objectid);
3515                 BUG_ON(IS_ERR(rec));
3516                 if (btrfs_root_refs(root_item) > 0)
3517                         rec->found_root_item = 1;
3518         }
3519
3520         btrfs_init_path(&path);
3521         memset(&root_node, 0, sizeof(root_node));
3522         cache_tree_init(&root_node.root_cache);
3523         cache_tree_init(&root_node.inode_cache);
3524
3525         /* Move the orphan extent record to corresponding inode_record */
3526         list_for_each_entry_safe(orphan, tmp,
3527                                  &root->orphan_data_extents, list) {
3528                 struct inode_record *inode;
3529
3530                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3531                                       1);
3532                 BUG_ON(IS_ERR(inode));
3533                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3534                 list_move(&orphan->list, &inode->orphan_extents);
3535         }
3536
3537         level = btrfs_header_level(root->node);
3538         memset(wc->nodes, 0, sizeof(wc->nodes));
3539         wc->nodes[level] = &root_node;
3540         wc->active_node = level;
3541         wc->root_level = level;
3542
3543         /* We may not have checked the root block, lets do that now */
3544         if (btrfs_is_leaf(root->node))
3545                 status = btrfs_check_leaf(root, NULL, root->node);
3546         else
3547                 status = btrfs_check_node(root, NULL, root->node);
3548         if (status != BTRFS_TREE_BLOCK_CLEAN)
3549                 return -EIO;
3550
3551         if (btrfs_root_refs(root_item) > 0 ||
3552             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3553                 path.nodes[level] = root->node;
3554                 extent_buffer_get(root->node);
3555                 path.slots[level] = 0;
3556         } else {
3557                 struct btrfs_key key;
3558                 struct btrfs_disk_key found_key;
3559
3560                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3561                 level = root_item->drop_level;
3562                 path.lowest_level = level;
3563                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3564                 if (wret < 0)
3565                         goto skip_walking;
3566                 btrfs_node_key(path.nodes[level], &found_key,
3567                                 path.slots[level]);
3568                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3569                                         sizeof(found_key)));
3570         }
3571
3572         while (1) {
3573                 wret = walk_down_tree(root, &path, wc, &level);
3574                 if (wret < 0)
3575                         ret = wret;
3576                 if (wret != 0)
3577                         break;
3578
3579                 wret = walk_up_tree(root, &path, wc, &level);
3580                 if (wret < 0)
3581                         ret = wret;
3582                 if (wret != 0)
3583                         break;
3584         }
3585 skip_walking:
3586         btrfs_release_path(&path);
3587
3588         if (!cache_tree_empty(&corrupt_blocks)) {
3589                 struct cache_extent *cache;
3590                 struct btrfs_corrupt_block *corrupt;
3591
3592                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3593                        root->root_key.objectid);
3594                 cache = first_cache_extent(&corrupt_blocks);
3595                 while (cache) {
3596                         corrupt = container_of(cache,
3597                                                struct btrfs_corrupt_block,
3598                                                cache);
3599                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3600                                cache->start, corrupt->level,
3601                                corrupt->key.objectid, corrupt->key.type,
3602                                corrupt->key.offset);
3603                         cache = next_cache_extent(cache);
3604                 }
3605                 if (repair) {
3606                         printf("Try to repair the btree for root %llu\n",
3607                                root->root_key.objectid);
3608                         ret = repair_btree(root, &corrupt_blocks);
3609                         if (ret < 0)
3610                                 fprintf(stderr, "Failed to repair btree: %s\n",
3611                                         strerror(-ret));
3612                         if (!ret)
3613                                 printf("Btree for root %llu is fixed\n",
3614                                        root->root_key.objectid);
3615                 }
3616         }
3617
3618         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3619         if (err < 0)
3620                 ret = err;
3621
3622         if (root_node.current) {
3623                 root_node.current->checked = 1;
3624                 maybe_free_inode_rec(&root_node.inode_cache,
3625                                 root_node.current);
3626         }
3627
3628         err = check_inode_recs(root, &root_node.inode_cache);
3629         if (!ret)
3630                 ret = err;
3631
3632         free_corrupt_blocks_tree(&corrupt_blocks);
3633         root->fs_info->corrupt_blocks = NULL;
3634         free_orphan_data_extents(&root->orphan_data_extents);
3635         return ret;
3636 }
3637
3638 static int fs_root_objectid(u64 objectid)
3639 {
3640         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3641             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3642                 return 1;
3643         return is_fstree(objectid);
3644 }
3645
3646 static int check_fs_roots(struct btrfs_root *root,
3647                           struct cache_tree *root_cache)
3648 {
3649         struct btrfs_path path;
3650         struct btrfs_key key;
3651         struct walk_control wc;
3652         struct extent_buffer *leaf, *tree_node;
3653         struct btrfs_root *tmp_root;
3654         struct btrfs_root *tree_root = root->fs_info->tree_root;
3655         int ret;
3656         int err = 0;
3657
3658         if (ctx.progress_enabled) {
3659                 ctx.tp = TASK_FS_ROOTS;
3660                 task_start(ctx.info);
3661         }
3662
3663         /*
3664          * Just in case we made any changes to the extent tree that weren't
3665          * reflected into the free space cache yet.
3666          */
3667         if (repair)
3668                 reset_cached_block_groups(root->fs_info);
3669         memset(&wc, 0, sizeof(wc));
3670         cache_tree_init(&wc.shared);
3671         btrfs_init_path(&path);
3672
3673 again:
3674         key.offset = 0;
3675         key.objectid = 0;
3676         key.type = BTRFS_ROOT_ITEM_KEY;
3677         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3678         if (ret < 0) {
3679                 err = 1;
3680                 goto out;
3681         }
3682         tree_node = tree_root->node;
3683         while (1) {
3684                 if (tree_node != tree_root->node) {
3685                         free_root_recs_tree(root_cache);
3686                         btrfs_release_path(&path);
3687                         goto again;
3688                 }
3689                 leaf = path.nodes[0];
3690                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3691                         ret = btrfs_next_leaf(tree_root, &path);
3692                         if (ret) {
3693                                 if (ret < 0)
3694                                         err = 1;
3695                                 break;
3696                         }
3697                         leaf = path.nodes[0];
3698                 }
3699                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3700                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3701                     fs_root_objectid(key.objectid)) {
3702                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3703                                 tmp_root = btrfs_read_fs_root_no_cache(
3704                                                 root->fs_info, &key);
3705                         } else {
3706                                 key.offset = (u64)-1;
3707                                 tmp_root = btrfs_read_fs_root(
3708                                                 root->fs_info, &key);
3709                         }
3710                         if (IS_ERR(tmp_root)) {
3711                                 err = 1;
3712                                 goto next;
3713                         }
3714                         ret = check_fs_root(tmp_root, root_cache, &wc);
3715                         if (ret == -EAGAIN) {
3716                                 free_root_recs_tree(root_cache);
3717                                 btrfs_release_path(&path);
3718                                 goto again;
3719                         }
3720                         if (ret)
3721                                 err = 1;
3722                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3723                                 btrfs_free_fs_root(tmp_root);
3724                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3725                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3726                         process_root_ref(leaf, path.slots[0], &key,
3727                                          root_cache);
3728                 }
3729 next:
3730                 path.slots[0]++;
3731         }
3732 out:
3733         btrfs_release_path(&path);
3734         if (err)
3735                 free_extent_cache_tree(&wc.shared);
3736         if (!cache_tree_empty(&wc.shared))
3737                 fprintf(stderr, "warning line %d\n", __LINE__);
3738
3739         task_stop(ctx.info);
3740
3741         return err;
3742 }
3743
3744 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3745 {
3746         struct list_head *cur = rec->backrefs.next;
3747         struct extent_backref *back;
3748         struct tree_backref *tback;
3749         struct data_backref *dback;
3750         u64 found = 0;
3751         int err = 0;
3752
3753         while(cur != &rec->backrefs) {
3754                 back = list_entry(cur, struct extent_backref, list);
3755                 cur = cur->next;
3756                 if (!back->found_extent_tree) {
3757                         err = 1;
3758                         if (!print_errs)
3759                                 goto out;
3760                         if (back->is_data) {
3761                                 dback = (struct data_backref *)back;
3762                                 fprintf(stderr, "Backref %llu %s %llu"
3763                                         " owner %llu offset %llu num_refs %lu"
3764                                         " not found in extent tree\n",
3765                                         (unsigned long long)rec->start,
3766                                         back->full_backref ?
3767                                         "parent" : "root",
3768                                         back->full_backref ?
3769                                         (unsigned long long)dback->parent:
3770                                         (unsigned long long)dback->root,
3771                                         (unsigned long long)dback->owner,
3772                                         (unsigned long long)dback->offset,
3773                                         (unsigned long)dback->num_refs);
3774                         } else {
3775                                 tback = (struct tree_backref *)back;
3776                                 fprintf(stderr, "Backref %llu parent %llu"
3777                                         " root %llu not found in extent tree\n",
3778                                         (unsigned long long)rec->start,
3779                                         (unsigned long long)tback->parent,
3780                                         (unsigned long long)tback->root);
3781                         }
3782                 }
3783                 if (!back->is_data && !back->found_ref) {
3784                         err = 1;
3785                         if (!print_errs)
3786                                 goto out;
3787                         tback = (struct tree_backref *)back;
3788                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3789                                 (unsigned long long)rec->start,
3790                                 back->full_backref ? "parent" : "root",
3791                                 back->full_backref ?
3792                                 (unsigned long long)tback->parent :
3793                                 (unsigned long long)tback->root, back);
3794                 }
3795                 if (back->is_data) {
3796                         dback = (struct data_backref *)back;
3797                         if (dback->found_ref != dback->num_refs) {
3798                                 err = 1;
3799                                 if (!print_errs)
3800                                         goto out;
3801                                 fprintf(stderr, "Incorrect local backref count"
3802                                         " on %llu %s %llu owner %llu"
3803                                         " offset %llu found %u wanted %u back %p\n",
3804                                         (unsigned long long)rec->start,
3805                                         back->full_backref ?
3806                                         "parent" : "root",
3807                                         back->full_backref ?
3808                                         (unsigned long long)dback->parent:
3809                                         (unsigned long long)dback->root,
3810                                         (unsigned long long)dback->owner,
3811                                         (unsigned long long)dback->offset,
3812                                         dback->found_ref, dback->num_refs, back);
3813                         }
3814                         if (dback->disk_bytenr != rec->start) {
3815                                 err = 1;
3816                                 if (!print_errs)
3817                                         goto out;
3818                                 fprintf(stderr, "Backref disk bytenr does not"
3819                                         " match extent record, bytenr=%llu, "
3820                                         "ref bytenr=%llu\n",
3821                                         (unsigned long long)rec->start,
3822                                         (unsigned long long)dback->disk_bytenr);
3823                         }
3824
3825                         if (dback->bytes != rec->nr) {
3826                                 err = 1;
3827                                 if (!print_errs)
3828                                         goto out;
3829                                 fprintf(stderr, "Backref bytes do not match "
3830                                         "extent backref, bytenr=%llu, ref "
3831                                         "bytes=%llu, backref bytes=%llu\n",
3832                                         (unsigned long long)rec->start,
3833                                         (unsigned long long)rec->nr,
3834                                         (unsigned long long)dback->bytes);
3835                         }
3836                 }
3837                 if (!back->is_data) {
3838                         found += 1;
3839                 } else {
3840                         dback = (struct data_backref *)back;
3841                         found += dback->found_ref;
3842                 }
3843         }
3844         if (found != rec->refs) {
3845                 err = 1;
3846                 if (!print_errs)
3847                         goto out;
3848                 fprintf(stderr, "Incorrect global backref count "
3849                         "on %llu found %llu wanted %llu\n",
3850                         (unsigned long long)rec->start,
3851                         (unsigned long long)found,
3852                         (unsigned long long)rec->refs);
3853         }
3854 out:
3855         return err;
3856 }
3857
3858 static int free_all_extent_backrefs(struct extent_record *rec)
3859 {
3860         struct extent_backref *back;
3861         struct list_head *cur;
3862         while (!list_empty(&rec->backrefs)) {
3863                 cur = rec->backrefs.next;
3864                 back = list_entry(cur, struct extent_backref, list);
3865                 list_del(cur);
3866                 free(back);
3867         }
3868         return 0;
3869 }
3870
3871 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3872                                      struct cache_tree *extent_cache)
3873 {
3874         struct cache_extent *cache;
3875         struct extent_record *rec;
3876
3877         while (1) {
3878                 cache = first_cache_extent(extent_cache);
3879                 if (!cache)
3880                         break;
3881                 rec = container_of(cache, struct extent_record, cache);
3882                 remove_cache_extent(extent_cache, cache);
3883                 free_all_extent_backrefs(rec);
3884                 free(rec);
3885         }
3886 }
3887
3888 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3889                                  struct extent_record *rec)
3890 {
3891         if (rec->content_checked && rec->owner_ref_checked &&
3892             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3893             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3894             !rec->bad_full_backref && !rec->crossing_stripes &&
3895             !rec->wrong_chunk_type) {
3896                 remove_cache_extent(extent_cache, &rec->cache);
3897                 free_all_extent_backrefs(rec);
3898                 list_del_init(&rec->list);
3899                 free(rec);
3900         }
3901         return 0;
3902 }
3903
3904 static int check_owner_ref(struct btrfs_root *root,
3905                             struct extent_record *rec,
3906                             struct extent_buffer *buf)
3907 {
3908         struct extent_backref *node;
3909         struct tree_backref *back;
3910         struct btrfs_root *ref_root;
3911         struct btrfs_key key;
3912         struct btrfs_path path;
3913         struct extent_buffer *parent;
3914         int level;
3915         int found = 0;
3916         int ret;
3917
3918         list_for_each_entry(node, &rec->backrefs, list) {
3919                 if (node->is_data)
3920                         continue;
3921                 if (!node->found_ref)
3922                         continue;
3923                 if (node->full_backref)
3924                         continue;
3925                 back = (struct tree_backref *)node;
3926                 if (btrfs_header_owner(buf) == back->root)
3927                         return 0;
3928         }
3929         BUG_ON(rec->is_root);
3930
3931         /* try to find the block by search corresponding fs tree */
3932         key.objectid = btrfs_header_owner(buf);
3933         key.type = BTRFS_ROOT_ITEM_KEY;
3934         key.offset = (u64)-1;
3935
3936         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3937         if (IS_ERR(ref_root))
3938                 return 1;
3939
3940         level = btrfs_header_level(buf);
3941         if (level == 0)
3942                 btrfs_item_key_to_cpu(buf, &key, 0);
3943         else
3944                 btrfs_node_key_to_cpu(buf, &key, 0);
3945
3946         btrfs_init_path(&path);
3947         path.lowest_level = level + 1;
3948         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3949         if (ret < 0)
3950                 return 0;
3951
3952         parent = path.nodes[level + 1];
3953         if (parent && buf->start == btrfs_node_blockptr(parent,
3954                                                         path.slots[level + 1]))
3955                 found = 1;
3956
3957         btrfs_release_path(&path);
3958         return found ? 0 : 1;
3959 }
3960
3961 static int is_extent_tree_record(struct extent_record *rec)
3962 {
3963         struct list_head *cur = rec->backrefs.next;
3964         struct extent_backref *node;
3965         struct tree_backref *back;
3966         int is_extent = 0;
3967
3968         while(cur != &rec->backrefs) {
3969                 node = list_entry(cur, struct extent_backref, list);
3970                 cur = cur->next;
3971                 if (node->is_data)
3972                         return 0;
3973                 back = (struct tree_backref *)node;
3974                 if (node->full_backref)
3975                         return 0;
3976                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3977                         is_extent = 1;
3978         }
3979         return is_extent;
3980 }
3981
3982
3983 static int record_bad_block_io(struct btrfs_fs_info *info,
3984                                struct cache_tree *extent_cache,
3985                                u64 start, u64 len)
3986 {
3987         struct extent_record *rec;
3988         struct cache_extent *cache;
3989         struct btrfs_key key;
3990
3991         cache = lookup_cache_extent(extent_cache, start, len);
3992         if (!cache)
3993                 return 0;
3994
3995         rec = container_of(cache, struct extent_record, cache);
3996         if (!is_extent_tree_record(rec))
3997                 return 0;
3998
3999         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4000         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4001 }
4002
4003 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4004                        struct extent_buffer *buf, int slot)
4005 {
4006         if (btrfs_header_level(buf)) {
4007                 struct btrfs_key_ptr ptr1, ptr2;
4008
4009                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4010                                    sizeof(struct btrfs_key_ptr));
4011                 read_extent_buffer(buf, &ptr2,
4012                                    btrfs_node_key_ptr_offset(slot + 1),
4013                                    sizeof(struct btrfs_key_ptr));
4014                 write_extent_buffer(buf, &ptr1,
4015                                     btrfs_node_key_ptr_offset(slot + 1),
4016                                     sizeof(struct btrfs_key_ptr));
4017                 write_extent_buffer(buf, &ptr2,
4018                                     btrfs_node_key_ptr_offset(slot),
4019                                     sizeof(struct btrfs_key_ptr));
4020                 if (slot == 0) {
4021                         struct btrfs_disk_key key;
4022                         btrfs_node_key(buf, &key, 0);
4023                         btrfs_fixup_low_keys(root, path, &key,
4024                                              btrfs_header_level(buf) + 1);
4025                 }
4026         } else {
4027                 struct btrfs_item *item1, *item2;
4028                 struct btrfs_key k1, k2;
4029                 char *item1_data, *item2_data;
4030                 u32 item1_offset, item2_offset, item1_size, item2_size;
4031
4032                 item1 = btrfs_item_nr(slot);
4033                 item2 = btrfs_item_nr(slot + 1);
4034                 btrfs_item_key_to_cpu(buf, &k1, slot);
4035                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4036                 item1_offset = btrfs_item_offset(buf, item1);
4037                 item2_offset = btrfs_item_offset(buf, item2);
4038                 item1_size = btrfs_item_size(buf, item1);
4039                 item2_size = btrfs_item_size(buf, item2);
4040
4041                 item1_data = malloc(item1_size);
4042                 if (!item1_data)
4043                         return -ENOMEM;
4044                 item2_data = malloc(item2_size);
4045                 if (!item2_data) {
4046                         free(item1_data);
4047                         return -ENOMEM;
4048                 }
4049
4050                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4051                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4052
4053                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4054                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4055                 free(item1_data);
4056                 free(item2_data);
4057
4058                 btrfs_set_item_offset(buf, item1, item2_offset);
4059                 btrfs_set_item_offset(buf, item2, item1_offset);
4060                 btrfs_set_item_size(buf, item1, item2_size);
4061                 btrfs_set_item_size(buf, item2, item1_size);
4062
4063                 path->slots[0] = slot;
4064                 btrfs_set_item_key_unsafe(root, path, &k2);
4065                 path->slots[0] = slot + 1;
4066                 btrfs_set_item_key_unsafe(root, path, &k1);
4067         }
4068         return 0;
4069 }
4070
4071 static int fix_key_order(struct btrfs_trans_handle *trans,
4072                          struct btrfs_root *root,
4073                          struct btrfs_path *path)
4074 {
4075         struct extent_buffer *buf;
4076         struct btrfs_key k1, k2;
4077         int i;
4078         int level = path->lowest_level;
4079         int ret = -EIO;
4080
4081         buf = path->nodes[level];
4082         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4083                 if (level) {
4084                         btrfs_node_key_to_cpu(buf, &k1, i);
4085                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4086                 } else {
4087                         btrfs_item_key_to_cpu(buf, &k1, i);
4088                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4089                 }
4090                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4091                         continue;
4092                 ret = swap_values(root, path, buf, i);
4093                 if (ret)
4094                         break;
4095                 btrfs_mark_buffer_dirty(buf);
4096                 i = 0;
4097         }
4098         return ret;
4099 }
4100
4101 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4102                              struct btrfs_root *root,
4103                              struct btrfs_path *path,
4104                              struct extent_buffer *buf, int slot)
4105 {
4106         struct btrfs_key key;
4107         int nritems = btrfs_header_nritems(buf);
4108
4109         btrfs_item_key_to_cpu(buf, &key, slot);
4110
4111         /* These are all the keys we can deal with missing. */
4112         if (key.type != BTRFS_DIR_INDEX_KEY &&
4113             key.type != BTRFS_EXTENT_ITEM_KEY &&
4114             key.type != BTRFS_METADATA_ITEM_KEY &&
4115             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4116             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4117                 return -1;
4118
4119         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4120                (unsigned long long)key.objectid, key.type,
4121                (unsigned long long)key.offset, slot, buf->start);
4122         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4123                               btrfs_item_nr_offset(slot + 1),
4124                               sizeof(struct btrfs_item) *
4125                               (nritems - slot - 1));
4126         btrfs_set_header_nritems(buf, nritems - 1);
4127         if (slot == 0) {
4128                 struct btrfs_disk_key disk_key;
4129
4130                 btrfs_item_key(buf, &disk_key, 0);
4131                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4132         }
4133         btrfs_mark_buffer_dirty(buf);
4134         return 0;
4135 }
4136
4137 static int fix_item_offset(struct btrfs_trans_handle *trans,
4138                            struct btrfs_root *root,
4139                            struct btrfs_path *path)
4140 {
4141         struct extent_buffer *buf;
4142         int i;
4143         int ret = 0;
4144
4145         /* We should only get this for leaves */
4146         BUG_ON(path->lowest_level);
4147         buf = path->nodes[0];
4148 again:
4149         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4150                 unsigned int shift = 0, offset;
4151
4152                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4153                     BTRFS_LEAF_DATA_SIZE(root)) {
4154                         if (btrfs_item_end_nr(buf, i) >
4155                             BTRFS_LEAF_DATA_SIZE(root)) {
4156                                 ret = delete_bogus_item(trans, root, path,
4157                                                         buf, i);
4158                                 if (!ret)
4159                                         goto again;
4160                                 fprintf(stderr, "item is off the end of the "
4161                                         "leaf, can't fix\n");
4162                                 ret = -EIO;
4163                                 break;
4164                         }
4165                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4166                                 btrfs_item_end_nr(buf, i);
4167                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4168                            btrfs_item_offset_nr(buf, i - 1)) {
4169                         if (btrfs_item_end_nr(buf, i) >
4170                             btrfs_item_offset_nr(buf, i - 1)) {
4171                                 ret = delete_bogus_item(trans, root, path,
4172                                                         buf, i);
4173                                 if (!ret)
4174                                         goto again;
4175                                 fprintf(stderr, "items overlap, can't fix\n");
4176                                 ret = -EIO;
4177                                 break;
4178                         }
4179                         shift = btrfs_item_offset_nr(buf, i - 1) -
4180                                 btrfs_item_end_nr(buf, i);
4181                 }
4182                 if (!shift)
4183                         continue;
4184
4185                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4186                        i, shift, (unsigned long long)buf->start);
4187                 offset = btrfs_item_offset_nr(buf, i);
4188                 memmove_extent_buffer(buf,
4189                                       btrfs_leaf_data(buf) + offset + shift,
4190                                       btrfs_leaf_data(buf) + offset,
4191                                       btrfs_item_size_nr(buf, i));
4192                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4193                                       offset + shift);
4194                 btrfs_mark_buffer_dirty(buf);
4195         }
4196
4197         /*
4198          * We may have moved things, in which case we want to exit so we don't
4199          * write those changes out.  Once we have proper abort functionality in
4200          * progs this can be changed to something nicer.
4201          */
4202         BUG_ON(ret);
4203         return ret;
4204 }
4205
4206 /*
4207  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4208  * then just return -EIO.
4209  */
4210 static int try_to_fix_bad_block(struct btrfs_root *root,
4211                                 struct extent_buffer *buf,
4212                                 enum btrfs_tree_block_status status)
4213 {
4214         struct btrfs_trans_handle *trans;
4215         struct ulist *roots;
4216         struct ulist_node *node;
4217         struct btrfs_root *search_root;
4218         struct btrfs_path *path;
4219         struct ulist_iterator iter;
4220         struct btrfs_key root_key, key;
4221         int ret;
4222
4223         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4224             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4225                 return -EIO;
4226
4227         path = btrfs_alloc_path();
4228         if (!path)
4229                 return -EIO;
4230
4231         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4232                                    0, &roots);
4233         if (ret) {
4234                 btrfs_free_path(path);
4235                 return -EIO;
4236         }
4237
4238         ULIST_ITER_INIT(&iter);
4239         while ((node = ulist_next(roots, &iter))) {
4240                 root_key.objectid = node->val;
4241                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4242                 root_key.offset = (u64)-1;
4243
4244                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4245                 if (IS_ERR(root)) {
4246                         ret = -EIO;
4247                         break;
4248                 }
4249
4250
4251                 trans = btrfs_start_transaction(search_root, 0);
4252                 if (IS_ERR(trans)) {
4253                         ret = PTR_ERR(trans);
4254                         break;
4255                 }
4256
4257                 path->lowest_level = btrfs_header_level(buf);
4258                 path->skip_check_block = 1;
4259                 if (path->lowest_level)
4260                         btrfs_node_key_to_cpu(buf, &key, 0);
4261                 else
4262                         btrfs_item_key_to_cpu(buf, &key, 0);
4263                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4264                 if (ret) {
4265                         ret = -EIO;
4266                         btrfs_commit_transaction(trans, search_root);
4267                         break;
4268                 }
4269                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4270                         ret = fix_key_order(trans, search_root, path);
4271                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4272                         ret = fix_item_offset(trans, search_root, path);
4273                 if (ret) {
4274                         btrfs_commit_transaction(trans, search_root);
4275                         break;
4276                 }
4277                 btrfs_release_path(path);
4278                 btrfs_commit_transaction(trans, search_root);
4279         }
4280         ulist_free(roots);
4281         btrfs_free_path(path);
4282         return ret;
4283 }
4284
4285 static int check_block(struct btrfs_root *root,
4286                        struct cache_tree *extent_cache,
4287                        struct extent_buffer *buf, u64 flags)
4288 {
4289         struct extent_record *rec;
4290         struct cache_extent *cache;
4291         struct btrfs_key key;
4292         enum btrfs_tree_block_status status;
4293         int ret = 0;
4294         int level;
4295
4296         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4297         if (!cache)
4298                 return 1;
4299         rec = container_of(cache, struct extent_record, cache);
4300         rec->generation = btrfs_header_generation(buf);
4301
4302         level = btrfs_header_level(buf);
4303         if (btrfs_header_nritems(buf) > 0) {
4304
4305                 if (level == 0)
4306                         btrfs_item_key_to_cpu(buf, &key, 0);
4307                 else
4308                         btrfs_node_key_to_cpu(buf, &key, 0);
4309
4310                 rec->info_objectid = key.objectid;
4311         }
4312         rec->info_level = level;
4313
4314         if (btrfs_is_leaf(buf))
4315                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4316         else
4317                 status = btrfs_check_node(root, &rec->parent_key, buf);
4318
4319         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4320                 if (repair)
4321                         status = try_to_fix_bad_block(root, buf, status);
4322                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4323                         ret = -EIO;
4324                         fprintf(stderr, "bad block %llu\n",
4325                                 (unsigned long long)buf->start);
4326                 } else {
4327                         /*
4328                          * Signal to callers we need to start the scan over
4329                          * again since we'll have cow'ed blocks.
4330                          */
4331                         ret = -EAGAIN;
4332                 }
4333         } else {
4334                 rec->content_checked = 1;
4335                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4336                         rec->owner_ref_checked = 1;
4337                 else {
4338                         ret = check_owner_ref(root, rec, buf);
4339                         if (!ret)
4340                                 rec->owner_ref_checked = 1;
4341                 }
4342         }
4343         if (!ret)
4344                 maybe_free_extent_rec(extent_cache, rec);
4345         return ret;
4346 }
4347
4348 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4349                                                 u64 parent, u64 root)
4350 {
4351         struct list_head *cur = rec->backrefs.next;
4352         struct extent_backref *node;
4353         struct tree_backref *back;
4354
4355         while(cur != &rec->backrefs) {
4356                 node = list_entry(cur, struct extent_backref, list);
4357                 cur = cur->next;
4358                 if (node->is_data)
4359                         continue;
4360                 back = (struct tree_backref *)node;
4361                 if (parent > 0) {
4362                         if (!node->full_backref)
4363                                 continue;
4364                         if (parent == back->parent)
4365                                 return back;
4366                 } else {
4367                         if (node->full_backref)
4368                                 continue;
4369                         if (back->root == root)
4370                                 return back;
4371                 }
4372         }
4373         return NULL;
4374 }
4375
4376 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4377                                                 u64 parent, u64 root)
4378 {
4379         struct tree_backref *ref = malloc(sizeof(*ref));
4380
4381         if (!ref)
4382                 return NULL;
4383         memset(&ref->node, 0, sizeof(ref->node));
4384         if (parent > 0) {
4385                 ref->parent = parent;
4386                 ref->node.full_backref = 1;
4387         } else {
4388                 ref->root = root;
4389                 ref->node.full_backref = 0;
4390         }
4391         list_add_tail(&ref->node.list, &rec->backrefs);
4392
4393         return ref;
4394 }
4395
4396 static struct data_backref *find_data_backref(struct extent_record *rec,
4397                                                 u64 parent, u64 root,
4398                                                 u64 owner, u64 offset,
4399                                                 int found_ref,
4400                                                 u64 disk_bytenr, u64 bytes)
4401 {
4402         struct list_head *cur = rec->backrefs.next;
4403         struct extent_backref *node;
4404         struct data_backref *back;
4405
4406         while(cur != &rec->backrefs) {
4407                 node = list_entry(cur, struct extent_backref, list);
4408                 cur = cur->next;
4409                 if (!node->is_data)
4410                         continue;
4411                 back = (struct data_backref *)node;
4412                 if (parent > 0) {
4413                         if (!node->full_backref)
4414                                 continue;
4415                         if (parent == back->parent)
4416                                 return back;
4417                 } else {
4418                         if (node->full_backref)
4419                                 continue;
4420                         if (back->root == root && back->owner == owner &&
4421                             back->offset == offset) {
4422                                 if (found_ref && node->found_ref &&
4423                                     (back->bytes != bytes ||
4424                                     back->disk_bytenr != disk_bytenr))
4425                                         continue;
4426                                 return back;
4427                         }
4428                 }
4429         }
4430         return NULL;
4431 }
4432
4433 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4434                                                 u64 parent, u64 root,
4435                                                 u64 owner, u64 offset,
4436                                                 u64 max_size)
4437 {
4438         struct data_backref *ref = malloc(sizeof(*ref));
4439
4440         if (!ref)
4441                 return NULL;
4442         memset(&ref->node, 0, sizeof(ref->node));
4443         ref->node.is_data = 1;
4444
4445         if (parent > 0) {
4446                 ref->parent = parent;
4447                 ref->owner = 0;
4448                 ref->offset = 0;
4449                 ref->node.full_backref = 1;
4450         } else {
4451                 ref->root = root;
4452                 ref->owner = owner;
4453                 ref->offset = offset;
4454                 ref->node.full_backref = 0;
4455         }
4456         ref->bytes = max_size;
4457         ref->found_ref = 0;
4458         ref->num_refs = 0;
4459         list_add_tail(&ref->node.list, &rec->backrefs);
4460         if (max_size > rec->max_size)
4461                 rec->max_size = max_size;
4462         return ref;
4463 }
4464
4465 /* Check if the type of extent matches with its chunk */
4466 static void check_extent_type(struct extent_record *rec)
4467 {
4468         struct btrfs_block_group_cache *bg_cache;
4469
4470         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4471         if (!bg_cache)
4472                 return;
4473
4474         /* data extent, check chunk directly*/
4475         if (!rec->metadata) {
4476                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4477                         rec->wrong_chunk_type = 1;
4478                 return;
4479         }
4480
4481         /* metadata extent, check the obvious case first */
4482         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4483                                  BTRFS_BLOCK_GROUP_METADATA))) {
4484                 rec->wrong_chunk_type = 1;
4485                 return;
4486         }
4487
4488         /*
4489          * Check SYSTEM extent, as it's also marked as metadata, we can only
4490          * make sure it's a SYSTEM extent by its backref
4491          */
4492         if (!list_empty(&rec->backrefs)) {
4493                 struct extent_backref *node;
4494                 struct tree_backref *tback;
4495                 u64 bg_type;
4496
4497                 node = list_entry(rec->backrefs.next, struct extent_backref,
4498                                   list);
4499                 if (node->is_data) {
4500                         /* tree block shouldn't have data backref */
4501                         rec->wrong_chunk_type = 1;
4502                         return;
4503                 }
4504                 tback = container_of(node, struct tree_backref, node);
4505
4506                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4507                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4508                 else
4509                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4510                 if (!(bg_cache->flags & bg_type))
4511                         rec->wrong_chunk_type = 1;
4512         }
4513 }
4514
4515 /*
4516  * Allocate a new extent record, fill default values from @tmpl and insert int
4517  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4518  * the cache, otherwise it fails.
4519  */
4520 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4521                 struct extent_record *tmpl)
4522 {
4523         struct extent_record *rec;
4524         int ret = 0;
4525
4526         rec = malloc(sizeof(*rec));
4527         if (!rec)
4528                 return -ENOMEM;
4529         rec->start = tmpl->start;
4530         rec->max_size = tmpl->max_size;
4531         rec->nr = max(tmpl->nr, tmpl->max_size);
4532         rec->found_rec = tmpl->found_rec;
4533         rec->content_checked = tmpl->content_checked;
4534         rec->owner_ref_checked = tmpl->owner_ref_checked;
4535         rec->num_duplicates = 0;
4536         rec->metadata = tmpl->metadata;
4537         rec->flag_block_full_backref = FLAG_UNSET;
4538         rec->bad_full_backref = 0;
4539         rec->crossing_stripes = 0;
4540         rec->wrong_chunk_type = 0;
4541         rec->is_root = tmpl->is_root;
4542         rec->refs = tmpl->refs;
4543         rec->extent_item_refs = tmpl->extent_item_refs;
4544         rec->parent_generation = tmpl->parent_generation;
4545         INIT_LIST_HEAD(&rec->backrefs);
4546         INIT_LIST_HEAD(&rec->dups);
4547         INIT_LIST_HEAD(&rec->list);
4548         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4549         rec->cache.start = tmpl->start;
4550         rec->cache.size = tmpl->nr;
4551         ret = insert_cache_extent(extent_cache, &rec->cache);
4552         BUG_ON(ret);
4553         bytes_used += tmpl->nr;
4554
4555         if (tmpl->metadata)
4556                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4557                                 rec->max_size);
4558         check_extent_type(rec);
4559         return ret;
4560 }
4561
4562 static int add_extent_rec(struct cache_tree *extent_cache,
4563                           struct btrfs_key *parent_key, u64 parent_gen,
4564                           u64 start, u64 nr, u64 extent_item_refs,
4565                           int is_root, int inc_ref, int set_checked,
4566                           int metadata, int extent_rec, u64 max_size)
4567 {
4568         struct extent_record *rec;
4569         struct cache_extent *cache;
4570         struct extent_record tmpl;
4571         int ret = 0;
4572         int dup = 0;
4573
4574         cache = lookup_cache_extent(extent_cache, start, nr);
4575         if (cache) {
4576                 rec = container_of(cache, struct extent_record, cache);
4577                 if (inc_ref)
4578                         rec->refs++;
4579                 if (rec->nr == 1)
4580                         rec->nr = max(nr, max_size);
4581
4582                 /*
4583                  * We need to make sure to reset nr to whatever the extent
4584                  * record says was the real size, this way we can compare it to
4585                  * the backrefs.
4586                  */
4587                 if (extent_rec) {
4588                         if (start != rec->start || rec->found_rec) {
4589                                 struct extent_record *tmp;
4590
4591                                 dup = 1;
4592                                 if (list_empty(&rec->list))
4593                                         list_add_tail(&rec->list,
4594                                                       &duplicate_extents);
4595
4596                                 /*
4597                                  * We have to do this song and dance in case we
4598                                  * find an extent record that falls inside of
4599                                  * our current extent record but does not have
4600                                  * the same objectid.
4601                                  */
4602                                 tmp = malloc(sizeof(*tmp));
4603                                 if (!tmp)
4604                                         return -ENOMEM;
4605                                 tmp->start = start;
4606                                 tmp->max_size = max_size;
4607                                 tmp->nr = nr;
4608                                 tmp->found_rec = 1;
4609                                 tmp->metadata = metadata;
4610                                 tmp->extent_item_refs = extent_item_refs;
4611                                 INIT_LIST_HEAD(&tmp->list);
4612                                 list_add_tail(&tmp->list, &rec->dups);
4613                                 rec->num_duplicates++;
4614                         } else {
4615                                 rec->nr = nr;
4616                                 rec->found_rec = 1;
4617                         }
4618                 }
4619
4620                 if (extent_item_refs && !dup) {
4621                         if (rec->extent_item_refs) {
4622                                 fprintf(stderr, "block %llu rec "
4623                                         "extent_item_refs %llu, passed %llu\n",
4624                                         (unsigned long long)start,
4625                                         (unsigned long long)
4626                                                         rec->extent_item_refs,
4627                                         (unsigned long long)extent_item_refs);
4628                         }
4629                         rec->extent_item_refs = extent_item_refs;
4630                 }
4631                 if (is_root)
4632                         rec->is_root = 1;
4633                 if (set_checked) {
4634                         rec->content_checked = 1;
4635                         rec->owner_ref_checked = 1;
4636                 }
4637
4638                 if (parent_key)
4639                         btrfs_cpu_key_to_disk(&rec->parent_key, parent_key);
4640                 if (parent_gen)
4641                         rec->parent_generation = parent_gen;
4642
4643                 if (rec->max_size < max_size)
4644                         rec->max_size = max_size;
4645
4646                 /*
4647                  * A metadata extent can't cross stripe_len boundary, otherwise
4648                  * kernel scrub won't be able to handle it.
4649                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4650                  * it.
4651                  */
4652                 if (metadata)
4653                         rec->crossing_stripes = check_crossing_stripes(
4654                                         rec->start, rec->max_size);
4655                 check_extent_type(rec);
4656                 maybe_free_extent_rec(extent_cache, rec);
4657                 return ret;
4658         }
4659
4660         memset(&tmpl, 0, sizeof(tmpl));
4661
4662         if (parent_key)
4663                 btrfs_cpu_key_to_disk(&tmpl.parent_key, parent_key);
4664         tmpl.parent_generation = parent_gen;
4665         tmpl.start = start;
4666         tmpl.nr = nr;
4667         tmpl.extent_item_refs = extent_item_refs;
4668         tmpl.is_root = is_root;
4669         tmpl.metadata = metadata;
4670         tmpl.found_rec = extent_rec;
4671         tmpl.max_size = max_size;
4672         tmpl.content_checked = set_checked;
4673         tmpl.owner_ref_checked = set_checked;
4674         tmpl.refs = !!inc_ref;
4675
4676         ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4677
4678         return ret;
4679 }
4680
4681 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4682                             u64 parent, u64 root, int found_ref)
4683 {
4684         struct extent_record *rec;
4685         struct tree_backref *back;
4686         struct cache_extent *cache;
4687
4688         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4689         if (!cache) {
4690                 struct extent_record tmpl;
4691
4692                 memset(&tmpl, 0, sizeof(tmpl));
4693                 tmpl.start = bytenr;
4694                 tmpl.nr = 1;
4695                 tmpl.metadata = 1;
4696
4697                 add_extent_rec_nolookup(extent_cache, &tmpl);
4698
4699                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4700                 if (!cache)
4701                         abort();
4702         }
4703
4704         rec = container_of(cache, struct extent_record, cache);
4705         if (rec->start != bytenr) {
4706                 abort();
4707         }
4708
4709         back = find_tree_backref(rec, parent, root);
4710         if (!back) {
4711                 back = alloc_tree_backref(rec, parent, root);
4712                 BUG_ON(!back);
4713         }
4714
4715         if (found_ref) {
4716                 if (back->node.found_ref) {
4717                         fprintf(stderr, "Extent back ref already exists "
4718                                 "for %llu parent %llu root %llu \n",
4719                                 (unsigned long long)bytenr,
4720                                 (unsigned long long)parent,
4721                                 (unsigned long long)root);
4722                 }
4723                 back->node.found_ref = 1;
4724         } else {
4725                 if (back->node.found_extent_tree) {
4726                         fprintf(stderr, "Extent back ref already exists "
4727                                 "for %llu parent %llu root %llu \n",
4728                                 (unsigned long long)bytenr,
4729                                 (unsigned long long)parent,
4730                                 (unsigned long long)root);
4731                 }
4732                 back->node.found_extent_tree = 1;
4733         }
4734         check_extent_type(rec);
4735         maybe_free_extent_rec(extent_cache, rec);
4736         return 0;
4737 }
4738
4739 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4740                             u64 parent, u64 root, u64 owner, u64 offset,
4741                             u32 num_refs, int found_ref, u64 max_size)
4742 {
4743         struct extent_record *rec;
4744         struct data_backref *back;
4745         struct cache_extent *cache;
4746
4747         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4748         if (!cache) {
4749                 struct extent_record tmpl;
4750
4751                 memset(&tmpl, 0, sizeof(tmpl));
4752                 tmpl.start = bytenr;
4753                 tmpl.nr = 1;
4754                 tmpl.max_size = max_size;
4755
4756                 add_extent_rec_nolookup(extent_cache, &tmpl);
4757
4758                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4759                 if (!cache)
4760                         abort();
4761         }
4762
4763         rec = container_of(cache, struct extent_record, cache);
4764         if (rec->max_size < max_size)
4765                 rec->max_size = max_size;
4766
4767         /*
4768          * If found_ref is set then max_size is the real size and must match the
4769          * existing refs.  So if we have already found a ref then we need to
4770          * make sure that this ref matches the existing one, otherwise we need
4771          * to add a new backref so we can notice that the backrefs don't match
4772          * and we need to figure out who is telling the truth.  This is to
4773          * account for that awful fsync bug I introduced where we'd end up with
4774          * a btrfs_file_extent_item that would have its length include multiple
4775          * prealloc extents or point inside of a prealloc extent.
4776          */
4777         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4778                                  bytenr, max_size);
4779         if (!back) {
4780                 back = alloc_data_backref(rec, parent, root, owner, offset,
4781                                           max_size);
4782                 BUG_ON(!back);
4783         }
4784
4785         if (found_ref) {
4786                 BUG_ON(num_refs != 1);
4787                 if (back->node.found_ref)
4788                         BUG_ON(back->bytes != max_size);
4789                 back->node.found_ref = 1;
4790                 back->found_ref += 1;
4791                 back->bytes = max_size;
4792                 back->disk_bytenr = bytenr;
4793                 rec->refs += 1;
4794                 rec->content_checked = 1;
4795                 rec->owner_ref_checked = 1;
4796         } else {
4797                 if (back->node.found_extent_tree) {
4798                         fprintf(stderr, "Extent back ref already exists "
4799                                 "for %llu parent %llu root %llu "
4800                                 "owner %llu offset %llu num_refs %lu\n",
4801                                 (unsigned long long)bytenr,
4802                                 (unsigned long long)parent,
4803                                 (unsigned long long)root,
4804                                 (unsigned long long)owner,
4805                                 (unsigned long long)offset,
4806                                 (unsigned long)num_refs);
4807                 }
4808                 back->num_refs = num_refs;
4809                 back->node.found_extent_tree = 1;
4810         }
4811         maybe_free_extent_rec(extent_cache, rec);
4812         return 0;
4813 }
4814
4815 static int add_pending(struct cache_tree *pending,
4816                        struct cache_tree *seen, u64 bytenr, u32 size)
4817 {
4818         int ret;
4819         ret = add_cache_extent(seen, bytenr, size);
4820         if (ret)
4821                 return ret;
4822         add_cache_extent(pending, bytenr, size);
4823         return 0;
4824 }
4825
4826 static int pick_next_pending(struct cache_tree *pending,
4827                         struct cache_tree *reada,
4828                         struct cache_tree *nodes,
4829                         u64 last, struct block_info *bits, int bits_nr,
4830                         int *reada_bits)
4831 {
4832         unsigned long node_start = last;
4833         struct cache_extent *cache;
4834         int ret;
4835
4836         cache = search_cache_extent(reada, 0);
4837         if (cache) {
4838                 bits[0].start = cache->start;
4839                 bits[0].size = cache->size;
4840                 *reada_bits = 1;
4841                 return 1;
4842         }
4843         *reada_bits = 0;
4844         if (node_start > 32768)
4845                 node_start -= 32768;
4846
4847         cache = search_cache_extent(nodes, node_start);
4848         if (!cache)
4849                 cache = search_cache_extent(nodes, 0);
4850
4851         if (!cache) {
4852                  cache = search_cache_extent(pending, 0);
4853                  if (!cache)
4854                          return 0;
4855                  ret = 0;
4856                  do {
4857                          bits[ret].start = cache->start;
4858                          bits[ret].size = cache->size;
4859                          cache = next_cache_extent(cache);
4860                          ret++;
4861                  } while (cache && ret < bits_nr);
4862                  return ret;
4863         }
4864
4865         ret = 0;
4866         do {
4867                 bits[ret].start = cache->start;
4868                 bits[ret].size = cache->size;
4869                 cache = next_cache_extent(cache);
4870                 ret++;
4871         } while (cache && ret < bits_nr);
4872
4873         if (bits_nr - ret > 8) {
4874                 u64 lookup = bits[0].start + bits[0].size;
4875                 struct cache_extent *next;
4876                 next = search_cache_extent(pending, lookup);
4877                 while(next) {
4878                         if (next->start - lookup > 32768)
4879                                 break;
4880                         bits[ret].start = next->start;
4881                         bits[ret].size = next->size;
4882                         lookup = next->start + next->size;
4883                         ret++;
4884                         if (ret == bits_nr)
4885                                 break;
4886                         next = next_cache_extent(next);
4887                         if (!next)
4888                                 break;
4889                 }
4890         }
4891         return ret;
4892 }
4893
4894 static void free_chunk_record(struct cache_extent *cache)
4895 {
4896         struct chunk_record *rec;
4897
4898         rec = container_of(cache, struct chunk_record, cache);
4899         list_del_init(&rec->list);
4900         list_del_init(&rec->dextents);
4901         free(rec);
4902 }
4903
4904 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4905 {
4906         cache_tree_free_extents(chunk_cache, free_chunk_record);
4907 }
4908
4909 static void free_device_record(struct rb_node *node)
4910 {
4911         struct device_record *rec;
4912
4913         rec = container_of(node, struct device_record, node);
4914         free(rec);
4915 }
4916
4917 FREE_RB_BASED_TREE(device_cache, free_device_record);
4918
4919 int insert_block_group_record(struct block_group_tree *tree,
4920                               struct block_group_record *bg_rec)
4921 {
4922         int ret;
4923
4924         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4925         if (ret)
4926                 return ret;
4927
4928         list_add_tail(&bg_rec->list, &tree->block_groups);
4929         return 0;
4930 }
4931
4932 static void free_block_group_record(struct cache_extent *cache)
4933 {
4934         struct block_group_record *rec;
4935
4936         rec = container_of(cache, struct block_group_record, cache);
4937         list_del_init(&rec->list);
4938         free(rec);
4939 }
4940
4941 void free_block_group_tree(struct block_group_tree *tree)
4942 {
4943         cache_tree_free_extents(&tree->tree, free_block_group_record);
4944 }
4945
4946 int insert_device_extent_record(struct device_extent_tree *tree,
4947                                 struct device_extent_record *de_rec)
4948 {
4949         int ret;
4950
4951         /*
4952          * Device extent is a bit different from the other extents, because
4953          * the extents which belong to the different devices may have the
4954          * same start and size, so we need use the special extent cache
4955          * search/insert functions.
4956          */
4957         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4958         if (ret)
4959                 return ret;
4960
4961         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4962         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4963         return 0;
4964 }
4965
4966 static void free_device_extent_record(struct cache_extent *cache)
4967 {
4968         struct device_extent_record *rec;
4969
4970         rec = container_of(cache, struct device_extent_record, cache);
4971         if (!list_empty(&rec->chunk_list))
4972                 list_del_init(&rec->chunk_list);
4973         if (!list_empty(&rec->device_list))
4974                 list_del_init(&rec->device_list);
4975         free(rec);
4976 }
4977
4978 void free_device_extent_tree(struct device_extent_tree *tree)
4979 {
4980         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4981 }
4982
4983 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4984 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4985                                  struct extent_buffer *leaf, int slot)
4986 {
4987         struct btrfs_extent_ref_v0 *ref0;
4988         struct btrfs_key key;
4989
4990         btrfs_item_key_to_cpu(leaf, &key, slot);
4991         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4992         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4993                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4994         } else {
4995                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4996                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4997         }
4998         return 0;
4999 }
5000 #endif
5001
5002 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5003                                             struct btrfs_key *key,
5004                                             int slot)
5005 {
5006         struct btrfs_chunk *ptr;
5007         struct chunk_record *rec;
5008         int num_stripes, i;
5009
5010         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5011         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5012
5013         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5014         if (!rec) {
5015                 fprintf(stderr, "memory allocation failed\n");
5016                 exit(-1);
5017         }
5018
5019         INIT_LIST_HEAD(&rec->list);
5020         INIT_LIST_HEAD(&rec->dextents);
5021         rec->bg_rec = NULL;
5022
5023         rec->cache.start = key->offset;
5024         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5025
5026         rec->generation = btrfs_header_generation(leaf);
5027
5028         rec->objectid = key->objectid;
5029         rec->type = key->type;
5030         rec->offset = key->offset;
5031
5032         rec->length = rec->cache.size;
5033         rec->owner = btrfs_chunk_owner(leaf, ptr);
5034         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5035         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5036         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5037         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5038         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5039         rec->num_stripes = num_stripes;
5040         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5041
5042         for (i = 0; i < rec->num_stripes; ++i) {
5043                 rec->stripes[i].devid =
5044                         btrfs_stripe_devid_nr(leaf, ptr, i);
5045                 rec->stripes[i].offset =
5046                         btrfs_stripe_offset_nr(leaf, ptr, i);
5047                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5048                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5049                                 BTRFS_UUID_SIZE);
5050         }
5051
5052         return rec;
5053 }
5054
5055 static int process_chunk_item(struct cache_tree *chunk_cache,
5056                               struct btrfs_key *key, struct extent_buffer *eb,
5057                               int slot)
5058 {
5059         struct chunk_record *rec;
5060         int ret = 0;
5061
5062         rec = btrfs_new_chunk_record(eb, key, slot);
5063         ret = insert_cache_extent(chunk_cache, &rec->cache);
5064         if (ret) {
5065                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5066                         rec->offset, rec->length);
5067                 free(rec);
5068         }
5069
5070         return ret;
5071 }
5072
5073 static int process_device_item(struct rb_root *dev_cache,
5074                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5075 {
5076         struct btrfs_dev_item *ptr;
5077         struct device_record *rec;
5078         int ret = 0;
5079
5080         ptr = btrfs_item_ptr(eb,
5081                 slot, struct btrfs_dev_item);
5082
5083         rec = malloc(sizeof(*rec));
5084         if (!rec) {
5085                 fprintf(stderr, "memory allocation failed\n");
5086                 return -ENOMEM;
5087         }
5088
5089         rec->devid = key->offset;
5090         rec->generation = btrfs_header_generation(eb);
5091
5092         rec->objectid = key->objectid;
5093         rec->type = key->type;
5094         rec->offset = key->offset;
5095
5096         rec->devid = btrfs_device_id(eb, ptr);
5097         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5098         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5099
5100         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5101         if (ret) {
5102                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5103                 free(rec);
5104         }
5105
5106         return ret;
5107 }
5108
5109 struct block_group_record *
5110 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5111                              int slot)
5112 {
5113         struct btrfs_block_group_item *ptr;
5114         struct block_group_record *rec;
5115
5116         rec = calloc(1, sizeof(*rec));
5117         if (!rec) {
5118                 fprintf(stderr, "memory allocation failed\n");
5119                 exit(-1);
5120         }
5121
5122         rec->cache.start = key->objectid;
5123         rec->cache.size = key->offset;
5124
5125         rec->generation = btrfs_header_generation(leaf);
5126
5127         rec->objectid = key->objectid;
5128         rec->type = key->type;
5129         rec->offset = key->offset;
5130
5131         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5132         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5133
5134         INIT_LIST_HEAD(&rec->list);
5135
5136         return rec;
5137 }
5138
5139 static int process_block_group_item(struct block_group_tree *block_group_cache,
5140                                     struct btrfs_key *key,
5141                                     struct extent_buffer *eb, int slot)
5142 {
5143         struct block_group_record *rec;
5144         int ret = 0;
5145
5146         rec = btrfs_new_block_group_record(eb, key, slot);
5147         ret = insert_block_group_record(block_group_cache, rec);
5148         if (ret) {
5149                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5150                         rec->objectid, rec->offset);
5151                 free(rec);
5152         }
5153
5154         return ret;
5155 }
5156
5157 struct device_extent_record *
5158 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5159                                struct btrfs_key *key, int slot)
5160 {
5161         struct device_extent_record *rec;
5162         struct btrfs_dev_extent *ptr;
5163
5164         rec = calloc(1, sizeof(*rec));
5165         if (!rec) {
5166                 fprintf(stderr, "memory allocation failed\n");
5167                 exit(-1);
5168         }
5169
5170         rec->cache.objectid = key->objectid;
5171         rec->cache.start = key->offset;
5172
5173         rec->generation = btrfs_header_generation(leaf);
5174
5175         rec->objectid = key->objectid;
5176         rec->type = key->type;
5177         rec->offset = key->offset;
5178
5179         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5180         rec->chunk_objecteid =
5181                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5182         rec->chunk_offset =
5183                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5184         rec->length = btrfs_dev_extent_length(leaf, ptr);
5185         rec->cache.size = rec->length;
5186
5187         INIT_LIST_HEAD(&rec->chunk_list);
5188         INIT_LIST_HEAD(&rec->device_list);
5189
5190         return rec;
5191 }
5192
5193 static int
5194 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5195                            struct btrfs_key *key, struct extent_buffer *eb,
5196                            int slot)
5197 {
5198         struct device_extent_record *rec;
5199         int ret;
5200
5201         rec = btrfs_new_device_extent_record(eb, key, slot);
5202         ret = insert_device_extent_record(dev_extent_cache, rec);
5203         if (ret) {
5204                 fprintf(stderr,
5205                         "Device extent[%llu, %llu, %llu] existed.\n",
5206                         rec->objectid, rec->offset, rec->length);
5207                 free(rec);
5208         }
5209
5210         return ret;
5211 }
5212
5213 static int process_extent_item(struct btrfs_root *root,
5214                                struct cache_tree *extent_cache,
5215                                struct extent_buffer *eb, int slot)
5216 {
5217         struct btrfs_extent_item *ei;
5218         struct btrfs_extent_inline_ref *iref;
5219         struct btrfs_extent_data_ref *dref;
5220         struct btrfs_shared_data_ref *sref;
5221         struct btrfs_key key;
5222         unsigned long end;
5223         unsigned long ptr;
5224         int type;
5225         u32 item_size = btrfs_item_size_nr(eb, slot);
5226         u64 refs = 0;
5227         u64 offset;
5228         u64 num_bytes;
5229         int metadata = 0;
5230
5231         btrfs_item_key_to_cpu(eb, &key, slot);
5232
5233         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5234                 metadata = 1;
5235                 num_bytes = root->nodesize;
5236         } else {
5237                 num_bytes = key.offset;
5238         }
5239
5240         if (item_size < sizeof(*ei)) {
5241 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5242                 struct btrfs_extent_item_v0 *ei0;
5243                 BUG_ON(item_size != sizeof(*ei0));
5244                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5245                 refs = btrfs_extent_refs_v0(eb, ei0);
5246 #else
5247                 BUG();
5248 #endif
5249                 return add_extent_rec(extent_cache, NULL, 0, key.objectid,
5250                                       num_bytes, refs, 0, 0, 0, metadata, 1,
5251                                       num_bytes);
5252         }
5253
5254         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5255         refs = btrfs_extent_refs(eb, ei);
5256         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5257                 metadata = 1;
5258         else
5259                 metadata = 0;
5260
5261         add_extent_rec(extent_cache, NULL, 0, key.objectid, num_bytes,
5262                        refs, 0, 0, 0, metadata, 1, num_bytes);
5263
5264         ptr = (unsigned long)(ei + 1);
5265         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5266             key.type == BTRFS_EXTENT_ITEM_KEY)
5267                 ptr += sizeof(struct btrfs_tree_block_info);
5268
5269         end = (unsigned long)ei + item_size;
5270         while (ptr < end) {
5271                 iref = (struct btrfs_extent_inline_ref *)ptr;
5272                 type = btrfs_extent_inline_ref_type(eb, iref);
5273                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5274                 switch (type) {
5275                 case BTRFS_TREE_BLOCK_REF_KEY:
5276                         add_tree_backref(extent_cache, key.objectid,
5277                                          0, offset, 0);
5278                         break;
5279                 case BTRFS_SHARED_BLOCK_REF_KEY:
5280                         add_tree_backref(extent_cache, key.objectid,
5281                                          offset, 0, 0);
5282                         break;
5283                 case BTRFS_EXTENT_DATA_REF_KEY:
5284                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5285                         add_data_backref(extent_cache, key.objectid, 0,
5286                                         btrfs_extent_data_ref_root(eb, dref),
5287                                         btrfs_extent_data_ref_objectid(eb,
5288                                                                        dref),
5289                                         btrfs_extent_data_ref_offset(eb, dref),
5290                                         btrfs_extent_data_ref_count(eb, dref),
5291                                         0, num_bytes);
5292                         break;
5293                 case BTRFS_SHARED_DATA_REF_KEY:
5294                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5295                         add_data_backref(extent_cache, key.objectid, offset,
5296                                         0, 0, 0,
5297                                         btrfs_shared_data_ref_count(eb, sref),
5298                                         0, num_bytes);
5299                         break;
5300                 default:
5301                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5302                                 key.objectid, key.type, num_bytes);
5303                         goto out;
5304                 }
5305                 ptr += btrfs_extent_inline_ref_size(type);
5306         }
5307         WARN_ON(ptr > end);
5308 out:
5309         return 0;
5310 }
5311
5312 static int check_cache_range(struct btrfs_root *root,
5313                              struct btrfs_block_group_cache *cache,
5314                              u64 offset, u64 bytes)
5315 {
5316         struct btrfs_free_space *entry;
5317         u64 *logical;
5318         u64 bytenr;
5319         int stripe_len;
5320         int i, nr, ret;
5321
5322         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5323                 bytenr = btrfs_sb_offset(i);
5324                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5325                                        cache->key.objectid, bytenr, 0,
5326                                        &logical, &nr, &stripe_len);
5327                 if (ret)
5328                         return ret;
5329
5330                 while (nr--) {
5331                         if (logical[nr] + stripe_len <= offset)
5332                                 continue;
5333                         if (offset + bytes <= logical[nr])
5334                                 continue;
5335                         if (logical[nr] == offset) {
5336                                 if (stripe_len >= bytes) {
5337                                         kfree(logical);
5338                                         return 0;
5339                                 }
5340                                 bytes -= stripe_len;
5341                                 offset += stripe_len;
5342                         } else if (logical[nr] < offset) {
5343                                 if (logical[nr] + stripe_len >=
5344                                     offset + bytes) {
5345                                         kfree(logical);
5346                                         return 0;
5347                                 }
5348                                 bytes = (offset + bytes) -
5349                                         (logical[nr] + stripe_len);
5350                                 offset = logical[nr] + stripe_len;
5351                         } else {
5352                                 /*
5353                                  * Could be tricky, the super may land in the
5354                                  * middle of the area we're checking.  First
5355                                  * check the easiest case, it's at the end.
5356                                  */
5357                                 if (logical[nr] + stripe_len >=
5358                                     bytes + offset) {
5359                                         bytes = logical[nr] - offset;
5360                                         continue;
5361                                 }
5362
5363                                 /* Check the left side */
5364                                 ret = check_cache_range(root, cache,
5365                                                         offset,
5366                                                         logical[nr] - offset);
5367                                 if (ret) {
5368                                         kfree(logical);
5369                                         return ret;
5370                                 }
5371
5372                                 /* Now we continue with the right side */
5373                                 bytes = (offset + bytes) -
5374                                         (logical[nr] + stripe_len);
5375                                 offset = logical[nr] + stripe_len;
5376                         }
5377                 }
5378
5379                 kfree(logical);
5380         }
5381
5382         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5383         if (!entry) {
5384                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5385                         offset, offset+bytes);
5386                 return -EINVAL;
5387         }
5388
5389         if (entry->offset != offset) {
5390                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5391                         entry->offset);
5392                 return -EINVAL;
5393         }
5394
5395         if (entry->bytes != bytes) {
5396                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5397                         bytes, entry->bytes, offset);
5398                 return -EINVAL;
5399         }
5400
5401         unlink_free_space(cache->free_space_ctl, entry);
5402         free(entry);
5403         return 0;
5404 }
5405
5406 static int verify_space_cache(struct btrfs_root *root,
5407                               struct btrfs_block_group_cache *cache)
5408 {
5409         struct btrfs_path *path;
5410         struct extent_buffer *leaf;
5411         struct btrfs_key key;
5412         u64 last;
5413         int ret = 0;
5414
5415         path = btrfs_alloc_path();
5416         if (!path)
5417                 return -ENOMEM;
5418
5419         root = root->fs_info->extent_root;
5420
5421         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5422
5423         key.objectid = last;
5424         key.offset = 0;
5425         key.type = BTRFS_EXTENT_ITEM_KEY;
5426
5427         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5428         if (ret < 0)
5429                 goto out;
5430         ret = 0;
5431         while (1) {
5432                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5433                         ret = btrfs_next_leaf(root, path);
5434                         if (ret < 0)
5435                                 goto out;
5436                         if (ret > 0) {
5437                                 ret = 0;
5438                                 break;
5439                         }
5440                 }
5441                 leaf = path->nodes[0];
5442                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5443                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5444                         break;
5445                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5446                     key.type != BTRFS_METADATA_ITEM_KEY) {
5447                         path->slots[0]++;
5448                         continue;
5449                 }
5450
5451                 if (last == key.objectid) {
5452                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5453                                 last = key.objectid + key.offset;
5454                         else
5455                                 last = key.objectid + root->nodesize;
5456                         path->slots[0]++;
5457                         continue;
5458                 }
5459
5460                 ret = check_cache_range(root, cache, last,
5461                                         key.objectid - last);
5462                 if (ret)
5463                         break;
5464                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5465                         last = key.objectid + key.offset;
5466                 else
5467                         last = key.objectid + root->nodesize;
5468                 path->slots[0]++;
5469         }
5470
5471         if (last < cache->key.objectid + cache->key.offset)
5472                 ret = check_cache_range(root, cache, last,
5473                                         cache->key.objectid +
5474                                         cache->key.offset - last);
5475
5476 out:
5477         btrfs_free_path(path);
5478
5479         if (!ret &&
5480             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5481                 fprintf(stderr, "There are still entries left in the space "
5482                         "cache\n");
5483                 ret = -EINVAL;
5484         }
5485
5486         return ret;
5487 }
5488
5489 static int check_space_cache(struct btrfs_root *root)
5490 {
5491         struct btrfs_block_group_cache *cache;
5492         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5493         int ret;
5494         int error = 0;
5495
5496         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5497             btrfs_super_generation(root->fs_info->super_copy) !=
5498             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5499                 printf("cache and super generation don't match, space cache "
5500                        "will be invalidated\n");
5501                 return 0;
5502         }
5503
5504         if (ctx.progress_enabled) {
5505                 ctx.tp = TASK_FREE_SPACE;
5506                 task_start(ctx.info);
5507         }
5508
5509         while (1) {
5510                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5511                 if (!cache)
5512                         break;
5513
5514                 start = cache->key.objectid + cache->key.offset;
5515                 if (!cache->free_space_ctl) {
5516                         if (btrfs_init_free_space_ctl(cache,
5517                                                       root->sectorsize)) {
5518                                 ret = -ENOMEM;
5519                                 break;
5520                         }
5521                 } else {
5522                         btrfs_remove_free_space_cache(cache);
5523                 }
5524
5525                 if (btrfs_fs_compat_ro(root->fs_info,
5526                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5527                         ret = exclude_super_stripes(root, cache);
5528                         if (ret) {
5529                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5530                                         strerror(-ret));
5531                                 error++;
5532                                 continue;
5533                         }
5534                         ret = load_free_space_tree(root->fs_info, cache);
5535                         free_excluded_extents(root, cache);
5536                         if (ret < 0) {
5537                                 fprintf(stderr, "could not load free space tree: %s\n",
5538                                         strerror(-ret));
5539                                 error++;
5540                                 continue;
5541                         }
5542                         error += ret;
5543                 } else {
5544                         ret = load_free_space_cache(root->fs_info, cache);
5545                         if (!ret)
5546                                 continue;
5547                 }
5548
5549                 ret = verify_space_cache(root, cache);
5550                 if (ret) {
5551                         fprintf(stderr, "cache appears valid but isnt %Lu\n",
5552                                 cache->key.objectid);
5553                         error++;
5554                 }
5555         }
5556
5557         task_stop(ctx.info);
5558
5559         return error ? -EINVAL : 0;
5560 }
5561
5562 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5563                         u64 num_bytes, unsigned long leaf_offset,
5564                         struct extent_buffer *eb) {
5565
5566         u64 offset = 0;
5567         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5568         char *data;
5569         unsigned long csum_offset;
5570         u32 csum;
5571         u32 csum_expected;
5572         u64 read_len;
5573         u64 data_checked = 0;
5574         u64 tmp;
5575         int ret = 0;
5576         int mirror;
5577         int num_copies;
5578
5579         if (num_bytes % root->sectorsize)
5580                 return -EINVAL;
5581
5582         data = malloc(num_bytes);
5583         if (!data)
5584                 return -ENOMEM;
5585
5586         while (offset < num_bytes) {
5587                 mirror = 0;
5588 again:
5589                 read_len = num_bytes - offset;
5590                 /* read as much space once a time */
5591                 ret = read_extent_data(root, data + offset,
5592                                 bytenr + offset, &read_len, mirror);
5593                 if (ret)
5594                         goto out;
5595                 data_checked = 0;
5596                 /* verify every 4k data's checksum */
5597                 while (data_checked < read_len) {
5598                         csum = ~(u32)0;
5599                         tmp = offset + data_checked;
5600
5601                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5602                                                csum, root->sectorsize);
5603                         btrfs_csum_final(csum, (char *)&csum);
5604
5605                         csum_offset = leaf_offset +
5606                                  tmp / root->sectorsize * csum_size;
5607                         read_extent_buffer(eb, (char *)&csum_expected,
5608                                            csum_offset, csum_size);
5609                         /* try another mirror */
5610                         if (csum != csum_expected) {
5611                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5612                                                 mirror, bytenr + tmp,
5613                                                 csum, csum_expected);
5614                                 num_copies = btrfs_num_copies(
5615                                                 &root->fs_info->mapping_tree,
5616                                                 bytenr, num_bytes);
5617                                 if (mirror < num_copies - 1) {
5618                                         mirror += 1;
5619                                         goto again;
5620                                 }
5621                         }
5622                         data_checked += root->sectorsize;
5623                 }
5624                 offset += read_len;
5625         }
5626 out:
5627         free(data);
5628         return ret;
5629 }
5630
5631 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5632                                u64 num_bytes)
5633 {
5634         struct btrfs_path *path;
5635         struct extent_buffer *leaf;
5636         struct btrfs_key key;
5637         int ret;
5638
5639         path = btrfs_alloc_path();
5640         if (!path) {
5641                 fprintf(stderr, "Error allocing path\n");
5642                 return -ENOMEM;
5643         }
5644
5645         key.objectid = bytenr;
5646         key.type = BTRFS_EXTENT_ITEM_KEY;
5647         key.offset = (u64)-1;
5648
5649 again:
5650         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5651                                 0, 0);
5652         if (ret < 0) {
5653                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5654                 btrfs_free_path(path);
5655                 return ret;
5656         } else if (ret) {
5657                 if (path->slots[0] > 0) {
5658                         path->slots[0]--;
5659                 } else {
5660                         ret = btrfs_prev_leaf(root, path);
5661                         if (ret < 0) {
5662                                 goto out;
5663                         } else if (ret > 0) {
5664                                 ret = 0;
5665                                 goto out;
5666                         }
5667                 }
5668         }
5669
5670         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5671
5672         /*
5673          * Block group items come before extent items if they have the same
5674          * bytenr, so walk back one more just in case.  Dear future traveler,
5675          * first congrats on mastering time travel.  Now if it's not too much
5676          * trouble could you go back to 2006 and tell Chris to make the
5677          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5678          * EXTENT_ITEM_KEY please?
5679          */
5680         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5681                 if (path->slots[0] > 0) {
5682                         path->slots[0]--;
5683                 } else {
5684                         ret = btrfs_prev_leaf(root, path);
5685                         if (ret < 0) {
5686                                 goto out;
5687                         } else if (ret > 0) {
5688                                 ret = 0;
5689                                 goto out;
5690                         }
5691                 }
5692                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5693         }
5694
5695         while (num_bytes) {
5696                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5697                         ret = btrfs_next_leaf(root, path);
5698                         if (ret < 0) {
5699                                 fprintf(stderr, "Error going to next leaf "
5700                                         "%d\n", ret);
5701                                 btrfs_free_path(path);
5702                                 return ret;
5703                         } else if (ret) {
5704                                 break;
5705                         }
5706                 }
5707                 leaf = path->nodes[0];
5708                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5709                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5710                         path->slots[0]++;
5711                         continue;
5712                 }
5713                 if (key.objectid + key.offset < bytenr) {
5714                         path->slots[0]++;
5715                         continue;
5716                 }
5717                 if (key.objectid > bytenr + num_bytes)
5718                         break;
5719
5720                 if (key.objectid == bytenr) {
5721                         if (key.offset >= num_bytes) {
5722                                 num_bytes = 0;
5723                                 break;
5724                         }
5725                         num_bytes -= key.offset;
5726                         bytenr += key.offset;
5727                 } else if (key.objectid < bytenr) {
5728                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5729                                 num_bytes = 0;
5730                                 break;
5731                         }
5732                         num_bytes = (bytenr + num_bytes) -
5733                                 (key.objectid + key.offset);
5734                         bytenr = key.objectid + key.offset;
5735                 } else {
5736                         if (key.objectid + key.offset < bytenr + num_bytes) {
5737                                 u64 new_start = key.objectid + key.offset;
5738                                 u64 new_bytes = bytenr + num_bytes - new_start;
5739
5740                                 /*
5741                                  * Weird case, the extent is in the middle of
5742                                  * our range, we'll have to search one side
5743                                  * and then the other.  Not sure if this happens
5744                                  * in real life, but no harm in coding it up
5745                                  * anyway just in case.
5746                                  */
5747                                 btrfs_release_path(path);
5748                                 ret = check_extent_exists(root, new_start,
5749                                                           new_bytes);
5750                                 if (ret) {
5751                                         fprintf(stderr, "Right section didn't "
5752                                                 "have a record\n");
5753                                         break;
5754                                 }
5755                                 num_bytes = key.objectid - bytenr;
5756                                 goto again;
5757                         }
5758                         num_bytes = key.objectid - bytenr;
5759                 }
5760                 path->slots[0]++;
5761         }
5762         ret = 0;
5763
5764 out:
5765         if (num_bytes && !ret) {
5766                 fprintf(stderr, "There are no extents for csum range "
5767                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5768                 ret = 1;
5769         }
5770
5771         btrfs_free_path(path);
5772         return ret;
5773 }
5774
5775 static int check_csums(struct btrfs_root *root)
5776 {
5777         struct btrfs_path *path;
5778         struct extent_buffer *leaf;
5779         struct btrfs_key key;
5780         u64 offset = 0, num_bytes = 0;
5781         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5782         int errors = 0;
5783         int ret;
5784         u64 data_len;
5785         unsigned long leaf_offset;
5786
5787         root = root->fs_info->csum_root;
5788         if (!extent_buffer_uptodate(root->node)) {
5789                 fprintf(stderr, "No valid csum tree found\n");
5790                 return -ENOENT;
5791         }
5792
5793         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5794         key.type = BTRFS_EXTENT_CSUM_KEY;
5795         key.offset = 0;
5796
5797         path = btrfs_alloc_path();
5798         if (!path)
5799                 return -ENOMEM;
5800
5801         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5802         if (ret < 0) {
5803                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5804                 btrfs_free_path(path);
5805                 return ret;
5806         }
5807
5808         if (ret > 0 && path->slots[0])
5809                 path->slots[0]--;
5810         ret = 0;
5811
5812         while (1) {
5813                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5814                         ret = btrfs_next_leaf(root, path);
5815                         if (ret < 0) {
5816                                 fprintf(stderr, "Error going to next leaf "
5817                                         "%d\n", ret);
5818                                 break;
5819                         }
5820                         if (ret)
5821                                 break;
5822                 }
5823                 leaf = path->nodes[0];
5824
5825                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5826                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5827                         path->slots[0]++;
5828                         continue;
5829                 }
5830
5831                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5832                               csum_size) * root->sectorsize;
5833                 if (!check_data_csum)
5834                         goto skip_csum_check;
5835                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5836                 ret = check_extent_csums(root, key.offset, data_len,
5837                                          leaf_offset, leaf);
5838                 if (ret)
5839                         break;
5840 skip_csum_check:
5841                 if (!num_bytes) {
5842                         offset = key.offset;
5843                 } else if (key.offset != offset + num_bytes) {
5844                         ret = check_extent_exists(root, offset, num_bytes);
5845                         if (ret) {
5846                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5847                                         "there is no extent record\n",
5848                                         offset, offset+num_bytes);
5849                                 errors++;
5850                         }
5851                         offset = key.offset;
5852                         num_bytes = 0;
5853                 }
5854                 num_bytes += data_len;
5855                 path->slots[0]++;
5856         }
5857
5858         btrfs_free_path(path);
5859         return errors;
5860 }
5861
5862 static int is_dropped_key(struct btrfs_key *key,
5863                           struct btrfs_key *drop_key) {
5864         if (key->objectid < drop_key->objectid)
5865                 return 1;
5866         else if (key->objectid == drop_key->objectid) {
5867                 if (key->type < drop_key->type)
5868                         return 1;
5869                 else if (key->type == drop_key->type) {
5870                         if (key->offset < drop_key->offset)
5871                                 return 1;
5872                 }
5873         }
5874         return 0;
5875 }
5876
5877 /*
5878  * Here are the rules for FULL_BACKREF.
5879  *
5880  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5881  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5882  *      FULL_BACKREF set.
5883  * 3) We cow'ed the block walking down a reloc tree.  This is impossible to tell
5884  *    if it happened after the relocation occurred since we'll have dropped the
5885  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5886  *    have no real way to know for sure.
5887  *
5888  * We process the blocks one root at a time, and we start from the lowest root
5889  * objectid and go to the highest.  So we can just lookup the owner backref for
5890  * the record and if we don't find it then we know it doesn't exist and we have
5891  * a FULL BACKREF.
5892  *
5893  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5894  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5895  * be set or not and then we can check later once we've gathered all the refs.
5896  */
5897 static int calc_extent_flag(struct btrfs_root *root,
5898                            struct cache_tree *extent_cache,
5899                            struct extent_buffer *buf,
5900                            struct root_item_record *ri,
5901                            u64 *flags)
5902 {
5903         struct extent_record *rec;
5904         struct cache_extent *cache;
5905         struct tree_backref *tback;
5906         u64 owner = 0;
5907
5908         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5909         /* we have added this extent before */
5910         BUG_ON(!cache);
5911         rec = container_of(cache, struct extent_record, cache);
5912
5913         /*
5914          * Except file/reloc tree, we can not have
5915          * FULL BACKREF MODE
5916          */
5917         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5918                 goto normal;
5919         /*
5920          * root node
5921          */
5922         if (buf->start == ri->bytenr)
5923                 goto normal;
5924
5925         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5926                 goto full_backref;
5927
5928         owner = btrfs_header_owner(buf);
5929         if (owner == ri->objectid)
5930                 goto normal;
5931
5932         tback = find_tree_backref(rec, 0, owner);
5933         if (!tback)
5934                 goto full_backref;
5935 normal:
5936         *flags = 0;
5937         if (rec->flag_block_full_backref != FLAG_UNSET &&
5938             rec->flag_block_full_backref != 0)
5939                 rec->bad_full_backref = 1;
5940         return 0;
5941 full_backref:
5942         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5943         if (rec->flag_block_full_backref != FLAG_UNSET &&
5944             rec->flag_block_full_backref != 1)
5945                 rec->bad_full_backref = 1;
5946         return 0;
5947 }
5948
5949 static int run_next_block(struct btrfs_root *root,
5950                           struct block_info *bits,
5951                           int bits_nr,
5952                           u64 *last,
5953                           struct cache_tree *pending,
5954                           struct cache_tree *seen,
5955                           struct cache_tree *reada,
5956                           struct cache_tree *nodes,
5957                           struct cache_tree *extent_cache,
5958                           struct cache_tree *chunk_cache,
5959                           struct rb_root *dev_cache,
5960                           struct block_group_tree *block_group_cache,
5961                           struct device_extent_tree *dev_extent_cache,
5962                           struct root_item_record *ri)
5963 {
5964         struct extent_buffer *buf;
5965         struct extent_record *rec = NULL;
5966         u64 bytenr;
5967         u32 size;
5968         u64 parent;
5969         u64 owner;
5970         u64 flags;
5971         u64 ptr;
5972         u64 gen = 0;
5973         int ret = 0;
5974         int i;
5975         int nritems;
5976         struct btrfs_key key;
5977         struct cache_extent *cache;
5978         int reada_bits;
5979
5980         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5981                                     bits_nr, &reada_bits);
5982         if (nritems == 0)
5983                 return 1;
5984
5985         if (!reada_bits) {
5986                 for(i = 0; i < nritems; i++) {
5987                         ret = add_cache_extent(reada, bits[i].start,
5988                                                bits[i].size);
5989                         if (ret == -EEXIST)
5990                                 continue;
5991
5992                         /* fixme, get the parent transid */
5993                         readahead_tree_block(root, bits[i].start,
5994                                              bits[i].size, 0);
5995                 }
5996         }
5997         *last = bits[0].start;
5998         bytenr = bits[0].start;
5999         size = bits[0].size;
6000
6001         cache = lookup_cache_extent(pending, bytenr, size);
6002         if (cache) {
6003                 remove_cache_extent(pending, cache);
6004                 free(cache);
6005         }
6006         cache = lookup_cache_extent(reada, bytenr, size);
6007         if (cache) {
6008                 remove_cache_extent(reada, cache);
6009                 free(cache);
6010         }
6011         cache = lookup_cache_extent(nodes, bytenr, size);
6012         if (cache) {
6013                 remove_cache_extent(nodes, cache);
6014                 free(cache);
6015         }
6016         cache = lookup_cache_extent(extent_cache, bytenr, size);
6017         if (cache) {
6018                 rec = container_of(cache, struct extent_record, cache);
6019                 gen = rec->parent_generation;
6020         }
6021
6022         /* fixme, get the real parent transid */
6023         buf = read_tree_block(root, bytenr, size, gen);
6024         if (!extent_buffer_uptodate(buf)) {
6025                 record_bad_block_io(root->fs_info,
6026                                     extent_cache, bytenr, size);
6027                 goto out;
6028         }
6029
6030         nritems = btrfs_header_nritems(buf);
6031
6032         flags = 0;
6033         if (!init_extent_tree) {
6034                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6035                                        btrfs_header_level(buf), 1, NULL,
6036                                        &flags);
6037                 if (ret < 0) {
6038                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6039                         if (ret < 0) {
6040                                 fprintf(stderr, "Couldn't calc extent flags\n");
6041                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6042                         }
6043                 }
6044         } else {
6045                 flags = 0;
6046                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6047                 if (ret < 0) {
6048                         fprintf(stderr, "Couldn't calc extent flags\n");
6049                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6050                 }
6051         }
6052
6053         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6054                 if (ri != NULL &&
6055                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6056                     ri->objectid == btrfs_header_owner(buf)) {
6057                         /*
6058                          * Ok we got to this block from it's original owner and
6059                          * we have FULL_BACKREF set.  Relocation can leave
6060                          * converted blocks over so this is altogether possible,
6061                          * however it's not possible if the generation > the
6062                          * last snapshot, so check for this case.
6063                          */
6064                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6065                             btrfs_header_generation(buf) > ri->last_snapshot) {
6066                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6067                                 rec->bad_full_backref = 1;
6068                         }
6069                 }
6070         } else {
6071                 if (ri != NULL &&
6072                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6073                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6074                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6075                         rec->bad_full_backref = 1;
6076                 }
6077         }
6078
6079         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6080                 rec->flag_block_full_backref = 1;
6081                 parent = bytenr;
6082                 owner = 0;
6083         } else {
6084                 rec->flag_block_full_backref = 0;
6085                 parent = 0;
6086                 owner = btrfs_header_owner(buf);
6087         }
6088
6089         ret = check_block(root, extent_cache, buf, flags);
6090         if (ret)
6091                 goto out;
6092
6093         if (btrfs_is_leaf(buf)) {
6094                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6095                 for (i = 0; i < nritems; i++) {
6096                         struct btrfs_file_extent_item *fi;
6097                         btrfs_item_key_to_cpu(buf, &key, i);
6098                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6099                                 process_extent_item(root, extent_cache, buf,
6100                                                     i);
6101                                 continue;
6102                         }
6103                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6104                                 process_extent_item(root, extent_cache, buf,
6105                                                     i);
6106                                 continue;
6107                         }
6108                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6109                                 total_csum_bytes +=
6110                                         btrfs_item_size_nr(buf, i);
6111                                 continue;
6112                         }
6113                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6114                                 process_chunk_item(chunk_cache, &key, buf, i);
6115                                 continue;
6116                         }
6117                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6118                                 process_device_item(dev_cache, &key, buf, i);
6119                                 continue;
6120                         }
6121                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6122                                 process_block_group_item(block_group_cache,
6123                                         &key, buf, i);
6124                                 continue;
6125                         }
6126                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6127                                 process_device_extent_item(dev_extent_cache,
6128                                         &key, buf, i);
6129                                 continue;
6130
6131                         }
6132                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6133 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6134                                 process_extent_ref_v0(extent_cache, buf, i);
6135 #else
6136                                 BUG();
6137 #endif
6138                                 continue;
6139                         }
6140
6141                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6142                                 add_tree_backref(extent_cache, key.objectid, 0,
6143                                                  key.offset, 0);
6144                                 continue;
6145                         }
6146                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6147                                 add_tree_backref(extent_cache, key.objectid,
6148                                                  key.offset, 0, 0);
6149                                 continue;
6150                         }
6151                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6152                                 struct btrfs_extent_data_ref *ref;
6153                                 ref = btrfs_item_ptr(buf, i,
6154                                                 struct btrfs_extent_data_ref);
6155                                 add_data_backref(extent_cache,
6156                                         key.objectid, 0,
6157                                         btrfs_extent_data_ref_root(buf, ref),
6158                                         btrfs_extent_data_ref_objectid(buf,
6159                                                                        ref),
6160                                         btrfs_extent_data_ref_offset(buf, ref),
6161                                         btrfs_extent_data_ref_count(buf, ref),
6162                                         0, root->sectorsize);
6163                                 continue;
6164                         }
6165                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6166                                 struct btrfs_shared_data_ref *ref;
6167                                 ref = btrfs_item_ptr(buf, i,
6168                                                 struct btrfs_shared_data_ref);
6169                                 add_data_backref(extent_cache,
6170                                         key.objectid, key.offset, 0, 0, 0,
6171                                         btrfs_shared_data_ref_count(buf, ref),
6172                                         0, root->sectorsize);
6173                                 continue;
6174                         }
6175                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6176                                 struct bad_item *bad;
6177
6178                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6179                                         continue;
6180                                 if (!owner)
6181                                         continue;
6182                                 bad = malloc(sizeof(struct bad_item));
6183                                 if (!bad)
6184                                         continue;
6185                                 INIT_LIST_HEAD(&bad->list);
6186                                 memcpy(&bad->key, &key,
6187                                        sizeof(struct btrfs_key));
6188                                 bad->root_id = owner;
6189                                 list_add_tail(&bad->list, &delete_items);
6190                                 continue;
6191                         }
6192                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6193                                 continue;
6194                         fi = btrfs_item_ptr(buf, i,
6195                                             struct btrfs_file_extent_item);
6196                         if (btrfs_file_extent_type(buf, fi) ==
6197                             BTRFS_FILE_EXTENT_INLINE)
6198                                 continue;
6199                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6200                                 continue;
6201
6202                         data_bytes_allocated +=
6203                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6204                         if (data_bytes_allocated < root->sectorsize) {
6205                                 abort();
6206                         }
6207                         data_bytes_referenced +=
6208                                 btrfs_file_extent_num_bytes(buf, fi);
6209                         add_data_backref(extent_cache,
6210                                 btrfs_file_extent_disk_bytenr(buf, fi),
6211                                 parent, owner, key.objectid, key.offset -
6212                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6213                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6214                 }
6215         } else {
6216                 int level;
6217                 struct btrfs_key first_key;
6218
6219                 first_key.objectid = 0;
6220
6221                 if (nritems > 0)
6222                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6223                 level = btrfs_header_level(buf);
6224                 for (i = 0; i < nritems; i++) {
6225                         ptr = btrfs_node_blockptr(buf, i);
6226                         size = root->nodesize;
6227                         btrfs_node_key_to_cpu(buf, &key, i);
6228                         if (ri != NULL) {
6229                                 if ((level == ri->drop_level)
6230                                     && is_dropped_key(&key, &ri->drop_key)) {
6231                                         continue;
6232                                 }
6233                         }
6234                         ret = add_extent_rec(extent_cache, &key,
6235                                              btrfs_node_ptr_generation(buf, i),
6236                                              ptr, size, 0, 0, 1, 0, 1, 0,
6237                                              size);
6238                         BUG_ON(ret);
6239
6240                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6241
6242                         if (level > 1) {
6243                                 add_pending(nodes, seen, ptr, size);
6244                         } else {
6245                                 add_pending(pending, seen, ptr, size);
6246                         }
6247                 }
6248                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6249                                       nritems) * sizeof(struct btrfs_key_ptr);
6250         }
6251         total_btree_bytes += buf->len;
6252         if (fs_root_objectid(btrfs_header_owner(buf)))
6253                 total_fs_tree_bytes += buf->len;
6254         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6255                 total_extent_tree_bytes += buf->len;
6256         if (!found_old_backref &&
6257             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6258             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6259             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6260                 found_old_backref = 1;
6261 out:
6262         free_extent_buffer(buf);
6263         return ret;
6264 }
6265
6266 static int add_root_to_pending(struct extent_buffer *buf,
6267                                struct cache_tree *extent_cache,
6268                                struct cache_tree *pending,
6269                                struct cache_tree *seen,
6270                                struct cache_tree *nodes,
6271                                u64 objectid)
6272 {
6273         if (btrfs_header_level(buf) > 0)
6274                 add_pending(nodes, seen, buf->start, buf->len);
6275         else
6276                 add_pending(pending, seen, buf->start, buf->len);
6277         add_extent_rec(extent_cache, NULL, 0, buf->start, buf->len,
6278                        0, 1, 1, 0, 1, 0, buf->len);
6279
6280         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6281             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6282                 add_tree_backref(extent_cache, buf->start, buf->start,
6283                                  0, 1);
6284         else
6285                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6286         return 0;
6287 }
6288
6289 /* as we fix the tree, we might be deleting blocks that
6290  * we're tracking for repair.  This hook makes sure we
6291  * remove any backrefs for blocks as we are fixing them.
6292  */
6293 static int free_extent_hook(struct btrfs_trans_handle *trans,
6294                             struct btrfs_root *root,
6295                             u64 bytenr, u64 num_bytes, u64 parent,
6296                             u64 root_objectid, u64 owner, u64 offset,
6297                             int refs_to_drop)
6298 {
6299         struct extent_record *rec;
6300         struct cache_extent *cache;
6301         int is_data;
6302         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6303
6304         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6305         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6306         if (!cache)
6307                 return 0;
6308
6309         rec = container_of(cache, struct extent_record, cache);
6310         if (is_data) {
6311                 struct data_backref *back;
6312                 back = find_data_backref(rec, parent, root_objectid, owner,
6313                                          offset, 1, bytenr, num_bytes);
6314                 if (!back)
6315                         goto out;
6316                 if (back->node.found_ref) {
6317                         back->found_ref -= refs_to_drop;
6318                         if (rec->refs)
6319                                 rec->refs -= refs_to_drop;
6320                 }
6321                 if (back->node.found_extent_tree) {
6322                         back->num_refs -= refs_to_drop;
6323                         if (rec->extent_item_refs)
6324                                 rec->extent_item_refs -= refs_to_drop;
6325                 }
6326                 if (back->found_ref == 0)
6327                         back->node.found_ref = 0;
6328                 if (back->num_refs == 0)
6329                         back->node.found_extent_tree = 0;
6330
6331                 if (!back->node.found_extent_tree && back->node.found_ref) {
6332                         list_del(&back->node.list);
6333                         free(back);
6334                 }
6335         } else {
6336                 struct tree_backref *back;
6337                 back = find_tree_backref(rec, parent, root_objectid);
6338                 if (!back)
6339                         goto out;
6340                 if (back->node.found_ref) {
6341                         if (rec->refs)
6342                                 rec->refs--;
6343                         back->node.found_ref = 0;
6344                 }
6345                 if (back->node.found_extent_tree) {
6346                         if (rec->extent_item_refs)
6347                                 rec->extent_item_refs--;
6348                         back->node.found_extent_tree = 0;
6349                 }
6350                 if (!back->node.found_extent_tree && back->node.found_ref) {
6351                         list_del(&back->node.list);
6352                         free(back);
6353                 }
6354         }
6355         maybe_free_extent_rec(extent_cache, rec);
6356 out:
6357         return 0;
6358 }
6359
6360 static int delete_extent_records(struct btrfs_trans_handle *trans,
6361                                  struct btrfs_root *root,
6362                                  struct btrfs_path *path,
6363                                  u64 bytenr, u64 new_len)
6364 {
6365         struct btrfs_key key;
6366         struct btrfs_key found_key;
6367         struct extent_buffer *leaf;
6368         int ret;
6369         int slot;
6370
6371
6372         key.objectid = bytenr;
6373         key.type = (u8)-1;
6374         key.offset = (u64)-1;
6375
6376         while(1) {
6377                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6378                                         &key, path, 0, 1);
6379                 if (ret < 0)
6380                         break;
6381
6382                 if (ret > 0) {
6383                         ret = 0;
6384                         if (path->slots[0] == 0)
6385                                 break;
6386                         path->slots[0]--;
6387                 }
6388                 ret = 0;
6389
6390                 leaf = path->nodes[0];
6391                 slot = path->slots[0];
6392
6393                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6394                 if (found_key.objectid != bytenr)
6395                         break;
6396
6397                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6398                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6399                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6400                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6401                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6402                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6403                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6404                         btrfs_release_path(path);
6405                         if (found_key.type == 0) {
6406                                 if (found_key.offset == 0)
6407                                         break;
6408                                 key.offset = found_key.offset - 1;
6409                                 key.type = found_key.type;
6410                         }
6411                         key.type = found_key.type - 1;
6412                         key.offset = (u64)-1;
6413                         continue;
6414                 }
6415
6416                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6417                         found_key.objectid, found_key.type, found_key.offset);
6418
6419                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6420                 if (ret)
6421                         break;
6422                 btrfs_release_path(path);
6423
6424                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6425                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6426                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6427                                 found_key.offset : root->nodesize;
6428
6429                         ret = btrfs_update_block_group(trans, root, bytenr,
6430                                                        bytes, 0, 0);
6431                         if (ret)
6432                                 break;
6433                 }
6434         }
6435
6436         btrfs_release_path(path);
6437         return ret;
6438 }
6439
6440 /*
6441  * for a single backref, this will allocate a new extent
6442  * and add the backref to it.
6443  */
6444 static int record_extent(struct btrfs_trans_handle *trans,
6445                          struct btrfs_fs_info *info,
6446                          struct btrfs_path *path,
6447                          struct extent_record *rec,
6448                          struct extent_backref *back,
6449                          int allocated, u64 flags)
6450 {
6451         int ret;
6452         struct btrfs_root *extent_root = info->extent_root;
6453         struct extent_buffer *leaf;
6454         struct btrfs_key ins_key;
6455         struct btrfs_extent_item *ei;
6456         struct tree_backref *tback;
6457         struct data_backref *dback;
6458         struct btrfs_tree_block_info *bi;
6459
6460         if (!back->is_data)
6461                 rec->max_size = max_t(u64, rec->max_size,
6462                                     info->extent_root->nodesize);
6463
6464         if (!allocated) {
6465                 u32 item_size = sizeof(*ei);
6466
6467                 if (!back->is_data)
6468                         item_size += sizeof(*bi);
6469
6470                 ins_key.objectid = rec->start;
6471                 ins_key.offset = rec->max_size;
6472                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6473
6474                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6475                                         &ins_key, item_size);
6476                 if (ret)
6477                         goto fail;
6478
6479                 leaf = path->nodes[0];
6480                 ei = btrfs_item_ptr(leaf, path->slots[0],
6481                                     struct btrfs_extent_item);
6482
6483                 btrfs_set_extent_refs(leaf, ei, 0);
6484                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6485
6486                 if (back->is_data) {
6487                         btrfs_set_extent_flags(leaf, ei,
6488                                                BTRFS_EXTENT_FLAG_DATA);
6489                 } else {
6490                         struct btrfs_disk_key copy_key;;
6491
6492                         tback = (struct tree_backref *)back;
6493                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6494                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6495                                              sizeof(*bi));
6496
6497                         btrfs_set_disk_key_objectid(&copy_key,
6498                                                     rec->info_objectid);
6499                         btrfs_set_disk_key_type(&copy_key, 0);
6500                         btrfs_set_disk_key_offset(&copy_key, 0);
6501
6502                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6503                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6504
6505                         btrfs_set_extent_flags(leaf, ei,
6506                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6507                 }
6508
6509                 btrfs_mark_buffer_dirty(leaf);
6510                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6511                                                rec->max_size, 1, 0);
6512                 if (ret)
6513                         goto fail;
6514                 btrfs_release_path(path);
6515         }
6516
6517         if (back->is_data) {
6518                 u64 parent;
6519                 int i;
6520
6521                 dback = (struct data_backref *)back;
6522                 if (back->full_backref)
6523                         parent = dback->parent;
6524                 else
6525                         parent = 0;
6526
6527                 for (i = 0; i < dback->found_ref; i++) {
6528                         /* if parent != 0, we're doing a full backref
6529                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6530                          * just makes the backref allocator create a data
6531                          * backref
6532                          */
6533                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6534                                                    rec->start, rec->max_size,
6535                                                    parent,
6536                                                    dback->root,
6537                                                    parent ?
6538                                                    BTRFS_FIRST_FREE_OBJECTID :
6539                                                    dback->owner,
6540                                                    dback->offset);
6541                         if (ret)
6542                                 break;
6543                 }
6544                 fprintf(stderr, "adding new data backref"
6545                                 " on %llu %s %llu owner %llu"
6546                                 " offset %llu found %d\n",
6547                                 (unsigned long long)rec->start,
6548                                 back->full_backref ?
6549                                 "parent" : "root",
6550                                 back->full_backref ?
6551                                 (unsigned long long)parent :
6552                                 (unsigned long long)dback->root,
6553                                 (unsigned long long)dback->owner,
6554                                 (unsigned long long)dback->offset,
6555                                 dback->found_ref);
6556         } else {
6557                 u64 parent;
6558
6559                 tback = (struct tree_backref *)back;
6560                 if (back->full_backref)
6561                         parent = tback->parent;
6562                 else
6563                         parent = 0;
6564
6565                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6566                                            rec->start, rec->max_size,
6567                                            parent, tback->root, 0, 0);
6568                 fprintf(stderr, "adding new tree backref on "
6569                         "start %llu len %llu parent %llu root %llu\n",
6570                         rec->start, rec->max_size, parent, tback->root);
6571         }
6572 fail:
6573         btrfs_release_path(path);
6574         return ret;
6575 }
6576
6577 static struct extent_entry *find_entry(struct list_head *entries,
6578                                        u64 bytenr, u64 bytes)
6579 {
6580         struct extent_entry *entry = NULL;
6581
6582         list_for_each_entry(entry, entries, list) {
6583                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6584                         return entry;
6585         }
6586
6587         return NULL;
6588 }
6589
6590 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6591 {
6592         struct extent_entry *entry, *best = NULL, *prev = NULL;
6593
6594         list_for_each_entry(entry, entries, list) {
6595                 if (!prev) {
6596                         prev = entry;
6597                         continue;
6598                 }
6599
6600                 /*
6601                  * If there are as many broken entries as entries then we know
6602                  * not to trust this particular entry.
6603                  */
6604                 if (entry->broken == entry->count)
6605                         continue;
6606
6607                 /*
6608                  * If our current entry == best then we can't be sure our best
6609                  * is really the best, so we need to keep searching.
6610                  */
6611                 if (best && best->count == entry->count) {
6612                         prev = entry;
6613                         best = NULL;
6614                         continue;
6615                 }
6616
6617                 /* Prev == entry, not good enough, have to keep searching */
6618                 if (!prev->broken && prev->count == entry->count)
6619                         continue;
6620
6621                 if (!best)
6622                         best = (prev->count > entry->count) ? prev : entry;
6623                 else if (best->count < entry->count)
6624                         best = entry;
6625                 prev = entry;
6626         }
6627
6628         return best;
6629 }
6630
6631 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6632                       struct data_backref *dback, struct extent_entry *entry)
6633 {
6634         struct btrfs_trans_handle *trans;
6635         struct btrfs_root *root;
6636         struct btrfs_file_extent_item *fi;
6637         struct extent_buffer *leaf;
6638         struct btrfs_key key;
6639         u64 bytenr, bytes;
6640         int ret, err;
6641
6642         key.objectid = dback->root;
6643         key.type = BTRFS_ROOT_ITEM_KEY;
6644         key.offset = (u64)-1;
6645         root = btrfs_read_fs_root(info, &key);
6646         if (IS_ERR(root)) {
6647                 fprintf(stderr, "Couldn't find root for our ref\n");
6648                 return -EINVAL;
6649         }
6650
6651         /*
6652          * The backref points to the original offset of the extent if it was
6653          * split, so we need to search down to the offset we have and then walk
6654          * forward until we find the backref we're looking for.
6655          */
6656         key.objectid = dback->owner;
6657         key.type = BTRFS_EXTENT_DATA_KEY;
6658         key.offset = dback->offset;
6659         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6660         if (ret < 0) {
6661                 fprintf(stderr, "Error looking up ref %d\n", ret);
6662                 return ret;
6663         }
6664
6665         while (1) {
6666                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6667                         ret = btrfs_next_leaf(root, path);
6668                         if (ret) {
6669                                 fprintf(stderr, "Couldn't find our ref, next\n");
6670                                 return -EINVAL;
6671                         }
6672                 }
6673                 leaf = path->nodes[0];
6674                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6675                 if (key.objectid != dback->owner ||
6676                     key.type != BTRFS_EXTENT_DATA_KEY) {
6677                         fprintf(stderr, "Couldn't find our ref, search\n");
6678                         return -EINVAL;
6679                 }
6680                 fi = btrfs_item_ptr(leaf, path->slots[0],
6681                                     struct btrfs_file_extent_item);
6682                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6683                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6684
6685                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6686                         break;
6687                 path->slots[0]++;
6688         }
6689
6690         btrfs_release_path(path);
6691
6692         trans = btrfs_start_transaction(root, 1);
6693         if (IS_ERR(trans))
6694                 return PTR_ERR(trans);
6695
6696         /*
6697          * Ok we have the key of the file extent we want to fix, now we can cow
6698          * down to the thing and fix it.
6699          */
6700         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6701         if (ret < 0) {
6702                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6703                         key.objectid, key.type, key.offset, ret);
6704                 goto out;
6705         }
6706         if (ret > 0) {
6707                 fprintf(stderr, "Well that's odd, we just found this key "
6708                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6709                         key.offset);
6710                 ret = -EINVAL;
6711                 goto out;
6712         }
6713         leaf = path->nodes[0];
6714         fi = btrfs_item_ptr(leaf, path->slots[0],
6715                             struct btrfs_file_extent_item);
6716
6717         if (btrfs_file_extent_compression(leaf, fi) &&
6718             dback->disk_bytenr != entry->bytenr) {
6719                 fprintf(stderr, "Ref doesn't match the record start and is "
6720                         "compressed, please take a btrfs-image of this file "
6721                         "system and send it to a btrfs developer so they can "
6722                         "complete this functionality for bytenr %Lu\n",
6723                         dback->disk_bytenr);
6724                 ret = -EINVAL;
6725                 goto out;
6726         }
6727
6728         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6729                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6730         } else if (dback->disk_bytenr > entry->bytenr) {
6731                 u64 off_diff, offset;
6732
6733                 off_diff = dback->disk_bytenr - entry->bytenr;
6734                 offset = btrfs_file_extent_offset(leaf, fi);
6735                 if (dback->disk_bytenr + offset +
6736                     btrfs_file_extent_num_bytes(leaf, fi) >
6737                     entry->bytenr + entry->bytes) {
6738                         fprintf(stderr, "Ref is past the entry end, please "
6739                                 "take a btrfs-image of this file system and "
6740                                 "send it to a btrfs developer, ref %Lu\n",
6741                                 dback->disk_bytenr);
6742                         ret = -EINVAL;
6743                         goto out;
6744                 }
6745                 offset += off_diff;
6746                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6747                 btrfs_set_file_extent_offset(leaf, fi, offset);
6748         } else if (dback->disk_bytenr < entry->bytenr) {
6749                 u64 offset;
6750
6751                 offset = btrfs_file_extent_offset(leaf, fi);
6752                 if (dback->disk_bytenr + offset < entry->bytenr) {
6753                         fprintf(stderr, "Ref is before the entry start, please"
6754                                 " take a btrfs-image of this file system and "
6755                                 "send it to a btrfs developer, ref %Lu\n",
6756                                 dback->disk_bytenr);
6757                         ret = -EINVAL;
6758                         goto out;
6759                 }
6760
6761                 offset += dback->disk_bytenr;
6762                 offset -= entry->bytenr;
6763                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6764                 btrfs_set_file_extent_offset(leaf, fi, offset);
6765         }
6766
6767         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6768
6769         /*
6770          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6771          * only do this if we aren't using compression, otherwise it's a
6772          * trickier case.
6773          */
6774         if (!btrfs_file_extent_compression(leaf, fi))
6775                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6776         else
6777                 printf("ram bytes may be wrong?\n");
6778         btrfs_mark_buffer_dirty(leaf);
6779 out:
6780         err = btrfs_commit_transaction(trans, root);
6781         btrfs_release_path(path);
6782         return ret ? ret : err;
6783 }
6784
6785 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6786                            struct extent_record *rec)
6787 {
6788         struct extent_backref *back;
6789         struct data_backref *dback;
6790         struct extent_entry *entry, *best = NULL;
6791         LIST_HEAD(entries);
6792         int nr_entries = 0;
6793         int broken_entries = 0;
6794         int ret = 0;
6795         short mismatch = 0;
6796
6797         /*
6798          * Metadata is easy and the backrefs should always agree on bytenr and
6799          * size, if not we've got bigger issues.
6800          */
6801         if (rec->metadata)
6802                 return 0;
6803
6804         list_for_each_entry(back, &rec->backrefs, list) {
6805                 if (back->full_backref || !back->is_data)
6806                         continue;
6807
6808                 dback = (struct data_backref *)back;
6809
6810                 /*
6811                  * We only pay attention to backrefs that we found a real
6812                  * backref for.
6813                  */
6814                 if (dback->found_ref == 0)
6815                         continue;
6816
6817                 /*
6818                  * For now we only catch when the bytes don't match, not the
6819                  * bytenr.  We can easily do this at the same time, but I want
6820                  * to have a fs image to test on before we just add repair
6821                  * functionality willy-nilly so we know we won't screw up the
6822                  * repair.
6823                  */
6824
6825                 entry = find_entry(&entries, dback->disk_bytenr,
6826                                    dback->bytes);
6827                 if (!entry) {
6828                         entry = malloc(sizeof(struct extent_entry));
6829                         if (!entry) {
6830                                 ret = -ENOMEM;
6831                                 goto out;
6832                         }
6833                         memset(entry, 0, sizeof(*entry));
6834                         entry->bytenr = dback->disk_bytenr;
6835                         entry->bytes = dback->bytes;
6836                         list_add_tail(&entry->list, &entries);
6837                         nr_entries++;
6838                 }
6839
6840                 /*
6841                  * If we only have on entry we may think the entries agree when
6842                  * in reality they don't so we have to do some extra checking.
6843                  */
6844                 if (dback->disk_bytenr != rec->start ||
6845                     dback->bytes != rec->nr || back->broken)
6846                         mismatch = 1;
6847
6848                 if (back->broken) {
6849                         entry->broken++;
6850                         broken_entries++;
6851                 }
6852
6853                 entry->count++;
6854         }
6855
6856         /* Yay all the backrefs agree, carry on good sir */
6857         if (nr_entries <= 1 && !mismatch)
6858                 goto out;
6859
6860         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6861                 "%Lu\n", rec->start);
6862
6863         /*
6864          * First we want to see if the backrefs can agree amongst themselves who
6865          * is right, so figure out which one of the entries has the highest
6866          * count.
6867          */
6868         best = find_most_right_entry(&entries);
6869
6870         /*
6871          * Ok so we may have an even split between what the backrefs think, so
6872          * this is where we use the extent ref to see what it thinks.
6873          */
6874         if (!best) {
6875                 entry = find_entry(&entries, rec->start, rec->nr);
6876                 if (!entry && (!broken_entries || !rec->found_rec)) {
6877                         fprintf(stderr, "Backrefs don't agree with each other "
6878                                 "and extent record doesn't agree with anybody,"
6879                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6880                                 rec->start, rec->nr);
6881                         ret = -EINVAL;
6882                         goto out;
6883                 } else if (!entry) {
6884                         /*
6885                          * Ok our backrefs were broken, we'll assume this is the
6886                          * correct value and add an entry for this range.
6887                          */
6888                         entry = malloc(sizeof(struct extent_entry));
6889                         if (!entry) {
6890                                 ret = -ENOMEM;
6891                                 goto out;
6892                         }
6893                         memset(entry, 0, sizeof(*entry));
6894                         entry->bytenr = rec->start;
6895                         entry->bytes = rec->nr;
6896                         list_add_tail(&entry->list, &entries);
6897                         nr_entries++;
6898                 }
6899                 entry->count++;
6900                 best = find_most_right_entry(&entries);
6901                 if (!best) {
6902                         fprintf(stderr, "Backrefs and extent record evenly "
6903                                 "split on who is right, this is going to "
6904                                 "require user input to fix bytenr %Lu bytes "
6905                                 "%Lu\n", rec->start, rec->nr);
6906                         ret = -EINVAL;
6907                         goto out;
6908                 }
6909         }
6910
6911         /*
6912          * I don't think this can happen currently as we'll abort() if we catch
6913          * this case higher up, but in case somebody removes that we still can't
6914          * deal with it properly here yet, so just bail out of that's the case.
6915          */
6916         if (best->bytenr != rec->start) {
6917                 fprintf(stderr, "Extent start and backref starts don't match, "
6918                         "please use btrfs-image on this file system and send "
6919                         "it to a btrfs developer so they can make fsck fix "
6920                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6921                         rec->start, rec->nr);
6922                 ret = -EINVAL;
6923                 goto out;
6924         }
6925
6926         /*
6927          * Ok great we all agreed on an extent record, let's go find the real
6928          * references and fix up the ones that don't match.
6929          */
6930         list_for_each_entry(back, &rec->backrefs, list) {
6931                 if (back->full_backref || !back->is_data)
6932                         continue;
6933
6934                 dback = (struct data_backref *)back;
6935
6936                 /*
6937                  * Still ignoring backrefs that don't have a real ref attached
6938                  * to them.
6939                  */
6940                 if (dback->found_ref == 0)
6941                         continue;
6942
6943                 if (dback->bytes == best->bytes &&
6944                     dback->disk_bytenr == best->bytenr)
6945                         continue;
6946
6947                 ret = repair_ref(info, path, dback, best);
6948                 if (ret)
6949                         goto out;
6950         }
6951
6952         /*
6953          * Ok we messed with the actual refs, which means we need to drop our
6954          * entire cache and go back and rescan.  I know this is a huge pain and
6955          * adds a lot of extra work, but it's the only way to be safe.  Once all
6956          * the backrefs agree we may not need to do anything to the extent
6957          * record itself.
6958          */
6959         ret = -EAGAIN;
6960 out:
6961         while (!list_empty(&entries)) {
6962                 entry = list_entry(entries.next, struct extent_entry, list);
6963                 list_del_init(&entry->list);
6964                 free(entry);
6965         }
6966         return ret;
6967 }
6968
6969 static int process_duplicates(struct btrfs_root *root,
6970                               struct cache_tree *extent_cache,
6971                               struct extent_record *rec)
6972 {
6973         struct extent_record *good, *tmp;
6974         struct cache_extent *cache;
6975         int ret;
6976
6977         /*
6978          * If we found a extent record for this extent then return, or if we
6979          * have more than one duplicate we are likely going to need to delete
6980          * something.
6981          */
6982         if (rec->found_rec || rec->num_duplicates > 1)
6983                 return 0;
6984
6985         /* Shouldn't happen but just in case */
6986         BUG_ON(!rec->num_duplicates);
6987
6988         /*
6989          * So this happens if we end up with a backref that doesn't match the
6990          * actual extent entry.  So either the backref is bad or the extent
6991          * entry is bad.  Either way we want to have the extent_record actually
6992          * reflect what we found in the extent_tree, so we need to take the
6993          * duplicate out and use that as the extent_record since the only way we
6994          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
6995          */
6996         remove_cache_extent(extent_cache, &rec->cache);
6997
6998         good = list_entry(rec->dups.next, struct extent_record, list);
6999         list_del_init(&good->list);
7000         INIT_LIST_HEAD(&good->backrefs);
7001         INIT_LIST_HEAD(&good->dups);
7002         good->cache.start = good->start;
7003         good->cache.size = good->nr;
7004         good->content_checked = 0;
7005         good->owner_ref_checked = 0;
7006         good->num_duplicates = 0;
7007         good->refs = rec->refs;
7008         list_splice_init(&rec->backrefs, &good->backrefs);
7009         while (1) {
7010                 cache = lookup_cache_extent(extent_cache, good->start,
7011                                             good->nr);
7012                 if (!cache)
7013                         break;
7014                 tmp = container_of(cache, struct extent_record, cache);
7015
7016                 /*
7017                  * If we find another overlapping extent and it's found_rec is
7018                  * set then it's a duplicate and we need to try and delete
7019                  * something.
7020                  */
7021                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7022                         if (list_empty(&good->list))
7023                                 list_add_tail(&good->list,
7024                                               &duplicate_extents);
7025                         good->num_duplicates += tmp->num_duplicates + 1;
7026                         list_splice_init(&tmp->dups, &good->dups);
7027                         list_del_init(&tmp->list);
7028                         list_add_tail(&tmp->list, &good->dups);
7029                         remove_cache_extent(extent_cache, &tmp->cache);
7030                         continue;
7031                 }
7032
7033                 /*
7034                  * Ok we have another non extent item backed extent rec, so lets
7035                  * just add it to this extent and carry on like we did above.
7036                  */
7037                 good->refs += tmp->refs;
7038                 list_splice_init(&tmp->backrefs, &good->backrefs);
7039                 remove_cache_extent(extent_cache, &tmp->cache);
7040                 free(tmp);
7041         }
7042         ret = insert_cache_extent(extent_cache, &good->cache);
7043         BUG_ON(ret);
7044         free(rec);
7045         return good->num_duplicates ? 0 : 1;
7046 }
7047
7048 static int delete_duplicate_records(struct btrfs_root *root,
7049                                     struct extent_record *rec)
7050 {
7051         struct btrfs_trans_handle *trans;
7052         LIST_HEAD(delete_list);
7053         struct btrfs_path *path;
7054         struct extent_record *tmp, *good, *n;
7055         int nr_del = 0;
7056         int ret = 0, err;
7057         struct btrfs_key key;
7058
7059         path = btrfs_alloc_path();
7060         if (!path) {
7061                 ret = -ENOMEM;
7062                 goto out;
7063         }
7064
7065         good = rec;
7066         /* Find the record that covers all of the duplicates. */
7067         list_for_each_entry(tmp, &rec->dups, list) {
7068                 if (good->start < tmp->start)
7069                         continue;
7070                 if (good->nr > tmp->nr)
7071                         continue;
7072
7073                 if (tmp->start + tmp->nr < good->start + good->nr) {
7074                         fprintf(stderr, "Ok we have overlapping extents that "
7075                                 "aren't completely covered by eachother, this "
7076                                 "is going to require more careful thought.  "
7077                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7078                                 tmp->start, tmp->nr, good->start, good->nr);
7079                         abort();
7080                 }
7081                 good = tmp;
7082         }
7083
7084         if (good != rec)
7085                 list_add_tail(&rec->list, &delete_list);
7086
7087         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7088                 if (tmp == good)
7089                         continue;
7090                 list_move_tail(&tmp->list, &delete_list);
7091         }
7092
7093         root = root->fs_info->extent_root;
7094         trans = btrfs_start_transaction(root, 1);
7095         if (IS_ERR(trans)) {
7096                 ret = PTR_ERR(trans);
7097                 goto out;
7098         }
7099
7100         list_for_each_entry(tmp, &delete_list, list) {
7101                 if (tmp->found_rec == 0)
7102                         continue;
7103                 key.objectid = tmp->start;
7104                 key.type = BTRFS_EXTENT_ITEM_KEY;
7105                 key.offset = tmp->nr;
7106
7107                 /* Shouldn't happen but just in case */
7108                 if (tmp->metadata) {
7109                         fprintf(stderr, "Well this shouldn't happen, extent "
7110                                 "record overlaps but is metadata? "
7111                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7112                         abort();
7113                 }
7114
7115                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7116                 if (ret) {
7117                         if (ret > 0)
7118                                 ret = -EINVAL;
7119                         break;
7120                 }
7121                 ret = btrfs_del_item(trans, root, path);
7122                 if (ret)
7123                         break;
7124                 btrfs_release_path(path);
7125                 nr_del++;
7126         }
7127         err = btrfs_commit_transaction(trans, root);
7128         if (err && !ret)
7129                 ret = err;
7130 out:
7131         while (!list_empty(&delete_list)) {
7132                 tmp = list_entry(delete_list.next, struct extent_record, list);
7133                 list_del_init(&tmp->list);
7134                 if (tmp == rec)
7135                         continue;
7136                 free(tmp);
7137         }
7138
7139         while (!list_empty(&rec->dups)) {
7140                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7141                 list_del_init(&tmp->list);
7142                 free(tmp);
7143         }
7144
7145         btrfs_free_path(path);
7146
7147         if (!ret && !nr_del)
7148                 rec->num_duplicates = 0;
7149
7150         return ret ? ret : nr_del;
7151 }
7152
7153 static int find_possible_backrefs(struct btrfs_fs_info *info,
7154                                   struct btrfs_path *path,
7155                                   struct cache_tree *extent_cache,
7156                                   struct extent_record *rec)
7157 {
7158         struct btrfs_root *root;
7159         struct extent_backref *back;
7160         struct data_backref *dback;
7161         struct cache_extent *cache;
7162         struct btrfs_file_extent_item *fi;
7163         struct btrfs_key key;
7164         u64 bytenr, bytes;
7165         int ret;
7166
7167         list_for_each_entry(back, &rec->backrefs, list) {
7168                 /* Don't care about full backrefs (poor unloved backrefs) */
7169                 if (back->full_backref || !back->is_data)
7170                         continue;
7171
7172                 dback = (struct data_backref *)back;
7173
7174                 /* We found this one, we don't need to do a lookup */
7175                 if (dback->found_ref)
7176                         continue;
7177
7178                 key.objectid = dback->root;
7179                 key.type = BTRFS_ROOT_ITEM_KEY;
7180                 key.offset = (u64)-1;
7181
7182                 root = btrfs_read_fs_root(info, &key);
7183
7184                 /* No root, definitely a bad ref, skip */
7185                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7186                         continue;
7187                 /* Other err, exit */
7188                 if (IS_ERR(root))
7189                         return PTR_ERR(root);
7190
7191                 key.objectid = dback->owner;
7192                 key.type = BTRFS_EXTENT_DATA_KEY;
7193                 key.offset = dback->offset;
7194                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7195                 if (ret) {
7196                         btrfs_release_path(path);
7197                         if (ret < 0)
7198                                 return ret;
7199                         /* Didn't find it, we can carry on */
7200                         ret = 0;
7201                         continue;
7202                 }
7203
7204                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7205                                     struct btrfs_file_extent_item);
7206                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7207                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7208                 btrfs_release_path(path);
7209                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7210                 if (cache) {
7211                         struct extent_record *tmp;
7212                         tmp = container_of(cache, struct extent_record, cache);
7213
7214                         /*
7215                          * If we found an extent record for the bytenr for this
7216                          * particular backref then we can't add it to our
7217                          * current extent record.  We only want to add backrefs
7218                          * that don't have a corresponding extent item in the
7219                          * extent tree since they likely belong to this record
7220                          * and we need to fix it if it doesn't match bytenrs.
7221                          */
7222                         if  (tmp->found_rec)
7223                                 continue;
7224                 }
7225
7226                 dback->found_ref += 1;
7227                 dback->disk_bytenr = bytenr;
7228                 dback->bytes = bytes;
7229
7230                 /*
7231                  * Set this so the verify backref code knows not to trust the
7232                  * values in this backref.
7233                  */
7234                 back->broken = 1;
7235         }
7236
7237         return 0;
7238 }
7239
7240 /*
7241  * Record orphan data ref into corresponding root.
7242  *
7243  * Return 0 if the extent item contains data ref and recorded.
7244  * Return 1 if the extent item contains no useful data ref
7245  *   On that case, it may contains only shared_dataref or metadata backref
7246  *   or the file extent exists(this should be handled by the extent bytenr
7247  *   recovery routine)
7248  * Return <0 if something goes wrong.
7249  */
7250 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7251                                       struct extent_record *rec)
7252 {
7253         struct btrfs_key key;
7254         struct btrfs_root *dest_root;
7255         struct extent_backref *back;
7256         struct data_backref *dback;
7257         struct orphan_data_extent *orphan;
7258         struct btrfs_path *path;
7259         int recorded_data_ref = 0;
7260         int ret = 0;
7261
7262         if (rec->metadata)
7263                 return 1;
7264         path = btrfs_alloc_path();
7265         if (!path)
7266                 return -ENOMEM;
7267         list_for_each_entry(back, &rec->backrefs, list) {
7268                 if (back->full_backref || !back->is_data ||
7269                     !back->found_extent_tree)
7270                         continue;
7271                 dback = (struct data_backref *)back;
7272                 if (dback->found_ref)
7273                         continue;
7274                 key.objectid = dback->root;
7275                 key.type = BTRFS_ROOT_ITEM_KEY;
7276                 key.offset = (u64)-1;
7277
7278                 dest_root = btrfs_read_fs_root(fs_info, &key);
7279
7280                 /* For non-exist root we just skip it */
7281                 if (IS_ERR(dest_root) || !dest_root)
7282                         continue;
7283
7284                 key.objectid = dback->owner;
7285                 key.type = BTRFS_EXTENT_DATA_KEY;
7286                 key.offset = dback->offset;
7287
7288                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7289                 /*
7290                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7291                  * we need to record it for inode/file extent rebuild.
7292                  * For ret > 0, we record it only for file extent rebuild.
7293                  * For ret == 0, the file extent exists but only bytenr
7294                  * mismatch, let the original bytenr fix routine to handle,
7295                  * don't record it.
7296                  */
7297                 if (ret == 0)
7298                         continue;
7299                 ret = 0;
7300                 orphan = malloc(sizeof(*orphan));
7301                 if (!orphan) {
7302                         ret = -ENOMEM;
7303                         goto out;
7304                 }
7305                 INIT_LIST_HEAD(&orphan->list);
7306                 orphan->root = dback->root;
7307                 orphan->objectid = dback->owner;
7308                 orphan->offset = dback->offset;
7309                 orphan->disk_bytenr = rec->cache.start;
7310                 orphan->disk_len = rec->cache.size;
7311                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7312                 recorded_data_ref = 1;
7313         }
7314 out:
7315         btrfs_free_path(path);
7316         if (!ret)
7317                 return !recorded_data_ref;
7318         else
7319                 return ret;
7320 }
7321
7322 /*
7323  * when an incorrect extent item is found, this will delete
7324  * all of the existing entries for it and recreate them
7325  * based on what the tree scan found.
7326  */
7327 static int fixup_extent_refs(struct btrfs_fs_info *info,
7328                              struct cache_tree *extent_cache,
7329                              struct extent_record *rec)
7330 {
7331         struct btrfs_trans_handle *trans = NULL;
7332         int ret;
7333         struct btrfs_path *path;
7334         struct list_head *cur = rec->backrefs.next;
7335         struct cache_extent *cache;
7336         struct extent_backref *back;
7337         int allocated = 0;
7338         u64 flags = 0;
7339
7340         if (rec->flag_block_full_backref)
7341                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7342
7343         path = btrfs_alloc_path();
7344         if (!path)
7345                 return -ENOMEM;
7346
7347         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7348                 /*
7349                  * Sometimes the backrefs themselves are so broken they don't
7350                  * get attached to any meaningful rec, so first go back and
7351                  * check any of our backrefs that we couldn't find and throw
7352                  * them into the list if we find the backref so that
7353                  * verify_backrefs can figure out what to do.
7354                  */
7355                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7356                 if (ret < 0)
7357                         goto out;
7358         }
7359
7360         /* step one, make sure all of the backrefs agree */
7361         ret = verify_backrefs(info, path, rec);
7362         if (ret < 0)
7363                 goto out;
7364
7365         trans = btrfs_start_transaction(info->extent_root, 1);
7366         if (IS_ERR(trans)) {
7367                 ret = PTR_ERR(trans);
7368                 goto out;
7369         }
7370
7371         /* step two, delete all the existing records */
7372         ret = delete_extent_records(trans, info->extent_root, path,
7373                                     rec->start, rec->max_size);
7374
7375         if (ret < 0)
7376                 goto out;
7377
7378         /* was this block corrupt?  If so, don't add references to it */
7379         cache = lookup_cache_extent(info->corrupt_blocks,
7380                                     rec->start, rec->max_size);
7381         if (cache) {
7382                 ret = 0;
7383                 goto out;
7384         }
7385
7386         /* step three, recreate all the refs we did find */
7387         while(cur != &rec->backrefs) {
7388                 back = list_entry(cur, struct extent_backref, list);
7389                 cur = cur->next;
7390
7391                 /*
7392                  * if we didn't find any references, don't create a
7393                  * new extent record
7394                  */
7395                 if (!back->found_ref)
7396                         continue;
7397
7398                 rec->bad_full_backref = 0;
7399                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7400                 allocated = 1;
7401
7402                 if (ret)
7403                         goto out;
7404         }
7405 out:
7406         if (trans) {
7407                 int err = btrfs_commit_transaction(trans, info->extent_root);
7408                 if (!ret)
7409                         ret = err;
7410         }
7411
7412         btrfs_free_path(path);
7413         return ret;
7414 }
7415
7416 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7417                               struct extent_record *rec)
7418 {
7419         struct btrfs_trans_handle *trans;
7420         struct btrfs_root *root = fs_info->extent_root;
7421         struct btrfs_path *path;
7422         struct btrfs_extent_item *ei;
7423         struct btrfs_key key;
7424         u64 flags;
7425         int ret = 0;
7426
7427         key.objectid = rec->start;
7428         if (rec->metadata) {
7429                 key.type = BTRFS_METADATA_ITEM_KEY;
7430                 key.offset = rec->info_level;
7431         } else {
7432                 key.type = BTRFS_EXTENT_ITEM_KEY;
7433                 key.offset = rec->max_size;
7434         }
7435
7436         path = btrfs_alloc_path();
7437         if (!path)
7438                 return -ENOMEM;
7439
7440         trans = btrfs_start_transaction(root, 0);
7441         if (IS_ERR(trans)) {
7442                 btrfs_free_path(path);
7443                 return PTR_ERR(trans);
7444         }
7445
7446         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7447         if (ret < 0) {
7448                 btrfs_free_path(path);
7449                 btrfs_commit_transaction(trans, root);
7450                 return ret;
7451         } else if (ret) {
7452                 fprintf(stderr, "Didn't find extent for %llu\n",
7453                         (unsigned long long)rec->start);
7454                 btrfs_free_path(path);
7455                 btrfs_commit_transaction(trans, root);
7456                 return -ENOENT;
7457         }
7458
7459         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7460                             struct btrfs_extent_item);
7461         flags = btrfs_extent_flags(path->nodes[0], ei);
7462         if (rec->flag_block_full_backref) {
7463                 fprintf(stderr, "setting full backref on %llu\n",
7464                         (unsigned long long)key.objectid);
7465                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7466         } else {
7467                 fprintf(stderr, "clearing full backref on %llu\n",
7468                         (unsigned long long)key.objectid);
7469                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7470         }
7471         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7472         btrfs_mark_buffer_dirty(path->nodes[0]);
7473         btrfs_free_path(path);
7474         return btrfs_commit_transaction(trans, root);
7475 }
7476
7477 /* right now we only prune from the extent allocation tree */
7478 static int prune_one_block(struct btrfs_trans_handle *trans,
7479                            struct btrfs_fs_info *info,
7480                            struct btrfs_corrupt_block *corrupt)
7481 {
7482         int ret;
7483         struct btrfs_path path;
7484         struct extent_buffer *eb;
7485         u64 found;
7486         int slot;
7487         int nritems;
7488         int level = corrupt->level + 1;
7489
7490         btrfs_init_path(&path);
7491 again:
7492         /* we want to stop at the parent to our busted block */
7493         path.lowest_level = level;
7494
7495         ret = btrfs_search_slot(trans, info->extent_root,
7496                                 &corrupt->key, &path, -1, 1);
7497
7498         if (ret < 0)
7499                 goto out;
7500
7501         eb = path.nodes[level];
7502         if (!eb) {
7503                 ret = -ENOENT;
7504                 goto out;
7505         }
7506
7507         /*
7508          * hopefully the search gave us the block we want to prune,
7509          * lets try that first
7510          */
7511         slot = path.slots[level];
7512         found =  btrfs_node_blockptr(eb, slot);
7513         if (found == corrupt->cache.start)
7514                 goto del_ptr;
7515
7516         nritems = btrfs_header_nritems(eb);
7517
7518         /* the search failed, lets scan this node and hope we find it */
7519         for (slot = 0; slot < nritems; slot++) {
7520                 found =  btrfs_node_blockptr(eb, slot);
7521                 if (found == corrupt->cache.start)
7522                         goto del_ptr;
7523         }
7524         /*
7525          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7526          * to this block
7527          */
7528         if (eb == info->extent_root->node) {
7529                 ret = -ENOENT;
7530                 goto out;
7531         } else {
7532                 level++;
7533                 btrfs_release_path(&path);
7534                 goto again;
7535         }
7536
7537 del_ptr:
7538         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7539         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7540
7541 out:
7542         btrfs_release_path(&path);
7543         return ret;
7544 }
7545
7546 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7547 {
7548         struct btrfs_trans_handle *trans = NULL;
7549         struct cache_extent *cache;
7550         struct btrfs_corrupt_block *corrupt;
7551
7552         while (1) {
7553                 cache = search_cache_extent(info->corrupt_blocks, 0);
7554                 if (!cache)
7555                         break;
7556                 if (!trans) {
7557                         trans = btrfs_start_transaction(info->extent_root, 1);
7558                         if (IS_ERR(trans))
7559                                 return PTR_ERR(trans);
7560                 }
7561                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7562                 prune_one_block(trans, info, corrupt);
7563                 remove_cache_extent(info->corrupt_blocks, cache);
7564         }
7565         if (trans)
7566                 return btrfs_commit_transaction(trans, info->extent_root);
7567         return 0;
7568 }
7569
7570 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7571 {
7572         struct btrfs_block_group_cache *cache;
7573         u64 start, end;
7574         int ret;
7575
7576         while (1) {
7577                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7578                                             &start, &end, EXTENT_DIRTY);
7579                 if (ret)
7580                         break;
7581                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7582                                    GFP_NOFS);
7583         }
7584
7585         start = 0;
7586         while (1) {
7587                 cache = btrfs_lookup_first_block_group(fs_info, start);
7588                 if (!cache)
7589                         break;
7590                 if (cache->cached)
7591                         cache->cached = 0;
7592                 start = cache->key.objectid + cache->key.offset;
7593         }
7594 }
7595
7596 static int check_extent_refs(struct btrfs_root *root,
7597                              struct cache_tree *extent_cache)
7598 {
7599         struct extent_record *rec;
7600         struct cache_extent *cache;
7601         int err = 0;
7602         int ret = 0;
7603         int fixed = 0;
7604         int had_dups = 0;
7605         int recorded = 0;
7606
7607         if (repair) {
7608                 /*
7609                  * if we're doing a repair, we have to make sure
7610                  * we don't allocate from the problem extents.
7611                  * In the worst case, this will be all the
7612                  * extents in the FS
7613                  */
7614                 cache = search_cache_extent(extent_cache, 0);
7615                 while(cache) {
7616                         rec = container_of(cache, struct extent_record, cache);
7617                         set_extent_dirty(root->fs_info->excluded_extents,
7618                                          rec->start,
7619                                          rec->start + rec->max_size - 1,
7620                                          GFP_NOFS);
7621                         cache = next_cache_extent(cache);
7622                 }
7623
7624                 /* pin down all the corrupted blocks too */
7625                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7626                 while(cache) {
7627                         set_extent_dirty(root->fs_info->excluded_extents,
7628                                          cache->start,
7629                                          cache->start + cache->size - 1,
7630                                          GFP_NOFS);
7631                         cache = next_cache_extent(cache);
7632                 }
7633                 prune_corrupt_blocks(root->fs_info);
7634                 reset_cached_block_groups(root->fs_info);
7635         }
7636
7637         reset_cached_block_groups(root->fs_info);
7638
7639         /*
7640          * We need to delete any duplicate entries we find first otherwise we
7641          * could mess up the extent tree when we have backrefs that actually
7642          * belong to a different extent item and not the weird duplicate one.
7643          */
7644         while (repair && !list_empty(&duplicate_extents)) {
7645                 rec = list_entry(duplicate_extents.next, struct extent_record,
7646                                  list);
7647                 list_del_init(&rec->list);
7648
7649                 /* Sometimes we can find a backref before we find an actual
7650                  * extent, so we need to process it a little bit to see if there
7651                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7652                  * if this is a backref screwup.  If we need to delete stuff
7653                  * process_duplicates() will return 0, otherwise it will return
7654                  * 1 and we
7655                  */
7656                 if (process_duplicates(root, extent_cache, rec))
7657                         continue;
7658                 ret = delete_duplicate_records(root, rec);
7659                 if (ret < 0)
7660                         return ret;
7661                 /*
7662                  * delete_duplicate_records will return the number of entries
7663                  * deleted, so if it's greater than 0 then we know we actually
7664                  * did something and we need to remove.
7665                  */
7666                 if (ret)
7667                         had_dups = 1;
7668         }
7669
7670         if (had_dups)
7671                 return -EAGAIN;
7672
7673         while(1) {
7674                 int cur_err = 0;
7675
7676                 fixed = 0;
7677                 recorded = 0;
7678                 cache = search_cache_extent(extent_cache, 0);
7679                 if (!cache)
7680                         break;
7681                 rec = container_of(cache, struct extent_record, cache);
7682                 if (rec->num_duplicates) {
7683                         fprintf(stderr, "extent item %llu has multiple extent "
7684                                 "items\n", (unsigned long long)rec->start);
7685                         err = 1;
7686                         cur_err = 1;
7687                 }
7688
7689                 if (rec->refs != rec->extent_item_refs) {
7690                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7691                                 (unsigned long long)rec->start,
7692                                 (unsigned long long)rec->nr);
7693                         fprintf(stderr, "extent item %llu, found %llu\n",
7694                                 (unsigned long long)rec->extent_item_refs,
7695                                 (unsigned long long)rec->refs);
7696                         ret = record_orphan_data_extents(root->fs_info, rec);
7697                         if (ret < 0)
7698                                 goto repair_abort;
7699                         if (ret == 0) {
7700                                 recorded = 1;
7701                         } else {
7702                                 /*
7703                                  * we can't use the extent to repair file
7704                                  * extent, let the fallback method handle it.
7705                                  */
7706                                 if (!fixed && repair) {
7707                                         ret = fixup_extent_refs(
7708                                                         root->fs_info,
7709                                                         extent_cache, rec);
7710                                         if (ret)
7711                                                 goto repair_abort;
7712                                         fixed = 1;
7713                                 }
7714                         }
7715                         err = 1;
7716                         cur_err = 1;
7717                 }
7718                 if (all_backpointers_checked(rec, 1)) {
7719                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7720                                 (unsigned long long)rec->start,
7721                                 (unsigned long long)rec->nr);
7722
7723                         if (!fixed && !recorded && repair) {
7724                                 ret = fixup_extent_refs(root->fs_info,
7725                                                         extent_cache, rec);
7726                                 if (ret)
7727                                         goto repair_abort;
7728                                 fixed = 1;
7729                         }
7730                         cur_err = 1;
7731                         err = 1;
7732                 }
7733                 if (!rec->owner_ref_checked) {
7734                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7735                                 (unsigned long long)rec->start,
7736                                 (unsigned long long)rec->nr);
7737                         if (!fixed && !recorded && repair) {
7738                                 ret = fixup_extent_refs(root->fs_info,
7739                                                         extent_cache, rec);
7740                                 if (ret)
7741                                         goto repair_abort;
7742                                 fixed = 1;
7743                         }
7744                         err = 1;
7745                         cur_err = 1;
7746                 }
7747                 if (rec->bad_full_backref) {
7748                         fprintf(stderr, "bad full backref, on [%llu]\n",
7749                                 (unsigned long long)rec->start);
7750                         if (repair) {
7751                                 ret = fixup_extent_flags(root->fs_info, rec);
7752                                 if (ret)
7753                                         goto repair_abort;
7754                                 fixed = 1;
7755                         }
7756                         err = 1;
7757                         cur_err = 1;
7758                 }
7759                 /*
7760                  * Although it's not a extent ref's problem, we reuse this
7761                  * routine for error reporting.
7762                  * No repair function yet.
7763                  */
7764                 if (rec->crossing_stripes) {
7765                         fprintf(stderr,
7766                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7767                                 rec->start, rec->start + rec->max_size);
7768                         err = 1;
7769                         cur_err = 1;
7770                 }
7771
7772                 if (rec->wrong_chunk_type) {
7773                         fprintf(stderr,
7774                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7775                                 rec->start, rec->start + rec->max_size);
7776                         err = 1;
7777                         cur_err = 1;
7778                 }
7779
7780                 remove_cache_extent(extent_cache, cache);
7781                 free_all_extent_backrefs(rec);
7782                 if (!init_extent_tree && repair && (!cur_err || fixed))
7783                         clear_extent_dirty(root->fs_info->excluded_extents,
7784                                            rec->start,
7785                                            rec->start + rec->max_size - 1,
7786                                            GFP_NOFS);
7787                 free(rec);
7788         }
7789 repair_abort:
7790         if (repair) {
7791                 if (ret && ret != -EAGAIN) {
7792                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7793                         exit(1);
7794                 } else if (!ret) {
7795                         struct btrfs_trans_handle *trans;
7796
7797                         root = root->fs_info->extent_root;
7798                         trans = btrfs_start_transaction(root, 1);
7799                         if (IS_ERR(trans)) {
7800                                 ret = PTR_ERR(trans);
7801                                 goto repair_abort;
7802                         }
7803
7804                         btrfs_fix_block_accounting(trans, root);
7805                         ret = btrfs_commit_transaction(trans, root);
7806                         if (ret)
7807                                 goto repair_abort;
7808                 }
7809                 if (err)
7810                         fprintf(stderr, "repaired damaged extent references\n");
7811                 return ret;
7812         }
7813         return err;
7814 }
7815
7816 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7817 {
7818         u64 stripe_size;
7819
7820         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7821                 stripe_size = length;
7822                 stripe_size /= num_stripes;
7823         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7824                 stripe_size = length * 2;
7825                 stripe_size /= num_stripes;
7826         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7827                 stripe_size = length;
7828                 stripe_size /= (num_stripes - 1);
7829         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7830                 stripe_size = length;
7831                 stripe_size /= (num_stripes - 2);
7832         } else {
7833                 stripe_size = length;
7834         }
7835         return stripe_size;
7836 }
7837
7838 /*
7839  * Check the chunk with its block group/dev list ref:
7840  * Return 0 if all refs seems valid.
7841  * Return 1 if part of refs seems valid, need later check for rebuild ref
7842  * like missing block group and needs to search extent tree to rebuild them.
7843  * Return -1 if essential refs are missing and unable to rebuild.
7844  */
7845 static int check_chunk_refs(struct chunk_record *chunk_rec,
7846                             struct block_group_tree *block_group_cache,
7847                             struct device_extent_tree *dev_extent_cache,
7848                             int silent)
7849 {
7850         struct cache_extent *block_group_item;
7851         struct block_group_record *block_group_rec;
7852         struct cache_extent *dev_extent_item;
7853         struct device_extent_record *dev_extent_rec;
7854         u64 devid;
7855         u64 offset;
7856         u64 length;
7857         int metadump_v2 = 0;
7858         int i;
7859         int ret = 0;
7860
7861         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7862                                                chunk_rec->offset,
7863                                                chunk_rec->length);
7864         if (block_group_item) {
7865                 block_group_rec = container_of(block_group_item,
7866                                                struct block_group_record,
7867                                                cache);
7868                 if (chunk_rec->length != block_group_rec->offset ||
7869                     chunk_rec->offset != block_group_rec->objectid ||
7870                     (!metadump_v2 &&
7871                      chunk_rec->type_flags != block_group_rec->flags)) {
7872                         if (!silent)
7873                                 fprintf(stderr,
7874                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7875                                         chunk_rec->objectid,
7876                                         chunk_rec->type,
7877                                         chunk_rec->offset,
7878                                         chunk_rec->length,
7879                                         chunk_rec->offset,
7880                                         chunk_rec->type_flags,
7881                                         block_group_rec->objectid,
7882                                         block_group_rec->type,
7883                                         block_group_rec->offset,
7884                                         block_group_rec->offset,
7885                                         block_group_rec->objectid,
7886                                         block_group_rec->flags);
7887                         ret = -1;
7888                 } else {
7889                         list_del_init(&block_group_rec->list);
7890                         chunk_rec->bg_rec = block_group_rec;
7891                 }
7892         } else {
7893                 if (!silent)
7894                         fprintf(stderr,
7895                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7896                                 chunk_rec->objectid,
7897                                 chunk_rec->type,
7898                                 chunk_rec->offset,
7899                                 chunk_rec->length,
7900                                 chunk_rec->offset,
7901                                 chunk_rec->type_flags);
7902                 ret = 1;
7903         }
7904
7905         if (metadump_v2)
7906                 return ret;
7907
7908         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7909                                     chunk_rec->num_stripes);
7910         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7911                 devid = chunk_rec->stripes[i].devid;
7912                 offset = chunk_rec->stripes[i].offset;
7913                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7914                                                        devid, offset, length);
7915                 if (dev_extent_item) {
7916                         dev_extent_rec = container_of(dev_extent_item,
7917                                                 struct device_extent_record,
7918                                                 cache);
7919                         if (dev_extent_rec->objectid != devid ||
7920                             dev_extent_rec->offset != offset ||
7921                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7922                             dev_extent_rec->length != length) {
7923                                 if (!silent)
7924                                         fprintf(stderr,
7925                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7926                                                 chunk_rec->objectid,
7927                                                 chunk_rec->type,
7928                                                 chunk_rec->offset,
7929                                                 chunk_rec->stripes[i].devid,
7930                                                 chunk_rec->stripes[i].offset,
7931                                                 dev_extent_rec->objectid,
7932                                                 dev_extent_rec->offset,
7933                                                 dev_extent_rec->length);
7934                                 ret = -1;
7935                         } else {
7936                                 list_move(&dev_extent_rec->chunk_list,
7937                                           &chunk_rec->dextents);
7938                         }
7939                 } else {
7940                         if (!silent)
7941                                 fprintf(stderr,
7942                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7943                                         chunk_rec->objectid,
7944                                         chunk_rec->type,
7945                                         chunk_rec->offset,
7946                                         chunk_rec->stripes[i].devid,
7947                                         chunk_rec->stripes[i].offset);
7948                         ret = -1;
7949                 }
7950         }
7951         return ret;
7952 }
7953
7954 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7955 int check_chunks(struct cache_tree *chunk_cache,
7956                  struct block_group_tree *block_group_cache,
7957                  struct device_extent_tree *dev_extent_cache,
7958                  struct list_head *good, struct list_head *bad,
7959                  struct list_head *rebuild, int silent)
7960 {
7961         struct cache_extent *chunk_item;
7962         struct chunk_record *chunk_rec;
7963         struct block_group_record *bg_rec;
7964         struct device_extent_record *dext_rec;
7965         int err;
7966         int ret = 0;
7967
7968         chunk_item = first_cache_extent(chunk_cache);
7969         while (chunk_item) {
7970                 chunk_rec = container_of(chunk_item, struct chunk_record,
7971                                          cache);
7972                 err = check_chunk_refs(chunk_rec, block_group_cache,
7973                                        dev_extent_cache, silent);
7974                 if (err < 0)
7975                         ret = err;
7976                 if (err == 0 && good)
7977                         list_add_tail(&chunk_rec->list, good);
7978                 if (err > 0 && rebuild)
7979                         list_add_tail(&chunk_rec->list, rebuild);
7980                 if (err < 0 && bad)
7981                         list_add_tail(&chunk_rec->list, bad);
7982                 chunk_item = next_cache_extent(chunk_item);
7983         }
7984
7985         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
7986                 if (!silent)
7987                         fprintf(stderr,
7988                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
7989                                 bg_rec->objectid,
7990                                 bg_rec->offset,
7991                                 bg_rec->flags);
7992                 if (!ret)
7993                         ret = 1;
7994         }
7995
7996         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
7997                             chunk_list) {
7998                 if (!silent)
7999                         fprintf(stderr,
8000                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8001                                 dext_rec->objectid,
8002                                 dext_rec->offset,
8003                                 dext_rec->length);
8004                 if (!ret)
8005                         ret = 1;
8006         }
8007         return ret;
8008 }
8009
8010
8011 static int check_device_used(struct device_record *dev_rec,
8012                              struct device_extent_tree *dext_cache)
8013 {
8014         struct cache_extent *cache;
8015         struct device_extent_record *dev_extent_rec;
8016         u64 total_byte = 0;
8017
8018         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8019         while (cache) {
8020                 dev_extent_rec = container_of(cache,
8021                                               struct device_extent_record,
8022                                               cache);
8023                 if (dev_extent_rec->objectid != dev_rec->devid)
8024                         break;
8025
8026                 list_del_init(&dev_extent_rec->device_list);
8027                 total_byte += dev_extent_rec->length;
8028                 cache = next_cache_extent(cache);
8029         }
8030
8031         if (total_byte != dev_rec->byte_used) {
8032                 fprintf(stderr,
8033                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8034                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8035                         dev_rec->type, dev_rec->offset);
8036                 return -1;
8037         } else {
8038                 return 0;
8039         }
8040 }
8041
8042 /* check btrfs_dev_item -> btrfs_dev_extent */
8043 static int check_devices(struct rb_root *dev_cache,
8044                          struct device_extent_tree *dev_extent_cache)
8045 {
8046         struct rb_node *dev_node;
8047         struct device_record *dev_rec;
8048         struct device_extent_record *dext_rec;
8049         int err;
8050         int ret = 0;
8051
8052         dev_node = rb_first(dev_cache);
8053         while (dev_node) {
8054                 dev_rec = container_of(dev_node, struct device_record, node);
8055                 err = check_device_used(dev_rec, dev_extent_cache);
8056                 if (err)
8057                         ret = err;
8058
8059                 dev_node = rb_next(dev_node);
8060         }
8061         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8062                             device_list) {
8063                 fprintf(stderr,
8064                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8065                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8066                 if (!ret)
8067                         ret = 1;
8068         }
8069         return ret;
8070 }
8071
8072 static int add_root_item_to_list(struct list_head *head,
8073                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8074                                   u8 level, u8 drop_level,
8075                                   int level_size, struct btrfs_key *drop_key)
8076 {
8077
8078         struct root_item_record *ri_rec;
8079         ri_rec = malloc(sizeof(*ri_rec));
8080         if (!ri_rec)
8081                 return -ENOMEM;
8082         ri_rec->bytenr = bytenr;
8083         ri_rec->objectid = objectid;
8084         ri_rec->level = level;
8085         ri_rec->level_size = level_size;
8086         ri_rec->drop_level = drop_level;
8087         ri_rec->last_snapshot = last_snapshot;
8088         if (drop_key)
8089                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8090         list_add_tail(&ri_rec->list, head);
8091
8092         return 0;
8093 }
8094
8095 static void free_root_item_list(struct list_head *list)
8096 {
8097         struct root_item_record *ri_rec;
8098
8099         while (!list_empty(list)) {
8100                 ri_rec = list_first_entry(list, struct root_item_record,
8101                                           list);
8102                 list_del_init(&ri_rec->list);
8103                 free(ri_rec);
8104         }
8105 }
8106
8107 static int deal_root_from_list(struct list_head *list,
8108                                struct btrfs_root *root,
8109                                struct block_info *bits,
8110                                int bits_nr,
8111                                struct cache_tree *pending,
8112                                struct cache_tree *seen,
8113                                struct cache_tree *reada,
8114                                struct cache_tree *nodes,
8115                                struct cache_tree *extent_cache,
8116                                struct cache_tree *chunk_cache,
8117                                struct rb_root *dev_cache,
8118                                struct block_group_tree *block_group_cache,
8119                                struct device_extent_tree *dev_extent_cache)
8120 {
8121         int ret = 0;
8122         u64 last;
8123
8124         while (!list_empty(list)) {
8125                 struct root_item_record *rec;
8126                 struct extent_buffer *buf;
8127                 rec = list_entry(list->next,
8128                                  struct root_item_record, list);
8129                 last = 0;
8130                 buf = read_tree_block(root->fs_info->tree_root,
8131                                       rec->bytenr, rec->level_size, 0);
8132                 if (!extent_buffer_uptodate(buf)) {
8133                         free_extent_buffer(buf);
8134                         ret = -EIO;
8135                         break;
8136                 }
8137                 add_root_to_pending(buf, extent_cache, pending,
8138                                     seen, nodes, rec->objectid);
8139                 /*
8140                  * To rebuild extent tree, we need deal with snapshot
8141                  * one by one, otherwise we deal with node firstly which
8142                  * can maximize readahead.
8143                  */
8144                 while (1) {
8145                         ret = run_next_block(root, bits, bits_nr, &last,
8146                                              pending, seen, reada, nodes,
8147                                              extent_cache, chunk_cache,
8148                                              dev_cache, block_group_cache,
8149                                              dev_extent_cache, rec);
8150                         if (ret != 0)
8151                                 break;
8152                 }
8153                 free_extent_buffer(buf);
8154                 list_del(&rec->list);
8155                 free(rec);
8156                 if (ret < 0)
8157                         break;
8158         }
8159         while (ret >= 0) {
8160                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8161                                      reada, nodes, extent_cache, chunk_cache,
8162                                      dev_cache, block_group_cache,
8163                                      dev_extent_cache, NULL);
8164                 if (ret != 0) {
8165                         if (ret > 0)
8166                                 ret = 0;
8167                         break;
8168                 }
8169         }
8170         return ret;
8171 }
8172
8173 static int check_chunks_and_extents(struct btrfs_root *root)
8174 {
8175         struct rb_root dev_cache;
8176         struct cache_tree chunk_cache;
8177         struct block_group_tree block_group_cache;
8178         struct device_extent_tree dev_extent_cache;
8179         struct cache_tree extent_cache;
8180         struct cache_tree seen;
8181         struct cache_tree pending;
8182         struct cache_tree reada;
8183         struct cache_tree nodes;
8184         struct extent_io_tree excluded_extents;
8185         struct cache_tree corrupt_blocks;
8186         struct btrfs_path path;
8187         struct btrfs_key key;
8188         struct btrfs_key found_key;
8189         int ret, err = 0;
8190         struct block_info *bits;
8191         int bits_nr;
8192         struct extent_buffer *leaf;
8193         int slot;
8194         struct btrfs_root_item ri;
8195         struct list_head dropping_trees;
8196         struct list_head normal_trees;
8197         struct btrfs_root *root1;
8198         u64 objectid;
8199         u32 level_size;
8200         u8 level;
8201
8202         dev_cache = RB_ROOT;
8203         cache_tree_init(&chunk_cache);
8204         block_group_tree_init(&block_group_cache);
8205         device_extent_tree_init(&dev_extent_cache);
8206
8207         cache_tree_init(&extent_cache);
8208         cache_tree_init(&seen);
8209         cache_tree_init(&pending);
8210         cache_tree_init(&nodes);
8211         cache_tree_init(&reada);
8212         cache_tree_init(&corrupt_blocks);
8213         extent_io_tree_init(&excluded_extents);
8214         INIT_LIST_HEAD(&dropping_trees);
8215         INIT_LIST_HEAD(&normal_trees);
8216
8217         if (repair) {
8218                 root->fs_info->excluded_extents = &excluded_extents;
8219                 root->fs_info->fsck_extent_cache = &extent_cache;
8220                 root->fs_info->free_extent_hook = free_extent_hook;
8221                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8222         }
8223
8224         bits_nr = 1024;
8225         bits = malloc(bits_nr * sizeof(struct block_info));
8226         if (!bits) {
8227                 perror("malloc");
8228                 exit(1);
8229         }
8230
8231         if (ctx.progress_enabled) {
8232                 ctx.tp = TASK_EXTENTS;
8233                 task_start(ctx.info);
8234         }
8235
8236 again:
8237         root1 = root->fs_info->tree_root;
8238         level = btrfs_header_level(root1->node);
8239         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8240                                     root1->node->start, 0, level, 0,
8241                                     root1->nodesize, NULL);
8242         if (ret < 0)
8243                 goto out;
8244         root1 = root->fs_info->chunk_root;
8245         level = btrfs_header_level(root1->node);
8246         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8247                                     root1->node->start, 0, level, 0,
8248                                     root1->nodesize, NULL);
8249         if (ret < 0)
8250                 goto out;
8251         btrfs_init_path(&path);
8252         key.offset = 0;
8253         key.objectid = 0;
8254         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8255         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8256                                         &key, &path, 0, 0);
8257         if (ret < 0)
8258                 goto out;
8259         while(1) {
8260                 leaf = path.nodes[0];
8261                 slot = path.slots[0];
8262                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8263                         ret = btrfs_next_leaf(root, &path);
8264                         if (ret != 0)
8265                                 break;
8266                         leaf = path.nodes[0];
8267                         slot = path.slots[0];
8268                 }
8269                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8270                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8271                         unsigned long offset;
8272                         u64 last_snapshot;
8273
8274                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8275                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8276                         last_snapshot = btrfs_root_last_snapshot(&ri);
8277                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8278                                 level = btrfs_root_level(&ri);
8279                                 level_size = root->nodesize;
8280                                 ret = add_root_item_to_list(&normal_trees,
8281                                                 found_key.objectid,
8282                                                 btrfs_root_bytenr(&ri),
8283                                                 last_snapshot, level,
8284                                                 0, level_size, NULL);
8285                                 if (ret < 0)
8286                                         goto out;
8287                         } else {
8288                                 level = btrfs_root_level(&ri);
8289                                 level_size = root->nodesize;
8290                                 objectid = found_key.objectid;
8291                                 btrfs_disk_key_to_cpu(&found_key,
8292                                                       &ri.drop_progress);
8293                                 ret = add_root_item_to_list(&dropping_trees,
8294                                                 objectid,
8295                                                 btrfs_root_bytenr(&ri),
8296                                                 last_snapshot, level,
8297                                                 ri.drop_level,
8298                                                 level_size, &found_key);
8299                                 if (ret < 0)
8300                                         goto out;
8301                         }
8302                 }
8303                 path.slots[0]++;
8304         }
8305         btrfs_release_path(&path);
8306
8307         /*
8308          * check_block can return -EAGAIN if it fixes something, please keep
8309          * this in mind when dealing with return values from these functions, if
8310          * we get -EAGAIN we want to fall through and restart the loop.
8311          */
8312         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8313                                   &seen, &reada, &nodes, &extent_cache,
8314                                   &chunk_cache, &dev_cache, &block_group_cache,
8315                                   &dev_extent_cache);
8316         if (ret < 0) {
8317                 if (ret == -EAGAIN)
8318                         goto loop;
8319                 goto out;
8320         }
8321         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8322                                   &pending, &seen, &reada, &nodes,
8323                                   &extent_cache, &chunk_cache, &dev_cache,
8324                                   &block_group_cache, &dev_extent_cache);
8325         if (ret < 0) {
8326                 if (ret == -EAGAIN)
8327                         goto loop;
8328                 goto out;
8329         }
8330
8331         ret = check_chunks(&chunk_cache, &block_group_cache,
8332                            &dev_extent_cache, NULL, NULL, NULL, 0);
8333         if (ret) {
8334                 if (ret == -EAGAIN)
8335                         goto loop;
8336                 err = ret;
8337         }
8338
8339         ret = check_extent_refs(root, &extent_cache);
8340         if (ret < 0) {
8341                 if (ret == -EAGAIN)
8342                         goto loop;
8343                 goto out;
8344         }
8345
8346         ret = check_devices(&dev_cache, &dev_extent_cache);
8347         if (ret && err)
8348                 ret = err;
8349
8350 out:
8351         task_stop(ctx.info);
8352         if (repair) {
8353                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8354                 extent_io_tree_cleanup(&excluded_extents);
8355                 root->fs_info->fsck_extent_cache = NULL;
8356                 root->fs_info->free_extent_hook = NULL;
8357                 root->fs_info->corrupt_blocks = NULL;
8358                 root->fs_info->excluded_extents = NULL;
8359         }
8360         free(bits);
8361         free_chunk_cache_tree(&chunk_cache);
8362         free_device_cache_tree(&dev_cache);
8363         free_block_group_tree(&block_group_cache);
8364         free_device_extent_tree(&dev_extent_cache);
8365         free_extent_cache_tree(&seen);
8366         free_extent_cache_tree(&pending);
8367         free_extent_cache_tree(&reada);
8368         free_extent_cache_tree(&nodes);
8369         return ret;
8370 loop:
8371         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8372         free_extent_cache_tree(&seen);
8373         free_extent_cache_tree(&pending);
8374         free_extent_cache_tree(&reada);
8375         free_extent_cache_tree(&nodes);
8376         free_chunk_cache_tree(&chunk_cache);
8377         free_block_group_tree(&block_group_cache);
8378         free_device_cache_tree(&dev_cache);
8379         free_device_extent_tree(&dev_extent_cache);
8380         free_extent_record_cache(root->fs_info, &extent_cache);
8381         free_root_item_list(&normal_trees);
8382         free_root_item_list(&dropping_trees);
8383         extent_io_tree_cleanup(&excluded_extents);
8384         goto again;
8385 }
8386
8387 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8388                            struct btrfs_root *root, int overwrite)
8389 {
8390         struct extent_buffer *c;
8391         struct extent_buffer *old = root->node;
8392         int level;
8393         int ret;
8394         struct btrfs_disk_key disk_key = {0,0,0};
8395
8396         level = 0;
8397
8398         if (overwrite) {
8399                 c = old;
8400                 extent_buffer_get(c);
8401                 goto init;
8402         }
8403         c = btrfs_alloc_free_block(trans, root,
8404                                    root->nodesize,
8405                                    root->root_key.objectid,
8406                                    &disk_key, level, 0, 0);
8407         if (IS_ERR(c)) {
8408                 c = old;
8409                 extent_buffer_get(c);
8410                 overwrite = 1;
8411         }
8412 init:
8413         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8414         btrfs_set_header_level(c, level);
8415         btrfs_set_header_bytenr(c, c->start);
8416         btrfs_set_header_generation(c, trans->transid);
8417         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8418         btrfs_set_header_owner(c, root->root_key.objectid);
8419
8420         write_extent_buffer(c, root->fs_info->fsid,
8421                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8422
8423         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8424                             btrfs_header_chunk_tree_uuid(c),
8425                             BTRFS_UUID_SIZE);
8426
8427         btrfs_mark_buffer_dirty(c);
8428         /*
8429          * this case can happen in the following case:
8430          *
8431          * 1.overwrite previous root.
8432          *
8433          * 2.reinit reloc data root, this is because we skip pin
8434          * down reloc data tree before which means we can allocate
8435          * same block bytenr here.
8436          */
8437         if (old->start == c->start) {
8438                 btrfs_set_root_generation(&root->root_item,
8439                                           trans->transid);
8440                 root->root_item.level = btrfs_header_level(root->node);
8441                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8442                                         &root->root_key, &root->root_item);
8443                 if (ret) {
8444                         free_extent_buffer(c);
8445                         return ret;
8446                 }
8447         }
8448         free_extent_buffer(old);
8449         root->node = c;
8450         add_root_to_dirty_list(root);
8451         return 0;
8452 }
8453
8454 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8455                                 struct extent_buffer *eb, int tree_root)
8456 {
8457         struct extent_buffer *tmp;
8458         struct btrfs_root_item *ri;
8459         struct btrfs_key key;
8460         u64 bytenr;
8461         u32 nodesize;
8462         int level = btrfs_header_level(eb);
8463         int nritems;
8464         int ret;
8465         int i;
8466
8467         /*
8468          * If we have pinned this block before, don't pin it again.
8469          * This can not only avoid forever loop with broken filesystem
8470          * but also give us some speedups.
8471          */
8472         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8473                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8474                 return 0;
8475
8476         btrfs_pin_extent(fs_info, eb->start, eb->len);
8477
8478         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8479         nritems = btrfs_header_nritems(eb);
8480         for (i = 0; i < nritems; i++) {
8481                 if (level == 0) {
8482                         btrfs_item_key_to_cpu(eb, &key, i);
8483                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8484                                 continue;
8485                         /* Skip the extent root and reloc roots */
8486                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8487                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8488                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8489                                 continue;
8490                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8491                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8492
8493                         /*
8494                          * If at any point we start needing the real root we
8495                          * will have to build a stump root for the root we are
8496                          * in, but for now this doesn't actually use the root so
8497                          * just pass in extent_root.
8498                          */
8499                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8500                                               nodesize, 0);
8501                         if (!extent_buffer_uptodate(tmp)) {
8502                                 fprintf(stderr, "Error reading root block\n");
8503                                 return -EIO;
8504                         }
8505                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8506                         free_extent_buffer(tmp);
8507                         if (ret)
8508                                 return ret;
8509                 } else {
8510                         bytenr = btrfs_node_blockptr(eb, i);
8511
8512                         /* If we aren't the tree root don't read the block */
8513                         if (level == 1 && !tree_root) {
8514                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8515                                 continue;
8516                         }
8517
8518                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8519                                               nodesize, 0);
8520                         if (!extent_buffer_uptodate(tmp)) {
8521                                 fprintf(stderr, "Error reading tree block\n");
8522                                 return -EIO;
8523                         }
8524                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8525                         free_extent_buffer(tmp);
8526                         if (ret)
8527                                 return ret;
8528                 }
8529         }
8530
8531         return 0;
8532 }
8533
8534 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8535 {
8536         int ret;
8537
8538         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8539         if (ret)
8540                 return ret;
8541
8542         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8543 }
8544
8545 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8546 {
8547         struct btrfs_block_group_cache *cache;
8548         struct btrfs_path *path;
8549         struct extent_buffer *leaf;
8550         struct btrfs_chunk *chunk;
8551         struct btrfs_key key;
8552         int ret;
8553         u64 start;
8554
8555         path = btrfs_alloc_path();
8556         if (!path)
8557                 return -ENOMEM;
8558
8559         key.objectid = 0;
8560         key.type = BTRFS_CHUNK_ITEM_KEY;
8561         key.offset = 0;
8562
8563         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8564         if (ret < 0) {
8565                 btrfs_free_path(path);
8566                 return ret;
8567         }
8568
8569         /*
8570          * We do this in case the block groups were screwed up and had alloc
8571          * bits that aren't actually set on the chunks.  This happens with
8572          * restored images every time and could happen in real life I guess.
8573          */
8574         fs_info->avail_data_alloc_bits = 0;
8575         fs_info->avail_metadata_alloc_bits = 0;
8576         fs_info->avail_system_alloc_bits = 0;
8577
8578         /* First we need to create the in-memory block groups */
8579         while (1) {
8580                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8581                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8582                         if (ret < 0) {
8583                                 btrfs_free_path(path);
8584                                 return ret;
8585                         }
8586                         if (ret) {
8587                                 ret = 0;
8588                                 break;
8589                         }
8590                 }
8591                 leaf = path->nodes[0];
8592                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8593                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8594                         path->slots[0]++;
8595                         continue;
8596                 }
8597
8598                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8599                                        struct btrfs_chunk);
8600                 btrfs_add_block_group(fs_info, 0,
8601                                       btrfs_chunk_type(leaf, chunk),
8602                                       key.objectid, key.offset,
8603                                       btrfs_chunk_length(leaf, chunk));
8604                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8605                                  key.offset + btrfs_chunk_length(leaf, chunk),
8606                                  GFP_NOFS);
8607                 path->slots[0]++;
8608         }
8609         start = 0;
8610         while (1) {
8611                 cache = btrfs_lookup_first_block_group(fs_info, start);
8612                 if (!cache)
8613                         break;
8614                 cache->cached = 1;
8615                 start = cache->key.objectid + cache->key.offset;
8616         }
8617
8618         btrfs_free_path(path);
8619         return 0;
8620 }
8621
8622 static int reset_balance(struct btrfs_trans_handle *trans,
8623                          struct btrfs_fs_info *fs_info)
8624 {
8625         struct btrfs_root *root = fs_info->tree_root;
8626         struct btrfs_path *path;
8627         struct extent_buffer *leaf;
8628         struct btrfs_key key;
8629         int del_slot, del_nr = 0;
8630         int ret;
8631         int found = 0;
8632
8633         path = btrfs_alloc_path();
8634         if (!path)
8635                 return -ENOMEM;
8636
8637         key.objectid = BTRFS_BALANCE_OBJECTID;
8638         key.type = BTRFS_BALANCE_ITEM_KEY;
8639         key.offset = 0;
8640
8641         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8642         if (ret) {
8643                 if (ret > 0)
8644                         ret = 0;
8645                 if (!ret)
8646                         goto reinit_data_reloc;
8647                 else
8648                         goto out;
8649         }
8650
8651         ret = btrfs_del_item(trans, root, path);
8652         if (ret)
8653                 goto out;
8654         btrfs_release_path(path);
8655
8656         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8657         key.type = BTRFS_ROOT_ITEM_KEY;
8658         key.offset = 0;
8659
8660         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8661         if (ret < 0)
8662                 goto out;
8663         while (1) {
8664                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8665                         if (!found)
8666                                 break;
8667
8668                         if (del_nr) {
8669                                 ret = btrfs_del_items(trans, root, path,
8670                                                       del_slot, del_nr);
8671                                 del_nr = 0;
8672                                 if (ret)
8673                                         goto out;
8674                         }
8675                         key.offset++;
8676                         btrfs_release_path(path);
8677
8678                         found = 0;
8679                         ret = btrfs_search_slot(trans, root, &key, path,
8680                                                 -1, 1);
8681                         if (ret < 0)
8682                                 goto out;
8683                         continue;
8684                 }
8685                 found = 1;
8686                 leaf = path->nodes[0];
8687                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8688                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8689                         break;
8690                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8691                         path->slots[0]++;
8692                         continue;
8693                 }
8694                 if (!del_nr) {
8695                         del_slot = path->slots[0];
8696                         del_nr = 1;
8697                 } else {
8698                         del_nr++;
8699                 }
8700                 path->slots[0]++;
8701         }
8702
8703         if (del_nr) {
8704                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8705                 if (ret)
8706                         goto out;
8707         }
8708         btrfs_release_path(path);
8709
8710 reinit_data_reloc:
8711         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8712         key.type = BTRFS_ROOT_ITEM_KEY;
8713         key.offset = (u64)-1;
8714         root = btrfs_read_fs_root(fs_info, &key);
8715         if (IS_ERR(root)) {
8716                 fprintf(stderr, "Error reading data reloc tree\n");
8717                 ret = PTR_ERR(root);
8718                 goto out;
8719         }
8720         record_root_in_trans(trans, root);
8721         ret = btrfs_fsck_reinit_root(trans, root, 0);
8722         if (ret)
8723                 goto out;
8724         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8725 out:
8726         btrfs_free_path(path);
8727         return ret;
8728 }
8729
8730 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8731                               struct btrfs_fs_info *fs_info)
8732 {
8733         u64 start = 0;
8734         int ret;
8735
8736         /*
8737          * The only reason we don't do this is because right now we're just
8738          * walking the trees we find and pinning down their bytes, we don't look
8739          * at any of the leaves.  In order to do mixed groups we'd have to check
8740          * the leaves of any fs roots and pin down the bytes for any file
8741          * extents we find.  Not hard but why do it if we don't have to?
8742          */
8743         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8744                 fprintf(stderr, "We don't support re-initing the extent tree "
8745                         "for mixed block groups yet, please notify a btrfs "
8746                         "developer you want to do this so they can add this "
8747                         "functionality.\n");
8748                 return -EINVAL;
8749         }
8750
8751         /*
8752          * first we need to walk all of the trees except the extent tree and pin
8753          * down the bytes that are in use so we don't overwrite any existing
8754          * metadata.
8755          */
8756         ret = pin_metadata_blocks(fs_info);
8757         if (ret) {
8758                 fprintf(stderr, "error pinning down used bytes\n");
8759                 return ret;
8760         }
8761
8762         /*
8763          * Need to drop all the block groups since we're going to recreate all
8764          * of them again.
8765          */
8766         btrfs_free_block_groups(fs_info);
8767         ret = reset_block_groups(fs_info);
8768         if (ret) {
8769                 fprintf(stderr, "error resetting the block groups\n");
8770                 return ret;
8771         }
8772
8773         /* Ok we can allocate now, reinit the extent root */
8774         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8775         if (ret) {
8776                 fprintf(stderr, "extent root initialization failed\n");
8777                 /*
8778                  * When the transaction code is updated we should end the
8779                  * transaction, but for now progs only knows about commit so
8780                  * just return an error.
8781                  */
8782                 return ret;
8783         }
8784
8785         /*
8786          * Now we have all the in-memory block groups setup so we can make
8787          * allocations properly, and the metadata we care about is safe since we
8788          * pinned all of it above.
8789          */
8790         while (1) {
8791                 struct btrfs_block_group_cache *cache;
8792
8793                 cache = btrfs_lookup_first_block_group(fs_info, start);
8794                 if (!cache)
8795                         break;
8796                 start = cache->key.objectid + cache->key.offset;
8797                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8798                                         &cache->key, &cache->item,
8799                                         sizeof(cache->item));
8800                 if (ret) {
8801                         fprintf(stderr, "Error adding block group\n");
8802                         return ret;
8803                 }
8804                 btrfs_extent_post_op(trans, fs_info->extent_root);
8805         }
8806
8807         ret = reset_balance(trans, fs_info);
8808         if (ret)
8809                 fprintf(stderr, "error reseting the pending balance\n");
8810
8811         return ret;
8812 }
8813
8814 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8815 {
8816         struct btrfs_path *path;
8817         struct btrfs_trans_handle *trans;
8818         struct btrfs_key key;
8819         int ret;
8820
8821         printf("Recowing metadata block %llu\n", eb->start);
8822         key.objectid = btrfs_header_owner(eb);
8823         key.type = BTRFS_ROOT_ITEM_KEY;
8824         key.offset = (u64)-1;
8825
8826         root = btrfs_read_fs_root(root->fs_info, &key);
8827         if (IS_ERR(root)) {
8828                 fprintf(stderr, "Couldn't find owner root %llu\n",
8829                         key.objectid);
8830                 return PTR_ERR(root);
8831         }
8832
8833         path = btrfs_alloc_path();
8834         if (!path)
8835                 return -ENOMEM;
8836
8837         trans = btrfs_start_transaction(root, 1);
8838         if (IS_ERR(trans)) {
8839                 btrfs_free_path(path);
8840                 return PTR_ERR(trans);
8841         }
8842
8843         path->lowest_level = btrfs_header_level(eb);
8844         if (path->lowest_level)
8845                 btrfs_node_key_to_cpu(eb, &key, 0);
8846         else
8847                 btrfs_item_key_to_cpu(eb, &key, 0);
8848
8849         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8850         btrfs_commit_transaction(trans, root);
8851         btrfs_free_path(path);
8852         return ret;
8853 }
8854
8855 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8856 {
8857         struct btrfs_path *path;
8858         struct btrfs_trans_handle *trans;
8859         struct btrfs_key key;
8860         int ret;
8861
8862         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8863                bad->key.type, bad->key.offset);
8864         key.objectid = bad->root_id;
8865         key.type = BTRFS_ROOT_ITEM_KEY;
8866         key.offset = (u64)-1;
8867
8868         root = btrfs_read_fs_root(root->fs_info, &key);
8869         if (IS_ERR(root)) {
8870                 fprintf(stderr, "Couldn't find owner root %llu\n",
8871                         key.objectid);
8872                 return PTR_ERR(root);
8873         }
8874
8875         path = btrfs_alloc_path();
8876         if (!path)
8877                 return -ENOMEM;
8878
8879         trans = btrfs_start_transaction(root, 1);
8880         if (IS_ERR(trans)) {
8881                 btrfs_free_path(path);
8882                 return PTR_ERR(trans);
8883         }
8884
8885         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8886         if (ret) {
8887                 if (ret > 0)
8888                         ret = 0;
8889                 goto out;
8890         }
8891         ret = btrfs_del_item(trans, root, path);
8892 out:
8893         btrfs_commit_transaction(trans, root);
8894         btrfs_free_path(path);
8895         return ret;
8896 }
8897
8898 static int zero_log_tree(struct btrfs_root *root)
8899 {
8900         struct btrfs_trans_handle *trans;
8901         int ret;
8902
8903         trans = btrfs_start_transaction(root, 1);
8904         if (IS_ERR(trans)) {
8905                 ret = PTR_ERR(trans);
8906                 return ret;
8907         }
8908         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8909         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8910         ret = btrfs_commit_transaction(trans, root);
8911         return ret;
8912 }
8913
8914 static int populate_csum(struct btrfs_trans_handle *trans,
8915                          struct btrfs_root *csum_root, char *buf, u64 start,
8916                          u64 len)
8917 {
8918         u64 offset = 0;
8919         u64 sectorsize;
8920         int ret = 0;
8921
8922         while (offset < len) {
8923                 sectorsize = csum_root->sectorsize;
8924                 ret = read_extent_data(csum_root, buf, start + offset,
8925                                        &sectorsize, 0);
8926                 if (ret)
8927                         break;
8928                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8929                                             start + offset, buf, sectorsize);
8930                 if (ret)
8931                         break;
8932                 offset += sectorsize;
8933         }
8934         return ret;
8935 }
8936
8937 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8938                                       struct btrfs_root *csum_root,
8939                                       struct btrfs_root *cur_root)
8940 {
8941         struct btrfs_path *path;
8942         struct btrfs_key key;
8943         struct extent_buffer *node;
8944         struct btrfs_file_extent_item *fi;
8945         char *buf = NULL;
8946         u64 start = 0;
8947         u64 len = 0;
8948         int slot = 0;
8949         int ret = 0;
8950
8951         path = btrfs_alloc_path();
8952         if (!path)
8953                 return -ENOMEM;
8954         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8955         if (!buf) {
8956                 ret = -ENOMEM;
8957                 goto out;
8958         }
8959
8960         key.objectid = 0;
8961         key.offset = 0;
8962         key.type = 0;
8963
8964         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8965         if (ret < 0)
8966                 goto out;
8967         /* Iterate all regular file extents and fill its csum */
8968         while (1) {
8969                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8970
8971                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8972                         goto next;
8973                 node = path->nodes[0];
8974                 slot = path->slots[0];
8975                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8976                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8977                         goto next;
8978                 start = btrfs_file_extent_disk_bytenr(node, fi);
8979                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8980
8981                 ret = populate_csum(trans, csum_root, buf, start, len);
8982                 if (ret == -EEXIST)
8983                         ret = 0;
8984                 if (ret < 0)
8985                         goto out;
8986 next:
8987                 /*
8988                  * TODO: if next leaf is corrupted, jump to nearest next valid
8989                  * leaf.
8990                  */
8991                 ret = btrfs_next_item(cur_root, path);
8992                 if (ret < 0)
8993                         goto out;
8994                 if (ret > 0) {
8995                         ret = 0;
8996                         goto out;
8997                 }
8998         }
8999
9000 out:
9001         btrfs_free_path(path);
9002         free(buf);
9003         return ret;
9004 }
9005
9006 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9007                                   struct btrfs_root *csum_root)
9008 {
9009         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9010         struct btrfs_path *path;
9011         struct btrfs_root *tree_root = fs_info->tree_root;
9012         struct btrfs_root *cur_root;
9013         struct extent_buffer *node;
9014         struct btrfs_key key;
9015         int slot = 0;
9016         int ret = 0;
9017
9018         path = btrfs_alloc_path();
9019         if (!path)
9020                 return -ENOMEM;
9021
9022         key.objectid = BTRFS_FS_TREE_OBJECTID;
9023         key.offset = 0;
9024         key.type = BTRFS_ROOT_ITEM_KEY;
9025
9026         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9027         if (ret < 0)
9028                 goto out;
9029         if (ret > 0) {
9030                 ret = -ENOENT;
9031                 goto out;
9032         }
9033
9034         while (1) {
9035                 node = path->nodes[0];
9036                 slot = path->slots[0];
9037                 btrfs_item_key_to_cpu(node, &key, slot);
9038                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9039                         goto out;
9040                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9041                         goto next;
9042                 if (!is_fstree(key.objectid))
9043                         goto next;
9044                 key.offset = (u64)-1;
9045
9046                 cur_root = btrfs_read_fs_root(fs_info, &key);
9047                 if (IS_ERR(cur_root) || !cur_root) {
9048                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9049                                 key.objectid);
9050                         goto out;
9051                 }
9052                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9053                                 cur_root);
9054                 if (ret < 0)
9055                         goto out;
9056 next:
9057                 ret = btrfs_next_item(tree_root, path);
9058                 if (ret > 0) {
9059                         ret = 0;
9060                         goto out;
9061                 }
9062                 if (ret < 0)
9063                         goto out;
9064         }
9065
9066 out:
9067         btrfs_free_path(path);
9068         return ret;
9069 }
9070
9071 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9072                                       struct btrfs_root *csum_root)
9073 {
9074         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9075         struct btrfs_path *path;
9076         struct btrfs_extent_item *ei;
9077         struct extent_buffer *leaf;
9078         char *buf;
9079         struct btrfs_key key;
9080         int ret;
9081
9082         path = btrfs_alloc_path();
9083         if (!path)
9084                 return -ENOMEM;
9085
9086         key.objectid = 0;
9087         key.type = BTRFS_EXTENT_ITEM_KEY;
9088         key.offset = 0;
9089
9090         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9091         if (ret < 0) {
9092                 btrfs_free_path(path);
9093                 return ret;
9094         }
9095
9096         buf = malloc(csum_root->sectorsize);
9097         if (!buf) {
9098                 btrfs_free_path(path);
9099                 return -ENOMEM;
9100         }
9101
9102         while (1) {
9103                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9104                         ret = btrfs_next_leaf(extent_root, path);
9105                         if (ret < 0)
9106                                 break;
9107                         if (ret) {
9108                                 ret = 0;
9109                                 break;
9110                         }
9111                 }
9112                 leaf = path->nodes[0];
9113
9114                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9115                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9116                         path->slots[0]++;
9117                         continue;
9118                 }
9119
9120                 ei = btrfs_item_ptr(leaf, path->slots[0],
9121                                     struct btrfs_extent_item);
9122                 if (!(btrfs_extent_flags(leaf, ei) &
9123                       BTRFS_EXTENT_FLAG_DATA)) {
9124                         path->slots[0]++;
9125                         continue;
9126                 }
9127
9128                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9129                                     key.offset);
9130                 if (ret)
9131                         break;
9132                 path->slots[0]++;
9133         }
9134
9135         btrfs_free_path(path);
9136         free(buf);
9137         return ret;
9138 }
9139
9140 /*
9141  * Recalculate the csum and put it into the csum tree.
9142  *
9143  * Extent tree init will wipe out all the extent info, so in that case, we
9144  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9145  * will use fs/subvol trees to init the csum tree.
9146  */
9147 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9148                           struct btrfs_root *csum_root,
9149                           int search_fs_tree)
9150 {
9151         if (search_fs_tree)
9152                 return fill_csum_tree_from_fs(trans, csum_root);
9153         else
9154                 return fill_csum_tree_from_extent(trans, csum_root);
9155 }
9156
9157 static void free_roots_info_cache(void)
9158 {
9159         if (!roots_info_cache)
9160                 return;
9161
9162         while (!cache_tree_empty(roots_info_cache)) {
9163                 struct cache_extent *entry;
9164                 struct root_item_info *rii;
9165
9166                 entry = first_cache_extent(roots_info_cache);
9167                 if (!entry)
9168                         break;
9169                 remove_cache_extent(roots_info_cache, entry);
9170                 rii = container_of(entry, struct root_item_info, cache_extent);
9171                 free(rii);
9172         }
9173
9174         free(roots_info_cache);
9175         roots_info_cache = NULL;
9176 }
9177
9178 static int build_roots_info_cache(struct btrfs_fs_info *info)
9179 {
9180         int ret = 0;
9181         struct btrfs_key key;
9182         struct extent_buffer *leaf;
9183         struct btrfs_path *path;
9184
9185         if (!roots_info_cache) {
9186                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9187                 if (!roots_info_cache)
9188                         return -ENOMEM;
9189                 cache_tree_init(roots_info_cache);
9190         }
9191
9192         path = btrfs_alloc_path();
9193         if (!path)
9194                 return -ENOMEM;
9195
9196         key.objectid = 0;
9197         key.type = BTRFS_EXTENT_ITEM_KEY;
9198         key.offset = 0;
9199
9200         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9201         if (ret < 0)
9202                 goto out;
9203         leaf = path->nodes[0];
9204
9205         while (1) {
9206                 struct btrfs_key found_key;
9207                 struct btrfs_extent_item *ei;
9208                 struct btrfs_extent_inline_ref *iref;
9209                 int slot = path->slots[0];
9210                 int type;
9211                 u64 flags;
9212                 u64 root_id;
9213                 u8 level;
9214                 struct cache_extent *entry;
9215                 struct root_item_info *rii;
9216
9217                 if (slot >= btrfs_header_nritems(leaf)) {
9218                         ret = btrfs_next_leaf(info->extent_root, path);
9219                         if (ret < 0) {
9220                                 break;
9221                         } else if (ret) {
9222                                 ret = 0;
9223                                 break;
9224                         }
9225                         leaf = path->nodes[0];
9226                         slot = path->slots[0];
9227                 }
9228
9229                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9230
9231                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9232                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9233                         goto next;
9234
9235                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9236                 flags = btrfs_extent_flags(leaf, ei);
9237
9238                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9239                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9240                         goto next;
9241
9242                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9243                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9244                         level = found_key.offset;
9245                 } else {
9246                         struct btrfs_tree_block_info *binfo;
9247
9248                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9249                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9250                         level = btrfs_tree_block_level(leaf, binfo);
9251                 }
9252
9253                 /*
9254                  * For a root extent, it must be of the following type and the
9255                  * first (and only one) iref in the item.
9256                  */
9257                 type = btrfs_extent_inline_ref_type(leaf, iref);
9258                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9259                         goto next;
9260
9261                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9262                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9263                 if (!entry) {
9264                         rii = malloc(sizeof(struct root_item_info));
9265                         if (!rii) {
9266                                 ret = -ENOMEM;
9267                                 goto out;
9268                         }
9269                         rii->cache_extent.start = root_id;
9270                         rii->cache_extent.size = 1;
9271                         rii->level = (u8)-1;
9272                         entry = &rii->cache_extent;
9273                         ret = insert_cache_extent(roots_info_cache, entry);
9274                         ASSERT(ret == 0);
9275                 } else {
9276                         rii = container_of(entry, struct root_item_info,
9277                                            cache_extent);
9278                 }
9279
9280                 ASSERT(rii->cache_extent.start == root_id);
9281                 ASSERT(rii->cache_extent.size == 1);
9282
9283                 if (level > rii->level || rii->level == (u8)-1) {
9284                         rii->level = level;
9285                         rii->bytenr = found_key.objectid;
9286                         rii->gen = btrfs_extent_generation(leaf, ei);
9287                         rii->node_count = 1;
9288                 } else if (level == rii->level) {
9289                         rii->node_count++;
9290                 }
9291 next:
9292                 path->slots[0]++;
9293         }
9294
9295 out:
9296         btrfs_free_path(path);
9297
9298         return ret;
9299 }
9300
9301 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9302                                   struct btrfs_path *path,
9303                                   const struct btrfs_key *root_key,
9304                                   const int read_only_mode)
9305 {
9306         const u64 root_id = root_key->objectid;
9307         struct cache_extent *entry;
9308         struct root_item_info *rii;
9309         struct btrfs_root_item ri;
9310         unsigned long offset;
9311
9312         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9313         if (!entry) {
9314                 fprintf(stderr,
9315                         "Error: could not find extent items for root %llu\n",
9316                         root_key->objectid);
9317                 return -ENOENT;
9318         }
9319
9320         rii = container_of(entry, struct root_item_info, cache_extent);
9321         ASSERT(rii->cache_extent.start == root_id);
9322         ASSERT(rii->cache_extent.size == 1);
9323
9324         if (rii->node_count != 1) {
9325                 fprintf(stderr,
9326                         "Error: could not find btree root extent for root %llu\n",
9327                         root_id);
9328                 return -ENOENT;
9329         }
9330
9331         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9332         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9333
9334         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9335             btrfs_root_level(&ri) != rii->level ||
9336             btrfs_root_generation(&ri) != rii->gen) {
9337
9338                 /*
9339                  * If we're in repair mode but our caller told us to not update
9340                  * the root item, i.e. just check if it needs to be updated, don't
9341                  * print this message, since the caller will call us again shortly
9342                  * for the same root item without read only mode (the caller will
9343                  * open a transaction first).
9344                  */
9345                 if (!(read_only_mode && repair))
9346                         fprintf(stderr,
9347                                 "%sroot item for root %llu,"
9348                                 " current bytenr %llu, current gen %llu, current level %u,"
9349                                 " new bytenr %llu, new gen %llu, new level %u\n",
9350                                 (read_only_mode ? "" : "fixing "),
9351                                 root_id,
9352                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9353                                 btrfs_root_level(&ri),
9354                                 rii->bytenr, rii->gen, rii->level);
9355
9356                 if (btrfs_root_generation(&ri) > rii->gen) {
9357                         fprintf(stderr,
9358                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9359                                 root_id, btrfs_root_generation(&ri), rii->gen);
9360                         return -EINVAL;
9361                 }
9362
9363                 if (!read_only_mode) {
9364                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9365                         btrfs_set_root_level(&ri, rii->level);
9366                         btrfs_set_root_generation(&ri, rii->gen);
9367                         write_extent_buffer(path->nodes[0], &ri,
9368                                             offset, sizeof(ri));
9369                 }
9370
9371                 return 1;
9372         }
9373
9374         return 0;
9375 }
9376
9377 /*
9378  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9379  * caused read-only snapshots to be corrupted if they were created at a moment
9380  * when the source subvolume/snapshot had orphan items. The issue was that the
9381  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9382  * node instead of the post orphan cleanup root node.
9383  * So this function, and its callees, just detects and fixes those cases. Even
9384  * though the regression was for read-only snapshots, this function applies to
9385  * any snapshot/subvolume root.
9386  * This must be run before any other repair code - not doing it so, makes other
9387  * repair code delete or modify backrefs in the extent tree for example, which
9388  * will result in an inconsistent fs after repairing the root items.
9389  */
9390 static int repair_root_items(struct btrfs_fs_info *info)
9391 {
9392         struct btrfs_path *path = NULL;
9393         struct btrfs_key key;
9394         struct extent_buffer *leaf;
9395         struct btrfs_trans_handle *trans = NULL;
9396         int ret = 0;
9397         int bad_roots = 0;
9398         int need_trans = 0;
9399
9400         ret = build_roots_info_cache(info);
9401         if (ret)
9402                 goto out;
9403
9404         path = btrfs_alloc_path();
9405         if (!path) {
9406                 ret = -ENOMEM;
9407                 goto out;
9408         }
9409
9410         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9411         key.type = BTRFS_ROOT_ITEM_KEY;
9412         key.offset = 0;
9413
9414 again:
9415         /*
9416          * Avoid opening and committing transactions if a leaf doesn't have
9417          * any root items that need to be fixed, so that we avoid rotating
9418          * backup roots unnecessarily.
9419          */
9420         if (need_trans) {
9421                 trans = btrfs_start_transaction(info->tree_root, 1);
9422                 if (IS_ERR(trans)) {
9423                         ret = PTR_ERR(trans);
9424                         goto out;
9425                 }
9426         }
9427
9428         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9429                                 0, trans ? 1 : 0);
9430         if (ret < 0)
9431                 goto out;
9432         leaf = path->nodes[0];
9433
9434         while (1) {
9435                 struct btrfs_key found_key;
9436
9437                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9438                         int no_more_keys = find_next_key(path, &key);
9439
9440                         btrfs_release_path(path);
9441                         if (trans) {
9442                                 ret = btrfs_commit_transaction(trans,
9443                                                                info->tree_root);
9444                                 trans = NULL;
9445                                 if (ret < 0)
9446                                         goto out;
9447                         }
9448                         need_trans = 0;
9449                         if (no_more_keys)
9450                                 break;
9451                         goto again;
9452                 }
9453
9454                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9455
9456                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9457                         goto next;
9458                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9459                         goto next;
9460
9461                 ret = maybe_repair_root_item(info, path, &found_key,
9462                                              trans ? 0 : 1);
9463                 if (ret < 0)
9464                         goto out;
9465                 if (ret) {
9466                         if (!trans && repair) {
9467                                 need_trans = 1;
9468                                 key = found_key;
9469                                 btrfs_release_path(path);
9470                                 goto again;
9471                         }
9472                         bad_roots++;
9473                 }
9474 next:
9475                 path->slots[0]++;
9476         }
9477         ret = 0;
9478 out:
9479         free_roots_info_cache();
9480         btrfs_free_path(path);
9481         if (trans)
9482                 btrfs_commit_transaction(trans, info->tree_root);
9483         if (ret < 0)
9484                 return ret;
9485
9486         return bad_roots;
9487 }
9488
9489 const char * const cmd_check_usage[] = {
9490         "btrfs check [options] <device>",
9491         "Check structural inegrity of a filesystem (unmounted).",
9492         "Check structural inegrity of an unmounted filesystem. Verify internal",
9493         "trees' consistency and item connectivity. In the repair mode try to",
9494         "fix the problems found.",
9495         "WARNING: the repair mode is considered dangerous",
9496         "",
9497         "-s|--super <superblock>     use this superblock copy",
9498         "-b|--backup                 use the first valid backup root copy",
9499         "--repair                    try to repair the filesystem",
9500         "--readonly                  run in read-only mode (default)",
9501         "--init-csum-tree            create a new CRC tree",
9502         "--init-extent-tree          create a new extent tree",
9503         "--check-data-csum           verify checkums of data blocks",
9504         "-Q|--qgroup-report           print a report on qgroup consistency",
9505         "-E|--subvol-extents <subvolid>",
9506         "                            print subvolume extents and sharing state",
9507         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9508         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9509         "-p|--progress               indicate progress",
9510         NULL
9511 };
9512
9513 int cmd_check(int argc, char **argv)
9514 {
9515         struct cache_tree root_cache;
9516         struct btrfs_root *root;
9517         struct btrfs_fs_info *info;
9518         u64 bytenr = 0;
9519         u64 subvolid = 0;
9520         u64 tree_root_bytenr = 0;
9521         u64 chunk_root_bytenr = 0;
9522         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9523         int ret;
9524         u64 num;
9525         int init_csum_tree = 0;
9526         int readonly = 0;
9527         int qgroup_report = 0;
9528         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9529
9530         while(1) {
9531                 int c;
9532                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9533                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9534                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9535                 static const struct option long_options[] = {
9536                         { "super", required_argument, NULL, 's' },
9537                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9538                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9539                         { "init-csum-tree", no_argument, NULL,
9540                                 GETOPT_VAL_INIT_CSUM },
9541                         { "init-extent-tree", no_argument, NULL,
9542                                 GETOPT_VAL_INIT_EXTENT },
9543                         { "check-data-csum", no_argument, NULL,
9544                                 GETOPT_VAL_CHECK_CSUM },
9545                         { "backup", no_argument, NULL, 'b' },
9546                         { "subvol-extents", required_argument, NULL, 'E' },
9547                         { "qgroup-report", no_argument, NULL, 'Q' },
9548                         { "tree-root", required_argument, NULL, 'r' },
9549                         { "chunk-root", required_argument, NULL,
9550                                 GETOPT_VAL_CHUNK_TREE },
9551                         { "progress", no_argument, NULL, 'p' },
9552                         { NULL, 0, NULL, 0}
9553                 };
9554
9555                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9556                 if (c < 0)
9557                         break;
9558                 switch(c) {
9559                         case 'a': /* ignored */ break;
9560                         case 'b':
9561                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9562                                 break;
9563                         case 's':
9564                                 num = arg_strtou64(optarg);
9565                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9566                                         fprintf(stderr,
9567                                                 "ERROR: super mirror should be less than: %d\n",
9568                                                 BTRFS_SUPER_MIRROR_MAX);
9569                                         exit(1);
9570                                 }
9571                                 bytenr = btrfs_sb_offset(((int)num));
9572                                 printf("using SB copy %llu, bytenr %llu\n", num,
9573                                        (unsigned long long)bytenr);
9574                                 break;
9575                         case 'Q':
9576                                 qgroup_report = 1;
9577                                 break;
9578                         case 'E':
9579                                 subvolid = arg_strtou64(optarg);
9580                                 break;
9581                         case 'r':
9582                                 tree_root_bytenr = arg_strtou64(optarg);
9583                                 break;
9584                         case GETOPT_VAL_CHUNK_TREE:
9585                                 chunk_root_bytenr = arg_strtou64(optarg);
9586                                 break;
9587                         case 'p':
9588                                 ctx.progress_enabled = true;
9589                                 break;
9590                         case '?':
9591                         case 'h':
9592                                 usage(cmd_check_usage);
9593                         case GETOPT_VAL_REPAIR:
9594                                 printf("enabling repair mode\n");
9595                                 repair = 1;
9596                                 ctree_flags |= OPEN_CTREE_WRITES;
9597                                 break;
9598                         case GETOPT_VAL_READONLY:
9599                                 readonly = 1;
9600                                 break;
9601                         case GETOPT_VAL_INIT_CSUM:
9602                                 printf("Creating a new CRC tree\n");
9603                                 init_csum_tree = 1;
9604                                 repair = 1;
9605                                 ctree_flags |= OPEN_CTREE_WRITES;
9606                                 break;
9607                         case GETOPT_VAL_INIT_EXTENT:
9608                                 init_extent_tree = 1;
9609                                 ctree_flags |= (OPEN_CTREE_WRITES |
9610                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9611                                 repair = 1;
9612                                 break;
9613                         case GETOPT_VAL_CHECK_CSUM:
9614                                 check_data_csum = 1;
9615                                 break;
9616                 }
9617         }
9618
9619         if (check_argc_exact(argc - optind, 1))
9620                 usage(cmd_check_usage);
9621
9622         if (ctx.progress_enabled) {
9623                 ctx.tp = TASK_NOTHING;
9624                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9625         }
9626
9627         /* This check is the only reason for --readonly to exist */
9628         if (readonly && repair) {
9629                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9630                 exit(1);
9631         }
9632
9633         radix_tree_init();
9634         cache_tree_init(&root_cache);
9635
9636         if((ret = check_mounted(argv[optind])) < 0) {
9637                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9638                 goto err_out;
9639         } else if(ret) {
9640                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9641                 ret = -EBUSY;
9642                 goto err_out;
9643         }
9644
9645         /* only allow partial opening under repair mode */
9646         if (repair)
9647                 ctree_flags |= OPEN_CTREE_PARTIAL;
9648
9649         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9650                                   chunk_root_bytenr, ctree_flags);
9651         if (!info) {
9652                 fprintf(stderr, "Couldn't open file system\n");
9653                 ret = -EIO;
9654                 goto err_out;
9655         }
9656
9657         global_info = info;
9658         root = info->fs_root;
9659
9660         /*
9661          * repair mode will force us to commit transaction which
9662          * will make us fail to load log tree when mounting.
9663          */
9664         if (repair && btrfs_super_log_root(info->super_copy)) {
9665                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9666                 if (!ret) {
9667                         ret = 1;
9668                         goto close_out;
9669                 }
9670                 ret = zero_log_tree(root);
9671                 if (ret) {
9672                         fprintf(stderr, "fail to zero log tree\n");
9673                         goto close_out;
9674                 }
9675         }
9676
9677         uuid_unparse(info->super_copy->fsid, uuidbuf);
9678         if (qgroup_report) {
9679                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9680                        uuidbuf);
9681                 ret = qgroup_verify_all(info);
9682                 if (ret == 0)
9683                         ret = report_qgroups(1);
9684                 goto close_out;
9685         }
9686         if (subvolid) {
9687                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9688                        subvolid, argv[optind], uuidbuf);
9689                 ret = print_extent_state(info, subvolid);
9690                 goto close_out;
9691         }
9692         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9693
9694         if (!extent_buffer_uptodate(info->tree_root->node) ||
9695             !extent_buffer_uptodate(info->dev_root->node) ||
9696             !extent_buffer_uptodate(info->chunk_root->node)) {
9697                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9698                 ret = -EIO;
9699                 goto close_out;
9700         }
9701
9702         if (init_extent_tree || init_csum_tree) {
9703                 struct btrfs_trans_handle *trans;
9704
9705                 trans = btrfs_start_transaction(info->extent_root, 0);
9706                 if (IS_ERR(trans)) {
9707                         fprintf(stderr, "Error starting transaction\n");
9708                         ret = PTR_ERR(trans);
9709                         goto close_out;
9710                 }
9711
9712                 if (init_extent_tree) {
9713                         printf("Creating a new extent tree\n");
9714                         ret = reinit_extent_tree(trans, info);
9715                         if (ret)
9716                                 goto close_out;
9717                 }
9718
9719                 if (init_csum_tree) {
9720                         fprintf(stderr, "Reinit crc root\n");
9721                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9722                         if (ret) {
9723                                 fprintf(stderr, "crc root initialization failed\n");
9724                                 ret = -EIO;
9725                                 goto close_out;
9726                         }
9727
9728                         ret = fill_csum_tree(trans, info->csum_root,
9729                                              init_extent_tree);
9730                         if (ret) {
9731                                 fprintf(stderr, "crc refilling failed\n");
9732                                 return -EIO;
9733                         }
9734                 }
9735                 /*
9736                  * Ok now we commit and run the normal fsck, which will add
9737                  * extent entries for all of the items it finds.
9738                  */
9739                 ret = btrfs_commit_transaction(trans, info->extent_root);
9740                 if (ret)
9741                         goto close_out;
9742         }
9743         if (!extent_buffer_uptodate(info->extent_root->node)) {
9744                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9745                 ret = -EIO;
9746                 goto close_out;
9747         }
9748         if (!extent_buffer_uptodate(info->csum_root->node)) {
9749                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9750                 ret = -EIO;
9751                 goto close_out;
9752         }
9753
9754         if (!ctx.progress_enabled)
9755                 fprintf(stderr, "checking extents\n");
9756         ret = check_chunks_and_extents(root);
9757         if (ret)
9758                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9759
9760         ret = repair_root_items(info);
9761         if (ret < 0)
9762                 goto close_out;
9763         if (repair) {
9764                 fprintf(stderr, "Fixed %d roots.\n", ret);
9765                 ret = 0;
9766         } else if (ret > 0) {
9767                 fprintf(stderr,
9768                        "Found %d roots with an outdated root item.\n",
9769                        ret);
9770                 fprintf(stderr,
9771                         "Please run a filesystem check with the option --repair to fix them.\n");
9772                 ret = 1;
9773                 goto close_out;
9774         }
9775
9776         if (!ctx.progress_enabled) {
9777                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9778                         fprintf(stderr, "checking free space tree\n");
9779                 else
9780                         fprintf(stderr, "checking free space cache\n");
9781         }
9782         ret = check_space_cache(root);
9783         if (ret)
9784                 goto out;
9785
9786         /*
9787          * We used to have to have these hole extents in between our real
9788          * extents so if we don't have this flag set we need to make sure there
9789          * are no gaps in the file extents for inodes, otherwise we can just
9790          * ignore it when this happens.
9791          */
9792         no_holes = btrfs_fs_incompat(root->fs_info,
9793                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9794         if (!ctx.progress_enabled)
9795                 fprintf(stderr, "checking fs roots\n");
9796         ret = check_fs_roots(root, &root_cache);
9797         if (ret)
9798                 goto out;
9799
9800         fprintf(stderr, "checking csums\n");
9801         ret = check_csums(root);
9802         if (ret)
9803                 goto out;
9804
9805         fprintf(stderr, "checking root refs\n");
9806         ret = check_root_refs(root, &root_cache);
9807         if (ret)
9808                 goto out;
9809
9810         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9811                 struct extent_buffer *eb;
9812
9813                 eb = list_first_entry(&root->fs_info->recow_ebs,
9814                                       struct extent_buffer, recow);
9815                 list_del_init(&eb->recow);
9816                 ret = recow_extent_buffer(root, eb);
9817                 if (ret)
9818                         break;
9819         }
9820
9821         while (!list_empty(&delete_items)) {
9822                 struct bad_item *bad;
9823
9824                 bad = list_first_entry(&delete_items, struct bad_item, list);
9825                 list_del_init(&bad->list);
9826                 if (repair)
9827                         ret = delete_bad_item(root, bad);
9828                 free(bad);
9829         }
9830
9831         if (info->quota_enabled) {
9832                 int err;
9833                 fprintf(stderr, "checking quota groups\n");
9834                 err = qgroup_verify_all(info);
9835                 if (err)
9836                         goto out;
9837         }
9838
9839         if (!list_empty(&root->fs_info->recow_ebs)) {
9840                 fprintf(stderr, "Transid errors in file system\n");
9841                 ret = 1;
9842         }
9843 out:
9844         /* Don't override original ret */
9845         if (ret)
9846                 report_qgroups(0);
9847         else
9848                 ret = report_qgroups(0);
9849         if (found_old_backref) { /*
9850                  * there was a disk format change when mixed
9851                  * backref was in testing tree. The old format
9852                  * existed about one week.
9853                  */
9854                 printf("\n * Found old mixed backref format. "
9855                        "The old format is not supported! *"
9856                        "\n * Please mount the FS in readonly mode, "
9857                        "backup data and re-format the FS. *\n\n");
9858                 ret = 1;
9859         }
9860         printf("found %llu bytes used err is %d\n",
9861                (unsigned long long)bytes_used, ret);
9862         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9863         printf("total tree bytes: %llu\n",
9864                (unsigned long long)total_btree_bytes);
9865         printf("total fs tree bytes: %llu\n",
9866                (unsigned long long)total_fs_tree_bytes);
9867         printf("total extent tree bytes: %llu\n",
9868                (unsigned long long)total_extent_tree_bytes);
9869         printf("btree space waste bytes: %llu\n",
9870                (unsigned long long)btree_space_waste);
9871         printf("file data blocks allocated: %llu\n referenced %llu\n",
9872                 (unsigned long long)data_bytes_allocated,
9873                 (unsigned long long)data_bytes_referenced);
9874
9875         free_root_recs_tree(&root_cache);
9876 close_out:
9877         close_ctree(root);
9878 err_out:
9879         if (ctx.progress_enabled)
9880                 task_deinit(ctx.info);
9881
9882         return ret;
9883 }