btrfs-progs: convert-tests: Add support for custom test scripts
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44
45 enum task_position {
46         TASK_EXTENTS,
47         TASK_FREE_SPACE,
48         TASK_FS_ROOTS,
49         TASK_NOTHING, /* have to be the last element */
50 };
51
52 struct task_ctx {
53         int progress_enabled;
54         enum task_position tp;
55
56         struct task_info *info;
57 };
58
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
78 struct extent_backref {
79         struct list_head list;
80         unsigned int is_data:1;
81         unsigned int found_extent_tree:1;
82         unsigned int full_backref:1;
83         unsigned int found_ref:1;
84         unsigned int broken:1;
85 };
86
87 struct data_backref {
88         struct extent_backref node;
89         union {
90                 u64 parent;
91                 u64 root;
92         };
93         u64 owner;
94         u64 offset;
95         u64 disk_bytenr;
96         u64 bytes;
97         u64 ram_bytes;
98         u32 num_refs;
99         u32 found_ref;
100 };
101
102 /*
103  * Much like data_backref, just removed the undetermined members
104  * and change it to use list_head.
105  * During extent scan, it is stored in root->orphan_data_extent.
106  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
107  */
108 struct orphan_data_extent {
109         struct list_head list;
110         u64 root;
111         u64 objectid;
112         u64 offset;
113         u64 disk_bytenr;
114         u64 disk_len;
115 };
116
117 struct tree_backref {
118         struct extent_backref node;
119         union {
120                 u64 parent;
121                 u64 root;
122         };
123 };
124
125 /* Explicit initialization for extent_record::flag_block_full_backref */
126 enum { FLAG_UNSET = 2 };
127
128 struct extent_record {
129         struct list_head backrefs;
130         struct list_head dups;
131         struct list_head list;
132         struct cache_extent cache;
133         struct btrfs_disk_key parent_key;
134         u64 start;
135         u64 max_size;
136         u64 nr;
137         u64 refs;
138         u64 extent_item_refs;
139         u64 generation;
140         u64 parent_generation;
141         u64 info_objectid;
142         u32 num_duplicates;
143         u8 info_level;
144         unsigned int flag_block_full_backref:2;
145         unsigned int found_rec:1;
146         unsigned int content_checked:1;
147         unsigned int owner_ref_checked:1;
148         unsigned int is_root:1;
149         unsigned int metadata:1;
150         unsigned int bad_full_backref:1;
151         unsigned int crossing_stripes:1;
152         unsigned int wrong_chunk_type:1;
153 };
154
155 struct inode_backref {
156         struct list_head list;
157         unsigned int found_dir_item:1;
158         unsigned int found_dir_index:1;
159         unsigned int found_inode_ref:1;
160         unsigned int filetype:8;
161         int errors;
162         unsigned int ref_type;
163         u64 dir;
164         u64 index;
165         u16 namelen;
166         char name[0];
167 };
168
169 struct root_item_record {
170         struct list_head list;
171         u64 objectid;
172         u64 bytenr;
173         u64 last_snapshot;
174         u8 level;
175         u8 drop_level;
176         int level_size;
177         struct btrfs_key drop_key;
178 };
179
180 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
181 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
182 #define REF_ERR_NO_INODE_REF            (1 << 2)
183 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
184 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
185 #define REF_ERR_DUP_INODE_REF           (1 << 5)
186 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
187 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
188 #define REF_ERR_NAME_TOO_LONG           (1 << 8) // 100
189 #define REF_ERR_NO_ROOT_REF             (1 << 9)
190 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
191 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
192 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
193
194 struct file_extent_hole {
195         struct rb_node node;
196         u64 start;
197         u64 len;
198 };
199
200 struct inode_record {
201         struct list_head backrefs;
202         unsigned int checked:1;
203         unsigned int merging:1;
204         unsigned int found_inode_item:1;
205         unsigned int found_dir_item:1;
206         unsigned int found_file_extent:1;
207         unsigned int found_csum_item:1;
208         unsigned int some_csum_missing:1;
209         unsigned int nodatasum:1;
210         int errors;
211
212         u64 ino;
213         u32 nlink;
214         u32 imode;
215         u64 isize;
216         u64 nbytes;
217
218         u32 found_link;
219         u64 found_size;
220         u64 extent_start;
221         u64 extent_end;
222         struct rb_root holes;
223         struct list_head orphan_extents;
224
225         u32 refs;
226 };
227
228 #define I_ERR_NO_INODE_ITEM             (1 << 0)
229 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
230 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
231 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
232 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
233 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
234 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
235 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
236 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) // 100
237 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
238 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) // 400
239 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
240 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
241 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
242 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
243
244 struct root_backref {
245         struct list_head list;
246         unsigned int found_dir_item:1;
247         unsigned int found_dir_index:1;
248         unsigned int found_back_ref:1;
249         unsigned int found_forward_ref:1;
250         unsigned int reachable:1;
251         int errors;
252         u64 ref_root;
253         u64 dir;
254         u64 index;
255         u16 namelen;
256         char name[0];
257 };
258
259 struct root_record {
260         struct list_head backrefs;
261         struct cache_extent cache;
262         unsigned int found_root_item:1;
263         u64 objectid;
264         u32 found_ref;
265 };
266
267 struct ptr_node {
268         struct cache_extent cache;
269         void *data;
270 };
271
272 struct shared_node {
273         struct cache_extent cache;
274         struct cache_tree root_cache;
275         struct cache_tree inode_cache;
276         struct inode_record *current;
277         u32 refs;
278 };
279
280 struct block_info {
281         u64 start;
282         u32 size;
283 };
284
285 struct walk_control {
286         struct cache_tree shared;
287         struct shared_node *nodes[BTRFS_MAX_LEVEL];
288         int active_node;
289         int root_level;
290 };
291
292 struct bad_item {
293         struct btrfs_key key;
294         u64 root_id;
295         struct list_head list;
296 };
297
298 struct extent_entry {
299         u64 bytenr;
300         u64 bytes;
301         int count;
302         int broken;
303         struct list_head list;
304 };
305
306 struct root_item_info {
307         /* level of the root */
308         u8 level;
309         /* number of nodes at this level, must be 1 for a root */
310         int node_count;
311         u64 bytenr;
312         u64 gen;
313         struct cache_extent cache_extent;
314 };
315
316 static void *print_status_check(void *p)
317 {
318         struct task_ctx *priv = p;
319         const char work_indicator[] = { '.', 'o', 'O', 'o' };
320         uint32_t count = 0;
321         static char *task_position_string[] = {
322                 "checking extents",
323                 "checking free space cache",
324                 "checking fs roots",
325         };
326
327         task_period_start(priv->info, 1000 /* 1s */);
328
329         if (priv->tp == TASK_NOTHING)
330                 return NULL;
331
332         while (1) {
333                 printf("%s [%c]\r", task_position_string[priv->tp],
334                                 work_indicator[count % 4]);
335                 count++;
336                 fflush(stdout);
337                 task_period_wait(priv->info);
338         }
339         return NULL;
340 }
341
342 static int print_status_return(void *p)
343 {
344         printf("\n");
345         fflush(stdout);
346
347         return 0;
348 }
349
350 /* Compatible function to allow reuse of old codes */
351 static u64 first_extent_gap(struct rb_root *holes)
352 {
353         struct file_extent_hole *hole;
354
355         if (RB_EMPTY_ROOT(holes))
356                 return (u64)-1;
357
358         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
359         return hole->start;
360 }
361
362 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
363 {
364         struct file_extent_hole *hole1;
365         struct file_extent_hole *hole2;
366
367         hole1 = rb_entry(node1, struct file_extent_hole, node);
368         hole2 = rb_entry(node2, struct file_extent_hole, node);
369
370         if (hole1->start > hole2->start)
371                 return -1;
372         if (hole1->start < hole2->start)
373                 return 1;
374         /* Now hole1->start == hole2->start */
375         if (hole1->len >= hole2->len)
376                 /*
377                  * Hole 1 will be merge center
378                  * Same hole will be merged later
379                  */
380                 return -1;
381         /* Hole 2 will be merge center */
382         return 1;
383 }
384
385 /*
386  * Add a hole to the record
387  *
388  * This will do hole merge for copy_file_extent_holes(),
389  * which will ensure there won't be continuous holes.
390  */
391 static int add_file_extent_hole(struct rb_root *holes,
392                                 u64 start, u64 len)
393 {
394         struct file_extent_hole *hole;
395         struct file_extent_hole *prev = NULL;
396         struct file_extent_hole *next = NULL;
397
398         hole = malloc(sizeof(*hole));
399         if (!hole)
400                 return -ENOMEM;
401         hole->start = start;
402         hole->len = len;
403         /* Since compare will not return 0, no -EEXIST will happen */
404         rb_insert(holes, &hole->node, compare_hole);
405
406         /* simple merge with previous hole */
407         if (rb_prev(&hole->node))
408                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
409                                 node);
410         if (prev && prev->start + prev->len >= hole->start) {
411                 hole->len = hole->start + hole->len - prev->start;
412                 hole->start = prev->start;
413                 rb_erase(&prev->node, holes);
414                 free(prev);
415                 prev = NULL;
416         }
417
418         /* iterate merge with next holes */
419         while (1) {
420                 if (!rb_next(&hole->node))
421                         break;
422                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
423                                         node);
424                 if (hole->start + hole->len >= next->start) {
425                         if (hole->start + hole->len <= next->start + next->len)
426                                 hole->len = next->start + next->len -
427                                             hole->start;
428                         rb_erase(&next->node, holes);
429                         free(next);
430                         next = NULL;
431                 } else
432                         break;
433         }
434         return 0;
435 }
436
437 static int compare_hole_range(struct rb_node *node, void *data)
438 {
439         struct file_extent_hole *hole;
440         u64 start;
441
442         hole = (struct file_extent_hole *)data;
443         start = hole->start;
444
445         hole = rb_entry(node, struct file_extent_hole, node);
446         if (start < hole->start)
447                 return -1;
448         if (start >= hole->start && start < hole->start + hole->len)
449                 return 0;
450         return 1;
451 }
452
453 /*
454  * Delete a hole in the record
455  *
456  * This will do the hole split and is much restrict than add.
457  */
458 static int del_file_extent_hole(struct rb_root *holes,
459                                 u64 start, u64 len)
460 {
461         struct file_extent_hole *hole;
462         struct file_extent_hole tmp;
463         u64 prev_start = 0;
464         u64 prev_len = 0;
465         u64 next_start = 0;
466         u64 next_len = 0;
467         struct rb_node *node;
468         int have_prev = 0;
469         int have_next = 0;
470         int ret = 0;
471
472         tmp.start = start;
473         tmp.len = len;
474         node = rb_search(holes, &tmp, compare_hole_range, NULL);
475         if (!node)
476                 return -EEXIST;
477         hole = rb_entry(node, struct file_extent_hole, node);
478         if (start + len > hole->start + hole->len)
479                 return -EEXIST;
480
481         /*
482          * Now there will be no overlap, delete the hole and re-add the
483          * split(s) if they exists.
484          */
485         if (start > hole->start) {
486                 prev_start = hole->start;
487                 prev_len = start - hole->start;
488                 have_prev = 1;
489         }
490         if (hole->start + hole->len > start + len) {
491                 next_start = start + len;
492                 next_len = hole->start + hole->len - start - len;
493                 have_next = 1;
494         }
495         rb_erase(node, holes);
496         free(hole);
497         if (have_prev) {
498                 ret = add_file_extent_hole(holes, prev_start, prev_len);
499                 if (ret < 0)
500                         return ret;
501         }
502         if (have_next) {
503                 ret = add_file_extent_hole(holes, next_start, next_len);
504                 if (ret < 0)
505                         return ret;
506         }
507         return 0;
508 }
509
510 static int copy_file_extent_holes(struct rb_root *dst,
511                                   struct rb_root *src)
512 {
513         struct file_extent_hole *hole;
514         struct rb_node *node;
515         int ret = 0;
516
517         node = rb_first(src);
518         while (node) {
519                 hole = rb_entry(node, struct file_extent_hole, node);
520                 ret = add_file_extent_hole(dst, hole->start, hole->len);
521                 if (ret)
522                         break;
523                 node = rb_next(node);
524         }
525         return ret;
526 }
527
528 static void free_file_extent_holes(struct rb_root *holes)
529 {
530         struct rb_node *node;
531         struct file_extent_hole *hole;
532
533         node = rb_first(holes);
534         while (node) {
535                 hole = rb_entry(node, struct file_extent_hole, node);
536                 rb_erase(node, holes);
537                 free(hole);
538                 node = rb_first(holes);
539         }
540 }
541
542 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
543
544 static void record_root_in_trans(struct btrfs_trans_handle *trans,
545                                  struct btrfs_root *root)
546 {
547         if (root->last_trans != trans->transid) {
548                 root->track_dirty = 1;
549                 root->last_trans = trans->transid;
550                 root->commit_root = root->node;
551                 extent_buffer_get(root->node);
552         }
553 }
554
555 static u8 imode_to_type(u32 imode)
556 {
557 #define S_SHIFT 12
558         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
559                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
560                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
561                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
562                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
563                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
564                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
565                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
566         };
567
568         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
569 #undef S_SHIFT
570 }
571
572 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
573 {
574         struct device_record *rec1;
575         struct device_record *rec2;
576
577         rec1 = rb_entry(node1, struct device_record, node);
578         rec2 = rb_entry(node2, struct device_record, node);
579         if (rec1->devid > rec2->devid)
580                 return -1;
581         else if (rec1->devid < rec2->devid)
582                 return 1;
583         else
584                 return 0;
585 }
586
587 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
588 {
589         struct inode_record *rec;
590         struct inode_backref *backref;
591         struct inode_backref *orig;
592         struct inode_backref *tmp;
593         struct orphan_data_extent *src_orphan;
594         struct orphan_data_extent *dst_orphan;
595         size_t size;
596         int ret;
597
598         rec = malloc(sizeof(*rec));
599         if (!rec)
600                 return ERR_PTR(-ENOMEM);
601         memcpy(rec, orig_rec, sizeof(*rec));
602         rec->refs = 1;
603         INIT_LIST_HEAD(&rec->backrefs);
604         INIT_LIST_HEAD(&rec->orphan_extents);
605         rec->holes = RB_ROOT;
606
607         list_for_each_entry(orig, &orig_rec->backrefs, list) {
608                 size = sizeof(*orig) + orig->namelen + 1;
609                 backref = malloc(size);
610                 if (!backref) {
611                         ret = -ENOMEM;
612                         goto cleanup;
613                 }
614                 memcpy(backref, orig, size);
615                 list_add_tail(&backref->list, &rec->backrefs);
616         }
617         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
618                 dst_orphan = malloc(sizeof(*dst_orphan));
619                 if (!dst_orphan) {
620                         ret = -ENOMEM;
621                         goto cleanup;
622                 }
623                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
624                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
625         }
626         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
627         BUG_ON(ret < 0);
628
629         return rec;
630
631 cleanup:
632         if (!list_empty(&rec->backrefs))
633                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
634                         list_del(&orig->list);
635                         free(orig);
636                 }
637
638         if (!list_empty(&rec->orphan_extents))
639                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
640                         list_del(&orig->list);
641                         free(orig);
642                 }
643
644         free(rec);
645
646         return ERR_PTR(ret);
647 }
648
649 static void print_orphan_data_extents(struct list_head *orphan_extents,
650                                       u64 objectid)
651 {
652         struct orphan_data_extent *orphan;
653
654         if (list_empty(orphan_extents))
655                 return;
656         printf("The following data extent is lost in tree %llu:\n",
657                objectid);
658         list_for_each_entry(orphan, orphan_extents, list) {
659                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
660                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
661                        orphan->disk_len);
662         }
663 }
664
665 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
666 {
667         u64 root_objectid = root->root_key.objectid;
668         int errors = rec->errors;
669
670         if (!errors)
671                 return;
672         /* reloc root errors, we print its corresponding fs root objectid*/
673         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
674                 root_objectid = root->root_key.offset;
675                 fprintf(stderr, "reloc");
676         }
677         fprintf(stderr, "root %llu inode %llu errors %x",
678                 (unsigned long long) root_objectid,
679                 (unsigned long long) rec->ino, rec->errors);
680
681         if (errors & I_ERR_NO_INODE_ITEM)
682                 fprintf(stderr, ", no inode item");
683         if (errors & I_ERR_NO_ORPHAN_ITEM)
684                 fprintf(stderr, ", no orphan item");
685         if (errors & I_ERR_DUP_INODE_ITEM)
686                 fprintf(stderr, ", dup inode item");
687         if (errors & I_ERR_DUP_DIR_INDEX)
688                 fprintf(stderr, ", dup dir index");
689         if (errors & I_ERR_ODD_DIR_ITEM)
690                 fprintf(stderr, ", odd dir item");
691         if (errors & I_ERR_ODD_FILE_EXTENT)
692                 fprintf(stderr, ", odd file extent");
693         if (errors & I_ERR_BAD_FILE_EXTENT)
694                 fprintf(stderr, ", bad file extent");
695         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
696                 fprintf(stderr, ", file extent overlap");
697         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
698                 fprintf(stderr, ", file extent discount");
699         if (errors & I_ERR_DIR_ISIZE_WRONG)
700                 fprintf(stderr, ", dir isize wrong");
701         if (errors & I_ERR_FILE_NBYTES_WRONG)
702                 fprintf(stderr, ", nbytes wrong");
703         if (errors & I_ERR_ODD_CSUM_ITEM)
704                 fprintf(stderr, ", odd csum item");
705         if (errors & I_ERR_SOME_CSUM_MISSING)
706                 fprintf(stderr, ", some csum missing");
707         if (errors & I_ERR_LINK_COUNT_WRONG)
708                 fprintf(stderr, ", link count wrong");
709         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
710                 fprintf(stderr, ", orphan file extent");
711         fprintf(stderr, "\n");
712         /* Print the orphan extents if needed */
713         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
714                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
715
716         /* Print the holes if needed */
717         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
718                 struct file_extent_hole *hole;
719                 struct rb_node *node;
720                 int found = 0;
721
722                 node = rb_first(&rec->holes);
723                 fprintf(stderr, "Found file extent holes:\n");
724                 while (node) {
725                         found = 1;
726                         hole = rb_entry(node, struct file_extent_hole, node);
727                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
728                                 hole->start, hole->len);
729                         node = rb_next(node);
730                 }
731                 if (!found)
732                         fprintf(stderr, "\tstart: 0, len: %llu\n",
733                                 round_up(rec->isize, root->sectorsize));
734         }
735 }
736
737 static void print_ref_error(int errors)
738 {
739         if (errors & REF_ERR_NO_DIR_ITEM)
740                 fprintf(stderr, ", no dir item");
741         if (errors & REF_ERR_NO_DIR_INDEX)
742                 fprintf(stderr, ", no dir index");
743         if (errors & REF_ERR_NO_INODE_REF)
744                 fprintf(stderr, ", no inode ref");
745         if (errors & REF_ERR_DUP_DIR_ITEM)
746                 fprintf(stderr, ", dup dir item");
747         if (errors & REF_ERR_DUP_DIR_INDEX)
748                 fprintf(stderr, ", dup dir index");
749         if (errors & REF_ERR_DUP_INODE_REF)
750                 fprintf(stderr, ", dup inode ref");
751         if (errors & REF_ERR_INDEX_UNMATCH)
752                 fprintf(stderr, ", index mismatch");
753         if (errors & REF_ERR_FILETYPE_UNMATCH)
754                 fprintf(stderr, ", filetype mismatch");
755         if (errors & REF_ERR_NAME_TOO_LONG)
756                 fprintf(stderr, ", name too long");
757         if (errors & REF_ERR_NO_ROOT_REF)
758                 fprintf(stderr, ", no root ref");
759         if (errors & REF_ERR_NO_ROOT_BACKREF)
760                 fprintf(stderr, ", no root backref");
761         if (errors & REF_ERR_DUP_ROOT_REF)
762                 fprintf(stderr, ", dup root ref");
763         if (errors & REF_ERR_DUP_ROOT_BACKREF)
764                 fprintf(stderr, ", dup root backref");
765         fprintf(stderr, "\n");
766 }
767
768 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
769                                           u64 ino, int mod)
770 {
771         struct ptr_node *node;
772         struct cache_extent *cache;
773         struct inode_record *rec = NULL;
774         int ret;
775
776         cache = lookup_cache_extent(inode_cache, ino, 1);
777         if (cache) {
778                 node = container_of(cache, struct ptr_node, cache);
779                 rec = node->data;
780                 if (mod && rec->refs > 1) {
781                         node->data = clone_inode_rec(rec);
782                         if (IS_ERR(node->data))
783                                 return node->data;
784                         rec->refs--;
785                         rec = node->data;
786                 }
787         } else if (mod) {
788                 rec = calloc(1, sizeof(*rec));
789                 if (!rec)
790                         return ERR_PTR(-ENOMEM);
791                 rec->ino = ino;
792                 rec->extent_start = (u64)-1;
793                 rec->refs = 1;
794                 INIT_LIST_HEAD(&rec->backrefs);
795                 INIT_LIST_HEAD(&rec->orphan_extents);
796                 rec->holes = RB_ROOT;
797
798                 node = malloc(sizeof(*node));
799                 if (!node) {
800                         free(rec);
801                         return ERR_PTR(-ENOMEM);
802                 }
803                 node->cache.start = ino;
804                 node->cache.size = 1;
805                 node->data = rec;
806
807                 if (ino == BTRFS_FREE_INO_OBJECTID)
808                         rec->found_link = 1;
809
810                 ret = insert_cache_extent(inode_cache, &node->cache);
811                 if (ret)
812                         return ERR_PTR(-EEXIST);
813         }
814         return rec;
815 }
816
817 static void free_orphan_data_extents(struct list_head *orphan_extents)
818 {
819         struct orphan_data_extent *orphan;
820
821         while (!list_empty(orphan_extents)) {
822                 orphan = list_entry(orphan_extents->next,
823                                     struct orphan_data_extent, list);
824                 list_del(&orphan->list);
825                 free(orphan);
826         }
827 }
828
829 static void free_inode_rec(struct inode_record *rec)
830 {
831         struct inode_backref *backref;
832
833         if (--rec->refs > 0)
834                 return;
835
836         while (!list_empty(&rec->backrefs)) {
837                 backref = list_entry(rec->backrefs.next,
838                                      struct inode_backref, list);
839                 list_del(&backref->list);
840                 free(backref);
841         }
842         free_orphan_data_extents(&rec->orphan_extents);
843         free_file_extent_holes(&rec->holes);
844         free(rec);
845 }
846
847 static int can_free_inode_rec(struct inode_record *rec)
848 {
849         if (!rec->errors && rec->checked && rec->found_inode_item &&
850             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
851                 return 1;
852         return 0;
853 }
854
855 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
856                                  struct inode_record *rec)
857 {
858         struct cache_extent *cache;
859         struct inode_backref *tmp, *backref;
860         struct ptr_node *node;
861         unsigned char filetype;
862
863         if (!rec->found_inode_item)
864                 return;
865
866         filetype = imode_to_type(rec->imode);
867         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
868                 if (backref->found_dir_item && backref->found_dir_index) {
869                         if (backref->filetype != filetype)
870                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
871                         if (!backref->errors && backref->found_inode_ref &&
872                             rec->nlink == rec->found_link) {
873                                 list_del(&backref->list);
874                                 free(backref);
875                         }
876                 }
877         }
878
879         if (!rec->checked || rec->merging)
880                 return;
881
882         if (S_ISDIR(rec->imode)) {
883                 if (rec->found_size != rec->isize)
884                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
885                 if (rec->found_file_extent)
886                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
887         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
888                 if (rec->found_dir_item)
889                         rec->errors |= I_ERR_ODD_DIR_ITEM;
890                 if (rec->found_size != rec->nbytes)
891                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
892                 if (rec->nlink > 0 && !no_holes &&
893                     (rec->extent_end < rec->isize ||
894                      first_extent_gap(&rec->holes) < rec->isize))
895                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
896         }
897
898         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
899                 if (rec->found_csum_item && rec->nodatasum)
900                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
901                 if (rec->some_csum_missing && !rec->nodatasum)
902                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
903         }
904
905         BUG_ON(rec->refs != 1);
906         if (can_free_inode_rec(rec)) {
907                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
908                 node = container_of(cache, struct ptr_node, cache);
909                 BUG_ON(node->data != rec);
910                 remove_cache_extent(inode_cache, &node->cache);
911                 free(node);
912                 free_inode_rec(rec);
913         }
914 }
915
916 static int check_orphan_item(struct btrfs_root *root, u64 ino)
917 {
918         struct btrfs_path path;
919         struct btrfs_key key;
920         int ret;
921
922         key.objectid = BTRFS_ORPHAN_OBJECTID;
923         key.type = BTRFS_ORPHAN_ITEM_KEY;
924         key.offset = ino;
925
926         btrfs_init_path(&path);
927         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
928         btrfs_release_path(&path);
929         if (ret > 0)
930                 ret = -ENOENT;
931         return ret;
932 }
933
934 static int process_inode_item(struct extent_buffer *eb,
935                               int slot, struct btrfs_key *key,
936                               struct shared_node *active_node)
937 {
938         struct inode_record *rec;
939         struct btrfs_inode_item *item;
940
941         rec = active_node->current;
942         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
943         if (rec->found_inode_item) {
944                 rec->errors |= I_ERR_DUP_INODE_ITEM;
945                 return 1;
946         }
947         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
948         rec->nlink = btrfs_inode_nlink(eb, item);
949         rec->isize = btrfs_inode_size(eb, item);
950         rec->nbytes = btrfs_inode_nbytes(eb, item);
951         rec->imode = btrfs_inode_mode(eb, item);
952         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
953                 rec->nodatasum = 1;
954         rec->found_inode_item = 1;
955         if (rec->nlink == 0)
956                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
957         maybe_free_inode_rec(&active_node->inode_cache, rec);
958         return 0;
959 }
960
961 static struct inode_backref *get_inode_backref(struct inode_record *rec,
962                                                 const char *name,
963                                                 int namelen, u64 dir)
964 {
965         struct inode_backref *backref;
966
967         list_for_each_entry(backref, &rec->backrefs, list) {
968                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
969                         break;
970                 if (backref->dir != dir || backref->namelen != namelen)
971                         continue;
972                 if (memcmp(name, backref->name, namelen))
973                         continue;
974                 return backref;
975         }
976
977         backref = malloc(sizeof(*backref) + namelen + 1);
978         if (!backref)
979                 return NULL;
980         memset(backref, 0, sizeof(*backref));
981         backref->dir = dir;
982         backref->namelen = namelen;
983         memcpy(backref->name, name, namelen);
984         backref->name[namelen] = '\0';
985         list_add_tail(&backref->list, &rec->backrefs);
986         return backref;
987 }
988
989 static int add_inode_backref(struct cache_tree *inode_cache,
990                              u64 ino, u64 dir, u64 index,
991                              const char *name, int namelen,
992                              int filetype, int itemtype, int errors)
993 {
994         struct inode_record *rec;
995         struct inode_backref *backref;
996
997         rec = get_inode_rec(inode_cache, ino, 1);
998         BUG_ON(IS_ERR(rec));
999         backref = get_inode_backref(rec, name, namelen, dir);
1000         BUG_ON(!backref);
1001         if (errors)
1002                 backref->errors |= errors;
1003         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1004                 if (backref->found_dir_index)
1005                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1006                 if (backref->found_inode_ref && backref->index != index)
1007                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1008                 if (backref->found_dir_item && backref->filetype != filetype)
1009                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1010
1011                 backref->index = index;
1012                 backref->filetype = filetype;
1013                 backref->found_dir_index = 1;
1014         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1015                 rec->found_link++;
1016                 if (backref->found_dir_item)
1017                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1018                 if (backref->found_dir_index && backref->filetype != filetype)
1019                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1020
1021                 backref->filetype = filetype;
1022                 backref->found_dir_item = 1;
1023         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1024                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1025                 if (backref->found_inode_ref)
1026                         backref->errors |= REF_ERR_DUP_INODE_REF;
1027                 if (backref->found_dir_index && backref->index != index)
1028                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1029                 else
1030                         backref->index = index;
1031
1032                 backref->ref_type = itemtype;
1033                 backref->found_inode_ref = 1;
1034         } else {
1035                 BUG_ON(1);
1036         }
1037
1038         maybe_free_inode_rec(inode_cache, rec);
1039         return 0;
1040 }
1041
1042 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1043                             struct cache_tree *dst_cache)
1044 {
1045         struct inode_backref *backref;
1046         u32 dir_count = 0;
1047         int ret = 0;
1048
1049         dst->merging = 1;
1050         list_for_each_entry(backref, &src->backrefs, list) {
1051                 if (backref->found_dir_index) {
1052                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1053                                         backref->index, backref->name,
1054                                         backref->namelen, backref->filetype,
1055                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1056                 }
1057                 if (backref->found_dir_item) {
1058                         dir_count++;
1059                         add_inode_backref(dst_cache, dst->ino,
1060                                         backref->dir, 0, backref->name,
1061                                         backref->namelen, backref->filetype,
1062                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1063                 }
1064                 if (backref->found_inode_ref) {
1065                         add_inode_backref(dst_cache, dst->ino,
1066                                         backref->dir, backref->index,
1067                                         backref->name, backref->namelen, 0,
1068                                         backref->ref_type, backref->errors);
1069                 }
1070         }
1071
1072         if (src->found_dir_item)
1073                 dst->found_dir_item = 1;
1074         if (src->found_file_extent)
1075                 dst->found_file_extent = 1;
1076         if (src->found_csum_item)
1077                 dst->found_csum_item = 1;
1078         if (src->some_csum_missing)
1079                 dst->some_csum_missing = 1;
1080         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1081                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1082                 if (ret < 0)
1083                         return ret;
1084         }
1085
1086         BUG_ON(src->found_link < dir_count);
1087         dst->found_link += src->found_link - dir_count;
1088         dst->found_size += src->found_size;
1089         if (src->extent_start != (u64)-1) {
1090                 if (dst->extent_start == (u64)-1) {
1091                         dst->extent_start = src->extent_start;
1092                         dst->extent_end = src->extent_end;
1093                 } else {
1094                         if (dst->extent_end > src->extent_start)
1095                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1096                         else if (dst->extent_end < src->extent_start) {
1097                                 ret = add_file_extent_hole(&dst->holes,
1098                                         dst->extent_end,
1099                                         src->extent_start - dst->extent_end);
1100                         }
1101                         if (dst->extent_end < src->extent_end)
1102                                 dst->extent_end = src->extent_end;
1103                 }
1104         }
1105
1106         dst->errors |= src->errors;
1107         if (src->found_inode_item) {
1108                 if (!dst->found_inode_item) {
1109                         dst->nlink = src->nlink;
1110                         dst->isize = src->isize;
1111                         dst->nbytes = src->nbytes;
1112                         dst->imode = src->imode;
1113                         dst->nodatasum = src->nodatasum;
1114                         dst->found_inode_item = 1;
1115                 } else {
1116                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1117                 }
1118         }
1119         dst->merging = 0;
1120
1121         return 0;
1122 }
1123
1124 static int splice_shared_node(struct shared_node *src_node,
1125                               struct shared_node *dst_node)
1126 {
1127         struct cache_extent *cache;
1128         struct ptr_node *node, *ins;
1129         struct cache_tree *src, *dst;
1130         struct inode_record *rec, *conflict;
1131         u64 current_ino = 0;
1132         int splice = 0;
1133         int ret;
1134
1135         if (--src_node->refs == 0)
1136                 splice = 1;
1137         if (src_node->current)
1138                 current_ino = src_node->current->ino;
1139
1140         src = &src_node->root_cache;
1141         dst = &dst_node->root_cache;
1142 again:
1143         cache = search_cache_extent(src, 0);
1144         while (cache) {
1145                 node = container_of(cache, struct ptr_node, cache);
1146                 rec = node->data;
1147                 cache = next_cache_extent(cache);
1148
1149                 if (splice) {
1150                         remove_cache_extent(src, &node->cache);
1151                         ins = node;
1152                 } else {
1153                         ins = malloc(sizeof(*ins));
1154                         BUG_ON(!ins);
1155                         ins->cache.start = node->cache.start;
1156                         ins->cache.size = node->cache.size;
1157                         ins->data = rec;
1158                         rec->refs++;
1159                 }
1160                 ret = insert_cache_extent(dst, &ins->cache);
1161                 if (ret == -EEXIST) {
1162                         conflict = get_inode_rec(dst, rec->ino, 1);
1163                         BUG_ON(IS_ERR(conflict));
1164                         merge_inode_recs(rec, conflict, dst);
1165                         if (rec->checked) {
1166                                 conflict->checked = 1;
1167                                 if (dst_node->current == conflict)
1168                                         dst_node->current = NULL;
1169                         }
1170                         maybe_free_inode_rec(dst, conflict);
1171                         free_inode_rec(rec);
1172                         free(ins);
1173                 } else {
1174                         BUG_ON(ret);
1175                 }
1176         }
1177
1178         if (src == &src_node->root_cache) {
1179                 src = &src_node->inode_cache;
1180                 dst = &dst_node->inode_cache;
1181                 goto again;
1182         }
1183
1184         if (current_ino > 0 && (!dst_node->current ||
1185             current_ino > dst_node->current->ino)) {
1186                 if (dst_node->current) {
1187                         dst_node->current->checked = 1;
1188                         maybe_free_inode_rec(dst, dst_node->current);
1189                 }
1190                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1191                 BUG_ON(IS_ERR(dst_node->current));
1192         }
1193         return 0;
1194 }
1195
1196 static void free_inode_ptr(struct cache_extent *cache)
1197 {
1198         struct ptr_node *node;
1199         struct inode_record *rec;
1200
1201         node = container_of(cache, struct ptr_node, cache);
1202         rec = node->data;
1203         free_inode_rec(rec);
1204         free(node);
1205 }
1206
1207 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1208
1209 static struct shared_node *find_shared_node(struct cache_tree *shared,
1210                                             u64 bytenr)
1211 {
1212         struct cache_extent *cache;
1213         struct shared_node *node;
1214
1215         cache = lookup_cache_extent(shared, bytenr, 1);
1216         if (cache) {
1217                 node = container_of(cache, struct shared_node, cache);
1218                 return node;
1219         }
1220         return NULL;
1221 }
1222
1223 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1224 {
1225         int ret;
1226         struct shared_node *node;
1227
1228         node = calloc(1, sizeof(*node));
1229         if (!node)
1230                 return -ENOMEM;
1231         node->cache.start = bytenr;
1232         node->cache.size = 1;
1233         cache_tree_init(&node->root_cache);
1234         cache_tree_init(&node->inode_cache);
1235         node->refs = refs;
1236
1237         ret = insert_cache_extent(shared, &node->cache);
1238
1239         return ret;
1240 }
1241
1242 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1243                              struct walk_control *wc, int level)
1244 {
1245         struct shared_node *node;
1246         struct shared_node *dest;
1247         int ret;
1248
1249         if (level == wc->active_node)
1250                 return 0;
1251
1252         BUG_ON(wc->active_node <= level);
1253         node = find_shared_node(&wc->shared, bytenr);
1254         if (!node) {
1255                 ret = add_shared_node(&wc->shared, bytenr, refs);
1256                 BUG_ON(ret);
1257                 node = find_shared_node(&wc->shared, bytenr);
1258                 wc->nodes[level] = node;
1259                 wc->active_node = level;
1260                 return 0;
1261         }
1262
1263         if (wc->root_level == wc->active_node &&
1264             btrfs_root_refs(&root->root_item) == 0) {
1265                 if (--node->refs == 0) {
1266                         free_inode_recs_tree(&node->root_cache);
1267                         free_inode_recs_tree(&node->inode_cache);
1268                         remove_cache_extent(&wc->shared, &node->cache);
1269                         free(node);
1270                 }
1271                 return 1;
1272         }
1273
1274         dest = wc->nodes[wc->active_node];
1275         splice_shared_node(node, dest);
1276         if (node->refs == 0) {
1277                 remove_cache_extent(&wc->shared, &node->cache);
1278                 free(node);
1279         }
1280         return 1;
1281 }
1282
1283 static int leave_shared_node(struct btrfs_root *root,
1284                              struct walk_control *wc, int level)
1285 {
1286         struct shared_node *node;
1287         struct shared_node *dest;
1288         int i;
1289
1290         if (level == wc->root_level)
1291                 return 0;
1292
1293         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1294                 if (wc->nodes[i])
1295                         break;
1296         }
1297         BUG_ON(i >= BTRFS_MAX_LEVEL);
1298
1299         node = wc->nodes[wc->active_node];
1300         wc->nodes[wc->active_node] = NULL;
1301         wc->active_node = i;
1302
1303         dest = wc->nodes[wc->active_node];
1304         if (wc->active_node < wc->root_level ||
1305             btrfs_root_refs(&root->root_item) > 0) {
1306                 BUG_ON(node->refs <= 1);
1307                 splice_shared_node(node, dest);
1308         } else {
1309                 BUG_ON(node->refs < 2);
1310                 node->refs--;
1311         }
1312         return 0;
1313 }
1314
1315 /*
1316  * Returns:
1317  * < 0 - on error
1318  * 1   - if the root with id child_root_id is a child of root parent_root_id
1319  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1320  *       has other root(s) as parent(s)
1321  * 2   - if the root child_root_id doesn't have any parent roots
1322  */
1323 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1324                          u64 child_root_id)
1325 {
1326         struct btrfs_path path;
1327         struct btrfs_key key;
1328         struct extent_buffer *leaf;
1329         int has_parent = 0;
1330         int ret;
1331
1332         btrfs_init_path(&path);
1333
1334         key.objectid = parent_root_id;
1335         key.type = BTRFS_ROOT_REF_KEY;
1336         key.offset = child_root_id;
1337         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1338                                 0, 0);
1339         if (ret < 0)
1340                 return ret;
1341         btrfs_release_path(&path);
1342         if (!ret)
1343                 return 1;
1344
1345         key.objectid = child_root_id;
1346         key.type = BTRFS_ROOT_BACKREF_KEY;
1347         key.offset = 0;
1348         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1349                                 0, 0);
1350         if (ret < 0)
1351                 goto out;
1352
1353         while (1) {
1354                 leaf = path.nodes[0];
1355                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1356                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1357                         if (ret)
1358                                 break;
1359                         leaf = path.nodes[0];
1360                 }
1361
1362                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1363                 if (key.objectid != child_root_id ||
1364                     key.type != BTRFS_ROOT_BACKREF_KEY)
1365                         break;
1366
1367                 has_parent = 1;
1368
1369                 if (key.offset == parent_root_id) {
1370                         btrfs_release_path(&path);
1371                         return 1;
1372                 }
1373
1374                 path.slots[0]++;
1375         }
1376 out:
1377         btrfs_release_path(&path);
1378         if (ret < 0)
1379                 return ret;
1380         return has_parent ? 0 : 2;
1381 }
1382
1383 static int process_dir_item(struct btrfs_root *root,
1384                             struct extent_buffer *eb,
1385                             int slot, struct btrfs_key *key,
1386                             struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u32 data_len;
1393         int error;
1394         int nritems = 0;
1395         int filetype;
1396         struct btrfs_dir_item *di;
1397         struct inode_record *rec;
1398         struct cache_tree *root_cache;
1399         struct cache_tree *inode_cache;
1400         struct btrfs_key location;
1401         char namebuf[BTRFS_NAME_LEN];
1402
1403         root_cache = &active_node->root_cache;
1404         inode_cache = &active_node->inode_cache;
1405         rec = active_node->current;
1406         rec->found_dir_item = 1;
1407
1408         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1409         total = btrfs_item_size_nr(eb, slot);
1410         while (cur < total) {
1411                 nritems++;
1412                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1413                 name_len = btrfs_dir_name_len(eb, di);
1414                 data_len = btrfs_dir_data_len(eb, di);
1415                 filetype = btrfs_dir_type(eb, di);
1416
1417                 rec->found_size += name_len;
1418                 if (name_len <= BTRFS_NAME_LEN) {
1419                         len = name_len;
1420                         error = 0;
1421                 } else {
1422                         len = BTRFS_NAME_LEN;
1423                         error = REF_ERR_NAME_TOO_LONG;
1424                 }
1425                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1426
1427                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1428                         add_inode_backref(inode_cache, location.objectid,
1429                                           key->objectid, key->offset, namebuf,
1430                                           len, filetype, key->type, error);
1431                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1432                         add_inode_backref(root_cache, location.objectid,
1433                                           key->objectid, key->offset,
1434                                           namebuf, len, filetype,
1435                                           key->type, error);
1436                 } else {
1437                         fprintf(stderr, "invalid location in dir item %u\n",
1438                                 location.type);
1439                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1440                                           key->objectid, key->offset, namebuf,
1441                                           len, filetype, key->type, error);
1442                 }
1443
1444                 len = sizeof(*di) + name_len + data_len;
1445                 di = (struct btrfs_dir_item *)((char *)di + len);
1446                 cur += len;
1447         }
1448         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1449                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1450
1451         return 0;
1452 }
1453
1454 static int process_inode_ref(struct extent_buffer *eb,
1455                              int slot, struct btrfs_key *key,
1456                              struct shared_node *active_node)
1457 {
1458         u32 total;
1459         u32 cur = 0;
1460         u32 len;
1461         u32 name_len;
1462         u64 index;
1463         int error;
1464         struct cache_tree *inode_cache;
1465         struct btrfs_inode_ref *ref;
1466         char namebuf[BTRFS_NAME_LEN];
1467
1468         inode_cache = &active_node->inode_cache;
1469
1470         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1471         total = btrfs_item_size_nr(eb, slot);
1472         while (cur < total) {
1473                 name_len = btrfs_inode_ref_name_len(eb, ref);
1474                 index = btrfs_inode_ref_index(eb, ref);
1475                 if (name_len <= BTRFS_NAME_LEN) {
1476                         len = name_len;
1477                         error = 0;
1478                 } else {
1479                         len = BTRFS_NAME_LEN;
1480                         error = REF_ERR_NAME_TOO_LONG;
1481                 }
1482                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1483                 add_inode_backref(inode_cache, key->objectid, key->offset,
1484                                   index, namebuf, len, 0, key->type, error);
1485
1486                 len = sizeof(*ref) + name_len;
1487                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1488                 cur += len;
1489         }
1490         return 0;
1491 }
1492
1493 static int process_inode_extref(struct extent_buffer *eb,
1494                                 int slot, struct btrfs_key *key,
1495                                 struct shared_node *active_node)
1496 {
1497         u32 total;
1498         u32 cur = 0;
1499         u32 len;
1500         u32 name_len;
1501         u64 index;
1502         u64 parent;
1503         int error;
1504         struct cache_tree *inode_cache;
1505         struct btrfs_inode_extref *extref;
1506         char namebuf[BTRFS_NAME_LEN];
1507
1508         inode_cache = &active_node->inode_cache;
1509
1510         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1511         total = btrfs_item_size_nr(eb, slot);
1512         while (cur < total) {
1513                 name_len = btrfs_inode_extref_name_len(eb, extref);
1514                 index = btrfs_inode_extref_index(eb, extref);
1515                 parent = btrfs_inode_extref_parent(eb, extref);
1516                 if (name_len <= BTRFS_NAME_LEN) {
1517                         len = name_len;
1518                         error = 0;
1519                 } else {
1520                         len = BTRFS_NAME_LEN;
1521                         error = REF_ERR_NAME_TOO_LONG;
1522                 }
1523                 read_extent_buffer(eb, namebuf,
1524                                    (unsigned long)(extref + 1), len);
1525                 add_inode_backref(inode_cache, key->objectid, parent,
1526                                   index, namebuf, len, 0, key->type, error);
1527
1528                 len = sizeof(*extref) + name_len;
1529                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1530                 cur += len;
1531         }
1532         return 0;
1533
1534 }
1535
1536 static int count_csum_range(struct btrfs_root *root, u64 start,
1537                             u64 len, u64 *found)
1538 {
1539         struct btrfs_key key;
1540         struct btrfs_path path;
1541         struct extent_buffer *leaf;
1542         int ret;
1543         size_t size;
1544         *found = 0;
1545         u64 csum_end;
1546         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1547
1548         btrfs_init_path(&path);
1549
1550         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1551         key.offset = start;
1552         key.type = BTRFS_EXTENT_CSUM_KEY;
1553
1554         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1555                                 &key, &path, 0, 0);
1556         if (ret < 0)
1557                 goto out;
1558         if (ret > 0 && path.slots[0] > 0) {
1559                 leaf = path.nodes[0];
1560                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1561                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1562                     key.type == BTRFS_EXTENT_CSUM_KEY)
1563                         path.slots[0]--;
1564         }
1565
1566         while (len > 0) {
1567                 leaf = path.nodes[0];
1568                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1569                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1570                         if (ret > 0)
1571                                 break;
1572                         else if (ret < 0)
1573                                 goto out;
1574                         leaf = path.nodes[0];
1575                 }
1576
1577                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1578                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1579                     key.type != BTRFS_EXTENT_CSUM_KEY)
1580                         break;
1581
1582                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1583                 if (key.offset >= start + len)
1584                         break;
1585
1586                 if (key.offset > start)
1587                         start = key.offset;
1588
1589                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1590                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1591                 if (csum_end > start) {
1592                         size = min(csum_end - start, len);
1593                         len -= size;
1594                         start += size;
1595                         *found += size;
1596                 }
1597
1598                 path.slots[0]++;
1599         }
1600 out:
1601         btrfs_release_path(&path);
1602         if (ret < 0)
1603                 return ret;
1604         return 0;
1605 }
1606
1607 static int process_file_extent(struct btrfs_root *root,
1608                                 struct extent_buffer *eb,
1609                                 int slot, struct btrfs_key *key,
1610                                 struct shared_node *active_node)
1611 {
1612         struct inode_record *rec;
1613         struct btrfs_file_extent_item *fi;
1614         u64 num_bytes = 0;
1615         u64 disk_bytenr = 0;
1616         u64 extent_offset = 0;
1617         u64 mask = root->sectorsize - 1;
1618         int extent_type;
1619         int ret;
1620
1621         rec = active_node->current;
1622         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1623         rec->found_file_extent = 1;
1624
1625         if (rec->extent_start == (u64)-1) {
1626                 rec->extent_start = key->offset;
1627                 rec->extent_end = key->offset;
1628         }
1629
1630         if (rec->extent_end > key->offset)
1631                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1632         else if (rec->extent_end < key->offset) {
1633                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1634                                            key->offset - rec->extent_end);
1635                 if (ret < 0)
1636                         return ret;
1637         }
1638
1639         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1640         extent_type = btrfs_file_extent_type(eb, fi);
1641
1642         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1643                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1644                 if (num_bytes == 0)
1645                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1646                 rec->found_size += num_bytes;
1647                 num_bytes = (num_bytes + mask) & ~mask;
1648         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1649                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1650                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1651                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1652                 extent_offset = btrfs_file_extent_offset(eb, fi);
1653                 if (num_bytes == 0 || (num_bytes & mask))
1654                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1655                 if (num_bytes + extent_offset >
1656                     btrfs_file_extent_ram_bytes(eb, fi))
1657                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1658                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1659                     (btrfs_file_extent_compression(eb, fi) ||
1660                      btrfs_file_extent_encryption(eb, fi) ||
1661                      btrfs_file_extent_other_encoding(eb, fi)))
1662                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1663                 if (disk_bytenr > 0)
1664                         rec->found_size += num_bytes;
1665         } else {
1666                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1667         }
1668         rec->extent_end = key->offset + num_bytes;
1669
1670         /*
1671          * The data reloc tree will copy full extents into its inode and then
1672          * copy the corresponding csums.  Because the extent it copied could be
1673          * a preallocated extent that hasn't been written to yet there may be no
1674          * csums to copy, ergo we won't have csums for our file extent.  This is
1675          * ok so just don't bother checking csums if the inode belongs to the
1676          * data reloc tree.
1677          */
1678         if (disk_bytenr > 0 &&
1679             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1680                 u64 found;
1681                 if (btrfs_file_extent_compression(eb, fi))
1682                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1683                 else
1684                         disk_bytenr += extent_offset;
1685
1686                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1687                 if (ret < 0)
1688                         return ret;
1689                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1690                         if (found > 0)
1691                                 rec->found_csum_item = 1;
1692                         if (found < num_bytes)
1693                                 rec->some_csum_missing = 1;
1694                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1695                         if (found > 0)
1696                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1697                 }
1698         }
1699         return 0;
1700 }
1701
1702 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1703                             struct walk_control *wc)
1704 {
1705         struct btrfs_key key;
1706         u32 nritems;
1707         int i;
1708         int ret = 0;
1709         struct cache_tree *inode_cache;
1710         struct shared_node *active_node;
1711
1712         if (wc->root_level == wc->active_node &&
1713             btrfs_root_refs(&root->root_item) == 0)
1714                 return 0;
1715
1716         active_node = wc->nodes[wc->active_node];
1717         inode_cache = &active_node->inode_cache;
1718         nritems = btrfs_header_nritems(eb);
1719         for (i = 0; i < nritems; i++) {
1720                 btrfs_item_key_to_cpu(eb, &key, i);
1721
1722                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1723                         continue;
1724                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1725                         continue;
1726
1727                 if (active_node->current == NULL ||
1728                     active_node->current->ino < key.objectid) {
1729                         if (active_node->current) {
1730                                 active_node->current->checked = 1;
1731                                 maybe_free_inode_rec(inode_cache,
1732                                                      active_node->current);
1733                         }
1734                         active_node->current = get_inode_rec(inode_cache,
1735                                                              key.objectid, 1);
1736                         BUG_ON(IS_ERR(active_node->current));
1737                 }
1738                 switch (key.type) {
1739                 case BTRFS_DIR_ITEM_KEY:
1740                 case BTRFS_DIR_INDEX_KEY:
1741                         ret = process_dir_item(root, eb, i, &key, active_node);
1742                         break;
1743                 case BTRFS_INODE_REF_KEY:
1744                         ret = process_inode_ref(eb, i, &key, active_node);
1745                         break;
1746                 case BTRFS_INODE_EXTREF_KEY:
1747                         ret = process_inode_extref(eb, i, &key, active_node);
1748                         break;
1749                 case BTRFS_INODE_ITEM_KEY:
1750                         ret = process_inode_item(eb, i, &key, active_node);
1751                         break;
1752                 case BTRFS_EXTENT_DATA_KEY:
1753                         ret = process_file_extent(root, eb, i, &key,
1754                                                   active_node);
1755                         break;
1756                 default:
1757                         break;
1758                 };
1759         }
1760         return ret;
1761 }
1762
1763 static void reada_walk_down(struct btrfs_root *root,
1764                             struct extent_buffer *node, int slot)
1765 {
1766         u64 bytenr;
1767         u64 ptr_gen;
1768         u32 nritems;
1769         u32 blocksize;
1770         int i;
1771         int level;
1772
1773         level = btrfs_header_level(node);
1774         if (level != 1)
1775                 return;
1776
1777         nritems = btrfs_header_nritems(node);
1778         blocksize = root->nodesize;
1779         for (i = slot; i < nritems; i++) {
1780                 bytenr = btrfs_node_blockptr(node, i);
1781                 ptr_gen = btrfs_node_ptr_generation(node, i);
1782                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1783         }
1784 }
1785
1786 /*
1787  * Check the child node/leaf by the following condition:
1788  * 1. the first item key of the node/leaf should be the same with the one
1789  *    in parent.
1790  * 2. block in parent node should match the child node/leaf.
1791  * 3. generation of parent node and child's header should be consistent.
1792  *
1793  * Or the child node/leaf pointed by the key in parent is not valid.
1794  *
1795  * We hope to check leaf owner too, but since subvol may share leaves,
1796  * which makes leaf owner check not so strong, key check should be
1797  * sufficient enough for that case.
1798  */
1799 static int check_child_node(struct btrfs_root *root,
1800                             struct extent_buffer *parent, int slot,
1801                             struct extent_buffer *child)
1802 {
1803         struct btrfs_key parent_key;
1804         struct btrfs_key child_key;
1805         int ret = 0;
1806
1807         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1808         if (btrfs_header_level(child) == 0)
1809                 btrfs_item_key_to_cpu(child, &child_key, 0);
1810         else
1811                 btrfs_node_key_to_cpu(child, &child_key, 0);
1812
1813         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1814                 ret = -EINVAL;
1815                 fprintf(stderr,
1816                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1817                         parent_key.objectid, parent_key.type, parent_key.offset,
1818                         child_key.objectid, child_key.type, child_key.offset);
1819         }
1820         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1821                 ret = -EINVAL;
1822                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1823                         btrfs_node_blockptr(parent, slot),
1824                         btrfs_header_bytenr(child));
1825         }
1826         if (btrfs_node_ptr_generation(parent, slot) !=
1827             btrfs_header_generation(child)) {
1828                 ret = -EINVAL;
1829                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1830                         btrfs_header_generation(child),
1831                         btrfs_node_ptr_generation(parent, slot));
1832         }
1833         return ret;
1834 }
1835
1836 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1837                           struct walk_control *wc, int *level)
1838 {
1839         enum btrfs_tree_block_status status;
1840         u64 bytenr;
1841         u64 ptr_gen;
1842         struct extent_buffer *next;
1843         struct extent_buffer *cur;
1844         u32 blocksize;
1845         int ret, err = 0;
1846         u64 refs;
1847
1848         WARN_ON(*level < 0);
1849         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1850         ret = btrfs_lookup_extent_info(NULL, root,
1851                                        path->nodes[*level]->start,
1852                                        *level, 1, &refs, NULL);
1853         if (ret < 0) {
1854                 err = ret;
1855                 goto out;
1856         }
1857
1858         if (refs > 1) {
1859                 ret = enter_shared_node(root, path->nodes[*level]->start,
1860                                         refs, wc, *level);
1861                 if (ret > 0) {
1862                         err = ret;
1863                         goto out;
1864                 }
1865         }
1866
1867         while (*level >= 0) {
1868                 WARN_ON(*level < 0);
1869                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1870                 cur = path->nodes[*level];
1871
1872                 if (btrfs_header_level(cur) != *level)
1873                         WARN_ON(1);
1874
1875                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1876                         break;
1877                 if (*level == 0) {
1878                         ret = process_one_leaf(root, cur, wc);
1879                         if (ret < 0)
1880                                 err = ret;
1881                         break;
1882                 }
1883                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1884                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1885                 blocksize = root->nodesize;
1886                 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
1887                                                1, &refs, NULL);
1888                 if (ret < 0)
1889                         refs = 0;
1890
1891                 if (refs > 1) {
1892                         ret = enter_shared_node(root, bytenr, refs,
1893                                                 wc, *level - 1);
1894                         if (ret > 0) {
1895                                 path->slots[*level]++;
1896                                 continue;
1897                         }
1898                 }
1899
1900                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1901                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
1902                         free_extent_buffer(next);
1903                         reada_walk_down(root, cur, path->slots[*level]);
1904                         next = read_tree_block(root, bytenr, blocksize,
1905                                                ptr_gen);
1906                         if (!extent_buffer_uptodate(next)) {
1907                                 struct btrfs_key node_key;
1908
1909                                 btrfs_node_key_to_cpu(path->nodes[*level],
1910                                                       &node_key,
1911                                                       path->slots[*level]);
1912                                 btrfs_add_corrupt_extent_record(root->fs_info,
1913                                                 &node_key,
1914                                                 path->nodes[*level]->start,
1915                                                 root->nodesize, *level);
1916                                 err = -EIO;
1917                                 goto out;
1918                         }
1919                 }
1920
1921                 ret = check_child_node(root, cur, path->slots[*level], next);
1922                 if (ret) {
1923                         err = ret;
1924                         goto out;
1925                 }
1926
1927                 if (btrfs_is_leaf(next))
1928                         status = btrfs_check_leaf(root, NULL, next);
1929                 else
1930                         status = btrfs_check_node(root, NULL, next);
1931                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
1932                         free_extent_buffer(next);
1933                         err = -EIO;
1934                         goto out;
1935                 }
1936
1937                 *level = *level - 1;
1938                 free_extent_buffer(path->nodes[*level]);
1939                 path->nodes[*level] = next;
1940                 path->slots[*level] = 0;
1941         }
1942 out:
1943         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
1944         return err;
1945 }
1946
1947 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
1948                         struct walk_control *wc, int *level)
1949 {
1950         int i;
1951         struct extent_buffer *leaf;
1952
1953         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1954                 leaf = path->nodes[i];
1955                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
1956                         path->slots[i]++;
1957                         *level = i;
1958                         return 0;
1959                 } else {
1960                         free_extent_buffer(path->nodes[*level]);
1961                         path->nodes[*level] = NULL;
1962                         BUG_ON(*level > wc->active_node);
1963                         if (*level == wc->active_node)
1964                                 leave_shared_node(root, wc, *level);
1965                         *level = i + 1;
1966                 }
1967         }
1968         return 1;
1969 }
1970
1971 static int check_root_dir(struct inode_record *rec)
1972 {
1973         struct inode_backref *backref;
1974         int ret = -1;
1975
1976         if (!rec->found_inode_item || rec->errors)
1977                 goto out;
1978         if (rec->nlink != 1 || rec->found_link != 0)
1979                 goto out;
1980         if (list_empty(&rec->backrefs))
1981                 goto out;
1982         backref = list_entry(rec->backrefs.next, struct inode_backref, list);
1983         if (!backref->found_inode_ref)
1984                 goto out;
1985         if (backref->index != 0 || backref->namelen != 2 ||
1986             memcmp(backref->name, "..", 2))
1987                 goto out;
1988         if (backref->found_dir_index || backref->found_dir_item)
1989                 goto out;
1990         ret = 0;
1991 out:
1992         return ret;
1993 }
1994
1995 static int repair_inode_isize(struct btrfs_trans_handle *trans,
1996                               struct btrfs_root *root, struct btrfs_path *path,
1997                               struct inode_record *rec)
1998 {
1999         struct btrfs_inode_item *ei;
2000         struct btrfs_key key;
2001         int ret;
2002
2003         key.objectid = rec->ino;
2004         key.type = BTRFS_INODE_ITEM_KEY;
2005         key.offset = (u64)-1;
2006
2007         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2008         if (ret < 0)
2009                 goto out;
2010         if (ret) {
2011                 if (!path->slots[0]) {
2012                         ret = -ENOENT;
2013                         goto out;
2014                 }
2015                 path->slots[0]--;
2016                 ret = 0;
2017         }
2018         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2019         if (key.objectid != rec->ino) {
2020                 ret = -ENOENT;
2021                 goto out;
2022         }
2023
2024         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2025                             struct btrfs_inode_item);
2026         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2027         btrfs_mark_buffer_dirty(path->nodes[0]);
2028         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2029         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2030                root->root_key.objectid);
2031 out:
2032         btrfs_release_path(path);
2033         return ret;
2034 }
2035
2036 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2037                                     struct btrfs_root *root,
2038                                     struct btrfs_path *path,
2039                                     struct inode_record *rec)
2040 {
2041         int ret;
2042
2043         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2044         btrfs_release_path(path);
2045         if (!ret)
2046                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2047         return ret;
2048 }
2049
2050 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2051                                struct btrfs_root *root,
2052                                struct btrfs_path *path,
2053                                struct inode_record *rec)
2054 {
2055         struct btrfs_inode_item *ei;
2056         struct btrfs_key key;
2057         int ret = 0;
2058
2059         key.objectid = rec->ino;
2060         key.type = BTRFS_INODE_ITEM_KEY;
2061         key.offset = 0;
2062
2063         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2064         if (ret) {
2065                 if (ret > 0)
2066                         ret = -ENOENT;
2067                 goto out;
2068         }
2069
2070         /* Since ret == 0, no need to check anything */
2071         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2072                             struct btrfs_inode_item);
2073         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2074         btrfs_mark_buffer_dirty(path->nodes[0]);
2075         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2076         printf("reset nbytes for ino %llu root %llu\n",
2077                rec->ino, root->root_key.objectid);
2078 out:
2079         btrfs_release_path(path);
2080         return ret;
2081 }
2082
2083 static int add_missing_dir_index(struct btrfs_root *root,
2084                                  struct cache_tree *inode_cache,
2085                                  struct inode_record *rec,
2086                                  struct inode_backref *backref)
2087 {
2088         struct btrfs_path *path;
2089         struct btrfs_trans_handle *trans;
2090         struct btrfs_dir_item *dir_item;
2091         struct extent_buffer *leaf;
2092         struct btrfs_key key;
2093         struct btrfs_disk_key disk_key;
2094         struct inode_record *dir_rec;
2095         unsigned long name_ptr;
2096         u32 data_size = sizeof(*dir_item) + backref->namelen;
2097         int ret;
2098
2099         path = btrfs_alloc_path();
2100         if (!path)
2101                 return -ENOMEM;
2102
2103         trans = btrfs_start_transaction(root, 1);
2104         if (IS_ERR(trans)) {
2105                 btrfs_free_path(path);
2106                 return PTR_ERR(trans);
2107         }
2108
2109         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2110                 (unsigned long long)rec->ino);
2111         key.objectid = backref->dir;
2112         key.type = BTRFS_DIR_INDEX_KEY;
2113         key.offset = backref->index;
2114
2115         ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2116         BUG_ON(ret);
2117
2118         leaf = path->nodes[0];
2119         dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2120
2121         disk_key.objectid = cpu_to_le64(rec->ino);
2122         disk_key.type = BTRFS_INODE_ITEM_KEY;
2123         disk_key.offset = 0;
2124
2125         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2126         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2127         btrfs_set_dir_data_len(leaf, dir_item, 0);
2128         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2129         name_ptr = (unsigned long)(dir_item + 1);
2130         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2131         btrfs_mark_buffer_dirty(leaf);
2132         btrfs_free_path(path);
2133         btrfs_commit_transaction(trans, root);
2134
2135         backref->found_dir_index = 1;
2136         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2137         BUG_ON(IS_ERR(dir_rec));
2138         if (!dir_rec)
2139                 return 0;
2140         dir_rec->found_size += backref->namelen;
2141         if (dir_rec->found_size == dir_rec->isize &&
2142             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2143                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2144         if (dir_rec->found_size != dir_rec->isize)
2145                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2146
2147         return 0;
2148 }
2149
2150 static int delete_dir_index(struct btrfs_root *root,
2151                             struct cache_tree *inode_cache,
2152                             struct inode_record *rec,
2153                             struct inode_backref *backref)
2154 {
2155         struct btrfs_trans_handle *trans;
2156         struct btrfs_dir_item *di;
2157         struct btrfs_path *path;
2158         int ret = 0;
2159
2160         path = btrfs_alloc_path();
2161         if (!path)
2162                 return -ENOMEM;
2163
2164         trans = btrfs_start_transaction(root, 1);
2165         if (IS_ERR(trans)) {
2166                 btrfs_free_path(path);
2167                 return PTR_ERR(trans);
2168         }
2169
2170
2171         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2172                 (unsigned long long)backref->dir,
2173                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2174                 (unsigned long long)root->objectid);
2175
2176         di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2177                                     backref->name, backref->namelen,
2178                                     backref->index, -1);
2179         if (IS_ERR(di)) {
2180                 ret = PTR_ERR(di);
2181                 btrfs_free_path(path);
2182                 btrfs_commit_transaction(trans, root);
2183                 if (ret == -ENOENT)
2184                         return 0;
2185                 return ret;
2186         }
2187
2188         if (!di)
2189                 ret = btrfs_del_item(trans, root, path);
2190         else
2191                 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2192         BUG_ON(ret);
2193         btrfs_free_path(path);
2194         btrfs_commit_transaction(trans, root);
2195         return ret;
2196 }
2197
2198 static int create_inode_item(struct btrfs_root *root,
2199                              struct inode_record *rec,
2200                              struct inode_backref *backref, int root_dir)
2201 {
2202         struct btrfs_trans_handle *trans;
2203         struct btrfs_inode_item inode_item;
2204         time_t now = time(NULL);
2205         int ret;
2206
2207         trans = btrfs_start_transaction(root, 1);
2208         if (IS_ERR(trans)) {
2209                 ret = PTR_ERR(trans);
2210                 return ret;
2211         }
2212
2213         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2214                 "be incomplete, please check permissions and content after "
2215                 "the fsck completes.\n", (unsigned long long)root->objectid,
2216                 (unsigned long long)rec->ino);
2217
2218         memset(&inode_item, 0, sizeof(inode_item));
2219         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2220         if (root_dir)
2221                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2222         else
2223                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2224         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2225         if (rec->found_dir_item) {
2226                 if (rec->found_file_extent)
2227                         fprintf(stderr, "root %llu inode %llu has both a dir "
2228                                 "item and extents, unsure if it is a dir or a "
2229                                 "regular file so setting it as a directory\n",
2230                                 (unsigned long long)root->objectid,
2231                                 (unsigned long long)rec->ino);
2232                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2233                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2234         } else if (!rec->found_dir_item) {
2235                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2236                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2237         }
2238         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2239         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2240         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2241         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2242         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2243         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2244         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2245         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2246
2247         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2248         BUG_ON(ret);
2249         btrfs_commit_transaction(trans, root);
2250         return 0;
2251 }
2252
2253 static int repair_inode_backrefs(struct btrfs_root *root,
2254                                  struct inode_record *rec,
2255                                  struct cache_tree *inode_cache,
2256                                  int delete)
2257 {
2258         struct inode_backref *tmp, *backref;
2259         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2260         int ret = 0;
2261         int repaired = 0;
2262
2263         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2264                 if (!delete && rec->ino == root_dirid) {
2265                         if (!rec->found_inode_item) {
2266                                 ret = create_inode_item(root, rec, backref, 1);
2267                                 if (ret)
2268                                         break;
2269                                 repaired++;
2270                         }
2271                 }
2272
2273                 /* Index 0 for root dir's are special, don't mess with it */
2274                 if (rec->ino == root_dirid && backref->index == 0)
2275                         continue;
2276
2277                 if (delete &&
2278                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2279                      (backref->found_dir_index && backref->found_inode_ref &&
2280                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2281                         ret = delete_dir_index(root, inode_cache, rec, backref);
2282                         if (ret)
2283                                 break;
2284                         repaired++;
2285                         list_del(&backref->list);
2286                         free(backref);
2287                 }
2288
2289                 if (!delete && !backref->found_dir_index &&
2290                     backref->found_dir_item && backref->found_inode_ref) {
2291                         ret = add_missing_dir_index(root, inode_cache, rec,
2292                                                     backref);
2293                         if (ret)
2294                                 break;
2295                         repaired++;
2296                         if (backref->found_dir_item &&
2297                             backref->found_dir_index &&
2298                             backref->found_dir_index) {
2299                                 if (!backref->errors &&
2300                                     backref->found_inode_ref) {
2301                                         list_del(&backref->list);
2302                                         free(backref);
2303                                 }
2304                         }
2305                 }
2306
2307                 if (!delete && (!backref->found_dir_index &&
2308                                 !backref->found_dir_item &&
2309                                 backref->found_inode_ref)) {
2310                         struct btrfs_trans_handle *trans;
2311                         struct btrfs_key location;
2312
2313                         ret = check_dir_conflict(root, backref->name,
2314                                                  backref->namelen,
2315                                                  backref->dir,
2316                                                  backref->index);
2317                         if (ret) {
2318                                 /*
2319                                  * let nlink fixing routine to handle it,
2320                                  * which can do it better.
2321                                  */
2322                                 ret = 0;
2323                                 break;
2324                         }
2325                         location.objectid = rec->ino;
2326                         location.type = BTRFS_INODE_ITEM_KEY;
2327                         location.offset = 0;
2328
2329                         trans = btrfs_start_transaction(root, 1);
2330                         if (IS_ERR(trans)) {
2331                                 ret = PTR_ERR(trans);
2332                                 break;
2333                         }
2334                         fprintf(stderr, "adding missing dir index/item pair "
2335                                 "for inode %llu\n",
2336                                 (unsigned long long)rec->ino);
2337                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2338                                                     backref->namelen,
2339                                                     backref->dir, &location,
2340                                                     imode_to_type(rec->imode),
2341                                                     backref->index);
2342                         BUG_ON(ret);
2343                         btrfs_commit_transaction(trans, root);
2344                         repaired++;
2345                 }
2346
2347                 if (!delete && (backref->found_inode_ref &&
2348                                 backref->found_dir_index &&
2349                                 backref->found_dir_item &&
2350                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2351                                 !rec->found_inode_item)) {
2352                         ret = create_inode_item(root, rec, backref, 0);
2353                         if (ret)
2354                                 break;
2355                         repaired++;
2356                 }
2357
2358         }
2359         return ret ? ret : repaired;
2360 }
2361
2362 /*
2363  * To determine the file type for nlink/inode_item repair
2364  *
2365  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2366  * Return -ENOENT if file type is not found.
2367  */
2368 static int find_file_type(struct inode_record *rec, u8 *type)
2369 {
2370         struct inode_backref *backref;
2371
2372         /* For inode item recovered case */
2373         if (rec->found_inode_item) {
2374                 *type = imode_to_type(rec->imode);
2375                 return 0;
2376         }
2377
2378         list_for_each_entry(backref, &rec->backrefs, list) {
2379                 if (backref->found_dir_index || backref->found_dir_item) {
2380                         *type = backref->filetype;
2381                         return 0;
2382                 }
2383         }
2384         return -ENOENT;
2385 }
2386
2387 /*
2388  * To determine the file name for nlink repair
2389  *
2390  * Return 0 if file name is found, set name and namelen.
2391  * Return -ENOENT if file name is not found.
2392  */
2393 static int find_file_name(struct inode_record *rec,
2394                           char *name, int *namelen)
2395 {
2396         struct inode_backref *backref;
2397
2398         list_for_each_entry(backref, &rec->backrefs, list) {
2399                 if (backref->found_dir_index || backref->found_dir_item ||
2400                     backref->found_inode_ref) {
2401                         memcpy(name, backref->name, backref->namelen);
2402                         *namelen = backref->namelen;
2403                         return 0;
2404                 }
2405         }
2406         return -ENOENT;
2407 }
2408
2409 /* Reset the nlink of the inode to the correct one */
2410 static int reset_nlink(struct btrfs_trans_handle *trans,
2411                        struct btrfs_root *root,
2412                        struct btrfs_path *path,
2413                        struct inode_record *rec)
2414 {
2415         struct inode_backref *backref;
2416         struct inode_backref *tmp;
2417         struct btrfs_key key;
2418         struct btrfs_inode_item *inode_item;
2419         int ret = 0;
2420
2421         /* We don't believe this either, reset it and iterate backref */
2422         rec->found_link = 0;
2423
2424         /* Remove all backref including the valid ones */
2425         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2426                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2427                                    backref->index, backref->name,
2428                                    backref->namelen, 0);
2429                 if (ret < 0)
2430                         goto out;
2431
2432                 /* remove invalid backref, so it won't be added back */
2433                 if (!(backref->found_dir_index &&
2434                       backref->found_dir_item &&
2435                       backref->found_inode_ref)) {
2436                         list_del(&backref->list);
2437                         free(backref);
2438                 } else {
2439                         rec->found_link++;
2440                 }
2441         }
2442
2443         /* Set nlink to 0 */
2444         key.objectid = rec->ino;
2445         key.type = BTRFS_INODE_ITEM_KEY;
2446         key.offset = 0;
2447         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2448         if (ret < 0)
2449                 goto out;
2450         if (ret > 0) {
2451                 ret = -ENOENT;
2452                 goto out;
2453         }
2454         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2455                                     struct btrfs_inode_item);
2456         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2457         btrfs_mark_buffer_dirty(path->nodes[0]);
2458         btrfs_release_path(path);
2459
2460         /*
2461          * Add back valid inode_ref/dir_item/dir_index,
2462          * add_link() will handle the nlink inc, so new nlink must be correct
2463          */
2464         list_for_each_entry(backref, &rec->backrefs, list) {
2465                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2466                                      backref->name, backref->namelen,
2467                                      backref->filetype, &backref->index, 1);
2468                 if (ret < 0)
2469                         goto out;
2470         }
2471 out:
2472         btrfs_release_path(path);
2473         return ret;
2474 }
2475
2476 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2477                                struct btrfs_root *root,
2478                                struct btrfs_path *path,
2479                                struct inode_record *rec)
2480 {
2481         char *dir_name = "lost+found";
2482         char namebuf[BTRFS_NAME_LEN] = {0};
2483         u64 lost_found_ino;
2484         u32 mode = 0700;
2485         u8 type = 0;
2486         int namelen = 0;
2487         int name_recovered = 0;
2488         int type_recovered = 0;
2489         int ret = 0;
2490
2491         /*
2492          * Get file name and type first before these invalid inode ref
2493          * are deleted by remove_all_invalid_backref()
2494          */
2495         name_recovered = !find_file_name(rec, namebuf, &namelen);
2496         type_recovered = !find_file_type(rec, &type);
2497
2498         if (!name_recovered) {
2499                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2500                        rec->ino, rec->ino);
2501                 namelen = count_digits(rec->ino);
2502                 sprintf(namebuf, "%llu", rec->ino);
2503                 name_recovered = 1;
2504         }
2505         if (!type_recovered) {
2506                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2507                        rec->ino);
2508                 type = BTRFS_FT_REG_FILE;
2509                 type_recovered = 1;
2510         }
2511
2512         ret = reset_nlink(trans, root, path, rec);
2513         if (ret < 0) {
2514                 fprintf(stderr,
2515                         "Failed to reset nlink for inode %llu: %s\n",
2516                         rec->ino, strerror(-ret));
2517                 goto out;
2518         }
2519
2520         if (rec->found_link == 0) {
2521                 lost_found_ino = root->highest_inode;
2522                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2523                         ret = -EOVERFLOW;
2524                         goto out;
2525                 }
2526                 lost_found_ino++;
2527                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2528                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2529                                   mode);
2530                 if (ret < 0) {
2531                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2532                                 dir_name, strerror(-ret));
2533                         goto out;
2534                 }
2535                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2536                                      namebuf, namelen, type, NULL, 1);
2537                 /*
2538                  * Add ".INO" suffix several times to handle case where
2539                  * "FILENAME.INO" is already taken by another file.
2540                  */
2541                 while (ret == -EEXIST) {
2542                         /*
2543                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2544                          */
2545                         if (namelen + count_digits(rec->ino) + 1 >
2546                             BTRFS_NAME_LEN) {
2547                                 ret = -EFBIG;
2548                                 goto out;
2549                         }
2550                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2551                                  ".%llu", rec->ino);
2552                         namelen += count_digits(rec->ino) + 1;
2553                         ret = btrfs_add_link(trans, root, rec->ino,
2554                                              lost_found_ino, namebuf,
2555                                              namelen, type, NULL, 1);
2556                 }
2557                 if (ret < 0) {
2558                         fprintf(stderr,
2559                                 "Failed to link the inode %llu to %s dir: %s\n",
2560                                 rec->ino, dir_name, strerror(-ret));
2561                         goto out;
2562                 }
2563                 /*
2564                  * Just increase the found_link, don't actually add the
2565                  * backref. This will make things easier and this inode
2566                  * record will be freed after the repair is done.
2567                  * So fsck will not report problem about this inode.
2568                  */
2569                 rec->found_link++;
2570                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2571                        namelen, namebuf, dir_name);
2572         }
2573         printf("Fixed the nlink of inode %llu\n", rec->ino);
2574 out:
2575         /*
2576          * Clear the flag anyway, or we will loop forever for the same inode
2577          * as it will not be removed from the bad inode list and the dead loop
2578          * happens.
2579          */
2580         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2581         btrfs_release_path(path);
2582         return ret;
2583 }
2584
2585 /*
2586  * Check if there is any normal(reg or prealloc) file extent for given
2587  * ino.
2588  * This is used to determine the file type when neither its dir_index/item or
2589  * inode_item exists.
2590  *
2591  * This will *NOT* report error, if any error happens, just consider it does
2592  * not have any normal file extent.
2593  */
2594 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2595 {
2596         struct btrfs_path *path;
2597         struct btrfs_key key;
2598         struct btrfs_key found_key;
2599         struct btrfs_file_extent_item *fi;
2600         u8 type;
2601         int ret = 0;
2602
2603         path = btrfs_alloc_path();
2604         if (!path)
2605                 goto out;
2606         key.objectid = ino;
2607         key.type = BTRFS_EXTENT_DATA_KEY;
2608         key.offset = 0;
2609
2610         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2611         if (ret < 0) {
2612                 ret = 0;
2613                 goto out;
2614         }
2615         if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2616                 ret = btrfs_next_leaf(root, path);
2617                 if (ret) {
2618                         ret = 0;
2619                         goto out;
2620                 }
2621         }
2622         while (1) {
2623                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2624                                       path->slots[0]);
2625                 if (found_key.objectid != ino ||
2626                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2627                         break;
2628                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2629                                     struct btrfs_file_extent_item);
2630                 type = btrfs_file_extent_type(path->nodes[0], fi);
2631                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2632                         ret = 1;
2633                         goto out;
2634                 }
2635         }
2636 out:
2637         btrfs_free_path(path);
2638         return ret;
2639 }
2640
2641 static u32 btrfs_type_to_imode(u8 type)
2642 {
2643         static u32 imode_by_btrfs_type[] = {
2644                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2645                 [BTRFS_FT_DIR]          = S_IFDIR,
2646                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2647                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2648                 [BTRFS_FT_FIFO]         = S_IFIFO,
2649                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2650                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2651         };
2652
2653         return imode_by_btrfs_type[(type)];
2654 }
2655
2656 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2657                                 struct btrfs_root *root,
2658                                 struct btrfs_path *path,
2659                                 struct inode_record *rec)
2660 {
2661         u8 filetype;
2662         u32 mode = 0700;
2663         int type_recovered = 0;
2664         int ret = 0;
2665
2666         printf("Trying to rebuild inode:%llu\n", rec->ino);
2667
2668         type_recovered = !find_file_type(rec, &filetype);
2669
2670         /*
2671          * Try to determine inode type if type not found.
2672          *
2673          * For found regular file extent, it must be FILE.
2674          * For found dir_item/index, it must be DIR.
2675          *
2676          * For undetermined one, use FILE as fallback.
2677          *
2678          * TODO:
2679          * 1. If found backref(inode_index/item is already handled) to it,
2680          *    it must be DIR.
2681          *    Need new inode-inode ref structure to allow search for that.
2682          */
2683         if (!type_recovered) {
2684                 if (rec->found_file_extent &&
2685                     find_normal_file_extent(root, rec->ino)) {
2686                         type_recovered = 1;
2687                         filetype = BTRFS_FT_REG_FILE;
2688                 } else if (rec->found_dir_item) {
2689                         type_recovered = 1;
2690                         filetype = BTRFS_FT_DIR;
2691                 } else if (!list_empty(&rec->orphan_extents)) {
2692                         type_recovered = 1;
2693                         filetype = BTRFS_FT_REG_FILE;
2694                 } else{
2695                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2696                                rec->ino);
2697                         type_recovered = 1;
2698                         filetype = BTRFS_FT_REG_FILE;
2699                 }
2700         }
2701
2702         ret = btrfs_new_inode(trans, root, rec->ino,
2703                               mode | btrfs_type_to_imode(filetype));
2704         if (ret < 0)
2705                 goto out;
2706
2707         /*
2708          * Here inode rebuild is done, we only rebuild the inode item,
2709          * don't repair the nlink(like move to lost+found).
2710          * That is the job of nlink repair.
2711          *
2712          * We just fill the record and return
2713          */
2714         rec->found_dir_item = 1;
2715         rec->imode = mode | btrfs_type_to_imode(filetype);
2716         rec->nlink = 0;
2717         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2718         /* Ensure the inode_nlinks repair function will be called */
2719         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2720 out:
2721         return ret;
2722 }
2723
2724 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2725                                       struct btrfs_root *root,
2726                                       struct btrfs_path *path,
2727                                       struct inode_record *rec)
2728 {
2729         struct orphan_data_extent *orphan;
2730         struct orphan_data_extent *tmp;
2731         int ret = 0;
2732
2733         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2734                 /*
2735                  * Check for conflicting file extents
2736                  *
2737                  * Here we don't know whether the extents is compressed or not,
2738                  * so we can only assume it not compressed nor data offset,
2739                  * and use its disk_len as extent length.
2740                  */
2741                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2742                                        orphan->offset, orphan->disk_len, 0);
2743                 btrfs_release_path(path);
2744                 if (ret < 0)
2745                         goto out;
2746                 if (!ret) {
2747                         fprintf(stderr,
2748                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2749                                 orphan->disk_bytenr, orphan->disk_len);
2750                         ret = btrfs_free_extent(trans,
2751                                         root->fs_info->extent_root,
2752                                         orphan->disk_bytenr, orphan->disk_len,
2753                                         0, root->objectid, orphan->objectid,
2754                                         orphan->offset);
2755                         if (ret < 0)
2756                                 goto out;
2757                 }
2758                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2759                                 orphan->offset, orphan->disk_bytenr,
2760                                 orphan->disk_len, orphan->disk_len);
2761                 if (ret < 0)
2762                         goto out;
2763
2764                 /* Update file size info */
2765                 rec->found_size += orphan->disk_len;
2766                 if (rec->found_size == rec->nbytes)
2767                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2768
2769                 /* Update the file extent hole info too */
2770                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2771                                            orphan->disk_len);
2772                 if (ret < 0)
2773                         goto out;
2774                 if (RB_EMPTY_ROOT(&rec->holes))
2775                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2776
2777                 list_del(&orphan->list);
2778                 free(orphan);
2779         }
2780         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2781 out:
2782         return ret;
2783 }
2784
2785 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2786                                         struct btrfs_root *root,
2787                                         struct btrfs_path *path,
2788                                         struct inode_record *rec)
2789 {
2790         struct rb_node *node;
2791         struct file_extent_hole *hole;
2792         int found = 0;
2793         int ret = 0;
2794
2795         node = rb_first(&rec->holes);
2796
2797         while (node) {
2798                 found = 1;
2799                 hole = rb_entry(node, struct file_extent_hole, node);
2800                 ret = btrfs_punch_hole(trans, root, rec->ino,
2801                                        hole->start, hole->len);
2802                 if (ret < 0)
2803                         goto out;
2804                 ret = del_file_extent_hole(&rec->holes, hole->start,
2805                                            hole->len);
2806                 if (ret < 0)
2807                         goto out;
2808                 if (RB_EMPTY_ROOT(&rec->holes))
2809                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2810                 node = rb_first(&rec->holes);
2811         }
2812         /* special case for a file losing all its file extent */
2813         if (!found) {
2814                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2815                                        round_up(rec->isize, root->sectorsize));
2816                 if (ret < 0)
2817                         goto out;
2818         }
2819         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2820                rec->ino, root->objectid);
2821 out:
2822         return ret;
2823 }
2824
2825 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2826 {
2827         struct btrfs_trans_handle *trans;
2828         struct btrfs_path *path;
2829         int ret = 0;
2830
2831         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2832                              I_ERR_NO_ORPHAN_ITEM |
2833                              I_ERR_LINK_COUNT_WRONG |
2834                              I_ERR_NO_INODE_ITEM |
2835                              I_ERR_FILE_EXTENT_ORPHAN |
2836                              I_ERR_FILE_EXTENT_DISCOUNT|
2837                              I_ERR_FILE_NBYTES_WRONG)))
2838                 return rec->errors;
2839
2840         path = btrfs_alloc_path();
2841         if (!path)
2842                 return -ENOMEM;
2843
2844         /*
2845          * For nlink repair, it may create a dir and add link, so
2846          * 2 for parent(256)'s dir_index and dir_item
2847          * 2 for lost+found dir's inode_item and inode_ref
2848          * 1 for the new inode_ref of the file
2849          * 2 for lost+found dir's dir_index and dir_item for the file
2850          */
2851         trans = btrfs_start_transaction(root, 7);
2852         if (IS_ERR(trans)) {
2853                 btrfs_free_path(path);
2854                 return PTR_ERR(trans);
2855         }
2856
2857         if (rec->errors & I_ERR_NO_INODE_ITEM)
2858                 ret = repair_inode_no_item(trans, root, path, rec);
2859         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2860                 ret = repair_inode_orphan_extent(trans, root, path, rec);
2861         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2862                 ret = repair_inode_discount_extent(trans, root, path, rec);
2863         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2864                 ret = repair_inode_isize(trans, root, path, rec);
2865         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2866                 ret = repair_inode_orphan_item(trans, root, path, rec);
2867         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2868                 ret = repair_inode_nlinks(trans, root, path, rec);
2869         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2870                 ret = repair_inode_nbytes(trans, root, path, rec);
2871         btrfs_commit_transaction(trans, root);
2872         btrfs_free_path(path);
2873         return ret;
2874 }
2875
2876 static int check_inode_recs(struct btrfs_root *root,
2877                             struct cache_tree *inode_cache)
2878 {
2879         struct cache_extent *cache;
2880         struct ptr_node *node;
2881         struct inode_record *rec;
2882         struct inode_backref *backref;
2883         int stage = 0;
2884         int ret = 0;
2885         int err = 0;
2886         u64 error = 0;
2887         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2888
2889         if (btrfs_root_refs(&root->root_item) == 0) {
2890                 if (!cache_tree_empty(inode_cache))
2891                         fprintf(stderr, "warning line %d\n", __LINE__);
2892                 return 0;
2893         }
2894
2895         /*
2896          * We need to record the highest inode number for later 'lost+found'
2897          * dir creation.
2898          * We must select an ino not used/referred by any existing inode, or
2899          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2900          * this may cause 'lost+found' dir has wrong nlinks.
2901          */
2902         cache = last_cache_extent(inode_cache);
2903         if (cache) {
2904                 node = container_of(cache, struct ptr_node, cache);
2905                 rec = node->data;
2906                 if (rec->ino > root->highest_inode)
2907                         root->highest_inode = rec->ino;
2908         }
2909
2910         /*
2911          * We need to repair backrefs first because we could change some of the
2912          * errors in the inode recs.
2913          *
2914          * We also need to go through and delete invalid backrefs first and then
2915          * add the correct ones second.  We do this because we may get EEXIST
2916          * when adding back the correct index because we hadn't yet deleted the
2917          * invalid index.
2918          *
2919          * For example, if we were missing a dir index then the directories
2920          * isize would be wrong, so if we fixed the isize to what we thought it
2921          * would be and then fixed the backref we'd still have a invalid fs, so
2922          * we need to add back the dir index and then check to see if the isize
2923          * is still wrong.
2924          */
2925         while (stage < 3) {
2926                 stage++;
2927                 if (stage == 3 && !err)
2928                         break;
2929
2930                 cache = search_cache_extent(inode_cache, 0);
2931                 while (repair && cache) {
2932                         node = container_of(cache, struct ptr_node, cache);
2933                         rec = node->data;
2934                         cache = next_cache_extent(cache);
2935
2936                         /* Need to free everything up and rescan */
2937                         if (stage == 3) {
2938                                 remove_cache_extent(inode_cache, &node->cache);
2939                                 free(node);
2940                                 free_inode_rec(rec);
2941                                 continue;
2942                         }
2943
2944                         if (list_empty(&rec->backrefs))
2945                                 continue;
2946
2947                         ret = repair_inode_backrefs(root, rec, inode_cache,
2948                                                     stage == 1);
2949                         if (ret < 0) {
2950                                 err = ret;
2951                                 stage = 2;
2952                                 break;
2953                         } if (ret > 0) {
2954                                 err = -EAGAIN;
2955                         }
2956                 }
2957         }
2958         if (err)
2959                 return err;
2960
2961         rec = get_inode_rec(inode_cache, root_dirid, 0);
2962         BUG_ON(IS_ERR(rec));
2963         if (rec) {
2964                 ret = check_root_dir(rec);
2965                 if (ret) {
2966                         fprintf(stderr, "root %llu root dir %llu error\n",
2967                                 (unsigned long long)root->root_key.objectid,
2968                                 (unsigned long long)root_dirid);
2969                         print_inode_error(root, rec);
2970                         error++;
2971                 }
2972         } else {
2973                 if (repair) {
2974                         struct btrfs_trans_handle *trans;
2975
2976                         trans = btrfs_start_transaction(root, 1);
2977                         if (IS_ERR(trans)) {
2978                                 err = PTR_ERR(trans);
2979                                 return err;
2980                         }
2981
2982                         fprintf(stderr,
2983                                 "root %llu missing its root dir, recreating\n",
2984                                 (unsigned long long)root->objectid);
2985
2986                         ret = btrfs_make_root_dir(trans, root, root_dirid);
2987                         BUG_ON(ret);
2988
2989                         btrfs_commit_transaction(trans, root);
2990                         return -EAGAIN;
2991                 }
2992
2993                 fprintf(stderr, "root %llu root dir %llu not found\n",
2994                         (unsigned long long)root->root_key.objectid,
2995                         (unsigned long long)root_dirid);
2996         }
2997
2998         while (1) {
2999                 cache = search_cache_extent(inode_cache, 0);
3000                 if (!cache)
3001                         break;
3002                 node = container_of(cache, struct ptr_node, cache);
3003                 rec = node->data;
3004                 remove_cache_extent(inode_cache, &node->cache);
3005                 free(node);
3006                 if (rec->ino == root_dirid ||
3007                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3008                         free_inode_rec(rec);
3009                         continue;
3010                 }
3011
3012                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3013                         ret = check_orphan_item(root, rec->ino);
3014                         if (ret == 0)
3015                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3016                         if (can_free_inode_rec(rec)) {
3017                                 free_inode_rec(rec);
3018                                 continue;
3019                         }
3020                 }
3021
3022                 if (!rec->found_inode_item)
3023                         rec->errors |= I_ERR_NO_INODE_ITEM;
3024                 if (rec->found_link != rec->nlink)
3025                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3026                 if (repair) {
3027                         ret = try_repair_inode(root, rec);
3028                         if (ret == 0 && can_free_inode_rec(rec)) {
3029                                 free_inode_rec(rec);
3030                                 continue;
3031                         }
3032                         ret = 0;
3033                 }
3034
3035                 if (!(repair && ret == 0))
3036                         error++;
3037                 print_inode_error(root, rec);
3038                 list_for_each_entry(backref, &rec->backrefs, list) {
3039                         if (!backref->found_dir_item)
3040                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3041                         if (!backref->found_dir_index)
3042                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3043                         if (!backref->found_inode_ref)
3044                                 backref->errors |= REF_ERR_NO_INODE_REF;
3045                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3046                                 " namelen %u name %s filetype %d errors %x",
3047                                 (unsigned long long)backref->dir,
3048                                 (unsigned long long)backref->index,
3049                                 backref->namelen, backref->name,
3050                                 backref->filetype, backref->errors);
3051                         print_ref_error(backref->errors);
3052                 }
3053                 free_inode_rec(rec);
3054         }
3055         return (error > 0) ? -1 : 0;
3056 }
3057
3058 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3059                                         u64 objectid)
3060 {
3061         struct cache_extent *cache;
3062         struct root_record *rec = NULL;
3063         int ret;
3064
3065         cache = lookup_cache_extent(root_cache, objectid, 1);
3066         if (cache) {
3067                 rec = container_of(cache, struct root_record, cache);
3068         } else {
3069                 rec = calloc(1, sizeof(*rec));
3070                 if (!rec)
3071                         return ERR_PTR(-ENOMEM);
3072                 rec->objectid = objectid;
3073                 INIT_LIST_HEAD(&rec->backrefs);
3074                 rec->cache.start = objectid;
3075                 rec->cache.size = 1;
3076
3077                 ret = insert_cache_extent(root_cache, &rec->cache);
3078                 if (ret)
3079                         return ERR_PTR(-EEXIST);
3080         }
3081         return rec;
3082 }
3083
3084 static struct root_backref *get_root_backref(struct root_record *rec,
3085                                              u64 ref_root, u64 dir, u64 index,
3086                                              const char *name, int namelen)
3087 {
3088         struct root_backref *backref;
3089
3090         list_for_each_entry(backref, &rec->backrefs, list) {
3091                 if (backref->ref_root != ref_root || backref->dir != dir ||
3092                     backref->namelen != namelen)
3093                         continue;
3094                 if (memcmp(name, backref->name, namelen))
3095                         continue;
3096                 return backref;
3097         }
3098
3099         backref = calloc(1, sizeof(*backref) + namelen + 1);
3100         if (!backref)
3101                 return NULL;
3102         backref->ref_root = ref_root;
3103         backref->dir = dir;
3104         backref->index = index;
3105         backref->namelen = namelen;
3106         memcpy(backref->name, name, namelen);
3107         backref->name[namelen] = '\0';
3108         list_add_tail(&backref->list, &rec->backrefs);
3109         return backref;
3110 }
3111
3112 static void free_root_record(struct cache_extent *cache)
3113 {
3114         struct root_record *rec;
3115         struct root_backref *backref;
3116
3117         rec = container_of(cache, struct root_record, cache);
3118         while (!list_empty(&rec->backrefs)) {
3119                 backref = list_entry(rec->backrefs.next,
3120                                      struct root_backref, list);
3121                 list_del(&backref->list);
3122                 free(backref);
3123         }
3124
3125         kfree(rec);
3126 }
3127
3128 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3129
3130 static int add_root_backref(struct cache_tree *root_cache,
3131                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3132                             const char *name, int namelen,
3133                             int item_type, int errors)
3134 {
3135         struct root_record *rec;
3136         struct root_backref *backref;
3137
3138         rec = get_root_rec(root_cache, root_id);
3139         BUG_ON(IS_ERR(rec));
3140         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3141         BUG_ON(!backref);
3142
3143         backref->errors |= errors;
3144
3145         if (item_type != BTRFS_DIR_ITEM_KEY) {
3146                 if (backref->found_dir_index || backref->found_back_ref ||
3147                     backref->found_forward_ref) {
3148                         if (backref->index != index)
3149                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3150                 } else {
3151                         backref->index = index;
3152                 }
3153         }
3154
3155         if (item_type == BTRFS_DIR_ITEM_KEY) {
3156                 if (backref->found_forward_ref)
3157                         rec->found_ref++;
3158                 backref->found_dir_item = 1;
3159         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3160                 backref->found_dir_index = 1;
3161         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3162                 if (backref->found_forward_ref)
3163                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3164                 else if (backref->found_dir_item)
3165                         rec->found_ref++;
3166                 backref->found_forward_ref = 1;
3167         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3168                 if (backref->found_back_ref)
3169                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3170                 backref->found_back_ref = 1;
3171         } else {
3172                 BUG_ON(1);
3173         }
3174
3175         if (backref->found_forward_ref && backref->found_dir_item)
3176                 backref->reachable = 1;
3177         return 0;
3178 }
3179
3180 static int merge_root_recs(struct btrfs_root *root,
3181                            struct cache_tree *src_cache,
3182                            struct cache_tree *dst_cache)
3183 {
3184         struct cache_extent *cache;
3185         struct ptr_node *node;
3186         struct inode_record *rec;
3187         struct inode_backref *backref;
3188         int ret = 0;
3189
3190         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3191                 free_inode_recs_tree(src_cache);
3192                 return 0;
3193         }
3194
3195         while (1) {
3196                 cache = search_cache_extent(src_cache, 0);
3197                 if (!cache)
3198                         break;
3199                 node = container_of(cache, struct ptr_node, cache);
3200                 rec = node->data;
3201                 remove_cache_extent(src_cache, &node->cache);
3202                 free(node);
3203
3204                 ret = is_child_root(root, root->objectid, rec->ino);
3205                 if (ret < 0)
3206                         break;
3207                 else if (ret == 0)
3208                         goto skip;
3209
3210                 list_for_each_entry(backref, &rec->backrefs, list) {
3211                         BUG_ON(backref->found_inode_ref);
3212                         if (backref->found_dir_item)
3213                                 add_root_backref(dst_cache, rec->ino,
3214                                         root->root_key.objectid, backref->dir,
3215                                         backref->index, backref->name,
3216                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3217                                         backref->errors);
3218                         if (backref->found_dir_index)
3219                                 add_root_backref(dst_cache, rec->ino,
3220                                         root->root_key.objectid, backref->dir,
3221                                         backref->index, backref->name,
3222                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3223                                         backref->errors);
3224                 }
3225 skip:
3226                 free_inode_rec(rec);
3227         }
3228         if (ret < 0)
3229                 return ret;
3230         return 0;
3231 }
3232
3233 static int check_root_refs(struct btrfs_root *root,
3234                            struct cache_tree *root_cache)
3235 {
3236         struct root_record *rec;
3237         struct root_record *ref_root;
3238         struct root_backref *backref;
3239         struct cache_extent *cache;
3240         int loop = 1;
3241         int ret;
3242         int error;
3243         int errors = 0;
3244
3245         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3246         BUG_ON(IS_ERR(rec));
3247         rec->found_ref = 1;
3248
3249         /* fixme: this can not detect circular references */
3250         while (loop) {
3251                 loop = 0;
3252                 cache = search_cache_extent(root_cache, 0);
3253                 while (1) {
3254                         if (!cache)
3255                                 break;
3256                         rec = container_of(cache, struct root_record, cache);
3257                         cache = next_cache_extent(cache);
3258
3259                         if (rec->found_ref == 0)
3260                                 continue;
3261
3262                         list_for_each_entry(backref, &rec->backrefs, list) {
3263                                 if (!backref->reachable)
3264                                         continue;
3265
3266                                 ref_root = get_root_rec(root_cache,
3267                                                         backref->ref_root);
3268                                 BUG_ON(IS_ERR(ref_root));
3269                                 if (ref_root->found_ref > 0)
3270                                         continue;
3271
3272                                 backref->reachable = 0;
3273                                 rec->found_ref--;
3274                                 if (rec->found_ref == 0)
3275                                         loop = 1;
3276                         }
3277                 }
3278         }
3279
3280         cache = search_cache_extent(root_cache, 0);
3281         while (1) {
3282                 if (!cache)
3283                         break;
3284                 rec = container_of(cache, struct root_record, cache);
3285                 cache = next_cache_extent(cache);
3286
3287                 if (rec->found_ref == 0 &&
3288                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3289                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3290                         ret = check_orphan_item(root->fs_info->tree_root,
3291                                                 rec->objectid);
3292                         if (ret == 0)
3293                                 continue;
3294
3295                         /*
3296                          * If we don't have a root item then we likely just have
3297                          * a dir item in a snapshot for this root but no actual
3298                          * ref key or anything so it's meaningless.
3299                          */
3300                         if (!rec->found_root_item)
3301                                 continue;
3302                         errors++;
3303                         fprintf(stderr, "fs tree %llu not referenced\n",
3304                                 (unsigned long long)rec->objectid);
3305                 }
3306
3307                 error = 0;
3308                 if (rec->found_ref > 0 && !rec->found_root_item)
3309                         error = 1;
3310                 list_for_each_entry(backref, &rec->backrefs, list) {
3311                         if (!backref->found_dir_item)
3312                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3313                         if (!backref->found_dir_index)
3314                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3315                         if (!backref->found_back_ref)
3316                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3317                         if (!backref->found_forward_ref)
3318                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3319                         if (backref->reachable && backref->errors)
3320                                 error = 1;
3321                 }
3322                 if (!error)
3323                         continue;
3324
3325                 errors++;
3326                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3327                         (unsigned long long)rec->objectid, rec->found_ref,
3328                          rec->found_root_item ? "" : "not found");
3329
3330                 list_for_each_entry(backref, &rec->backrefs, list) {
3331                         if (!backref->reachable)
3332                                 continue;
3333                         if (!backref->errors && rec->found_root_item)
3334                                 continue;
3335                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3336                                 " index %llu namelen %u name %s errors %x\n",
3337                                 (unsigned long long)backref->ref_root,
3338                                 (unsigned long long)backref->dir,
3339                                 (unsigned long long)backref->index,
3340                                 backref->namelen, backref->name,
3341                                 backref->errors);
3342                         print_ref_error(backref->errors);
3343                 }
3344         }
3345         return errors > 0 ? 1 : 0;
3346 }
3347
3348 static int process_root_ref(struct extent_buffer *eb, int slot,
3349                             struct btrfs_key *key,
3350                             struct cache_tree *root_cache)
3351 {
3352         u64 dirid;
3353         u64 index;
3354         u32 len;
3355         u32 name_len;
3356         struct btrfs_root_ref *ref;
3357         char namebuf[BTRFS_NAME_LEN];
3358         int error;
3359
3360         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3361
3362         dirid = btrfs_root_ref_dirid(eb, ref);
3363         index = btrfs_root_ref_sequence(eb, ref);
3364         name_len = btrfs_root_ref_name_len(eb, ref);
3365
3366         if (name_len <= BTRFS_NAME_LEN) {
3367                 len = name_len;
3368                 error = 0;
3369         } else {
3370                 len = BTRFS_NAME_LEN;
3371                 error = REF_ERR_NAME_TOO_LONG;
3372         }
3373         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3374
3375         if (key->type == BTRFS_ROOT_REF_KEY) {
3376                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3377                                  index, namebuf, len, key->type, error);
3378         } else {
3379                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3380                                  index, namebuf, len, key->type, error);
3381         }
3382         return 0;
3383 }
3384
3385 static void free_corrupt_block(struct cache_extent *cache)
3386 {
3387         struct btrfs_corrupt_block *corrupt;
3388
3389         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3390         free(corrupt);
3391 }
3392
3393 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3394
3395 /*
3396  * Repair the btree of the given root.
3397  *
3398  * The fix is to remove the node key in corrupt_blocks cache_tree.
3399  * and rebalance the tree.
3400  * After the fix, the btree should be writeable.
3401  */
3402 static int repair_btree(struct btrfs_root *root,
3403                         struct cache_tree *corrupt_blocks)
3404 {
3405         struct btrfs_trans_handle *trans;
3406         struct btrfs_path *path;
3407         struct btrfs_corrupt_block *corrupt;
3408         struct cache_extent *cache;
3409         struct btrfs_key key;
3410         u64 offset;
3411         int level;
3412         int ret = 0;
3413
3414         if (cache_tree_empty(corrupt_blocks))
3415                 return 0;
3416
3417         path = btrfs_alloc_path();
3418         if (!path)
3419                 return -ENOMEM;
3420
3421         trans = btrfs_start_transaction(root, 1);
3422         if (IS_ERR(trans)) {
3423                 ret = PTR_ERR(trans);
3424                 fprintf(stderr, "Error starting transaction: %s\n",
3425                         strerror(-ret));
3426                 goto out_free_path;
3427         }
3428         cache = first_cache_extent(corrupt_blocks);
3429         while (cache) {
3430                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3431                                        cache);
3432                 level = corrupt->level;
3433                 path->lowest_level = level;
3434                 key.objectid = corrupt->key.objectid;
3435                 key.type = corrupt->key.type;
3436                 key.offset = corrupt->key.offset;
3437
3438                 /*
3439                  * Here we don't want to do any tree balance, since it may
3440                  * cause a balance with corrupted brother leaf/node,
3441                  * so ins_len set to 0 here.
3442                  * Balance will be done after all corrupt node/leaf is deleted.
3443                  */
3444                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3445                 if (ret < 0)
3446                         goto out;
3447                 offset = btrfs_node_blockptr(path->nodes[level],
3448                                              path->slots[level]);
3449
3450                 /* Remove the ptr */
3451                 ret = btrfs_del_ptr(trans, root, path, level,
3452                                     path->slots[level]);
3453                 if (ret < 0)
3454                         goto out;
3455                 /*
3456                  * Remove the corresponding extent
3457                  * return value is not concerned.
3458                  */
3459                 btrfs_release_path(path);
3460                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3461                                         0, root->root_key.objectid,
3462                                         level - 1, 0);
3463                 cache = next_cache_extent(cache);
3464         }
3465
3466         /* Balance the btree using btrfs_search_slot() */
3467         cache = first_cache_extent(corrupt_blocks);
3468         while (cache) {
3469                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3470                                        cache);
3471                 memcpy(&key, &corrupt->key, sizeof(key));
3472                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3473                 if (ret < 0)
3474                         goto out;
3475                 /* return will always >0 since it won't find the item */
3476                 ret = 0;
3477                 btrfs_release_path(path);
3478                 cache = next_cache_extent(cache);
3479         }
3480 out:
3481         btrfs_commit_transaction(trans, root);
3482 out_free_path:
3483         btrfs_free_path(path);
3484         return ret;
3485 }
3486
3487 static int check_fs_root(struct btrfs_root *root,
3488                          struct cache_tree *root_cache,
3489                          struct walk_control *wc)
3490 {
3491         int ret = 0;
3492         int err = 0;
3493         int wret;
3494         int level;
3495         struct btrfs_path path;
3496         struct shared_node root_node;
3497         struct root_record *rec;
3498         struct btrfs_root_item *root_item = &root->root_item;
3499         struct cache_tree corrupt_blocks;
3500         struct orphan_data_extent *orphan;
3501         struct orphan_data_extent *tmp;
3502         enum btrfs_tree_block_status status;
3503
3504         /*
3505          * Reuse the corrupt_block cache tree to record corrupted tree block
3506          *
3507          * Unlike the usage in extent tree check, here we do it in a per
3508          * fs/subvol tree base.
3509          */
3510         cache_tree_init(&corrupt_blocks);
3511         root->fs_info->corrupt_blocks = &corrupt_blocks;
3512
3513         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3514                 rec = get_root_rec(root_cache, root->root_key.objectid);
3515                 BUG_ON(IS_ERR(rec));
3516                 if (btrfs_root_refs(root_item) > 0)
3517                         rec->found_root_item = 1;
3518         }
3519
3520         btrfs_init_path(&path);
3521         memset(&root_node, 0, sizeof(root_node));
3522         cache_tree_init(&root_node.root_cache);
3523         cache_tree_init(&root_node.inode_cache);
3524
3525         /* Move the orphan extent record to corresponding inode_record */
3526         list_for_each_entry_safe(orphan, tmp,
3527                                  &root->orphan_data_extents, list) {
3528                 struct inode_record *inode;
3529
3530                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3531                                       1);
3532                 BUG_ON(IS_ERR(inode));
3533                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3534                 list_move(&orphan->list, &inode->orphan_extents);
3535         }
3536
3537         level = btrfs_header_level(root->node);
3538         memset(wc->nodes, 0, sizeof(wc->nodes));
3539         wc->nodes[level] = &root_node;
3540         wc->active_node = level;
3541         wc->root_level = level;
3542
3543         /* We may not have checked the root block, lets do that now */
3544         if (btrfs_is_leaf(root->node))
3545                 status = btrfs_check_leaf(root, NULL, root->node);
3546         else
3547                 status = btrfs_check_node(root, NULL, root->node);
3548         if (status != BTRFS_TREE_BLOCK_CLEAN)
3549                 return -EIO;
3550
3551         if (btrfs_root_refs(root_item) > 0 ||
3552             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3553                 path.nodes[level] = root->node;
3554                 extent_buffer_get(root->node);
3555                 path.slots[level] = 0;
3556         } else {
3557                 struct btrfs_key key;
3558                 struct btrfs_disk_key found_key;
3559
3560                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3561                 level = root_item->drop_level;
3562                 path.lowest_level = level;
3563                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3564                 if (wret < 0)
3565                         goto skip_walking;
3566                 btrfs_node_key(path.nodes[level], &found_key,
3567                                 path.slots[level]);
3568                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3569                                         sizeof(found_key)));
3570         }
3571
3572         while (1) {
3573                 wret = walk_down_tree(root, &path, wc, &level);
3574                 if (wret < 0)
3575                         ret = wret;
3576                 if (wret != 0)
3577                         break;
3578
3579                 wret = walk_up_tree(root, &path, wc, &level);
3580                 if (wret < 0)
3581                         ret = wret;
3582                 if (wret != 0)
3583                         break;
3584         }
3585 skip_walking:
3586         btrfs_release_path(&path);
3587
3588         if (!cache_tree_empty(&corrupt_blocks)) {
3589                 struct cache_extent *cache;
3590                 struct btrfs_corrupt_block *corrupt;
3591
3592                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3593                        root->root_key.objectid);
3594                 cache = first_cache_extent(&corrupt_blocks);
3595                 while (cache) {
3596                         corrupt = container_of(cache,
3597                                                struct btrfs_corrupt_block,
3598                                                cache);
3599                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3600                                cache->start, corrupt->level,
3601                                corrupt->key.objectid, corrupt->key.type,
3602                                corrupt->key.offset);
3603                         cache = next_cache_extent(cache);
3604                 }
3605                 if (repair) {
3606                         printf("Try to repair the btree for root %llu\n",
3607                                root->root_key.objectid);
3608                         ret = repair_btree(root, &corrupt_blocks);
3609                         if (ret < 0)
3610                                 fprintf(stderr, "Failed to repair btree: %s\n",
3611                                         strerror(-ret));
3612                         if (!ret)
3613                                 printf("Btree for root %llu is fixed\n",
3614                                        root->root_key.objectid);
3615                 }
3616         }
3617
3618         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3619         if (err < 0)
3620                 ret = err;
3621
3622         if (root_node.current) {
3623                 root_node.current->checked = 1;
3624                 maybe_free_inode_rec(&root_node.inode_cache,
3625                                 root_node.current);
3626         }
3627
3628         err = check_inode_recs(root, &root_node.inode_cache);
3629         if (!ret)
3630                 ret = err;
3631
3632         free_corrupt_blocks_tree(&corrupt_blocks);
3633         root->fs_info->corrupt_blocks = NULL;
3634         free_orphan_data_extents(&root->orphan_data_extents);
3635         return ret;
3636 }
3637
3638 static int fs_root_objectid(u64 objectid)
3639 {
3640         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3641             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3642                 return 1;
3643         return is_fstree(objectid);
3644 }
3645
3646 static int check_fs_roots(struct btrfs_root *root,
3647                           struct cache_tree *root_cache)
3648 {
3649         struct btrfs_path path;
3650         struct btrfs_key key;
3651         struct walk_control wc;
3652         struct extent_buffer *leaf, *tree_node;
3653         struct btrfs_root *tmp_root;
3654         struct btrfs_root *tree_root = root->fs_info->tree_root;
3655         int ret;
3656         int err = 0;
3657
3658         if (ctx.progress_enabled) {
3659                 ctx.tp = TASK_FS_ROOTS;
3660                 task_start(ctx.info);
3661         }
3662
3663         /*
3664          * Just in case we made any changes to the extent tree that weren't
3665          * reflected into the free space cache yet.
3666          */
3667         if (repair)
3668                 reset_cached_block_groups(root->fs_info);
3669         memset(&wc, 0, sizeof(wc));
3670         cache_tree_init(&wc.shared);
3671         btrfs_init_path(&path);
3672
3673 again:
3674         key.offset = 0;
3675         key.objectid = 0;
3676         key.type = BTRFS_ROOT_ITEM_KEY;
3677         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3678         if (ret < 0) {
3679                 err = 1;
3680                 goto out;
3681         }
3682         tree_node = tree_root->node;
3683         while (1) {
3684                 if (tree_node != tree_root->node) {
3685                         free_root_recs_tree(root_cache);
3686                         btrfs_release_path(&path);
3687                         goto again;
3688                 }
3689                 leaf = path.nodes[0];
3690                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3691                         ret = btrfs_next_leaf(tree_root, &path);
3692                         if (ret) {
3693                                 if (ret < 0)
3694                                         err = 1;
3695                                 break;
3696                         }
3697                         leaf = path.nodes[0];
3698                 }
3699                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3700                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3701                     fs_root_objectid(key.objectid)) {
3702                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3703                                 tmp_root = btrfs_read_fs_root_no_cache(
3704                                                 root->fs_info, &key);
3705                         } else {
3706                                 key.offset = (u64)-1;
3707                                 tmp_root = btrfs_read_fs_root(
3708                                                 root->fs_info, &key);
3709                         }
3710                         if (IS_ERR(tmp_root)) {
3711                                 err = 1;
3712                                 goto next;
3713                         }
3714                         ret = check_fs_root(tmp_root, root_cache, &wc);
3715                         if (ret == -EAGAIN) {
3716                                 free_root_recs_tree(root_cache);
3717                                 btrfs_release_path(&path);
3718                                 goto again;
3719                         }
3720                         if (ret)
3721                                 err = 1;
3722                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3723                                 btrfs_free_fs_root(tmp_root);
3724                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3725                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3726                         process_root_ref(leaf, path.slots[0], &key,
3727                                          root_cache);
3728                 }
3729 next:
3730                 path.slots[0]++;
3731         }
3732 out:
3733         btrfs_release_path(&path);
3734         if (err)
3735                 free_extent_cache_tree(&wc.shared);
3736         if (!cache_tree_empty(&wc.shared))
3737                 fprintf(stderr, "warning line %d\n", __LINE__);
3738
3739         task_stop(ctx.info);
3740
3741         return err;
3742 }
3743
3744 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3745 {
3746         struct list_head *cur = rec->backrefs.next;
3747         struct extent_backref *back;
3748         struct tree_backref *tback;
3749         struct data_backref *dback;
3750         u64 found = 0;
3751         int err = 0;
3752
3753         while(cur != &rec->backrefs) {
3754                 back = list_entry(cur, struct extent_backref, list);
3755                 cur = cur->next;
3756                 if (!back->found_extent_tree) {
3757                         err = 1;
3758                         if (!print_errs)
3759                                 goto out;
3760                         if (back->is_data) {
3761                                 dback = (struct data_backref *)back;
3762                                 fprintf(stderr, "Backref %llu %s %llu"
3763                                         " owner %llu offset %llu num_refs %lu"
3764                                         " not found in extent tree\n",
3765                                         (unsigned long long)rec->start,
3766                                         back->full_backref ?
3767                                         "parent" : "root",
3768                                         back->full_backref ?
3769                                         (unsigned long long)dback->parent:
3770                                         (unsigned long long)dback->root,
3771                                         (unsigned long long)dback->owner,
3772                                         (unsigned long long)dback->offset,
3773                                         (unsigned long)dback->num_refs);
3774                         } else {
3775                                 tback = (struct tree_backref *)back;
3776                                 fprintf(stderr, "Backref %llu parent %llu"
3777                                         " root %llu not found in extent tree\n",
3778                                         (unsigned long long)rec->start,
3779                                         (unsigned long long)tback->parent,
3780                                         (unsigned long long)tback->root);
3781                         }
3782                 }
3783                 if (!back->is_data && !back->found_ref) {
3784                         err = 1;
3785                         if (!print_errs)
3786                                 goto out;
3787                         tback = (struct tree_backref *)back;
3788                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3789                                 (unsigned long long)rec->start,
3790                                 back->full_backref ? "parent" : "root",
3791                                 back->full_backref ?
3792                                 (unsigned long long)tback->parent :
3793                                 (unsigned long long)tback->root, back);
3794                 }
3795                 if (back->is_data) {
3796                         dback = (struct data_backref *)back;
3797                         if (dback->found_ref != dback->num_refs) {
3798                                 err = 1;
3799                                 if (!print_errs)
3800                                         goto out;
3801                                 fprintf(stderr, "Incorrect local backref count"
3802                                         " on %llu %s %llu owner %llu"
3803                                         " offset %llu found %u wanted %u back %p\n",
3804                                         (unsigned long long)rec->start,
3805                                         back->full_backref ?
3806                                         "parent" : "root",
3807                                         back->full_backref ?
3808                                         (unsigned long long)dback->parent:
3809                                         (unsigned long long)dback->root,
3810                                         (unsigned long long)dback->owner,
3811                                         (unsigned long long)dback->offset,
3812                                         dback->found_ref, dback->num_refs, back);
3813                         }
3814                         if (dback->disk_bytenr != rec->start) {
3815                                 err = 1;
3816                                 if (!print_errs)
3817                                         goto out;
3818                                 fprintf(stderr, "Backref disk bytenr does not"
3819                                         " match extent record, bytenr=%llu, "
3820                                         "ref bytenr=%llu\n",
3821                                         (unsigned long long)rec->start,
3822                                         (unsigned long long)dback->disk_bytenr);
3823                         }
3824
3825                         if (dback->bytes != rec->nr) {
3826                                 err = 1;
3827                                 if (!print_errs)
3828                                         goto out;
3829                                 fprintf(stderr, "Backref bytes do not match "
3830                                         "extent backref, bytenr=%llu, ref "
3831                                         "bytes=%llu, backref bytes=%llu\n",
3832                                         (unsigned long long)rec->start,
3833                                         (unsigned long long)rec->nr,
3834                                         (unsigned long long)dback->bytes);
3835                         }
3836                 }
3837                 if (!back->is_data) {
3838                         found += 1;
3839                 } else {
3840                         dback = (struct data_backref *)back;
3841                         found += dback->found_ref;
3842                 }
3843         }
3844         if (found != rec->refs) {
3845                 err = 1;
3846                 if (!print_errs)
3847                         goto out;
3848                 fprintf(stderr, "Incorrect global backref count "
3849                         "on %llu found %llu wanted %llu\n",
3850                         (unsigned long long)rec->start,
3851                         (unsigned long long)found,
3852                         (unsigned long long)rec->refs);
3853         }
3854 out:
3855         return err;
3856 }
3857
3858 static int free_all_extent_backrefs(struct extent_record *rec)
3859 {
3860         struct extent_backref *back;
3861         struct list_head *cur;
3862         while (!list_empty(&rec->backrefs)) {
3863                 cur = rec->backrefs.next;
3864                 back = list_entry(cur, struct extent_backref, list);
3865                 list_del(cur);
3866                 free(back);
3867         }
3868         return 0;
3869 }
3870
3871 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3872                                      struct cache_tree *extent_cache)
3873 {
3874         struct cache_extent *cache;
3875         struct extent_record *rec;
3876
3877         while (1) {
3878                 cache = first_cache_extent(extent_cache);
3879                 if (!cache)
3880                         break;
3881                 rec = container_of(cache, struct extent_record, cache);
3882                 remove_cache_extent(extent_cache, cache);
3883                 free_all_extent_backrefs(rec);
3884                 free(rec);
3885         }
3886 }
3887
3888 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
3889                                  struct extent_record *rec)
3890 {
3891         if (rec->content_checked && rec->owner_ref_checked &&
3892             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
3893             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
3894             !rec->bad_full_backref && !rec->crossing_stripes &&
3895             !rec->wrong_chunk_type) {
3896                 remove_cache_extent(extent_cache, &rec->cache);
3897                 free_all_extent_backrefs(rec);
3898                 list_del_init(&rec->list);
3899                 free(rec);
3900         }
3901         return 0;
3902 }
3903
3904 static int check_owner_ref(struct btrfs_root *root,
3905                             struct extent_record *rec,
3906                             struct extent_buffer *buf)
3907 {
3908         struct extent_backref *node;
3909         struct tree_backref *back;
3910         struct btrfs_root *ref_root;
3911         struct btrfs_key key;
3912         struct btrfs_path path;
3913         struct extent_buffer *parent;
3914         int level;
3915         int found = 0;
3916         int ret;
3917
3918         list_for_each_entry(node, &rec->backrefs, list) {
3919                 if (node->is_data)
3920                         continue;
3921                 if (!node->found_ref)
3922                         continue;
3923                 if (node->full_backref)
3924                         continue;
3925                 back = (struct tree_backref *)node;
3926                 if (btrfs_header_owner(buf) == back->root)
3927                         return 0;
3928         }
3929         BUG_ON(rec->is_root);
3930
3931         /* try to find the block by search corresponding fs tree */
3932         key.objectid = btrfs_header_owner(buf);
3933         key.type = BTRFS_ROOT_ITEM_KEY;
3934         key.offset = (u64)-1;
3935
3936         ref_root = btrfs_read_fs_root(root->fs_info, &key);
3937         if (IS_ERR(ref_root))
3938                 return 1;
3939
3940         level = btrfs_header_level(buf);
3941         if (level == 0)
3942                 btrfs_item_key_to_cpu(buf, &key, 0);
3943         else
3944                 btrfs_node_key_to_cpu(buf, &key, 0);
3945
3946         btrfs_init_path(&path);
3947         path.lowest_level = level + 1;
3948         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
3949         if (ret < 0)
3950                 return 0;
3951
3952         parent = path.nodes[level + 1];
3953         if (parent && buf->start == btrfs_node_blockptr(parent,
3954                                                         path.slots[level + 1]))
3955                 found = 1;
3956
3957         btrfs_release_path(&path);
3958         return found ? 0 : 1;
3959 }
3960
3961 static int is_extent_tree_record(struct extent_record *rec)
3962 {
3963         struct list_head *cur = rec->backrefs.next;
3964         struct extent_backref *node;
3965         struct tree_backref *back;
3966         int is_extent = 0;
3967
3968         while(cur != &rec->backrefs) {
3969                 node = list_entry(cur, struct extent_backref, list);
3970                 cur = cur->next;
3971                 if (node->is_data)
3972                         return 0;
3973                 back = (struct tree_backref *)node;
3974                 if (node->full_backref)
3975                         return 0;
3976                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
3977                         is_extent = 1;
3978         }
3979         return is_extent;
3980 }
3981
3982
3983 static int record_bad_block_io(struct btrfs_fs_info *info,
3984                                struct cache_tree *extent_cache,
3985                                u64 start, u64 len)
3986 {
3987         struct extent_record *rec;
3988         struct cache_extent *cache;
3989         struct btrfs_key key;
3990
3991         cache = lookup_cache_extent(extent_cache, start, len);
3992         if (!cache)
3993                 return 0;
3994
3995         rec = container_of(cache, struct extent_record, cache);
3996         if (!is_extent_tree_record(rec))
3997                 return 0;
3998
3999         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4000         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4001 }
4002
4003 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4004                        struct extent_buffer *buf, int slot)
4005 {
4006         if (btrfs_header_level(buf)) {
4007                 struct btrfs_key_ptr ptr1, ptr2;
4008
4009                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4010                                    sizeof(struct btrfs_key_ptr));
4011                 read_extent_buffer(buf, &ptr2,
4012                                    btrfs_node_key_ptr_offset(slot + 1),
4013                                    sizeof(struct btrfs_key_ptr));
4014                 write_extent_buffer(buf, &ptr1,
4015                                     btrfs_node_key_ptr_offset(slot + 1),
4016                                     sizeof(struct btrfs_key_ptr));
4017                 write_extent_buffer(buf, &ptr2,
4018                                     btrfs_node_key_ptr_offset(slot),
4019                                     sizeof(struct btrfs_key_ptr));
4020                 if (slot == 0) {
4021                         struct btrfs_disk_key key;
4022                         btrfs_node_key(buf, &key, 0);
4023                         btrfs_fixup_low_keys(root, path, &key,
4024                                              btrfs_header_level(buf) + 1);
4025                 }
4026         } else {
4027                 struct btrfs_item *item1, *item2;
4028                 struct btrfs_key k1, k2;
4029                 char *item1_data, *item2_data;
4030                 u32 item1_offset, item2_offset, item1_size, item2_size;
4031
4032                 item1 = btrfs_item_nr(slot);
4033                 item2 = btrfs_item_nr(slot + 1);
4034                 btrfs_item_key_to_cpu(buf, &k1, slot);
4035                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4036                 item1_offset = btrfs_item_offset(buf, item1);
4037                 item2_offset = btrfs_item_offset(buf, item2);
4038                 item1_size = btrfs_item_size(buf, item1);
4039                 item2_size = btrfs_item_size(buf, item2);
4040
4041                 item1_data = malloc(item1_size);
4042                 if (!item1_data)
4043                         return -ENOMEM;
4044                 item2_data = malloc(item2_size);
4045                 if (!item2_data) {
4046                         free(item1_data);
4047                         return -ENOMEM;
4048                 }
4049
4050                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4051                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4052
4053                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4054                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4055                 free(item1_data);
4056                 free(item2_data);
4057
4058                 btrfs_set_item_offset(buf, item1, item2_offset);
4059                 btrfs_set_item_offset(buf, item2, item1_offset);
4060                 btrfs_set_item_size(buf, item1, item2_size);
4061                 btrfs_set_item_size(buf, item2, item1_size);
4062
4063                 path->slots[0] = slot;
4064                 btrfs_set_item_key_unsafe(root, path, &k2);
4065                 path->slots[0] = slot + 1;
4066                 btrfs_set_item_key_unsafe(root, path, &k1);
4067         }
4068         return 0;
4069 }
4070
4071 static int fix_key_order(struct btrfs_trans_handle *trans,
4072                          struct btrfs_root *root,
4073                          struct btrfs_path *path)
4074 {
4075         struct extent_buffer *buf;
4076         struct btrfs_key k1, k2;
4077         int i;
4078         int level = path->lowest_level;
4079         int ret = -EIO;
4080
4081         buf = path->nodes[level];
4082         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4083                 if (level) {
4084                         btrfs_node_key_to_cpu(buf, &k1, i);
4085                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
4086                 } else {
4087                         btrfs_item_key_to_cpu(buf, &k1, i);
4088                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
4089                 }
4090                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4091                         continue;
4092                 ret = swap_values(root, path, buf, i);
4093                 if (ret)
4094                         break;
4095                 btrfs_mark_buffer_dirty(buf);
4096                 i = 0;
4097         }
4098         return ret;
4099 }
4100
4101 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4102                              struct btrfs_root *root,
4103                              struct btrfs_path *path,
4104                              struct extent_buffer *buf, int slot)
4105 {
4106         struct btrfs_key key;
4107         int nritems = btrfs_header_nritems(buf);
4108
4109         btrfs_item_key_to_cpu(buf, &key, slot);
4110
4111         /* These are all the keys we can deal with missing. */
4112         if (key.type != BTRFS_DIR_INDEX_KEY &&
4113             key.type != BTRFS_EXTENT_ITEM_KEY &&
4114             key.type != BTRFS_METADATA_ITEM_KEY &&
4115             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4116             key.type != BTRFS_EXTENT_DATA_REF_KEY)
4117                 return -1;
4118
4119         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4120                (unsigned long long)key.objectid, key.type,
4121                (unsigned long long)key.offset, slot, buf->start);
4122         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4123                               btrfs_item_nr_offset(slot + 1),
4124                               sizeof(struct btrfs_item) *
4125                               (nritems - slot - 1));
4126         btrfs_set_header_nritems(buf, nritems - 1);
4127         if (slot == 0) {
4128                 struct btrfs_disk_key disk_key;
4129
4130                 btrfs_item_key(buf, &disk_key, 0);
4131                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4132         }
4133         btrfs_mark_buffer_dirty(buf);
4134         return 0;
4135 }
4136
4137 static int fix_item_offset(struct btrfs_trans_handle *trans,
4138                            struct btrfs_root *root,
4139                            struct btrfs_path *path)
4140 {
4141         struct extent_buffer *buf;
4142         int i;
4143         int ret = 0;
4144
4145         /* We should only get this for leaves */
4146         BUG_ON(path->lowest_level);
4147         buf = path->nodes[0];
4148 again:
4149         for (i = 0; i < btrfs_header_nritems(buf); i++) {
4150                 unsigned int shift = 0, offset;
4151
4152                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4153                     BTRFS_LEAF_DATA_SIZE(root)) {
4154                         if (btrfs_item_end_nr(buf, i) >
4155                             BTRFS_LEAF_DATA_SIZE(root)) {
4156                                 ret = delete_bogus_item(trans, root, path,
4157                                                         buf, i);
4158                                 if (!ret)
4159                                         goto again;
4160                                 fprintf(stderr, "item is off the end of the "
4161                                         "leaf, can't fix\n");
4162                                 ret = -EIO;
4163                                 break;
4164                         }
4165                         shift = BTRFS_LEAF_DATA_SIZE(root) -
4166                                 btrfs_item_end_nr(buf, i);
4167                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4168                            btrfs_item_offset_nr(buf, i - 1)) {
4169                         if (btrfs_item_end_nr(buf, i) >
4170                             btrfs_item_offset_nr(buf, i - 1)) {
4171                                 ret = delete_bogus_item(trans, root, path,
4172                                                         buf, i);
4173                                 if (!ret)
4174                                         goto again;
4175                                 fprintf(stderr, "items overlap, can't fix\n");
4176                                 ret = -EIO;
4177                                 break;
4178                         }
4179                         shift = btrfs_item_offset_nr(buf, i - 1) -
4180                                 btrfs_item_end_nr(buf, i);
4181                 }
4182                 if (!shift)
4183                         continue;
4184
4185                 printf("Shifting item nr %d by %u bytes in block %llu\n",
4186                        i, shift, (unsigned long long)buf->start);
4187                 offset = btrfs_item_offset_nr(buf, i);
4188                 memmove_extent_buffer(buf,
4189                                       btrfs_leaf_data(buf) + offset + shift,
4190                                       btrfs_leaf_data(buf) + offset,
4191                                       btrfs_item_size_nr(buf, i));
4192                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4193                                       offset + shift);
4194                 btrfs_mark_buffer_dirty(buf);
4195         }
4196
4197         /*
4198          * We may have moved things, in which case we want to exit so we don't
4199          * write those changes out.  Once we have proper abort functionality in
4200          * progs this can be changed to something nicer.
4201          */
4202         BUG_ON(ret);
4203         return ret;
4204 }
4205
4206 /*
4207  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4208  * then just return -EIO.
4209  */
4210 static int try_to_fix_bad_block(struct btrfs_root *root,
4211                                 struct extent_buffer *buf,
4212                                 enum btrfs_tree_block_status status)
4213 {
4214         struct btrfs_trans_handle *trans;
4215         struct ulist *roots;
4216         struct ulist_node *node;
4217         struct btrfs_root *search_root;
4218         struct btrfs_path *path;
4219         struct ulist_iterator iter;
4220         struct btrfs_key root_key, key;
4221         int ret;
4222
4223         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4224             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4225                 return -EIO;
4226
4227         path = btrfs_alloc_path();
4228         if (!path)
4229                 return -EIO;
4230
4231         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4232                                    0, &roots);
4233         if (ret) {
4234                 btrfs_free_path(path);
4235                 return -EIO;
4236         }
4237
4238         ULIST_ITER_INIT(&iter);
4239         while ((node = ulist_next(roots, &iter))) {
4240                 root_key.objectid = node->val;
4241                 root_key.type = BTRFS_ROOT_ITEM_KEY;
4242                 root_key.offset = (u64)-1;
4243
4244                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4245                 if (IS_ERR(root)) {
4246                         ret = -EIO;
4247                         break;
4248                 }
4249
4250
4251                 trans = btrfs_start_transaction(search_root, 0);
4252                 if (IS_ERR(trans)) {
4253                         ret = PTR_ERR(trans);
4254                         break;
4255                 }
4256
4257                 path->lowest_level = btrfs_header_level(buf);
4258                 path->skip_check_block = 1;
4259                 if (path->lowest_level)
4260                         btrfs_node_key_to_cpu(buf, &key, 0);
4261                 else
4262                         btrfs_item_key_to_cpu(buf, &key, 0);
4263                 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4264                 if (ret) {
4265                         ret = -EIO;
4266                         btrfs_commit_transaction(trans, search_root);
4267                         break;
4268                 }
4269                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4270                         ret = fix_key_order(trans, search_root, path);
4271                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4272                         ret = fix_item_offset(trans, search_root, path);
4273                 if (ret) {
4274                         btrfs_commit_transaction(trans, search_root);
4275                         break;
4276                 }
4277                 btrfs_release_path(path);
4278                 btrfs_commit_transaction(trans, search_root);
4279         }
4280         ulist_free(roots);
4281         btrfs_free_path(path);
4282         return ret;
4283 }
4284
4285 static int check_block(struct btrfs_root *root,
4286                        struct cache_tree *extent_cache,
4287                        struct extent_buffer *buf, u64 flags)
4288 {
4289         struct extent_record *rec;
4290         struct cache_extent *cache;
4291         struct btrfs_key key;
4292         enum btrfs_tree_block_status status;
4293         int ret = 0;
4294         int level;
4295
4296         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4297         if (!cache)
4298                 return 1;
4299         rec = container_of(cache, struct extent_record, cache);
4300         rec->generation = btrfs_header_generation(buf);
4301
4302         level = btrfs_header_level(buf);
4303         if (btrfs_header_nritems(buf) > 0) {
4304
4305                 if (level == 0)
4306                         btrfs_item_key_to_cpu(buf, &key, 0);
4307                 else
4308                         btrfs_node_key_to_cpu(buf, &key, 0);
4309
4310                 rec->info_objectid = key.objectid;
4311         }
4312         rec->info_level = level;
4313
4314         if (btrfs_is_leaf(buf))
4315                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4316         else
4317                 status = btrfs_check_node(root, &rec->parent_key, buf);
4318
4319         if (status != BTRFS_TREE_BLOCK_CLEAN) {
4320                 if (repair)
4321                         status = try_to_fix_bad_block(root, buf, status);
4322                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4323                         ret = -EIO;
4324                         fprintf(stderr, "bad block %llu\n",
4325                                 (unsigned long long)buf->start);
4326                 } else {
4327                         /*
4328                          * Signal to callers we need to start the scan over
4329                          * again since we'll have cowed blocks.
4330                          */
4331                         ret = -EAGAIN;
4332                 }
4333         } else {
4334                 rec->content_checked = 1;
4335                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4336                         rec->owner_ref_checked = 1;
4337                 else {
4338                         ret = check_owner_ref(root, rec, buf);
4339                         if (!ret)
4340                                 rec->owner_ref_checked = 1;
4341                 }
4342         }
4343         if (!ret)
4344                 maybe_free_extent_rec(extent_cache, rec);
4345         return ret;
4346 }
4347
4348 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4349                                                 u64 parent, u64 root)
4350 {
4351         struct list_head *cur = rec->backrefs.next;
4352         struct extent_backref *node;
4353         struct tree_backref *back;
4354
4355         while(cur != &rec->backrefs) {
4356                 node = list_entry(cur, struct extent_backref, list);
4357                 cur = cur->next;
4358                 if (node->is_data)
4359                         continue;
4360                 back = (struct tree_backref *)node;
4361                 if (parent > 0) {
4362                         if (!node->full_backref)
4363                                 continue;
4364                         if (parent == back->parent)
4365                                 return back;
4366                 } else {
4367                         if (node->full_backref)
4368                                 continue;
4369                         if (back->root == root)
4370                                 return back;
4371                 }
4372         }
4373         return NULL;
4374 }
4375
4376 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4377                                                 u64 parent, u64 root)
4378 {
4379         struct tree_backref *ref = malloc(sizeof(*ref));
4380
4381         if (!ref)
4382                 return NULL;
4383         memset(&ref->node, 0, sizeof(ref->node));
4384         if (parent > 0) {
4385                 ref->parent = parent;
4386                 ref->node.full_backref = 1;
4387         } else {
4388                 ref->root = root;
4389                 ref->node.full_backref = 0;
4390         }
4391         list_add_tail(&ref->node.list, &rec->backrefs);
4392
4393         return ref;
4394 }
4395
4396 static struct data_backref *find_data_backref(struct extent_record *rec,
4397                                                 u64 parent, u64 root,
4398                                                 u64 owner, u64 offset,
4399                                                 int found_ref,
4400                                                 u64 disk_bytenr, u64 bytes)
4401 {
4402         struct list_head *cur = rec->backrefs.next;
4403         struct extent_backref *node;
4404         struct data_backref *back;
4405
4406         while(cur != &rec->backrefs) {
4407                 node = list_entry(cur, struct extent_backref, list);
4408                 cur = cur->next;
4409                 if (!node->is_data)
4410                         continue;
4411                 back = (struct data_backref *)node;
4412                 if (parent > 0) {
4413                         if (!node->full_backref)
4414                                 continue;
4415                         if (parent == back->parent)
4416                                 return back;
4417                 } else {
4418                         if (node->full_backref)
4419                                 continue;
4420                         if (back->root == root && back->owner == owner &&
4421                             back->offset == offset) {
4422                                 if (found_ref && node->found_ref &&
4423                                     (back->bytes != bytes ||
4424                                     back->disk_bytenr != disk_bytenr))
4425                                         continue;
4426                                 return back;
4427                         }
4428                 }
4429         }
4430         return NULL;
4431 }
4432
4433 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4434                                                 u64 parent, u64 root,
4435                                                 u64 owner, u64 offset,
4436                                                 u64 max_size)
4437 {
4438         struct data_backref *ref = malloc(sizeof(*ref));
4439
4440         if (!ref)
4441                 return NULL;
4442         memset(&ref->node, 0, sizeof(ref->node));
4443         ref->node.is_data = 1;
4444
4445         if (parent > 0) {
4446                 ref->parent = parent;
4447                 ref->owner = 0;
4448                 ref->offset = 0;
4449                 ref->node.full_backref = 1;
4450         } else {
4451                 ref->root = root;
4452                 ref->owner = owner;
4453                 ref->offset = offset;
4454                 ref->node.full_backref = 0;
4455         }
4456         ref->bytes = max_size;
4457         ref->found_ref = 0;
4458         ref->num_refs = 0;
4459         list_add_tail(&ref->node.list, &rec->backrefs);
4460         if (max_size > rec->max_size)
4461                 rec->max_size = max_size;
4462         return ref;
4463 }
4464
4465 /* Check if the type of extent matches with its chunk */
4466 static void check_extent_type(struct extent_record *rec)
4467 {
4468         struct btrfs_block_group_cache *bg_cache;
4469
4470         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4471         if (!bg_cache)
4472                 return;
4473
4474         /* data extent, check chunk directly*/
4475         if (!rec->metadata) {
4476                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4477                         rec->wrong_chunk_type = 1;
4478                 return;
4479         }
4480
4481         /* metadata extent, check the obvious case first */
4482         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4483                                  BTRFS_BLOCK_GROUP_METADATA))) {
4484                 rec->wrong_chunk_type = 1;
4485                 return;
4486         }
4487
4488         /*
4489          * Check SYSTEM extent, as it's also marked as metadata, we can only
4490          * make sure it's a SYSTEM extent by its backref
4491          */
4492         if (!list_empty(&rec->backrefs)) {
4493                 struct extent_backref *node;
4494                 struct tree_backref *tback;
4495                 u64 bg_type;
4496
4497                 node = list_entry(rec->backrefs.next, struct extent_backref,
4498                                   list);
4499                 if (node->is_data) {
4500                         /* tree block shouldn't have data backref */
4501                         rec->wrong_chunk_type = 1;
4502                         return;
4503                 }
4504                 tback = container_of(node, struct tree_backref, node);
4505
4506                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4507                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4508                 else
4509                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
4510                 if (!(bg_cache->flags & bg_type))
4511                         rec->wrong_chunk_type = 1;
4512         }
4513 }
4514
4515 /*
4516  * Allocate a new extent record, fill default values from @tmpl and insert int
4517  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4518  * the cache, otherwise it fails.
4519  */
4520 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4521                 struct extent_record *tmpl)
4522 {
4523         struct extent_record *rec;
4524         int ret = 0;
4525
4526         rec = malloc(sizeof(*rec));
4527         if (!rec)
4528                 return -ENOMEM;
4529         rec->start = tmpl->start;
4530         rec->max_size = tmpl->max_size;
4531         rec->nr = max(tmpl->nr, tmpl->max_size);
4532         rec->found_rec = tmpl->found_rec;
4533         rec->content_checked = tmpl->content_checked;
4534         rec->owner_ref_checked = tmpl->owner_ref_checked;
4535         rec->num_duplicates = 0;
4536         rec->metadata = tmpl->metadata;
4537         rec->flag_block_full_backref = FLAG_UNSET;
4538         rec->bad_full_backref = 0;
4539         rec->crossing_stripes = 0;
4540         rec->wrong_chunk_type = 0;
4541         rec->is_root = tmpl->is_root;
4542         rec->refs = tmpl->refs;
4543         rec->extent_item_refs = tmpl->extent_item_refs;
4544         rec->parent_generation = tmpl->parent_generation;
4545         INIT_LIST_HEAD(&rec->backrefs);
4546         INIT_LIST_HEAD(&rec->dups);
4547         INIT_LIST_HEAD(&rec->list);
4548         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4549         rec->cache.start = tmpl->start;
4550         rec->cache.size = tmpl->nr;
4551         ret = insert_cache_extent(extent_cache, &rec->cache);
4552         BUG_ON(ret);
4553         bytes_used += rec->nr;
4554
4555         if (tmpl->metadata)
4556                 rec->crossing_stripes = check_crossing_stripes(rec->start,
4557                                 global_info->tree_root->nodesize);
4558         check_extent_type(rec);
4559         return ret;
4560 }
4561
4562 /*
4563  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4564  * some are hints:
4565  * - refs              - if found, increase refs
4566  * - is_root           - if found, set
4567  * - content_checked   - if found, set
4568  * - owner_ref_checked - if found, set
4569  *
4570  * If not found, create a new one, initialize and insert.
4571  */
4572 static int add_extent_rec(struct cache_tree *extent_cache,
4573                 struct extent_record *tmpl)
4574 {
4575         struct extent_record *rec;
4576         struct cache_extent *cache;
4577         int ret = 0;
4578         int dup = 0;
4579
4580         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4581         if (cache) {
4582                 rec = container_of(cache, struct extent_record, cache);
4583                 if (tmpl->refs)
4584                         rec->refs++;
4585                 if (rec->nr == 1)
4586                         rec->nr = max(tmpl->nr, tmpl->max_size);
4587
4588                 /*
4589                  * We need to make sure to reset nr to whatever the extent
4590                  * record says was the real size, this way we can compare it to
4591                  * the backrefs.
4592                  */
4593                 if (tmpl->found_rec) {
4594                         if (tmpl->start != rec->start || rec->found_rec) {
4595                                 struct extent_record *tmp;
4596
4597                                 dup = 1;
4598                                 if (list_empty(&rec->list))
4599                                         list_add_tail(&rec->list,
4600                                                       &duplicate_extents);
4601
4602                                 /*
4603                                  * We have to do this song and dance in case we
4604                                  * find an extent record that falls inside of
4605                                  * our current extent record but does not have
4606                                  * the same objectid.
4607                                  */
4608                                 tmp = malloc(sizeof(*tmp));
4609                                 if (!tmp)
4610                                         return -ENOMEM;
4611                                 tmp->start = tmpl->start;
4612                                 tmp->max_size = tmpl->max_size;
4613                                 tmp->nr = tmpl->nr;
4614                                 tmp->found_rec = 1;
4615                                 tmp->metadata = tmpl->metadata;
4616                                 tmp->extent_item_refs = tmpl->extent_item_refs;
4617                                 INIT_LIST_HEAD(&tmp->list);
4618                                 list_add_tail(&tmp->list, &rec->dups);
4619                                 rec->num_duplicates++;
4620                         } else {
4621                                 rec->nr = tmpl->nr;
4622                                 rec->found_rec = 1;
4623                         }
4624                 }
4625
4626                 if (tmpl->extent_item_refs && !dup) {
4627                         if (rec->extent_item_refs) {
4628                                 fprintf(stderr, "block %llu rec "
4629                                         "extent_item_refs %llu, passed %llu\n",
4630                                         (unsigned long long)tmpl->start,
4631                                         (unsigned long long)
4632                                                         rec->extent_item_refs,
4633                                         (unsigned long long)tmpl->extent_item_refs);
4634                         }
4635                         rec->extent_item_refs = tmpl->extent_item_refs;
4636                 }
4637                 if (tmpl->is_root)
4638                         rec->is_root = 1;
4639                 if (tmpl->content_checked)
4640                         rec->content_checked = 1;
4641                 if (tmpl->owner_ref_checked)
4642                         rec->owner_ref_checked = 1;
4643                 memcpy(&rec->parent_key, &tmpl->parent_key,
4644                                 sizeof(tmpl->parent_key));
4645                 if (tmpl->parent_generation)
4646                         rec->parent_generation = tmpl->parent_generation;
4647                 if (rec->max_size < tmpl->max_size)
4648                         rec->max_size = tmpl->max_size;
4649
4650                 /*
4651                  * A metadata extent can't cross stripe_len boundary, otherwise
4652                  * kernel scrub won't be able to handle it.
4653                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4654                  * it.
4655                  */
4656                 if (tmpl->metadata)
4657                         rec->crossing_stripes = check_crossing_stripes(
4658                                 rec->start, global_info->tree_root->nodesize);
4659                 check_extent_type(rec);
4660                 maybe_free_extent_rec(extent_cache, rec);
4661                 return ret;
4662         }
4663
4664         ret = add_extent_rec_nolookup(extent_cache, tmpl);
4665
4666         return ret;
4667 }
4668
4669 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4670                             u64 parent, u64 root, int found_ref)
4671 {
4672         struct extent_record *rec;
4673         struct tree_backref *back;
4674         struct cache_extent *cache;
4675
4676         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4677         if (!cache) {
4678                 struct extent_record tmpl;
4679
4680                 memset(&tmpl, 0, sizeof(tmpl));
4681                 tmpl.start = bytenr;
4682                 tmpl.nr = 1;
4683                 tmpl.metadata = 1;
4684
4685                 add_extent_rec_nolookup(extent_cache, &tmpl);
4686
4687                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4688                 if (!cache)
4689                         abort();
4690         }
4691
4692         rec = container_of(cache, struct extent_record, cache);
4693         if (rec->start != bytenr) {
4694                 abort();
4695         }
4696
4697         back = find_tree_backref(rec, parent, root);
4698         if (!back) {
4699                 back = alloc_tree_backref(rec, parent, root);
4700                 BUG_ON(!back);
4701         }
4702
4703         if (found_ref) {
4704                 if (back->node.found_ref) {
4705                         fprintf(stderr, "Extent back ref already exists "
4706                                 "for %llu parent %llu root %llu \n",
4707                                 (unsigned long long)bytenr,
4708                                 (unsigned long long)parent,
4709                                 (unsigned long long)root);
4710                 }
4711                 back->node.found_ref = 1;
4712         } else {
4713                 if (back->node.found_extent_tree) {
4714                         fprintf(stderr, "Extent back ref already exists "
4715                                 "for %llu parent %llu root %llu \n",
4716                                 (unsigned long long)bytenr,
4717                                 (unsigned long long)parent,
4718                                 (unsigned long long)root);
4719                 }
4720                 back->node.found_extent_tree = 1;
4721         }
4722         check_extent_type(rec);
4723         maybe_free_extent_rec(extent_cache, rec);
4724         return 0;
4725 }
4726
4727 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4728                             u64 parent, u64 root, u64 owner, u64 offset,
4729                             u32 num_refs, int found_ref, u64 max_size)
4730 {
4731         struct extent_record *rec;
4732         struct data_backref *back;
4733         struct cache_extent *cache;
4734
4735         cache = lookup_cache_extent(extent_cache, bytenr, 1);
4736         if (!cache) {
4737                 struct extent_record tmpl;
4738
4739                 memset(&tmpl, 0, sizeof(tmpl));
4740                 tmpl.start = bytenr;
4741                 tmpl.nr = 1;
4742                 tmpl.max_size = max_size;
4743
4744                 add_extent_rec_nolookup(extent_cache, &tmpl);
4745
4746                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4747                 if (!cache)
4748                         abort();
4749         }
4750
4751         rec = container_of(cache, struct extent_record, cache);
4752         if (rec->max_size < max_size)
4753                 rec->max_size = max_size;
4754
4755         /*
4756          * If found_ref is set then max_size is the real size and must match the
4757          * existing refs.  So if we have already found a ref then we need to
4758          * make sure that this ref matches the existing one, otherwise we need
4759          * to add a new backref so we can notice that the backrefs don't match
4760          * and we need to figure out who is telling the truth.  This is to
4761          * account for that awful fsync bug I introduced where we'd end up with
4762          * a btrfs_file_extent_item that would have its length include multiple
4763          * prealloc extents or point inside of a prealloc extent.
4764          */
4765         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4766                                  bytenr, max_size);
4767         if (!back) {
4768                 back = alloc_data_backref(rec, parent, root, owner, offset,
4769                                           max_size);
4770                 BUG_ON(!back);
4771         }
4772
4773         if (found_ref) {
4774                 BUG_ON(num_refs != 1);
4775                 if (back->node.found_ref)
4776                         BUG_ON(back->bytes != max_size);
4777                 back->node.found_ref = 1;
4778                 back->found_ref += 1;
4779                 back->bytes = max_size;
4780                 back->disk_bytenr = bytenr;
4781                 rec->refs += 1;
4782                 rec->content_checked = 1;
4783                 rec->owner_ref_checked = 1;
4784         } else {
4785                 if (back->node.found_extent_tree) {
4786                         fprintf(stderr, "Extent back ref already exists "
4787                                 "for %llu parent %llu root %llu "
4788                                 "owner %llu offset %llu num_refs %lu\n",
4789                                 (unsigned long long)bytenr,
4790                                 (unsigned long long)parent,
4791                                 (unsigned long long)root,
4792                                 (unsigned long long)owner,
4793                                 (unsigned long long)offset,
4794                                 (unsigned long)num_refs);
4795                 }
4796                 back->num_refs = num_refs;
4797                 back->node.found_extent_tree = 1;
4798         }
4799         maybe_free_extent_rec(extent_cache, rec);
4800         return 0;
4801 }
4802
4803 static int add_pending(struct cache_tree *pending,
4804                        struct cache_tree *seen, u64 bytenr, u32 size)
4805 {
4806         int ret;
4807         ret = add_cache_extent(seen, bytenr, size);
4808         if (ret)
4809                 return ret;
4810         add_cache_extent(pending, bytenr, size);
4811         return 0;
4812 }
4813
4814 static int pick_next_pending(struct cache_tree *pending,
4815                         struct cache_tree *reada,
4816                         struct cache_tree *nodes,
4817                         u64 last, struct block_info *bits, int bits_nr,
4818                         int *reada_bits)
4819 {
4820         unsigned long node_start = last;
4821         struct cache_extent *cache;
4822         int ret;
4823
4824         cache = search_cache_extent(reada, 0);
4825         if (cache) {
4826                 bits[0].start = cache->start;
4827                 bits[0].size = cache->size;
4828                 *reada_bits = 1;
4829                 return 1;
4830         }
4831         *reada_bits = 0;
4832         if (node_start > 32768)
4833                 node_start -= 32768;
4834
4835         cache = search_cache_extent(nodes, node_start);
4836         if (!cache)
4837                 cache = search_cache_extent(nodes, 0);
4838
4839         if (!cache) {
4840                  cache = search_cache_extent(pending, 0);
4841                  if (!cache)
4842                          return 0;
4843                  ret = 0;
4844                  do {
4845                          bits[ret].start = cache->start;
4846                          bits[ret].size = cache->size;
4847                          cache = next_cache_extent(cache);
4848                          ret++;
4849                  } while (cache && ret < bits_nr);
4850                  return ret;
4851         }
4852
4853         ret = 0;
4854         do {
4855                 bits[ret].start = cache->start;
4856                 bits[ret].size = cache->size;
4857                 cache = next_cache_extent(cache);
4858                 ret++;
4859         } while (cache && ret < bits_nr);
4860
4861         if (bits_nr - ret > 8) {
4862                 u64 lookup = bits[0].start + bits[0].size;
4863                 struct cache_extent *next;
4864                 next = search_cache_extent(pending, lookup);
4865                 while(next) {
4866                         if (next->start - lookup > 32768)
4867                                 break;
4868                         bits[ret].start = next->start;
4869                         bits[ret].size = next->size;
4870                         lookup = next->start + next->size;
4871                         ret++;
4872                         if (ret == bits_nr)
4873                                 break;
4874                         next = next_cache_extent(next);
4875                         if (!next)
4876                                 break;
4877                 }
4878         }
4879         return ret;
4880 }
4881
4882 static void free_chunk_record(struct cache_extent *cache)
4883 {
4884         struct chunk_record *rec;
4885
4886         rec = container_of(cache, struct chunk_record, cache);
4887         list_del_init(&rec->list);
4888         list_del_init(&rec->dextents);
4889         free(rec);
4890 }
4891
4892 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
4893 {
4894         cache_tree_free_extents(chunk_cache, free_chunk_record);
4895 }
4896
4897 static void free_device_record(struct rb_node *node)
4898 {
4899         struct device_record *rec;
4900
4901         rec = container_of(node, struct device_record, node);
4902         free(rec);
4903 }
4904
4905 FREE_RB_BASED_TREE(device_cache, free_device_record);
4906
4907 int insert_block_group_record(struct block_group_tree *tree,
4908                               struct block_group_record *bg_rec)
4909 {
4910         int ret;
4911
4912         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
4913         if (ret)
4914                 return ret;
4915
4916         list_add_tail(&bg_rec->list, &tree->block_groups);
4917         return 0;
4918 }
4919
4920 static void free_block_group_record(struct cache_extent *cache)
4921 {
4922         struct block_group_record *rec;
4923
4924         rec = container_of(cache, struct block_group_record, cache);
4925         list_del_init(&rec->list);
4926         free(rec);
4927 }
4928
4929 void free_block_group_tree(struct block_group_tree *tree)
4930 {
4931         cache_tree_free_extents(&tree->tree, free_block_group_record);
4932 }
4933
4934 int insert_device_extent_record(struct device_extent_tree *tree,
4935                                 struct device_extent_record *de_rec)
4936 {
4937         int ret;
4938
4939         /*
4940          * Device extent is a bit different from the other extents, because
4941          * the extents which belong to the different devices may have the
4942          * same start and size, so we need use the special extent cache
4943          * search/insert functions.
4944          */
4945         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
4946         if (ret)
4947                 return ret;
4948
4949         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
4950         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
4951         return 0;
4952 }
4953
4954 static void free_device_extent_record(struct cache_extent *cache)
4955 {
4956         struct device_extent_record *rec;
4957
4958         rec = container_of(cache, struct device_extent_record, cache);
4959         if (!list_empty(&rec->chunk_list))
4960                 list_del_init(&rec->chunk_list);
4961         if (!list_empty(&rec->device_list))
4962                 list_del_init(&rec->device_list);
4963         free(rec);
4964 }
4965
4966 void free_device_extent_tree(struct device_extent_tree *tree)
4967 {
4968         cache_tree_free_extents(&tree->tree, free_device_extent_record);
4969 }
4970
4971 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4972 static int process_extent_ref_v0(struct cache_tree *extent_cache,
4973                                  struct extent_buffer *leaf, int slot)
4974 {
4975         struct btrfs_extent_ref_v0 *ref0;
4976         struct btrfs_key key;
4977
4978         btrfs_item_key_to_cpu(leaf, &key, slot);
4979         ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
4980         if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
4981                 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
4982         } else {
4983                 add_data_backref(extent_cache, key.objectid, key.offset, 0,
4984                                  0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
4985         }
4986         return 0;
4987 }
4988 #endif
4989
4990 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
4991                                             struct btrfs_key *key,
4992                                             int slot)
4993 {
4994         struct btrfs_chunk *ptr;
4995         struct chunk_record *rec;
4996         int num_stripes, i;
4997
4998         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
4999         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5000
5001         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5002         if (!rec) {
5003                 fprintf(stderr, "memory allocation failed\n");
5004                 exit(-1);
5005         }
5006
5007         INIT_LIST_HEAD(&rec->list);
5008         INIT_LIST_HEAD(&rec->dextents);
5009         rec->bg_rec = NULL;
5010
5011         rec->cache.start = key->offset;
5012         rec->cache.size = btrfs_chunk_length(leaf, ptr);
5013
5014         rec->generation = btrfs_header_generation(leaf);
5015
5016         rec->objectid = key->objectid;
5017         rec->type = key->type;
5018         rec->offset = key->offset;
5019
5020         rec->length = rec->cache.size;
5021         rec->owner = btrfs_chunk_owner(leaf, ptr);
5022         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5023         rec->type_flags = btrfs_chunk_type(leaf, ptr);
5024         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5025         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5026         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5027         rec->num_stripes = num_stripes;
5028         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5029
5030         for (i = 0; i < rec->num_stripes; ++i) {
5031                 rec->stripes[i].devid =
5032                         btrfs_stripe_devid_nr(leaf, ptr, i);
5033                 rec->stripes[i].offset =
5034                         btrfs_stripe_offset_nr(leaf, ptr, i);
5035                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5036                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5037                                 BTRFS_UUID_SIZE);
5038         }
5039
5040         return rec;
5041 }
5042
5043 static int process_chunk_item(struct cache_tree *chunk_cache,
5044                               struct btrfs_key *key, struct extent_buffer *eb,
5045                               int slot)
5046 {
5047         struct chunk_record *rec;
5048         int ret = 0;
5049
5050         rec = btrfs_new_chunk_record(eb, key, slot);
5051         ret = insert_cache_extent(chunk_cache, &rec->cache);
5052         if (ret) {
5053                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5054                         rec->offset, rec->length);
5055                 free(rec);
5056         }
5057
5058         return ret;
5059 }
5060
5061 static int process_device_item(struct rb_root *dev_cache,
5062                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5063 {
5064         struct btrfs_dev_item *ptr;
5065         struct device_record *rec;
5066         int ret = 0;
5067
5068         ptr = btrfs_item_ptr(eb,
5069                 slot, struct btrfs_dev_item);
5070
5071         rec = malloc(sizeof(*rec));
5072         if (!rec) {
5073                 fprintf(stderr, "memory allocation failed\n");
5074                 return -ENOMEM;
5075         }
5076
5077         rec->devid = key->offset;
5078         rec->generation = btrfs_header_generation(eb);
5079
5080         rec->objectid = key->objectid;
5081         rec->type = key->type;
5082         rec->offset = key->offset;
5083
5084         rec->devid = btrfs_device_id(eb, ptr);
5085         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5086         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5087
5088         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5089         if (ret) {
5090                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5091                 free(rec);
5092         }
5093
5094         return ret;
5095 }
5096
5097 struct block_group_record *
5098 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5099                              int slot)
5100 {
5101         struct btrfs_block_group_item *ptr;
5102         struct block_group_record *rec;
5103
5104         rec = calloc(1, sizeof(*rec));
5105         if (!rec) {
5106                 fprintf(stderr, "memory allocation failed\n");
5107                 exit(-1);
5108         }
5109
5110         rec->cache.start = key->objectid;
5111         rec->cache.size = key->offset;
5112
5113         rec->generation = btrfs_header_generation(leaf);
5114
5115         rec->objectid = key->objectid;
5116         rec->type = key->type;
5117         rec->offset = key->offset;
5118
5119         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5120         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5121
5122         INIT_LIST_HEAD(&rec->list);
5123
5124         return rec;
5125 }
5126
5127 static int process_block_group_item(struct block_group_tree *block_group_cache,
5128                                     struct btrfs_key *key,
5129                                     struct extent_buffer *eb, int slot)
5130 {
5131         struct block_group_record *rec;
5132         int ret = 0;
5133
5134         rec = btrfs_new_block_group_record(eb, key, slot);
5135         ret = insert_block_group_record(block_group_cache, rec);
5136         if (ret) {
5137                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5138                         rec->objectid, rec->offset);
5139                 free(rec);
5140         }
5141
5142         return ret;
5143 }
5144
5145 struct device_extent_record *
5146 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5147                                struct btrfs_key *key, int slot)
5148 {
5149         struct device_extent_record *rec;
5150         struct btrfs_dev_extent *ptr;
5151
5152         rec = calloc(1, sizeof(*rec));
5153         if (!rec) {
5154                 fprintf(stderr, "memory allocation failed\n");
5155                 exit(-1);
5156         }
5157
5158         rec->cache.objectid = key->objectid;
5159         rec->cache.start = key->offset;
5160
5161         rec->generation = btrfs_header_generation(leaf);
5162
5163         rec->objectid = key->objectid;
5164         rec->type = key->type;
5165         rec->offset = key->offset;
5166
5167         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5168         rec->chunk_objecteid =
5169                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5170         rec->chunk_offset =
5171                 btrfs_dev_extent_chunk_offset(leaf, ptr);
5172         rec->length = btrfs_dev_extent_length(leaf, ptr);
5173         rec->cache.size = rec->length;
5174
5175         INIT_LIST_HEAD(&rec->chunk_list);
5176         INIT_LIST_HEAD(&rec->device_list);
5177
5178         return rec;
5179 }
5180
5181 static int
5182 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5183                            struct btrfs_key *key, struct extent_buffer *eb,
5184                            int slot)
5185 {
5186         struct device_extent_record *rec;
5187         int ret;
5188
5189         rec = btrfs_new_device_extent_record(eb, key, slot);
5190         ret = insert_device_extent_record(dev_extent_cache, rec);
5191         if (ret) {
5192                 fprintf(stderr,
5193                         "Device extent[%llu, %llu, %llu] existed.\n",
5194                         rec->objectid, rec->offset, rec->length);
5195                 free(rec);
5196         }
5197
5198         return ret;
5199 }
5200
5201 static int process_extent_item(struct btrfs_root *root,
5202                                struct cache_tree *extent_cache,
5203                                struct extent_buffer *eb, int slot)
5204 {
5205         struct btrfs_extent_item *ei;
5206         struct btrfs_extent_inline_ref *iref;
5207         struct btrfs_extent_data_ref *dref;
5208         struct btrfs_shared_data_ref *sref;
5209         struct btrfs_key key;
5210         struct extent_record tmpl;
5211         unsigned long end;
5212         unsigned long ptr;
5213         int type;
5214         u32 item_size = btrfs_item_size_nr(eb, slot);
5215         u64 refs = 0;
5216         u64 offset;
5217         u64 num_bytes;
5218         int metadata = 0;
5219
5220         btrfs_item_key_to_cpu(eb, &key, slot);
5221
5222         if (key.type == BTRFS_METADATA_ITEM_KEY) {
5223                 metadata = 1;
5224                 num_bytes = root->nodesize;
5225         } else {
5226                 num_bytes = key.offset;
5227         }
5228
5229         if (item_size < sizeof(*ei)) {
5230 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5231                 struct btrfs_extent_item_v0 *ei0;
5232                 BUG_ON(item_size != sizeof(*ei0));
5233                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5234                 refs = btrfs_extent_refs_v0(eb, ei0);
5235 #else
5236                 BUG();
5237 #endif
5238                 memset(&tmpl, 0, sizeof(tmpl));
5239                 tmpl.start = key.objectid;
5240                 tmpl.nr = num_bytes;
5241                 tmpl.extent_item_refs = refs;
5242                 tmpl.metadata = metadata;
5243                 tmpl.found_rec = 1;
5244                 tmpl.max_size = num_bytes;
5245
5246                 return add_extent_rec(extent_cache, &tmpl);
5247         }
5248
5249         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5250         refs = btrfs_extent_refs(eb, ei);
5251         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5252                 metadata = 1;
5253         else
5254                 metadata = 0;
5255
5256         memset(&tmpl, 0, sizeof(tmpl));
5257         tmpl.start = key.objectid;
5258         tmpl.nr = num_bytes;
5259         tmpl.extent_item_refs = refs;
5260         tmpl.metadata = metadata;
5261         tmpl.found_rec = 1;
5262         tmpl.max_size = num_bytes;
5263         add_extent_rec(extent_cache, &tmpl);
5264
5265         ptr = (unsigned long)(ei + 1);
5266         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5267             key.type == BTRFS_EXTENT_ITEM_KEY)
5268                 ptr += sizeof(struct btrfs_tree_block_info);
5269
5270         end = (unsigned long)ei + item_size;
5271         while (ptr < end) {
5272                 iref = (struct btrfs_extent_inline_ref *)ptr;
5273                 type = btrfs_extent_inline_ref_type(eb, iref);
5274                 offset = btrfs_extent_inline_ref_offset(eb, iref);
5275                 switch (type) {
5276                 case BTRFS_TREE_BLOCK_REF_KEY:
5277                         add_tree_backref(extent_cache, key.objectid,
5278                                          0, offset, 0);
5279                         break;
5280                 case BTRFS_SHARED_BLOCK_REF_KEY:
5281                         add_tree_backref(extent_cache, key.objectid,
5282                                          offset, 0, 0);
5283                         break;
5284                 case BTRFS_EXTENT_DATA_REF_KEY:
5285                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5286                         add_data_backref(extent_cache, key.objectid, 0,
5287                                         btrfs_extent_data_ref_root(eb, dref),
5288                                         btrfs_extent_data_ref_objectid(eb,
5289                                                                        dref),
5290                                         btrfs_extent_data_ref_offset(eb, dref),
5291                                         btrfs_extent_data_ref_count(eb, dref),
5292                                         0, num_bytes);
5293                         break;
5294                 case BTRFS_SHARED_DATA_REF_KEY:
5295                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
5296                         add_data_backref(extent_cache, key.objectid, offset,
5297                                         0, 0, 0,
5298                                         btrfs_shared_data_ref_count(eb, sref),
5299                                         0, num_bytes);
5300                         break;
5301                 default:
5302                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5303                                 key.objectid, key.type, num_bytes);
5304                         goto out;
5305                 }
5306                 ptr += btrfs_extent_inline_ref_size(type);
5307         }
5308         WARN_ON(ptr > end);
5309 out:
5310         return 0;
5311 }
5312
5313 static int check_cache_range(struct btrfs_root *root,
5314                              struct btrfs_block_group_cache *cache,
5315                              u64 offset, u64 bytes)
5316 {
5317         struct btrfs_free_space *entry;
5318         u64 *logical;
5319         u64 bytenr;
5320         int stripe_len;
5321         int i, nr, ret;
5322
5323         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5324                 bytenr = btrfs_sb_offset(i);
5325                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5326                                        cache->key.objectid, bytenr, 0,
5327                                        &logical, &nr, &stripe_len);
5328                 if (ret)
5329                         return ret;
5330
5331                 while (nr--) {
5332                         if (logical[nr] + stripe_len <= offset)
5333                                 continue;
5334                         if (offset + bytes <= logical[nr])
5335                                 continue;
5336                         if (logical[nr] == offset) {
5337                                 if (stripe_len >= bytes) {
5338                                         kfree(logical);
5339                                         return 0;
5340                                 }
5341                                 bytes -= stripe_len;
5342                                 offset += stripe_len;
5343                         } else if (logical[nr] < offset) {
5344                                 if (logical[nr] + stripe_len >=
5345                                     offset + bytes) {
5346                                         kfree(logical);
5347                                         return 0;
5348                                 }
5349                                 bytes = (offset + bytes) -
5350                                         (logical[nr] + stripe_len);
5351                                 offset = logical[nr] + stripe_len;
5352                         } else {
5353                                 /*
5354                                  * Could be tricky, the super may land in the
5355                                  * middle of the area we're checking.  First
5356                                  * check the easiest case, it's at the end.
5357                                  */
5358                                 if (logical[nr] + stripe_len >=
5359                                     bytes + offset) {
5360                                         bytes = logical[nr] - offset;
5361                                         continue;
5362                                 }
5363
5364                                 /* Check the left side */
5365                                 ret = check_cache_range(root, cache,
5366                                                         offset,
5367                                                         logical[nr] - offset);
5368                                 if (ret) {
5369                                         kfree(logical);
5370                                         return ret;
5371                                 }
5372
5373                                 /* Now we continue with the right side */
5374                                 bytes = (offset + bytes) -
5375                                         (logical[nr] + stripe_len);
5376                                 offset = logical[nr] + stripe_len;
5377                         }
5378                 }
5379
5380                 kfree(logical);
5381         }
5382
5383         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5384         if (!entry) {
5385                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5386                         offset, offset+bytes);
5387                 return -EINVAL;
5388         }
5389
5390         if (entry->offset != offset) {
5391                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5392                         entry->offset);
5393                 return -EINVAL;
5394         }
5395
5396         if (entry->bytes != bytes) {
5397                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5398                         bytes, entry->bytes, offset);
5399                 return -EINVAL;
5400         }
5401
5402         unlink_free_space(cache->free_space_ctl, entry);
5403         free(entry);
5404         return 0;
5405 }
5406
5407 static int verify_space_cache(struct btrfs_root *root,
5408                               struct btrfs_block_group_cache *cache)
5409 {
5410         struct btrfs_path *path;
5411         struct extent_buffer *leaf;
5412         struct btrfs_key key;
5413         u64 last;
5414         int ret = 0;
5415
5416         path = btrfs_alloc_path();
5417         if (!path)
5418                 return -ENOMEM;
5419
5420         root = root->fs_info->extent_root;
5421
5422         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5423
5424         key.objectid = last;
5425         key.offset = 0;
5426         key.type = BTRFS_EXTENT_ITEM_KEY;
5427
5428         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5429         if (ret < 0)
5430                 goto out;
5431         ret = 0;
5432         while (1) {
5433                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5434                         ret = btrfs_next_leaf(root, path);
5435                         if (ret < 0)
5436                                 goto out;
5437                         if (ret > 0) {
5438                                 ret = 0;
5439                                 break;
5440                         }
5441                 }
5442                 leaf = path->nodes[0];
5443                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5444                 if (key.objectid >= cache->key.offset + cache->key.objectid)
5445                         break;
5446                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5447                     key.type != BTRFS_METADATA_ITEM_KEY) {
5448                         path->slots[0]++;
5449                         continue;
5450                 }
5451
5452                 if (last == key.objectid) {
5453                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
5454                                 last = key.objectid + key.offset;
5455                         else
5456                                 last = key.objectid + root->nodesize;
5457                         path->slots[0]++;
5458                         continue;
5459                 }
5460
5461                 ret = check_cache_range(root, cache, last,
5462                                         key.objectid - last);
5463                 if (ret)
5464                         break;
5465                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5466                         last = key.objectid + key.offset;
5467                 else
5468                         last = key.objectid + root->nodesize;
5469                 path->slots[0]++;
5470         }
5471
5472         if (last < cache->key.objectid + cache->key.offset)
5473                 ret = check_cache_range(root, cache, last,
5474                                         cache->key.objectid +
5475                                         cache->key.offset - last);
5476
5477 out:
5478         btrfs_free_path(path);
5479
5480         if (!ret &&
5481             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5482                 fprintf(stderr, "There are still entries left in the space "
5483                         "cache\n");
5484                 ret = -EINVAL;
5485         }
5486
5487         return ret;
5488 }
5489
5490 static int check_space_cache(struct btrfs_root *root)
5491 {
5492         struct btrfs_block_group_cache *cache;
5493         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5494         int ret;
5495         int error = 0;
5496
5497         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5498             btrfs_super_generation(root->fs_info->super_copy) !=
5499             btrfs_super_cache_generation(root->fs_info->super_copy)) {
5500                 printf("cache and super generation don't match, space cache "
5501                        "will be invalidated\n");
5502                 return 0;
5503         }
5504
5505         if (ctx.progress_enabled) {
5506                 ctx.tp = TASK_FREE_SPACE;
5507                 task_start(ctx.info);
5508         }
5509
5510         while (1) {
5511                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5512                 if (!cache)
5513                         break;
5514
5515                 start = cache->key.objectid + cache->key.offset;
5516                 if (!cache->free_space_ctl) {
5517                         if (btrfs_init_free_space_ctl(cache,
5518                                                       root->sectorsize)) {
5519                                 ret = -ENOMEM;
5520                                 break;
5521                         }
5522                 } else {
5523                         btrfs_remove_free_space_cache(cache);
5524                 }
5525
5526                 if (btrfs_fs_compat_ro(root->fs_info,
5527                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5528                         ret = exclude_super_stripes(root, cache);
5529                         if (ret) {
5530                                 fprintf(stderr, "could not exclude super stripes: %s\n",
5531                                         strerror(-ret));
5532                                 error++;
5533                                 continue;
5534                         }
5535                         ret = load_free_space_tree(root->fs_info, cache);
5536                         free_excluded_extents(root, cache);
5537                         if (ret < 0) {
5538                                 fprintf(stderr, "could not load free space tree: %s\n",
5539                                         strerror(-ret));
5540                                 error++;
5541                                 continue;
5542                         }
5543                         error += ret;
5544                 } else {
5545                         ret = load_free_space_cache(root->fs_info, cache);
5546                         if (!ret)
5547                                 continue;
5548                 }
5549
5550                 ret = verify_space_cache(root, cache);
5551                 if (ret) {
5552                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
5553                                 cache->key.objectid);
5554                         error++;
5555                 }
5556         }
5557
5558         task_stop(ctx.info);
5559
5560         return error ? -EINVAL : 0;
5561 }
5562
5563 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5564                         u64 num_bytes, unsigned long leaf_offset,
5565                         struct extent_buffer *eb) {
5566
5567         u64 offset = 0;
5568         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5569         char *data;
5570         unsigned long csum_offset;
5571         u32 csum;
5572         u32 csum_expected;
5573         u64 read_len;
5574         u64 data_checked = 0;
5575         u64 tmp;
5576         int ret = 0;
5577         int mirror;
5578         int num_copies;
5579
5580         if (num_bytes % root->sectorsize)
5581                 return -EINVAL;
5582
5583         data = malloc(num_bytes);
5584         if (!data)
5585                 return -ENOMEM;
5586
5587         while (offset < num_bytes) {
5588                 mirror = 0;
5589 again:
5590                 read_len = num_bytes - offset;
5591                 /* read as much space once a time */
5592                 ret = read_extent_data(root, data + offset,
5593                                 bytenr + offset, &read_len, mirror);
5594                 if (ret)
5595                         goto out;
5596                 data_checked = 0;
5597                 /* verify every 4k data's checksum */
5598                 while (data_checked < read_len) {
5599                         csum = ~(u32)0;
5600                         tmp = offset + data_checked;
5601
5602                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
5603                                                csum, root->sectorsize);
5604                         btrfs_csum_final(csum, (char *)&csum);
5605
5606                         csum_offset = leaf_offset +
5607                                  tmp / root->sectorsize * csum_size;
5608                         read_extent_buffer(eb, (char *)&csum_expected,
5609                                            csum_offset, csum_size);
5610                         /* try another mirror */
5611                         if (csum != csum_expected) {
5612                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5613                                                 mirror, bytenr + tmp,
5614                                                 csum, csum_expected);
5615                                 num_copies = btrfs_num_copies(
5616                                                 &root->fs_info->mapping_tree,
5617                                                 bytenr, num_bytes);
5618                                 if (mirror < num_copies - 1) {
5619                                         mirror += 1;
5620                                         goto again;
5621                                 }
5622                         }
5623                         data_checked += root->sectorsize;
5624                 }
5625                 offset += read_len;
5626         }
5627 out:
5628         free(data);
5629         return ret;
5630 }
5631
5632 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5633                                u64 num_bytes)
5634 {
5635         struct btrfs_path *path;
5636         struct extent_buffer *leaf;
5637         struct btrfs_key key;
5638         int ret;
5639
5640         path = btrfs_alloc_path();
5641         if (!path) {
5642                 fprintf(stderr, "Error allocating path\n");
5643                 return -ENOMEM;
5644         }
5645
5646         key.objectid = bytenr;
5647         key.type = BTRFS_EXTENT_ITEM_KEY;
5648         key.offset = (u64)-1;
5649
5650 again:
5651         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5652                                 0, 0);
5653         if (ret < 0) {
5654                 fprintf(stderr, "Error looking up extent record %d\n", ret);
5655                 btrfs_free_path(path);
5656                 return ret;
5657         } else if (ret) {
5658                 if (path->slots[0] > 0) {
5659                         path->slots[0]--;
5660                 } else {
5661                         ret = btrfs_prev_leaf(root, path);
5662                         if (ret < 0) {
5663                                 goto out;
5664                         } else if (ret > 0) {
5665                                 ret = 0;
5666                                 goto out;
5667                         }
5668                 }
5669         }
5670
5671         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5672
5673         /*
5674          * Block group items come before extent items if they have the same
5675          * bytenr, so walk back one more just in case.  Dear future traveller,
5676          * first congrats on mastering time travel.  Now if it's not too much
5677          * trouble could you go back to 2006 and tell Chris to make the
5678          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5679          * EXTENT_ITEM_KEY please?
5680          */
5681         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5682                 if (path->slots[0] > 0) {
5683                         path->slots[0]--;
5684                 } else {
5685                         ret = btrfs_prev_leaf(root, path);
5686                         if (ret < 0) {
5687                                 goto out;
5688                         } else if (ret > 0) {
5689                                 ret = 0;
5690                                 goto out;
5691                         }
5692                 }
5693                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5694         }
5695
5696         while (num_bytes) {
5697                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5698                         ret = btrfs_next_leaf(root, path);
5699                         if (ret < 0) {
5700                                 fprintf(stderr, "Error going to next leaf "
5701                                         "%d\n", ret);
5702                                 btrfs_free_path(path);
5703                                 return ret;
5704                         } else if (ret) {
5705                                 break;
5706                         }
5707                 }
5708                 leaf = path->nodes[0];
5709                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5710                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5711                         path->slots[0]++;
5712                         continue;
5713                 }
5714                 if (key.objectid + key.offset < bytenr) {
5715                         path->slots[0]++;
5716                         continue;
5717                 }
5718                 if (key.objectid > bytenr + num_bytes)
5719                         break;
5720
5721                 if (key.objectid == bytenr) {
5722                         if (key.offset >= num_bytes) {
5723                                 num_bytes = 0;
5724                                 break;
5725                         }
5726                         num_bytes -= key.offset;
5727                         bytenr += key.offset;
5728                 } else if (key.objectid < bytenr) {
5729                         if (key.objectid + key.offset >= bytenr + num_bytes) {
5730                                 num_bytes = 0;
5731                                 break;
5732                         }
5733                         num_bytes = (bytenr + num_bytes) -
5734                                 (key.objectid + key.offset);
5735                         bytenr = key.objectid + key.offset;
5736                 } else {
5737                         if (key.objectid + key.offset < bytenr + num_bytes) {
5738                                 u64 new_start = key.objectid + key.offset;
5739                                 u64 new_bytes = bytenr + num_bytes - new_start;
5740
5741                                 /*
5742                                  * Weird case, the extent is in the middle of
5743                                  * our range, we'll have to search one side
5744                                  * and then the other.  Not sure if this happens
5745                                  * in real life, but no harm in coding it up
5746                                  * anyway just in case.
5747                                  */
5748                                 btrfs_release_path(path);
5749                                 ret = check_extent_exists(root, new_start,
5750                                                           new_bytes);
5751                                 if (ret) {
5752                                         fprintf(stderr, "Right section didn't "
5753                                                 "have a record\n");
5754                                         break;
5755                                 }
5756                                 num_bytes = key.objectid - bytenr;
5757                                 goto again;
5758                         }
5759                         num_bytes = key.objectid - bytenr;
5760                 }
5761                 path->slots[0]++;
5762         }
5763         ret = 0;
5764
5765 out:
5766         if (num_bytes && !ret) {
5767                 fprintf(stderr, "There are no extents for csum range "
5768                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5769                 ret = 1;
5770         }
5771
5772         btrfs_free_path(path);
5773         return ret;
5774 }
5775
5776 static int check_csums(struct btrfs_root *root)
5777 {
5778         struct btrfs_path *path;
5779         struct extent_buffer *leaf;
5780         struct btrfs_key key;
5781         u64 offset = 0, num_bytes = 0;
5782         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5783         int errors = 0;
5784         int ret;
5785         u64 data_len;
5786         unsigned long leaf_offset;
5787
5788         root = root->fs_info->csum_root;
5789         if (!extent_buffer_uptodate(root->node)) {
5790                 fprintf(stderr, "No valid csum tree found\n");
5791                 return -ENOENT;
5792         }
5793
5794         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5795         key.type = BTRFS_EXTENT_CSUM_KEY;
5796         key.offset = 0;
5797
5798         path = btrfs_alloc_path();
5799         if (!path)
5800                 return -ENOMEM;
5801
5802         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5803         if (ret < 0) {
5804                 fprintf(stderr, "Error searching csum tree %d\n", ret);
5805                 btrfs_free_path(path);
5806                 return ret;
5807         }
5808
5809         if (ret > 0 && path->slots[0])
5810                 path->slots[0]--;
5811         ret = 0;
5812
5813         while (1) {
5814                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5815                         ret = btrfs_next_leaf(root, path);
5816                         if (ret < 0) {
5817                                 fprintf(stderr, "Error going to next leaf "
5818                                         "%d\n", ret);
5819                                 break;
5820                         }
5821                         if (ret)
5822                                 break;
5823                 }
5824                 leaf = path->nodes[0];
5825
5826                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5827                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5828                         path->slots[0]++;
5829                         continue;
5830                 }
5831
5832                 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5833                               csum_size) * root->sectorsize;
5834                 if (!check_data_csum)
5835                         goto skip_csum_check;
5836                 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5837                 ret = check_extent_csums(root, key.offset, data_len,
5838                                          leaf_offset, leaf);
5839                 if (ret)
5840                         break;
5841 skip_csum_check:
5842                 if (!num_bytes) {
5843                         offset = key.offset;
5844                 } else if (key.offset != offset + num_bytes) {
5845                         ret = check_extent_exists(root, offset, num_bytes);
5846                         if (ret) {
5847                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5848                                         "there is no extent record\n",
5849                                         offset, offset+num_bytes);
5850                                 errors++;
5851                         }
5852                         offset = key.offset;
5853                         num_bytes = 0;
5854                 }
5855                 num_bytes += data_len;
5856                 path->slots[0]++;
5857         }
5858
5859         btrfs_free_path(path);
5860         return errors;
5861 }
5862
5863 static int is_dropped_key(struct btrfs_key *key,
5864                           struct btrfs_key *drop_key) {
5865         if (key->objectid < drop_key->objectid)
5866                 return 1;
5867         else if (key->objectid == drop_key->objectid) {
5868                 if (key->type < drop_key->type)
5869                         return 1;
5870                 else if (key->type == drop_key->type) {
5871                         if (key->offset < drop_key->offset)
5872                                 return 1;
5873                 }
5874         }
5875         return 0;
5876 }
5877
5878 /*
5879  * Here are the rules for FULL_BACKREF.
5880  *
5881  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5882  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5883  *      FULL_BACKREF set.
5884  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
5885  *    if it happened after the relocation occurred since we'll have dropped the
5886  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5887  *    have no real way to know for sure.
5888  *
5889  * We process the blocks one root at a time, and we start from the lowest root
5890  * objectid and go to the highest.  So we can just lookup the owner backref for
5891  * the record and if we don't find it then we know it doesn't exist and we have
5892  * a FULL BACKREF.
5893  *
5894  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
5895  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
5896  * be set or not and then we can check later once we've gathered all the refs.
5897  */
5898 static int calc_extent_flag(struct btrfs_root *root,
5899                            struct cache_tree *extent_cache,
5900                            struct extent_buffer *buf,
5901                            struct root_item_record *ri,
5902                            u64 *flags)
5903 {
5904         struct extent_record *rec;
5905         struct cache_extent *cache;
5906         struct tree_backref *tback;
5907         u64 owner = 0;
5908
5909         cache = lookup_cache_extent(extent_cache, buf->start, 1);
5910         /* we have added this extent before */
5911         BUG_ON(!cache);
5912         rec = container_of(cache, struct extent_record, cache);
5913
5914         /*
5915          * Except file/reloc tree, we can not have
5916          * FULL BACKREF MODE
5917          */
5918         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
5919                 goto normal;
5920         /*
5921          * root node
5922          */
5923         if (buf->start == ri->bytenr)
5924                 goto normal;
5925
5926         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
5927                 goto full_backref;
5928
5929         owner = btrfs_header_owner(buf);
5930         if (owner == ri->objectid)
5931                 goto normal;
5932
5933         tback = find_tree_backref(rec, 0, owner);
5934         if (!tback)
5935                 goto full_backref;
5936 normal:
5937         *flags = 0;
5938         if (rec->flag_block_full_backref != FLAG_UNSET &&
5939             rec->flag_block_full_backref != 0)
5940                 rec->bad_full_backref = 1;
5941         return 0;
5942 full_backref:
5943         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5944         if (rec->flag_block_full_backref != FLAG_UNSET &&
5945             rec->flag_block_full_backref != 1)
5946                 rec->bad_full_backref = 1;
5947         return 0;
5948 }
5949
5950 static int run_next_block(struct btrfs_root *root,
5951                           struct block_info *bits,
5952                           int bits_nr,
5953                           u64 *last,
5954                           struct cache_tree *pending,
5955                           struct cache_tree *seen,
5956                           struct cache_tree *reada,
5957                           struct cache_tree *nodes,
5958                           struct cache_tree *extent_cache,
5959                           struct cache_tree *chunk_cache,
5960                           struct rb_root *dev_cache,
5961                           struct block_group_tree *block_group_cache,
5962                           struct device_extent_tree *dev_extent_cache,
5963                           struct root_item_record *ri)
5964 {
5965         struct extent_buffer *buf;
5966         struct extent_record *rec = NULL;
5967         u64 bytenr;
5968         u32 size;
5969         u64 parent;
5970         u64 owner;
5971         u64 flags;
5972         u64 ptr;
5973         u64 gen = 0;
5974         int ret = 0;
5975         int i;
5976         int nritems;
5977         struct btrfs_key key;
5978         struct cache_extent *cache;
5979         int reada_bits;
5980
5981         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
5982                                     bits_nr, &reada_bits);
5983         if (nritems == 0)
5984                 return 1;
5985
5986         if (!reada_bits) {
5987                 for(i = 0; i < nritems; i++) {
5988                         ret = add_cache_extent(reada, bits[i].start,
5989                                                bits[i].size);
5990                         if (ret == -EEXIST)
5991                                 continue;
5992
5993                         /* fixme, get the parent transid */
5994                         readahead_tree_block(root, bits[i].start,
5995                                              bits[i].size, 0);
5996                 }
5997         }
5998         *last = bits[0].start;
5999         bytenr = bits[0].start;
6000         size = bits[0].size;
6001
6002         cache = lookup_cache_extent(pending, bytenr, size);
6003         if (cache) {
6004                 remove_cache_extent(pending, cache);
6005                 free(cache);
6006         }
6007         cache = lookup_cache_extent(reada, bytenr, size);
6008         if (cache) {
6009                 remove_cache_extent(reada, cache);
6010                 free(cache);
6011         }
6012         cache = lookup_cache_extent(nodes, bytenr, size);
6013         if (cache) {
6014                 remove_cache_extent(nodes, cache);
6015                 free(cache);
6016         }
6017         cache = lookup_cache_extent(extent_cache, bytenr, size);
6018         if (cache) {
6019                 rec = container_of(cache, struct extent_record, cache);
6020                 gen = rec->parent_generation;
6021         }
6022
6023         /* fixme, get the real parent transid */
6024         buf = read_tree_block(root, bytenr, size, gen);
6025         if (!extent_buffer_uptodate(buf)) {
6026                 record_bad_block_io(root->fs_info,
6027                                     extent_cache, bytenr, size);
6028                 goto out;
6029         }
6030
6031         nritems = btrfs_header_nritems(buf);
6032
6033         flags = 0;
6034         if (!init_extent_tree) {
6035                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6036                                        btrfs_header_level(buf), 1, NULL,
6037                                        &flags);
6038                 if (ret < 0) {
6039                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6040                         if (ret < 0) {
6041                                 fprintf(stderr, "Couldn't calc extent flags\n");
6042                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6043                         }
6044                 }
6045         } else {
6046                 flags = 0;
6047                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6048                 if (ret < 0) {
6049                         fprintf(stderr, "Couldn't calc extent flags\n");
6050                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6051                 }
6052         }
6053
6054         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6055                 if (ri != NULL &&
6056                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6057                     ri->objectid == btrfs_header_owner(buf)) {
6058                         /*
6059                          * Ok we got to this block from it's original owner and
6060                          * we have FULL_BACKREF set.  Relocation can leave
6061                          * converted blocks over so this is altogether possible,
6062                          * however it's not possible if the generation > the
6063                          * last snapshot, so check for this case.
6064                          */
6065                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6066                             btrfs_header_generation(buf) > ri->last_snapshot) {
6067                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6068                                 rec->bad_full_backref = 1;
6069                         }
6070                 }
6071         } else {
6072                 if (ri != NULL &&
6073                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6074                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6075                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6076                         rec->bad_full_backref = 1;
6077                 }
6078         }
6079
6080         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6081                 rec->flag_block_full_backref = 1;
6082                 parent = bytenr;
6083                 owner = 0;
6084         } else {
6085                 rec->flag_block_full_backref = 0;
6086                 parent = 0;
6087                 owner = btrfs_header_owner(buf);
6088         }
6089
6090         ret = check_block(root, extent_cache, buf, flags);
6091         if (ret)
6092                 goto out;
6093
6094         if (btrfs_is_leaf(buf)) {
6095                 btree_space_waste += btrfs_leaf_free_space(root, buf);
6096                 for (i = 0; i < nritems; i++) {
6097                         struct btrfs_file_extent_item *fi;
6098                         btrfs_item_key_to_cpu(buf, &key, i);
6099                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6100                                 process_extent_item(root, extent_cache, buf,
6101                                                     i);
6102                                 continue;
6103                         }
6104                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6105                                 process_extent_item(root, extent_cache, buf,
6106                                                     i);
6107                                 continue;
6108                         }
6109                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6110                                 total_csum_bytes +=
6111                                         btrfs_item_size_nr(buf, i);
6112                                 continue;
6113                         }
6114                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6115                                 process_chunk_item(chunk_cache, &key, buf, i);
6116                                 continue;
6117                         }
6118                         if (key.type == BTRFS_DEV_ITEM_KEY) {
6119                                 process_device_item(dev_cache, &key, buf, i);
6120                                 continue;
6121                         }
6122                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6123                                 process_block_group_item(block_group_cache,
6124                                         &key, buf, i);
6125                                 continue;
6126                         }
6127                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
6128                                 process_device_extent_item(dev_extent_cache,
6129                                         &key, buf, i);
6130                                 continue;
6131
6132                         }
6133                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6134 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6135                                 process_extent_ref_v0(extent_cache, buf, i);
6136 #else
6137                                 BUG();
6138 #endif
6139                                 continue;
6140                         }
6141
6142                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6143                                 add_tree_backref(extent_cache, key.objectid, 0,
6144                                                  key.offset, 0);
6145                                 continue;
6146                         }
6147                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6148                                 add_tree_backref(extent_cache, key.objectid,
6149                                                  key.offset, 0, 0);
6150                                 continue;
6151                         }
6152                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6153                                 struct btrfs_extent_data_ref *ref;
6154                                 ref = btrfs_item_ptr(buf, i,
6155                                                 struct btrfs_extent_data_ref);
6156                                 add_data_backref(extent_cache,
6157                                         key.objectid, 0,
6158                                         btrfs_extent_data_ref_root(buf, ref),
6159                                         btrfs_extent_data_ref_objectid(buf,
6160                                                                        ref),
6161                                         btrfs_extent_data_ref_offset(buf, ref),
6162                                         btrfs_extent_data_ref_count(buf, ref),
6163                                         0, root->sectorsize);
6164                                 continue;
6165                         }
6166                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6167                                 struct btrfs_shared_data_ref *ref;
6168                                 ref = btrfs_item_ptr(buf, i,
6169                                                 struct btrfs_shared_data_ref);
6170                                 add_data_backref(extent_cache,
6171                                         key.objectid, key.offset, 0, 0, 0,
6172                                         btrfs_shared_data_ref_count(buf, ref),
6173                                         0, root->sectorsize);
6174                                 continue;
6175                         }
6176                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6177                                 struct bad_item *bad;
6178
6179                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6180                                         continue;
6181                                 if (!owner)
6182                                         continue;
6183                                 bad = malloc(sizeof(struct bad_item));
6184                                 if (!bad)
6185                                         continue;
6186                                 INIT_LIST_HEAD(&bad->list);
6187                                 memcpy(&bad->key, &key,
6188                                        sizeof(struct btrfs_key));
6189                                 bad->root_id = owner;
6190                                 list_add_tail(&bad->list, &delete_items);
6191                                 continue;
6192                         }
6193                         if (key.type != BTRFS_EXTENT_DATA_KEY)
6194                                 continue;
6195                         fi = btrfs_item_ptr(buf, i,
6196                                             struct btrfs_file_extent_item);
6197                         if (btrfs_file_extent_type(buf, fi) ==
6198                             BTRFS_FILE_EXTENT_INLINE)
6199                                 continue;
6200                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6201                                 continue;
6202
6203                         data_bytes_allocated +=
6204                                 btrfs_file_extent_disk_num_bytes(buf, fi);
6205                         if (data_bytes_allocated < root->sectorsize) {
6206                                 abort();
6207                         }
6208                         data_bytes_referenced +=
6209                                 btrfs_file_extent_num_bytes(buf, fi);
6210                         add_data_backref(extent_cache,
6211                                 btrfs_file_extent_disk_bytenr(buf, fi),
6212                                 parent, owner, key.objectid, key.offset -
6213                                 btrfs_file_extent_offset(buf, fi), 1, 1,
6214                                 btrfs_file_extent_disk_num_bytes(buf, fi));
6215                 }
6216         } else {
6217                 int level;
6218                 struct btrfs_key first_key;
6219
6220                 first_key.objectid = 0;
6221
6222                 if (nritems > 0)
6223                         btrfs_item_key_to_cpu(buf, &first_key, 0);
6224                 level = btrfs_header_level(buf);
6225                 for (i = 0; i < nritems; i++) {
6226                         struct extent_record tmpl;
6227
6228                         ptr = btrfs_node_blockptr(buf, i);
6229                         size = root->nodesize;
6230                         btrfs_node_key_to_cpu(buf, &key, i);
6231                         if (ri != NULL) {
6232                                 if ((level == ri->drop_level)
6233                                     && is_dropped_key(&key, &ri->drop_key)) {
6234                                         continue;
6235                                 }
6236                         }
6237
6238                         memset(&tmpl, 0, sizeof(tmpl));
6239                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6240                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6241                         tmpl.start = ptr;
6242                         tmpl.nr = size;
6243                         tmpl.refs = 1;
6244                         tmpl.metadata = 1;
6245                         tmpl.max_size = size;
6246                         ret = add_extent_rec(extent_cache, &tmpl);
6247                         BUG_ON(ret);
6248
6249                         add_tree_backref(extent_cache, ptr, parent, owner, 1);
6250
6251                         if (level > 1) {
6252                                 add_pending(nodes, seen, ptr, size);
6253                         } else {
6254                                 add_pending(pending, seen, ptr, size);
6255                         }
6256                 }
6257                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6258                                       nritems) * sizeof(struct btrfs_key_ptr);
6259         }
6260         total_btree_bytes += buf->len;
6261         if (fs_root_objectid(btrfs_header_owner(buf)))
6262                 total_fs_tree_bytes += buf->len;
6263         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6264                 total_extent_tree_bytes += buf->len;
6265         if (!found_old_backref &&
6266             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6267             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6268             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6269                 found_old_backref = 1;
6270 out:
6271         free_extent_buffer(buf);
6272         return ret;
6273 }
6274
6275 static int add_root_to_pending(struct extent_buffer *buf,
6276                                struct cache_tree *extent_cache,
6277                                struct cache_tree *pending,
6278                                struct cache_tree *seen,
6279                                struct cache_tree *nodes,
6280                                u64 objectid)
6281 {
6282         struct extent_record tmpl;
6283
6284         if (btrfs_header_level(buf) > 0)
6285                 add_pending(nodes, seen, buf->start, buf->len);
6286         else
6287                 add_pending(pending, seen, buf->start, buf->len);
6288
6289         memset(&tmpl, 0, sizeof(tmpl));
6290         tmpl.start = buf->start;
6291         tmpl.nr = buf->len;
6292         tmpl.is_root = 1;
6293         tmpl.refs = 1;
6294         tmpl.metadata = 1;
6295         tmpl.max_size = buf->len;
6296         add_extent_rec(extent_cache, &tmpl);
6297
6298         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6299             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6300                 add_tree_backref(extent_cache, buf->start, buf->start,
6301                                  0, 1);
6302         else
6303                 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6304         return 0;
6305 }
6306
6307 /* as we fix the tree, we might be deleting blocks that
6308  * we're tracking for repair.  This hook makes sure we
6309  * remove any backrefs for blocks as we are fixing them.
6310  */
6311 static int free_extent_hook(struct btrfs_trans_handle *trans,
6312                             struct btrfs_root *root,
6313                             u64 bytenr, u64 num_bytes, u64 parent,
6314                             u64 root_objectid, u64 owner, u64 offset,
6315                             int refs_to_drop)
6316 {
6317         struct extent_record *rec;
6318         struct cache_extent *cache;
6319         int is_data;
6320         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6321
6322         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6323         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6324         if (!cache)
6325                 return 0;
6326
6327         rec = container_of(cache, struct extent_record, cache);
6328         if (is_data) {
6329                 struct data_backref *back;
6330                 back = find_data_backref(rec, parent, root_objectid, owner,
6331                                          offset, 1, bytenr, num_bytes);
6332                 if (!back)
6333                         goto out;
6334                 if (back->node.found_ref) {
6335                         back->found_ref -= refs_to_drop;
6336                         if (rec->refs)
6337                                 rec->refs -= refs_to_drop;
6338                 }
6339                 if (back->node.found_extent_tree) {
6340                         back->num_refs -= refs_to_drop;
6341                         if (rec->extent_item_refs)
6342                                 rec->extent_item_refs -= refs_to_drop;
6343                 }
6344                 if (back->found_ref == 0)
6345                         back->node.found_ref = 0;
6346                 if (back->num_refs == 0)
6347                         back->node.found_extent_tree = 0;
6348
6349                 if (!back->node.found_extent_tree && back->node.found_ref) {
6350                         list_del(&back->node.list);
6351                         free(back);
6352                 }
6353         } else {
6354                 struct tree_backref *back;
6355                 back = find_tree_backref(rec, parent, root_objectid);
6356                 if (!back)
6357                         goto out;
6358                 if (back->node.found_ref) {
6359                         if (rec->refs)
6360                                 rec->refs--;
6361                         back->node.found_ref = 0;
6362                 }
6363                 if (back->node.found_extent_tree) {
6364                         if (rec->extent_item_refs)
6365                                 rec->extent_item_refs--;
6366                         back->node.found_extent_tree = 0;
6367                 }
6368                 if (!back->node.found_extent_tree && back->node.found_ref) {
6369                         list_del(&back->node.list);
6370                         free(back);
6371                 }
6372         }
6373         maybe_free_extent_rec(extent_cache, rec);
6374 out:
6375         return 0;
6376 }
6377
6378 static int delete_extent_records(struct btrfs_trans_handle *trans,
6379                                  struct btrfs_root *root,
6380                                  struct btrfs_path *path,
6381                                  u64 bytenr, u64 new_len)
6382 {
6383         struct btrfs_key key;
6384         struct btrfs_key found_key;
6385         struct extent_buffer *leaf;
6386         int ret;
6387         int slot;
6388
6389
6390         key.objectid = bytenr;
6391         key.type = (u8)-1;
6392         key.offset = (u64)-1;
6393
6394         while(1) {
6395                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6396                                         &key, path, 0, 1);
6397                 if (ret < 0)
6398                         break;
6399
6400                 if (ret > 0) {
6401                         ret = 0;
6402                         if (path->slots[0] == 0)
6403                                 break;
6404                         path->slots[0]--;
6405                 }
6406                 ret = 0;
6407
6408                 leaf = path->nodes[0];
6409                 slot = path->slots[0];
6410
6411                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6412                 if (found_key.objectid != bytenr)
6413                         break;
6414
6415                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6416                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
6417                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6418                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6419                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6420                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6421                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6422                         btrfs_release_path(path);
6423                         if (found_key.type == 0) {
6424                                 if (found_key.offset == 0)
6425                                         break;
6426                                 key.offset = found_key.offset - 1;
6427                                 key.type = found_key.type;
6428                         }
6429                         key.type = found_key.type - 1;
6430                         key.offset = (u64)-1;
6431                         continue;
6432                 }
6433
6434                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6435                         found_key.objectid, found_key.type, found_key.offset);
6436
6437                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6438                 if (ret)
6439                         break;
6440                 btrfs_release_path(path);
6441
6442                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6443                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
6444                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6445                                 found_key.offset : root->nodesize;
6446
6447                         ret = btrfs_update_block_group(trans, root, bytenr,
6448                                                        bytes, 0, 0);
6449                         if (ret)
6450                                 break;
6451                 }
6452         }
6453
6454         btrfs_release_path(path);
6455         return ret;
6456 }
6457
6458 /*
6459  * for a single backref, this will allocate a new extent
6460  * and add the backref to it.
6461  */
6462 static int record_extent(struct btrfs_trans_handle *trans,
6463                          struct btrfs_fs_info *info,
6464                          struct btrfs_path *path,
6465                          struct extent_record *rec,
6466                          struct extent_backref *back,
6467                          int allocated, u64 flags)
6468 {
6469         int ret;
6470         struct btrfs_root *extent_root = info->extent_root;
6471         struct extent_buffer *leaf;
6472         struct btrfs_key ins_key;
6473         struct btrfs_extent_item *ei;
6474         struct tree_backref *tback;
6475         struct data_backref *dback;
6476         struct btrfs_tree_block_info *bi;
6477
6478         if (!back->is_data)
6479                 rec->max_size = max_t(u64, rec->max_size,
6480                                     info->extent_root->nodesize);
6481
6482         if (!allocated) {
6483                 u32 item_size = sizeof(*ei);
6484
6485                 if (!back->is_data)
6486                         item_size += sizeof(*bi);
6487
6488                 ins_key.objectid = rec->start;
6489                 ins_key.offset = rec->max_size;
6490                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6491
6492                 ret = btrfs_insert_empty_item(trans, extent_root, path,
6493                                         &ins_key, item_size);
6494                 if (ret)
6495                         goto fail;
6496
6497                 leaf = path->nodes[0];
6498                 ei = btrfs_item_ptr(leaf, path->slots[0],
6499                                     struct btrfs_extent_item);
6500
6501                 btrfs_set_extent_refs(leaf, ei, 0);
6502                 btrfs_set_extent_generation(leaf, ei, rec->generation);
6503
6504                 if (back->is_data) {
6505                         btrfs_set_extent_flags(leaf, ei,
6506                                                BTRFS_EXTENT_FLAG_DATA);
6507                 } else {
6508                         struct btrfs_disk_key copy_key;;
6509
6510                         tback = (struct tree_backref *)back;
6511                         bi = (struct btrfs_tree_block_info *)(ei + 1);
6512                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
6513                                              sizeof(*bi));
6514
6515                         btrfs_set_disk_key_objectid(&copy_key,
6516                                                     rec->info_objectid);
6517                         btrfs_set_disk_key_type(&copy_key, 0);
6518                         btrfs_set_disk_key_offset(&copy_key, 0);
6519
6520                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6521                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
6522
6523                         btrfs_set_extent_flags(leaf, ei,
6524                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6525                 }
6526
6527                 btrfs_mark_buffer_dirty(leaf);
6528                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6529                                                rec->max_size, 1, 0);
6530                 if (ret)
6531                         goto fail;
6532                 btrfs_release_path(path);
6533         }
6534
6535         if (back->is_data) {
6536                 u64 parent;
6537                 int i;
6538
6539                 dback = (struct data_backref *)back;
6540                 if (back->full_backref)
6541                         parent = dback->parent;
6542                 else
6543                         parent = 0;
6544
6545                 for (i = 0; i < dback->found_ref; i++) {
6546                         /* if parent != 0, we're doing a full backref
6547                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6548                          * just makes the backref allocator create a data
6549                          * backref
6550                          */
6551                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
6552                                                    rec->start, rec->max_size,
6553                                                    parent,
6554                                                    dback->root,
6555                                                    parent ?
6556                                                    BTRFS_FIRST_FREE_OBJECTID :
6557                                                    dback->owner,
6558                                                    dback->offset);
6559                         if (ret)
6560                                 break;
6561                 }
6562                 fprintf(stderr, "adding new data backref"
6563                                 " on %llu %s %llu owner %llu"
6564                                 " offset %llu found %d\n",
6565                                 (unsigned long long)rec->start,
6566                                 back->full_backref ?
6567                                 "parent" : "root",
6568                                 back->full_backref ?
6569                                 (unsigned long long)parent :
6570                                 (unsigned long long)dback->root,
6571                                 (unsigned long long)dback->owner,
6572                                 (unsigned long long)dback->offset,
6573                                 dback->found_ref);
6574         } else {
6575                 u64 parent;
6576
6577                 tback = (struct tree_backref *)back;
6578                 if (back->full_backref)
6579                         parent = tback->parent;
6580                 else
6581                         parent = 0;
6582
6583                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6584                                            rec->start, rec->max_size,
6585                                            parent, tback->root, 0, 0);
6586                 fprintf(stderr, "adding new tree backref on "
6587                         "start %llu len %llu parent %llu root %llu\n",
6588                         rec->start, rec->max_size, parent, tback->root);
6589         }
6590 fail:
6591         btrfs_release_path(path);
6592         return ret;
6593 }
6594
6595 static struct extent_entry *find_entry(struct list_head *entries,
6596                                        u64 bytenr, u64 bytes)
6597 {
6598         struct extent_entry *entry = NULL;
6599
6600         list_for_each_entry(entry, entries, list) {
6601                 if (entry->bytenr == bytenr && entry->bytes == bytes)
6602                         return entry;
6603         }
6604
6605         return NULL;
6606 }
6607
6608 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6609 {
6610         struct extent_entry *entry, *best = NULL, *prev = NULL;
6611
6612         list_for_each_entry(entry, entries, list) {
6613                 if (!prev) {
6614                         prev = entry;
6615                         continue;
6616                 }
6617
6618                 /*
6619                  * If there are as many broken entries as entries then we know
6620                  * not to trust this particular entry.
6621                  */
6622                 if (entry->broken == entry->count)
6623                         continue;
6624
6625                 /*
6626                  * If our current entry == best then we can't be sure our best
6627                  * is really the best, so we need to keep searching.
6628                  */
6629                 if (best && best->count == entry->count) {
6630                         prev = entry;
6631                         best = NULL;
6632                         continue;
6633                 }
6634
6635                 /* Prev == entry, not good enough, have to keep searching */
6636                 if (!prev->broken && prev->count == entry->count)
6637                         continue;
6638
6639                 if (!best)
6640                         best = (prev->count > entry->count) ? prev : entry;
6641                 else if (best->count < entry->count)
6642                         best = entry;
6643                 prev = entry;
6644         }
6645
6646         return best;
6647 }
6648
6649 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6650                       struct data_backref *dback, struct extent_entry *entry)
6651 {
6652         struct btrfs_trans_handle *trans;
6653         struct btrfs_root *root;
6654         struct btrfs_file_extent_item *fi;
6655         struct extent_buffer *leaf;
6656         struct btrfs_key key;
6657         u64 bytenr, bytes;
6658         int ret, err;
6659
6660         key.objectid = dback->root;
6661         key.type = BTRFS_ROOT_ITEM_KEY;
6662         key.offset = (u64)-1;
6663         root = btrfs_read_fs_root(info, &key);
6664         if (IS_ERR(root)) {
6665                 fprintf(stderr, "Couldn't find root for our ref\n");
6666                 return -EINVAL;
6667         }
6668
6669         /*
6670          * The backref points to the original offset of the extent if it was
6671          * split, so we need to search down to the offset we have and then walk
6672          * forward until we find the backref we're looking for.
6673          */
6674         key.objectid = dback->owner;
6675         key.type = BTRFS_EXTENT_DATA_KEY;
6676         key.offset = dback->offset;
6677         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6678         if (ret < 0) {
6679                 fprintf(stderr, "Error looking up ref %d\n", ret);
6680                 return ret;
6681         }
6682
6683         while (1) {
6684                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6685                         ret = btrfs_next_leaf(root, path);
6686                         if (ret) {
6687                                 fprintf(stderr, "Couldn't find our ref, next\n");
6688                                 return -EINVAL;
6689                         }
6690                 }
6691                 leaf = path->nodes[0];
6692                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6693                 if (key.objectid != dback->owner ||
6694                     key.type != BTRFS_EXTENT_DATA_KEY) {
6695                         fprintf(stderr, "Couldn't find our ref, search\n");
6696                         return -EINVAL;
6697                 }
6698                 fi = btrfs_item_ptr(leaf, path->slots[0],
6699                                     struct btrfs_file_extent_item);
6700                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6701                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6702
6703                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6704                         break;
6705                 path->slots[0]++;
6706         }
6707
6708         btrfs_release_path(path);
6709
6710         trans = btrfs_start_transaction(root, 1);
6711         if (IS_ERR(trans))
6712                 return PTR_ERR(trans);
6713
6714         /*
6715          * Ok we have the key of the file extent we want to fix, now we can cow
6716          * down to the thing and fix it.
6717          */
6718         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6719         if (ret < 0) {
6720                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6721                         key.objectid, key.type, key.offset, ret);
6722                 goto out;
6723         }
6724         if (ret > 0) {
6725                 fprintf(stderr, "Well that's odd, we just found this key "
6726                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6727                         key.offset);
6728                 ret = -EINVAL;
6729                 goto out;
6730         }
6731         leaf = path->nodes[0];
6732         fi = btrfs_item_ptr(leaf, path->slots[0],
6733                             struct btrfs_file_extent_item);
6734
6735         if (btrfs_file_extent_compression(leaf, fi) &&
6736             dback->disk_bytenr != entry->bytenr) {
6737                 fprintf(stderr, "Ref doesn't match the record start and is "
6738                         "compressed, please take a btrfs-image of this file "
6739                         "system and send it to a btrfs developer so they can "
6740                         "complete this functionality for bytenr %Lu\n",
6741                         dback->disk_bytenr);
6742                 ret = -EINVAL;
6743                 goto out;
6744         }
6745
6746         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6747                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6748         } else if (dback->disk_bytenr > entry->bytenr) {
6749                 u64 off_diff, offset;
6750
6751                 off_diff = dback->disk_bytenr - entry->bytenr;
6752                 offset = btrfs_file_extent_offset(leaf, fi);
6753                 if (dback->disk_bytenr + offset +
6754                     btrfs_file_extent_num_bytes(leaf, fi) >
6755                     entry->bytenr + entry->bytes) {
6756                         fprintf(stderr, "Ref is past the entry end, please "
6757                                 "take a btrfs-image of this file system and "
6758                                 "send it to a btrfs developer, ref %Lu\n",
6759                                 dback->disk_bytenr);
6760                         ret = -EINVAL;
6761                         goto out;
6762                 }
6763                 offset += off_diff;
6764                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6765                 btrfs_set_file_extent_offset(leaf, fi, offset);
6766         } else if (dback->disk_bytenr < entry->bytenr) {
6767                 u64 offset;
6768
6769                 offset = btrfs_file_extent_offset(leaf, fi);
6770                 if (dback->disk_bytenr + offset < entry->bytenr) {
6771                         fprintf(stderr, "Ref is before the entry start, please"
6772                                 " take a btrfs-image of this file system and "
6773                                 "send it to a btrfs developer, ref %Lu\n",
6774                                 dback->disk_bytenr);
6775                         ret = -EINVAL;
6776                         goto out;
6777                 }
6778
6779                 offset += dback->disk_bytenr;
6780                 offset -= entry->bytenr;
6781                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6782                 btrfs_set_file_extent_offset(leaf, fi, offset);
6783         }
6784
6785         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6786
6787         /*
6788          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6789          * only do this if we aren't using compression, otherwise it's a
6790          * trickier case.
6791          */
6792         if (!btrfs_file_extent_compression(leaf, fi))
6793                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6794         else
6795                 printf("ram bytes may be wrong?\n");
6796         btrfs_mark_buffer_dirty(leaf);
6797 out:
6798         err = btrfs_commit_transaction(trans, root);
6799         btrfs_release_path(path);
6800         return ret ? ret : err;
6801 }
6802
6803 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6804                            struct extent_record *rec)
6805 {
6806         struct extent_backref *back;
6807         struct data_backref *dback;
6808         struct extent_entry *entry, *best = NULL;
6809         LIST_HEAD(entries);
6810         int nr_entries = 0;
6811         int broken_entries = 0;
6812         int ret = 0;
6813         short mismatch = 0;
6814
6815         /*
6816          * Metadata is easy and the backrefs should always agree on bytenr and
6817          * size, if not we've got bigger issues.
6818          */
6819         if (rec->metadata)
6820                 return 0;
6821
6822         list_for_each_entry(back, &rec->backrefs, list) {
6823                 if (back->full_backref || !back->is_data)
6824                         continue;
6825
6826                 dback = (struct data_backref *)back;
6827
6828                 /*
6829                  * We only pay attention to backrefs that we found a real
6830                  * backref for.
6831                  */
6832                 if (dback->found_ref == 0)
6833                         continue;
6834
6835                 /*
6836                  * For now we only catch when the bytes don't match, not the
6837                  * bytenr.  We can easily do this at the same time, but I want
6838                  * to have a fs image to test on before we just add repair
6839                  * functionality willy-nilly so we know we won't screw up the
6840                  * repair.
6841                  */
6842
6843                 entry = find_entry(&entries, dback->disk_bytenr,
6844                                    dback->bytes);
6845                 if (!entry) {
6846                         entry = malloc(sizeof(struct extent_entry));
6847                         if (!entry) {
6848                                 ret = -ENOMEM;
6849                                 goto out;
6850                         }
6851                         memset(entry, 0, sizeof(*entry));
6852                         entry->bytenr = dback->disk_bytenr;
6853                         entry->bytes = dback->bytes;
6854                         list_add_tail(&entry->list, &entries);
6855                         nr_entries++;
6856                 }
6857
6858                 /*
6859                  * If we only have on entry we may think the entries agree when
6860                  * in reality they don't so we have to do some extra checking.
6861                  */
6862                 if (dback->disk_bytenr != rec->start ||
6863                     dback->bytes != rec->nr || back->broken)
6864                         mismatch = 1;
6865
6866                 if (back->broken) {
6867                         entry->broken++;
6868                         broken_entries++;
6869                 }
6870
6871                 entry->count++;
6872         }
6873
6874         /* Yay all the backrefs agree, carry on good sir */
6875         if (nr_entries <= 1 && !mismatch)
6876                 goto out;
6877
6878         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6879                 "%Lu\n", rec->start);
6880
6881         /*
6882          * First we want to see if the backrefs can agree amongst themselves who
6883          * is right, so figure out which one of the entries has the highest
6884          * count.
6885          */
6886         best = find_most_right_entry(&entries);
6887
6888         /*
6889          * Ok so we may have an even split between what the backrefs think, so
6890          * this is where we use the extent ref to see what it thinks.
6891          */
6892         if (!best) {
6893                 entry = find_entry(&entries, rec->start, rec->nr);
6894                 if (!entry && (!broken_entries || !rec->found_rec)) {
6895                         fprintf(stderr, "Backrefs don't agree with each other "
6896                                 "and extent record doesn't agree with anybody,"
6897                                 " so we can't fix bytenr %Lu bytes %Lu\n",
6898                                 rec->start, rec->nr);
6899                         ret = -EINVAL;
6900                         goto out;
6901                 } else if (!entry) {
6902                         /*
6903                          * Ok our backrefs were broken, we'll assume this is the
6904                          * correct value and add an entry for this range.
6905                          */
6906                         entry = malloc(sizeof(struct extent_entry));
6907                         if (!entry) {
6908                                 ret = -ENOMEM;
6909                                 goto out;
6910                         }
6911                         memset(entry, 0, sizeof(*entry));
6912                         entry->bytenr = rec->start;
6913                         entry->bytes = rec->nr;
6914                         list_add_tail(&entry->list, &entries);
6915                         nr_entries++;
6916                 }
6917                 entry->count++;
6918                 best = find_most_right_entry(&entries);
6919                 if (!best) {
6920                         fprintf(stderr, "Backrefs and extent record evenly "
6921                                 "split on who is right, this is going to "
6922                                 "require user input to fix bytenr %Lu bytes "
6923                                 "%Lu\n", rec->start, rec->nr);
6924                         ret = -EINVAL;
6925                         goto out;
6926                 }
6927         }
6928
6929         /*
6930          * I don't think this can happen currently as we'll abort() if we catch
6931          * this case higher up, but in case somebody removes that we still can't
6932          * deal with it properly here yet, so just bail out of that's the case.
6933          */
6934         if (best->bytenr != rec->start) {
6935                 fprintf(stderr, "Extent start and backref starts don't match, "
6936                         "please use btrfs-image on this file system and send "
6937                         "it to a btrfs developer so they can make fsck fix "
6938                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
6939                         rec->start, rec->nr);
6940                 ret = -EINVAL;
6941                 goto out;
6942         }
6943
6944         /*
6945          * Ok great we all agreed on an extent record, let's go find the real
6946          * references and fix up the ones that don't match.
6947          */
6948         list_for_each_entry(back, &rec->backrefs, list) {
6949                 if (back->full_backref || !back->is_data)
6950                         continue;
6951
6952                 dback = (struct data_backref *)back;
6953
6954                 /*
6955                  * Still ignoring backrefs that don't have a real ref attached
6956                  * to them.
6957                  */
6958                 if (dback->found_ref == 0)
6959                         continue;
6960
6961                 if (dback->bytes == best->bytes &&
6962                     dback->disk_bytenr == best->bytenr)
6963                         continue;
6964
6965                 ret = repair_ref(info, path, dback, best);
6966                 if (ret)
6967                         goto out;
6968         }
6969
6970         /*
6971          * Ok we messed with the actual refs, which means we need to drop our
6972          * entire cache and go back and rescan.  I know this is a huge pain and
6973          * adds a lot of extra work, but it's the only way to be safe.  Once all
6974          * the backrefs agree we may not need to do anything to the extent
6975          * record itself.
6976          */
6977         ret = -EAGAIN;
6978 out:
6979         while (!list_empty(&entries)) {
6980                 entry = list_entry(entries.next, struct extent_entry, list);
6981                 list_del_init(&entry->list);
6982                 free(entry);
6983         }
6984         return ret;
6985 }
6986
6987 static int process_duplicates(struct btrfs_root *root,
6988                               struct cache_tree *extent_cache,
6989                               struct extent_record *rec)
6990 {
6991         struct extent_record *good, *tmp;
6992         struct cache_extent *cache;
6993         int ret;
6994
6995         /*
6996          * If we found a extent record for this extent then return, or if we
6997          * have more than one duplicate we are likely going to need to delete
6998          * something.
6999          */
7000         if (rec->found_rec || rec->num_duplicates > 1)
7001                 return 0;
7002
7003         /* Shouldn't happen but just in case */
7004         BUG_ON(!rec->num_duplicates);
7005
7006         /*
7007          * So this happens if we end up with a backref that doesn't match the
7008          * actual extent entry.  So either the backref is bad or the extent
7009          * entry is bad.  Either way we want to have the extent_record actually
7010          * reflect what we found in the extent_tree, so we need to take the
7011          * duplicate out and use that as the extent_record since the only way we
7012          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7013          */
7014         remove_cache_extent(extent_cache, &rec->cache);
7015
7016         good = list_entry(rec->dups.next, struct extent_record, list);
7017         list_del_init(&good->list);
7018         INIT_LIST_HEAD(&good->backrefs);
7019         INIT_LIST_HEAD(&good->dups);
7020         good->cache.start = good->start;
7021         good->cache.size = good->nr;
7022         good->content_checked = 0;
7023         good->owner_ref_checked = 0;
7024         good->num_duplicates = 0;
7025         good->refs = rec->refs;
7026         list_splice_init(&rec->backrefs, &good->backrefs);
7027         while (1) {
7028                 cache = lookup_cache_extent(extent_cache, good->start,
7029                                             good->nr);
7030                 if (!cache)
7031                         break;
7032                 tmp = container_of(cache, struct extent_record, cache);
7033
7034                 /*
7035                  * If we find another overlapping extent and it's found_rec is
7036                  * set then it's a duplicate and we need to try and delete
7037                  * something.
7038                  */
7039                 if (tmp->found_rec || tmp->num_duplicates > 0) {
7040                         if (list_empty(&good->list))
7041                                 list_add_tail(&good->list,
7042                                               &duplicate_extents);
7043                         good->num_duplicates += tmp->num_duplicates + 1;
7044                         list_splice_init(&tmp->dups, &good->dups);
7045                         list_del_init(&tmp->list);
7046                         list_add_tail(&tmp->list, &good->dups);
7047                         remove_cache_extent(extent_cache, &tmp->cache);
7048                         continue;
7049                 }
7050
7051                 /*
7052                  * Ok we have another non extent item backed extent rec, so lets
7053                  * just add it to this extent and carry on like we did above.
7054                  */
7055                 good->refs += tmp->refs;
7056                 list_splice_init(&tmp->backrefs, &good->backrefs);
7057                 remove_cache_extent(extent_cache, &tmp->cache);
7058                 free(tmp);
7059         }
7060         ret = insert_cache_extent(extent_cache, &good->cache);
7061         BUG_ON(ret);
7062         free(rec);
7063         return good->num_duplicates ? 0 : 1;
7064 }
7065
7066 static int delete_duplicate_records(struct btrfs_root *root,
7067                                     struct extent_record *rec)
7068 {
7069         struct btrfs_trans_handle *trans;
7070         LIST_HEAD(delete_list);
7071         struct btrfs_path *path;
7072         struct extent_record *tmp, *good, *n;
7073         int nr_del = 0;
7074         int ret = 0, err;
7075         struct btrfs_key key;
7076
7077         path = btrfs_alloc_path();
7078         if (!path) {
7079                 ret = -ENOMEM;
7080                 goto out;
7081         }
7082
7083         good = rec;
7084         /* Find the record that covers all of the duplicates. */
7085         list_for_each_entry(tmp, &rec->dups, list) {
7086                 if (good->start < tmp->start)
7087                         continue;
7088                 if (good->nr > tmp->nr)
7089                         continue;
7090
7091                 if (tmp->start + tmp->nr < good->start + good->nr) {
7092                         fprintf(stderr, "Ok we have overlapping extents that "
7093                                 "aren't completely covered by each other, this "
7094                                 "is going to require more careful thought.  "
7095                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7096                                 tmp->start, tmp->nr, good->start, good->nr);
7097                         abort();
7098                 }
7099                 good = tmp;
7100         }
7101
7102         if (good != rec)
7103                 list_add_tail(&rec->list, &delete_list);
7104
7105         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7106                 if (tmp == good)
7107                         continue;
7108                 list_move_tail(&tmp->list, &delete_list);
7109         }
7110
7111         root = root->fs_info->extent_root;
7112         trans = btrfs_start_transaction(root, 1);
7113         if (IS_ERR(trans)) {
7114                 ret = PTR_ERR(trans);
7115                 goto out;
7116         }
7117
7118         list_for_each_entry(tmp, &delete_list, list) {
7119                 if (tmp->found_rec == 0)
7120                         continue;
7121                 key.objectid = tmp->start;
7122                 key.type = BTRFS_EXTENT_ITEM_KEY;
7123                 key.offset = tmp->nr;
7124
7125                 /* Shouldn't happen but just in case */
7126                 if (tmp->metadata) {
7127                         fprintf(stderr, "Well this shouldn't happen, extent "
7128                                 "record overlaps but is metadata? "
7129                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7130                         abort();
7131                 }
7132
7133                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7134                 if (ret) {
7135                         if (ret > 0)
7136                                 ret = -EINVAL;
7137                         break;
7138                 }
7139                 ret = btrfs_del_item(trans, root, path);
7140                 if (ret)
7141                         break;
7142                 btrfs_release_path(path);
7143                 nr_del++;
7144         }
7145         err = btrfs_commit_transaction(trans, root);
7146         if (err && !ret)
7147                 ret = err;
7148 out:
7149         while (!list_empty(&delete_list)) {
7150                 tmp = list_entry(delete_list.next, struct extent_record, list);
7151                 list_del_init(&tmp->list);
7152                 if (tmp == rec)
7153                         continue;
7154                 free(tmp);
7155         }
7156
7157         while (!list_empty(&rec->dups)) {
7158                 tmp = list_entry(rec->dups.next, struct extent_record, list);
7159                 list_del_init(&tmp->list);
7160                 free(tmp);
7161         }
7162
7163         btrfs_free_path(path);
7164
7165         if (!ret && !nr_del)
7166                 rec->num_duplicates = 0;
7167
7168         return ret ? ret : nr_del;
7169 }
7170
7171 static int find_possible_backrefs(struct btrfs_fs_info *info,
7172                                   struct btrfs_path *path,
7173                                   struct cache_tree *extent_cache,
7174                                   struct extent_record *rec)
7175 {
7176         struct btrfs_root *root;
7177         struct extent_backref *back;
7178         struct data_backref *dback;
7179         struct cache_extent *cache;
7180         struct btrfs_file_extent_item *fi;
7181         struct btrfs_key key;
7182         u64 bytenr, bytes;
7183         int ret;
7184
7185         list_for_each_entry(back, &rec->backrefs, list) {
7186                 /* Don't care about full backrefs (poor unloved backrefs) */
7187                 if (back->full_backref || !back->is_data)
7188                         continue;
7189
7190                 dback = (struct data_backref *)back;
7191
7192                 /* We found this one, we don't need to do a lookup */
7193                 if (dback->found_ref)
7194                         continue;
7195
7196                 key.objectid = dback->root;
7197                 key.type = BTRFS_ROOT_ITEM_KEY;
7198                 key.offset = (u64)-1;
7199
7200                 root = btrfs_read_fs_root(info, &key);
7201
7202                 /* No root, definitely a bad ref, skip */
7203                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7204                         continue;
7205                 /* Other err, exit */
7206                 if (IS_ERR(root))
7207                         return PTR_ERR(root);
7208
7209                 key.objectid = dback->owner;
7210                 key.type = BTRFS_EXTENT_DATA_KEY;
7211                 key.offset = dback->offset;
7212                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7213                 if (ret) {
7214                         btrfs_release_path(path);
7215                         if (ret < 0)
7216                                 return ret;
7217                         /* Didn't find it, we can carry on */
7218                         ret = 0;
7219                         continue;
7220                 }
7221
7222                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7223                                     struct btrfs_file_extent_item);
7224                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7225                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7226                 btrfs_release_path(path);
7227                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7228                 if (cache) {
7229                         struct extent_record *tmp;
7230                         tmp = container_of(cache, struct extent_record, cache);
7231
7232                         /*
7233                          * If we found an extent record for the bytenr for this
7234                          * particular backref then we can't add it to our
7235                          * current extent record.  We only want to add backrefs
7236                          * that don't have a corresponding extent item in the
7237                          * extent tree since they likely belong to this record
7238                          * and we need to fix it if it doesn't match bytenrs.
7239                          */
7240                         if  (tmp->found_rec)
7241                                 continue;
7242                 }
7243
7244                 dback->found_ref += 1;
7245                 dback->disk_bytenr = bytenr;
7246                 dback->bytes = bytes;
7247
7248                 /*
7249                  * Set this so the verify backref code knows not to trust the
7250                  * values in this backref.
7251                  */
7252                 back->broken = 1;
7253         }
7254
7255         return 0;
7256 }
7257
7258 /*
7259  * Record orphan data ref into corresponding root.
7260  *
7261  * Return 0 if the extent item contains data ref and recorded.
7262  * Return 1 if the extent item contains no useful data ref
7263  *   On that case, it may contains only shared_dataref or metadata backref
7264  *   or the file extent exists(this should be handled by the extent bytenr
7265  *   recovery routine)
7266  * Return <0 if something goes wrong.
7267  */
7268 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7269                                       struct extent_record *rec)
7270 {
7271         struct btrfs_key key;
7272         struct btrfs_root *dest_root;
7273         struct extent_backref *back;
7274         struct data_backref *dback;
7275         struct orphan_data_extent *orphan;
7276         struct btrfs_path *path;
7277         int recorded_data_ref = 0;
7278         int ret = 0;
7279
7280         if (rec->metadata)
7281                 return 1;
7282         path = btrfs_alloc_path();
7283         if (!path)
7284                 return -ENOMEM;
7285         list_for_each_entry(back, &rec->backrefs, list) {
7286                 if (back->full_backref || !back->is_data ||
7287                     !back->found_extent_tree)
7288                         continue;
7289                 dback = (struct data_backref *)back;
7290                 if (dback->found_ref)
7291                         continue;
7292                 key.objectid = dback->root;
7293                 key.type = BTRFS_ROOT_ITEM_KEY;
7294                 key.offset = (u64)-1;
7295
7296                 dest_root = btrfs_read_fs_root(fs_info, &key);
7297
7298                 /* For non-exist root we just skip it */
7299                 if (IS_ERR(dest_root) || !dest_root)
7300                         continue;
7301
7302                 key.objectid = dback->owner;
7303                 key.type = BTRFS_EXTENT_DATA_KEY;
7304                 key.offset = dback->offset;
7305
7306                 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7307                 /*
7308                  * For ret < 0, it's OK since the fs-tree may be corrupted,
7309                  * we need to record it for inode/file extent rebuild.
7310                  * For ret > 0, we record it only for file extent rebuild.
7311                  * For ret == 0, the file extent exists but only bytenr
7312                  * mismatch, let the original bytenr fix routine to handle,
7313                  * don't record it.
7314                  */
7315                 if (ret == 0)
7316                         continue;
7317                 ret = 0;
7318                 orphan = malloc(sizeof(*orphan));
7319                 if (!orphan) {
7320                         ret = -ENOMEM;
7321                         goto out;
7322                 }
7323                 INIT_LIST_HEAD(&orphan->list);
7324                 orphan->root = dback->root;
7325                 orphan->objectid = dback->owner;
7326                 orphan->offset = dback->offset;
7327                 orphan->disk_bytenr = rec->cache.start;
7328                 orphan->disk_len = rec->cache.size;
7329                 list_add(&dest_root->orphan_data_extents, &orphan->list);
7330                 recorded_data_ref = 1;
7331         }
7332 out:
7333         btrfs_free_path(path);
7334         if (!ret)
7335                 return !recorded_data_ref;
7336         else
7337                 return ret;
7338 }
7339
7340 /*
7341  * when an incorrect extent item is found, this will delete
7342  * all of the existing entries for it and recreate them
7343  * based on what the tree scan found.
7344  */
7345 static int fixup_extent_refs(struct btrfs_fs_info *info,
7346                              struct cache_tree *extent_cache,
7347                              struct extent_record *rec)
7348 {
7349         struct btrfs_trans_handle *trans = NULL;
7350         int ret;
7351         struct btrfs_path *path;
7352         struct list_head *cur = rec->backrefs.next;
7353         struct cache_extent *cache;
7354         struct extent_backref *back;
7355         int allocated = 0;
7356         u64 flags = 0;
7357
7358         if (rec->flag_block_full_backref)
7359                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7360
7361         path = btrfs_alloc_path();
7362         if (!path)
7363                 return -ENOMEM;
7364
7365         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7366                 /*
7367                  * Sometimes the backrefs themselves are so broken they don't
7368                  * get attached to any meaningful rec, so first go back and
7369                  * check any of our backrefs that we couldn't find and throw
7370                  * them into the list if we find the backref so that
7371                  * verify_backrefs can figure out what to do.
7372                  */
7373                 ret = find_possible_backrefs(info, path, extent_cache, rec);
7374                 if (ret < 0)
7375                         goto out;
7376         }
7377
7378         /* step one, make sure all of the backrefs agree */
7379         ret = verify_backrefs(info, path, rec);
7380         if (ret < 0)
7381                 goto out;
7382
7383         trans = btrfs_start_transaction(info->extent_root, 1);
7384         if (IS_ERR(trans)) {
7385                 ret = PTR_ERR(trans);
7386                 goto out;
7387         }
7388
7389         /* step two, delete all the existing records */
7390         ret = delete_extent_records(trans, info->extent_root, path,
7391                                     rec->start, rec->max_size);
7392
7393         if (ret < 0)
7394                 goto out;
7395
7396         /* was this block corrupt?  If so, don't add references to it */
7397         cache = lookup_cache_extent(info->corrupt_blocks,
7398                                     rec->start, rec->max_size);
7399         if (cache) {
7400                 ret = 0;
7401                 goto out;
7402         }
7403
7404         /* step three, recreate all the refs we did find */
7405         while(cur != &rec->backrefs) {
7406                 back = list_entry(cur, struct extent_backref, list);
7407                 cur = cur->next;
7408
7409                 /*
7410                  * if we didn't find any references, don't create a
7411                  * new extent record
7412                  */
7413                 if (!back->found_ref)
7414                         continue;
7415
7416                 rec->bad_full_backref = 0;
7417                 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7418                 allocated = 1;
7419
7420                 if (ret)
7421                         goto out;
7422         }
7423 out:
7424         if (trans) {
7425                 int err = btrfs_commit_transaction(trans, info->extent_root);
7426                 if (!ret)
7427                         ret = err;
7428         }
7429
7430         btrfs_free_path(path);
7431         return ret;
7432 }
7433
7434 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7435                               struct extent_record *rec)
7436 {
7437         struct btrfs_trans_handle *trans;
7438         struct btrfs_root *root = fs_info->extent_root;
7439         struct btrfs_path *path;
7440         struct btrfs_extent_item *ei;
7441         struct btrfs_key key;
7442         u64 flags;
7443         int ret = 0;
7444
7445         key.objectid = rec->start;
7446         if (rec->metadata) {
7447                 key.type = BTRFS_METADATA_ITEM_KEY;
7448                 key.offset = rec->info_level;
7449         } else {
7450                 key.type = BTRFS_EXTENT_ITEM_KEY;
7451                 key.offset = rec->max_size;
7452         }
7453
7454         path = btrfs_alloc_path();
7455         if (!path)
7456                 return -ENOMEM;
7457
7458         trans = btrfs_start_transaction(root, 0);
7459         if (IS_ERR(trans)) {
7460                 btrfs_free_path(path);
7461                 return PTR_ERR(trans);
7462         }
7463
7464         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7465         if (ret < 0) {
7466                 btrfs_free_path(path);
7467                 btrfs_commit_transaction(trans, root);
7468                 return ret;
7469         } else if (ret) {
7470                 fprintf(stderr, "Didn't find extent for %llu\n",
7471                         (unsigned long long)rec->start);
7472                 btrfs_free_path(path);
7473                 btrfs_commit_transaction(trans, root);
7474                 return -ENOENT;
7475         }
7476
7477         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7478                             struct btrfs_extent_item);
7479         flags = btrfs_extent_flags(path->nodes[0], ei);
7480         if (rec->flag_block_full_backref) {
7481                 fprintf(stderr, "setting full backref on %llu\n",
7482                         (unsigned long long)key.objectid);
7483                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7484         } else {
7485                 fprintf(stderr, "clearing full backref on %llu\n",
7486                         (unsigned long long)key.objectid);
7487                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7488         }
7489         btrfs_set_extent_flags(path->nodes[0], ei, flags);
7490         btrfs_mark_buffer_dirty(path->nodes[0]);
7491         btrfs_free_path(path);
7492         return btrfs_commit_transaction(trans, root);
7493 }
7494
7495 /* right now we only prune from the extent allocation tree */
7496 static int prune_one_block(struct btrfs_trans_handle *trans,
7497                            struct btrfs_fs_info *info,
7498                            struct btrfs_corrupt_block *corrupt)
7499 {
7500         int ret;
7501         struct btrfs_path path;
7502         struct extent_buffer *eb;
7503         u64 found;
7504         int slot;
7505         int nritems;
7506         int level = corrupt->level + 1;
7507
7508         btrfs_init_path(&path);
7509 again:
7510         /* we want to stop at the parent to our busted block */
7511         path.lowest_level = level;
7512
7513         ret = btrfs_search_slot(trans, info->extent_root,
7514                                 &corrupt->key, &path, -1, 1);
7515
7516         if (ret < 0)
7517                 goto out;
7518
7519         eb = path.nodes[level];
7520         if (!eb) {
7521                 ret = -ENOENT;
7522                 goto out;
7523         }
7524
7525         /*
7526          * hopefully the search gave us the block we want to prune,
7527          * lets try that first
7528          */
7529         slot = path.slots[level];
7530         found =  btrfs_node_blockptr(eb, slot);
7531         if (found == corrupt->cache.start)
7532                 goto del_ptr;
7533
7534         nritems = btrfs_header_nritems(eb);
7535
7536         /* the search failed, lets scan this node and hope we find it */
7537         for (slot = 0; slot < nritems; slot++) {
7538                 found =  btrfs_node_blockptr(eb, slot);
7539                 if (found == corrupt->cache.start)
7540                         goto del_ptr;
7541         }
7542         /*
7543          * we couldn't find the bad block.  TODO, search all the nodes for pointers
7544          * to this block
7545          */
7546         if (eb == info->extent_root->node) {
7547                 ret = -ENOENT;
7548                 goto out;
7549         } else {
7550                 level++;
7551                 btrfs_release_path(&path);
7552                 goto again;
7553         }
7554
7555 del_ptr:
7556         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7557         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7558
7559 out:
7560         btrfs_release_path(&path);
7561         return ret;
7562 }
7563
7564 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7565 {
7566         struct btrfs_trans_handle *trans = NULL;
7567         struct cache_extent *cache;
7568         struct btrfs_corrupt_block *corrupt;
7569
7570         while (1) {
7571                 cache = search_cache_extent(info->corrupt_blocks, 0);
7572                 if (!cache)
7573                         break;
7574                 if (!trans) {
7575                         trans = btrfs_start_transaction(info->extent_root, 1);
7576                         if (IS_ERR(trans))
7577                                 return PTR_ERR(trans);
7578                 }
7579                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7580                 prune_one_block(trans, info, corrupt);
7581                 remove_cache_extent(info->corrupt_blocks, cache);
7582         }
7583         if (trans)
7584                 return btrfs_commit_transaction(trans, info->extent_root);
7585         return 0;
7586 }
7587
7588 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7589 {
7590         struct btrfs_block_group_cache *cache;
7591         u64 start, end;
7592         int ret;
7593
7594         while (1) {
7595                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7596                                             &start, &end, EXTENT_DIRTY);
7597                 if (ret)
7598                         break;
7599                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7600                                    GFP_NOFS);
7601         }
7602
7603         start = 0;
7604         while (1) {
7605                 cache = btrfs_lookup_first_block_group(fs_info, start);
7606                 if (!cache)
7607                         break;
7608                 if (cache->cached)
7609                         cache->cached = 0;
7610                 start = cache->key.objectid + cache->key.offset;
7611         }
7612 }
7613
7614 static int check_extent_refs(struct btrfs_root *root,
7615                              struct cache_tree *extent_cache)
7616 {
7617         struct extent_record *rec;
7618         struct cache_extent *cache;
7619         int err = 0;
7620         int ret = 0;
7621         int fixed = 0;
7622         int had_dups = 0;
7623         int recorded = 0;
7624
7625         if (repair) {
7626                 /*
7627                  * if we're doing a repair, we have to make sure
7628                  * we don't allocate from the problem extents.
7629                  * In the worst case, this will be all the
7630                  * extents in the FS
7631                  */
7632                 cache = search_cache_extent(extent_cache, 0);
7633                 while(cache) {
7634                         rec = container_of(cache, struct extent_record, cache);
7635                         set_extent_dirty(root->fs_info->excluded_extents,
7636                                          rec->start,
7637                                          rec->start + rec->max_size - 1,
7638                                          GFP_NOFS);
7639                         cache = next_cache_extent(cache);
7640                 }
7641
7642                 /* pin down all the corrupted blocks too */
7643                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7644                 while(cache) {
7645                         set_extent_dirty(root->fs_info->excluded_extents,
7646                                          cache->start,
7647                                          cache->start + cache->size - 1,
7648                                          GFP_NOFS);
7649                         cache = next_cache_extent(cache);
7650                 }
7651                 prune_corrupt_blocks(root->fs_info);
7652                 reset_cached_block_groups(root->fs_info);
7653         }
7654
7655         reset_cached_block_groups(root->fs_info);
7656
7657         /*
7658          * We need to delete any duplicate entries we find first otherwise we
7659          * could mess up the extent tree when we have backrefs that actually
7660          * belong to a different extent item and not the weird duplicate one.
7661          */
7662         while (repair && !list_empty(&duplicate_extents)) {
7663                 rec = list_entry(duplicate_extents.next, struct extent_record,
7664                                  list);
7665                 list_del_init(&rec->list);
7666
7667                 /* Sometimes we can find a backref before we find an actual
7668                  * extent, so we need to process it a little bit to see if there
7669                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7670                  * if this is a backref screwup.  If we need to delete stuff
7671                  * process_duplicates() will return 0, otherwise it will return
7672                  * 1 and we
7673                  */
7674                 if (process_duplicates(root, extent_cache, rec))
7675                         continue;
7676                 ret = delete_duplicate_records(root, rec);
7677                 if (ret < 0)
7678                         return ret;
7679                 /*
7680                  * delete_duplicate_records will return the number of entries
7681                  * deleted, so if it's greater than 0 then we know we actually
7682                  * did something and we need to remove.
7683                  */
7684                 if (ret)
7685                         had_dups = 1;
7686         }
7687
7688         if (had_dups)
7689                 return -EAGAIN;
7690
7691         while(1) {
7692                 int cur_err = 0;
7693
7694                 fixed = 0;
7695                 recorded = 0;
7696                 cache = search_cache_extent(extent_cache, 0);
7697                 if (!cache)
7698                         break;
7699                 rec = container_of(cache, struct extent_record, cache);
7700                 if (rec->num_duplicates) {
7701                         fprintf(stderr, "extent item %llu has multiple extent "
7702                                 "items\n", (unsigned long long)rec->start);
7703                         err = 1;
7704                         cur_err = 1;
7705                 }
7706
7707                 if (rec->refs != rec->extent_item_refs) {
7708                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
7709                                 (unsigned long long)rec->start,
7710                                 (unsigned long long)rec->nr);
7711                         fprintf(stderr, "extent item %llu, found %llu\n",
7712                                 (unsigned long long)rec->extent_item_refs,
7713                                 (unsigned long long)rec->refs);
7714                         ret = record_orphan_data_extents(root->fs_info, rec);
7715                         if (ret < 0)
7716                                 goto repair_abort;
7717                         if (ret == 0) {
7718                                 recorded = 1;
7719                         } else {
7720                                 /*
7721                                  * we can't use the extent to repair file
7722                                  * extent, let the fallback method handle it.
7723                                  */
7724                                 if (!fixed && repair) {
7725                                         ret = fixup_extent_refs(
7726                                                         root->fs_info,
7727                                                         extent_cache, rec);
7728                                         if (ret)
7729                                                 goto repair_abort;
7730                                         fixed = 1;
7731                                 }
7732                         }
7733                         err = 1;
7734                         cur_err = 1;
7735                 }
7736                 if (all_backpointers_checked(rec, 1)) {
7737                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7738                                 (unsigned long long)rec->start,
7739                                 (unsigned long long)rec->nr);
7740
7741                         if (!fixed && !recorded && repair) {
7742                                 ret = fixup_extent_refs(root->fs_info,
7743                                                         extent_cache, rec);
7744                                 if (ret)
7745                                         goto repair_abort;
7746                                 fixed = 1;
7747                         }
7748                         cur_err = 1;
7749                         err = 1;
7750                 }
7751                 if (!rec->owner_ref_checked) {
7752                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7753                                 (unsigned long long)rec->start,
7754                                 (unsigned long long)rec->nr);
7755                         if (!fixed && !recorded && repair) {
7756                                 ret = fixup_extent_refs(root->fs_info,
7757                                                         extent_cache, rec);
7758                                 if (ret)
7759                                         goto repair_abort;
7760                                 fixed = 1;
7761                         }
7762                         err = 1;
7763                         cur_err = 1;
7764                 }
7765                 if (rec->bad_full_backref) {
7766                         fprintf(stderr, "bad full backref, on [%llu]\n",
7767                                 (unsigned long long)rec->start);
7768                         if (repair) {
7769                                 ret = fixup_extent_flags(root->fs_info, rec);
7770                                 if (ret)
7771                                         goto repair_abort;
7772                                 fixed = 1;
7773                         }
7774                         err = 1;
7775                         cur_err = 1;
7776                 }
7777                 /*
7778                  * Although it's not a extent ref's problem, we reuse this
7779                  * routine for error reporting.
7780                  * No repair function yet.
7781                  */
7782                 if (rec->crossing_stripes) {
7783                         fprintf(stderr,
7784                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7785                                 rec->start, rec->start + rec->max_size);
7786                         err = 1;
7787                         cur_err = 1;
7788                 }
7789
7790                 if (rec->wrong_chunk_type) {
7791                         fprintf(stderr,
7792                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
7793                                 rec->start, rec->start + rec->max_size);
7794                         err = 1;
7795                         cur_err = 1;
7796                 }
7797
7798                 remove_cache_extent(extent_cache, cache);
7799                 free_all_extent_backrefs(rec);
7800                 if (!init_extent_tree && repair && (!cur_err || fixed))
7801                         clear_extent_dirty(root->fs_info->excluded_extents,
7802                                            rec->start,
7803                                            rec->start + rec->max_size - 1,
7804                                            GFP_NOFS);
7805                 free(rec);
7806         }
7807 repair_abort:
7808         if (repair) {
7809                 if (ret && ret != -EAGAIN) {
7810                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7811                         exit(1);
7812                 } else if (!ret) {
7813                         struct btrfs_trans_handle *trans;
7814
7815                         root = root->fs_info->extent_root;
7816                         trans = btrfs_start_transaction(root, 1);
7817                         if (IS_ERR(trans)) {
7818                                 ret = PTR_ERR(trans);
7819                                 goto repair_abort;
7820                         }
7821
7822                         btrfs_fix_block_accounting(trans, root);
7823                         ret = btrfs_commit_transaction(trans, root);
7824                         if (ret)
7825                                 goto repair_abort;
7826                 }
7827                 if (err)
7828                         fprintf(stderr, "repaired damaged extent references\n");
7829                 return ret;
7830         }
7831         return err;
7832 }
7833
7834 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7835 {
7836         u64 stripe_size;
7837
7838         if (type & BTRFS_BLOCK_GROUP_RAID0) {
7839                 stripe_size = length;
7840                 stripe_size /= num_stripes;
7841         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7842                 stripe_size = length * 2;
7843                 stripe_size /= num_stripes;
7844         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7845                 stripe_size = length;
7846                 stripe_size /= (num_stripes - 1);
7847         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7848                 stripe_size = length;
7849                 stripe_size /= (num_stripes - 2);
7850         } else {
7851                 stripe_size = length;
7852         }
7853         return stripe_size;
7854 }
7855
7856 /*
7857  * Check the chunk with its block group/dev list ref:
7858  * Return 0 if all refs seems valid.
7859  * Return 1 if part of refs seems valid, need later check for rebuild ref
7860  * like missing block group and needs to search extent tree to rebuild them.
7861  * Return -1 if essential refs are missing and unable to rebuild.
7862  */
7863 static int check_chunk_refs(struct chunk_record *chunk_rec,
7864                             struct block_group_tree *block_group_cache,
7865                             struct device_extent_tree *dev_extent_cache,
7866                             int silent)
7867 {
7868         struct cache_extent *block_group_item;
7869         struct block_group_record *block_group_rec;
7870         struct cache_extent *dev_extent_item;
7871         struct device_extent_record *dev_extent_rec;
7872         u64 devid;
7873         u64 offset;
7874         u64 length;
7875         int metadump_v2 = 0;
7876         int i;
7877         int ret = 0;
7878
7879         block_group_item = lookup_cache_extent(&block_group_cache->tree,
7880                                                chunk_rec->offset,
7881                                                chunk_rec->length);
7882         if (block_group_item) {
7883                 block_group_rec = container_of(block_group_item,
7884                                                struct block_group_record,
7885                                                cache);
7886                 if (chunk_rec->length != block_group_rec->offset ||
7887                     chunk_rec->offset != block_group_rec->objectid ||
7888                     (!metadump_v2 &&
7889                      chunk_rec->type_flags != block_group_rec->flags)) {
7890                         if (!silent)
7891                                 fprintf(stderr,
7892                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
7893                                         chunk_rec->objectid,
7894                                         chunk_rec->type,
7895                                         chunk_rec->offset,
7896                                         chunk_rec->length,
7897                                         chunk_rec->offset,
7898                                         chunk_rec->type_flags,
7899                                         block_group_rec->objectid,
7900                                         block_group_rec->type,
7901                                         block_group_rec->offset,
7902                                         block_group_rec->offset,
7903                                         block_group_rec->objectid,
7904                                         block_group_rec->flags);
7905                         ret = -1;
7906                 } else {
7907                         list_del_init(&block_group_rec->list);
7908                         chunk_rec->bg_rec = block_group_rec;
7909                 }
7910         } else {
7911                 if (!silent)
7912                         fprintf(stderr,
7913                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
7914                                 chunk_rec->objectid,
7915                                 chunk_rec->type,
7916                                 chunk_rec->offset,
7917                                 chunk_rec->length,
7918                                 chunk_rec->offset,
7919                                 chunk_rec->type_flags);
7920                 ret = 1;
7921         }
7922
7923         if (metadump_v2)
7924                 return ret;
7925
7926         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
7927                                     chunk_rec->num_stripes);
7928         for (i = 0; i < chunk_rec->num_stripes; ++i) {
7929                 devid = chunk_rec->stripes[i].devid;
7930                 offset = chunk_rec->stripes[i].offset;
7931                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
7932                                                        devid, offset, length);
7933                 if (dev_extent_item) {
7934                         dev_extent_rec = container_of(dev_extent_item,
7935                                                 struct device_extent_record,
7936                                                 cache);
7937                         if (dev_extent_rec->objectid != devid ||
7938                             dev_extent_rec->offset != offset ||
7939                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
7940                             dev_extent_rec->length != length) {
7941                                 if (!silent)
7942                                         fprintf(stderr,
7943                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
7944                                                 chunk_rec->objectid,
7945                                                 chunk_rec->type,
7946                                                 chunk_rec->offset,
7947                                                 chunk_rec->stripes[i].devid,
7948                                                 chunk_rec->stripes[i].offset,
7949                                                 dev_extent_rec->objectid,
7950                                                 dev_extent_rec->offset,
7951                                                 dev_extent_rec->length);
7952                                 ret = -1;
7953                         } else {
7954                                 list_move(&dev_extent_rec->chunk_list,
7955                                           &chunk_rec->dextents);
7956                         }
7957                 } else {
7958                         if (!silent)
7959                                 fprintf(stderr,
7960                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
7961                                         chunk_rec->objectid,
7962                                         chunk_rec->type,
7963                                         chunk_rec->offset,
7964                                         chunk_rec->stripes[i].devid,
7965                                         chunk_rec->stripes[i].offset);
7966                         ret = -1;
7967                 }
7968         }
7969         return ret;
7970 }
7971
7972 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
7973 int check_chunks(struct cache_tree *chunk_cache,
7974                  struct block_group_tree *block_group_cache,
7975                  struct device_extent_tree *dev_extent_cache,
7976                  struct list_head *good, struct list_head *bad,
7977                  struct list_head *rebuild, int silent)
7978 {
7979         struct cache_extent *chunk_item;
7980         struct chunk_record *chunk_rec;
7981         struct block_group_record *bg_rec;
7982         struct device_extent_record *dext_rec;
7983         int err;
7984         int ret = 0;
7985
7986         chunk_item = first_cache_extent(chunk_cache);
7987         while (chunk_item) {
7988                 chunk_rec = container_of(chunk_item, struct chunk_record,
7989                                          cache);
7990                 err = check_chunk_refs(chunk_rec, block_group_cache,
7991                                        dev_extent_cache, silent);
7992                 if (err < 0)
7993                         ret = err;
7994                 if (err == 0 && good)
7995                         list_add_tail(&chunk_rec->list, good);
7996                 if (err > 0 && rebuild)
7997                         list_add_tail(&chunk_rec->list, rebuild);
7998                 if (err < 0 && bad)
7999                         list_add_tail(&chunk_rec->list, bad);
8000                 chunk_item = next_cache_extent(chunk_item);
8001         }
8002
8003         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8004                 if (!silent)
8005                         fprintf(stderr,
8006                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8007                                 bg_rec->objectid,
8008                                 bg_rec->offset,
8009                                 bg_rec->flags);
8010                 if (!ret)
8011                         ret = 1;
8012         }
8013
8014         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8015                             chunk_list) {
8016                 if (!silent)
8017                         fprintf(stderr,
8018                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8019                                 dext_rec->objectid,
8020                                 dext_rec->offset,
8021                                 dext_rec->length);
8022                 if (!ret)
8023                         ret = 1;
8024         }
8025         return ret;
8026 }
8027
8028
8029 static int check_device_used(struct device_record *dev_rec,
8030                              struct device_extent_tree *dext_cache)
8031 {
8032         struct cache_extent *cache;
8033         struct device_extent_record *dev_extent_rec;
8034         u64 total_byte = 0;
8035
8036         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8037         while (cache) {
8038                 dev_extent_rec = container_of(cache,
8039                                               struct device_extent_record,
8040                                               cache);
8041                 if (dev_extent_rec->objectid != dev_rec->devid)
8042                         break;
8043
8044                 list_del_init(&dev_extent_rec->device_list);
8045                 total_byte += dev_extent_rec->length;
8046                 cache = next_cache_extent(cache);
8047         }
8048
8049         if (total_byte != dev_rec->byte_used) {
8050                 fprintf(stderr,
8051                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8052                         total_byte, dev_rec->byte_used, dev_rec->objectid,
8053                         dev_rec->type, dev_rec->offset);
8054                 return -1;
8055         } else {
8056                 return 0;
8057         }
8058 }
8059
8060 /* check btrfs_dev_item -> btrfs_dev_extent */
8061 static int check_devices(struct rb_root *dev_cache,
8062                          struct device_extent_tree *dev_extent_cache)
8063 {
8064         struct rb_node *dev_node;
8065         struct device_record *dev_rec;
8066         struct device_extent_record *dext_rec;
8067         int err;
8068         int ret = 0;
8069
8070         dev_node = rb_first(dev_cache);
8071         while (dev_node) {
8072                 dev_rec = container_of(dev_node, struct device_record, node);
8073                 err = check_device_used(dev_rec, dev_extent_cache);
8074                 if (err)
8075                         ret = err;
8076
8077                 dev_node = rb_next(dev_node);
8078         }
8079         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8080                             device_list) {
8081                 fprintf(stderr,
8082                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8083                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
8084                 if (!ret)
8085                         ret = 1;
8086         }
8087         return ret;
8088 }
8089
8090 static int add_root_item_to_list(struct list_head *head,
8091                                   u64 objectid, u64 bytenr, u64 last_snapshot,
8092                                   u8 level, u8 drop_level,
8093                                   int level_size, struct btrfs_key *drop_key)
8094 {
8095
8096         struct root_item_record *ri_rec;
8097         ri_rec = malloc(sizeof(*ri_rec));
8098         if (!ri_rec)
8099                 return -ENOMEM;
8100         ri_rec->bytenr = bytenr;
8101         ri_rec->objectid = objectid;
8102         ri_rec->level = level;
8103         ri_rec->level_size = level_size;
8104         ri_rec->drop_level = drop_level;
8105         ri_rec->last_snapshot = last_snapshot;
8106         if (drop_key)
8107                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8108         list_add_tail(&ri_rec->list, head);
8109
8110         return 0;
8111 }
8112
8113 static void free_root_item_list(struct list_head *list)
8114 {
8115         struct root_item_record *ri_rec;
8116
8117         while (!list_empty(list)) {
8118                 ri_rec = list_first_entry(list, struct root_item_record,
8119                                           list);
8120                 list_del_init(&ri_rec->list);
8121                 free(ri_rec);
8122         }
8123 }
8124
8125 static int deal_root_from_list(struct list_head *list,
8126                                struct btrfs_root *root,
8127                                struct block_info *bits,
8128                                int bits_nr,
8129                                struct cache_tree *pending,
8130                                struct cache_tree *seen,
8131                                struct cache_tree *reada,
8132                                struct cache_tree *nodes,
8133                                struct cache_tree *extent_cache,
8134                                struct cache_tree *chunk_cache,
8135                                struct rb_root *dev_cache,
8136                                struct block_group_tree *block_group_cache,
8137                                struct device_extent_tree *dev_extent_cache)
8138 {
8139         int ret = 0;
8140         u64 last;
8141
8142         while (!list_empty(list)) {
8143                 struct root_item_record *rec;
8144                 struct extent_buffer *buf;
8145                 rec = list_entry(list->next,
8146                                  struct root_item_record, list);
8147                 last = 0;
8148                 buf = read_tree_block(root->fs_info->tree_root,
8149                                       rec->bytenr, rec->level_size, 0);
8150                 if (!extent_buffer_uptodate(buf)) {
8151                         free_extent_buffer(buf);
8152                         ret = -EIO;
8153                         break;
8154                 }
8155                 add_root_to_pending(buf, extent_cache, pending,
8156                                     seen, nodes, rec->objectid);
8157                 /*
8158                  * To rebuild extent tree, we need deal with snapshot
8159                  * one by one, otherwise we deal with node firstly which
8160                  * can maximize readahead.
8161                  */
8162                 while (1) {
8163                         ret = run_next_block(root, bits, bits_nr, &last,
8164                                              pending, seen, reada, nodes,
8165                                              extent_cache, chunk_cache,
8166                                              dev_cache, block_group_cache,
8167                                              dev_extent_cache, rec);
8168                         if (ret != 0)
8169                                 break;
8170                 }
8171                 free_extent_buffer(buf);
8172                 list_del(&rec->list);
8173                 free(rec);
8174                 if (ret < 0)
8175                         break;
8176         }
8177         while (ret >= 0) {
8178                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8179                                      reada, nodes, extent_cache, chunk_cache,
8180                                      dev_cache, block_group_cache,
8181                                      dev_extent_cache, NULL);
8182                 if (ret != 0) {
8183                         if (ret > 0)
8184                                 ret = 0;
8185                         break;
8186                 }
8187         }
8188         return ret;
8189 }
8190
8191 static int check_chunks_and_extents(struct btrfs_root *root)
8192 {
8193         struct rb_root dev_cache;
8194         struct cache_tree chunk_cache;
8195         struct block_group_tree block_group_cache;
8196         struct device_extent_tree dev_extent_cache;
8197         struct cache_tree extent_cache;
8198         struct cache_tree seen;
8199         struct cache_tree pending;
8200         struct cache_tree reada;
8201         struct cache_tree nodes;
8202         struct extent_io_tree excluded_extents;
8203         struct cache_tree corrupt_blocks;
8204         struct btrfs_path path;
8205         struct btrfs_key key;
8206         struct btrfs_key found_key;
8207         int ret, err = 0;
8208         struct block_info *bits;
8209         int bits_nr;
8210         struct extent_buffer *leaf;
8211         int slot;
8212         struct btrfs_root_item ri;
8213         struct list_head dropping_trees;
8214         struct list_head normal_trees;
8215         struct btrfs_root *root1;
8216         u64 objectid;
8217         u32 level_size;
8218         u8 level;
8219
8220         dev_cache = RB_ROOT;
8221         cache_tree_init(&chunk_cache);
8222         block_group_tree_init(&block_group_cache);
8223         device_extent_tree_init(&dev_extent_cache);
8224
8225         cache_tree_init(&extent_cache);
8226         cache_tree_init(&seen);
8227         cache_tree_init(&pending);
8228         cache_tree_init(&nodes);
8229         cache_tree_init(&reada);
8230         cache_tree_init(&corrupt_blocks);
8231         extent_io_tree_init(&excluded_extents);
8232         INIT_LIST_HEAD(&dropping_trees);
8233         INIT_LIST_HEAD(&normal_trees);
8234
8235         if (repair) {
8236                 root->fs_info->excluded_extents = &excluded_extents;
8237                 root->fs_info->fsck_extent_cache = &extent_cache;
8238                 root->fs_info->free_extent_hook = free_extent_hook;
8239                 root->fs_info->corrupt_blocks = &corrupt_blocks;
8240         }
8241
8242         bits_nr = 1024;
8243         bits = malloc(bits_nr * sizeof(struct block_info));
8244         if (!bits) {
8245                 perror("malloc");
8246                 exit(1);
8247         }
8248
8249         if (ctx.progress_enabled) {
8250                 ctx.tp = TASK_EXTENTS;
8251                 task_start(ctx.info);
8252         }
8253
8254 again:
8255         root1 = root->fs_info->tree_root;
8256         level = btrfs_header_level(root1->node);
8257         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8258                                     root1->node->start, 0, level, 0,
8259                                     root1->nodesize, NULL);
8260         if (ret < 0)
8261                 goto out;
8262         root1 = root->fs_info->chunk_root;
8263         level = btrfs_header_level(root1->node);
8264         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8265                                     root1->node->start, 0, level, 0,
8266                                     root1->nodesize, NULL);
8267         if (ret < 0)
8268                 goto out;
8269         btrfs_init_path(&path);
8270         key.offset = 0;
8271         key.objectid = 0;
8272         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8273         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8274                                         &key, &path, 0, 0);
8275         if (ret < 0)
8276                 goto out;
8277         while(1) {
8278                 leaf = path.nodes[0];
8279                 slot = path.slots[0];
8280                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8281                         ret = btrfs_next_leaf(root, &path);
8282                         if (ret != 0)
8283                                 break;
8284                         leaf = path.nodes[0];
8285                         slot = path.slots[0];
8286                 }
8287                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8288                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8289                         unsigned long offset;
8290                         u64 last_snapshot;
8291
8292                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8293                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8294                         last_snapshot = btrfs_root_last_snapshot(&ri);
8295                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8296                                 level = btrfs_root_level(&ri);
8297                                 level_size = root->nodesize;
8298                                 ret = add_root_item_to_list(&normal_trees,
8299                                                 found_key.objectid,
8300                                                 btrfs_root_bytenr(&ri),
8301                                                 last_snapshot, level,
8302                                                 0, level_size, NULL);
8303                                 if (ret < 0)
8304                                         goto out;
8305                         } else {
8306                                 level = btrfs_root_level(&ri);
8307                                 level_size = root->nodesize;
8308                                 objectid = found_key.objectid;
8309                                 btrfs_disk_key_to_cpu(&found_key,
8310                                                       &ri.drop_progress);
8311                                 ret = add_root_item_to_list(&dropping_trees,
8312                                                 objectid,
8313                                                 btrfs_root_bytenr(&ri),
8314                                                 last_snapshot, level,
8315                                                 ri.drop_level,
8316                                                 level_size, &found_key);
8317                                 if (ret < 0)
8318                                         goto out;
8319                         }
8320                 }
8321                 path.slots[0]++;
8322         }
8323         btrfs_release_path(&path);
8324
8325         /*
8326          * check_block can return -EAGAIN if it fixes something, please keep
8327          * this in mind when dealing with return values from these functions, if
8328          * we get -EAGAIN we want to fall through and restart the loop.
8329          */
8330         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8331                                   &seen, &reada, &nodes, &extent_cache,
8332                                   &chunk_cache, &dev_cache, &block_group_cache,
8333                                   &dev_extent_cache);
8334         if (ret < 0) {
8335                 if (ret == -EAGAIN)
8336                         goto loop;
8337                 goto out;
8338         }
8339         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8340                                   &pending, &seen, &reada, &nodes,
8341                                   &extent_cache, &chunk_cache, &dev_cache,
8342                                   &block_group_cache, &dev_extent_cache);
8343         if (ret < 0) {
8344                 if (ret == -EAGAIN)
8345                         goto loop;
8346                 goto out;
8347         }
8348
8349         ret = check_chunks(&chunk_cache, &block_group_cache,
8350                            &dev_extent_cache, NULL, NULL, NULL, 0);
8351         if (ret) {
8352                 if (ret == -EAGAIN)
8353                         goto loop;
8354                 err = ret;
8355         }
8356
8357         ret = check_extent_refs(root, &extent_cache);
8358         if (ret < 0) {
8359                 if (ret == -EAGAIN)
8360                         goto loop;
8361                 goto out;
8362         }
8363
8364         ret = check_devices(&dev_cache, &dev_extent_cache);
8365         if (ret && err)
8366                 ret = err;
8367
8368 out:
8369         task_stop(ctx.info);
8370         if (repair) {
8371                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8372                 extent_io_tree_cleanup(&excluded_extents);
8373                 root->fs_info->fsck_extent_cache = NULL;
8374                 root->fs_info->free_extent_hook = NULL;
8375                 root->fs_info->corrupt_blocks = NULL;
8376                 root->fs_info->excluded_extents = NULL;
8377         }
8378         free(bits);
8379         free_chunk_cache_tree(&chunk_cache);
8380         free_device_cache_tree(&dev_cache);
8381         free_block_group_tree(&block_group_cache);
8382         free_device_extent_tree(&dev_extent_cache);
8383         free_extent_cache_tree(&seen);
8384         free_extent_cache_tree(&pending);
8385         free_extent_cache_tree(&reada);
8386         free_extent_cache_tree(&nodes);
8387         return ret;
8388 loop:
8389         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8390         free_extent_cache_tree(&seen);
8391         free_extent_cache_tree(&pending);
8392         free_extent_cache_tree(&reada);
8393         free_extent_cache_tree(&nodes);
8394         free_chunk_cache_tree(&chunk_cache);
8395         free_block_group_tree(&block_group_cache);
8396         free_device_cache_tree(&dev_cache);
8397         free_device_extent_tree(&dev_extent_cache);
8398         free_extent_record_cache(root->fs_info, &extent_cache);
8399         free_root_item_list(&normal_trees);
8400         free_root_item_list(&dropping_trees);
8401         extent_io_tree_cleanup(&excluded_extents);
8402         goto again;
8403 }
8404
8405 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8406                            struct btrfs_root *root, int overwrite)
8407 {
8408         struct extent_buffer *c;
8409         struct extent_buffer *old = root->node;
8410         int level;
8411         int ret;
8412         struct btrfs_disk_key disk_key = {0,0,0};
8413
8414         level = 0;
8415
8416         if (overwrite) {
8417                 c = old;
8418                 extent_buffer_get(c);
8419                 goto init;
8420         }
8421         c = btrfs_alloc_free_block(trans, root,
8422                                    root->nodesize,
8423                                    root->root_key.objectid,
8424                                    &disk_key, level, 0, 0);
8425         if (IS_ERR(c)) {
8426                 c = old;
8427                 extent_buffer_get(c);
8428                 overwrite = 1;
8429         }
8430 init:
8431         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8432         btrfs_set_header_level(c, level);
8433         btrfs_set_header_bytenr(c, c->start);
8434         btrfs_set_header_generation(c, trans->transid);
8435         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8436         btrfs_set_header_owner(c, root->root_key.objectid);
8437
8438         write_extent_buffer(c, root->fs_info->fsid,
8439                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
8440
8441         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8442                             btrfs_header_chunk_tree_uuid(c),
8443                             BTRFS_UUID_SIZE);
8444
8445         btrfs_mark_buffer_dirty(c);
8446         /*
8447          * this case can happen in the following case:
8448          *
8449          * 1.overwrite previous root.
8450          *
8451          * 2.reinit reloc data root, this is because we skip pin
8452          * down reloc data tree before which means we can allocate
8453          * same block bytenr here.
8454          */
8455         if (old->start == c->start) {
8456                 btrfs_set_root_generation(&root->root_item,
8457                                           trans->transid);
8458                 root->root_item.level = btrfs_header_level(root->node);
8459                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8460                                         &root->root_key, &root->root_item);
8461                 if (ret) {
8462                         free_extent_buffer(c);
8463                         return ret;
8464                 }
8465         }
8466         free_extent_buffer(old);
8467         root->node = c;
8468         add_root_to_dirty_list(root);
8469         return 0;
8470 }
8471
8472 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8473                                 struct extent_buffer *eb, int tree_root)
8474 {
8475         struct extent_buffer *tmp;
8476         struct btrfs_root_item *ri;
8477         struct btrfs_key key;
8478         u64 bytenr;
8479         u32 nodesize;
8480         int level = btrfs_header_level(eb);
8481         int nritems;
8482         int ret;
8483         int i;
8484
8485         /*
8486          * If we have pinned this block before, don't pin it again.
8487          * This can not only avoid forever loop with broken filesystem
8488          * but also give us some speedups.
8489          */
8490         if (test_range_bit(&fs_info->pinned_extents, eb->start,
8491                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8492                 return 0;
8493
8494         btrfs_pin_extent(fs_info, eb->start, eb->len);
8495
8496         nodesize = btrfs_super_nodesize(fs_info->super_copy);
8497         nritems = btrfs_header_nritems(eb);
8498         for (i = 0; i < nritems; i++) {
8499                 if (level == 0) {
8500                         btrfs_item_key_to_cpu(eb, &key, i);
8501                         if (key.type != BTRFS_ROOT_ITEM_KEY)
8502                                 continue;
8503                         /* Skip the extent root and reloc roots */
8504                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8505                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8506                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8507                                 continue;
8508                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8509                         bytenr = btrfs_disk_root_bytenr(eb, ri);
8510
8511                         /*
8512                          * If at any point we start needing the real root we
8513                          * will have to build a stump root for the root we are
8514                          * in, but for now this doesn't actually use the root so
8515                          * just pass in extent_root.
8516                          */
8517                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8518                                               nodesize, 0);
8519                         if (!extent_buffer_uptodate(tmp)) {
8520                                 fprintf(stderr, "Error reading root block\n");
8521                                 return -EIO;
8522                         }
8523                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
8524                         free_extent_buffer(tmp);
8525                         if (ret)
8526                                 return ret;
8527                 } else {
8528                         bytenr = btrfs_node_blockptr(eb, i);
8529
8530                         /* If we aren't the tree root don't read the block */
8531                         if (level == 1 && !tree_root) {
8532                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
8533                                 continue;
8534                         }
8535
8536                         tmp = read_tree_block(fs_info->extent_root, bytenr,
8537                                               nodesize, 0);
8538                         if (!extent_buffer_uptodate(tmp)) {
8539                                 fprintf(stderr, "Error reading tree block\n");
8540                                 return -EIO;
8541                         }
8542                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8543                         free_extent_buffer(tmp);
8544                         if (ret)
8545                                 return ret;
8546                 }
8547         }
8548
8549         return 0;
8550 }
8551
8552 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8553 {
8554         int ret;
8555
8556         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8557         if (ret)
8558                 return ret;
8559
8560         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
8561 }
8562
8563 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8564 {
8565         struct btrfs_block_group_cache *cache;
8566         struct btrfs_path *path;
8567         struct extent_buffer *leaf;
8568         struct btrfs_chunk *chunk;
8569         struct btrfs_key key;
8570         int ret;
8571         u64 start;
8572
8573         path = btrfs_alloc_path();
8574         if (!path)
8575                 return -ENOMEM;
8576
8577         key.objectid = 0;
8578         key.type = BTRFS_CHUNK_ITEM_KEY;
8579         key.offset = 0;
8580
8581         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8582         if (ret < 0) {
8583                 btrfs_free_path(path);
8584                 return ret;
8585         }
8586
8587         /*
8588          * We do this in case the block groups were screwed up and had alloc
8589          * bits that aren't actually set on the chunks.  This happens with
8590          * restored images every time and could happen in real life I guess.
8591          */
8592         fs_info->avail_data_alloc_bits = 0;
8593         fs_info->avail_metadata_alloc_bits = 0;
8594         fs_info->avail_system_alloc_bits = 0;
8595
8596         /* First we need to create the in-memory block groups */
8597         while (1) {
8598                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8599                         ret = btrfs_next_leaf(fs_info->chunk_root, path);
8600                         if (ret < 0) {
8601                                 btrfs_free_path(path);
8602                                 return ret;
8603                         }
8604                         if (ret) {
8605                                 ret = 0;
8606                                 break;
8607                         }
8608                 }
8609                 leaf = path->nodes[0];
8610                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8611                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8612                         path->slots[0]++;
8613                         continue;
8614                 }
8615
8616                 chunk = btrfs_item_ptr(leaf, path->slots[0],
8617                                        struct btrfs_chunk);
8618                 btrfs_add_block_group(fs_info, 0,
8619                                       btrfs_chunk_type(leaf, chunk),
8620                                       key.objectid, key.offset,
8621                                       btrfs_chunk_length(leaf, chunk));
8622                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8623                                  key.offset + btrfs_chunk_length(leaf, chunk),
8624                                  GFP_NOFS);
8625                 path->slots[0]++;
8626         }
8627         start = 0;
8628         while (1) {
8629                 cache = btrfs_lookup_first_block_group(fs_info, start);
8630                 if (!cache)
8631                         break;
8632                 cache->cached = 1;
8633                 start = cache->key.objectid + cache->key.offset;
8634         }
8635
8636         btrfs_free_path(path);
8637         return 0;
8638 }
8639
8640 static int reset_balance(struct btrfs_trans_handle *trans,
8641                          struct btrfs_fs_info *fs_info)
8642 {
8643         struct btrfs_root *root = fs_info->tree_root;
8644         struct btrfs_path *path;
8645         struct extent_buffer *leaf;
8646         struct btrfs_key key;
8647         int del_slot, del_nr = 0;
8648         int ret;
8649         int found = 0;
8650
8651         path = btrfs_alloc_path();
8652         if (!path)
8653                 return -ENOMEM;
8654
8655         key.objectid = BTRFS_BALANCE_OBJECTID;
8656         key.type = BTRFS_BALANCE_ITEM_KEY;
8657         key.offset = 0;
8658
8659         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8660         if (ret) {
8661                 if (ret > 0)
8662                         ret = 0;
8663                 if (!ret)
8664                         goto reinit_data_reloc;
8665                 else
8666                         goto out;
8667         }
8668
8669         ret = btrfs_del_item(trans, root, path);
8670         if (ret)
8671                 goto out;
8672         btrfs_release_path(path);
8673
8674         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8675         key.type = BTRFS_ROOT_ITEM_KEY;
8676         key.offset = 0;
8677
8678         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8679         if (ret < 0)
8680                 goto out;
8681         while (1) {
8682                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8683                         if (!found)
8684                                 break;
8685
8686                         if (del_nr) {
8687                                 ret = btrfs_del_items(trans, root, path,
8688                                                       del_slot, del_nr);
8689                                 del_nr = 0;
8690                                 if (ret)
8691                                         goto out;
8692                         }
8693                         key.offset++;
8694                         btrfs_release_path(path);
8695
8696                         found = 0;
8697                         ret = btrfs_search_slot(trans, root, &key, path,
8698                                                 -1, 1);
8699                         if (ret < 0)
8700                                 goto out;
8701                         continue;
8702                 }
8703                 found = 1;
8704                 leaf = path->nodes[0];
8705                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8706                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8707                         break;
8708                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8709                         path->slots[0]++;
8710                         continue;
8711                 }
8712                 if (!del_nr) {
8713                         del_slot = path->slots[0];
8714                         del_nr = 1;
8715                 } else {
8716                         del_nr++;
8717                 }
8718                 path->slots[0]++;
8719         }
8720
8721         if (del_nr) {
8722                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8723                 if (ret)
8724                         goto out;
8725         }
8726         btrfs_release_path(path);
8727
8728 reinit_data_reloc:
8729         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8730         key.type = BTRFS_ROOT_ITEM_KEY;
8731         key.offset = (u64)-1;
8732         root = btrfs_read_fs_root(fs_info, &key);
8733         if (IS_ERR(root)) {
8734                 fprintf(stderr, "Error reading data reloc tree\n");
8735                 ret = PTR_ERR(root);
8736                 goto out;
8737         }
8738         record_root_in_trans(trans, root);
8739         ret = btrfs_fsck_reinit_root(trans, root, 0);
8740         if (ret)
8741                 goto out;
8742         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8743 out:
8744         btrfs_free_path(path);
8745         return ret;
8746 }
8747
8748 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8749                               struct btrfs_fs_info *fs_info)
8750 {
8751         u64 start = 0;
8752         int ret;
8753
8754         /*
8755          * The only reason we don't do this is because right now we're just
8756          * walking the trees we find and pinning down their bytes, we don't look
8757          * at any of the leaves.  In order to do mixed groups we'd have to check
8758          * the leaves of any fs roots and pin down the bytes for any file
8759          * extents we find.  Not hard but why do it if we don't have to?
8760          */
8761         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8762                 fprintf(stderr, "We don't support re-initing the extent tree "
8763                         "for mixed block groups yet, please notify a btrfs "
8764                         "developer you want to do this so they can add this "
8765                         "functionality.\n");
8766                 return -EINVAL;
8767         }
8768
8769         /*
8770          * first we need to walk all of the trees except the extent tree and pin
8771          * down the bytes that are in use so we don't overwrite any existing
8772          * metadata.
8773          */
8774         ret = pin_metadata_blocks(fs_info);
8775         if (ret) {
8776                 fprintf(stderr, "error pinning down used bytes\n");
8777                 return ret;
8778         }
8779
8780         /*
8781          * Need to drop all the block groups since we're going to recreate all
8782          * of them again.
8783          */
8784         btrfs_free_block_groups(fs_info);
8785         ret = reset_block_groups(fs_info);
8786         if (ret) {
8787                 fprintf(stderr, "error resetting the block groups\n");
8788                 return ret;
8789         }
8790
8791         /* Ok we can allocate now, reinit the extent root */
8792         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8793         if (ret) {
8794                 fprintf(stderr, "extent root initialization failed\n");
8795                 /*
8796                  * When the transaction code is updated we should end the
8797                  * transaction, but for now progs only knows about commit so
8798                  * just return an error.
8799                  */
8800                 return ret;
8801         }
8802
8803         /*
8804          * Now we have all the in-memory block groups setup so we can make
8805          * allocations properly, and the metadata we care about is safe since we
8806          * pinned all of it above.
8807          */
8808         while (1) {
8809                 struct btrfs_block_group_cache *cache;
8810
8811                 cache = btrfs_lookup_first_block_group(fs_info, start);
8812                 if (!cache)
8813                         break;
8814                 start = cache->key.objectid + cache->key.offset;
8815                 ret = btrfs_insert_item(trans, fs_info->extent_root,
8816                                         &cache->key, &cache->item,
8817                                         sizeof(cache->item));
8818                 if (ret) {
8819                         fprintf(stderr, "Error adding block group\n");
8820                         return ret;
8821                 }
8822                 btrfs_extent_post_op(trans, fs_info->extent_root);
8823         }
8824
8825         ret = reset_balance(trans, fs_info);
8826         if (ret)
8827                 fprintf(stderr, "error resetting the pending balance\n");
8828
8829         return ret;
8830 }
8831
8832 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8833 {
8834         struct btrfs_path *path;
8835         struct btrfs_trans_handle *trans;
8836         struct btrfs_key key;
8837         int ret;
8838
8839         printf("Recowing metadata block %llu\n", eb->start);
8840         key.objectid = btrfs_header_owner(eb);
8841         key.type = BTRFS_ROOT_ITEM_KEY;
8842         key.offset = (u64)-1;
8843
8844         root = btrfs_read_fs_root(root->fs_info, &key);
8845         if (IS_ERR(root)) {
8846                 fprintf(stderr, "Couldn't find owner root %llu\n",
8847                         key.objectid);
8848                 return PTR_ERR(root);
8849         }
8850
8851         path = btrfs_alloc_path();
8852         if (!path)
8853                 return -ENOMEM;
8854
8855         trans = btrfs_start_transaction(root, 1);
8856         if (IS_ERR(trans)) {
8857                 btrfs_free_path(path);
8858                 return PTR_ERR(trans);
8859         }
8860
8861         path->lowest_level = btrfs_header_level(eb);
8862         if (path->lowest_level)
8863                 btrfs_node_key_to_cpu(eb, &key, 0);
8864         else
8865                 btrfs_item_key_to_cpu(eb, &key, 0);
8866
8867         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8868         btrfs_commit_transaction(trans, root);
8869         btrfs_free_path(path);
8870         return ret;
8871 }
8872
8873 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8874 {
8875         struct btrfs_path *path;
8876         struct btrfs_trans_handle *trans;
8877         struct btrfs_key key;
8878         int ret;
8879
8880         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8881                bad->key.type, bad->key.offset);
8882         key.objectid = bad->root_id;
8883         key.type = BTRFS_ROOT_ITEM_KEY;
8884         key.offset = (u64)-1;
8885
8886         root = btrfs_read_fs_root(root->fs_info, &key);
8887         if (IS_ERR(root)) {
8888                 fprintf(stderr, "Couldn't find owner root %llu\n",
8889                         key.objectid);
8890                 return PTR_ERR(root);
8891         }
8892
8893         path = btrfs_alloc_path();
8894         if (!path)
8895                 return -ENOMEM;
8896
8897         trans = btrfs_start_transaction(root, 1);
8898         if (IS_ERR(trans)) {
8899                 btrfs_free_path(path);
8900                 return PTR_ERR(trans);
8901         }
8902
8903         ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
8904         if (ret) {
8905                 if (ret > 0)
8906                         ret = 0;
8907                 goto out;
8908         }
8909         ret = btrfs_del_item(trans, root, path);
8910 out:
8911         btrfs_commit_transaction(trans, root);
8912         btrfs_free_path(path);
8913         return ret;
8914 }
8915
8916 static int zero_log_tree(struct btrfs_root *root)
8917 {
8918         struct btrfs_trans_handle *trans;
8919         int ret;
8920
8921         trans = btrfs_start_transaction(root, 1);
8922         if (IS_ERR(trans)) {
8923                 ret = PTR_ERR(trans);
8924                 return ret;
8925         }
8926         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
8927         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
8928         ret = btrfs_commit_transaction(trans, root);
8929         return ret;
8930 }
8931
8932 static int populate_csum(struct btrfs_trans_handle *trans,
8933                          struct btrfs_root *csum_root, char *buf, u64 start,
8934                          u64 len)
8935 {
8936         u64 offset = 0;
8937         u64 sectorsize;
8938         int ret = 0;
8939
8940         while (offset < len) {
8941                 sectorsize = csum_root->sectorsize;
8942                 ret = read_extent_data(csum_root, buf, start + offset,
8943                                        &sectorsize, 0);
8944                 if (ret)
8945                         break;
8946                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
8947                                             start + offset, buf, sectorsize);
8948                 if (ret)
8949                         break;
8950                 offset += sectorsize;
8951         }
8952         return ret;
8953 }
8954
8955 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
8956                                       struct btrfs_root *csum_root,
8957                                       struct btrfs_root *cur_root)
8958 {
8959         struct btrfs_path *path;
8960         struct btrfs_key key;
8961         struct extent_buffer *node;
8962         struct btrfs_file_extent_item *fi;
8963         char *buf = NULL;
8964         u64 start = 0;
8965         u64 len = 0;
8966         int slot = 0;
8967         int ret = 0;
8968
8969         path = btrfs_alloc_path();
8970         if (!path)
8971                 return -ENOMEM;
8972         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
8973         if (!buf) {
8974                 ret = -ENOMEM;
8975                 goto out;
8976         }
8977
8978         key.objectid = 0;
8979         key.offset = 0;
8980         key.type = 0;
8981
8982         ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
8983         if (ret < 0)
8984                 goto out;
8985         /* Iterate all regular file extents and fill its csum */
8986         while (1) {
8987                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
8988
8989                 if (key.type != BTRFS_EXTENT_DATA_KEY)
8990                         goto next;
8991                 node = path->nodes[0];
8992                 slot = path->slots[0];
8993                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
8994                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
8995                         goto next;
8996                 start = btrfs_file_extent_disk_bytenr(node, fi);
8997                 len = btrfs_file_extent_disk_num_bytes(node, fi);
8998
8999                 ret = populate_csum(trans, csum_root, buf, start, len);
9000                 if (ret == -EEXIST)
9001                         ret = 0;
9002                 if (ret < 0)
9003                         goto out;
9004 next:
9005                 /*
9006                  * TODO: if next leaf is corrupted, jump to nearest next valid
9007                  * leaf.
9008                  */
9009                 ret = btrfs_next_item(cur_root, path);
9010                 if (ret < 0)
9011                         goto out;
9012                 if (ret > 0) {
9013                         ret = 0;
9014                         goto out;
9015                 }
9016         }
9017
9018 out:
9019         btrfs_free_path(path);
9020         free(buf);
9021         return ret;
9022 }
9023
9024 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9025                                   struct btrfs_root *csum_root)
9026 {
9027         struct btrfs_fs_info *fs_info = csum_root->fs_info;
9028         struct btrfs_path *path;
9029         struct btrfs_root *tree_root = fs_info->tree_root;
9030         struct btrfs_root *cur_root;
9031         struct extent_buffer *node;
9032         struct btrfs_key key;
9033         int slot = 0;
9034         int ret = 0;
9035
9036         path = btrfs_alloc_path();
9037         if (!path)
9038                 return -ENOMEM;
9039
9040         key.objectid = BTRFS_FS_TREE_OBJECTID;
9041         key.offset = 0;
9042         key.type = BTRFS_ROOT_ITEM_KEY;
9043
9044         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9045         if (ret < 0)
9046                 goto out;
9047         if (ret > 0) {
9048                 ret = -ENOENT;
9049                 goto out;
9050         }
9051
9052         while (1) {
9053                 node = path->nodes[0];
9054                 slot = path->slots[0];
9055                 btrfs_item_key_to_cpu(node, &key, slot);
9056                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9057                         goto out;
9058                 if (key.type != BTRFS_ROOT_ITEM_KEY)
9059                         goto next;
9060                 if (!is_fstree(key.objectid))
9061                         goto next;
9062                 key.offset = (u64)-1;
9063
9064                 cur_root = btrfs_read_fs_root(fs_info, &key);
9065                 if (IS_ERR(cur_root) || !cur_root) {
9066                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9067                                 key.objectid);
9068                         goto out;
9069                 }
9070                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9071                                 cur_root);
9072                 if (ret < 0)
9073                         goto out;
9074 next:
9075                 ret = btrfs_next_item(tree_root, path);
9076                 if (ret > 0) {
9077                         ret = 0;
9078                         goto out;
9079                 }
9080                 if (ret < 0)
9081                         goto out;
9082         }
9083
9084 out:
9085         btrfs_free_path(path);
9086         return ret;
9087 }
9088
9089 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9090                                       struct btrfs_root *csum_root)
9091 {
9092         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9093         struct btrfs_path *path;
9094         struct btrfs_extent_item *ei;
9095         struct extent_buffer *leaf;
9096         char *buf;
9097         struct btrfs_key key;
9098         int ret;
9099
9100         path = btrfs_alloc_path();
9101         if (!path)
9102                 return -ENOMEM;
9103
9104         key.objectid = 0;
9105         key.type = BTRFS_EXTENT_ITEM_KEY;
9106         key.offset = 0;
9107
9108         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9109         if (ret < 0) {
9110                 btrfs_free_path(path);
9111                 return ret;
9112         }
9113
9114         buf = malloc(csum_root->sectorsize);
9115         if (!buf) {
9116                 btrfs_free_path(path);
9117                 return -ENOMEM;
9118         }
9119
9120         while (1) {
9121                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9122                         ret = btrfs_next_leaf(extent_root, path);
9123                         if (ret < 0)
9124                                 break;
9125                         if (ret) {
9126                                 ret = 0;
9127                                 break;
9128                         }
9129                 }
9130                 leaf = path->nodes[0];
9131
9132                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9133                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9134                         path->slots[0]++;
9135                         continue;
9136                 }
9137
9138                 ei = btrfs_item_ptr(leaf, path->slots[0],
9139                                     struct btrfs_extent_item);
9140                 if (!(btrfs_extent_flags(leaf, ei) &
9141                       BTRFS_EXTENT_FLAG_DATA)) {
9142                         path->slots[0]++;
9143                         continue;
9144                 }
9145
9146                 ret = populate_csum(trans, csum_root, buf, key.objectid,
9147                                     key.offset);
9148                 if (ret)
9149                         break;
9150                 path->slots[0]++;
9151         }
9152
9153         btrfs_free_path(path);
9154         free(buf);
9155         return ret;
9156 }
9157
9158 /*
9159  * Recalculate the csum and put it into the csum tree.
9160  *
9161  * Extent tree init will wipe out all the extent info, so in that case, we
9162  * can't depend on extent tree, but use fs tree.  If search_fs_tree is set, we
9163  * will use fs/subvol trees to init the csum tree.
9164  */
9165 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9166                           struct btrfs_root *csum_root,
9167                           int search_fs_tree)
9168 {
9169         if (search_fs_tree)
9170                 return fill_csum_tree_from_fs(trans, csum_root);
9171         else
9172                 return fill_csum_tree_from_extent(trans, csum_root);
9173 }
9174
9175 static void free_roots_info_cache(void)
9176 {
9177         if (!roots_info_cache)
9178                 return;
9179
9180         while (!cache_tree_empty(roots_info_cache)) {
9181                 struct cache_extent *entry;
9182                 struct root_item_info *rii;
9183
9184                 entry = first_cache_extent(roots_info_cache);
9185                 if (!entry)
9186                         break;
9187                 remove_cache_extent(roots_info_cache, entry);
9188                 rii = container_of(entry, struct root_item_info, cache_extent);
9189                 free(rii);
9190         }
9191
9192         free(roots_info_cache);
9193         roots_info_cache = NULL;
9194 }
9195
9196 static int build_roots_info_cache(struct btrfs_fs_info *info)
9197 {
9198         int ret = 0;
9199         struct btrfs_key key;
9200         struct extent_buffer *leaf;
9201         struct btrfs_path *path;
9202
9203         if (!roots_info_cache) {
9204                 roots_info_cache = malloc(sizeof(*roots_info_cache));
9205                 if (!roots_info_cache)
9206                         return -ENOMEM;
9207                 cache_tree_init(roots_info_cache);
9208         }
9209
9210         path = btrfs_alloc_path();
9211         if (!path)
9212                 return -ENOMEM;
9213
9214         key.objectid = 0;
9215         key.type = BTRFS_EXTENT_ITEM_KEY;
9216         key.offset = 0;
9217
9218         ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9219         if (ret < 0)
9220                 goto out;
9221         leaf = path->nodes[0];
9222
9223         while (1) {
9224                 struct btrfs_key found_key;
9225                 struct btrfs_extent_item *ei;
9226                 struct btrfs_extent_inline_ref *iref;
9227                 int slot = path->slots[0];
9228                 int type;
9229                 u64 flags;
9230                 u64 root_id;
9231                 u8 level;
9232                 struct cache_extent *entry;
9233                 struct root_item_info *rii;
9234
9235                 if (slot >= btrfs_header_nritems(leaf)) {
9236                         ret = btrfs_next_leaf(info->extent_root, path);
9237                         if (ret < 0) {
9238                                 break;
9239                         } else if (ret) {
9240                                 ret = 0;
9241                                 break;
9242                         }
9243                         leaf = path->nodes[0];
9244                         slot = path->slots[0];
9245                 }
9246
9247                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9248
9249                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9250                     found_key.type != BTRFS_METADATA_ITEM_KEY)
9251                         goto next;
9252
9253                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9254                 flags = btrfs_extent_flags(leaf, ei);
9255
9256                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9257                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
9258                         goto next;
9259
9260                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9261                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9262                         level = found_key.offset;
9263                 } else {
9264                         struct btrfs_tree_block_info *binfo;
9265
9266                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
9267                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9268                         level = btrfs_tree_block_level(leaf, binfo);
9269                 }
9270
9271                 /*
9272                  * For a root extent, it must be of the following type and the
9273                  * first (and only one) iref in the item.
9274                  */
9275                 type = btrfs_extent_inline_ref_type(leaf, iref);
9276                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9277                         goto next;
9278
9279                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9280                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9281                 if (!entry) {
9282                         rii = malloc(sizeof(struct root_item_info));
9283                         if (!rii) {
9284                                 ret = -ENOMEM;
9285                                 goto out;
9286                         }
9287                         rii->cache_extent.start = root_id;
9288                         rii->cache_extent.size = 1;
9289                         rii->level = (u8)-1;
9290                         entry = &rii->cache_extent;
9291                         ret = insert_cache_extent(roots_info_cache, entry);
9292                         ASSERT(ret == 0);
9293                 } else {
9294                         rii = container_of(entry, struct root_item_info,
9295                                            cache_extent);
9296                 }
9297
9298                 ASSERT(rii->cache_extent.start == root_id);
9299                 ASSERT(rii->cache_extent.size == 1);
9300
9301                 if (level > rii->level || rii->level == (u8)-1) {
9302                         rii->level = level;
9303                         rii->bytenr = found_key.objectid;
9304                         rii->gen = btrfs_extent_generation(leaf, ei);
9305                         rii->node_count = 1;
9306                 } else if (level == rii->level) {
9307                         rii->node_count++;
9308                 }
9309 next:
9310                 path->slots[0]++;
9311         }
9312
9313 out:
9314         btrfs_free_path(path);
9315
9316         return ret;
9317 }
9318
9319 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9320                                   struct btrfs_path *path,
9321                                   const struct btrfs_key *root_key,
9322                                   const int read_only_mode)
9323 {
9324         const u64 root_id = root_key->objectid;
9325         struct cache_extent *entry;
9326         struct root_item_info *rii;
9327         struct btrfs_root_item ri;
9328         unsigned long offset;
9329
9330         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9331         if (!entry) {
9332                 fprintf(stderr,
9333                         "Error: could not find extent items for root %llu\n",
9334                         root_key->objectid);
9335                 return -ENOENT;
9336         }
9337
9338         rii = container_of(entry, struct root_item_info, cache_extent);
9339         ASSERT(rii->cache_extent.start == root_id);
9340         ASSERT(rii->cache_extent.size == 1);
9341
9342         if (rii->node_count != 1) {
9343                 fprintf(stderr,
9344                         "Error: could not find btree root extent for root %llu\n",
9345                         root_id);
9346                 return -ENOENT;
9347         }
9348
9349         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9350         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9351
9352         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9353             btrfs_root_level(&ri) != rii->level ||
9354             btrfs_root_generation(&ri) != rii->gen) {
9355
9356                 /*
9357                  * If we're in repair mode but our caller told us to not update
9358                  * the root item, i.e. just check if it needs to be updated, don't
9359                  * print this message, since the caller will call us again shortly
9360                  * for the same root item without read only mode (the caller will
9361                  * open a transaction first).
9362                  */
9363                 if (!(read_only_mode && repair))
9364                         fprintf(stderr,
9365                                 "%sroot item for root %llu,"
9366                                 " current bytenr %llu, current gen %llu, current level %u,"
9367                                 " new bytenr %llu, new gen %llu, new level %u\n",
9368                                 (read_only_mode ? "" : "fixing "),
9369                                 root_id,
9370                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9371                                 btrfs_root_level(&ri),
9372                                 rii->bytenr, rii->gen, rii->level);
9373
9374                 if (btrfs_root_generation(&ri) > rii->gen) {
9375                         fprintf(stderr,
9376                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9377                                 root_id, btrfs_root_generation(&ri), rii->gen);
9378                         return -EINVAL;
9379                 }
9380
9381                 if (!read_only_mode) {
9382                         btrfs_set_root_bytenr(&ri, rii->bytenr);
9383                         btrfs_set_root_level(&ri, rii->level);
9384                         btrfs_set_root_generation(&ri, rii->gen);
9385                         write_extent_buffer(path->nodes[0], &ri,
9386                                             offset, sizeof(ri));
9387                 }
9388
9389                 return 1;
9390         }
9391
9392         return 0;
9393 }
9394
9395 /*
9396  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9397  * caused read-only snapshots to be corrupted if they were created at a moment
9398  * when the source subvolume/snapshot had orphan items. The issue was that the
9399  * on-disk root items became incorrect, referring to the pre orphan cleanup root
9400  * node instead of the post orphan cleanup root node.
9401  * So this function, and its callees, just detects and fixes those cases. Even
9402  * though the regression was for read-only snapshots, this function applies to
9403  * any snapshot/subvolume root.
9404  * This must be run before any other repair code - not doing it so, makes other
9405  * repair code delete or modify backrefs in the extent tree for example, which
9406  * will result in an inconsistent fs after repairing the root items.
9407  */
9408 static int repair_root_items(struct btrfs_fs_info *info)
9409 {
9410         struct btrfs_path *path = NULL;
9411         struct btrfs_key key;
9412         struct extent_buffer *leaf;
9413         struct btrfs_trans_handle *trans = NULL;
9414         int ret = 0;
9415         int bad_roots = 0;
9416         int need_trans = 0;
9417
9418         ret = build_roots_info_cache(info);
9419         if (ret)
9420                 goto out;
9421
9422         path = btrfs_alloc_path();
9423         if (!path) {
9424                 ret = -ENOMEM;
9425                 goto out;
9426         }
9427
9428         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9429         key.type = BTRFS_ROOT_ITEM_KEY;
9430         key.offset = 0;
9431
9432 again:
9433         /*
9434          * Avoid opening and committing transactions if a leaf doesn't have
9435          * any root items that need to be fixed, so that we avoid rotating
9436          * backup roots unnecessarily.
9437          */
9438         if (need_trans) {
9439                 trans = btrfs_start_transaction(info->tree_root, 1);
9440                 if (IS_ERR(trans)) {
9441                         ret = PTR_ERR(trans);
9442                         goto out;
9443                 }
9444         }
9445
9446         ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9447                                 0, trans ? 1 : 0);
9448         if (ret < 0)
9449                 goto out;
9450         leaf = path->nodes[0];
9451
9452         while (1) {
9453                 struct btrfs_key found_key;
9454
9455                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9456                         int no_more_keys = find_next_key(path, &key);
9457
9458                         btrfs_release_path(path);
9459                         if (trans) {
9460                                 ret = btrfs_commit_transaction(trans,
9461                                                                info->tree_root);
9462                                 trans = NULL;
9463                                 if (ret < 0)
9464                                         goto out;
9465                         }
9466                         need_trans = 0;
9467                         if (no_more_keys)
9468                                 break;
9469                         goto again;
9470                 }
9471
9472                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9473
9474                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9475                         goto next;
9476                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9477                         goto next;
9478
9479                 ret = maybe_repair_root_item(info, path, &found_key,
9480                                              trans ? 0 : 1);
9481                 if (ret < 0)
9482                         goto out;
9483                 if (ret) {
9484                         if (!trans && repair) {
9485                                 need_trans = 1;
9486                                 key = found_key;
9487                                 btrfs_release_path(path);
9488                                 goto again;
9489                         }
9490                         bad_roots++;
9491                 }
9492 next:
9493                 path->slots[0]++;
9494         }
9495         ret = 0;
9496 out:
9497         free_roots_info_cache();
9498         btrfs_free_path(path);
9499         if (trans)
9500                 btrfs_commit_transaction(trans, info->tree_root);
9501         if (ret < 0)
9502                 return ret;
9503
9504         return bad_roots;
9505 }
9506
9507 const char * const cmd_check_usage[] = {
9508         "btrfs check [options] <device>",
9509         "Check structural integrity of a filesystem (unmounted).",
9510         "Check structural integrity of an unmounted filesystem. Verify internal",
9511         "trees' consistency and item connectivity. In the repair mode try to",
9512         "fix the problems found.",
9513         "WARNING: the repair mode is considered dangerous",
9514         "",
9515         "-s|--super <superblock>     use this superblock copy",
9516         "-b|--backup                 use the first valid backup root copy",
9517         "--repair                    try to repair the filesystem",
9518         "--readonly                  run in read-only mode (default)",
9519         "--init-csum-tree            create a new CRC tree",
9520         "--init-extent-tree          create a new extent tree",
9521         "--check-data-csum           verify checksums of data blocks",
9522         "-Q|--qgroup-report           print a report on qgroup consistency",
9523         "-E|--subvol-extents <subvolid>",
9524         "                            print subvolume extents and sharing state",
9525         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
9526         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
9527         "-p|--progress               indicate progress",
9528         NULL
9529 };
9530
9531 int cmd_check(int argc, char **argv)
9532 {
9533         struct cache_tree root_cache;
9534         struct btrfs_root *root;
9535         struct btrfs_fs_info *info;
9536         u64 bytenr = 0;
9537         u64 subvolid = 0;
9538         u64 tree_root_bytenr = 0;
9539         u64 chunk_root_bytenr = 0;
9540         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9541         int ret;
9542         u64 num;
9543         int init_csum_tree = 0;
9544         int readonly = 0;
9545         int qgroup_report = 0;
9546         enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9547
9548         while(1) {
9549                 int c;
9550                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9551                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9552                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9553                 static const struct option long_options[] = {
9554                         { "super", required_argument, NULL, 's' },
9555                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9556                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9557                         { "init-csum-tree", no_argument, NULL,
9558                                 GETOPT_VAL_INIT_CSUM },
9559                         { "init-extent-tree", no_argument, NULL,
9560                                 GETOPT_VAL_INIT_EXTENT },
9561                         { "check-data-csum", no_argument, NULL,
9562                                 GETOPT_VAL_CHECK_CSUM },
9563                         { "backup", no_argument, NULL, 'b' },
9564                         { "subvol-extents", required_argument, NULL, 'E' },
9565                         { "qgroup-report", no_argument, NULL, 'Q' },
9566                         { "tree-root", required_argument, NULL, 'r' },
9567                         { "chunk-root", required_argument, NULL,
9568                                 GETOPT_VAL_CHUNK_TREE },
9569                         { "progress", no_argument, NULL, 'p' },
9570                         { NULL, 0, NULL, 0}
9571                 };
9572
9573                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9574                 if (c < 0)
9575                         break;
9576                 switch(c) {
9577                         case 'a': /* ignored */ break;
9578                         case 'b':
9579                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9580                                 break;
9581                         case 's':
9582                                 num = arg_strtou64(optarg);
9583                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9584                                         fprintf(stderr,
9585                                                 "ERROR: super mirror should be less than: %d\n",
9586                                                 BTRFS_SUPER_MIRROR_MAX);
9587                                         exit(1);
9588                                 }
9589                                 bytenr = btrfs_sb_offset(((int)num));
9590                                 printf("using SB copy %llu, bytenr %llu\n", num,
9591                                        (unsigned long long)bytenr);
9592                                 break;
9593                         case 'Q':
9594                                 qgroup_report = 1;
9595                                 break;
9596                         case 'E':
9597                                 subvolid = arg_strtou64(optarg);
9598                                 break;
9599                         case 'r':
9600                                 tree_root_bytenr = arg_strtou64(optarg);
9601                                 break;
9602                         case GETOPT_VAL_CHUNK_TREE:
9603                                 chunk_root_bytenr = arg_strtou64(optarg);
9604                                 break;
9605                         case 'p':
9606                                 ctx.progress_enabled = true;
9607                                 break;
9608                         case '?':
9609                         case 'h':
9610                                 usage(cmd_check_usage);
9611                         case GETOPT_VAL_REPAIR:
9612                                 printf("enabling repair mode\n");
9613                                 repair = 1;
9614                                 ctree_flags |= OPEN_CTREE_WRITES;
9615                                 break;
9616                         case GETOPT_VAL_READONLY:
9617                                 readonly = 1;
9618                                 break;
9619                         case GETOPT_VAL_INIT_CSUM:
9620                                 printf("Creating a new CRC tree\n");
9621                                 init_csum_tree = 1;
9622                                 repair = 1;
9623                                 ctree_flags |= OPEN_CTREE_WRITES;
9624                                 break;
9625                         case GETOPT_VAL_INIT_EXTENT:
9626                                 init_extent_tree = 1;
9627                                 ctree_flags |= (OPEN_CTREE_WRITES |
9628                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
9629                                 repair = 1;
9630                                 break;
9631                         case GETOPT_VAL_CHECK_CSUM:
9632                                 check_data_csum = 1;
9633                                 break;
9634                 }
9635         }
9636
9637         if (check_argc_exact(argc - optind, 1))
9638                 usage(cmd_check_usage);
9639
9640         if (ctx.progress_enabled) {
9641                 ctx.tp = TASK_NOTHING;
9642                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9643         }
9644
9645         /* This check is the only reason for --readonly to exist */
9646         if (readonly && repair) {
9647                 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9648                 exit(1);
9649         }
9650
9651         radix_tree_init();
9652         cache_tree_init(&root_cache);
9653
9654         if((ret = check_mounted(argv[optind])) < 0) {
9655                 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9656                 goto err_out;
9657         } else if(ret) {
9658                 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9659                 ret = -EBUSY;
9660                 goto err_out;
9661         }
9662
9663         /* only allow partial opening under repair mode */
9664         if (repair)
9665                 ctree_flags |= OPEN_CTREE_PARTIAL;
9666
9667         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9668                                   chunk_root_bytenr, ctree_flags);
9669         if (!info) {
9670                 fprintf(stderr, "Couldn't open file system\n");
9671                 ret = -EIO;
9672                 goto err_out;
9673         }
9674
9675         global_info = info;
9676         root = info->fs_root;
9677
9678         /*
9679          * repair mode will force us to commit transaction which
9680          * will make us fail to load log tree when mounting.
9681          */
9682         if (repair && btrfs_super_log_root(info->super_copy)) {
9683                 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9684                 if (!ret) {
9685                         ret = 1;
9686                         goto close_out;
9687                 }
9688                 ret = zero_log_tree(root);
9689                 if (ret) {
9690                         fprintf(stderr, "fail to zero log tree\n");
9691                         goto close_out;
9692                 }
9693         }
9694
9695         uuid_unparse(info->super_copy->fsid, uuidbuf);
9696         if (qgroup_report) {
9697                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9698                        uuidbuf);
9699                 ret = qgroup_verify_all(info);
9700                 if (ret == 0)
9701                         ret = report_qgroups(1);
9702                 goto close_out;
9703         }
9704         if (subvolid) {
9705                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9706                        subvolid, argv[optind], uuidbuf);
9707                 ret = print_extent_state(info, subvolid);
9708                 goto close_out;
9709         }
9710         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9711
9712         if (!extent_buffer_uptodate(info->tree_root->node) ||
9713             !extent_buffer_uptodate(info->dev_root->node) ||
9714             !extent_buffer_uptodate(info->chunk_root->node)) {
9715                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9716                 ret = -EIO;
9717                 goto close_out;
9718         }
9719
9720         if (init_extent_tree || init_csum_tree) {
9721                 struct btrfs_trans_handle *trans;
9722
9723                 trans = btrfs_start_transaction(info->extent_root, 0);
9724                 if (IS_ERR(trans)) {
9725                         fprintf(stderr, "Error starting transaction\n");
9726                         ret = PTR_ERR(trans);
9727                         goto close_out;
9728                 }
9729
9730                 if (init_extent_tree) {
9731                         printf("Creating a new extent tree\n");
9732                         ret = reinit_extent_tree(trans, info);
9733                         if (ret)
9734                                 goto close_out;
9735                 }
9736
9737                 if (init_csum_tree) {
9738                         fprintf(stderr, "Reinit crc root\n");
9739                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9740                         if (ret) {
9741                                 fprintf(stderr, "crc root initialization failed\n");
9742                                 ret = -EIO;
9743                                 goto close_out;
9744                         }
9745
9746                         ret = fill_csum_tree(trans, info->csum_root,
9747                                              init_extent_tree);
9748                         if (ret) {
9749                                 fprintf(stderr, "crc refilling failed\n");
9750                                 return -EIO;
9751                         }
9752                 }
9753                 /*
9754                  * Ok now we commit and run the normal fsck, which will add
9755                  * extent entries for all of the items it finds.
9756                  */
9757                 ret = btrfs_commit_transaction(trans, info->extent_root);
9758                 if (ret)
9759                         goto close_out;
9760         }
9761         if (!extent_buffer_uptodate(info->extent_root->node)) {
9762                 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9763                 ret = -EIO;
9764                 goto close_out;
9765         }
9766         if (!extent_buffer_uptodate(info->csum_root->node)) {
9767                 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9768                 ret = -EIO;
9769                 goto close_out;
9770         }
9771
9772         if (!ctx.progress_enabled)
9773                 fprintf(stderr, "checking extents\n");
9774         ret = check_chunks_and_extents(root);
9775         if (ret)
9776                 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9777
9778         ret = repair_root_items(info);
9779         if (ret < 0)
9780                 goto close_out;
9781         if (repair) {
9782                 fprintf(stderr, "Fixed %d roots.\n", ret);
9783                 ret = 0;
9784         } else if (ret > 0) {
9785                 fprintf(stderr,
9786                        "Found %d roots with an outdated root item.\n",
9787                        ret);
9788                 fprintf(stderr,
9789                         "Please run a filesystem check with the option --repair to fix them.\n");
9790                 ret = 1;
9791                 goto close_out;
9792         }
9793
9794         if (!ctx.progress_enabled) {
9795                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9796                         fprintf(stderr, "checking free space tree\n");
9797                 else
9798                         fprintf(stderr, "checking free space cache\n");
9799         }
9800         ret = check_space_cache(root);
9801         if (ret)
9802                 goto out;
9803
9804         /*
9805          * We used to have to have these hole extents in between our real
9806          * extents so if we don't have this flag set we need to make sure there
9807          * are no gaps in the file extents for inodes, otherwise we can just
9808          * ignore it when this happens.
9809          */
9810         no_holes = btrfs_fs_incompat(root->fs_info,
9811                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9812         if (!ctx.progress_enabled)
9813                 fprintf(stderr, "checking fs roots\n");
9814         ret = check_fs_roots(root, &root_cache);
9815         if (ret)
9816                 goto out;
9817
9818         fprintf(stderr, "checking csums\n");
9819         ret = check_csums(root);
9820         if (ret)
9821                 goto out;
9822
9823         fprintf(stderr, "checking root refs\n");
9824         ret = check_root_refs(root, &root_cache);
9825         if (ret)
9826                 goto out;
9827
9828         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9829                 struct extent_buffer *eb;
9830
9831                 eb = list_first_entry(&root->fs_info->recow_ebs,
9832                                       struct extent_buffer, recow);
9833                 list_del_init(&eb->recow);
9834                 ret = recow_extent_buffer(root, eb);
9835                 if (ret)
9836                         break;
9837         }
9838
9839         while (!list_empty(&delete_items)) {
9840                 struct bad_item *bad;
9841
9842                 bad = list_first_entry(&delete_items, struct bad_item, list);
9843                 list_del_init(&bad->list);
9844                 if (repair)
9845                         ret = delete_bad_item(root, bad);
9846                 free(bad);
9847         }
9848
9849         if (info->quota_enabled) {
9850                 int err;
9851                 fprintf(stderr, "checking quota groups\n");
9852                 err = qgroup_verify_all(info);
9853                 if (err)
9854                         goto out;
9855         }
9856
9857         if (!list_empty(&root->fs_info->recow_ebs)) {
9858                 fprintf(stderr, "Transid errors in file system\n");
9859                 ret = 1;
9860         }
9861 out:
9862         /* Don't override original ret */
9863         if (ret)
9864                 report_qgroups(0);
9865         else
9866                 ret = report_qgroups(0);
9867         if (found_old_backref) { /*
9868                  * there was a disk format change when mixed
9869                  * backref was in testing tree. The old format
9870                  * existed about one week.
9871                  */
9872                 printf("\n * Found old mixed backref format. "
9873                        "The old format is not supported! *"
9874                        "\n * Please mount the FS in readonly mode, "
9875                        "backup data and re-format the FS. *\n\n");
9876                 ret = 1;
9877         }
9878         printf("found %llu bytes used err is %d\n",
9879                (unsigned long long)bytes_used, ret);
9880         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9881         printf("total tree bytes: %llu\n",
9882                (unsigned long long)total_btree_bytes);
9883         printf("total fs tree bytes: %llu\n",
9884                (unsigned long long)total_fs_tree_bytes);
9885         printf("total extent tree bytes: %llu\n",
9886                (unsigned long long)total_extent_tree_bytes);
9887         printf("btree space waste bytes: %llu\n",
9888                (unsigned long long)btree_space_waste);
9889         printf("file data blocks allocated: %llu\n referenced %llu\n",
9890                 (unsigned long long)data_bytes_allocated,
9891                 (unsigned long long)data_bytes_referenced);
9892
9893         free_root_recs_tree(&root_cache);
9894 close_out:
9895         close_ctree(root);
9896 err_out:
9897         if (ctx.progress_enabled)
9898                 task_deinit(ctx.info);
9899
9900         return ret;
9901 }